polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, Function, FunctionBody, Identifier, Null};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, TokenType, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
169/// Enumeration of all supported SQL dialects.
170///
171/// Each variant corresponds to a specific SQL database engine or query language.
172/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
173/// and is used as the default when no dialect is specified.
174///
175/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
176/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
177#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
178#[serde(rename_all = "lowercase")]
179pub enum DialectType {
180 /// Standard SQL with no dialect-specific behavior (default).
181 Generic,
182 /// PostgreSQL -- advanced open-source relational database.
183 PostgreSQL,
184 /// MySQL -- widely-used open-source relational database (also accepts "mysql").
185 MySQL,
186 /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
187 BigQuery,
188 /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
189 Snowflake,
190 /// DuckDB -- in-process analytical database with modern SQL extensions.
191 DuckDB,
192 /// SQLite -- lightweight embedded relational database.
193 SQLite,
194 /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
195 Hive,
196 /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
197 Spark,
198 /// Trino -- distributed SQL query engine (formerly PrestoSQL).
199 Trino,
200 /// PrestoDB -- distributed SQL query engine for big data.
201 Presto,
202 /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
203 Redshift,
204 /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
205 TSQL,
206 /// Oracle Database -- commercial relational database with PL/SQL extensions.
207 Oracle,
208 /// ClickHouse -- column-oriented OLAP database for real-time analytics.
209 ClickHouse,
210 /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
211 Databricks,
212 /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
213 Athena,
214 /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
215 Teradata,
216 /// Apache Doris -- real-time analytical database (MySQL-compatible).
217 Doris,
218 /// StarRocks -- sub-second OLAP database (MySQL-compatible).
219 StarRocks,
220 /// Materialize -- streaming SQL database built on differential dataflow.
221 Materialize,
222 /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
223 RisingWave,
224 /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
225 SingleStore,
226 /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
227 CockroachDB,
228 /// TiDB -- distributed HTAP database with MySQL compatibility.
229 TiDB,
230 /// Apache Druid -- real-time analytics database.
231 Druid,
232 /// Apache Solr -- search platform with SQL interface.
233 Solr,
234 /// Tableau -- data visualization platform with its own SQL dialect.
235 Tableau,
236 /// Dune Analytics -- blockchain analytics SQL engine.
237 Dune,
238 /// Microsoft Fabric -- unified analytics platform (T-SQL based).
239 Fabric,
240 /// Apache Drill -- schema-free SQL query engine for big data.
241 Drill,
242 /// Dremio -- data lakehouse platform with Arrow-based query engine.
243 Dremio,
244 /// Exasol -- in-memory analytic database.
245 Exasol,
246 /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions.
247 DataFusion,
248}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier (the only required method).
/// - Optional overrides for tokenizer and generator configuration (defaults to
///   `TokenizerConfig::default()` / `GeneratorConfig::default()`).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// Every provided method has a pass-through default, so a minimal dialect only needs to
/// implement [`dialect_type`](DialectImpl::dialect_type) and override the methods that
/// differ from standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default returns
    /// `TokenizerConfig::default()`.
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default returns
    /// `GeneratorConfig::default()`.
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    ///
    /// The default implementation returns `expr` untouched and never fails.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`.
    ///
    /// The default implementation returns `expr` untouched and never fails.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
480/// Convert DuckDB C-style format strings to Presto C-style format strings.
481/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
482#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Directive rewrites, tried in order at every `%`. Order matters:
    //   * `%%` comes first so an escaped percent is consumed as a unit and the
    //     character after it is never mistaken for a directive (`%%M` must stay `%%M`).
    //   * `%H:%M:%S` precedes `%M` / `%S` so the whole time pattern collapses to `%T`.
    // Directives not listed here (e.g. `%Y`, `%m`, `%d`, `%H`) are copied through as-is.
    const DIRECTIVES: [(&str, &str); 8] = [
        ("%%", "%%"),
        ("%H:%M:%S", "%T"),
        ("%-m", "%c"),
        ("%-d", "%e"),
        ("%-I", "%l"),
        ("%-H", "%k"),
        ("%M", "%i"),
        ("%S", "%s"),
    ];

    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;

    // Single left-to-right scan: copy literal text up to the next `%`, then either
    // rewrite the directive found there or copy the lone `%` unchanged.
    while let Some(at) = rest.find('%') {
        out.push_str(&rest[..at]);
        rest = &rest[at..];

        match DIRECTIVES.iter().find(|(from, _)| rest.starts_with(*from)) {
            Some(&(from, to)) => {
                out.push_str(to);
                rest = &rest[from.len()..];
            }
            None => {
                // `%` is a single ASCII byte, so slicing one byte past it is always
                // on a character boundary.
                out.push('%');
                rest = &rest[1..];
            }
        }
    }

    out.push_str(rest);
    out
}
505
506/// Convert DuckDB C-style format strings to BigQuery format strings.
507/// BigQuery uses a mix of strftime-like directives.
508#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Directive rewrites, tried in order at every `%`. `%%` comes first so an escaped
    // percent is consumed as a unit and the text after it is never treated as a
    // directive (`%%-d` must stay `%%-d`). The combined date-time pattern
    // `%Y-%m-%d %H:%M:%S` needs no entry of its own: its date and time halves are
    // rewritten independently, giving `%F %T`.
    const DIRECTIVES: [(&str, &str); 4] = [
        ("%%", "%%"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
        ("%-d", "%e"),
    ];

    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;

    // Single left-to-right scan: copy literal text up to the next `%`, then either
    // rewrite the directive found there or copy the lone `%` unchanged.
    while let Some(at) = rest.find('%') {
        out.push_str(&rest[..at]);
        rest = &rest[at..];

        match DIRECTIVES.iter().find(|(from, _)| rest.starts_with(*from)) {
            Some(&(from, to)) => {
                out.push_str(to);
                rest = &rest[from.len()..];
            }
            None => {
                // `%` is a single ASCII byte, so slicing one byte past it is always
                // on a character boundary.
                out.push('%');
                rest = &rest[1..];
            }
        }
    }

    out.push_str(rest);
    out
}
518
/// A unit of work on the explicit task stack used by `transform_recursive_inner`.
///
/// The traversal starts with a single `Visit` task for the root expression and pops
/// tasks until the stack is empty.
#[derive(Debug)]
enum TransformTask {
    /// An expression that still has to be processed (leaf nodes are transformed
    /// immediately; composite nodes schedule their children plus a `Finish` task).
    Visit(Expression),
    /// Deferred completion of a composite node. It is pushed *before* the node's
    /// children are scheduled, so it is popped only after all of them have been handled.
    Finish(FinishTask),
}
524
/// Describes how a composite node is completed once its children have been processed.
///
/// Each variant carries the partially dismantled parent node. Only the scheduling side
/// of `Unary` and `Binary` is visible alongside this definition; the notes on the other
/// variants are inferred from their names and payloads.
/// NOTE(review): confirm against the `Finish` handling in `transform_recursive_inner`.
#[derive(Debug)]
enum FinishTask {
    /// Parent with a single child (`this`), which was swapped out for a `Null`
    /// placeholder when the child was scheduled.
    Unary(Expression),
    /// Parent with `left` and `right` children, both swapped out for `Null`
    /// placeholders when the children were scheduled (left is visited first).
    Binary(Expression),
    /// Presumably a cast-style node with one expression child -- TODO confirm.
    CastLike(Expression),
    /// Presumably a node with a list of children; the `usize` looks like the number of
    /// child results to collect -- TODO confirm.
    List(Expression, usize),
    /// A `FROM` clause; the `usize` looks like the number of child results to
    /// collect -- TODO confirm.
    From(crate::expressions::From, usize),
    /// A `SELECT` whose clauses are tracked by a [`SelectFrame`].
    Select(SelectFrame),
    /// Presumably a set operation (e.g. UNION) whose operands were scheduled
    /// separately -- TODO confirm.
    SetOp(Expression),
}
535
/// Bookkeeping carried by `FinishTask::Select` while a `SELECT` is being transformed.
///
/// The code that fills and consumes this frame is not adjacent to the definition; the
/// field notes below are inferred from the field names.
/// NOTE(review): presumably the counts/flags record which clause results must be taken
/// back off the result stack when the select is finished -- confirm against
/// `transform_recursive_inner`.
#[derive(Debug)]
struct SelectFrame {
    /// The select node itself (boxed).
    select: Box<crate::expressions::Select>,
    /// Number of projection expressions -- presumably; TODO confirm.
    expr_count: usize,
    /// Whether a FROM clause was present -- presumably; TODO confirm.
    from_present: bool,
    /// Whether a WHERE clause was present -- presumably; TODO confirm.
    where_present: bool,
    /// Number of GROUP BY expressions -- presumably; TODO confirm.
    group_by_count: usize,
    /// Whether a HAVING clause was present -- presumably; TODO confirm.
    having_present: bool,
    /// Whether a QUALIFY clause was present -- presumably; TODO confirm.
    qualify_present: bool,
}
546
547fn transform_pop_result(results: &mut Vec<Expression>) -> Result<Expression> {
548 results
549 .pop()
550 .ok_or_else(|| crate::error::Error::Internal("transform stack underflow".to_string()))
551}
552
553fn transform_pop_results(results: &mut Vec<Expression>, count: usize) -> Result<Vec<Expression>> {
554 if results.len() < count {
555 return Err(crate::error::Error::Internal(
556 "transform result stack underflow".to_string(),
557 ));
558 }
559 Ok(results.split_off(results.len() - count))
560}
561
562/// Applies a transform function bottom-up through an entire expression tree.
563///
564/// The public entrypoint uses an explicit task stack for the recursion-heavy shapes
565/// that dominate deeply nested SQL (nested SELECT/FROM/SUBQUERY chains, set-operation
566/// trees, and common binary/unary expression chains). Less common shapes currently
567/// reuse the reference recursive implementation so semantics stay identical while
568/// the hot path avoids stack growth.
569pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
570where
571 F: Fn(Expression) -> Result<Expression>,
572{
573 #[cfg(feature = "stacker")]
574 {
575 let red_zone = if cfg!(debug_assertions) {
576 4 * 1024 * 1024
577 } else {
578 1024 * 1024
579 };
580 stacker::maybe_grow(red_zone, 8 * 1024 * 1024, move || {
581 transform_recursive_inner(expr, transform_fn)
582 })
583 }
584 #[cfg(not(feature = "stacker"))]
585 {
586 transform_recursive_inner(expr, transform_fn)
587 }
588}
589
590fn transform_recursive_inner<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
591where
592 F: Fn(Expression) -> Result<Expression>,
593{
594 let mut tasks = vec![TransformTask::Visit(expr)];
595 let mut results = Vec::new();
596
597 while let Some(task) = tasks.pop() {
598 match task {
599 TransformTask::Visit(expr) => {
600 if matches!(
601 &expr,
602 Expression::Literal(_)
603 | Expression::Boolean(_)
604 | Expression::Null(_)
605 | Expression::Identifier(_)
606 | Expression::Star(_)
607 | Expression::Parameter(_)
608 | Expression::Placeholder(_)
609 | Expression::SessionParameter(_)
610 ) {
611 results.push(transform_fn(expr)?);
612 continue;
613 }
614
615 match expr {
616 Expression::Alias(mut alias) => {
617 let child = std::mem::replace(&mut alias.this, Expression::Null(Null));
618 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Alias(
619 alias,
620 ))));
621 tasks.push(TransformTask::Visit(child));
622 }
623 Expression::Paren(mut paren) => {
624 let child = std::mem::replace(&mut paren.this, Expression::Null(Null));
625 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Paren(
626 paren,
627 ))));
628 tasks.push(TransformTask::Visit(child));
629 }
630 Expression::Not(mut not) => {
631 let child = std::mem::replace(&mut not.this, Expression::Null(Null));
632 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Not(
633 not,
634 ))));
635 tasks.push(TransformTask::Visit(child));
636 }
637 Expression::Neg(mut neg) => {
638 let child = std::mem::replace(&mut neg.this, Expression::Null(Null));
639 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Neg(
640 neg,
641 ))));
642 tasks.push(TransformTask::Visit(child));
643 }
644 Expression::IsNull(mut expr) => {
645 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
646 tasks.push(TransformTask::Finish(FinishTask::Unary(
647 Expression::IsNull(expr),
648 )));
649 tasks.push(TransformTask::Visit(child));
650 }
651 Expression::IsTrue(mut expr) => {
652 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
653 tasks.push(TransformTask::Finish(FinishTask::Unary(
654 Expression::IsTrue(expr),
655 )));
656 tasks.push(TransformTask::Visit(child));
657 }
658 Expression::IsFalse(mut expr) => {
659 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
660 tasks.push(TransformTask::Finish(FinishTask::Unary(
661 Expression::IsFalse(expr),
662 )));
663 tasks.push(TransformTask::Visit(child));
664 }
665 Expression::Subquery(mut subquery) => {
666 let child = std::mem::replace(&mut subquery.this, Expression::Null(Null));
667 tasks.push(TransformTask::Finish(FinishTask::Unary(
668 Expression::Subquery(subquery),
669 )));
670 tasks.push(TransformTask::Visit(child));
671 }
672 Expression::Exists(mut exists) => {
673 let child = std::mem::replace(&mut exists.this, Expression::Null(Null));
674 tasks.push(TransformTask::Finish(FinishTask::Unary(
675 Expression::Exists(exists),
676 )));
677 tasks.push(TransformTask::Visit(child));
678 }
679 Expression::TableArgument(mut arg) => {
680 let child = std::mem::replace(&mut arg.this, Expression::Null(Null));
681 tasks.push(TransformTask::Finish(FinishTask::Unary(
682 Expression::TableArgument(arg),
683 )));
684 tasks.push(TransformTask::Visit(child));
685 }
686 Expression::And(mut op) => {
687 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
688 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
689 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::And(
690 op,
691 ))));
692 tasks.push(TransformTask::Visit(right));
693 tasks.push(TransformTask::Visit(left));
694 }
695 Expression::Or(mut op) => {
696 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
697 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
698 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Or(
699 op,
700 ))));
701 tasks.push(TransformTask::Visit(right));
702 tasks.push(TransformTask::Visit(left));
703 }
704 Expression::Add(mut op) => {
705 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
706 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
707 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Add(
708 op,
709 ))));
710 tasks.push(TransformTask::Visit(right));
711 tasks.push(TransformTask::Visit(left));
712 }
713 Expression::Sub(mut op) => {
714 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
715 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
716 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Sub(
717 op,
718 ))));
719 tasks.push(TransformTask::Visit(right));
720 tasks.push(TransformTask::Visit(left));
721 }
722 Expression::Mul(mut op) => {
723 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
724 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
725 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mul(
726 op,
727 ))));
728 tasks.push(TransformTask::Visit(right));
729 tasks.push(TransformTask::Visit(left));
730 }
731 Expression::Div(mut op) => {
732 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
733 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
734 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Div(
735 op,
736 ))));
737 tasks.push(TransformTask::Visit(right));
738 tasks.push(TransformTask::Visit(left));
739 }
740 Expression::Eq(mut op) => {
741 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
742 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
743 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Eq(
744 op,
745 ))));
746 tasks.push(TransformTask::Visit(right));
747 tasks.push(TransformTask::Visit(left));
748 }
749 Expression::Lt(mut op) => {
750 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
751 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
752 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lt(
753 op,
754 ))));
755 tasks.push(TransformTask::Visit(right));
756 tasks.push(TransformTask::Visit(left));
757 }
758 Expression::Gt(mut op) => {
759 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
760 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
761 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gt(
762 op,
763 ))));
764 tasks.push(TransformTask::Visit(right));
765 tasks.push(TransformTask::Visit(left));
766 }
767 Expression::Neq(mut op) => {
768 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
769 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
770 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Neq(
771 op,
772 ))));
773 tasks.push(TransformTask::Visit(right));
774 tasks.push(TransformTask::Visit(left));
775 }
776 Expression::Lte(mut op) => {
777 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
778 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
779 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lte(
780 op,
781 ))));
782 tasks.push(TransformTask::Visit(right));
783 tasks.push(TransformTask::Visit(left));
784 }
785 Expression::Gte(mut op) => {
786 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
787 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
788 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gte(
789 op,
790 ))));
791 tasks.push(TransformTask::Visit(right));
792 tasks.push(TransformTask::Visit(left));
793 }
794 Expression::Mod(mut op) => {
795 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
796 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
797 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mod(
798 op,
799 ))));
800 tasks.push(TransformTask::Visit(right));
801 tasks.push(TransformTask::Visit(left));
802 }
803 Expression::Concat(mut op) => {
804 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
805 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
806 tasks.push(TransformTask::Finish(FinishTask::Binary(
807 Expression::Concat(op),
808 )));
809 tasks.push(TransformTask::Visit(right));
810 tasks.push(TransformTask::Visit(left));
811 }
812 Expression::BitwiseAnd(mut op) => {
813 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
814 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
815 tasks.push(TransformTask::Finish(FinishTask::Binary(
816 Expression::BitwiseAnd(op),
817 )));
818 tasks.push(TransformTask::Visit(right));
819 tasks.push(TransformTask::Visit(left));
820 }
821 Expression::BitwiseOr(mut op) => {
822 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
823 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
824 tasks.push(TransformTask::Finish(FinishTask::Binary(
825 Expression::BitwiseOr(op),
826 )));
827 tasks.push(TransformTask::Visit(right));
828 tasks.push(TransformTask::Visit(left));
829 }
830 Expression::BitwiseXor(mut op) => {
831 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
832 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
833 tasks.push(TransformTask::Finish(FinishTask::Binary(
834 Expression::BitwiseXor(op),
835 )));
836 tasks.push(TransformTask::Visit(right));
837 tasks.push(TransformTask::Visit(left));
838 }
839 Expression::Is(mut op) => {
840 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
841 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
842 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Is(
843 op,
844 ))));
845 tasks.push(TransformTask::Visit(right));
846 tasks.push(TransformTask::Visit(left));
847 }
848 Expression::MemberOf(mut op) => {
849 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
850 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
851 tasks.push(TransformTask::Finish(FinishTask::Binary(
852 Expression::MemberOf(op),
853 )));
854 tasks.push(TransformTask::Visit(right));
855 tasks.push(TransformTask::Visit(left));
856 }
857 Expression::ArrayContainsAll(mut op) => {
858 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
859 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
860 tasks.push(TransformTask::Finish(FinishTask::Binary(
861 Expression::ArrayContainsAll(op),
862 )));
863 tasks.push(TransformTask::Visit(right));
864 tasks.push(TransformTask::Visit(left));
865 }
866 Expression::ArrayContainedBy(mut op) => {
867 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
868 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
869 tasks.push(TransformTask::Finish(FinishTask::Binary(
870 Expression::ArrayContainedBy(op),
871 )));
872 tasks.push(TransformTask::Visit(right));
873 tasks.push(TransformTask::Visit(left));
874 }
875 Expression::ArrayOverlaps(mut op) => {
876 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
877 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
878 tasks.push(TransformTask::Finish(FinishTask::Binary(
879 Expression::ArrayOverlaps(op),
880 )));
881 tasks.push(TransformTask::Visit(right));
882 tasks.push(TransformTask::Visit(left));
883 }
884 Expression::TsMatch(mut op) => {
885 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
886 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
887 tasks.push(TransformTask::Finish(FinishTask::Binary(
888 Expression::TsMatch(op),
889 )));
890 tasks.push(TransformTask::Visit(right));
891 tasks.push(TransformTask::Visit(left));
892 }
893 Expression::Adjacent(mut op) => {
894 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
895 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
896 tasks.push(TransformTask::Finish(FinishTask::Binary(
897 Expression::Adjacent(op),
898 )));
899 tasks.push(TransformTask::Visit(right));
900 tasks.push(TransformTask::Visit(left));
901 }
902 Expression::Like(mut like) => {
903 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
904 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
905 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Like(
906 like,
907 ))));
908 tasks.push(TransformTask::Visit(right));
909 tasks.push(TransformTask::Visit(left));
910 }
911 Expression::ILike(mut like) => {
912 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
913 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
914 tasks.push(TransformTask::Finish(FinishTask::Binary(
915 Expression::ILike(like),
916 )));
917 tasks.push(TransformTask::Visit(right));
918 tasks.push(TransformTask::Visit(left));
919 }
920 Expression::Cast(mut cast) => {
921 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
922 tasks.push(TransformTask::Finish(FinishTask::CastLike(
923 Expression::Cast(cast),
924 )));
925 tasks.push(TransformTask::Visit(child));
926 }
927 Expression::TryCast(mut cast) => {
928 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
929 tasks.push(TransformTask::Finish(FinishTask::CastLike(
930 Expression::TryCast(cast),
931 )));
932 tasks.push(TransformTask::Visit(child));
933 }
934 Expression::SafeCast(mut cast) => {
935 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
936 tasks.push(TransformTask::Finish(FinishTask::CastLike(
937 Expression::SafeCast(cast),
938 )));
939 tasks.push(TransformTask::Visit(child));
940 }
941 Expression::Function(mut function) => {
942 let args = std::mem::take(&mut function.args);
943 let count = args.len();
944 tasks.push(TransformTask::Finish(FinishTask::List(
945 Expression::Function(function),
946 count,
947 )));
948 for child in args.into_iter().rev() {
949 tasks.push(TransformTask::Visit(child));
950 }
951 }
952 Expression::Array(mut array) => {
953 let expressions = std::mem::take(&mut array.expressions);
954 let count = expressions.len();
955 tasks.push(TransformTask::Finish(FinishTask::List(
956 Expression::Array(array),
957 count,
958 )));
959 for child in expressions.into_iter().rev() {
960 tasks.push(TransformTask::Visit(child));
961 }
962 }
963 Expression::Tuple(mut tuple) => {
964 let expressions = std::mem::take(&mut tuple.expressions);
965 let count = expressions.len();
966 tasks.push(TransformTask::Finish(FinishTask::List(
967 Expression::Tuple(tuple),
968 count,
969 )));
970 for child in expressions.into_iter().rev() {
971 tasks.push(TransformTask::Visit(child));
972 }
973 }
974 Expression::ArrayFunc(mut array) => {
975 let expressions = std::mem::take(&mut array.expressions);
976 let count = expressions.len();
977 tasks.push(TransformTask::Finish(FinishTask::List(
978 Expression::ArrayFunc(array),
979 count,
980 )));
981 for child in expressions.into_iter().rev() {
982 tasks.push(TransformTask::Visit(child));
983 }
984 }
985 Expression::Coalesce(mut func) => {
986 let expressions = std::mem::take(&mut func.expressions);
987 let count = expressions.len();
988 tasks.push(TransformTask::Finish(FinishTask::List(
989 Expression::Coalesce(func),
990 count,
991 )));
992 for child in expressions.into_iter().rev() {
993 tasks.push(TransformTask::Visit(child));
994 }
995 }
996 Expression::Greatest(mut func) => {
997 let expressions = std::mem::take(&mut func.expressions);
998 let count = expressions.len();
999 tasks.push(TransformTask::Finish(FinishTask::List(
1000 Expression::Greatest(func),
1001 count,
1002 )));
1003 for child in expressions.into_iter().rev() {
1004 tasks.push(TransformTask::Visit(child));
1005 }
1006 }
1007 Expression::Least(mut func) => {
1008 let expressions = std::mem::take(&mut func.expressions);
1009 let count = expressions.len();
1010 tasks.push(TransformTask::Finish(FinishTask::List(
1011 Expression::Least(func),
1012 count,
1013 )));
1014 for child in expressions.into_iter().rev() {
1015 tasks.push(TransformTask::Visit(child));
1016 }
1017 }
1018 Expression::ArrayConcat(mut func) => {
1019 let expressions = std::mem::take(&mut func.expressions);
1020 let count = expressions.len();
1021 tasks.push(TransformTask::Finish(FinishTask::List(
1022 Expression::ArrayConcat(func),
1023 count,
1024 )));
1025 for child in expressions.into_iter().rev() {
1026 tasks.push(TransformTask::Visit(child));
1027 }
1028 }
1029 Expression::ArrayIntersect(mut func) => {
1030 let expressions = std::mem::take(&mut func.expressions);
1031 let count = expressions.len();
1032 tasks.push(TransformTask::Finish(FinishTask::List(
1033 Expression::ArrayIntersect(func),
1034 count,
1035 )));
1036 for child in expressions.into_iter().rev() {
1037 tasks.push(TransformTask::Visit(child));
1038 }
1039 }
1040 Expression::ArrayZip(mut func) => {
1041 let expressions = std::mem::take(&mut func.expressions);
1042 let count = expressions.len();
1043 tasks.push(TransformTask::Finish(FinishTask::List(
1044 Expression::ArrayZip(func),
1045 count,
1046 )));
1047 for child in expressions.into_iter().rev() {
1048 tasks.push(TransformTask::Visit(child));
1049 }
1050 }
1051 Expression::MapConcat(mut func) => {
1052 let expressions = std::mem::take(&mut func.expressions);
1053 let count = expressions.len();
1054 tasks.push(TransformTask::Finish(FinishTask::List(
1055 Expression::MapConcat(func),
1056 count,
1057 )));
1058 for child in expressions.into_iter().rev() {
1059 tasks.push(TransformTask::Visit(child));
1060 }
1061 }
1062 Expression::JsonArray(mut func) => {
1063 let expressions = std::mem::take(&mut func.expressions);
1064 let count = expressions.len();
1065 tasks.push(TransformTask::Finish(FinishTask::List(
1066 Expression::JsonArray(func),
1067 count,
1068 )));
1069 for child in expressions.into_iter().rev() {
1070 tasks.push(TransformTask::Visit(child));
1071 }
1072 }
1073 Expression::From(mut from) => {
1074 let expressions = std::mem::take(&mut from.expressions);
1075 let count = expressions.len();
1076 tasks.push(TransformTask::Finish(FinishTask::From(*from, count)));
1077 for child in expressions.into_iter().rev() {
1078 tasks.push(TransformTask::Visit(child));
1079 }
1080 }
1081 Expression::Select(mut select) => {
1082 let expressions = std::mem::take(&mut select.expressions);
1083 let expr_count = expressions.len();
1084
1085 let from_info = select.from.take().map(|mut from| {
1086 let children = std::mem::take(&mut from.expressions);
1087 (from, children)
1088 });
1089 let from_present = from_info.is_some();
1090
1091 let where_child = select.where_clause.as_mut().map(|where_clause| {
1092 std::mem::replace(&mut where_clause.this, Expression::Null(Null))
1093 });
1094 let where_present = where_child.is_some();
1095
1096 let group_expressions = select
1097 .group_by
1098 .as_mut()
1099 .map(|group_by| std::mem::take(&mut group_by.expressions))
1100 .unwrap_or_default();
1101 let group_by_count = group_expressions.len();
1102
1103 let having_child = select.having.as_mut().map(|having| {
1104 std::mem::replace(&mut having.this, Expression::Null(Null))
1105 });
1106 let having_present = having_child.is_some();
1107
1108 let qualify_child = select.qualify.as_mut().map(|qualify| {
1109 std::mem::replace(&mut qualify.this, Expression::Null(Null))
1110 });
1111 let qualify_present = qualify_child.is_some();
1112
1113 tasks.push(TransformTask::Finish(FinishTask::Select(SelectFrame {
1114 select,
1115 expr_count,
1116 from_present,
1117 where_present,
1118 group_by_count,
1119 having_present,
1120 qualify_present,
1121 })));
1122
1123 if let Some(child) = qualify_child {
1124 tasks.push(TransformTask::Visit(child));
1125 }
1126 if let Some(child) = having_child {
1127 tasks.push(TransformTask::Visit(child));
1128 }
1129 for child in group_expressions.into_iter().rev() {
1130 tasks.push(TransformTask::Visit(child));
1131 }
1132 if let Some(child) = where_child {
1133 tasks.push(TransformTask::Visit(child));
1134 }
1135 if let Some((from, children)) = from_info {
1136 tasks.push(TransformTask::Finish(FinishTask::From(
1137 from,
1138 children.len(),
1139 )));
1140 for child in children.into_iter().rev() {
1141 tasks.push(TransformTask::Visit(child));
1142 }
1143 }
1144 for child in expressions.into_iter().rev() {
1145 tasks.push(TransformTask::Visit(child));
1146 }
1147 }
1148 Expression::Union(mut union) => {
1149 let right = std::mem::replace(&mut union.right, Expression::Null(Null));
1150 let left = std::mem::replace(&mut union.left, Expression::Null(Null));
1151 tasks.push(TransformTask::Finish(FinishTask::SetOp(Expression::Union(
1152 union,
1153 ))));
1154 tasks.push(TransformTask::Visit(right));
1155 tasks.push(TransformTask::Visit(left));
1156 }
1157 Expression::Intersect(mut intersect) => {
1158 let right = std::mem::replace(&mut intersect.right, Expression::Null(Null));
1159 let left = std::mem::replace(&mut intersect.left, Expression::Null(Null));
1160 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1161 Expression::Intersect(intersect),
1162 )));
1163 tasks.push(TransformTask::Visit(right));
1164 tasks.push(TransformTask::Visit(left));
1165 }
1166 Expression::Except(mut except) => {
1167 let right = std::mem::replace(&mut except.right, Expression::Null(Null));
1168 let left = std::mem::replace(&mut except.left, Expression::Null(Null));
1169 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1170 Expression::Except(except),
1171 )));
1172 tasks.push(TransformTask::Visit(right));
1173 tasks.push(TransformTask::Visit(left));
1174 }
1175 other => {
1176 results.push(transform_recursive_reference(other, transform_fn)?);
1177 }
1178 }
1179 }
1180 TransformTask::Finish(finish) => match finish {
1181 FinishTask::Unary(expr) => {
1182 let child = transform_pop_result(&mut results)?;
1183 let rebuilt = match expr {
1184 Expression::Alias(mut alias) => {
1185 alias.this = child;
1186 Expression::Alias(alias)
1187 }
1188 Expression::Paren(mut paren) => {
1189 paren.this = child;
1190 Expression::Paren(paren)
1191 }
1192 Expression::Not(mut not) => {
1193 not.this = child;
1194 Expression::Not(not)
1195 }
1196 Expression::Neg(mut neg) => {
1197 neg.this = child;
1198 Expression::Neg(neg)
1199 }
1200 Expression::IsNull(mut expr) => {
1201 expr.this = child;
1202 Expression::IsNull(expr)
1203 }
1204 Expression::IsTrue(mut expr) => {
1205 expr.this = child;
1206 Expression::IsTrue(expr)
1207 }
1208 Expression::IsFalse(mut expr) => {
1209 expr.this = child;
1210 Expression::IsFalse(expr)
1211 }
1212 Expression::Subquery(mut subquery) => {
1213 subquery.this = child;
1214 Expression::Subquery(subquery)
1215 }
1216 Expression::Exists(mut exists) => {
1217 exists.this = child;
1218 Expression::Exists(exists)
1219 }
1220 Expression::TableArgument(mut arg) => {
1221 arg.this = child;
1222 Expression::TableArgument(arg)
1223 }
1224 _ => {
1225 return Err(crate::error::Error::Internal(
1226 "unexpected unary transform task".to_string(),
1227 ));
1228 }
1229 };
1230 results.push(transform_fn(rebuilt)?);
1231 }
1232 FinishTask::Binary(expr) => {
1233 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1234 let left = children.next().expect("left child");
1235 let right = children.next().expect("right child");
1236 let rebuilt = match expr {
1237 Expression::And(mut op) => {
1238 op.left = left;
1239 op.right = right;
1240 Expression::And(op)
1241 }
1242 Expression::Or(mut op) => {
1243 op.left = left;
1244 op.right = right;
1245 Expression::Or(op)
1246 }
1247 Expression::Add(mut op) => {
1248 op.left = left;
1249 op.right = right;
1250 Expression::Add(op)
1251 }
1252 Expression::Sub(mut op) => {
1253 op.left = left;
1254 op.right = right;
1255 Expression::Sub(op)
1256 }
1257 Expression::Mul(mut op) => {
1258 op.left = left;
1259 op.right = right;
1260 Expression::Mul(op)
1261 }
1262 Expression::Div(mut op) => {
1263 op.left = left;
1264 op.right = right;
1265 Expression::Div(op)
1266 }
1267 Expression::Eq(mut op) => {
1268 op.left = left;
1269 op.right = right;
1270 Expression::Eq(op)
1271 }
1272 Expression::Lt(mut op) => {
1273 op.left = left;
1274 op.right = right;
1275 Expression::Lt(op)
1276 }
1277 Expression::Gt(mut op) => {
1278 op.left = left;
1279 op.right = right;
1280 Expression::Gt(op)
1281 }
1282 Expression::Neq(mut op) => {
1283 op.left = left;
1284 op.right = right;
1285 Expression::Neq(op)
1286 }
1287 Expression::Lte(mut op) => {
1288 op.left = left;
1289 op.right = right;
1290 Expression::Lte(op)
1291 }
1292 Expression::Gte(mut op) => {
1293 op.left = left;
1294 op.right = right;
1295 Expression::Gte(op)
1296 }
1297 Expression::Mod(mut op) => {
1298 op.left = left;
1299 op.right = right;
1300 Expression::Mod(op)
1301 }
1302 Expression::Concat(mut op) => {
1303 op.left = left;
1304 op.right = right;
1305 Expression::Concat(op)
1306 }
1307 Expression::BitwiseAnd(mut op) => {
1308 op.left = left;
1309 op.right = right;
1310 Expression::BitwiseAnd(op)
1311 }
1312 Expression::BitwiseOr(mut op) => {
1313 op.left = left;
1314 op.right = right;
1315 Expression::BitwiseOr(op)
1316 }
1317 Expression::BitwiseXor(mut op) => {
1318 op.left = left;
1319 op.right = right;
1320 Expression::BitwiseXor(op)
1321 }
1322 Expression::Is(mut op) => {
1323 op.left = left;
1324 op.right = right;
1325 Expression::Is(op)
1326 }
1327 Expression::MemberOf(mut op) => {
1328 op.left = left;
1329 op.right = right;
1330 Expression::MemberOf(op)
1331 }
1332 Expression::ArrayContainsAll(mut op) => {
1333 op.left = left;
1334 op.right = right;
1335 Expression::ArrayContainsAll(op)
1336 }
1337 Expression::ArrayContainedBy(mut op) => {
1338 op.left = left;
1339 op.right = right;
1340 Expression::ArrayContainedBy(op)
1341 }
1342 Expression::ArrayOverlaps(mut op) => {
1343 op.left = left;
1344 op.right = right;
1345 Expression::ArrayOverlaps(op)
1346 }
1347 Expression::TsMatch(mut op) => {
1348 op.left = left;
1349 op.right = right;
1350 Expression::TsMatch(op)
1351 }
1352 Expression::Adjacent(mut op) => {
1353 op.left = left;
1354 op.right = right;
1355 Expression::Adjacent(op)
1356 }
1357 Expression::Like(mut like) => {
1358 like.left = left;
1359 like.right = right;
1360 Expression::Like(like)
1361 }
1362 Expression::ILike(mut like) => {
1363 like.left = left;
1364 like.right = right;
1365 Expression::ILike(like)
1366 }
1367 _ => {
1368 return Err(crate::error::Error::Internal(
1369 "unexpected binary transform task".to_string(),
1370 ));
1371 }
1372 };
1373 results.push(transform_fn(rebuilt)?);
1374 }
1375 FinishTask::CastLike(expr) => {
1376 let child = transform_pop_result(&mut results)?;
1377 let rebuilt = match expr {
1378 Expression::Cast(mut cast) => {
1379 cast.this = child;
1380 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1381 Expression::Cast(cast)
1382 }
1383 Expression::TryCast(mut cast) => {
1384 cast.this = child;
1385 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1386 Expression::TryCast(cast)
1387 }
1388 Expression::SafeCast(mut cast) => {
1389 cast.this = child;
1390 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1391 Expression::SafeCast(cast)
1392 }
1393 _ => {
1394 return Err(crate::error::Error::Internal(
1395 "unexpected cast transform task".to_string(),
1396 ));
1397 }
1398 };
1399 results.push(transform_fn(rebuilt)?);
1400 }
1401 FinishTask::List(expr, count) => {
1402 let children = transform_pop_results(&mut results, count)?;
1403 let rebuilt = match expr {
1404 Expression::Function(mut function) => {
1405 function.args = children;
1406 Expression::Function(function)
1407 }
1408 Expression::Array(mut array) => {
1409 array.expressions = children;
1410 Expression::Array(array)
1411 }
1412 Expression::Tuple(mut tuple) => {
1413 tuple.expressions = children;
1414 Expression::Tuple(tuple)
1415 }
1416 Expression::ArrayFunc(mut array) => {
1417 array.expressions = children;
1418 Expression::ArrayFunc(array)
1419 }
1420 Expression::Coalesce(mut func) => {
1421 func.expressions = children;
1422 Expression::Coalesce(func)
1423 }
1424 Expression::Greatest(mut func) => {
1425 func.expressions = children;
1426 Expression::Greatest(func)
1427 }
1428 Expression::Least(mut func) => {
1429 func.expressions = children;
1430 Expression::Least(func)
1431 }
1432 Expression::ArrayConcat(mut func) => {
1433 func.expressions = children;
1434 Expression::ArrayConcat(func)
1435 }
1436 Expression::ArrayIntersect(mut func) => {
1437 func.expressions = children;
1438 Expression::ArrayIntersect(func)
1439 }
1440 Expression::ArrayZip(mut func) => {
1441 func.expressions = children;
1442 Expression::ArrayZip(func)
1443 }
1444 Expression::MapConcat(mut func) => {
1445 func.expressions = children;
1446 Expression::MapConcat(func)
1447 }
1448 Expression::JsonArray(mut func) => {
1449 func.expressions = children;
1450 Expression::JsonArray(func)
1451 }
1452 _ => {
1453 return Err(crate::error::Error::Internal(
1454 "unexpected list transform task".to_string(),
1455 ));
1456 }
1457 };
1458 results.push(transform_fn(rebuilt)?);
1459 }
1460 FinishTask::From(mut from, count) => {
1461 from.expressions = transform_pop_results(&mut results, count)?;
1462 results.push(transform_fn(Expression::From(Box::new(from)))?);
1463 }
1464 FinishTask::Select(frame) => {
1465 let mut select = *frame.select;
1466
1467 if frame.qualify_present {
1468 if let Some(ref mut qualify) = select.qualify {
1469 qualify.this = transform_pop_result(&mut results)?;
1470 }
1471 }
1472 if frame.having_present {
1473 if let Some(ref mut having) = select.having {
1474 having.this = transform_pop_result(&mut results)?;
1475 }
1476 }
1477 if frame.group_by_count > 0 {
1478 if let Some(ref mut group_by) = select.group_by {
1479 group_by.expressions =
1480 transform_pop_results(&mut results, frame.group_by_count)?;
1481 }
1482 }
1483 if frame.where_present {
1484 if let Some(ref mut where_clause) = select.where_clause {
1485 where_clause.this = transform_pop_result(&mut results)?;
1486 }
1487 }
1488 if frame.from_present {
1489 match transform_pop_result(&mut results)? {
1490 Expression::From(from) => {
1491 select.from = Some(*from);
1492 }
1493 _ => {
1494 return Err(crate::error::Error::Internal(
1495 "expected FROM expression result".to_string(),
1496 ));
1497 }
1498 }
1499 }
1500 select.expressions = transform_pop_results(&mut results, frame.expr_count)?;
1501
1502 select.joins = select
1503 .joins
1504 .into_iter()
1505 .map(|mut join| {
1506 join.this = transform_recursive(join.this, transform_fn)?;
1507 if let Some(on) = join.on.take() {
1508 join.on = Some(transform_recursive(on, transform_fn)?);
1509 }
1510 match transform_fn(Expression::Join(Box::new(join)))? {
1511 Expression::Join(j) => Ok(*j),
1512 _ => Err(crate::error::Error::parse(
1513 "Join transformation returned non-join expression",
1514 0,
1515 0,
1516 0,
1517 0,
1518 )),
1519 }
1520 })
1521 .collect::<Result<Vec<_>>>()?;
1522
1523 select.lateral_views = select
1524 .lateral_views
1525 .into_iter()
1526 .map(|mut lv| {
1527 lv.this = transform_recursive(lv.this, transform_fn)?;
1528 Ok(lv)
1529 })
1530 .collect::<Result<Vec<_>>>()?;
1531
1532 if let Some(mut with) = select.with.take() {
1533 with.ctes = with
1534 .ctes
1535 .into_iter()
1536 .map(|mut cte| {
1537 let original = cte.this.clone();
1538 cte.this =
1539 transform_recursive(cte.this, transform_fn).unwrap_or(original);
1540 cte
1541 })
1542 .collect();
1543 select.with = Some(with);
1544 }
1545
1546 if let Some(mut order) = select.order_by.take() {
1547 order.expressions = order
1548 .expressions
1549 .into_iter()
1550 .map(|o| {
1551 let mut o = o;
1552 let original = o.this.clone();
1553 o.this =
1554 transform_recursive(o.this, transform_fn).unwrap_or(original);
1555 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1556 Ok(Expression::Ordered(transformed)) => *transformed,
1557 Ok(_) | Err(_) => o,
1558 }
1559 })
1560 .collect();
1561 select.order_by = Some(order);
1562 }
1563
1564 if let Some(ref mut windows) = select.windows {
1565 for nw in windows.iter_mut() {
1566 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1567 .into_iter()
1568 .map(|o| {
1569 let mut o = o;
1570 let original = o.this.clone();
1571 o.this = transform_recursive(o.this, transform_fn)
1572 .unwrap_or(original);
1573 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1574 Ok(Expression::Ordered(transformed)) => *transformed,
1575 Ok(_) | Err(_) => o,
1576 }
1577 })
1578 .collect();
1579 }
1580 }
1581
1582 results.push(transform_fn(Expression::Select(Box::new(select)))?);
1583 }
1584 FinishTask::SetOp(expr) => {
1585 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1586 let left = children.next().expect("left child");
1587 let right = children.next().expect("right child");
1588
1589 let rebuilt = match expr {
1590 Expression::Union(mut union) => {
1591 union.left = left;
1592 union.right = right;
1593 if let Some(mut with) = union.with.take() {
1594 with.ctes = with
1595 .ctes
1596 .into_iter()
1597 .map(|mut cte| {
1598 let original = cte.this.clone();
1599 cte.this = transform_recursive(cte.this, transform_fn)
1600 .unwrap_or(original);
1601 cte
1602 })
1603 .collect();
1604 union.with = Some(with);
1605 }
1606 Expression::Union(union)
1607 }
1608 Expression::Intersect(mut intersect) => {
1609 intersect.left = left;
1610 intersect.right = right;
1611 if let Some(mut with) = intersect.with.take() {
1612 with.ctes = with
1613 .ctes
1614 .into_iter()
1615 .map(|mut cte| {
1616 let original = cte.this.clone();
1617 cte.this = transform_recursive(cte.this, transform_fn)
1618 .unwrap_or(original);
1619 cte
1620 })
1621 .collect();
1622 intersect.with = Some(with);
1623 }
1624 Expression::Intersect(intersect)
1625 }
1626 Expression::Except(mut except) => {
1627 except.left = left;
1628 except.right = right;
1629 if let Some(mut with) = except.with.take() {
1630 with.ctes = with
1631 .ctes
1632 .into_iter()
1633 .map(|mut cte| {
1634 let original = cte.this.clone();
1635 cte.this = transform_recursive(cte.this, transform_fn)
1636 .unwrap_or(original);
1637 cte
1638 })
1639 .collect();
1640 except.with = Some(with);
1641 }
1642 Expression::Except(except)
1643 }
1644 _ => {
1645 return Err(crate::error::Error::Internal(
1646 "unexpected set-op transform task".to_string(),
1647 ));
1648 }
1649 };
1650 results.push(transform_fn(rebuilt)?);
1651 }
1652 },
1653 }
1654 }
1655
1656 match results.len() {
1657 1 => Ok(results.pop().expect("single transform result")),
1658 _ => Err(crate::error::Error::Internal(
1659 "unexpected transform result stack size".to_string(),
1660 )),
1661 }
1662}
1663
1664fn transform_recursive_reference<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
1665where
1666 F: Fn(Expression) -> Result<Expression>,
1667{
1668 use crate::expressions::BinaryOp;
1669
// Rebuilds an aggregate-style node as `Expression::$variant` after running
// `transform_recursive` over every child slot it carries: `this`, the optional
// `filter`, the `this` of each `order_by` entry, the expression stored in
// `having_max`, and the optional `limit`. The first child error is propagated
// to the enclosing function via `?`.
macro_rules! recurse_agg {
    ($variant:ident, $f:expr) => {{
        let mut agg = $f;

        agg.this = transform_recursive(agg.this, transform_fn)?;

        // Optional FILTER expression: transform it when present, keep `None` otherwise.
        agg.filter = agg
            .filter
            .take()
            .map(|predicate| transform_recursive(predicate, transform_fn))
            .transpose()?;

        // Each ordering key is swapped out for a NULL placeholder while it is transformed.
        for ordering in &mut agg.order_by {
            let pending = std::mem::replace(
                &mut ordering.this,
                Expression::Null(crate::expressions::Null),
            );
            ordering.this = transform_recursive(pending, transform_fn)?;
        }

        // Only the boxed expression of `having_max` is rewritten; the second tuple
        // element is left untouched.
        if let Some((bound, _)) = agg.having_max.as_mut() {
            let pending = std::mem::replace(
                &mut **bound,
                Expression::Null(crate::expressions::Null),
            );
            *bound = Box::new(transform_recursive(pending, transform_fn)?);
        }

        // Optional boxed LIMIT expression: unbox, transform, re-box.
        agg.limit = agg
            .limit
            .take()
            .map(|boxed| transform_recursive(*boxed, transform_fn).map(Box::new))
            .transpose()?;

        Expression::$variant(agg)
    }};
}
1696
// Rebuilds a binary operator node as `Expression::$variant(Box<BinaryOp>)`.
// The operand is taken by value, its `left` and then `right` children are run
// through `transform_recursive` (errors propagate via `?`), and the comment
// and inferred-type fields are carried over unchanged.
macro_rules! transform_binary {
    ($variant:ident, $op:expr) => {{
        let source = $op;
        // Struct-literal fields are evaluated in written order: left, then right.
        let rebuilt = BinaryOp {
            left: transform_recursive(source.left, transform_fn)?,
            right: transform_recursive(source.right, transform_fn)?,
            left_comments: source.left_comments,
            operator_comments: source.operator_comments,
            trailing_comments: source.trailing_comments,
            inferred_type: source.inferred_type,
        };
        Expression::$variant(Box::new(rebuilt))
    }};
}
1712
1713 // Fast path: leaf nodes never need child traversal, apply transform directly
1714 if matches!(
1715 &expr,
1716 Expression::Literal(_)
1717 | Expression::Boolean(_)
1718 | Expression::Null(_)
1719 | Expression::Identifier(_)
1720 | Expression::Star(_)
1721 | Expression::Parameter(_)
1722 | Expression::Placeholder(_)
1723 | Expression::SessionParameter(_)
1724 ) {
1725 return transform_fn(expr);
1726 }
1727
1728 // First recursively transform children, then apply the transform function
1729 let expr = match expr {
1730 Expression::Select(mut select) => {
1731 select.expressions = select
1732 .expressions
1733 .into_iter()
1734 .map(|e| transform_recursive(e, transform_fn))
1735 .collect::<Result<Vec<_>>>()?;
1736
1737 // Transform FROM clause
1738 if let Some(mut from) = select.from.take() {
1739 from.expressions = from
1740 .expressions
1741 .into_iter()
1742 .map(|e| transform_recursive(e, transform_fn))
1743 .collect::<Result<Vec<_>>>()?;
1744 select.from = Some(from);
1745 }
1746
1747 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
1748 select.joins = select
1749 .joins
1750 .into_iter()
1751 .map(|mut join| {
1752 join.this = transform_recursive(join.this, transform_fn)?;
1753 if let Some(on) = join.on.take() {
1754 join.on = Some(transform_recursive(on, transform_fn)?);
1755 }
1756 // Wrap join in Expression::Join to allow transform_fn to transform it
1757 match transform_fn(Expression::Join(Box::new(join)))? {
1758 Expression::Join(j) => Ok(*j),
1759 _ => Err(crate::error::Error::parse(
1760 "Join transformation returned non-join expression",
1761 0,
1762 0,
1763 0,
1764 0,
1765 )),
1766 }
1767 })
1768 .collect::<Result<Vec<_>>>()?;
1769
1770 // Transform LATERAL VIEW expressions (Hive/Spark)
1771 select.lateral_views = select
1772 .lateral_views
1773 .into_iter()
1774 .map(|mut lv| {
1775 lv.this = transform_recursive(lv.this, transform_fn)?;
1776 Ok(lv)
1777 })
1778 .collect::<Result<Vec<_>>>()?;
1779
1780 // Transform WHERE clause
1781 if let Some(mut where_clause) = select.where_clause.take() {
1782 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1783 select.where_clause = Some(where_clause);
1784 }
1785
1786 // Transform GROUP BY
1787 if let Some(mut group_by) = select.group_by.take() {
1788 group_by.expressions = group_by
1789 .expressions
1790 .into_iter()
1791 .map(|e| transform_recursive(e, transform_fn))
1792 .collect::<Result<Vec<_>>>()?;
1793 select.group_by = Some(group_by);
1794 }
1795
1796 // Transform HAVING
1797 if let Some(mut having) = select.having.take() {
1798 having.this = transform_recursive(having.this, transform_fn)?;
1799 select.having = Some(having);
1800 }
1801
1802 // Transform WITH (CTEs)
1803 if let Some(mut with) = select.with.take() {
1804 with.ctes = with
1805 .ctes
1806 .into_iter()
1807 .map(|mut cte| {
1808 let original = cte.this.clone();
1809 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1810 cte
1811 })
1812 .collect();
1813 select.with = Some(with);
1814 }
1815
1816 // Transform ORDER BY
1817 if let Some(mut order) = select.order_by.take() {
1818 order.expressions = order
1819 .expressions
1820 .into_iter()
1821 .map(|o| {
1822 let mut o = o;
1823 let original = o.this.clone();
1824 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1825 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
1826 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1827 Ok(Expression::Ordered(transformed)) => *transformed,
1828 Ok(_) | Err(_) => o,
1829 }
1830 })
1831 .collect();
1832 select.order_by = Some(order);
1833 }
1834
1835 // Transform WINDOW clause order_by
1836 if let Some(ref mut windows) = select.windows {
1837 for nw in windows.iter_mut() {
1838 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1839 .into_iter()
1840 .map(|o| {
1841 let mut o = o;
1842 let original = o.this.clone();
1843 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1844 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1845 Ok(Expression::Ordered(transformed)) => *transformed,
1846 Ok(_) | Err(_) => o,
1847 }
1848 })
1849 .collect();
1850 }
1851 }
1852
1853 // Transform QUALIFY
1854 if let Some(mut qual) = select.qualify.take() {
1855 qual.this = transform_recursive(qual.this, transform_fn)?;
1856 select.qualify = Some(qual);
1857 }
1858
1859 Expression::Select(select)
1860 }
1861 Expression::Function(mut f) => {
1862 f.args = f
1863 .args
1864 .into_iter()
1865 .map(|e| transform_recursive(e, transform_fn))
1866 .collect::<Result<Vec<_>>>()?;
1867 Expression::Function(f)
1868 }
1869 Expression::AggregateFunction(mut f) => {
1870 f.args = f
1871 .args
1872 .into_iter()
1873 .map(|e| transform_recursive(e, transform_fn))
1874 .collect::<Result<Vec<_>>>()?;
1875 if let Some(filter) = f.filter {
1876 f.filter = Some(transform_recursive(filter, transform_fn)?);
1877 }
1878 Expression::AggregateFunction(f)
1879 }
1880 Expression::WindowFunction(mut wf) => {
1881 wf.this = transform_recursive(wf.this, transform_fn)?;
1882 wf.over.partition_by = wf
1883 .over
1884 .partition_by
1885 .into_iter()
1886 .map(|e| transform_recursive(e, transform_fn))
1887 .collect::<Result<Vec<_>>>()?;
1888 // Transform order_by items through Expression::Ordered wrapper
1889 wf.over.order_by = wf
1890 .over
1891 .order_by
1892 .into_iter()
1893 .map(|o| {
1894 let mut o = o;
1895 o.this = transform_recursive(o.this, transform_fn)?;
1896 match transform_fn(Expression::Ordered(Box::new(o)))? {
1897 Expression::Ordered(transformed) => Ok(*transformed),
1898 _ => Err(crate::error::Error::parse(
1899 "Ordered transformation returned non-Ordered expression",
1900 0,
1901 0,
1902 0,
1903 0,
1904 )),
1905 }
1906 })
1907 .collect::<Result<Vec<_>>>()?;
1908 Expression::WindowFunction(wf)
1909 }
1910 Expression::Alias(mut a) => {
1911 a.this = transform_recursive(a.this, transform_fn)?;
1912 Expression::Alias(a)
1913 }
1914 Expression::Cast(mut c) => {
1915 c.this = transform_recursive(c.this, transform_fn)?;
1916 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
1917 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1918 Expression::Cast(c)
1919 }
1920 Expression::And(op) => transform_binary!(And, *op),
1921 Expression::Or(op) => transform_binary!(Or, *op),
1922 Expression::Add(op) => transform_binary!(Add, *op),
1923 Expression::Sub(op) => transform_binary!(Sub, *op),
1924 Expression::Mul(op) => transform_binary!(Mul, *op),
1925 Expression::Div(op) => transform_binary!(Div, *op),
1926 Expression::Eq(op) => transform_binary!(Eq, *op),
1927 Expression::Lt(op) => transform_binary!(Lt, *op),
1928 Expression::Gt(op) => transform_binary!(Gt, *op),
1929 Expression::Paren(mut p) => {
1930 p.this = transform_recursive(p.this, transform_fn)?;
1931 Expression::Paren(p)
1932 }
1933 Expression::Coalesce(mut f) => {
1934 f.expressions = f
1935 .expressions
1936 .into_iter()
1937 .map(|e| transform_recursive(e, transform_fn))
1938 .collect::<Result<Vec<_>>>()?;
1939 Expression::Coalesce(f)
1940 }
1941 Expression::IfNull(mut f) => {
1942 f.this = transform_recursive(f.this, transform_fn)?;
1943 f.expression = transform_recursive(f.expression, transform_fn)?;
1944 Expression::IfNull(f)
1945 }
1946 Expression::Nvl(mut f) => {
1947 f.this = transform_recursive(f.this, transform_fn)?;
1948 f.expression = transform_recursive(f.expression, transform_fn)?;
1949 Expression::Nvl(f)
1950 }
1951 Expression::In(mut i) => {
1952 i.this = transform_recursive(i.this, transform_fn)?;
1953 i.expressions = i
1954 .expressions
1955 .into_iter()
1956 .map(|e| transform_recursive(e, transform_fn))
1957 .collect::<Result<Vec<_>>>()?;
1958 if let Some(query) = i.query {
1959 i.query = Some(transform_recursive(query, transform_fn)?);
1960 }
1961 Expression::In(i)
1962 }
1963 Expression::Not(mut n) => {
1964 n.this = transform_recursive(n.this, transform_fn)?;
1965 Expression::Not(n)
1966 }
1967 Expression::ArraySlice(mut s) => {
1968 s.this = transform_recursive(s.this, transform_fn)?;
1969 if let Some(start) = s.start {
1970 s.start = Some(transform_recursive(start, transform_fn)?);
1971 }
1972 if let Some(end) = s.end {
1973 s.end = Some(transform_recursive(end, transform_fn)?);
1974 }
1975 Expression::ArraySlice(s)
1976 }
1977 Expression::Subscript(mut s) => {
1978 s.this = transform_recursive(s.this, transform_fn)?;
1979 s.index = transform_recursive(s.index, transform_fn)?;
1980 Expression::Subscript(s)
1981 }
1982 Expression::Array(mut a) => {
1983 a.expressions = a
1984 .expressions
1985 .into_iter()
1986 .map(|e| transform_recursive(e, transform_fn))
1987 .collect::<Result<Vec<_>>>()?;
1988 Expression::Array(a)
1989 }
1990 Expression::Struct(mut s) => {
1991 let mut new_fields = Vec::new();
1992 for (name, expr) in s.fields {
1993 let transformed = transform_recursive(expr, transform_fn)?;
1994 new_fields.push((name, transformed));
1995 }
1996 s.fields = new_fields;
1997 Expression::Struct(s)
1998 }
1999 Expression::NamedArgument(mut na) => {
2000 na.value = transform_recursive(na.value, transform_fn)?;
2001 Expression::NamedArgument(na)
2002 }
2003 Expression::MapFunc(mut m) => {
2004 m.keys = m
2005 .keys
2006 .into_iter()
2007 .map(|e| transform_recursive(e, transform_fn))
2008 .collect::<Result<Vec<_>>>()?;
2009 m.values = m
2010 .values
2011 .into_iter()
2012 .map(|e| transform_recursive(e, transform_fn))
2013 .collect::<Result<Vec<_>>>()?;
2014 Expression::MapFunc(m)
2015 }
2016 Expression::ArrayFunc(mut a) => {
2017 a.expressions = a
2018 .expressions
2019 .into_iter()
2020 .map(|e| transform_recursive(e, transform_fn))
2021 .collect::<Result<Vec<_>>>()?;
2022 Expression::ArrayFunc(a)
2023 }
2024 Expression::Lambda(mut l) => {
2025 l.body = transform_recursive(l.body, transform_fn)?;
2026 Expression::Lambda(l)
2027 }
2028 Expression::JsonExtract(mut f) => {
2029 f.this = transform_recursive(f.this, transform_fn)?;
2030 f.path = transform_recursive(f.path, transform_fn)?;
2031 Expression::JsonExtract(f)
2032 }
2033 Expression::JsonExtractScalar(mut f) => {
2034 f.this = transform_recursive(f.this, transform_fn)?;
2035 f.path = transform_recursive(f.path, transform_fn)?;
2036 Expression::JsonExtractScalar(f)
2037 }
2038
2039 // ===== UnaryFunc-based expressions =====
2040 // These all have a single `this: Expression` child
2041 Expression::Length(mut f) => {
2042 f.this = transform_recursive(f.this, transform_fn)?;
2043 Expression::Length(f)
2044 }
2045 Expression::Upper(mut f) => {
2046 f.this = transform_recursive(f.this, transform_fn)?;
2047 Expression::Upper(f)
2048 }
2049 Expression::Lower(mut f) => {
2050 f.this = transform_recursive(f.this, transform_fn)?;
2051 Expression::Lower(f)
2052 }
2053 Expression::LTrim(mut f) => {
2054 f.this = transform_recursive(f.this, transform_fn)?;
2055 Expression::LTrim(f)
2056 }
2057 Expression::RTrim(mut f) => {
2058 f.this = transform_recursive(f.this, transform_fn)?;
2059 Expression::RTrim(f)
2060 }
2061 Expression::Reverse(mut f) => {
2062 f.this = transform_recursive(f.this, transform_fn)?;
2063 Expression::Reverse(f)
2064 }
2065 Expression::Abs(mut f) => {
2066 f.this = transform_recursive(f.this, transform_fn)?;
2067 Expression::Abs(f)
2068 }
2069 Expression::Ceil(mut f) => {
2070 f.this = transform_recursive(f.this, transform_fn)?;
2071 Expression::Ceil(f)
2072 }
2073 Expression::Floor(mut f) => {
2074 f.this = transform_recursive(f.this, transform_fn)?;
2075 Expression::Floor(f)
2076 }
2077 Expression::Sign(mut f) => {
2078 f.this = transform_recursive(f.this, transform_fn)?;
2079 Expression::Sign(f)
2080 }
2081 Expression::Sqrt(mut f) => {
2082 f.this = transform_recursive(f.this, transform_fn)?;
2083 Expression::Sqrt(f)
2084 }
2085 Expression::Cbrt(mut f) => {
2086 f.this = transform_recursive(f.this, transform_fn)?;
2087 Expression::Cbrt(f)
2088 }
2089 Expression::Ln(mut f) => {
2090 f.this = transform_recursive(f.this, transform_fn)?;
2091 Expression::Ln(f)
2092 }
2093 Expression::Log(mut f) => {
2094 f.this = transform_recursive(f.this, transform_fn)?;
2095 if let Some(base) = f.base {
2096 f.base = Some(transform_recursive(base, transform_fn)?);
2097 }
2098 Expression::Log(f)
2099 }
2100 Expression::Exp(mut f) => {
2101 f.this = transform_recursive(f.this, transform_fn)?;
2102 Expression::Exp(f)
2103 }
2104 Expression::Date(mut f) => {
2105 f.this = transform_recursive(f.this, transform_fn)?;
2106 Expression::Date(f)
2107 }
2108 Expression::Stddev(f) => recurse_agg!(Stddev, f),
2109 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
2110 Expression::Variance(f) => recurse_agg!(Variance, f),
2111
2112 // ===== BinaryFunc-based expressions =====
2113 Expression::ModFunc(mut f) => {
2114 f.this = transform_recursive(f.this, transform_fn)?;
2115 f.expression = transform_recursive(f.expression, transform_fn)?;
2116 Expression::ModFunc(f)
2117 }
2118 Expression::Power(mut f) => {
2119 f.this = transform_recursive(f.this, transform_fn)?;
2120 f.expression = transform_recursive(f.expression, transform_fn)?;
2121 Expression::Power(f)
2122 }
2123 Expression::MapFromArrays(mut f) => {
2124 f.this = transform_recursive(f.this, transform_fn)?;
2125 f.expression = transform_recursive(f.expression, transform_fn)?;
2126 Expression::MapFromArrays(f)
2127 }
2128 Expression::ElementAt(mut f) => {
2129 f.this = transform_recursive(f.this, transform_fn)?;
2130 f.expression = transform_recursive(f.expression, transform_fn)?;
2131 Expression::ElementAt(f)
2132 }
2133 Expression::MapContainsKey(mut f) => {
2134 f.this = transform_recursive(f.this, transform_fn)?;
2135 f.expression = transform_recursive(f.expression, transform_fn)?;
2136 Expression::MapContainsKey(f)
2137 }
2138 Expression::Left(mut f) => {
2139 f.this = transform_recursive(f.this, transform_fn)?;
2140 f.length = transform_recursive(f.length, transform_fn)?;
2141 Expression::Left(f)
2142 }
2143 Expression::Right(mut f) => {
2144 f.this = transform_recursive(f.this, transform_fn)?;
2145 f.length = transform_recursive(f.length, transform_fn)?;
2146 Expression::Right(f)
2147 }
2148 Expression::Repeat(mut f) => {
2149 f.this = transform_recursive(f.this, transform_fn)?;
2150 f.times = transform_recursive(f.times, transform_fn)?;
2151 Expression::Repeat(f)
2152 }
2153
2154 // ===== Complex function expressions =====
2155 Expression::Substring(mut f) => {
2156 f.this = transform_recursive(f.this, transform_fn)?;
2157 f.start = transform_recursive(f.start, transform_fn)?;
2158 if let Some(len) = f.length {
2159 f.length = Some(transform_recursive(len, transform_fn)?);
2160 }
2161 Expression::Substring(f)
2162 }
2163 Expression::Replace(mut f) => {
2164 f.this = transform_recursive(f.this, transform_fn)?;
2165 f.old = transform_recursive(f.old, transform_fn)?;
2166 f.new = transform_recursive(f.new, transform_fn)?;
2167 Expression::Replace(f)
2168 }
2169 Expression::ConcatWs(mut f) => {
2170 f.separator = transform_recursive(f.separator, transform_fn)?;
2171 f.expressions = f
2172 .expressions
2173 .into_iter()
2174 .map(|e| transform_recursive(e, transform_fn))
2175 .collect::<Result<Vec<_>>>()?;
2176 Expression::ConcatWs(f)
2177 }
2178 Expression::Trim(mut f) => {
2179 f.this = transform_recursive(f.this, transform_fn)?;
2180 if let Some(chars) = f.characters {
2181 f.characters = Some(transform_recursive(chars, transform_fn)?);
2182 }
2183 Expression::Trim(f)
2184 }
2185 Expression::Split(mut f) => {
2186 f.this = transform_recursive(f.this, transform_fn)?;
2187 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
2188 Expression::Split(f)
2189 }
2190 Expression::Lpad(mut f) => {
2191 f.this = transform_recursive(f.this, transform_fn)?;
2192 f.length = transform_recursive(f.length, transform_fn)?;
2193 if let Some(fill) = f.fill {
2194 f.fill = Some(transform_recursive(fill, transform_fn)?);
2195 }
2196 Expression::Lpad(f)
2197 }
2198 Expression::Rpad(mut f) => {
2199 f.this = transform_recursive(f.this, transform_fn)?;
2200 f.length = transform_recursive(f.length, transform_fn)?;
2201 if let Some(fill) = f.fill {
2202 f.fill = Some(transform_recursive(fill, transform_fn)?);
2203 }
2204 Expression::Rpad(f)
2205 }
2206
2207 // ===== Conditional expressions =====
2208 Expression::Case(mut c) => {
2209 if let Some(operand) = c.operand {
2210 c.operand = Some(transform_recursive(operand, transform_fn)?);
2211 }
2212 c.whens = c
2213 .whens
2214 .into_iter()
2215 .map(|(cond, then)| {
2216 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
2217 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
2218 (new_cond, new_then)
2219 })
2220 .collect();
2221 if let Some(else_expr) = c.else_ {
2222 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
2223 }
2224 Expression::Case(c)
2225 }
2226 Expression::IfFunc(mut f) => {
2227 f.condition = transform_recursive(f.condition, transform_fn)?;
2228 f.true_value = transform_recursive(f.true_value, transform_fn)?;
2229 if let Some(false_val) = f.false_value {
2230 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
2231 }
2232 Expression::IfFunc(f)
2233 }
2234
2235 // ===== Date/Time expressions =====
2236 Expression::DateAdd(mut f) => {
2237 f.this = transform_recursive(f.this, transform_fn)?;
2238 f.interval = transform_recursive(f.interval, transform_fn)?;
2239 Expression::DateAdd(f)
2240 }
2241 Expression::DateSub(mut f) => {
2242 f.this = transform_recursive(f.this, transform_fn)?;
2243 f.interval = transform_recursive(f.interval, transform_fn)?;
2244 Expression::DateSub(f)
2245 }
2246 Expression::DateDiff(mut f) => {
2247 f.this = transform_recursive(f.this, transform_fn)?;
2248 f.expression = transform_recursive(f.expression, transform_fn)?;
2249 Expression::DateDiff(f)
2250 }
2251 Expression::DateTrunc(mut f) => {
2252 f.this = transform_recursive(f.this, transform_fn)?;
2253 Expression::DateTrunc(f)
2254 }
2255 Expression::Extract(mut f) => {
2256 f.this = transform_recursive(f.this, transform_fn)?;
2257 Expression::Extract(f)
2258 }
2259
2260 // ===== JSON expressions =====
2261 Expression::JsonObject(mut f) => {
2262 f.pairs = f
2263 .pairs
2264 .into_iter()
2265 .map(|(k, v)| {
2266 let new_k = transform_recursive(k, transform_fn)?;
2267 let new_v = transform_recursive(v, transform_fn)?;
2268 Ok((new_k, new_v))
2269 })
2270 .collect::<Result<Vec<_>>>()?;
2271 Expression::JsonObject(f)
2272 }
2273
2274 // ===== Subquery expressions =====
2275 Expression::Subquery(mut s) => {
2276 s.this = transform_recursive(s.this, transform_fn)?;
2277 Expression::Subquery(s)
2278 }
2279 Expression::Exists(mut e) => {
2280 e.this = transform_recursive(e.this, transform_fn)?;
2281 Expression::Exists(e)
2282 }
2283 Expression::Describe(mut d) => {
2284 d.target = transform_recursive(d.target, transform_fn)?;
2285 Expression::Describe(d)
2286 }
2287
2288 // ===== Set operations =====
2289 Expression::Union(mut u) => {
2290 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
2291 u.left = transform_recursive(left, transform_fn)?;
2292 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
2293 u.right = transform_recursive(right, transform_fn)?;
2294 if let Some(mut with) = u.with.take() {
2295 with.ctes = with
2296 .ctes
2297 .into_iter()
2298 .map(|mut cte| {
2299 let original = cte.this.clone();
2300 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2301 cte
2302 })
2303 .collect();
2304 u.with = Some(with);
2305 }
2306 Expression::Union(u)
2307 }
2308 Expression::Intersect(mut i) => {
2309 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
2310 i.left = transform_recursive(left, transform_fn)?;
2311 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
2312 i.right = transform_recursive(right, transform_fn)?;
2313 if let Some(mut with) = i.with.take() {
2314 with.ctes = with
2315 .ctes
2316 .into_iter()
2317 .map(|mut cte| {
2318 let original = cte.this.clone();
2319 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2320 cte
2321 })
2322 .collect();
2323 i.with = Some(with);
2324 }
2325 Expression::Intersect(i)
2326 }
2327 Expression::Except(mut e) => {
2328 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
2329 e.left = transform_recursive(left, transform_fn)?;
2330 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
2331 e.right = transform_recursive(right, transform_fn)?;
2332 if let Some(mut with) = e.with.take() {
2333 with.ctes = with
2334 .ctes
2335 .into_iter()
2336 .map(|mut cte| {
2337 let original = cte.this.clone();
2338 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2339 cte
2340 })
2341 .collect();
2342 e.with = Some(with);
2343 }
2344 Expression::Except(e)
2345 }
2346
2347 // ===== DML expressions =====
2348 Expression::Insert(mut ins) => {
2349 // Transform VALUES clause expressions
2350 let mut new_values = Vec::new();
2351 for row in ins.values {
2352 let mut new_row = Vec::new();
2353 for e in row {
2354 new_row.push(transform_recursive(e, transform_fn)?);
2355 }
2356 new_values.push(new_row);
2357 }
2358 ins.values = new_values;
2359
2360 // Transform query (for INSERT ... SELECT)
2361 if let Some(query) = ins.query {
2362 ins.query = Some(transform_recursive(query, transform_fn)?);
2363 }
2364
2365 // Transform RETURNING clause
2366 let mut new_returning = Vec::new();
2367 for e in ins.returning {
2368 new_returning.push(transform_recursive(e, transform_fn)?);
2369 }
2370 ins.returning = new_returning;
2371
2372 // Transform ON CONFLICT clause
2373 if let Some(on_conflict) = ins.on_conflict {
2374 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
2375 }
2376
2377 Expression::Insert(ins)
2378 }
2379 Expression::Update(mut upd) => {
2380 upd.set = upd
2381 .set
2382 .into_iter()
2383 .map(|(id, val)| {
2384 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
2385 (id, new_val)
2386 })
2387 .collect();
2388 if let Some(mut where_clause) = upd.where_clause.take() {
2389 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2390 upd.where_clause = Some(where_clause);
2391 }
2392 Expression::Update(upd)
2393 }
2394 Expression::Delete(mut del) => {
2395 if let Some(mut where_clause) = del.where_clause.take() {
2396 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2397 del.where_clause = Some(where_clause);
2398 }
2399 Expression::Delete(del)
2400 }
2401
2402 // ===== CTE expressions =====
2403 Expression::With(mut w) => {
2404 w.ctes = w
2405 .ctes
2406 .into_iter()
2407 .map(|mut cte| {
2408 let original = cte.this.clone();
2409 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2410 cte
2411 })
2412 .collect();
2413 Expression::With(w)
2414 }
2415 Expression::Cte(mut c) => {
2416 c.this = transform_recursive(c.this, transform_fn)?;
2417 Expression::Cte(c)
2418 }
2419
2420 // ===== Order expressions =====
2421 Expression::Ordered(mut o) => {
2422 o.this = transform_recursive(o.this, transform_fn)?;
2423 Expression::Ordered(o)
2424 }
2425
2426 // ===== Negation =====
2427 Expression::Neg(mut n) => {
2428 n.this = transform_recursive(n.this, transform_fn)?;
2429 Expression::Neg(n)
2430 }
2431
2432 // ===== Between =====
2433 Expression::Between(mut b) => {
2434 b.this = transform_recursive(b.this, transform_fn)?;
2435 b.low = transform_recursive(b.low, transform_fn)?;
2436 b.high = transform_recursive(b.high, transform_fn)?;
2437 Expression::Between(b)
2438 }
2439 Expression::IsNull(mut i) => {
2440 i.this = transform_recursive(i.this, transform_fn)?;
2441 Expression::IsNull(i)
2442 }
2443 Expression::IsTrue(mut i) => {
2444 i.this = transform_recursive(i.this, transform_fn)?;
2445 Expression::IsTrue(i)
2446 }
2447 Expression::IsFalse(mut i) => {
2448 i.this = transform_recursive(i.this, transform_fn)?;
2449 Expression::IsFalse(i)
2450 }
2451
2452 // ===== Like expressions =====
2453 Expression::Like(mut l) => {
2454 l.left = transform_recursive(l.left, transform_fn)?;
2455 l.right = transform_recursive(l.right, transform_fn)?;
2456 Expression::Like(l)
2457 }
2458 Expression::ILike(mut l) => {
2459 l.left = transform_recursive(l.left, transform_fn)?;
2460 l.right = transform_recursive(l.right, transform_fn)?;
2461 Expression::ILike(l)
2462 }
2463
2464 // ===== Additional binary ops not covered by macro =====
2465 Expression::Neq(op) => transform_binary!(Neq, *op),
2466 Expression::Lte(op) => transform_binary!(Lte, *op),
2467 Expression::Gte(op) => transform_binary!(Gte, *op),
2468 Expression::Mod(op) => transform_binary!(Mod, *op),
2469 Expression::Concat(op) => transform_binary!(Concat, *op),
2470 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
2471 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
2472 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
2473 Expression::Is(op) => transform_binary!(Is, *op),
2474
2475 // ===== TryCast / SafeCast =====
2476 Expression::TryCast(mut c) => {
2477 c.this = transform_recursive(c.this, transform_fn)?;
2478 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2479 Expression::TryCast(c)
2480 }
2481 Expression::SafeCast(mut c) => {
2482 c.this = transform_recursive(c.this, transform_fn)?;
2483 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2484 Expression::SafeCast(c)
2485 }
2486
2487 // ===== Misc =====
2488 Expression::Unnest(mut f) => {
2489 f.this = transform_recursive(f.this, transform_fn)?;
2490 f.expressions = f
2491 .expressions
2492 .into_iter()
2493 .map(|e| transform_recursive(e, transform_fn))
2494 .collect::<Result<Vec<_>>>()?;
2495 Expression::Unnest(f)
2496 }
2497 Expression::Explode(mut f) => {
2498 f.this = transform_recursive(f.this, transform_fn)?;
2499 Expression::Explode(f)
2500 }
2501 Expression::GroupConcat(mut f) => {
2502 f.this = transform_recursive(f.this, transform_fn)?;
2503 Expression::GroupConcat(f)
2504 }
2505 Expression::StringAgg(mut f) => {
2506 f.this = transform_recursive(f.this, transform_fn)?;
2507 Expression::StringAgg(f)
2508 }
2509 Expression::ListAgg(mut f) => {
2510 f.this = transform_recursive(f.this, transform_fn)?;
2511 Expression::ListAgg(f)
2512 }
2513 Expression::ArrayAgg(mut f) => {
2514 f.this = transform_recursive(f.this, transform_fn)?;
2515 Expression::ArrayAgg(f)
2516 }
2517 Expression::ParseJson(mut f) => {
2518 f.this = transform_recursive(f.this, transform_fn)?;
2519 Expression::ParseJson(f)
2520 }
2521 Expression::ToJson(mut f) => {
2522 f.this = transform_recursive(f.this, transform_fn)?;
2523 Expression::ToJson(f)
2524 }
2525 Expression::JSONExtract(mut e) => {
2526 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2527 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2528 Expression::JSONExtract(e)
2529 }
2530 Expression::JSONExtractScalar(mut e) => {
2531 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2532 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2533 Expression::JSONExtractScalar(e)
2534 }
2535
2536 // StrToTime: recurse into this
2537 Expression::StrToTime(mut e) => {
2538 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2539 Expression::StrToTime(e)
2540 }
2541
2542 // UnixToTime: recurse into this
2543 Expression::UnixToTime(mut e) => {
2544 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2545 Expression::UnixToTime(e)
2546 }
2547
2548 // CreateTable: recurse into column defaults, on_update expressions, and data types
2549 Expression::CreateTable(mut ct) => {
2550 for col in &mut ct.columns {
2551 if let Some(default_expr) = col.default.take() {
2552 col.default = Some(transform_recursive(default_expr, transform_fn)?);
2553 }
2554 if let Some(on_update_expr) = col.on_update.take() {
2555 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
2556 }
2557 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
2558 // are NOT applied here because per-dialect transforms are designed for CAST/expression
2559 // contexts and may not produce correct results for DDL column definitions.
2560 // The DDL type mappings would need dedicated handling per source/target pair.
2561 }
2562 if let Some(as_select) = ct.as_select.take() {
2563 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
2564 }
2565 Expression::CreateTable(ct)
2566 }
2567
2568 // CreateView: recurse into the view body query
2569 Expression::CreateView(mut cv) => {
2570 cv.query = transform_recursive(cv.query, transform_fn)?;
2571 Expression::CreateView(cv)
2572 }
2573
2574 // CreateTask: recurse into the task body
2575 Expression::CreateTask(mut ct) => {
2576 ct.body = transform_recursive(ct.body, transform_fn)?;
2577 Expression::CreateTask(ct)
2578 }
2579
2580 // CreateProcedure: recurse into body expressions
2581 Expression::CreateProcedure(mut cp) => {
2582 if let Some(body) = cp.body.take() {
2583 cp.body = Some(match body {
2584 FunctionBody::Expression(expr) => {
2585 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2586 }
2587 FunctionBody::Return(expr) => {
2588 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2589 }
2590 FunctionBody::Statements(stmts) => {
2591 let transformed_stmts = stmts
2592 .into_iter()
2593 .map(|s| transform_recursive(s, transform_fn))
2594 .collect::<Result<Vec<_>>>()?;
2595 FunctionBody::Statements(transformed_stmts)
2596 }
2597 other => other,
2598 });
2599 }
2600 Expression::CreateProcedure(cp)
2601 }
2602
2603 // CreateFunction: recurse into body expressions
2604 Expression::CreateFunction(mut cf) => {
2605 if let Some(body) = cf.body.take() {
2606 cf.body = Some(match body {
2607 FunctionBody::Expression(expr) => {
2608 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2609 }
2610 FunctionBody::Return(expr) => {
2611 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2612 }
2613 FunctionBody::Statements(stmts) => {
2614 let transformed_stmts = stmts
2615 .into_iter()
2616 .map(|s| transform_recursive(s, transform_fn))
2617 .collect::<Result<Vec<_>>>()?;
2618 FunctionBody::Statements(transformed_stmts)
2619 }
2620 other => other,
2621 });
2622 }
2623 Expression::CreateFunction(cf)
2624 }
2625
2626 // MemberOf: recurse into left and right operands
2627 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
2628 // ArrayContainsAll (@>): recurse into left and right operands
2629 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
2630 // ArrayContainedBy (<@): recurse into left and right operands
2631 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
2632 // ArrayOverlaps (&&): recurse into left and right operands
2633 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
2634 // TsMatch (@@): recurse into left and right operands
2635 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
2636 // Adjacent (-|-): recurse into left and right operands
2637 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
2638
2639 // Table: recurse into when (HistoricalData) and changes fields
2640 Expression::Table(mut t) => {
2641 if let Some(when) = t.when.take() {
2642 let transformed =
2643 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
2644 if let Expression::HistoricalData(hd) = transformed {
2645 t.when = Some(hd);
2646 }
2647 }
2648 if let Some(changes) = t.changes.take() {
2649 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
2650 if let Expression::Changes(c) = transformed {
2651 t.changes = Some(c);
2652 }
2653 }
2654 Expression::Table(t)
2655 }
2656
2657 // HistoricalData (Snowflake time travel): recurse into expression
2658 Expression::HistoricalData(mut hd) => {
2659 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
2660 Expression::HistoricalData(hd)
2661 }
2662
2663 // Changes (Snowflake CHANGES clause): recurse into at_before and end
2664 Expression::Changes(mut c) => {
2665 if let Some(at_before) = c.at_before.take() {
2666 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
2667 }
2668 if let Some(end) = c.end.take() {
2669 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
2670 }
2671 Expression::Changes(c)
2672 }
2673
2674 // TableArgument: TABLE(expr) or MODEL(expr)
2675 Expression::TableArgument(mut ta) => {
2676 ta.this = transform_recursive(ta.this, transform_fn)?;
2677 Expression::TableArgument(ta)
2678 }
2679
2680 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
2681 Expression::JoinedTable(mut jt) => {
2682 jt.left = transform_recursive(jt.left, transform_fn)?;
2683 for join in &mut jt.joins {
2684 join.this = transform_recursive(
2685 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
2686 transform_fn,
2687 )?;
2688 if let Some(on) = join.on.take() {
2689 join.on = Some(transform_recursive(on, transform_fn)?);
2690 }
2691 }
2692 jt.lateral_views = jt
2693 .lateral_views
2694 .into_iter()
2695 .map(|mut lv| {
2696 lv.this = transform_recursive(lv.this, transform_fn)?;
2697 Ok(lv)
2698 })
2699 .collect::<Result<Vec<_>>>()?;
2700 Expression::JoinedTable(jt)
2701 }
2702
2703 // Lateral: LATERAL func() - recurse into the function expression
2704 Expression::Lateral(mut lat) => {
2705 *lat.this = transform_recursive(*lat.this, transform_fn)?;
2706 Expression::Lateral(lat)
2707 }
2708
2709 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
2710 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
2711 // as a unit together with the WithinGroup wrapper
2712 Expression::WithinGroup(mut wg) => {
2713 wg.order_by = wg
2714 .order_by
2715 .into_iter()
2716 .map(|mut o| {
2717 let original = o.this.clone();
2718 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2719 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2720 Ok(Expression::Ordered(transformed)) => *transformed,
2721 Ok(_) | Err(_) => o,
2722 }
2723 })
2724 .collect();
2725 Expression::WithinGroup(wg)
2726 }
2727
2728 // Filter: recurse into both the aggregate and the filter condition
2729 Expression::Filter(mut f) => {
2730 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
2731 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
2732 Expression::Filter(f)
2733 }
2734
2735 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
2736 // filter, order_by, having_max, and limit.
2737 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
2738 Expression::Sum(f) => recurse_agg!(Sum, f),
2739 Expression::Avg(f) => recurse_agg!(Avg, f),
2740 Expression::Min(f) => recurse_agg!(Min, f),
2741 Expression::Max(f) => recurse_agg!(Max, f),
2742 Expression::CountIf(f) => recurse_agg!(CountIf, f),
2743 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
2744 Expression::VarPop(f) => recurse_agg!(VarPop, f),
2745 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
2746 Expression::Median(f) => recurse_agg!(Median, f),
2747 Expression::Mode(f) => recurse_agg!(Mode, f),
2748 Expression::First(f) => recurse_agg!(First, f),
2749 Expression::Last(f) => recurse_agg!(Last, f),
2750 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
2751 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
2752 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
2753 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
2754 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
2755 Expression::Skewness(f) => recurse_agg!(Skewness, f),
2756 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
2757 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
2758 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
2759 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
2760 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
2761 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
2762
2763 // Count has its own struct with an Option<Expression> `this` field
2764 Expression::Count(mut c) => {
2765 if let Some(this) = c.this.take() {
2766 c.this = Some(transform_recursive(this, transform_fn)?);
2767 }
2768 if let Some(filter) = c.filter.take() {
2769 c.filter = Some(transform_recursive(filter, transform_fn)?);
2770 }
2771 Expression::Count(c)
2772 }
2773
2774 Expression::PipeOperator(mut pipe) => {
2775 pipe.this = transform_recursive(pipe.this, transform_fn)?;
2776 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
2777 Expression::PipeOperator(pipe)
2778 }
2779
2780 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
2781 Expression::ArrayExcept(mut f) => {
2782 f.this = transform_recursive(f.this, transform_fn)?;
2783 f.expression = transform_recursive(f.expression, transform_fn)?;
2784 Expression::ArrayExcept(f)
2785 }
2786 Expression::ArrayContains(mut f) => {
2787 f.this = transform_recursive(f.this, transform_fn)?;
2788 f.expression = transform_recursive(f.expression, transform_fn)?;
2789 Expression::ArrayContains(f)
2790 }
2791 Expression::ArrayDistinct(mut f) => {
2792 f.this = transform_recursive(f.this, transform_fn)?;
2793 Expression::ArrayDistinct(f)
2794 }
2795 Expression::ArrayPosition(mut f) => {
2796 f.this = transform_recursive(f.this, transform_fn)?;
2797 f.expression = transform_recursive(f.expression, transform_fn)?;
2798 Expression::ArrayPosition(f)
2799 }
2800
2801 // Pass through leaf nodes unchanged
2802 other => other,
2803 };
2804
2805 // Then apply the transform function
2806 transform_fn(expr)
2807}
2808
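// NOTE: the description below belongs to `configs_for_dialect_type`, which is defined
// after the cached statics. It is kept as a plain comment so that rustdoc does not
// attach it to `CachedDialectConfig`.
//
// Returns the tokenizer config, generator config, and expression transform closure
// for a built-in dialect type. This is the shared implementation used by both
// `Dialect::get()` and custom dialect construction.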
2812// ---------------------------------------------------------------------------
2813// Cached dialect configurations
2814// ---------------------------------------------------------------------------
2815
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time,
/// so they are deliberately not part of this cache entry.
struct CachedDialectConfig {
    /// Result of the dialect's `tokenizer_config()`; cloned for every lookup.
    tokenizer_config: TokenizerConfig,
    /// Result of the dialect's `generator_config()`, wrapped in an `Arc` so lookups
    /// only bump a reference count instead of copying the config.
    generator_config: Arc<GeneratorConfig>,
}
2822
/// Define a feature-gated `LazyLock<CachedDialectConfig>` static for one dialect.
///
/// * `$cache_ident` - name of the static to declare.
/// * `$dialect_value` - expression yielding the dialect whose configs are cached.
/// * `$cargo_feature` - feature name (string literal) that gates the static.
///
/// The configs are computed on first access of the static.
macro_rules! cached_dialect {
    ($cache_ident:ident, $dialect_value:expr, $cargo_feature:literal) => {
        #[cfg(feature = $cargo_feature)]
        static $cache_ident: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect_value;
            let tokenizer_config = dialect.tokenizer_config();
            let generator_config = Arc::new(dialect.generator_config());
            CachedDialectConfig {
                tokenizer_config,
                generator_config,
            }
        });
    };
}
2836
2837static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
2838 let d = GenericDialect;
2839 CachedDialectConfig {
2840 tokenizer_config: d.tokenizer_config(),
2841 generator_config: Arc::new(d.generator_config()),
2842 }
2843});
2844
// One lazily-initialized config cache per optional dialect. Each invocation expands to a
// static that only exists when the named cargo feature is enabled; the arguments are
// (static name, dialect unit struct, gating feature).
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(
    CACHED_MATERIALIZE,
    MaterializeDialect,
    "dialect-materialize"
);
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(
    CACHED_SINGLESTORE,
    SingleStoreDialect,
    "dialect-singlestore"
);
cached_dialect!(
    CACHED_COCKROACHDB,
    CockroachDBDialect,
    "dialect-cockroachdb"
);
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
2890
2891fn configs_for_dialect_type(
2892 dt: DialectType,
2893) -> (
2894 TokenizerConfig,
2895 Arc<GeneratorConfig>,
2896 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
2897) {
2898 /// Clone configs from a cached static and pair with a fresh transform closure.
2899 macro_rules! from_cache {
2900 ($cache:expr, $dialect_struct:expr) => {{
2901 let c = &*$cache;
2902 (
2903 c.tokenizer_config.clone(),
2904 c.generator_config.clone(),
2905 Box::new(move |e| $dialect_struct.transform_expr(e)),
2906 )
2907 }};
2908 }
2909 match dt {
2910 #[cfg(feature = "dialect-postgresql")]
2911 DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
2912 #[cfg(feature = "dialect-mysql")]
2913 DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
2914 #[cfg(feature = "dialect-bigquery")]
2915 DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
2916 #[cfg(feature = "dialect-snowflake")]
2917 DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
2918 #[cfg(feature = "dialect-duckdb")]
2919 DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
2920 #[cfg(feature = "dialect-tsql")]
2921 DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
2922 #[cfg(feature = "dialect-oracle")]
2923 DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
2924 #[cfg(feature = "dialect-hive")]
2925 DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
2926 #[cfg(feature = "dialect-spark")]
2927 DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
2928 #[cfg(feature = "dialect-sqlite")]
2929 DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
2930 #[cfg(feature = "dialect-presto")]
2931 DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
2932 #[cfg(feature = "dialect-trino")]
2933 DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
2934 #[cfg(feature = "dialect-redshift")]
2935 DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
2936 #[cfg(feature = "dialect-clickhouse")]
2937 DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
2938 #[cfg(feature = "dialect-databricks")]
2939 DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
2940 #[cfg(feature = "dialect-athena")]
2941 DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
2942 #[cfg(feature = "dialect-teradata")]
2943 DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
2944 #[cfg(feature = "dialect-doris")]
2945 DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
2946 #[cfg(feature = "dialect-starrocks")]
2947 DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
2948 #[cfg(feature = "dialect-materialize")]
2949 DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
2950 #[cfg(feature = "dialect-risingwave")]
2951 DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
2952 #[cfg(feature = "dialect-singlestore")]
2953 DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
2954 #[cfg(feature = "dialect-cockroachdb")]
2955 DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
2956 #[cfg(feature = "dialect-tidb")]
2957 DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
2958 #[cfg(feature = "dialect-druid")]
2959 DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
2960 #[cfg(feature = "dialect-solr")]
2961 DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
2962 #[cfg(feature = "dialect-tableau")]
2963 DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
2964 #[cfg(feature = "dialect-dune")]
2965 DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
2966 #[cfg(feature = "dialect-fabric")]
2967 DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
2968 #[cfg(feature = "dialect-drill")]
2969 DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
2970 #[cfg(feature = "dialect-dremio")]
2971 DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
2972 #[cfg(feature = "dialect-exasol")]
2973 DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
2974 #[cfg(feature = "dialect-datafusion")]
2975 DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
2976 _ => from_cache!(CACHED_GENERIC, GenericDialect),
2977 }
2978}
2979
2980// ---------------------------------------------------------------------------
2981// Custom dialect registry
2982// ---------------------------------------------------------------------------
2983
/// Process-wide table of registered custom dialects, keyed by dialect name.
///
/// Entries are stored behind `Arc` so lookups can hand out a shared handle and release
/// the read lock immediately. The map is created empty on first access.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
2986
/// Fully resolved configuration of a registered custom dialect.
///
/// Produced by `CustomDialectBuilder::register` and stored in the custom dialect registry.
struct CustomDialectConfig {
    /// Name the dialect is registered under; also used as the registry key.
    name: String,
    /// Built-in dialect this custom dialect derives from. It supplies the dialect type of
    /// the resulting `Dialect` and the fallback transform when `transform` is `None`.
    base_dialect: DialectType,
    /// Base dialect tokenizer config with the builder's tokenizer modifier applied.
    tokenizer_config: TokenizerConfig,
    /// Base dialect generator config with the builder's generator modifier applied.
    generator_config: GeneratorConfig,
    /// Optional per-node transform replacing the base dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing function replacing the built-in preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
2995
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms. Nothing is registered until [`CustomDialectBuilder::register`] is called.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name the dialect will be registered under.
    name: String,
    /// Built-in dialect whose configs (and, absent an override, transform) are inherited.
    base_dialect: DialectType,
    /// One-shot edit applied to the inherited tokenizer config during `register`.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot edit applied to the inherited generator config during `register`.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node transform that replaces the base dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing function that replaces the built-in preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3031
3032impl CustomDialectBuilder {
3033 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
3034 pub fn new(name: impl Into<String>) -> Self {
3035 Self {
3036 name: name.into(),
3037 base_dialect: DialectType::Generic,
3038 tokenizer_modifier: None,
3039 generator_modifier: None,
3040 transform: None,
3041 preprocess: None,
3042 }
3043 }
3044
3045 /// Set the base built-in dialect to inherit configuration from.
3046 pub fn based_on(mut self, dialect: DialectType) -> Self {
3047 self.base_dialect = dialect;
3048 self
3049 }
3050
3051 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
3052 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
3053 where
3054 F: FnOnce(&mut TokenizerConfig) + 'static,
3055 {
3056 self.tokenizer_modifier = Some(Box::new(f));
3057 self
3058 }
3059
3060 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
3061 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
3062 where
3063 F: FnOnce(&mut GeneratorConfig) + 'static,
3064 {
3065 self.generator_modifier = Some(Box::new(f));
3066 self
3067 }
3068
3069 /// Set a custom per-node expression transform function.
3070 ///
3071 /// This replaces the base dialect's transform. It is called on every expression
3072 /// node during the recursive transform pass.
3073 pub fn transform_fn<F>(mut self, f: F) -> Self
3074 where
3075 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3076 {
3077 self.transform = Some(Arc::new(f));
3078 self
3079 }
3080
3081 /// Set a custom whole-tree preprocessing function.
3082 ///
3083 /// This replaces the base dialect's built-in preprocessing. It is called once
3084 /// on the entire expression tree before the recursive per-node transform.
3085 pub fn preprocess_fn<F>(mut self, f: F) -> Self
3086 where
3087 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3088 {
3089 self.preprocess = Some(Arc::new(f));
3090 self
3091 }
3092
3093 /// Build the custom dialect configuration and register it in the global registry.
3094 ///
3095 /// Returns an error if:
3096 /// - The name collides with a built-in dialect name
3097 /// - A custom dialect with the same name is already registered
3098 pub fn register(self) -> Result<()> {
3099 // Reject names that collide with built-in dialects
3100 if DialectType::from_str(&self.name).is_ok() {
3101 return Err(crate::error::Error::parse(
3102 format!(
3103 "Cannot register custom dialect '{}': name collides with built-in dialect",
3104 self.name
3105 ),
3106 0,
3107 0,
3108 0,
3109 0,
3110 ));
3111 }
3112
3113 // Get base configs
3114 let (mut tok_config, arc_gen_config, _base_transform) =
3115 configs_for_dialect_type(self.base_dialect);
3116 let mut gen_config = (*arc_gen_config).clone();
3117
3118 // Apply modifiers
3119 if let Some(tok_mod) = self.tokenizer_modifier {
3120 tok_mod(&mut tok_config);
3121 }
3122 if let Some(gen_mod) = self.generator_modifier {
3123 gen_mod(&mut gen_config);
3124 }
3125
3126 let config = CustomDialectConfig {
3127 name: self.name.clone(),
3128 base_dialect: self.base_dialect,
3129 tokenizer_config: tok_config,
3130 generator_config: gen_config,
3131 transform: self.transform,
3132 preprocess: self.preprocess,
3133 };
3134
3135 register_custom_dialect(config)
3136 }
3137}
3138
3139use std::str::FromStr;
3140
3141fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
3142 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
3143 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
3144 })?;
3145
3146 if registry.contains_key(&config.name) {
3147 return Err(crate::error::Error::parse(
3148 format!("Custom dialect '{}' is already registered", config.name),
3149 0,
3150 0,
3151 0,
3152 0,
3153 ));
3154 }
3155
3156 registry.insert(config.name.clone(), Arc::new(config));
3157 Ok(())
3158}
3159
3160/// Remove a custom dialect from the global registry.
3161///
3162/// Returns `true` if a dialect with that name was found and removed,
3163/// `false` if no such custom dialect existed.
3164pub fn unregister_custom_dialect(name: &str) -> bool {
3165 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
3166 registry.remove(name).is_some()
3167 } else {
3168 false
3169 }
3170}
3171
3172fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
3173 CUSTOM_DIALECT_REGISTRY
3174 .read()
3175 .ok()
3176 .and_then(|registry| registry.get(name).cloned())
3177}
3178
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Built-in dialect type; for custom dialects this is the base dialect.
    dialect_type: DialectType,
    /// Tokenizer built from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    /// Default generator configuration, shared via `Arc`.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node transform applied during the recursive transform pass.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3211
/// Options for [`Dialect::transpile_with`].
///
/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
/// The struct is marked `#[non_exhaustive]` so new fields can be added without
/// breaking the API.
///
/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
/// Because of `#[serde(default)]`, fields missing from the input fall back to
/// their `Default` values.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", default)]
#[non_exhaustive]
pub struct TranspileOptions {
    /// Whether to pretty-print the output SQL. Defaults to `false`.
    pub pretty: bool,
}
3227
impl TranspileOptions {
    /// Construct options with pretty-printing enabled.
    ///
    /// Shorthand for starting from the defaults and setting `pretty` to `true`.
    pub fn pretty() -> Self {
        Self { pretty: true }
    }
}
3234
/// A value that can be used as the target dialect in [`Dialect::transpile`] /
/// [`Dialect::transpile_with`].
///
/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
/// dialect handle, including custom ones). End users do not normally need to
/// implement this trait themselves.
pub trait TranspileTarget {
    /// Invoke `f` with a reference to the resolved target dialect.
    ///
    /// The callback style lets an implementation lend out either a dialect it creates for
    /// the duration of the call or one it already borrows, and returns whatever `f` returns.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
}
3245
3246impl TranspileTarget for DialectType {
3247 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
3248 f(&Dialect::get(self))
3249 }
3250}
3251
impl TranspileTarget for &Dialect {
    /// Passes the borrowed dialect straight to `f`; no new dialect is constructed.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        f(self)
    }
}
3257
3258impl Dialect {
3259 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
3260 ///
3261 /// This is the primary constructor. It initializes the tokenizer, generator config,
3262 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
3263 /// For hybrid dialects like Athena, it also sets up expression-specific generator
3264 /// config routing.
3265 pub fn get(dialect_type: DialectType) -> Self {
3266 let (tokenizer_config, generator_config, transformer) =
3267 configs_for_dialect_type(dialect_type);
3268
3269 // Set up expression-specific generator config for hybrid dialects
3270 let generator_config_for_expr: Option<
3271 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
3272 > = match dialect_type {
3273 #[cfg(feature = "dialect-athena")]
3274 DialectType::Athena => Some(Box::new(|expr| {
3275 AthenaDialect.generator_config_for_expr(expr)
3276 })),
3277 _ => None,
3278 };
3279
3280 Self {
3281 dialect_type,
3282 tokenizer: Tokenizer::new(tokenizer_config),
3283 generator_config,
3284 transformer,
3285 generator_config_for_expr,
3286 custom_preprocess: None,
3287 }
3288 }
3289
3290 /// Look up a dialect by string name.
3291 ///
3292 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
3293 /// falls back to the custom dialect registry. Returns `None` if no dialect
3294 /// with the given name exists.
3295 pub fn get_by_name(name: &str) -> Option<Self> {
3296 // Try built-in first
3297 if let Ok(dt) = DialectType::from_str(name) {
3298 return Some(Self::get(dt));
3299 }
3300
3301 // Try custom registry
3302 let config = get_custom_dialect_config(name)?;
3303 Some(Self::from_custom_config(&config))
3304 }
3305
3306 /// Construct a `Dialect` from a custom dialect configuration.
3307 fn from_custom_config(config: &CustomDialectConfig) -> Self {
3308 // Build the transformer: use custom if provided, else use base dialect's
3309 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
3310 if let Some(ref custom_transform) = config.transform {
3311 let t = Arc::clone(custom_transform);
3312 Box::new(move |e| t(e))
3313 } else {
3314 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
3315 base_transform
3316 };
3317
3318 // Build the custom preprocess: use custom if provided
3319 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
3320 config.preprocess.as_ref().map(|p| {
3321 let p = Arc::clone(p);
3322 Box::new(move |e: Expression| p(e))
3323 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
3324 });
3325
3326 Self {
3327 dialect_type: config.base_dialect,
3328 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
3329 generator_config: Arc::new(config.generator_config.clone()),
3330 transformer,
3331 generator_config_for_expr: None,
3332 custom_preprocess,
3333 }
3334 }
3335
/// Get the dialect type.
///
/// For a dialect built from a custom configuration this is the base dialect it was
/// derived from.
pub fn dialect_type(&self) -> DialectType {
    self.dialect_type
}
3340
/// Get the generator configuration.
///
/// This is the dialect's default config; it does not reflect any per-expression
/// override used by hybrid dialects.
pub fn generator_config(&self) -> &GeneratorConfig {
    &self.generator_config
}
3345
3346 /// Parses a SQL string into a list of [`Expression`] AST nodes.
3347 ///
3348 /// The input may contain multiple semicolon-separated statements; each one
3349 /// produces a separate element in the returned vector. Tokenization uses
3350 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
3351 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
3352 let tokens = self.tokenizer.tokenize(sql)?;
3353 let config = crate::parser::ParserConfig {
3354 dialect: Some(self.dialect_type),
3355 ..Default::default()
3356 };
3357 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3358 parser.parse()
3359 }
3360
/// Tokenize SQL using this dialect's tokenizer configuration.
///
/// Returns the token list without parsing it; tokenizer errors are propagated.
pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
    self.tokenizer.tokenize(sql)
}
3365
3366 /// Get the generator config for a specific expression (supports hybrid dialects).
3367 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
3368 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
3369 if let Some(ref config_fn) = self.generator_config_for_expr {
3370 config_fn(expr)
3371 } else {
3372 (*self.generator_config).clone()
3373 }
3374 }
3375
3376 /// Generates a SQL string from an [`Expression`] AST node.
3377 ///
3378 /// The output uses this dialect's generator configuration for identifier quoting,
3379 /// keyword casing, function name normalization, and syntax style. The result is
3380 /// a single-line (non-pretty) SQL string.
3381 pub fn generate(&self, expr: &Expression) -> Result<String> {
3382 // Fast path: when no per-expression config override, share the Arc cheaply.
3383 if self.generator_config_for_expr.is_none() {
3384 let mut generator = Generator::with_arc_config(self.generator_config.clone());
3385 return generator.generate(expr);
3386 }
3387 let config = self.get_config_for_expr(expr);
3388 let mut generator = Generator::with_config(config);
3389 generator.generate(expr)
3390 }
3391
3392 /// Generate SQL from an expression with pretty printing enabled
3393 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
3394 let mut config = self.get_config_for_expr(expr);
3395 config.pretty = true;
3396 let mut generator = Generator::with_config(config);
3397 generator.generate(expr)
3398 }
3399
3400 /// Generate SQL from an expression with source dialect info (for transpilation)
3401 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
3402 let mut config = self.get_config_for_expr(expr);
3403 config.source_dialect = Some(source);
3404 let mut generator = Generator::with_config(config);
3405 generator.generate(expr)
3406 }
3407
3408 /// Generate SQL from an expression with pretty printing and source dialect info
3409 pub fn generate_pretty_with_source(
3410 &self,
3411 expr: &Expression,
3412 source: DialectType,
3413 ) -> Result<String> {
3414 let mut config = self.get_config_for_expr(expr);
3415 config.pretty = true;
3416 config.source_dialect = Some(source);
3417 let mut generator = Generator::with_config(config);
3418 generator.generate(expr)
3419 }
3420
3421 /// Generate SQL from an expression with forced identifier quoting (identify=True)
3422 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
3423 let mut config = self.get_config_for_expr(expr);
3424 config.always_quote_identifiers = true;
3425 let mut generator = Generator::with_config(config);
3426 generator.generate(expr)
3427 }
3428
3429 /// Generate SQL from an expression with pretty printing and forced identifier quoting
3430 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
3431 let mut config = (*self.generator_config).clone();
3432 config.pretty = true;
3433 config.always_quote_identifiers = true;
3434 let mut generator = Generator::with_config(config);
3435 generator.generate(expr)
3436 }
3437
3438 /// Generate SQL from an expression with caller-specified config overrides
3439 pub fn generate_with_overrides(
3440 &self,
3441 expr: &Expression,
3442 overrides: impl FnOnce(&mut GeneratorConfig),
3443 ) -> Result<String> {
3444 let mut config = self.get_config_for_expr(expr);
3445 overrides(&mut config);
3446 let mut generator = Generator::with_config(config);
3447 generator.generate(expr)
3448 }
3449
3450 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
3451 ///
3452 /// The transformation proceeds in two phases:
3453 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
3454 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
3455 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
3456 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
3457 ///
3458 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
3459 /// and for identity transforms (normalizing SQL within the same dialect).
3460 pub fn transform(&self, expr: Expression) -> Result<Expression> {
3461 // Apply preprocessing transforms based on dialect
3462 let preprocessed = self.preprocess(expr)?;
3463 // Then apply recursive transformation
3464 transform_recursive(preprocessed, &self.transformer)
3465 }
3466
3467 /// Apply dialect-specific preprocessing transforms
3468 fn preprocess(&self, expr: Expression) -> Result<Expression> {
3469 // If a custom preprocess function is set, use it instead of the built-in logic
3470 if let Some(ref custom_preprocess) = self.custom_preprocess {
3471 return custom_preprocess(expr);
3472 }
3473
3474 #[cfg(any(
3475 feature = "dialect-mysql",
3476 feature = "dialect-postgresql",
3477 feature = "dialect-bigquery",
3478 feature = "dialect-snowflake",
3479 feature = "dialect-tsql",
3480 feature = "dialect-spark",
3481 feature = "dialect-databricks",
3482 feature = "dialect-hive",
3483 feature = "dialect-sqlite",
3484 feature = "dialect-trino",
3485 feature = "dialect-presto",
3486 feature = "dialect-duckdb",
3487 feature = "dialect-redshift",
3488 feature = "dialect-starrocks",
3489 feature = "dialect-oracle",
3490 feature = "dialect-clickhouse",
3491 ))]
3492 use crate::transforms;
3493
3494 match self.dialect_type {
3495 // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
3496 // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
3497 #[cfg(feature = "dialect-mysql")]
3498 DialectType::MySQL => {
3499 let expr = transforms::eliminate_qualify(expr)?;
3500 let expr = transforms::eliminate_full_outer_join(expr)?;
3501 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3502 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3503 Ok(expr)
3504 }
3505 // PostgreSQL doesn't support QUALIFY
3506 // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
3507 // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
3508 #[cfg(feature = "dialect-postgresql")]
3509 DialectType::PostgreSQL => {
3510 let expr = transforms::eliminate_qualify(expr)?;
3511 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3512 let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
3513 // Normalize SET ... TO to SET ... = in CREATE FUNCTION
3514 // Only normalize when sqlglot would fully parse (no body) —
3515 // sqlglot falls back to Command for complex function bodies,
3516 // preserving the original text including TO.
3517 let expr = if let Expression::CreateFunction(mut cf) = expr {
3518 if cf.body.is_none() {
3519 for opt in &mut cf.set_options {
3520 if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
3521 &mut opt.value
3522 {
3523 *use_to = false;
3524 }
3525 }
3526 }
3527 Expression::CreateFunction(cf)
3528 } else {
3529 expr
3530 };
3531 Ok(expr)
3532 }
3533 // BigQuery doesn't support DISTINCT ON or CTE column aliases
3534 #[cfg(feature = "dialect-bigquery")]
3535 DialectType::BigQuery => {
3536 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3537 let expr = transforms::pushdown_cte_column_names(expr)?;
3538 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
3539 Ok(expr)
3540 }
3541 // Snowflake
3542 #[cfg(feature = "dialect-snowflake")]
3543 DialectType::Snowflake => {
3544 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3545 let expr = transforms::eliminate_window_clause(expr)?;
3546 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
3547 Ok(expr)
3548 }
3549 // TSQL doesn't support QUALIFY
3550 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
3551 // TSQL doesn't support CTEs in subqueries (hoist to top level)
3552 // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
3553 // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
3554 #[cfg(feature = "dialect-tsql")]
3555 DialectType::TSQL => {
3556 let expr = transforms::eliminate_qualify(expr)?;
3557 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3558 let expr = transforms::ensure_bools(expr)?;
3559 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3560 let expr = transforms::move_ctes_to_top_level(expr)?;
3561 let expr = transforms::qualify_derived_table_outputs(expr)?;
3562 Ok(expr)
3563 }
3564 // Spark doesn't support QUALIFY (but Databricks does)
3565 // Spark doesn't support CTEs in subqueries (hoist to top level)
3566 #[cfg(feature = "dialect-spark")]
3567 DialectType::Spark => {
3568 let expr = transforms::eliminate_qualify(expr)?;
3569 let expr = transforms::add_auto_table_alias(expr)?;
3570 let expr = transforms::simplify_nested_paren_values(expr)?;
3571 let expr = transforms::move_ctes_to_top_level(expr)?;
3572 Ok(expr)
3573 }
3574 // Databricks supports QUALIFY natively
3575 // Databricks doesn't support CTEs in subqueries (hoist to top level)
3576 #[cfg(feature = "dialect-databricks")]
3577 DialectType::Databricks => {
3578 let expr = transforms::add_auto_table_alias(expr)?;
3579 let expr = transforms::simplify_nested_paren_values(expr)?;
3580 let expr = transforms::move_ctes_to_top_level(expr)?;
3581 Ok(expr)
3582 }
3583 // Hive doesn't support QUALIFY or CTEs in subqueries
3584 #[cfg(feature = "dialect-hive")]
3585 DialectType::Hive => {
3586 let expr = transforms::eliminate_qualify(expr)?;
3587 let expr = transforms::move_ctes_to_top_level(expr)?;
3588 Ok(expr)
3589 }
3590 // SQLite doesn't support QUALIFY
3591 #[cfg(feature = "dialect-sqlite")]
3592 DialectType::SQLite => {
3593 let expr = transforms::eliminate_qualify(expr)?;
3594 Ok(expr)
3595 }
3596 // Trino doesn't support QUALIFY
3597 #[cfg(feature = "dialect-trino")]
3598 DialectType::Trino => {
3599 let expr = transforms::eliminate_qualify(expr)?;
3600 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
3601 Ok(expr)
3602 }
3603 // Presto doesn't support QUALIFY or WINDOW clause
3604 #[cfg(feature = "dialect-presto")]
3605 DialectType::Presto => {
3606 let expr = transforms::eliminate_qualify(expr)?;
3607 let expr = transforms::eliminate_window_clause(expr)?;
3608 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
3609 Ok(expr)
3610 }
3611 // DuckDB supports QUALIFY - no elimination needed
3612 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
3613 // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
3614 #[cfg(feature = "dialect-duckdb")]
3615 DialectType::DuckDB => {
3616 let expr = transforms::expand_posexplode_duckdb(expr)?;
3617 let expr = transforms::expand_like_any(expr)?;
3618 Ok(expr)
3619 }
3620 // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
3621 #[cfg(feature = "dialect-redshift")]
3622 DialectType::Redshift => {
3623 let expr = transforms::eliminate_qualify(expr)?;
3624 let expr = transforms::eliminate_window_clause(expr)?;
3625 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3626 Ok(expr)
3627 }
3628 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
3629 #[cfg(feature = "dialect-starrocks")]
3630 DialectType::StarRocks => {
3631 let expr = transforms::eliminate_qualify(expr)?;
3632 let expr = transforms::expand_between_in_delete(expr)?;
3633 let expr = transforms::eliminate_distinct_on_for_dialect(
3634 expr,
3635 Some(DialectType::StarRocks),
3636 Some(DialectType::StarRocks),
3637 )?;
3638 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3639 Ok(expr)
3640 }
3641 // DataFusion supports QUALIFY and semi/anti joins natively
3642 #[cfg(feature = "dialect-datafusion")]
3643 DialectType::DataFusion => Ok(expr),
3644 // Oracle doesn't support QUALIFY
3645 #[cfg(feature = "dialect-oracle")]
3646 DialectType::Oracle => {
3647 let expr = transforms::eliminate_qualify(expr)?;
3648 Ok(expr)
3649 }
3650 // Drill - no special preprocessing needed
3651 #[cfg(feature = "dialect-drill")]
3652 DialectType::Drill => Ok(expr),
3653 // Teradata - no special preprocessing needed
3654 #[cfg(feature = "dialect-teradata")]
3655 DialectType::Teradata => Ok(expr),
3656 // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
3657 #[cfg(feature = "dialect-clickhouse")]
3658 DialectType::ClickHouse => {
3659 let expr = transforms::no_limit_order_by_union(expr)?;
3660 Ok(expr)
3661 }
3662 // Other dialects - no preprocessing
3663 _ => Ok(expr),
3664 }
3665 }
3666
3667 /// Transpile SQL from this dialect to the given target dialect.
3668 ///
3669 /// The target may be specified as either a built-in [`DialectType`] enum variant
3670 /// or as a reference to a [`Dialect`] handle (built-in or custom). Both work:
3671 ///
3672 /// ```rust,ignore
3673 /// let pg = Dialect::get(DialectType::PostgreSQL);
3674 /// pg.transpile("SELECT NOW()", DialectType::BigQuery)?; // enum
3675 /// pg.transpile("SELECT NOW()", &custom_dialect)?; // handle
3676 /// ```
3677 ///
3678 /// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
3679 pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
3680 self.transpile_with(sql, target, TranspileOptions::default())
3681 }
3682
3683 /// Transpile SQL with configurable [`TranspileOptions`] (e.g. pretty-printing).
3684 pub fn transpile_with<T: TranspileTarget>(
3685 &self,
3686 sql: &str,
3687 target: T,
3688 opts: TranspileOptions,
3689 ) -> Result<Vec<String>> {
3690 target.with_dialect(|td| self.transpile_inner(sql, td, opts.pretty))
3691 }
3692
3693 #[cfg(not(feature = "transpile"))]
3694 fn transpile_inner(
3695 &self,
3696 sql: &str,
3697 target_dialect: &Dialect,
3698 pretty: bool,
3699 ) -> Result<Vec<String>> {
3700 let target = target_dialect.dialect_type;
3701 // Without the transpile feature, only same-dialect or to/from generic is supported
3702 if self.dialect_type != target
3703 && self.dialect_type != DialectType::Generic
3704 && target != DialectType::Generic
3705 {
3706 return Err(crate::error::Error::parse(
3707 "Cross-dialect transpilation not available in this build",
3708 0,
3709 0,
3710 0,
3711 0,
3712 ));
3713 }
3714
3715 let expressions = self.parse(sql)?;
3716 let generic_identity =
3717 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
3718
3719 if generic_identity {
3720 return expressions
3721 .into_iter()
3722 .map(|expr| {
3723 if pretty {
3724 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
3725 } else {
3726 target_dialect.generate_with_source(&expr, self.dialect_type)
3727 }
3728 })
3729 .collect();
3730 }
3731
3732 expressions
3733 .into_iter()
3734 .map(|expr| {
3735 let transformed = target_dialect.transform(expr)?;
3736 if pretty {
3737 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
3738 } else {
3739 target_dialect.generate_with_source(&transformed, self.dialect_type)
3740 }
3741 })
3742 .collect()
3743 }
3744
/// Full cross-dialect transpilation pipeline, compiled only with the `transpile` feature.
///
/// Parses `sql` with this (source) dialect and renders every parsed expression for
/// `target_dialect`, returning one SQL string per parsed expression. `pretty` selects
/// the pretty-printing generator.
///
/// When both source and target are `Generic`, expressions are generated directly with
/// no rewriting. Otherwise each expression goes through the following stages, in order
/// (several later stages rely on the output of earlier ones):
///
/// 1. Source-specific clean-ups: DuckDB `VARCHAR`/`CHAR` -> `TEXT`, the source dialect's
///    own `transform`, TSQL `ISNULL(JSON_QUERY, JSON_VALUE)` unwrapping, and the
///    Snowflake `CURRENT_TIME` / `REPEAT` / `RANDOM` rewrites.
/// 2. `cross_dialect_normalize`, followed by fix-ups keyed on the source/target pair
///    (PostgreSQL -> SQLite types, BigQuery `UNNEST` handling, `DISTINCT ON` elimination,
///    `GENERATE_DATE_ARRAY` for Snowflake, `LATERAL VIEW` for Spark-family targets,
///    UNION wrapping, `COUNT` -> `COUNT_BIG` for TSQL/Fabric).
/// 3. The target dialect's `transform`, a DuckDB-only `RANGE` post-pass, SQL generation,
///    and a Snowflake-only pretty-print adjustment.
///
/// # Errors
///
/// Returns the first error produced by tokenizing/parsing, by any rewrite stage, or by
/// generation. PostgreSQL -> SQLite input is rejected up front when it contains
/// pgvector distance operators.
#[cfg(feature = "transpile")]
fn transpile_inner(
    &self,
    sql: &str,
    target_dialect: &Dialect,
    pretty: bool,
) -> Result<Vec<String>> {
    let target = target_dialect.dialect_type;
    // PostgreSQL -> SQLite: scan the raw tokens first and fail early on pgvector
    // distance operators, before any parsing is attempted.
    if matches!(self.dialect_type, DialectType::PostgreSQL)
        && matches!(target, DialectType::SQLite)
    {
        self.reject_pgvector_distance_operators_for_sqlite(sql)?;
    }
    let expressions = self.parse(sql)?;
    let generic_identity =
        self.dialect_type == DialectType::Generic && target == DialectType::Generic;

    // Generic -> Generic: no source/target transforms apply, just regenerate.
    if generic_identity {
        return expressions
            .into_iter()
            .map(|expr| {
                if pretty {
                    target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                } else {
                    target_dialect.generate_with_source(&expr, self.dialect_type)
                }
            })
            .collect();
    }

    expressions
        .into_iter()
        .map(|expr| {
            // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
            // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
            // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
            let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                use crate::expressions::DataType as DT;
                transform_recursive(expr, &|e| match e {
                    Expression::DataType(DT::VarChar { .. }) => {
                        Ok(Expression::DataType(DT::Text))
                    }
                    Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                    _ => Ok(e),
                })?
            } else {
                expr
            };

            // When source and target differ, first normalize the source dialect's
            // AST constructs to standard SQL, so that the target dialect can handle them.
            // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
            let normalized =
                if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                    self.transform(expr)?
                } else {
                    expr
                };

            // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
            // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
            // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
            // cross-dialect transpilation we need the unwrapped JSON_QUERY.
            let normalized =
                if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                if let (
                                    Expression::Function(ref jq),
                                    Expression::Function(ref jv),
                                ) = (&f.args[0], &f.args[1])
                                {
                                    if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                        && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                    {
                                        // Unwrap: return just JSON_QUERY(...)
                                        return Ok(f.args[0].clone());
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

            // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
            // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
            // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
            let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                && !matches!(target, DialectType::Snowflake)
            {
                transform_recursive(normalized, &|e| {
                    if let Expression::Function(ref f) = e {
                        if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                            return Ok(Expression::Localtime(Box::new(
                                crate::expressions::Localtime { this: None },
                            )));
                        }
                    }
                    Ok(e)
                })?
            } else {
                normalized
            };

            // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
            // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
            // transform. DuckDB requires the count argument to be BIGINT.
            let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                && matches!(target, DialectType::DuckDB)
            {
                transform_recursive(normalized, &|e| {
                    if let Expression::Function(ref f) = e {
                        if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                            // Check if first arg is space string literal
                            if let Expression::Literal(ref lit) = f.args[0] {
                                if let crate::expressions::Literal::String(ref s) = lit.as_ref()
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                    inferred_type: None,
                                                },
                                            ));
                                            // Rebuild the call with the casted count; note that
                                            // span and inferred_type are reset on the new node.
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f
                                                        .trailing_comments
                                                        .clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                    span: None,
                                                    inferred_type: None,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                    }
                    Ok(e)
                })?
            } else {
                normalized
            };

            // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
            // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
            let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                && !matches!(target, DialectType::BigQuery)
            {
                crate::transforms::propagate_struct_field_names(normalized)?
            } else {
                normalized
            };

            // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
            // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
            // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
            // functions handle their generator args differently (as float seeds).
            let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                && matches!(target, DialectType::DuckDB)
            {
                // Builds CAST(lower + RANDOM() * (upper - lower) AS BIGINT).
                // The exact text of the `lower` literal is also used further down to
                // recognise (and undo) this expansion, so the two must stay in sync.
                fn make_scaled_random() -> Expression {
                    let lower =
                        Expression::Literal(Box::new(crate::expressions::Literal::Number(
                            "-9.223372036854776E+18".to_string(),
                        )));
                    let upper =
                        Expression::Literal(Box::new(crate::expressions::Literal::Number(
                            "9.223372036854776e+18".to_string(),
                        )));
                    let random_call = Expression::Random(crate::expressions::Random);
                    let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
                        this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
                            left: upper,
                            right: lower.clone(),
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        })),
                        trailing_comments: vec![],
                    }));
                    let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
                        left: random_call,
                        right: range_size,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                    let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
                        left: lower,
                        right: scaled,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: shifted,
                        to: crate::expressions::DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                }

                // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
                // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
                // This prevents transform_recursive (which is bottom-up) from expanding
                // seeded RANDOM into make_scaled_random() and losing the seed value.
                // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
                // and then un-expanded back to Expression::Random by the code below.
                let normalized = transform_recursive(normalized, &|e| {
                    if let Expression::Function(ref f) = e {
                        let n = f.name.to_ascii_uppercase();
                        if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
                            if let Expression::Function(mut f) = e {
                                for arg in f.args.iter_mut() {
                                    if let Expression::Rand(ref r) = arg {
                                        if r.lower.is_none() && r.upper.is_none() {
                                            if let Some(ref seed) = r.seed {
                                                // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
                                                // so it won't be expanded by the RANDOM expansion below
                                                *arg = Expression::Function(Box::new(
                                                    crate::expressions::Function::new(
                                                        "RANDOM".to_string(),
                                                        vec![*seed.clone()],
                                                    ),
                                                ));
                                            }
                                        }
                                    }
                                }
                                return Ok(Expression::Function(f));
                            }
                        }
                    }
                    Ok(e)
                })?;

                // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
                // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
                // we see the parent. We detect this and undo the expansion by replacing
                // the expanded pattern back with Expression::Random.
                // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
                // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
                // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
                transform_recursive(normalized, &|e| {
                    if let Expression::Function(ref f) = e {
                        let n = f.name.to_ascii_uppercase();
                        if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
                            if let Expression::Function(mut f) = e {
                                for arg in f.args.iter_mut() {
                                    // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
                                    if let Expression::Cast(ref cast) = arg {
                                        if matches!(
                                            cast.to,
                                            crate::expressions::DataType::BigInt { .. }
                                        ) {
                                            if let Expression::Add(ref add) = cast.this {
                                                if let Expression::Literal(ref lit) = add.left {
                                                    if let crate::expressions::Literal::Number(
                                                        ref num,
                                                    ) = lit.as_ref()
                                                    {
                                                        // Must match the `lower` literal text
                                                        // emitted by make_scaled_random().
                                                        if num == "-9.223372036854776E+18" {
                                                            *arg = Expression::Random(
                                                                crate::expressions::Random,
                                                            );
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                                return Ok(Expression::Function(f));
                            }
                            return Ok(e);
                        }
                    }
                    match e {
                        Expression::Random(_) => Ok(make_scaled_random()),
                        // Rand(seed) with no bounds: drop seed and expand
                        // (DuckDB RANDOM doesn't support seeds)
                        Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
                            Ok(make_scaled_random())
                        }
                        _ => Ok(e),
                    }
                })?
            } else {
                normalized
            };

            // Apply cross-dialect semantic normalizations
            let normalized =
                Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

            // PostgreSQL -> SQLite: rewrite data types via normalize_postgres_to_sqlite_types
            let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL)
                && matches!(target, DialectType::SQLite)
            {
                Self::normalize_postgres_to_sqlite_types(normalized)?
            } else {
                normalized
            };

            // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
            // (SELECT UNNEST(..., max_depth => 2)) subquery
            // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
            let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                && matches!(target, DialectType::DuckDB)
            {
                crate::transforms::wrap_duckdb_unnest_struct(normalized)?
            } else {
                normalized
            };

            // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
            // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
            let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                && matches!(
                    target,
                    DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Spark
                        | DialectType::Databricks
                ) {
                crate::transforms::unnest_alias_to_column_alias(normalized)?
            } else if matches!(self.dialect_type, DialectType::BigQuery)
                && matches!(target, DialectType::BigQuery | DialectType::Redshift)
            {
                // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                // but don't convert alias format (no _t0 wrapper)
                let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                // For Redshift: strip UNNEST when arg is a column reference path
                if matches!(target, DialectType::Redshift) {
                    crate::transforms::strip_unnest_column_refs(result)?
                } else {
                    result
                }
            } else {
                normalized
            };

            // For Presto/Trino targets from PostgreSQL/Redshift source:
            // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
            let normalized = if matches!(
                self.dialect_type,
                DialectType::PostgreSQL | DialectType::Redshift
            ) && matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            ) {
                crate::transforms::wrap_unnest_join_aliases(normalized)?
            } else {
                normalized
            };

            // Eliminate DISTINCT ON with target-dialect awareness
            // This must happen after source transform (which may produce DISTINCT ON)
            // and before target transform, with knowledge of the target dialect's NULL ordering behavior
            let normalized = crate::transforms::eliminate_distinct_on_for_dialect(
                normalized,
                Some(target),
                Some(self.dialect_type),
            )?;

            // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
            let normalized = if matches!(target, DialectType::Snowflake) {
                Self::transform_generate_date_array_snowflake(normalized)?
            } else {
                normalized
            };

            // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
            let normalized = if matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
                crate::transforms::unnest_to_explode_select(normalized)?
            } else {
                normalized
            };

            // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
            let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                crate::transforms::no_limit_order_by_union(normalized)?
            } else {
                normalized
            };

            // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
            // Python sqlglot does this in the TSQL generator, but we can't do it there
            // because it would break TSQL -> TSQL identity
            let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
            {
                transform_recursive(normalized, &|e| {
                    if let Expression::Count(ref c) = e {
                        // Build COUNT_BIG(...) as an AggregateFunction
                        let args = if c.star {
                            vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })]
                        } else if let Some(ref this) = c.this {
                            vec![this.clone()]
                        } else {
                            vec![]
                        };
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COUNT_BIG".to_string(),
                                args,
                                distinct: c.distinct,
                                filter: c.filter.clone(),
                                order_by: Vec::new(),
                                limit: None,
                                ignore_nulls: None,
                                inferred_type: None,
                            },
                        )))
                    } else {
                        Ok(e)
                    }
                })?
            } else {
                normalized
            };

            // Finally hand the normalized tree to the target dialect's own transform.
            let transformed = target_dialect.transform(normalized)?;

            // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
            let transformed = if matches!(target, DialectType::DuckDB) {
                Self::seq_rownum_to_range(transformed)?
            } else {
                transformed
            };

            // Generate SQL with the target dialect; the source dialect is passed along
            // to the generator as well.
            let mut sql = if pretty {
                target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
            } else {
                target_dialect.generate_with_source(&transformed, self.dialect_type)?
            };

            // Align a known Snowflake pretty-print edge case with Python sqlglot output.
            if pretty && target == DialectType::Snowflake {
                sql = Self::normalize_snowflake_pretty(sql);
            }

            Ok(sql)
        })
        .collect()
}
4233}
4234
4235// Transpile-only methods: cross-dialect normalization and helpers
4236#[cfg(feature = "transpile")]
4237impl Dialect {
4238 fn reject_pgvector_distance_operators_for_sqlite(&self, sql: &str) -> Result<()> {
4239 let tokens = self.tokenize(sql)?;
4240 for (i, token) in tokens.iter().enumerate() {
4241 if token.token_type == TokenType::NullsafeEq {
4242 return Err(crate::error::Error::unsupported(
4243 "PostgreSQL pgvector cosine distance operator <=>",
4244 "SQLite",
4245 ));
4246 }
4247 if token.token_type == TokenType::Lt
4248 && tokens
4249 .get(i + 1)
4250 .is_some_and(|token| token.token_type == TokenType::Tilde)
4251 && tokens
4252 .get(i + 2)
4253 .is_some_and(|token| token.token_type == TokenType::Gt)
4254 {
4255 return Err(crate::error::Error::unsupported(
4256 "PostgreSQL pgvector Hamming distance operator <~>",
4257 "SQLite",
4258 ));
4259 }
4260 }
4261 Ok(())
4262 }
4263
4264 fn normalize_postgres_to_sqlite_types(expr: Expression) -> Result<Expression> {
4265 fn sqlite_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
4266 use crate::expressions::DataType;
4267
4268 match dt {
4269 DataType::Bit { .. } => DataType::Int {
4270 length: None,
4271 integer_spelling: true,
4272 },
4273 DataType::TextWithLength { .. } => DataType::Text,
4274 DataType::VarChar { .. } => DataType::Text,
4275 DataType::Char { .. } => DataType::Text,
4276 DataType::Timestamp { timezone: true, .. } => DataType::Text,
4277 DataType::Custom { name } => {
4278 let base = name
4279 .split_once('(')
4280 .map_or(name.as_str(), |(base, _)| base)
4281 .trim();
4282 if base.eq_ignore_ascii_case("TSVECTOR")
4283 || base.eq_ignore_ascii_case("TIMESTAMPTZ")
4284 || base.eq_ignore_ascii_case("TIMESTAMP WITH TIME ZONE")
4285 || base.eq_ignore_ascii_case("NVARCHAR")
4286 || base.eq_ignore_ascii_case("NCHAR")
4287 {
4288 DataType::Text
4289 } else {
4290 DataType::Custom { name }
4291 }
4292 }
4293 _ => dt,
4294 }
4295 }
4296
4297 transform_recursive(expr, &|e| match e {
4298 Expression::DataType(dt) => Ok(Expression::DataType(sqlite_type(dt))),
4299 Expression::CreateTable(mut ct) => {
4300 for column in &mut ct.columns {
4301 column.data_type = sqlite_type(column.data_type.clone());
4302 }
4303 Ok(Expression::CreateTable(ct))
4304 }
4305 _ => Ok(e),
4306 })
4307 }
4308
4309 /// For DuckDB target: when FROM clause contains RANGE(n), replace
4310 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
4311 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
4312 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
4313 if let Expression::Select(mut select) = expr {
4314 // Check if FROM contains a RANGE function
4315 let has_range_from = if let Some(ref from) = select.from {
4316 from.expressions.iter().any(|e| {
4317 // Check for direct RANGE(...) or aliased RANGE(...)
4318 match e {
4319 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
4320 Expression::Alias(a) => {
4321 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
4322 }
4323 _ => false,
4324 }
4325 })
4326 } else {
4327 false
4328 };
4329
4330 if has_range_from {
4331 // Replace the ROW_NUMBER pattern in select expressions
4332 select.expressions = select
4333 .expressions
4334 .into_iter()
4335 .map(|e| Self::replace_rownum_with_range(e))
4336 .collect();
4337 }
4338
4339 Ok(Expression::Select(select))
4340 } else {
4341 Ok(expr)
4342 }
4343 }
4344
4345 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
4346 fn replace_rownum_with_range(expr: Expression) -> Expression {
4347 match expr {
4348 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
4349 Expression::Mod(op) => {
4350 let new_left = Self::try_replace_rownum_paren(&op.left);
4351 Expression::Mod(Box::new(crate::expressions::BinaryOp {
4352 left: new_left,
4353 right: op.right,
4354 left_comments: op.left_comments,
4355 operator_comments: op.operator_comments,
4356 trailing_comments: op.trailing_comments,
4357 inferred_type: op.inferred_type,
4358 }))
4359 }
4360 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
4361 Expression::Paren(p) => {
4362 let inner = Self::replace_rownum_with_range(p.this);
4363 Expression::Paren(Box::new(crate::expressions::Paren {
4364 this: inner,
4365 trailing_comments: p.trailing_comments,
4366 }))
4367 }
4368 Expression::Case(mut c) => {
4369 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
4370 c.whens = c
4371 .whens
4372 .into_iter()
4373 .map(|(cond, then)| {
4374 (
4375 Self::replace_rownum_with_range(cond),
4376 Self::replace_rownum_with_range(then),
4377 )
4378 })
4379 .collect();
4380 if let Some(else_) = c.else_ {
4381 c.else_ = Some(Self::replace_rownum_with_range(else_));
4382 }
4383 Expression::Case(c)
4384 }
4385 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
4386 left: Self::replace_rownum_with_range(op.left),
4387 right: op.right,
4388 left_comments: op.left_comments,
4389 operator_comments: op.operator_comments,
4390 trailing_comments: op.trailing_comments,
4391 inferred_type: op.inferred_type,
4392 })),
4393 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
4394 left: Self::replace_rownum_with_range(op.left),
4395 right: op.right,
4396 left_comments: op.left_comments,
4397 operator_comments: op.operator_comments,
4398 trailing_comments: op.trailing_comments,
4399 inferred_type: op.inferred_type,
4400 })),
4401 Expression::Alias(mut a) => {
4402 a.this = Self::replace_rownum_with_range(a.this);
4403 Expression::Alias(a)
4404 }
4405 other => other,
4406 }
4407 }
4408
4409 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
4410 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
4411 if let Expression::Paren(ref p) = expr {
4412 if let Expression::Sub(ref sub) = p.this {
4413 if let Expression::WindowFunction(ref wf) = sub.left {
4414 if let Expression::Function(ref f) = wf.this {
4415 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
4416 if let Expression::Literal(ref lit) = sub.right {
4417 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
4418 if n == "1" {
4419 return Expression::column("range");
4420 }
4421 }
4422 }
4423 }
4424 }
4425 }
4426 }
4427 }
4428 expr.clone()
4429 }
4430
4431 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
4432 /// Converts:
4433 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
4434 /// To:
4435 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
4436 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
4437 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
4438 use crate::expressions::*;
4439 transform_recursive(expr, &|e| {
4440 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
4441 if let Expression::ArraySize(ref af) = e {
4442 if let Expression::Function(ref f) = af.this {
4443 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4444 let result = Self::convert_array_size_gda_snowflake(f)?;
4445 return Ok(result);
4446 }
4447 }
4448 }
4449
4450 let Expression::Select(mut sel) = e else {
4451 return Ok(e);
4452 };
4453
4454 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
4455 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
4456 let mut gda_join_idx: Option<usize> = None;
4457
4458 for (idx, join) in sel.joins.iter().enumerate() {
4459 // The join.this may be:
4460 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
4461 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
4462 let (unnest_ref, alias_name) = match &join.this {
4463 Expression::Unnest(ref unnest) => {
4464 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
4465 (Some(unnest.as_ref()), alias)
4466 }
4467 Expression::Alias(ref a) => {
4468 if let Expression::Unnest(ref unnest) = a.this {
4469 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
4470 } else {
4471 (None, None)
4472 }
4473 }
4474 _ => (None, None),
4475 };
4476
4477 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
4478 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
4479 if let Expression::Function(ref f) = unnest.this {
4480 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4481 let start_expr = f.args[0].clone();
4482 let end_expr = f.args[1].clone();
4483 let step = f.args.get(2).cloned();
4484
4485 // Extract unit from step interval
4486 let unit = if let Some(Expression::Interval(ref iv)) = step {
4487 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
4488 Some(format!("{:?}", unit).to_ascii_uppercase())
4489 } else if let Some(ref this) = iv.this {
4490 // The interval may be stored as a string like "1 MONTH"
4491 if let Expression::Literal(lit) = this {
4492 if let Literal::String(ref s) = lit.as_ref() {
4493 let parts: Vec<&str> = s.split_whitespace().collect();
4494 if parts.len() == 2 {
4495 Some(parts[1].to_ascii_uppercase())
4496 } else if parts.len() == 1 {
4497 // Single word like "MONTH" or just "1"
4498 let upper = parts[0].to_ascii_uppercase();
4499 if matches!(
4500 upper.as_str(),
4501 "YEAR"
4502 | "QUARTER"
4503 | "MONTH"
4504 | "WEEK"
4505 | "DAY"
4506 | "HOUR"
4507 | "MINUTE"
4508 | "SECOND"
4509 ) {
4510 Some(upper)
4511 } else {
4512 None
4513 }
4514 } else {
4515 None
4516 }
4517 } else {
4518 None
4519 }
4520 } else {
4521 None
4522 }
4523 } else {
4524 None
4525 }
4526 } else {
4527 None
4528 };
4529
4530 if let Some(unit_str) = unit {
4531 gda_info = Some((alias, start_expr, end_expr, unit_str));
4532 gda_join_idx = Some(idx);
4533 }
4534 }
4535 }
4536 }
4537 if gda_info.is_some() {
4538 break;
4539 }
4540 }
4541
4542 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
4543 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
4544 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
4545 let result = Self::try_transform_from_gda_snowflake(sel);
4546 return result;
4547 };
4548 let join_idx = gda_join_idx.unwrap();
4549
4550 // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
4551 // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
4552 // (inclusive date range), so the exclusive end is DATEDIFF + 1.
4553 let datediff = Expression::Function(Box::new(Function::new(
4554 "DATEDIFF".to_string(),
4555 vec![
4556 Expression::boxed_column(Column {
4557 name: Identifier::new(&unit_str),
4558 table: None,
4559 join_mark: false,
4560 trailing_comments: vec![],
4561 span: None,
4562 inferred_type: None,
4563 }),
4564 start_expr.clone(),
4565 end_expr.clone(),
4566 ],
4567 )));
4568 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
4569 left: datediff,
4570 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4571 left_comments: vec![],
4572 operator_comments: vec![],
4573 trailing_comments: vec![],
4574 inferred_type: None,
4575 }));
4576
4577 let array_gen_range = Expression::Function(Box::new(Function::new(
4578 "ARRAY_GENERATE_RANGE".to_string(),
4579 vec![
4580 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4581 datediff_plus_one,
4582 ],
4583 )));
4584
4585 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
4586 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
4587 name: Identifier::new("INPUT"),
4588 value: array_gen_range,
4589 separator: crate::expressions::NamedArgSeparator::DArrow,
4590 }));
4591 let flatten = Expression::Function(Box::new(Function::new(
4592 "FLATTEN".to_string(),
4593 vec![flatten_input],
4594 )));
4595
4596 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
4597 let alias_table = Alias {
4598 this: flatten,
4599 alias: Identifier::new("_t0"),
4600 column_aliases: vec![
4601 Identifier::new("seq"),
4602 Identifier::new("key"),
4603 Identifier::new("path"),
4604 Identifier::new("index"),
4605 Identifier::new(&alias_name),
4606 Identifier::new("this"),
4607 ],
4608 pre_alias_comments: vec![],
4609 trailing_comments: vec![],
4610 inferred_type: None,
4611 };
4612 let lateral_expr = Expression::Lateral(Box::new(Lateral {
4613 this: Box::new(Expression::Alias(Box::new(alias_table))),
4614 view: None,
4615 outer: None,
4616 alias: None,
4617 alias_quoted: false,
4618 cross_apply: None,
4619 ordinality: None,
4620 column_aliases: vec![],
4621 }));
4622
4623 // Remove the original join and add to FROM expressions
4624 sel.joins.remove(join_idx);
4625 if let Some(ref mut from) = sel.from {
4626 from.expressions.push(lateral_expr);
4627 }
4628
4629 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
4630 let dateadd_expr = Expression::Function(Box::new(Function::new(
4631 "DATEADD".to_string(),
4632 vec![
4633 Expression::boxed_column(Column {
4634 name: Identifier::new(&unit_str),
4635 table: None,
4636 join_mark: false,
4637 trailing_comments: vec![],
4638 span: None,
4639 inferred_type: None,
4640 }),
4641 Expression::Cast(Box::new(Cast {
4642 this: Expression::boxed_column(Column {
4643 name: Identifier::new(&alias_name),
4644 table: None,
4645 join_mark: false,
4646 trailing_comments: vec![],
4647 span: None,
4648 inferred_type: None,
4649 }),
4650 to: DataType::Int {
4651 length: None,
4652 integer_spelling: false,
4653 },
4654 trailing_comments: vec![],
4655 double_colon_syntax: false,
4656 format: None,
4657 default: None,
4658 inferred_type: None,
4659 })),
4660 Expression::Cast(Box::new(Cast {
4661 this: start_expr.clone(),
4662 to: DataType::Date,
4663 trailing_comments: vec![],
4664 double_colon_syntax: false,
4665 format: None,
4666 default: None,
4667 inferred_type: None,
4668 })),
4669 ],
4670 )));
4671
4672 // Replace references to the alias in the SELECT list
4673 let new_exprs: Vec<Expression> = sel
4674 .expressions
4675 .iter()
4676 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
4677 .collect();
4678 sel.expressions = new_exprs;
4679
4680 Ok(Expression::Select(sel))
4681 })
4682 }
4683
4684 /// Helper: replace column references to `alias_name` with dateadd expression
4685 fn replace_column_ref_with_dateadd(
4686 expr: &Expression,
4687 alias_name: &str,
4688 dateadd: &Expression,
4689 ) -> Expression {
4690 use crate::expressions::*;
4691 match expr {
4692 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4693 // Plain column reference -> DATEADD(...) AS alias_name
4694 Expression::Alias(Box::new(Alias {
4695 this: dateadd.clone(),
4696 alias: Identifier::new(alias_name),
4697 column_aliases: vec![],
4698 pre_alias_comments: vec![],
4699 trailing_comments: vec![],
4700 inferred_type: None,
4701 }))
4702 }
4703 Expression::Alias(a) => {
4704 // Check if the inner expression references the alias
4705 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
4706 Expression::Alias(Box::new(Alias {
4707 this: new_this,
4708 alias: a.alias.clone(),
4709 column_aliases: a.column_aliases.clone(),
4710 pre_alias_comments: a.pre_alias_comments.clone(),
4711 trailing_comments: a.trailing_comments.clone(),
4712 inferred_type: None,
4713 }))
4714 }
4715 _ => expr.clone(),
4716 }
4717 }
4718
4719 /// Helper: replace column references in inner expression (not top-level)
4720 fn replace_column_ref_inner(
4721 expr: &Expression,
4722 alias_name: &str,
4723 dateadd: &Expression,
4724 ) -> Expression {
4725 use crate::expressions::*;
4726 match expr {
4727 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4728 dateadd.clone()
4729 }
4730 Expression::Add(op) => {
4731 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4732 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4733 Expression::Add(Box::new(BinaryOp {
4734 left,
4735 right,
4736 left_comments: op.left_comments.clone(),
4737 operator_comments: op.operator_comments.clone(),
4738 trailing_comments: op.trailing_comments.clone(),
4739 inferred_type: None,
4740 }))
4741 }
4742 Expression::Sub(op) => {
4743 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4744 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4745 Expression::Sub(Box::new(BinaryOp {
4746 left,
4747 right,
4748 left_comments: op.left_comments.clone(),
4749 operator_comments: op.operator_comments.clone(),
4750 trailing_comments: op.trailing_comments.clone(),
4751 inferred_type: None,
4752 }))
4753 }
4754 Expression::Mul(op) => {
4755 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4756 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4757 Expression::Mul(Box::new(BinaryOp {
4758 left,
4759 right,
4760 left_comments: op.left_comments.clone(),
4761 operator_comments: op.operator_comments.clone(),
4762 trailing_comments: op.trailing_comments.clone(),
4763 inferred_type: None,
4764 }))
4765 }
4766 _ => expr.clone(),
4767 }
4768 }
4769
4770 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
4771 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
4772 fn try_transform_from_gda_snowflake(
4773 mut sel: Box<crate::expressions::Select>,
4774 ) -> Result<Expression> {
4775 use crate::expressions::*;
4776
4777 // Extract GDA info from FROM clause
4778 let mut gda_info: Option<(
4779 usize,
4780 String,
4781 Expression,
4782 Expression,
4783 String,
4784 Option<(String, Vec<Identifier>)>,
4785 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
4786
4787 if let Some(ref from) = sel.from {
4788 for (idx, table_expr) in from.expressions.iter().enumerate() {
4789 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
4790 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
4791 let (unnest_opt, outer_alias_info) = match table_expr {
4792 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
4793 Expression::Alias(ref a) => {
4794 if let Expression::Unnest(ref unnest) = a.this {
4795 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
4796 (Some(unnest.as_ref()), Some(alias_info))
4797 } else {
4798 (None, None)
4799 }
4800 }
4801 _ => (None, None),
4802 };
4803
4804 if let Some(unnest) = unnest_opt {
4805 // Check for GENERATE_DATE_ARRAY function
4806 let func_opt = match &unnest.this {
4807 Expression::Function(ref f)
4808 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
4809 && f.args.len() >= 2 =>
4810 {
4811 Some(f)
4812 }
4813 // Also check for GenerateSeries (from earlier normalization)
4814 _ => None,
4815 };
4816
4817 if let Some(f) = func_opt {
4818 let start_expr = f.args[0].clone();
4819 let end_expr = f.args[1].clone();
4820 let step = f.args.get(2).cloned();
4821
4822 // Extract unit and column name
4823 let unit = Self::extract_interval_unit_str(&step);
4824 let col_name = outer_alias_info
4825 .as_ref()
4826 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
4827 .unwrap_or_else(|| "value".to_string());
4828
4829 if let Some(unit_str) = unit {
4830 gda_info = Some((
4831 idx,
4832 col_name,
4833 start_expr,
4834 end_expr,
4835 unit_str,
4836 outer_alias_info,
4837 ));
4838 break;
4839 }
4840 }
4841 }
4842 }
4843 }
4844
4845 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
4846 else {
4847 return Ok(Expression::Select(sel));
4848 };
4849
4850 // Build the Snowflake subquery:
4851 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
4852 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
4853
4854 // DATEDIFF(unit, start, end)
4855 let datediff = Expression::Function(Box::new(Function::new(
4856 "DATEDIFF".to_string(),
4857 vec![
4858 Expression::boxed_column(Column {
4859 name: Identifier::new(&unit_str),
4860 table: None,
4861 join_mark: false,
4862 trailing_comments: vec![],
4863 span: None,
4864 inferred_type: None,
4865 }),
4866 start_expr.clone(),
4867 end_expr.clone(),
4868 ],
4869 )));
4870 // DATEDIFF(...) + 1
4871 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
4872 left: datediff,
4873 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4874 left_comments: vec![],
4875 operator_comments: vec![],
4876 trailing_comments: vec![],
4877 inferred_type: None,
4878 }));
4879
4880 let array_gen_range = Expression::Function(Box::new(Function::new(
4881 "ARRAY_GENERATE_RANGE".to_string(),
4882 vec![
4883 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4884 datediff_plus_one,
4885 ],
4886 )));
4887
4888 // TABLE(FLATTEN(INPUT => ...))
4889 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
4890 name: Identifier::new("INPUT"),
4891 value: array_gen_range,
4892 separator: crate::expressions::NamedArgSeparator::DArrow,
4893 }));
4894 let flatten = Expression::Function(Box::new(Function::new(
4895 "FLATTEN".to_string(),
4896 vec![flatten_input],
4897 )));
4898
4899 // Determine alias name for the table: use outer alias or _t0
4900 let table_alias_name = outer_alias_info
4901 .as_ref()
4902 .map(|(name, _)| name.clone())
4903 .unwrap_or_else(|| "_t0".to_string());
4904
4905 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
4906 let table_func =
4907 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
4908 let flatten_aliased = Expression::Alias(Box::new(Alias {
4909 this: table_func,
4910 alias: Identifier::new(&table_alias_name),
4911 column_aliases: vec![
4912 Identifier::new("seq"),
4913 Identifier::new("key"),
4914 Identifier::new("path"),
4915 Identifier::new("index"),
4916 Identifier::new(&col_name),
4917 Identifier::new("this"),
4918 ],
4919 pre_alias_comments: vec![],
4920 trailing_comments: vec![],
4921 inferred_type: None,
4922 }));
4923
4924 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
4925 let dateadd_expr = Expression::Function(Box::new(Function::new(
4926 "DATEADD".to_string(),
4927 vec![
4928 Expression::boxed_column(Column {
4929 name: Identifier::new(&unit_str),
4930 table: None,
4931 join_mark: false,
4932 trailing_comments: vec![],
4933 span: None,
4934 inferred_type: None,
4935 }),
4936 Expression::Cast(Box::new(Cast {
4937 this: Expression::boxed_column(Column {
4938 name: Identifier::new(&col_name),
4939 table: None,
4940 join_mark: false,
4941 trailing_comments: vec![],
4942 span: None,
4943 inferred_type: None,
4944 }),
4945 to: DataType::Int {
4946 length: None,
4947 integer_spelling: false,
4948 },
4949 trailing_comments: vec![],
4950 double_colon_syntax: false,
4951 format: None,
4952 default: None,
4953 inferred_type: None,
4954 })),
4955 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
4956 start_expr.clone(),
4957 ],
4958 )));
4959 let dateadd_aliased = Expression::Alias(Box::new(Alias {
4960 this: dateadd_expr,
4961 alias: Identifier::new(&col_name),
4962 column_aliases: vec![],
4963 pre_alias_comments: vec![],
4964 trailing_comments: vec![],
4965 inferred_type: None,
4966 }));
4967
4968 // Build inner SELECT
4969 let mut inner_select = Select::new();
4970 inner_select.expressions = vec![dateadd_aliased];
4971 inner_select.from = Some(From {
4972 expressions: vec![flatten_aliased],
4973 });
4974
4975 let inner_select_expr = Expression::Select(Box::new(inner_select));
4976 let subquery = Expression::Subquery(Box::new(Subquery {
4977 this: inner_select_expr,
4978 alias: None,
4979 column_aliases: vec![],
4980 order_by: None,
4981 limit: None,
4982 offset: None,
4983 distribute_by: None,
4984 sort_by: None,
4985 cluster_by: None,
4986 lateral: false,
4987 modifiers_inside: false,
4988 trailing_comments: vec![],
4989 inferred_type: None,
4990 }));
4991
4992 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
4993 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
4994 Expression::Alias(Box::new(Alias {
4995 this: subquery,
4996 alias: Identifier::new(&alias_name),
4997 column_aliases: col_aliases,
4998 pre_alias_comments: vec![],
4999 trailing_comments: vec![],
5000 inferred_type: None,
5001 }))
5002 } else {
5003 subquery
5004 };
5005
5006 // Replace the FROM expression
5007 if let Some(ref mut from) = sel.from {
5008 from.expressions[from_idx] = replacement;
5009 }
5010
5011 Ok(Expression::Select(sel))
5012 }
5013
5014 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
5015 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
5016 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
5017 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
5018 use crate::expressions::*;
5019
5020 let start_expr = f.args[0].clone();
5021 let end_expr = f.args[1].clone();
5022 let step = f.args.get(2).cloned();
5023 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
5024 let col_name = "value";
5025
5026 // Build the inner subquery: same as try_transform_from_gda_snowflake
5027 let datediff = Expression::Function(Box::new(Function::new(
5028 "DATEDIFF".to_string(),
5029 vec![
5030 Expression::boxed_column(Column {
5031 name: Identifier::new(&unit_str),
5032 table: None,
5033 join_mark: false,
5034 trailing_comments: vec![],
5035 span: None,
5036 inferred_type: None,
5037 }),
5038 start_expr.clone(),
5039 end_expr.clone(),
5040 ],
5041 )));
5042 // DATEDIFF(...) + 1
5043 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
5044 left: datediff,
5045 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5046 left_comments: vec![],
5047 operator_comments: vec![],
5048 trailing_comments: vec![],
5049 inferred_type: None,
5050 }));
5051
5052 let array_gen_range = Expression::Function(Box::new(Function::new(
5053 "ARRAY_GENERATE_RANGE".to_string(),
5054 vec![
5055 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
5056 datediff_plus_one,
5057 ],
5058 )));
5059
5060 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
5061 name: Identifier::new("INPUT"),
5062 value: array_gen_range,
5063 separator: crate::expressions::NamedArgSeparator::DArrow,
5064 }));
5065 let flatten = Expression::Function(Box::new(Function::new(
5066 "FLATTEN".to_string(),
5067 vec![flatten_input],
5068 )));
5069
5070 let table_func =
5071 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
5072 let flatten_aliased = Expression::Alias(Box::new(Alias {
5073 this: table_func,
5074 alias: Identifier::new("_t0"),
5075 column_aliases: vec![
5076 Identifier::new("seq"),
5077 Identifier::new("key"),
5078 Identifier::new("path"),
5079 Identifier::new("index"),
5080 Identifier::new(col_name),
5081 Identifier::new("this"),
5082 ],
5083 pre_alias_comments: vec![],
5084 trailing_comments: vec![],
5085 inferred_type: None,
5086 }));
5087
5088 let dateadd_expr = Expression::Function(Box::new(Function::new(
5089 "DATEADD".to_string(),
5090 vec![
5091 Expression::boxed_column(Column {
5092 name: Identifier::new(&unit_str),
5093 table: None,
5094 join_mark: false,
5095 trailing_comments: vec![],
5096 span: None,
5097 inferred_type: None,
5098 }),
5099 Expression::Cast(Box::new(Cast {
5100 this: Expression::boxed_column(Column {
5101 name: Identifier::new(col_name),
5102 table: None,
5103 join_mark: false,
5104 trailing_comments: vec![],
5105 span: None,
5106 inferred_type: None,
5107 }),
5108 to: DataType::Int {
5109 length: None,
5110 integer_spelling: false,
5111 },
5112 trailing_comments: vec![],
5113 double_colon_syntax: false,
5114 format: None,
5115 default: None,
5116 inferred_type: None,
5117 })),
5118 start_expr.clone(),
5119 ],
5120 )));
5121 let dateadd_aliased = Expression::Alias(Box::new(Alias {
5122 this: dateadd_expr,
5123 alias: Identifier::new(col_name),
5124 column_aliases: vec![],
5125 pre_alias_comments: vec![],
5126 trailing_comments: vec![],
5127 inferred_type: None,
5128 }));
5129
5130 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
5131 let mut inner_select = Select::new();
5132 inner_select.expressions = vec![dateadd_aliased];
5133 inner_select.from = Some(From {
5134 expressions: vec![flatten_aliased],
5135 });
5136
5137 // Wrap in subquery for the inner part
5138 let inner_subquery = Expression::Subquery(Box::new(Subquery {
5139 this: Expression::Select(Box::new(inner_select)),
5140 alias: None,
5141 column_aliases: vec![],
5142 order_by: None,
5143 limit: None,
5144 offset: None,
5145 distribute_by: None,
5146 sort_by: None,
5147 cluster_by: None,
5148 lateral: false,
5149 modifiers_inside: false,
5150 trailing_comments: vec![],
5151 inferred_type: None,
5152 }));
5153
5154 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
5155 let star = Expression::Star(Star {
5156 table: None,
5157 except: None,
5158 replace: None,
5159 rename: None,
5160 trailing_comments: vec![],
5161 span: None,
5162 });
5163 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
5164 this: star,
5165 distinct: false,
5166 filter: None,
5167 order_by: vec![],
5168 name: Some("ARRAY_AGG".to_string()),
5169 ignore_nulls: None,
5170 having_max: None,
5171 limit: None,
5172 inferred_type: None,
5173 }));
5174
5175 let mut outer_select = Select::new();
5176 outer_select.expressions = vec![array_agg];
5177 outer_select.from = Some(From {
5178 expressions: vec![inner_subquery],
5179 });
5180
5181 // Wrap in a subquery
5182 let outer_subquery = Expression::Subquery(Box::new(Subquery {
5183 this: Expression::Select(Box::new(outer_select)),
5184 alias: None,
5185 column_aliases: vec![],
5186 order_by: None,
5187 limit: None,
5188 offset: None,
5189 distribute_by: None,
5190 sort_by: None,
5191 cluster_by: None,
5192 lateral: false,
5193 modifiers_inside: false,
5194 trailing_comments: vec![],
5195 inferred_type: None,
5196 }));
5197
5198 // ARRAY_SIZE(subquery)
5199 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
5200 outer_subquery,
5201 ))))
5202 }
5203
5204 /// Extract interval unit string from an optional step expression.
5205 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
5206 use crate::expressions::*;
5207 if let Some(Expression::Interval(ref iv)) = step {
5208 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
5209 return Some(format!("{:?}", unit).to_ascii_uppercase());
5210 }
5211 if let Some(ref this) = iv.this {
5212 if let Expression::Literal(lit) = this {
5213 if let Literal::String(ref s) = lit.as_ref() {
5214 let parts: Vec<&str> = s.split_whitespace().collect();
5215 if parts.len() == 2 {
5216 return Some(parts[1].to_ascii_uppercase());
5217 } else if parts.len() == 1 {
5218 let upper = parts[0].to_ascii_uppercase();
5219 if matches!(
5220 upper.as_str(),
5221 "YEAR"
5222 | "QUARTER"
5223 | "MONTH"
5224 | "WEEK"
5225 | "DAY"
5226 | "HOUR"
5227 | "MINUTE"
5228 | "SECOND"
5229 ) {
5230 return Some(upper);
5231 }
5232 }
5233 }
5234 }
5235 }
5236 }
5237 // Default to DAY if no step or no interval
5238 if step.is_none() {
5239 return Some("DAY".to_string());
5240 }
5241 None
5242 }
5243
/// Re-flows a few hard-coded fragments of one specific pretty-printed Snowflake query.
///
/// The rewrites are applied only when `sql` contains both marker fragments (the
/// `LATERAL IFF(...) AS datasource(...)` clause and the `ARRAY_GENERATE_RANGE(0, (GREATEST(...`
/// call); otherwise the input is returned untouched. Each rewrite is a literal
/// search-and-replace that changes only line breaks and indentation around parentheses.
fn normalize_snowflake_pretty(mut sql: String) -> String {
    const LATERAL_MARKER: &str = "LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)";
    const RANGE_MARKER: &str = "ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)";

    // (compact form as generated, expanded form to emit), applied in this order.
    const REWRITES: [(&str, &str); 3] = [
        (
            "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
            "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
        ),
        (
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
        ),
        (
            "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
            "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
        ),
    ];

    // Both markers must be present; anything else is passed through unchanged.
    if !(sql.contains(LATERAL_MARKER) && sql.contains(RANGE_MARKER)) {
        return sql;
    }

    for &(compact, expanded) in REWRITES.iter() {
        sql = sql.replace(compact, expanded);
    }

    sql
}
5266
5267 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
5268 /// This handles cases where the same syntax has different semantics across dialects.
5269 fn cross_dialect_normalize(
5270 expr: Expression,
5271 source: DialectType,
5272 target: DialectType,
5273 ) -> Result<Expression> {
5274 use crate::expressions::{
5275 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
5276 Function, Identifier, IsNull, Literal, Null, Paren,
5277 };
5278
5279 // Helper to tag which kind of transform to apply
5280 #[derive(Debug)]
5281 enum Action {
5282 None,
5283 GreatestLeastNull,
5284 ArrayGenerateRange,
5285 Div0TypedDivision,
5286 ArrayAggCollectList,
5287 ArrayAggWithinGroupFilter,
5288 ArrayAggFilter,
5289 CastTimestampToDatetime,
5290 DateTruncWrapCast,
5291 ToDateToCast,
5292 ConvertTimezoneToExpr,
5293 SetToVariable,
5294 RegexpReplaceSnowflakeToDuckDB,
5295 BigQueryFunctionNormalize,
5296 BigQuerySafeDivide,
5297 BigQueryCastType,
5298 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
5299 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
5300 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
5301 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
5302 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
5303 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
5304 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5305 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
5306 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
5307 EpochConvert, // Expression::Epoch -> target-specific epoch function
5308 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
5309 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
5310 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
5311 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
5312 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
5313 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
5314 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
5315 TempTableHash, // TSQL #table -> temp table normalization
5316 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
5317 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
5318 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
5319 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
5320 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
5321 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
5322 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
5323 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
5324 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
5325 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
5326 DuckDBCastJsonToVariant, // DuckDB CAST(x AS JSON) -> CAST(x AS VARIANT) for Snowflake
5327 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
5328 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
5329 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
5330 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
5331 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
5332 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
5333 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
5334 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
5335 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
5336 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
5337 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
5338 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
5339 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
5340 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
5341 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
5342 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
5343 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5344 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
5345 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
5346 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
5347 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
5348 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
5349 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
5350 DollarParamConvert, // $foo -> @foo for BigQuery
5351 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
5352 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
5353 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
5354 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
5355 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
5356 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
5357 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
5358 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
5359 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
5360 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
5361 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
5362 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
5363 RespectNullsConvert, // RESPECT NULLS window function handling
5364 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
5365 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
5366 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
5367 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
5368 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
5369 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
5370 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
5371 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
5372 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
5373 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
5374 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
5375 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
5376 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
5377 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
5378 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
5379 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
5380 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
5381 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
5382 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
5383 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
5384 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
5385 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
5386 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
5387 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
5388 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
5389 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
5390 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
5391 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
5392 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
5393 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
5394 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5395 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5396 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
5397 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
5398 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
5399 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
5400 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
5401 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
5402 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
5403 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
5404 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
5405 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
5406 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
5407 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
5408 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
5409 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
5410 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
5411 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
5412 DecodeSimplify, // DECODE with null-safe -> simple = comparison
5413 ArraySumConvert, // ARRAY_SUM -> target-specific
5414 ArraySizeConvert, // ARRAY_SIZE -> target-specific
5415 ArrayAnyConvert, // ARRAY_ANY -> target-specific
5416 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
5417 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
5418 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
5419 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
5420 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
5421 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
5422 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
5423 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
5424 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
5425 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
5426 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
5427 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
5428 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
5429 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
5430 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
5431 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
5432 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
5433 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
5434 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
5435 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
5436 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5437 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
5438 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
5439 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
5440 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
5441 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
5442 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
5443 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
5444 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
5445 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
5446 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
5447 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
5448 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
5449 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
5450 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
5451 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
5452 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
5453 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
5454 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
5455 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
5456 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
5457 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
5458 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
5459 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
5460 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
5461 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
5462 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
5463 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
5464 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
5465 }
5466
5467 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
5468 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
5469 Self::transform_select_into(expr, source, target)
5470 } else {
5471 expr
5472 };
5473
5474 // Strip OFFSET ROWS for non-TSQL/Oracle targets
5475 let expr = if !matches!(
5476 target,
5477 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
5478 ) {
5479 if let Expression::Select(mut select) = expr {
5480 if let Some(ref mut offset) = select.offset {
5481 offset.rows = None;
5482 }
5483 Expression::Select(select)
5484 } else {
5485 expr
5486 }
5487 } else {
5488 expr
5489 };
5490
5491 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
5492 let expr = if matches!(target, DialectType::Oracle) {
5493 if let Expression::Select(mut select) = expr {
5494 if let Some(limit) = select.limit.take() {
5495 // Convert LIMIT to FETCH FIRST n ROWS ONLY
5496 select.fetch = Some(crate::expressions::Fetch {
5497 direction: "FIRST".to_string(),
5498 count: Some(limit.this),
5499 percent: false,
5500 rows: true,
5501 with_ties: false,
5502 });
5503 }
5504 // Add ROWS to OFFSET if present
5505 if let Some(ref mut offset) = select.offset {
5506 offset.rows = Some(true);
5507 }
5508 Expression::Select(select)
5509 } else {
5510 expr
5511 }
5512 } else {
5513 expr
5514 };
5515
5516 // Handle CreateTable WITH properties transformation before recursive transforms
5517 let expr = if let Expression::CreateTable(mut ct) = expr {
5518 Self::transform_create_table_properties(&mut ct, source, target);
5519
5520 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
5521 // When the PARTITIONED BY clause contains column definitions, merge them into the
5522 // main column list and adjust the PARTITIONED BY clause for the target dialect.
5523 if matches!(
5524 source,
5525 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5526 ) {
5527 let mut partition_col_names: Vec<String> = Vec::new();
5528 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
5529 let mut has_col_def_partitions = false;
5530
5531 // Check if any PARTITIONED BY property contains ColumnDef expressions
5532 for prop in &ct.properties {
5533 if let Expression::PartitionedByProperty(ref pbp) = prop {
5534 if let Expression::Tuple(ref tuple) = *pbp.this {
5535 for expr in &tuple.expressions {
5536 if let Expression::ColumnDef(ref cd) = expr {
5537 has_col_def_partitions = true;
5538 partition_col_names.push(cd.name.name.clone());
5539 partition_col_defs.push(*cd.clone());
5540 }
5541 }
5542 }
5543 }
5544 }
5545
5546 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
5547 // Merge partition columns into main column list
5548 for cd in partition_col_defs {
5549 ct.columns.push(cd);
5550 }
5551
5552 // Replace PARTITIONED BY property with column-name-only version
5553 ct.properties
5554 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
5555
5556 if matches!(
5557 target,
5558 DialectType::Presto | DialectType::Trino | DialectType::Athena
5559 ) {
5560 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
5561 let array_elements: Vec<String> = partition_col_names
5562 .iter()
5563 .map(|n| format!("'{}'", n))
5564 .collect();
5565 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
5566 ct.with_properties
5567 .push(("PARTITIONED_BY".to_string(), array_value));
5568 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5569 // Spark: PARTITIONED BY (y, z) - just column names
5570 let name_exprs: Vec<Expression> = partition_col_names
5571 .iter()
5572 .map(|n| {
5573 Expression::Column(Box::new(crate::expressions::Column {
5574 name: crate::expressions::Identifier::new(n.clone()),
5575 table: None,
5576 join_mark: false,
5577 trailing_comments: Vec::new(),
5578 span: None,
5579 inferred_type: None,
5580 }))
5581 })
5582 .collect();
5583 ct.properties.insert(
5584 0,
5585 Expression::PartitionedByProperty(Box::new(
5586 crate::expressions::PartitionedByProperty {
5587 this: Box::new(Expression::Tuple(Box::new(
5588 crate::expressions::Tuple {
5589 expressions: name_exprs,
5590 },
5591 ))),
5592 },
5593 )),
5594 );
5595 }
5596 // For DuckDB and other targets, just drop the PARTITIONED BY (already removed by the retain above)
5597 }
5598
5599 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
5600 // are handled by transform_create_table_properties which runs first
5601 }
5602
5603 // Strip LOCATION property for Presto/Trino/Athena (not supported)
5604 if matches!(
5605 target,
5606 DialectType::Presto | DialectType::Trino | DialectType::Athena
5607 ) {
5608 ct.properties
5609 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
5610 }
5611
5612 // Strip table-level constraints for Spark/Hive/Databricks
5613 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
5614 if matches!(
5615 target,
5616 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5617 ) {
5618 ct.constraints.retain(|c| {
5619 matches!(
5620 c,
5621 crate::expressions::TableConstraint::PrimaryKey { .. }
5622 | crate::expressions::TableConstraint::Like { .. }
5623 )
5624 });
5625 for constraint in &mut ct.constraints {
5626 if let crate::expressions::TableConstraint::PrimaryKey {
5627 columns,
5628 modifiers,
5629 ..
5630 } = constraint
5631 {
5632 // Strip ASC/DESC from column names
5633 for col in columns.iter_mut() {
5634 if col.name.ends_with(" ASC") {
5635 col.name = col.name[..col.name.len() - 4].to_string();
5636 } else if col.name.ends_with(" DESC") {
5637 col.name = col.name[..col.name.len() - 5].to_string();
5638 }
5639 }
5640 // Strip TSQL-specific modifiers
5641 modifiers.clustered = None;
5642 modifiers.with_options.clear();
5643 modifiers.on_filegroup = None;
5644 }
5645 }
5646 }
5647
5648 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
5649 if matches!(target, DialectType::Databricks) {
5650 for col in &mut ct.columns {
5651 if col.auto_increment {
5652 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
5653 col.data_type = crate::expressions::DataType::BigInt { length: None };
5654 }
5655 }
5656 }
5657 }
5658
5659 // Spark/Databricks: INTEGER -> INT in column definitions
5660 // Python sqlglot always outputs INT for Spark/Databricks
5661 if matches!(target, DialectType::Spark | DialectType::Databricks) {
5662 for col in &mut ct.columns {
5663 if let crate::expressions::DataType::Int {
5664 integer_spelling, ..
5665 } = &mut col.data_type
5666 {
5667 *integer_spelling = false;
5668 }
5669 }
5670 }
5671
5672 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
5673 if matches!(target, DialectType::Hive | DialectType::Spark) {
5674 for col in &mut ct.columns {
5675 // If nullable is explicitly true (NULL), change to None (omit it)
5676 if col.nullable == Some(true) {
5677 col.nullable = None;
5678 }
5679 // Also remove from constraints if stored there
5680 col.constraints
5681 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
5682 }
5683 }
5684
5685 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
5686 if ct.on_property.is_some()
5687 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
5688 {
5689 ct.on_property = None;
5690 }
5691
5692 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
5693 // Snowflake doesn't support typed arrays in DDL
5694 if matches!(target, DialectType::Snowflake) {
5695 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
5696 if let crate::expressions::DataType::Array { .. } = dt {
5697 *dt = crate::expressions::DataType::Custom {
5698 name: "ARRAY".to_string(),
5699 };
5700 }
5701 }
5702 for col in &mut ct.columns {
5703 strip_array_type_params(&mut col.data_type);
5704 }
5705 }
5706
5707 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
5708 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
5709 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
5710 if matches!(target, DialectType::PostgreSQL) {
5711 for col in &mut ct.columns {
5712 if col.auto_increment && !col.constraint_order.is_empty() {
5713 use crate::expressions::ConstraintType;
5714 let has_explicit_not_null = col
5715 .constraint_order
5716 .iter()
5717 .any(|ct| *ct == ConstraintType::NotNull);
5718
5719 if has_explicit_not_null {
5720 // Source had explicit NOT NULL - preserve original order
5721 // Just ensure nullable is set
5722 if col.nullable != Some(false) {
5723 col.nullable = Some(false);
5724 }
5725 } else {
5726 // Source didn't have explicit NOT NULL - build order with
5727 // AutoIncrement + NotNull first, then remaining constraints
5728 let mut new_order = Vec::new();
5729 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
5730 new_order.push(ConstraintType::AutoIncrement);
5731 new_order.push(ConstraintType::NotNull);
5732 // Add remaining constraints in original order (except AutoIncrement)
5733 for ct_type in &col.constraint_order {
5734 if *ct_type != ConstraintType::AutoIncrement {
5735 new_order.push(ct_type.clone());
5736 }
5737 }
5738 col.constraint_order = new_order;
5739 col.nullable = Some(false);
5740 }
5741 }
5742 }
5743 }
5744
5745 Expression::CreateTable(ct)
5746 } else {
5747 expr
5748 };
5749
5750 // Handle CreateView column stripping for Presto/Trino target
5751 let expr = if let Expression::CreateView(mut cv) = expr {
5752 // Presto/Trino: drop column list when view has a SELECT body
5753 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
5754 {
5755 if !matches!(&cv.query, Expression::Null(_)) {
5756 cv.columns.clear();
5757 }
5758 }
5759 Expression::CreateView(cv)
5760 } else {
5761 expr
5762 };
5763
5764 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
5765 let expr = if !matches!(
5766 target,
5767 DialectType::Presto | DialectType::Trino | DialectType::Athena
5768 ) {
5769 if let Expression::Select(mut select) = expr {
5770 if let Some(ref mut with) = select.with {
5771 for cte in &mut with.ctes {
5772 if let Expression::Values(ref vals) = cte.this {
5773 // Build: SELECT * FROM (VALUES ...) AS _values
5774 let values_subquery =
5775 Expression::Subquery(Box::new(crate::expressions::Subquery {
5776 this: Expression::Values(vals.clone()),
5777 alias: Some(Identifier::new("_values".to_string())),
5778 column_aliases: Vec::new(),
5779 order_by: None,
5780 limit: None,
5781 offset: None,
5782 distribute_by: None,
5783 sort_by: None,
5784 cluster_by: None,
5785 lateral: false,
5786 modifiers_inside: false,
5787 trailing_comments: Vec::new(),
5788 inferred_type: None,
5789 }));
5790 let mut new_select = crate::expressions::Select::new();
5791 new_select.expressions =
5792 vec![Expression::Star(crate::expressions::Star {
5793 table: None,
5794 except: None,
5795 replace: None,
5796 rename: None,
5797 trailing_comments: Vec::new(),
5798 span: None,
5799 })];
5800 new_select.from = Some(crate::expressions::From {
5801 expressions: vec![values_subquery],
5802 });
5803 cte.this = Expression::Select(Box::new(new_select));
5804 }
5805 }
5806 }
5807 Expression::Select(select)
5808 } else {
5809 expr
5810 }
5811 } else {
5812 expr
5813 };
5814
5815 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
5816 let expr = if matches!(target, DialectType::PostgreSQL) {
5817 if let Expression::CreateIndex(mut ci) = expr {
5818 for col in &mut ci.columns {
5819 if col.nulls_first.is_none() {
5820 col.nulls_first = Some(true);
5821 }
5822 }
5823 Expression::CreateIndex(ci)
5824 } else {
5825 expr
5826 }
5827 } else {
5828 expr
5829 };
5830
5831 transform_recursive(expr, &|e| {
5832 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
5833 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
5834 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
5835 if let Expression::Cast(ref c) = e {
5836 // Check if this is a CAST of an array to a struct array type
5837 let is_struct_array_cast =
5838 matches!(&c.to, crate::expressions::DataType::Array { .. });
5839 if is_struct_array_cast {
5840 let has_auto_named_structs = match &c.this {
5841 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
5842 if let Expression::Struct(s) = elem {
5843 s.fields.iter().all(|(name, _)| {
5844 name.as_ref().map_or(true, |n| {
5845 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
5846 })
5847 })
5848 } else {
5849 false
5850 }
5851 }),
5852 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
5853 if let Expression::Struct(s) = elem {
5854 s.fields.iter().all(|(name, _)| {
5855 name.as_ref().map_or(true, |n| {
5856 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
5857 })
5858 })
5859 } else {
5860 false
5861 }
5862 }),
5863 _ => false,
5864 };
5865 if has_auto_named_structs {
5866 let convert_struct_to_row = |elem: Expression| -> Expression {
5867 if let Expression::Struct(s) = elem {
5868 let row_args: Vec<Expression> =
5869 s.fields.into_iter().map(|(_, v)| v).collect();
5870 Expression::Function(Box::new(Function::new(
5871 "ROW".to_string(),
5872 row_args,
5873 )))
5874 } else {
5875 elem
5876 }
5877 };
5878 let mut c_clone = c.as_ref().clone();
5879 match &mut c_clone.this {
5880 Expression::Array(arr) => {
5881 arr.expressions = arr
5882 .expressions
5883 .drain(..)
5884 .map(convert_struct_to_row)
5885 .collect();
5886 }
5887 Expression::ArrayFunc(arr) => {
5888 arr.expressions = arr
5889 .expressions
5890 .drain(..)
5891 .map(convert_struct_to_row)
5892 .collect();
5893 }
5894 _ => {}
5895 }
5896 return Ok(Expression::Cast(Box::new(c_clone)));
5897 }
5898 }
5899 }
5900 }
5901
5902 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
5903 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
5904 if let Expression::Select(ref sel) = e {
5905 if sel.kind.as_deref() == Some("STRUCT") {
5906 let mut fields = Vec::new();
5907 for expr in &sel.expressions {
5908 match expr {
5909 Expression::Alias(a) => {
5910 fields.push((Some(a.alias.name.clone()), a.this.clone()));
5911 }
5912 Expression::Column(c) => {
5913 fields.push((Some(c.name.name.clone()), expr.clone()));
5914 }
5915 _ => {
5916 fields.push((None, expr.clone()));
5917 }
5918 }
5919 }
5920 let struct_lit =
5921 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
5922 let mut new_select = sel.as_ref().clone();
5923 new_select.kind = None;
5924 new_select.expressions = vec![struct_lit];
5925 return Ok(Expression::Select(Box::new(new_select)));
5926 }
5927 }
5928 }
5929
5930 // Convert @variable -> ${variable} for Spark/Hive/Databricks
5931 if matches!(source, DialectType::TSQL | DialectType::Fabric)
5932 && matches!(
5933 target,
5934 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5935 )
5936 {
5937 if let Expression::Parameter(ref p) = e {
5938 if p.style == crate::expressions::ParameterStyle::At {
5939 if let Some(ref name) = p.name {
5940 return Ok(Expression::Parameter(Box::new(
5941 crate::expressions::Parameter {
5942 name: Some(name.clone()),
5943 index: p.index,
5944 style: crate::expressions::ParameterStyle::DollarBrace,
5945 quoted: p.quoted,
5946 string_quoted: p.string_quoted,
5947 expression: None,
5948 },
5949 )));
5950 }
5951 }
5952 }
5953 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
5954 if let Expression::Column(ref col) = e {
5955 if col.name.name.starts_with('@') && col.table.is_none() {
5956 let var_name = col.name.name.trim_start_matches('@').to_string();
5957 return Ok(Expression::Parameter(Box::new(
5958 crate::expressions::Parameter {
5959 name: Some(var_name),
5960 index: None,
5961 style: crate::expressions::ParameterStyle::DollarBrace,
5962 quoted: false,
5963 string_quoted: false,
5964 expression: None,
5965 },
5966 )));
5967 }
5968 }
5969 }
5970
5971 // Convert @variable -> variable in SET statements for Spark/Databricks
5972 if matches!(source, DialectType::TSQL | DialectType::Fabric)
5973 && matches!(target, DialectType::Spark | DialectType::Databricks)
5974 {
5975 if let Expression::SetStatement(ref s) = e {
5976 let mut new_items = s.items.clone();
5977 let mut changed = false;
5978 for item in &mut new_items {
5979 // Strip @ from the SET name (Parameter style)
5980 if let Expression::Parameter(ref p) = item.name {
5981 if p.style == crate::expressions::ParameterStyle::At {
5982 if let Some(ref name) = p.name {
5983 item.name = Expression::Identifier(Identifier::new(name));
5984 changed = true;
5985 }
5986 }
5987 }
5988 // Strip @ from the SET name (Identifier style - SET parser)
5989 if let Expression::Identifier(ref id) = item.name {
5990 if id.name.starts_with('@') {
5991 let var_name = id.name.trim_start_matches('@').to_string();
5992 item.name = Expression::Identifier(Identifier::new(&var_name));
5993 changed = true;
5994 }
5995 }
5996 // Strip @ from the SET name (Column style - alternative parsing)
5997 if let Expression::Column(ref col) = item.name {
5998 if col.name.name.starts_with('@') && col.table.is_none() {
5999 let var_name = col.name.name.trim_start_matches('@').to_string();
6000 item.name = Expression::Identifier(Identifier::new(&var_name));
6001 changed = true;
6002 }
6003 }
6004 }
6005 if changed {
6006 let mut new_set = (**s).clone();
6007 new_set.items = new_items;
6008 return Ok(Expression::SetStatement(Box::new(new_set)));
6009 }
6010 }
6011 }
6012
6013 // Strip NOLOCK hint for non-TSQL targets
6014 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6015 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6016 {
6017 if let Expression::Table(ref tr) = e {
6018 if !tr.hints.is_empty() {
6019 let mut new_tr = tr.clone();
6020 new_tr.hints.clear();
6021 return Ok(Expression::Table(new_tr));
6022 }
6023 }
6024 }
6025
6026 // Snowflake: constant-fold IS [NOT] TRUE / IS [NOT] FALSE on boolean literals, e.g. TRUE IS TRUE -> TRUE, FALSE IS FALSE -> TRUE
6027 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
6028 if matches!(target, DialectType::Snowflake) {
6029 if let Expression::IsTrue(ref itf) = e {
6030 if let Expression::Boolean(ref b) = itf.this {
6031 if !itf.not {
6032 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6033 value: b.value,
6034 }));
6035 } else {
6036 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6037 value: !b.value,
6038 }));
6039 }
6040 }
6041 }
6042 if let Expression::IsFalse(ref itf) = e {
6043 if let Expression::Boolean(ref b) = itf.this {
6044 if !itf.not {
6045 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6046 value: !b.value,
6047 }));
6048 } else {
6049 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6050 value: b.value,
6051 }));
6052 }
6053 }
6054 }
6055 }
6056
6057 // BigQuery: split dotted backtick identifiers in table names
6058 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
6059 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
6060 if let Expression::CreateTable(ref ct) = e {
6061 let mut changed = false;
6062 let mut new_ct = ct.clone();
6063 // Split the table name
6064 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
6065 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
6066 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
6067 let was_quoted = ct.name.name.quoted;
6068 let mk_id = |s: &str| {
6069 if was_quoted {
6070 Identifier::quoted(s)
6071 } else {
6072 Identifier::new(s)
6073 }
6074 };
6075 if parts.len() == 3 {
6076 new_ct.name.catalog = Some(mk_id(parts[0]));
6077 new_ct.name.schema = Some(mk_id(parts[1]));
6078 new_ct.name.name = mk_id(parts[2]);
6079 changed = true;
6080 } else if parts.len() == 2 {
6081 new_ct.name.schema = Some(mk_id(parts[0]));
6082 new_ct.name.name = mk_id(parts[1]);
6083 changed = true;
6084 }
6085 }
6086 // Split the clone source name
6087 if let Some(ref clone_src) = ct.clone_source {
6088 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
6089 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
6090 let was_quoted = clone_src.name.quoted;
6091 let mk_id = |s: &str| {
6092 if was_quoted {
6093 Identifier::quoted(s)
6094 } else {
6095 Identifier::new(s)
6096 }
6097 };
6098 let mut new_src = clone_src.clone();
6099 if parts.len() == 3 {
6100 new_src.catalog = Some(mk_id(parts[0]));
6101 new_src.schema = Some(mk_id(parts[1]));
6102 new_src.name = mk_id(parts[2]);
6103 new_ct.clone_source = Some(new_src);
6104 changed = true;
6105 } else if parts.len() == 2 {
6106 new_src.schema = Some(mk_id(parts[0]));
6107 new_src.name = mk_id(parts[1]);
6108 new_ct.clone_source = Some(new_src);
6109 changed = true;
6110 }
6111 }
6112 }
6113 if changed {
6114 return Ok(Expression::CreateTable(new_ct));
6115 }
6116 }
6117 }
6118
6119 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
6120 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
6121 if matches!(source, DialectType::BigQuery)
6122 && matches!(
6123 target,
6124 DialectType::DuckDB
6125 | DialectType::Presto
6126 | DialectType::Trino
6127 | DialectType::Athena
6128 )
6129 {
6130 if let Expression::Subscript(ref sub) = e {
6131 let (new_index, is_safe) = match &sub.index {
6132 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
6133 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
6134 let Literal::Number(n) = lit.as_ref() else {
6135 unreachable!()
6136 };
6137 if let Ok(val) = n.parse::<i64>() {
6138 (
6139 Some(Expression::Literal(Box::new(Literal::Number(
6140 (val + 1).to_string(),
6141 )))),
6142 false,
6143 )
6144 } else {
6145 (None, false)
6146 }
6147 }
6148 // OFFSET(n) -> n+1 (0-based)
6149 Expression::Function(ref f)
6150 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
6151 {
6152 if let Expression::Literal(lit) = &f.args[0] {
6153 if let Literal::Number(n) = lit.as_ref() {
6154 if let Ok(val) = n.parse::<i64>() {
6155 (
6156 Some(Expression::Literal(Box::new(Literal::Number(
6157 (val + 1).to_string(),
6158 )))),
6159 false,
6160 )
6161 } else {
6162 (
6163 Some(Expression::Add(Box::new(
6164 crate::expressions::BinaryOp::new(
6165 f.args[0].clone(),
6166 Expression::number(1),
6167 ),
6168 ))),
6169 false,
6170 )
6171 }
6172 } else {
6173 (None, false)
6174 }
6175 } else {
6176 (
6177 Some(Expression::Add(Box::new(
6178 crate::expressions::BinaryOp::new(
6179 f.args[0].clone(),
6180 Expression::number(1),
6181 ),
6182 ))),
6183 false,
6184 )
6185 }
6186 }
6187 // ORDINAL(n) -> n (already 1-based)
6188 Expression::Function(ref f)
6189 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
6190 {
6191 (Some(f.args[0].clone()), false)
6192 }
6193 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
6194 Expression::Function(ref f)
6195 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
6196 {
6197 if let Expression::Literal(lit) = &f.args[0] {
6198 if let Literal::Number(n) = lit.as_ref() {
6199 if let Ok(val) = n.parse::<i64>() {
6200 (
6201 Some(Expression::Literal(Box::new(Literal::Number(
6202 (val + 1).to_string(),
6203 )))),
6204 true,
6205 )
6206 } else {
6207 (
6208 Some(Expression::Add(Box::new(
6209 crate::expressions::BinaryOp::new(
6210 f.args[0].clone(),
6211 Expression::number(1),
6212 ),
6213 ))),
6214 true,
6215 )
6216 }
6217 } else {
6218 (None, false)
6219 }
6220 } else {
6221 (
6222 Some(Expression::Add(Box::new(
6223 crate::expressions::BinaryOp::new(
6224 f.args[0].clone(),
6225 Expression::number(1),
6226 ),
6227 ))),
6228 true,
6229 )
6230 }
6231 }
6232 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
6233 Expression::Function(ref f)
6234 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
6235 {
6236 (Some(f.args[0].clone()), true)
6237 }
6238 _ => (None, false),
6239 };
6240 if let Some(idx) = new_index {
6241 if is_safe
6242 && matches!(
6243 target,
6244 DialectType::Presto | DialectType::Trino | DialectType::Athena
6245 )
6246 {
6247 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
6248 return Ok(Expression::Function(Box::new(Function::new(
6249 "ELEMENT_AT".to_string(),
6250 vec![sub.this.clone(), idx],
6251 ))));
6252 } else {
6253 // DuckDB or non-safe: just use subscript with converted index
6254 return Ok(Expression::Subscript(Box::new(
6255 crate::expressions::Subscript {
6256 this: sub.this.clone(),
6257 index: idx,
6258 },
6259 )));
6260 }
6261 }
6262 }
6263 }
6264
6265 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
6266 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6267 if let Expression::Length(ref uf) = e {
6268 let arg = uf.this.clone();
6269 let typeof_func = Expression::Function(Box::new(Function::new(
6270 "TYPEOF".to_string(),
6271 vec![arg.clone()],
6272 )));
6273 let blob_cast = Expression::Cast(Box::new(Cast {
6274 this: arg.clone(),
6275 to: DataType::VarBinary { length: None },
6276 trailing_comments: vec![],
6277 double_colon_syntax: false,
6278 format: None,
6279 default: None,
6280 inferred_type: None,
6281 }));
6282 let octet_length = Expression::Function(Box::new(Function::new(
6283 "OCTET_LENGTH".to_string(),
6284 vec![blob_cast],
6285 )));
6286 let text_cast = Expression::Cast(Box::new(Cast {
6287 this: arg,
6288 to: DataType::Text,
6289 trailing_comments: vec![],
6290 double_colon_syntax: false,
6291 format: None,
6292 default: None,
6293 inferred_type: None,
6294 }));
6295 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
6296 this: text_cast,
6297 original_name: None,
6298 inferred_type: None,
6299 }));
6300 return Ok(Expression::Case(Box::new(Case {
6301 operand: Some(typeof_func),
6302 whens: vec![(
6303 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
6304 octet_length,
6305 )],
6306 else_: Some(length_text),
6307 comments: Vec::new(),
6308 inferred_type: None,
6309 })));
6310 }
6311 }
6312
6313 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
6314 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
6315 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
6316 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
6317 if let Expression::Alias(ref a) = e {
6318 if matches!(&a.this, Expression::Unnest(_)) {
6319 if a.column_aliases.is_empty() {
6320 // Drop the entire alias, return just the UNNEST expression
6321 return Ok(a.this.clone());
6322 } else {
6323 // Use first column alias as the main alias
6324 let mut new_alias = a.as_ref().clone();
6325 new_alias.alias = a.column_aliases[0].clone();
6326 new_alias.column_aliases.clear();
6327 return Ok(Expression::Alias(Box::new(new_alias)));
6328 }
6329 }
6330 }
6331 }
6332
6333 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
6334 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
6335 if let Expression::In(ref in_expr) = e {
6336 if let Some(ref unnest_inner) = in_expr.unnest {
6337 // Build the function call for the target dialect
6338 let func_expr = if matches!(
6339 target,
6340 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6341 ) {
6342 // Use EXPLODE for Hive/Spark
6343 Expression::Function(Box::new(Function::new(
6344 "EXPLODE".to_string(),
6345 vec![*unnest_inner.clone()],
6346 )))
6347 } else {
6348 // Use UNNEST for Presto/Trino/DuckDB/etc.
6349 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
6350 this: *unnest_inner.clone(),
6351 expressions: Vec::new(),
6352 with_ordinality: false,
6353 alias: None,
6354 offset_alias: None,
6355 }))
6356 };
6357
6358 // Wrap in SELECT
6359 let mut inner_select = crate::expressions::Select::new();
6360 inner_select.expressions = vec![func_expr];
6361
6362 let subquery_expr = Expression::Select(Box::new(inner_select));
6363
6364 return Ok(Expression::In(Box::new(crate::expressions::In {
6365 this: in_expr.this.clone(),
6366 expressions: Vec::new(),
6367 query: Some(subquery_expr),
6368 not: in_expr.not,
6369 global: in_expr.global,
6370 unnest: None,
6371 is_field: false,
6372 })));
6373 }
6374 }
6375 }
6376
6377 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
6378 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
6379 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
6380 if let Expression::Alias(ref a) = e {
6381 if let Expression::Function(ref f) = a.this {
6382 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
6383 && !a.column_aliases.is_empty()
6384 {
6385 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
6386 let col_alias = a.column_aliases[0].clone();
6387 let mut inner_select = crate::expressions::Select::new();
6388 inner_select.expressions =
6389 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
6390 Expression::Identifier(Identifier::new("value".to_string())),
6391 col_alias,
6392 )))];
6393 inner_select.from = Some(crate::expressions::From {
6394 expressions: vec![a.this.clone()],
6395 });
6396 let subquery =
6397 Expression::Subquery(Box::new(crate::expressions::Subquery {
6398 this: Expression::Select(Box::new(inner_select)),
6399 alias: Some(a.alias.clone()),
6400 column_aliases: Vec::new(),
6401 order_by: None,
6402 limit: None,
6403 offset: None,
6404 lateral: false,
6405 modifiers_inside: false,
6406 trailing_comments: Vec::new(),
6407 distribute_by: None,
6408 sort_by: None,
6409 cluster_by: None,
6410 inferred_type: None,
6411 }));
6412 return Ok(subquery);
6413 }
6414 }
6415 }
6416 }
6417
6418 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
6419 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
6420 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
6421 if matches!(source, DialectType::BigQuery) {
6422 if let Expression::Select(ref s) = e {
6423 if let Some(ref from) = s.from {
6424 if from.expressions.len() >= 2 {
6425 // Collect table names from first expression
6426 let first_tables: Vec<String> = from
6427 .expressions
6428 .iter()
6429 .take(1)
6430 .filter_map(|expr| {
6431 if let Expression::Table(t) = expr {
6432 Some(t.name.name.to_ascii_lowercase())
6433 } else {
6434 None
6435 }
6436 })
6437 .collect();
6438
6439 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
6440 // or have a dotted name matching a table
6441 let mut needs_rewrite = false;
6442 for expr in from.expressions.iter().skip(1) {
6443 if let Expression::Table(t) = expr {
6444 if let Some(ref schema) = t.schema {
6445 if first_tables.contains(&schema.name.to_ascii_lowercase())
6446 {
6447 needs_rewrite = true;
6448 break;
6449 }
6450 }
6451 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
6452 if t.schema.is_none() && t.name.name.contains('.') {
6453 let parts: Vec<&str> = t.name.name.split('.').collect();
6454 if parts.len() >= 2
6455 && first_tables.contains(&parts[0].to_ascii_lowercase())
6456 {
6457 needs_rewrite = true;
6458 break;
6459 }
6460 }
6461 }
6462 }
6463
6464 if needs_rewrite {
6465 let mut new_select = s.clone();
6466 let mut new_from_exprs = vec![from.expressions[0].clone()];
6467 let mut new_joins = s.joins.clone();
6468
6469 for expr in from.expressions.iter().skip(1) {
6470 if let Expression::Table(ref t) = expr {
6471 if let Some(ref schema) = t.schema {
6472 if first_tables
6473 .contains(&schema.name.to_ascii_lowercase())
6474 {
6475 // This is an array path reference, convert to CROSS JOIN UNNEST
6476 let col_expr = Expression::Column(Box::new(
6477 crate::expressions::Column {
6478 name: t.name.clone(),
6479 table: Some(schema.clone()),
6480 join_mark: false,
6481 trailing_comments: vec![],
6482 span: None,
6483 inferred_type: None,
6484 },
6485 ));
6486 let unnest_expr = Expression::Unnest(Box::new(
6487 crate::expressions::UnnestFunc {
6488 this: col_expr,
6489 expressions: Vec::new(),
6490 with_ordinality: false,
6491 alias: None,
6492 offset_alias: None,
6493 },
6494 ));
6495 let join_this = if let Some(ref alias) = t.alias {
6496 if matches!(
6497 target,
6498 DialectType::Presto
6499 | DialectType::Trino
6500 | DialectType::Athena
6501 ) {
6502 // Presto: UNNEST(x) AS _t0(results)
6503 Expression::Alias(Box::new(
6504 crate::expressions::Alias {
6505 this: unnest_expr,
6506 alias: Identifier::new("_t0"),
6507 column_aliases: vec![alias.clone()],
6508 pre_alias_comments: vec![],
6509 trailing_comments: vec![],
6510 inferred_type: None,
6511 },
6512 ))
6513 } else {
6514 // BigQuery: UNNEST(x) AS results
6515 Expression::Alias(Box::new(
6516 crate::expressions::Alias {
6517 this: unnest_expr,
6518 alias: alias.clone(),
6519 column_aliases: vec![],
6520 pre_alias_comments: vec![],
6521 trailing_comments: vec![],
6522 inferred_type: None,
6523 },
6524 ))
6525 }
6526 } else {
6527 unnest_expr
6528 };
6529 new_joins.push(crate::expressions::Join {
6530 kind: crate::expressions::JoinKind::Cross,
6531 this: join_this,
6532 on: None,
6533 using: Vec::new(),
6534 use_inner_keyword: false,
6535 use_outer_keyword: false,
6536 deferred_condition: false,
6537 join_hint: None,
6538 match_condition: None,
6539 pivots: Vec::new(),
6540 comments: Vec::new(),
6541 nesting_group: 0,
6542 directed: false,
6543 });
6544 } else {
6545 new_from_exprs.push(expr.clone());
6546 }
6547 } else if t.schema.is_none() && t.name.name.contains('.') {
6548 // Dotted name in quoted identifier: `Coordinates.position`
6549 let parts: Vec<&str> = t.name.name.split('.').collect();
6550 if parts.len() >= 2
6551 && first_tables
6552 .contains(&parts[0].to_ascii_lowercase())
6553 {
6554 let join_this =
6555 if matches!(target, DialectType::BigQuery) {
6556 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
6557 Expression::Table(t.clone())
6558 } else {
6559 // Other targets: split into "schema"."name"
6560 let mut new_t = t.clone();
6561 new_t.schema =
6562 Some(Identifier::quoted(parts[0]));
6563 new_t.name = Identifier::quoted(parts[1]);
6564 Expression::Table(new_t)
6565 };
6566 new_joins.push(crate::expressions::Join {
6567 kind: crate::expressions::JoinKind::Cross,
6568 this: join_this,
6569 on: None,
6570 using: Vec::new(),
6571 use_inner_keyword: false,
6572 use_outer_keyword: false,
6573 deferred_condition: false,
6574 join_hint: None,
6575 match_condition: None,
6576 pivots: Vec::new(),
6577 comments: Vec::new(),
6578 nesting_group: 0,
6579 directed: false,
6580 });
6581 } else {
6582 new_from_exprs.push(expr.clone());
6583 }
6584 } else {
6585 new_from_exprs.push(expr.clone());
6586 }
6587 } else {
6588 new_from_exprs.push(expr.clone());
6589 }
6590 }
6591
6592 new_select.from = Some(crate::expressions::From {
6593 expressions: new_from_exprs,
6594 ..from.clone()
6595 });
6596 new_select.joins = new_joins;
6597 return Ok(Expression::Select(new_select));
6598 }
6599 }
6600 }
6601 }
6602 }
6603
6604 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
6605 if matches!(
6606 target,
6607 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6608 ) {
6609 if let Expression::Select(ref s) = e {
6610 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
6611 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
6612 matches!(expr, Expression::Unnest(_))
6613 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
6614 };
6615 let has_unnest_join = s.joins.iter().any(|j| {
6616 j.kind == crate::expressions::JoinKind::Cross && (
6617 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
6618 || is_unnest_or_explode_expr(&j.this)
6619 )
6620 });
6621 if has_unnest_join {
6622 let mut select = s.clone();
6623 let mut new_joins = Vec::new();
6624 for join in select.joins.drain(..) {
6625 if join.kind == crate::expressions::JoinKind::Cross {
6626 // Extract the UNNEST/EXPLODE from the join
6627 let (func_expr, table_alias, col_aliases) = match &join.this {
6628 Expression::Alias(a) => {
6629 let ta = if a.alias.is_empty() {
6630 None
6631 } else {
6632 Some(a.alias.clone())
6633 };
6634 let cas = a.column_aliases.clone();
6635 match &a.this {
6636 Expression::Unnest(u) => {
6637 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
6638 if !u.expressions.is_empty() {
6639 let mut all_args = vec![u.this.clone()];
6640 all_args.extend(u.expressions.clone());
6641 let arrays_zip =
6642 Expression::Function(Box::new(
6643 crate::expressions::Function::new(
6644 "ARRAYS_ZIP".to_string(),
6645 all_args,
6646 ),
6647 ));
6648 let inline = Expression::Function(Box::new(
6649 crate::expressions::Function::new(
6650 "INLINE".to_string(),
6651 vec![arrays_zip],
6652 ),
6653 ));
6654 (Some(inline), ta, a.column_aliases.clone())
6655 } else {
6656 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
6657 let func_name = if u.with_ordinality {
6658 "POSEXPLODE"
6659 } else {
6660 "EXPLODE"
6661 };
6662 let explode = Expression::Function(Box::new(
6663 crate::expressions::Function::new(
6664 func_name.to_string(),
6665 vec![u.this.clone()],
6666 ),
6667 ));
6668 // For POSEXPLODE, add 'pos' to column aliases
6669 let cas = if u.with_ordinality {
6670 let mut pos_aliases =
6671 vec![Identifier::new(
6672 "pos".to_string(),
6673 )];
6674 pos_aliases
6675 .extend(a.column_aliases.clone());
6676 pos_aliases
6677 } else {
6678 a.column_aliases.clone()
6679 };
6680 (Some(explode), ta, cas)
6681 }
6682 }
6683 Expression::Function(f)
6684 if f.name.eq_ignore_ascii_case("EXPLODE") =>
6685 {
6686 (Some(Expression::Function(f.clone())), ta, cas)
6687 }
6688 _ => (None, None, Vec::new()),
6689 }
6690 }
6691 Expression::Unnest(u) => {
6692 let func_name = if u.with_ordinality {
6693 "POSEXPLODE"
6694 } else {
6695 "EXPLODE"
6696 };
6697 let explode = Expression::Function(Box::new(
6698 crate::expressions::Function::new(
6699 func_name.to_string(),
6700 vec![u.this.clone()],
6701 ),
6702 ));
6703 let ta = u.alias.clone();
6704 let col_aliases = if u.with_ordinality {
6705 vec![Identifier::new("pos".to_string())]
6706 } else {
6707 Vec::new()
6708 };
6709 (Some(explode), ta, col_aliases)
6710 }
6711 _ => (None, None, Vec::new()),
6712 };
6713 if let Some(func) = func_expr {
6714 select.lateral_views.push(crate::expressions::LateralView {
6715 this: func,
6716 table_alias,
6717 column_aliases: col_aliases,
6718 outer: false,
6719 });
6720 } else {
6721 new_joins.push(join);
6722 }
6723 } else {
6724 new_joins.push(join);
6725 }
6726 }
6727 select.joins = new_joins;
6728 return Ok(Expression::Select(select));
6729 }
6730 }
6731 }
6732
// UNNEST expansion: DuckDB/PostgreSQL SELECT UNNEST(arr) in SELECT list -> expanded query
// for BigQuery, Presto/Trino, and Snowflake targets
6735 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
6736 && matches!(
6737 target,
6738 DialectType::BigQuery
6739 | DialectType::Presto
6740 | DialectType::Trino
6741 | DialectType::Snowflake
6742 )
6743 {
6744 if let Expression::Select(ref s) = e {
6745 // Check if any SELECT expressions contain UNNEST
6746 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
6747 let has_unnest_in_select = s.expressions.iter().any(|expr| {
6748 fn contains_unnest(e: &Expression) -> bool {
6749 match e {
6750 Expression::Unnest(_) => true,
6751 Expression::Function(f)
6752 if f.name.eq_ignore_ascii_case("UNNEST") =>
6753 {
6754 true
6755 }
6756 Expression::Alias(a) => contains_unnest(&a.this),
6757 Expression::Add(op)
6758 | Expression::Sub(op)
6759 | Expression::Mul(op)
6760 | Expression::Div(op) => {
6761 contains_unnest(&op.left) || contains_unnest(&op.right)
6762 }
6763 _ => false,
6764 }
6765 }
6766 contains_unnest(expr)
6767 });
6768
6769 if has_unnest_in_select {
6770 let rewritten = Self::rewrite_unnest_expansion(s, target);
6771 if let Some(new_select) = rewritten {
6772 return Ok(Expression::Select(Box::new(new_select)));
6773 }
6774 }
6775 }
6776 }
6777
6778 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
6779 // BigQuery '\n' -> PostgreSQL literal newline in string
6780 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
6781 {
6782 if let Expression::Literal(ref lit) = e {
6783 if let Literal::String(ref s) = lit.as_ref() {
6784 if s.contains("\\n")
6785 || s.contains("\\t")
6786 || s.contains("\\r")
6787 || s.contains("\\\\")
6788 {
6789 let converted = s
6790 .replace("\\n", "\n")
6791 .replace("\\t", "\t")
6792 .replace("\\r", "\r")
6793 .replace("\\\\", "\\");
6794 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
6795 }
6796 }
6797 }
6798 }
6799
6800 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
6801 // when source != target (identity tests keep the Literal::Timestamp for native handling)
6802 if source != target {
6803 if let Expression::Literal(ref lit) = e {
6804 if let Literal::Timestamp(ref s) = lit.as_ref() {
6805 let s = s.clone();
6806 // MySQL: TIMESTAMP handling depends on source dialect
6807 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
6808 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
6809 if matches!(target, DialectType::MySQL) {
6810 if matches!(source, DialectType::BigQuery) {
6811 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
6812 return Ok(Expression::Function(Box::new(Function::new(
6813 "TIMESTAMP".to_string(),
6814 vec![Expression::Literal(Box::new(Literal::String(s)))],
6815 ))));
6816 } else {
6817 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
6818 return Ok(Expression::Cast(Box::new(Cast {
6819 this: Expression::Literal(Box::new(Literal::String(s))),
6820 to: DataType::Custom {
6821 name: "DATETIME".to_string(),
6822 },
6823 trailing_comments: Vec::new(),
6824 double_colon_syntax: false,
6825 format: None,
6826 default: None,
6827 inferred_type: None,
6828 })));
6829 }
6830 }
6831 let dt = match target {
6832 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
6833 name: "DATETIME".to_string(),
6834 },
6835 DialectType::Snowflake => {
6836 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
6837 if matches!(source, DialectType::BigQuery) {
6838 DataType::Custom {
6839 name: "TIMESTAMPTZ".to_string(),
6840 }
6841 } else if matches!(
6842 source,
6843 DialectType::PostgreSQL
6844 | DialectType::Redshift
6845 | DialectType::Snowflake
6846 ) {
6847 DataType::Timestamp {
6848 precision: None,
6849 timezone: false,
6850 }
6851 } else {
6852 DataType::Custom {
6853 name: "TIMESTAMPNTZ".to_string(),
6854 }
6855 }
6856 }
6857 DialectType::Spark | DialectType::Databricks => {
6858 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
6859 if matches!(source, DialectType::BigQuery) {
6860 DataType::Timestamp {
6861 precision: None,
6862 timezone: false,
6863 }
6864 } else {
6865 DataType::Custom {
6866 name: "TIMESTAMP_NTZ".to_string(),
6867 }
6868 }
6869 }
6870 DialectType::ClickHouse => DataType::Nullable {
6871 inner: Box::new(DataType::Custom {
6872 name: "DateTime".to_string(),
6873 }),
6874 },
6875 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
6876 name: "DATETIME2".to_string(),
6877 },
6878 DialectType::DuckDB => {
6879 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
6880 // or when the timestamp string explicitly has timezone info
6881 if matches!(source, DialectType::BigQuery)
6882 || Self::timestamp_string_has_timezone(&s)
6883 {
6884 DataType::Custom {
6885 name: "TIMESTAMPTZ".to_string(),
6886 }
6887 } else {
6888 DataType::Timestamp {
6889 precision: None,
6890 timezone: false,
6891 }
6892 }
6893 }
6894 _ => DataType::Timestamp {
6895 precision: None,
6896 timezone: false,
6897 },
6898 };
6899 return Ok(Expression::Cast(Box::new(Cast {
6900 this: Expression::Literal(Box::new(Literal::String(s))),
6901 to: dt,
6902 trailing_comments: vec![],
6903 double_colon_syntax: false,
6904 format: None,
6905 default: None,
6906 inferred_type: None,
6907 })));
6908 }
6909 }
6910 }
6911
6912 // PostgreSQL DELETE requires explicit AS for table aliases
6913 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
6914 if let Expression::Delete(ref del) = e {
6915 if del.alias.is_some() && !del.alias_explicit_as {
6916 let mut new_del = del.clone();
6917 new_del.alias_explicit_as = true;
6918 return Ok(Expression::Delete(new_del));
6919 }
6920 }
6921 }
6922
6923 // UNION/INTERSECT/EXCEPT DISTINCT handling:
6924 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
6925 // while others don't support it (Presto, Spark, DuckDB, etc.)
6926 {
6927 let needs_distinct =
6928 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
6929 let drop_distinct = matches!(
6930 target,
6931 DialectType::Presto
6932 | DialectType::Trino
6933 | DialectType::Athena
6934 | DialectType::Spark
6935 | DialectType::Databricks
6936 | DialectType::DuckDB
6937 | DialectType::Hive
6938 | DialectType::MySQL
6939 | DialectType::PostgreSQL
6940 | DialectType::SQLite
6941 | DialectType::TSQL
6942 | DialectType::Redshift
6943 | DialectType::Snowflake
6944 | DialectType::Oracle
6945 | DialectType::Teradata
6946 | DialectType::Drill
6947 | DialectType::Doris
6948 | DialectType::StarRocks
6949 );
6950 match &e {
6951 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
6952 let mut new_u = (**u).clone();
6953 new_u.distinct = true;
6954 return Ok(Expression::Union(Box::new(new_u)));
6955 }
6956 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
6957 let mut new_i = (**i).clone();
6958 new_i.distinct = true;
6959 return Ok(Expression::Intersect(Box::new(new_i)));
6960 }
6961 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
6962 let mut new_ex = (**ex).clone();
6963 new_ex.distinct = true;
6964 return Ok(Expression::Except(Box::new(new_ex)));
6965 }
6966 Expression::Union(u) if u.distinct && drop_distinct => {
6967 let mut new_u = (**u).clone();
6968 new_u.distinct = false;
6969 return Ok(Expression::Union(Box::new(new_u)));
6970 }
6971 Expression::Intersect(i) if i.distinct && drop_distinct => {
6972 let mut new_i = (**i).clone();
6973 new_i.distinct = false;
6974 return Ok(Expression::Intersect(Box::new(new_i)));
6975 }
6976 Expression::Except(ex) if ex.distinct && drop_distinct => {
6977 let mut new_ex = (**ex).clone();
6978 new_ex.distinct = false;
6979 return Ok(Expression::Except(Box::new(new_ex)));
6980 }
6981 _ => {}
6982 }
6983 }
6984
6985 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
6986 if matches!(target, DialectType::ClickHouse) {
6987 if let Expression::Function(ref f) = e {
6988 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
6989 let mut new_f = f.as_ref().clone();
6990 new_f.name = "map".to_string();
6991 return Ok(Expression::Function(Box::new(new_f)));
6992 }
6993 }
6994 }
6995
6996 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
6997 if matches!(target, DialectType::ClickHouse) {
6998 if let Expression::Intersect(ref i) = e {
6999 if i.all {
7000 let mut new_i = (**i).clone();
7001 new_i.all = false;
7002 return Ok(Expression::Intersect(Box::new(new_i)));
7003 }
7004 }
7005 }
7006
7007 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
7008 // Only from Generic source, to prevent double-wrapping
7009 if matches!(source, DialectType::Generic) {
7010 if let Expression::Div(ref op) = e {
7011 let cast_type = match target {
7012 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
7013 precision: None,
7014 scale: None,
7015 real_spelling: false,
7016 }),
7017 DialectType::Drill
7018 | DialectType::Trino
7019 | DialectType::Athena
7020 | DialectType::Presto => Some(DataType::Double {
7021 precision: None,
7022 scale: None,
7023 }),
7024 DialectType::PostgreSQL
7025 | DialectType::Redshift
7026 | DialectType::Materialize
7027 | DialectType::Teradata
7028 | DialectType::RisingWave => Some(DataType::Double {
7029 precision: None,
7030 scale: None,
7031 }),
7032 _ => None,
7033 };
7034 if let Some(dt) = cast_type {
7035 let cast_left = Expression::Cast(Box::new(Cast {
7036 this: op.left.clone(),
7037 to: dt,
7038 double_colon_syntax: false,
7039 trailing_comments: Vec::new(),
7040 format: None,
7041 default: None,
7042 inferred_type: None,
7043 }));
7044 let new_op = crate::expressions::BinaryOp {
7045 left: cast_left,
7046 right: op.right.clone(),
7047 left_comments: op.left_comments.clone(),
7048 operator_comments: op.operator_comments.clone(),
7049 trailing_comments: op.trailing_comments.clone(),
7050 inferred_type: None,
7051 };
7052 return Ok(Expression::Div(Box::new(new_op)));
7053 }
7054 }
7055 }
7056
// CREATE DATABASE -> CREATE SCHEMA and DROP DATABASE -> DROP SCHEMA for DuckDB target
7058 if matches!(target, DialectType::DuckDB) {
7059 if let Expression::CreateDatabase(db) = e {
7060 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
7061 schema.if_not_exists = db.if_not_exists;
7062 return Ok(Expression::CreateSchema(Box::new(schema)));
7063 }
7064 if let Expression::DropDatabase(db) = e {
7065 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
7066 schema.if_exists = db.if_exists;
7067 return Ok(Expression::DropSchema(Box::new(schema)));
7068 }
7069 }
7070
7071 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
7072 if matches!(source, DialectType::ClickHouse)
7073 && !matches!(target, DialectType::ClickHouse)
7074 {
7075 if let Expression::Cast(ref c) = e {
7076 if let DataType::Custom { ref name } = c.to {
7077 if name.len() >= 9
7078 && name[..9].eq_ignore_ascii_case("NULLABLE(")
7079 && name.ends_with(")")
7080 {
7081 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
7082 let inner_upper = inner.to_ascii_uppercase();
7083 let new_dt = match inner_upper.as_str() {
7084 "DATETIME" | "DATETIME64" => DataType::Timestamp {
7085 precision: None,
7086 timezone: false,
7087 },
7088 "DATE" => DataType::Date,
7089 "INT64" | "BIGINT" => DataType::BigInt { length: None },
7090 "INT32" | "INT" | "INTEGER" => DataType::Int {
7091 length: None,
7092 integer_spelling: false,
7093 },
7094 "FLOAT64" | "DOUBLE" => DataType::Double {
7095 precision: None,
7096 scale: None,
7097 },
7098 "STRING" => DataType::Text,
7099 _ => DataType::Custom {
7100 name: inner.to_string(),
7101 },
7102 };
7103 let mut new_cast = c.clone();
7104 new_cast.to = new_dt;
7105 return Ok(Expression::Cast(new_cast));
7106 }
7107 }
7108 }
7109 }
7110
7111 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
7112 if matches!(target, DialectType::Snowflake) {
7113 if let Expression::ArrayConcatAgg(ref agg) = e {
7114 let mut agg_clone = agg.as_ref().clone();
7115 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
7116 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
7117 let flatten = Expression::Function(Box::new(Function::new(
7118 "ARRAY_FLATTEN".to_string(),
7119 vec![array_agg],
7120 )));
7121 return Ok(flatten);
7122 }
7123 }
7124
7125 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
7126 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
7127 if let Expression::ArrayConcatAgg(agg) = e {
7128 let arg = agg.this;
7129 return Ok(Expression::Function(Box::new(Function::new(
7130 "ARRAY_CONCAT_AGG".to_string(),
7131 vec![arg],
7132 ))));
7133 }
7134 }
7135
7136 // Determine what action to take by inspecting e immutably
7137 let action = {
7138 let source_propagates_nulls =
7139 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
7140 let target_ignores_nulls =
7141 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
7142
7143 match &e {
7144 Expression::Function(f) => {
7145 let name = f.name.to_ascii_uppercase();
7146 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
7147 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
7148 if name == "JSON"
7149 && f.args.len() == 1
7150 && matches!(source, DialectType::DuckDB)
7151 && matches!(
7152 target,
7153 DialectType::Presto | DialectType::Trino | DialectType::Athena
7154 )
7155 {
7156 Action::DuckDBJsonFuncToJsonParse
7157 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
7158 // SQL:2016 `x IS JSON` predicate which has matching semantics.
7159 } else if name == "JSON_VALID"
7160 && f.args.len() == 1
7161 && matches!(source, DialectType::DuckDB)
7162 && matches!(
7163 target,
7164 DialectType::Presto | DialectType::Trino | DialectType::Athena
7165 )
7166 {
7167 Action::DuckDBJsonValidToIsJson
7168 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
7169 } else if (name == "DATE_PART" || name == "DATEPART")
7170 && f.args.len() == 2
7171 && matches!(target, DialectType::Snowflake)
7172 && !matches!(source, DialectType::Snowflake)
7173 && matches!(
7174 &f.args[0],
7175 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7176 )
7177 {
7178 Action::DatePartUnquote
7179 } else if source_propagates_nulls
7180 && target_ignores_nulls
7181 && (name == "GREATEST" || name == "LEAST")
7182 && f.args.len() >= 2
7183 {
7184 Action::GreatestLeastNull
7185 } else if matches!(source, DialectType::Snowflake)
7186 && name == "ARRAY_GENERATE_RANGE"
7187 && f.args.len() >= 2
7188 {
7189 Action::ArrayGenerateRange
7190 } else if matches!(source, DialectType::Snowflake)
7191 && matches!(target, DialectType::DuckDB)
7192 && name == "DATE_TRUNC"
7193 && f.args.len() == 2
7194 {
7195 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
7196 // Logic based on Python sqlglot's input_type_preserved flag:
7197 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
7198 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
7199 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
7200 let unit_str = match &f.args[0] {
7201 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
7202 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
7203 Some(s.to_ascii_uppercase())
7204 }
7205 _ => None,
7206 };
7207 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
7208 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
7209 });
7210 match &f.args[1] {
7211 Expression::Cast(c) => match &c.to {
7212 DataType::Time { .. } => Action::DateTruncWrapCast,
7213 DataType::Custom { name }
7214 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
7215 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
7216 {
7217 Action::DateTruncWrapCast
7218 }
7219 DataType::Timestamp { timezone: true, .. } => {
7220 Action::DateTruncWrapCast
7221 }
7222 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
7223 DataType::Timestamp {
7224 timezone: false, ..
7225 } if is_date_unit => Action::DateTruncWrapCast,
7226 _ => Action::None,
7227 },
7228 _ => Action::None,
7229 }
7230 } else if matches!(source, DialectType::Snowflake)
7231 && matches!(target, DialectType::DuckDB)
7232 && name == "TO_DATE"
7233 && f.args.len() == 1
7234 && !matches!(
7235 &f.args[0],
7236 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7237 )
7238 {
7239 Action::ToDateToCast
7240 } else if !matches!(source, DialectType::Redshift)
7241 && matches!(target, DialectType::Redshift)
7242 && name == "CONVERT_TIMEZONE"
7243 && (f.args.len() == 2 || f.args.len() == 3)
7244 {
7245 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
7246 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
7247 // The Redshift parser adds 'UTC' as default source_tz, but when
7248 // transpiling from other dialects, we should preserve the original form.
7249 Action::ConvertTimezoneToExpr
7250 } else if matches!(source, DialectType::Snowflake)
7251 && matches!(target, DialectType::DuckDB)
7252 && name == "REGEXP_REPLACE"
7253 && f.args.len() == 4
7254 && !matches!(
7255 &f.args[3],
7256 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7257 )
7258 {
7259 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
7260 Action::RegexpReplaceSnowflakeToDuckDB
7261 } else if matches!(source, DialectType::Snowflake)
7262 && matches!(target, DialectType::DuckDB)
7263 && name == "REGEXP_REPLACE"
7264 && f.args.len() == 5
7265 {
7266 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
7267 Action::RegexpReplacePositionSnowflakeToDuckDB
7268 } else if matches!(source, DialectType::Snowflake)
7269 && matches!(target, DialectType::DuckDB)
7270 && name == "REGEXP_SUBSTR"
7271 {
7272 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
7273 Action::RegexpSubstrSnowflakeToDuckDB
7274 } else if matches!(source, DialectType::Snowflake)
7275 && matches!(target, DialectType::Snowflake)
7276 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
7277 && f.args.len() == 6
7278 {
7279 // Snowflake identity: strip trailing group=0
7280 Action::RegexpSubstrSnowflakeIdentity
7281 } else if matches!(source, DialectType::Snowflake)
7282 && matches!(target, DialectType::DuckDB)
7283 && name == "REGEXP_SUBSTR_ALL"
7284 {
7285 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
7286 Action::RegexpSubstrAllSnowflakeToDuckDB
7287 } else if matches!(source, DialectType::Snowflake)
7288 && matches!(target, DialectType::DuckDB)
7289 && name == "REGEXP_COUNT"
7290 {
7291 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
7292 Action::RegexpCountSnowflakeToDuckDB
7293 } else if matches!(source, DialectType::Snowflake)
7294 && matches!(target, DialectType::DuckDB)
7295 && name == "REGEXP_INSTR"
7296 {
7297 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
7298 Action::RegexpInstrSnowflakeToDuckDB
7299 } else if matches!(source, DialectType::BigQuery)
7300 && matches!(target, DialectType::Snowflake)
7301 && name == "REGEXP_EXTRACT_ALL"
7302 {
7303 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
7304 Action::RegexpExtractAllToSnowflake
7305 } else if name == "_BQ_TO_HEX" {
7306 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
7307 Action::BigQueryToHexBare
7308 } else if matches!(source, DialectType::BigQuery)
7309 && !matches!(target, DialectType::BigQuery)
7310 {
7311 // BigQuery-specific functions that need to be converted to standard forms
7312 match name.as_str() {
7313 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
7314 | "DATE_DIFF"
7315 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
7316 | "DATETIME_ADD" | "DATETIME_SUB"
7317 | "TIME_ADD" | "TIME_SUB"
7318 | "DATE_ADD" | "DATE_SUB"
7319 | "SAFE_DIVIDE"
7320 | "GENERATE_UUID"
7321 | "COUNTIF"
7322 | "EDIT_DISTANCE"
7323 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
7324 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
7325 | "TO_HEX"
7326 | "TO_JSON_STRING"
7327 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
7328 | "DIV"
7329 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
7330 | "LAST_DAY"
7331 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
7332 | "REGEXP_CONTAINS"
7333 | "CONTAINS_SUBSTR"
7334 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
7335 | "SAFE_CAST"
7336 | "GENERATE_DATE_ARRAY"
7337 | "PARSE_DATE" | "PARSE_TIMESTAMP"
7338 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
7339 | "ARRAY_CONCAT"
7340 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
7341 | "INSTR"
7342 | "MD5" | "SHA1" | "SHA256" | "SHA512"
7343 | "GENERATE_UUID()" // just in case
7344 | "REGEXP_EXTRACT_ALL"
7345 | "REGEXP_EXTRACT"
7346 | "INT64"
7347 | "ARRAY_CONCAT_AGG"
7348 | "DATE_DIFF(" // just in case
7349 | "TO_HEX_MD5" // internal
7350 | "MOD"
7351 | "CONCAT"
7352 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
7353 | "STRUCT"
7354 | "ROUND"
7355 | "MAKE_INTERVAL"
7356 | "ARRAY_TO_STRING"
7357 | "PERCENTILE_CONT"
7358 => Action::BigQueryFunctionNormalize,
7359 "ARRAY" if matches!(target, DialectType::Snowflake)
7360 && f.args.len() == 1
7361 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
7362 => Action::BigQueryArraySelectAsStructToSnowflake,
7363 _ => Action::None,
7364 }
7365 } else if matches!(source, DialectType::BigQuery)
7366 && matches!(target, DialectType::BigQuery)
7367 {
7368 // BigQuery -> BigQuery normalizations
7369 match name.as_str() {
7370 "TIMESTAMP_DIFF"
7371 | "DATETIME_DIFF"
7372 | "TIME_DIFF"
7373 | "DATE_DIFF"
7374 | "DATE_ADD"
7375 | "TO_HEX"
7376 | "CURRENT_TIMESTAMP"
7377 | "CURRENT_DATE"
7378 | "CURRENT_TIME"
7379 | "CURRENT_DATETIME"
7380 | "GENERATE_DATE_ARRAY"
7381 | "INSTR"
7382 | "FORMAT_DATETIME"
7383 | "DATETIME"
7384 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
7385 _ => Action::None,
7386 }
7387 } else {
7388 // Generic function normalization for non-BigQuery sources
7389 match name.as_str() {
7390 "ARBITRARY" | "AGGREGATE"
7391 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
7392 | "STRUCT_EXTRACT"
7393 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
7394 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
7395 | "SUBSTRINGINDEX"
7396 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
7397 | "UNICODE"
7398 | "XOR"
7399 | "ARRAY_REVERSE_SORT"
7400 | "ENCODE" | "DECODE"
7401 | "QUANTILE"
7402 | "EPOCH" | "EPOCH_MS"
7403 | "HASHBYTES"
7404 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
7405 | "APPROX_DISTINCT"
7406 | "DATE_PARSE" | "FORMAT_DATETIME"
7407 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
7408 | "RLIKE"
7409 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
7410 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
7411 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
7412 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
7413 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
7414 | "MAP" | "MAP_FROM_ENTRIES"
7415 | "COLLECT_LIST" | "COLLECT_SET"
7416 | "ISNAN" | "IS_NAN"
7417 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
7418 | "FORMAT_NUMBER"
7419 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
7420 | "ELEMENT_AT"
7421 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
7422 | "SPLIT_PART"
7423 // GENERATE_SERIES: handled separately below
7424 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
7425 | "JSON_QUERY" | "JSON_VALUE"
7426 | "JSON_SEARCH"
7427 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
7428 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
7429 | "CURDATE" | "CURTIME"
7430 | "ARRAY_TO_STRING"
7431 | "ARRAY_SORT" | "SORT_ARRAY"
7432 | "LEFT" | "RIGHT"
7433 | "MAP_FROM_ARRAYS"
7434 | "LIKE" | "ILIKE"
7435 | "ARRAY_CONCAT" | "LIST_CONCAT"
7436 | "QUANTILE_CONT" | "QUANTILE_DISC"
7437 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
7438 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
7439 | "LOCATE" | "STRPOS" | "INSTR"
7440 | "CHAR"
7441 // CONCAT: handled separately for COALESCE wrapping
7442 | "ARRAY_JOIN"
7443 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
7444 | "ISNULL"
7445 | "MONTHNAME"
7446 | "TO_TIMESTAMP"
7447 | "TO_DATE"
7448 | "TO_JSON"
7449 | "REGEXP_SPLIT"
7450 | "SPLIT"
7451 | "FORMATDATETIME"
7452 | "ARRAYJOIN"
7453 | "SPLITBYSTRING" | "SPLITBYREGEXP"
7454 | "NVL"
7455 | "TO_CHAR"
7456 | "DBMS_RANDOM.VALUE"
7457 | "REGEXP_LIKE"
7458 | "REPLICATE"
7459 | "LEN"
7460 | "COUNT_BIG"
7461 | "DATEFROMPARTS"
7462 | "DATETIMEFROMPARTS"
7463 | "CONVERT" | "TRY_CONVERT"
7464 | "STRFTIME" | "STRPTIME"
7465 | "DATE_FORMAT" | "FORMAT_DATE"
7466 | "PARSE_TIMESTAMP" | "PARSE_DATE"
7467 | "FROM_BASE64" | "TO_BASE64"
7468 | "GETDATE"
7469 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
7470 | "TO_UTF8" | "FROM_UTF8"
7471 | "STARTS_WITH" | "STARTSWITH"
7472 | "APPROX_COUNT_DISTINCT"
7473 | "JSON_FORMAT"
7474 | "SYSDATE"
7475 | "LOGICAL_OR" | "LOGICAL_AND"
7476 | "MONTHS_ADD"
7477 | "SCHEMA_NAME"
7478 | "STRTOL"
7479 | "EDITDIST3"
7480 | "FORMAT"
7481 | "LIST_CONTAINS" | "LIST_HAS"
7482 | "VARIANCE" | "STDDEV"
7483 | "ISINF"
7484 | "TO_UNIXTIME"
7485 | "FROM_UNIXTIME"
7486 | "DATEPART" | "DATE_PART"
7487 | "DATENAME"
7488 | "STRING_AGG"
7489 | "JSON_ARRAYAGG"
7490 | "APPROX_QUANTILE"
7491 | "MAKE_DATE"
7492 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
7493 | "RANGE"
7494 | "TRY_ELEMENT_AT"
7495 | "STR_TO_MAP"
7496 | "STRING"
7497 | "STR_TO_TIME"
7498 | "CURRENT_SCHEMA"
7499 | "LTRIM" | "RTRIM"
7500 | "UUID"
7501 | "FARM_FINGERPRINT"
7502 | "JSON_KEYS"
7503 | "WEEKOFYEAR"
7504 | "CONCAT_WS"
7505 | "TRY_DIVIDE"
7506 | "ARRAY_SLICE"
7507 | "ARRAY_PREPEND"
7508 | "ARRAY_REMOVE"
7509 | "GENERATE_DATE_ARRAY"
7510 | "PARSE_JSON"
7511 | "JSON_REMOVE"
7512 | "JSON_SET"
7513 | "LEVENSHTEIN"
7514 | "CURRENT_VERSION"
7515 | "ARRAY_MAX"
7516 | "ARRAY_MIN"
7517 | "JAROWINKLER_SIMILARITY"
7518 | "CURRENT_SCHEMAS"
7519 | "TO_VARIANT"
7520 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
7521 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
7522 => Action::GenericFunctionNormalize,
7523 // Canonical date functions -> dialect-specific
7524 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
7525 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
7526 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
7527 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
7528 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
7529 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
7530 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
7531 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
7532 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
7533 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
7534 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
7535 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
7536 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
7537 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
7538 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
7539 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
7540 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
7541 // STR_TO_DATE(x, fmt) -> dialect-specific
7542 "STR_TO_DATE" if f.args.len() == 2
7543 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
7544 "STR_TO_DATE" => Action::GenericFunctionNormalize,
7545 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
7546 "TS_OR_DS_ADD" if f.args.len() == 3
7547 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
7548 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
7549 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
7550 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
7551 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
7552 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
7553 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
7554 // IS_ASCII(x) -> dialect-specific
7555 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
7556 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
7557 "STR_POSITION" => Action::StrPositionConvert,
7558 // ARRAY_SUM -> dialect-specific
7559 "ARRAY_SUM" => Action::ArraySumConvert,
7560 // ARRAY_SIZE -> dialect-specific (Drill only)
7561 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
7562 // ARRAY_ANY -> dialect-specific
7563 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
7564 // Functions needing specific cross-dialect transforms
7565 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
7566 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
7567 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
7568 "ARRAY" if matches!(source, DialectType::BigQuery)
7569 && matches!(target, DialectType::Snowflake)
7570 && f.args.len() == 1
7571 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
7572 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
7573 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
7574 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
7575 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
7576 "DATE_TRUNC" if f.args.len() == 2
7577 && matches!(source, DialectType::Generic)
7578 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
7579 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
7580 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
7581 "TIMESTAMP_TRUNC" if f.args.len() >= 2
7582 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
7583 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
7584 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
7585 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7586 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
7587 // GENERATE_SERIES with interval normalization for PG target
7588 "GENERATE_SERIES" if f.args.len() >= 3
7589 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7590 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
7591 "GENERATE_SERIES" => Action::None, // passthrough for other cases
7592 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
7593 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7594 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
7595 "CONCAT" => Action::GenericFunctionNormalize,
7596 // DIV(a, b) -> target-specific integer division
7597 "DIV" if f.args.len() == 2
7598 && matches!(source, DialectType::PostgreSQL)
7599 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
7600 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7601 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
7602 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
7603 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
7604 "JSONB_EXISTS" if f.args.len() == 2
7605 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
7606 // DATE_BIN -> TIME_BUCKET for DuckDB
7607 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
7608 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
7609 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
7610 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
7611 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
7612 // ClickHouse any -> ANY_VALUE for other dialects
7613 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
7614 _ => Action::None,
7615 }
7616 }
7617 }
7618 Expression::AggregateFunction(af) => {
7619 let name = af.name.to_ascii_uppercase();
7620 match name.as_str() {
7621 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
7622 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
7623 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7624 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
7625 if matches!(target, DialectType::DuckDB) =>
7626 {
7627 Action::JsonObjectAggConvert
7628 }
7629 "ARRAY_AGG"
7630 if matches!(
7631 target,
7632 DialectType::Hive
7633 | DialectType::Spark
7634 | DialectType::Databricks
7635 ) =>
7636 {
7637 Action::ArrayAggToCollectList
7638 }
7639 "MAX_BY" | "MIN_BY"
7640 if matches!(
7641 target,
7642 DialectType::ClickHouse
7643 | DialectType::Spark
7644 | DialectType::Databricks
7645 | DialectType::DuckDB
7646 ) =>
7647 {
7648 Action::MaxByMinByConvert
7649 }
7650 "COLLECT_LIST"
7651 if matches!(
7652 target,
7653 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
7654 ) =>
7655 {
7656 Action::CollectListToArrayAgg
7657 }
7658 "COLLECT_SET"
7659 if matches!(
7660 target,
7661 DialectType::Presto
7662 | DialectType::Trino
7663 | DialectType::Snowflake
7664 | DialectType::DuckDB
7665 ) =>
7666 {
7667 Action::CollectSetConvert
7668 }
7669 "PERCENTILE"
7670 if matches!(
7671 target,
7672 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7673 ) =>
7674 {
7675 Action::PercentileConvert
7676 }
7677 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
7678 "CORR"
7679 if matches!(target, DialectType::DuckDB)
7680 && matches!(source, DialectType::Snowflake) =>
7681 {
7682 Action::CorrIsnanWrap
7683 }
7684 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
7685 "APPROX_QUANTILES"
7686 if matches!(source, DialectType::BigQuery)
7687 && matches!(target, DialectType::DuckDB) =>
7688 {
7689 Action::BigQueryApproxQuantiles
7690 }
7691 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
7692 "PERCENTILE_CONT"
7693 if matches!(source, DialectType::BigQuery)
7694 && matches!(target, DialectType::DuckDB)
7695 && af.args.len() >= 2 =>
7696 {
7697 Action::BigQueryPercentileContToDuckDB
7698 }
7699 _ => Action::None,
7700 }
7701 }
7702 Expression::JSONArrayAgg(_) => match target {
7703 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
7704 _ => Action::None,
7705 },
7706 Expression::ToNumber(tn) => {
7707 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
7708 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
7709 match target {
7710 DialectType::Oracle
7711 | DialectType::Snowflake
7712 | DialectType::Teradata => Action::None,
7713 _ => Action::GenericFunctionNormalize,
7714 }
7715 } else {
7716 Action::None
7717 }
7718 }
7719 Expression::Nvl2(_) => {
7720 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
7721 // Keep as NVL2 for dialects that support it natively
7722 match target {
7723 DialectType::Oracle
7724 | DialectType::Snowflake
7725 | DialectType::Teradata
7726 | DialectType::Spark
7727 | DialectType::Databricks
7728 | DialectType::Redshift => Action::None,
7729 _ => Action::Nvl2Expand,
7730 }
7731 }
7732 Expression::Decode(_) | Expression::DecodeCase(_) => {
7733 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
7734 // Keep as DECODE for Oracle/Snowflake
7735 match target {
7736 DialectType::Oracle | DialectType::Snowflake => Action::None,
7737 _ => Action::DecodeSimplify,
7738 }
7739 }
7740 Expression::Coalesce(ref cf) => {
7741 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
7742 // BigQuery keeps IFNULL natively when source is also BigQuery
7743 if cf.original_name.as_deref() == Some("IFNULL")
7744 && !(matches!(source, DialectType::BigQuery)
7745 && matches!(target, DialectType::BigQuery))
7746 {
7747 Action::IfnullToCoalesce
7748 } else {
7749 Action::None
7750 }
7751 }
7752 Expression::IfFunc(if_func) => {
7753 if matches!(source, DialectType::Snowflake)
7754 && matches!(
7755 target,
7756 DialectType::Presto | DialectType::Trino | DialectType::SQLite
7757 )
7758 && matches!(if_func.false_value, Some(Expression::Div(_)))
7759 {
7760 Action::Div0TypedDivision
7761 } else {
7762 Action::None
7763 }
7764 }
7765 Expression::ToJson(_) => match target {
7766 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
7767 DialectType::BigQuery => Action::ToJsonConvert,
7768 DialectType::DuckDB => Action::ToJsonConvert,
7769 _ => Action::None,
7770 },
7771 Expression::ArrayAgg(ref agg) => {
7772 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
7773 Action::ArrayAggToGroupConcat
7774 } else if matches!(
7775 target,
7776 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7777 ) {
7778 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
7779 Action::ArrayAggToCollectList
7780 } else if matches!(
7781 source,
7782 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7783 ) && matches!(target, DialectType::DuckDB)
7784 && agg.filter.is_some()
7785 {
7786 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
7787 // Need to add NOT x IS NULL to existing filter
7788 Action::ArrayAggNullFilter
7789 } else if matches!(target, DialectType::DuckDB)
7790 && agg.ignore_nulls == Some(true)
7791 && !agg.order_by.is_empty()
7792 {
7793 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
7794 Action::ArrayAggIgnoreNullsDuckDB
7795 } else if !matches!(source, DialectType::Snowflake) {
7796 Action::None
7797 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
7798 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
7799 || agg.name.is_none();
7800 if is_array_agg {
7801 Action::ArrayAggCollectList
7802 } else {
7803 Action::None
7804 }
7805 } else if matches!(
7806 target,
7807 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7808 ) && agg.filter.is_none()
7809 {
7810 Action::ArrayAggFilter
7811 } else {
7812 Action::None
7813 }
7814 }
7815 Expression::WithinGroup(wg) => {
7816 if matches!(source, DialectType::Snowflake)
7817 && matches!(
7818 target,
7819 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7820 )
7821 && matches!(wg.this, Expression::ArrayAgg(_))
7822 {
7823 Action::ArrayAggWithinGroupFilter
7824 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
7825 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
7826 || matches!(&wg.this, Expression::StringAgg(_))
7827 {
7828 Action::StringAggConvert
7829 } else if matches!(
7830 target,
7831 DialectType::Presto
7832 | DialectType::Trino
7833 | DialectType::Athena
7834 | DialectType::Spark
7835 | DialectType::Databricks
7836 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
7837 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
7838 || matches!(&wg.this, Expression::PercentileCont(_)))
7839 {
7840 Action::PercentileContConvert
7841 } else {
7842 Action::None
7843 }
7844 }
7845 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
7846 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
7847 // DATETIME is the timezone-unaware type
7848 Expression::Cast(ref c) => {
7849 if c.format.is_some()
7850 && (matches!(source, DialectType::BigQuery)
7851 || matches!(source, DialectType::Teradata))
7852 {
7853 Action::BigQueryCastFormat
7854 } else if matches!(target, DialectType::BigQuery)
7855 && !matches!(source, DialectType::BigQuery)
7856 && matches!(
7857 c.to,
7858 DataType::Timestamp {
7859 timezone: false,
7860 ..
7861 }
7862 )
7863 {
7864 Action::CastTimestampToDatetime
7865 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
7866 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
7867 && matches!(
7868 c.to,
7869 DataType::Timestamp {
7870 timezone: false,
7871 ..
7872 }
7873 )
7874 {
7875 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
7876 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
7877 Action::CastTimestampToDatetime
7878 } else if matches!(
7879 source,
7880 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7881 ) && matches!(
7882 target,
7883 DialectType::Presto
7884 | DialectType::Trino
7885 | DialectType::Athena
7886 | DialectType::DuckDB
7887 | DialectType::Snowflake
7888 | DialectType::BigQuery
7889 | DialectType::Databricks
7890 | DialectType::TSQL
7891 ) {
7892 Action::HiveCastToTryCast
7893 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
7894 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
7895 {
7896 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
7897 Action::CastTimestamptzToFunc
7898 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
7899 && matches!(
7900 target,
7901 DialectType::Hive
7902 | DialectType::Spark
7903 | DialectType::Databricks
7904 | DialectType::BigQuery
7905 )
7906 {
7907 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
7908 Action::CastTimestampStripTz
7909 } else if matches!(&c.to, DataType::Json)
7910 && matches!(source, DialectType::DuckDB)
7911 && matches!(target, DialectType::Snowflake)
7912 {
7913 Action::DuckDBCastJsonToVariant
7914 } else if matches!(&c.to, DataType::Json)
7915 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
7916 && matches!(
7917 target,
7918 DialectType::Presto
7919 | DialectType::Trino
7920 | DialectType::Athena
7921 | DialectType::Snowflake
7922 )
7923 {
7924 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
7925 // Only when the input is a string literal (JSON 'value' syntax)
7926 Action::JsonLiteralToJsonParse
7927 } else if matches!(&c.to, DataType::Json)
7928 && matches!(source, DialectType::DuckDB)
7929 && matches!(
7930 target,
7931 DialectType::Presto | DialectType::Trino | DialectType::Athena
7932 )
7933 {
7934 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
7935 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
7936 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
7937 // in the target to preserve DuckDB's parse semantics.
7938 Action::JsonLiteralToJsonParse
7939 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
7940 && matches!(target, DialectType::Spark | DialectType::Databricks)
7941 {
7942 // CAST(x AS JSON) -> TO_JSON(x) for Spark
7943 Action::CastToJsonForSpark
7944 } else if (matches!(
7945 &c.to,
7946 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
7947 )) && matches!(
7948 target,
7949 DialectType::Spark | DialectType::Databricks
7950 ) && (matches!(&c.this, Expression::ParseJson(_))
7951 || matches!(
7952 &c.this,
7953 Expression::Function(f)
7954 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
7955 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
7956 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
7957 ))
7958 {
7959 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
7960 // -> FROM_JSON(..., type_string) for Spark
7961 Action::CastJsonToFromJson
7962 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
7963 && matches!(
7964 c.to,
7965 DataType::Timestamp {
7966 timezone: false,
7967 ..
7968 }
7969 )
7970 && matches!(source, DialectType::DuckDB)
7971 {
7972 Action::StrftimeCastTimestamp
7973 } else if matches!(source, DialectType::DuckDB)
7974 && matches!(
7975 c.to,
7976 DataType::Decimal {
7977 precision: None,
7978 ..
7979 }
7980 )
7981 {
7982 Action::DecimalDefaultPrecision
7983 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
7984 && matches!(c.to, DataType::Char { length: None })
7985 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
7986 {
7987 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
7988 Action::MysqlCastCharToText
7989 } else if matches!(
7990 source,
7991 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7992 ) && matches!(
7993 target,
7994 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7995 ) && Self::has_varchar_char_type(&c.to)
7996 {
7997 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
7998 Action::SparkCastVarcharToString
7999 } else {
8000 Action::None
8001 }
8002 }
8003 Expression::SafeCast(ref c) => {
8004 if c.format.is_some()
8005 && matches!(source, DialectType::BigQuery)
8006 && !matches!(target, DialectType::BigQuery)
8007 {
8008 Action::BigQueryCastFormat
8009 } else {
8010 Action::None
8011 }
8012 }
8013 Expression::TryCast(ref c) => {
8014 if matches!(&c.to, DataType::Json)
8015 && matches!(source, DialectType::DuckDB)
8016 && matches!(
8017 target,
8018 DialectType::Presto | DialectType::Trino | DialectType::Athena
8019 )
8020 {
8021 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
8022 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
8023 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
8024 // to preserve DuckDB's parse-or-null semantics.
8025 Action::DuckDBTryCastJsonToTryJsonParse
8026 } else {
8027 Action::None
8028 }
8029 }
8030 Expression::JSONArray(ref ja)
8031 if matches!(target, DialectType::Snowflake)
8032 && ja.null_handling.is_none()
8033 && ja.return_type.is_none()
8034 && ja.strict.is_none() =>
8035 {
8036 Action::GenericFunctionNormalize
8037 }
8038 Expression::JsonArray(_) if matches!(target, DialectType::Snowflake) => {
8039 Action::GenericFunctionNormalize
8040 }
8041 // For DuckDB: DATE_TRUNC should preserve the input type
8042 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
8043 if matches!(source, DialectType::Snowflake)
8044 && matches!(target, DialectType::DuckDB)
8045 {
8046 Action::DateTruncWrapCast
8047 } else {
8048 Action::None
8049 }
8050 }
8051 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
8052 Expression::SetStatement(s) => {
8053 if matches!(target, DialectType::DuckDB)
8054 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
8055 && s.items.iter().any(|item| item.kind.is_none())
8056 {
8057 Action::SetToVariable
8058 } else {
8059 Action::None
8060 }
8061 }
8062 // Cross-dialect NULL ordering normalization.
8063 // When nulls_first is not specified, fill in the source dialect's implied
8064 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
8065 Expression::Ordered(o) => {
8066 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
8067 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
8068 Action::MysqlNullsOrdering
8069 } else {
8070 // Skip targets that don't support NULLS FIRST/LAST syntax
8071 let target_supports_nulls = !matches!(
8072 target,
8073 DialectType::MySQL
8074 | DialectType::TSQL
8075 | DialectType::StarRocks
8076 | DialectType::Doris
8077 );
8078 if o.nulls_first.is_none() && source != target && target_supports_nulls
8079 {
8080 Action::NullsOrdering
8081 } else {
8082 Action::None
8083 }
8084 }
8085 }
8086 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
8087 Expression::DataType(dt) => {
8088 if matches!(source, DialectType::BigQuery)
8089 && !matches!(target, DialectType::BigQuery)
8090 {
8091 match dt {
8092 DataType::Custom { ref name }
8093 if name.eq_ignore_ascii_case("INT64")
8094 || name.eq_ignore_ascii_case("FLOAT64")
8095 || name.eq_ignore_ascii_case("BOOL")
8096 || name.eq_ignore_ascii_case("BYTES")
8097 || name.eq_ignore_ascii_case("NUMERIC")
8098 || name.eq_ignore_ascii_case("STRING")
8099 || name.eq_ignore_ascii_case("DATETIME") =>
8100 {
8101 Action::BigQueryCastType
8102 }
8103 _ => Action::None,
8104 }
8105 } else if matches!(source, DialectType::TSQL) {
8106 // For TSQL source -> any target (including TSQL itself for REAL)
8107 match dt {
8108 // REAL -> FLOAT even for TSQL->TSQL
8109 DataType::Custom { ref name }
8110 if name.eq_ignore_ascii_case("REAL") =>
8111 {
8112 Action::TSQLTypeNormalize
8113 }
8114 DataType::Float {
8115 real_spelling: true,
8116 ..
8117 } => Action::TSQLTypeNormalize,
8118 // Other TSQL type normalizations only for non-TSQL targets
8119 DataType::Custom { ref name }
8120 if !matches!(target, DialectType::TSQL)
8121 && (name.eq_ignore_ascii_case("MONEY")
8122 || name.eq_ignore_ascii_case("SMALLMONEY")
8123 || name.eq_ignore_ascii_case("DATETIME2")
8124 || name.eq_ignore_ascii_case("IMAGE")
8125 || name.eq_ignore_ascii_case("BIT")
8126 || name.eq_ignore_ascii_case("ROWVERSION")
8127 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
8128 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
8129 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
8130 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
8131 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
8132 {
8133 Action::TSQLTypeNormalize
8134 }
8135 DataType::Float {
8136 precision: Some(_), ..
8137 } if !matches!(target, DialectType::TSQL) => {
8138 Action::TSQLTypeNormalize
8139 }
8140 DataType::TinyInt { .. }
8141 if !matches!(target, DialectType::TSQL) =>
8142 {
8143 Action::TSQLTypeNormalize
8144 }
8145 // INTEGER -> INT for Databricks/Spark targets
8146 DataType::Int {
8147 integer_spelling: true,
8148 ..
8149 } if matches!(
8150 target,
8151 DialectType::Databricks | DialectType::Spark
8152 ) =>
8153 {
8154 Action::TSQLTypeNormalize
8155 }
8156 _ => Action::None,
8157 }
8158 } else if (matches!(source, DialectType::Oracle)
8159 || matches!(source, DialectType::Generic))
8160 && !matches!(target, DialectType::Oracle)
8161 {
8162 match dt {
8163 DataType::Custom { ref name }
8164 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
8165 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
8166 || name.eq_ignore_ascii_case("VARCHAR2")
8167 || name.eq_ignore_ascii_case("NVARCHAR2") =>
8168 {
8169 Action::OracleVarchar2ToVarchar
8170 }
8171 _ => Action::None,
8172 }
8173 } else if matches!(target, DialectType::Snowflake)
8174 && !matches!(source, DialectType::Snowflake)
8175 {
8176 // When target is Snowflake but source is NOT Snowflake,
8177 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
8178 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
8179 // should keep their FLOAT spelling.
8180 match dt {
8181 DataType::Float { .. } => Action::SnowflakeFloatProtect,
8182 _ => Action::None,
8183 }
8184 } else {
8185 Action::None
8186 }
8187 }
8188 // LOWER patterns from BigQuery TO_HEX conversions:
8189 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
8190 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
8191 Expression::Lower(uf) => {
8192 if matches!(source, DialectType::BigQuery) {
8193 match &uf.this {
8194 Expression::Lower(_) => Action::BigQueryToHexLower,
8195 Expression::Function(f)
8196 if f.name == "TO_HEX"
8197 && matches!(target, DialectType::BigQuery) =>
8198 {
8199 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
8200 Action::BigQueryToHexLower
8201 }
8202 _ => Action::None,
8203 }
8204 } else {
8205 Action::None
8206 }
8207 }
8208 // UPPER patterns from BigQuery TO_HEX conversions:
8209 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
8210 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
8211 Expression::Upper(uf) => {
8212 if matches!(source, DialectType::BigQuery) {
8213 match &uf.this {
8214 Expression::Lower(_) => Action::BigQueryToHexUpper,
8215 _ => Action::None,
8216 }
8217 } else {
8218 Action::None
8219 }
8220 }
8221 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
8222 // Snowflake supports LAST_DAY with unit, so keep it there
8223 Expression::LastDay(ld) => {
8224 if matches!(source, DialectType::BigQuery)
8225 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
8226 && ld.unit.is_some()
8227 {
8228 Action::BigQueryLastDayStripUnit
8229 } else {
8230 Action::None
8231 }
8232 }
8233 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
8234 Expression::SafeDivide(_) => {
8235 if matches!(source, DialectType::BigQuery)
8236 && !matches!(target, DialectType::BigQuery)
8237 {
8238 Action::BigQuerySafeDivide
8239 } else {
8240 Action::None
8241 }
8242 }
8243 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
8244 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
8245 Expression::AnyValue(ref agg) => {
8246 if matches!(source, DialectType::BigQuery)
8247 && matches!(target, DialectType::DuckDB)
8248 && agg.having_max.is_some()
8249 {
8250 Action::BigQueryAnyValueHaving
8251 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
8252 && !matches!(source, DialectType::Spark | DialectType::Databricks)
8253 && agg.ignore_nulls.is_none()
8254 {
8255 Action::AnyValueIgnoreNulls
8256 } else {
8257 Action::None
8258 }
8259 }
8260 Expression::Any(ref q) => {
8261 if matches!(source, DialectType::PostgreSQL)
8262 && matches!(
8263 target,
8264 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8265 )
8266 && q.op.is_some()
8267 && !matches!(
8268 q.subquery,
8269 Expression::Select(_) | Expression::Subquery(_)
8270 )
8271 {
8272 Action::AnyToExists
8273 } else {
8274 Action::None
8275 }
8276 }
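8277 // NOTE(review): BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB is not handled by the arm below (stale comment?); the following arms handle RegexpLike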
8278 // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
8279 Expression::RegexpLike(_)
8280 if matches!(source, DialectType::Snowflake)
8281 && matches!(target, DialectType::DuckDB) =>
8282 {
8283 Action::RlikeSnowflakeToDuckDB
8284 }
8285 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
8286 Expression::RegexpLike(_)
8287 if !matches!(source, DialectType::DuckDB)
8288 && matches!(target, DialectType::DuckDB) =>
8289 {
8290 Action::RegexpLikeToDuckDB
8291 }
8292 // RegexpLike -> Exasol: anchor pattern with .*...*
8293 Expression::RegexpLike(_)
8294 if matches!(target, DialectType::Exasol) =>
8295 {
8296 Action::RegexpLikeExasolAnchor
8297 }
8298 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
8299 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
8300 Expression::Div(ref op)
8301 if matches!(
8302 source,
8303 DialectType::MySQL
8304 | DialectType::DuckDB
8305 | DialectType::SingleStore
8306 | DialectType::TiDB
8307 | DialectType::ClickHouse
8308 | DialectType::Doris
8309 ) && matches!(
8310 target,
8311 DialectType::PostgreSQL
8312 | DialectType::Redshift
8313 | DialectType::Drill
8314 | DialectType::Trino
8315 | DialectType::Presto
8316 | DialectType::Athena
8317 | DialectType::TSQL
8318 | DialectType::Teradata
8319 | DialectType::SQLite
8320 | DialectType::BigQuery
8321 | DialectType::Snowflake
8322 | DialectType::Databricks
8323 | DialectType::Oracle
8324 | DialectType::Materialize
8325 | DialectType::RisingWave
8326 ) =>
8327 {
8328 // Only wrap if RHS is not already NULLIF
8329 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
8330 {
8331 Action::MySQLSafeDivide
8332 } else {
8333 Action::None
8334 }
8335 }
8336 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
8337 // For TSQL/Fabric, convert to sp_rename instead
8338 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
8339 if let Some(crate::expressions::AlterTableAction::RenameTable(
8340 ref new_tbl,
8341 )) = at.actions.first()
8342 {
8343 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
8344 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
8345 Action::AlterTableToSpRename
8346 } else if new_tbl.schema.is_some()
8347 && matches!(
8348 target,
8349 DialectType::BigQuery
8350 | DialectType::Doris
8351 | DialectType::StarRocks
8352 | DialectType::DuckDB
8353 | DialectType::PostgreSQL
8354 | DialectType::Redshift
8355 )
8356 {
8357 Action::AlterTableRenameStripSchema
8358 } else {
8359 Action::None
8360 }
8361 } else {
8362 Action::None
8363 }
8364 }
8365 // EPOCH(x) expression -> target-specific epoch conversion
8366 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
8367 Action::EpochConvert
8368 }
8369 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
8370 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
8371 Action::EpochMsConvert
8372 }
8373 // STRING_AGG -> GROUP_CONCAT for MySQL/SingleStore/Doris/StarRocks/SQLite; Spark/Databricks targets are also routed to StringAggConvert
8374 Expression::StringAgg(_) => {
8375 if matches!(
8376 target,
8377 DialectType::MySQL
8378 | DialectType::SingleStore
8379 | DialectType::Doris
8380 | DialectType::StarRocks
8381 | DialectType::SQLite
8382 ) {
8383 Action::StringAggConvert
8384 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
8385 Action::StringAggConvert
8386 } else {
8387 Action::None
8388 }
8389 }
8390 Expression::CombinedParameterizedAgg(_) => Action::GenericFunctionNormalize,
8391 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
8392 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
8393 Expression::GroupConcat(_) => Action::GroupConcatConvert,
8394 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
8395 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
8396 Expression::Cardinality(_)
8397 if matches!(source, DialectType::DuckDB)
8398 && matches!(target, DialectType::DuckDB) =>
8399 {
8400 Action::None
8401 }
8402 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
8403 Action::ArrayLengthConvert
8404 }
8405 Expression::ArraySize(_) => {
8406 if matches!(target, DialectType::Drill) {
8407 Action::ArraySizeDrill
8408 } else {
8409 Action::ArrayLengthConvert
8410 }
8411 }
8412 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
8413 Expression::ArrayRemove(_) => match target {
8414 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
8415 Action::ArrayRemoveConvert
8416 }
8417 _ => Action::None,
8418 },
8419 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
8420 Expression::ArrayReverse(_) => match target {
8421 DialectType::ClickHouse => Action::ArrayReverseConvert,
8422 _ => Action::None,
8423 },
8424 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
8425 Expression::JsonKeys(_) => match target {
8426 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
8427 Action::JsonKeysConvert
8428 }
8429 _ => Action::None,
8430 },
8431 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
8432 Expression::ParseJson(_) => match target {
8433 DialectType::SQLite
8434 | DialectType::Doris
8435 | DialectType::MySQL
8436 | DialectType::StarRocks => Action::ParseJsonStrip,
8437 _ => Action::None,
8438 },
8439 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
8440 Expression::WeekOfYear(_)
8441 if matches!(target, DialectType::Snowflake)
8442 && !matches!(source, DialectType::Snowflake) =>
8443 {
8444 Action::WeekOfYearToWeekIso
8445 }
8446 // NVL: clear original_name so generator uses dialect-specific function names
8447 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
8448 // XOR: expand for dialects that don't support the XOR keyword
8449 Expression::Xor(_) => {
8450 let target_supports_xor = matches!(
8451 target,
8452 DialectType::MySQL
8453 | DialectType::SingleStore
8454 | DialectType::Doris
8455 | DialectType::StarRocks
8456 );
8457 if !target_supports_xor {
8458 Action::XorExpand
8459 } else {
8460 Action::None
8461 }
8462 }
8463 // TSQL #table -> temp table normalization (CREATE TABLE)
8464 Expression::CreateTable(ct)
8465 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8466 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8467 && ct.name.name.name.starts_with('#') =>
8468 {
8469 Action::TempTableHash
8470 }
8471 // TSQL #table -> strip # from table references in SELECT/etc.
8472 Expression::Table(tr)
8473 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8474 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8475 && tr.name.name.starts_with('#') =>
8476 {
8477 Action::TempTableHash
8478 }
8479 // TSQL #table -> strip # from DROP TABLE names
8480 Expression::DropTable(ref dt)
8481 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8482 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8483 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
8484 {
8485 Action::TempTableHash
8486 }
8487 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8488 Expression::JsonExtract(_)
8489 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8490 {
8491 Action::JsonExtractToTsql
8492 }
8493 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8494 Expression::JsonExtractScalar(_)
8495 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8496 {
8497 Action::JsonExtractToTsql
8498 }
8499 // JSON_EXTRACT -> JSONExtractString for ClickHouse
8500 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
8501 Action::JsonExtractToClickHouse
8502 }
8503 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
8504 Expression::JsonExtractScalar(_)
8505 if matches!(target, DialectType::ClickHouse) =>
8506 {
8507 Action::JsonExtractToClickHouse
8508 }
8509 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
8510 Expression::JsonExtract(ref f)
8511 if !f.arrow_syntax
8512 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
8513 {
8514 Action::JsonExtractToArrow
8515 }
8516 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
8517 Expression::JsonExtract(ref f)
8518 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
8519 && !matches!(
8520 source,
8521 DialectType::PostgreSQL
8522 | DialectType::Redshift
8523 | DialectType::Materialize
8524 )
8525 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
8526 {
8527 Action::JsonExtractToGetJsonObject
8528 }
8529 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
8530 Expression::JsonExtract(_)
8531 if matches!(
8532 target,
8533 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8534 ) =>
8535 {
8536 Action::JsonExtractToGetJsonObject
8537 }
8538 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Redshift, Snowflake, SQLite, DuckDB
8539 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
8540 Expression::JsonExtractScalar(ref f)
8541 if !f.arrow_syntax
8542 && !f.hash_arrow_syntax
8543 && matches!(
8544 target,
8545 DialectType::PostgreSQL
8546 | DialectType::Redshift
8547 | DialectType::Snowflake
8548 | DialectType::SQLite
8549 | DialectType::DuckDB
8550 ) =>
8551 {
8552 Action::JsonExtractScalarConvert
8553 }
8554 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
8555 Expression::JsonExtractScalar(_)
8556 if matches!(
8557 target,
8558 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8559 ) =>
8560 {
8561 Action::JsonExtractScalarToGetJsonObject
8562 }
8563 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
8564 Expression::JsonExtract(ref f)
8565 if !f.arrow_syntax
8566 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
8567 {
8568 Action::JsonPathNormalize
8569 }
8570 // JsonQuery (parsed JSON_QUERY) -> target-specific
8571 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
8572 // JsonValue (parsed JSON_VALUE) -> target-specific
8573 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
8574 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
8575 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
8576 Expression::AtTimeZone(_)
8577 if matches!(
8578 target,
8579 DialectType::Presto
8580 | DialectType::Trino
8581 | DialectType::Athena
8582 | DialectType::Spark
8583 | DialectType::Databricks
8584 | DialectType::BigQuery
8585 | DialectType::Snowflake
8586 ) =>
8587 {
8588 Action::AtTimeZoneConvert
8589 }
8590 // DAY_OF_WEEK -> dialect-specific
8591 Expression::DayOfWeek(_)
8592 if matches!(
8593 target,
8594 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
8595 ) =>
8596 {
8597 Action::DayOfWeekConvert
8598 }
8599 // CURRENT_USER -> CURRENT_USER() for Snowflake
8600 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
8601 Action::CurrentUserParens
8602 }
8603 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
8604 Expression::ElementAt(_)
8605 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
8606 {
8607 Action::ElementAtConvert
8608 }
8609 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
8610 Expression::ArrayFunc(ref arr)
8611 if !arr.bracket_notation
8612 && matches!(
8613 target,
8614 DialectType::Spark
8615 | DialectType::Databricks
8616 | DialectType::Hive
8617 | DialectType::BigQuery
8618 | DialectType::DuckDB
8619 | DialectType::Snowflake
8620 | DialectType::Presto
8621 | DialectType::Trino
8622 | DialectType::Athena
8623 | DialectType::ClickHouse
8624 | DialectType::StarRocks
8625 ) =>
8626 {
8627 Action::ArraySyntaxConvert
8628 }
8629 // VARIANCE expression -> varSamp for ClickHouse
8630 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
8631 Action::VarianceToClickHouse
8632 }
8633 // STDDEV expression -> stddevSamp for ClickHouse
8634 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
8635 Action::StddevToClickHouse
8636 }
8637 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
8638 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
8639 Action::ApproxQuantileConvert
8640 }
8641 // MonthsBetween -> target-specific
8642 Expression::MonthsBetween(_)
8643 if !matches!(
8644 target,
8645 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8646 ) =>
8647 {
8648 Action::MonthsBetweenConvert
8649 }
8650 // AddMonths -> target-specific DATEADD/DATE_ADD
8651 Expression::AddMonths(_) => Action::AddMonthsConvert,
8652 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
8653 Expression::MapFromArrays(_)
8654 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
8655 {
8656 Action::MapFromArraysConvert
8657 }
8658 // CURRENT_USER -> CURRENT_USER() for Spark
8659 Expression::CurrentUser(_)
8660 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
8661 {
8662 Action::CurrentUserSparkParens
8663 }
8664 // MONTH/YEAR/DAY('string') from Spark/Databricks/Hive -> cast string to DATE for DuckDB/Presto/Trino/Athena/PostgreSQL/Redshift
8665 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
8666 if matches!(
8667 source,
8668 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8669 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
8670 && matches!(
8671 target,
8672 DialectType::DuckDB
8673 | DialectType::Presto
8674 | DialectType::Trino
8675 | DialectType::Athena
8676 | DialectType::PostgreSQL
8677 | DialectType::Redshift
8678 ) =>
8679 {
8680 Action::SparkDateFuncCast
8681 }
8682 // $parameter -> @parameter for BigQuery
8683 Expression::Parameter(ref p)
8684 if matches!(target, DialectType::BigQuery)
8685 && matches!(source, DialectType::DuckDB)
8686 && (p.style == crate::expressions::ParameterStyle::Dollar
8687 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
8688 {
8689 Action::DollarParamConvert
8690 }
8691 // EscapeString literal: normalize literal newlines to \n
8692 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
8693 =>
8694 {
8695 Action::EscapeStringNormalize
8696 }
8697 // straight_join: keep lowercase for DuckDB, quote for MySQL
8698 Expression::Column(ref col)
8699 if col.name.name == "STRAIGHT_JOIN"
8700 && col.table.is_none()
8701 && matches!(source, DialectType::DuckDB)
8702 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
8703 {
8704 Action::StraightJoinCase
8705 }
8706 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
8707 // Snowflake INTERVAL format (also applied for PostgreSQL/Redshift targets): INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
8708 Expression::Interval(ref iv)
8709 if matches!(
8710 target,
8711 DialectType::Snowflake
8712 | DialectType::PostgreSQL
8713 | DialectType::Redshift
8714 ) && iv.unit.is_some()
8715 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
8716 {
8717 Action::SnowflakeIntervalFormat
8718 }
8719 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
8720 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
8721 if let Some(ref sample) = ts.sample {
8722 if !sample.explicit_method {
8723 Action::TablesampleReservoir
8724 } else {
8725 Action::None
8726 }
8727 } else {
8728 Action::None
8729 }
8730 }
8731 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
8732 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
8733 Expression::TableSample(ref ts)
8734 if matches!(target, DialectType::Snowflake)
8735 && !matches!(source, DialectType::Snowflake)
8736 && ts.sample.is_some() =>
8737 {
8738 if let Some(ref sample) = ts.sample {
8739 if !sample.explicit_method {
8740 Action::TablesampleSnowflakeStrip
8741 } else {
8742 Action::None
8743 }
8744 } else {
8745 Action::None
8746 }
8747 }
8748 Expression::Table(ref t)
8749 if matches!(target, DialectType::Snowflake)
8750 && !matches!(source, DialectType::Snowflake)
8751 && t.table_sample.is_some() =>
8752 {
8753 if let Some(ref sample) = t.table_sample {
8754 if !sample.explicit_method {
8755 Action::TablesampleSnowflakeStrip
8756 } else {
8757 Action::None
8758 }
8759 } else {
8760 Action::None
8761 }
8762 }
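8763 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL. NOTE(review): appears unreachable - the earlier AlterTable arm (guard `!at.actions.is_empty()`) already matches these inputs and returns AlterTableToSpRename for TSQL/Fabric; confirm and remove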
8764 Expression::AlterTable(ref at)
8765 if matches!(target, DialectType::TSQL | DialectType::Fabric)
8766 && !at.actions.is_empty()
8767 && matches!(
8768 at.actions.first(),
8769 Some(crate::expressions::AlterTableAction::RenameTable(_))
8770 ) =>
8771 {
8772 Action::AlterTableToSpRename
8773 }
8774 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
8775 Expression::Subscript(ref sub)
8776 if matches!(
8777 target,
8778 DialectType::BigQuery
8779 | DialectType::Hive
8780 | DialectType::Spark
8781 | DialectType::Databricks
8782 ) && matches!(
8783 source,
8784 DialectType::DuckDB
8785 | DialectType::PostgreSQL
8786 | DialectType::Presto
8787 | DialectType::Trino
8788 | DialectType::Redshift
8789 | DialectType::ClickHouse
8790 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
8791 {
8792 Action::ArrayIndexConvert
8793 }
8794 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
8795 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
8796 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
8797 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
8798 Expression::WindowFunction(ref wf) => {
8799 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
8800 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
8801 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
8802 if matches!(target, DialectType::BigQuery)
8803 && !is_row_number
8804 && !wf.over.order_by.is_empty()
8805 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
8806 {
8807 Action::BigQueryNullsOrdering
8808 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
8809 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
8810 } else {
8811 let source_nulls_last = matches!(source, DialectType::DuckDB);
8812 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
8813 matches!(
8814 f.kind,
8815 crate::expressions::WindowFrameKind::Range
8816 | crate::expressions::WindowFrameKind::Groups
8817 )
8818 });
8819 if source_nulls_last
8820 && matches!(target, DialectType::MySQL)
8821 && !wf.over.order_by.is_empty()
8822 && wf.over.order_by.iter().any(|o| !o.desc)
8823 && !has_range_frame
8824 {
8825 Action::MysqlNullsLastRewrite
8826 } else {
8827 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
8828 let is_ranking_window_func = matches!(
8829 &wf.this,
8830 Expression::FirstValue(_)
8831 | Expression::LastValue(_)
8832 | Expression::NthValue(_)
8833 );
8834 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
8835 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
8836 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
8837 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
8838 && f.exclude.is_none()
8839 });
8840 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
8841 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
8842 // Strip the default frame for Snowflake target
8843 Action::SnowflakeWindowFrameStrip
8844 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
8845 // Add default frame for non-Snowflake target
8846 Action::SnowflakeWindowFrameAdd
8847 } else {
8848 match &wf.this {
8849 Expression::FirstValue(ref vf)
8850 | Expression::LastValue(ref vf)
8851 if vf.ignore_nulls == Some(false) =>
8852 {
8853 match target {
8854 DialectType::SQLite => Action::RespectNullsConvert,
8855 _ => Action::None,
8856 }
8857 }
8858 _ => Action::None,
8859 }
8860 }
8861 } else {
8862 match &wf.this {
8863 Expression::FirstValue(ref vf)
8864 | Expression::LastValue(ref vf)
8865 if vf.ignore_nulls == Some(false) =>
8866 {
8867 // RESPECT NULLS
8868 match target {
8869 DialectType::SQLite | DialectType::PostgreSQL => {
8870 Action::RespectNullsConvert
8871 }
8872 _ => Action::None,
8873 }
8874 }
8875 _ => Action::None,
8876 }
8877 }
8878 }
8879 }
8880 }
8881 // CREATE TABLE a LIKE b -> dialect-specific transformations
8882 Expression::CreateTable(ref ct)
8883 if ct.columns.is_empty()
8884 && ct.constraints.iter().any(|c| {
8885 matches!(c, crate::expressions::TableConstraint::Like { .. })
8886 })
8887 && matches!(
8888 target,
8889 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
8890 ) =>
8891 {
8892 Action::CreateTableLikeToCtas
8893 }
8894 Expression::CreateTable(ref ct)
8895 if ct.columns.is_empty()
8896 && ct.constraints.iter().any(|c| {
8897 matches!(c, crate::expressions::TableConstraint::Like { .. })
8898 })
8899 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8900 {
8901 Action::CreateTableLikeToSelectInto
8902 }
8903 Expression::CreateTable(ref ct)
8904 if ct.columns.is_empty()
8905 && ct.constraints.iter().any(|c| {
8906 matches!(c, crate::expressions::TableConstraint::Like { .. })
8907 })
8908 && matches!(target, DialectType::ClickHouse) =>
8909 {
8910 Action::CreateTableLikeToAs
8911 }
8912 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
8913 Expression::CreateTable(ref ct)
8914 if matches!(target, DialectType::DuckDB)
8915 && matches!(
8916 source,
8917 DialectType::DuckDB
8918 | DialectType::Spark
8919 | DialectType::Databricks
8920 | DialectType::Hive
8921 ) =>
8922 {
8923 let has_comment = ct.columns.iter().any(|c| {
8924 c.comment.is_some()
8925 || c.constraints.iter().any(|con| {
8926 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
8927 })
8928 });
8929 let has_props = !ct.properties.is_empty();
8930 if has_comment || has_props {
8931 Action::CreateTableStripComment
8932 } else {
8933 Action::None
8934 }
8935 }
8936 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL/Redshift
8937 Expression::Array(_)
8938 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
8939 {
8940 Action::ArrayConcatBracketConvert
8941 }
8942 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
8943 Expression::ArrayFunc(ref arr)
8944 if arr.bracket_notation
8945 && matches!(source, DialectType::BigQuery)
8946 && matches!(target, DialectType::Redshift) =>
8947 {
8948 Action::ArrayConcatBracketConvert
8949 }
8950 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
8951 Expression::BitwiseOrAgg(ref f)
8952 | Expression::BitwiseAndAgg(ref f)
8953 | Expression::BitwiseXorAgg(ref f) => {
8954 if matches!(target, DialectType::DuckDB) {
8955 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
8956 if let Expression::Cast(ref c) = f.this {
8957 match &c.to {
8958 DataType::Float { .. }
8959 | DataType::Double { .. }
8960 | DataType::Decimal { .. } => Action::BitAggFloatCast,
8961 DataType::Custom { ref name }
8962 if name.eq_ignore_ascii_case("REAL") =>
8963 {
8964 Action::BitAggFloatCast
8965 }
8966 _ => Action::None,
8967 }
8968 } else {
8969 Action::None
8970 }
8971 } else if matches!(target, DialectType::Snowflake) {
8972 Action::BitAggSnowflakeRename
8973 } else {
8974 Action::None
8975 }
8976 }
8977 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
8978 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
8979 Action::FilterToIff
8980 }
8981 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
8982 Expression::Avg(ref f)
8983 | Expression::Sum(ref f)
8984 | Expression::Min(ref f)
8985 | Expression::Max(ref f)
8986 | Expression::CountIf(ref f)
8987 | Expression::Stddev(ref f)
8988 | Expression::StddevPop(ref f)
8989 | Expression::StddevSamp(ref f)
8990 | Expression::Variance(ref f)
8991 | Expression::VarPop(ref f)
8992 | Expression::VarSamp(ref f)
8993 | Expression::Median(ref f)
8994 | Expression::Mode(ref f)
8995 | Expression::First(ref f)
8996 | Expression::Last(ref f)
8997 | Expression::ApproxDistinct(ref f)
8998 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
8999 {
9000 Action::AggFilterToIff
9001 }
9002 Expression::Count(ref c)
9003 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
9004 {
9005 Action::AggFilterToIff
9006 }
9007 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
9008 Expression::Count(ref c)
9009 if c.distinct
9010 && matches!(&c.this, Some(Expression::Tuple(_)))
9011 && matches!(
9012 target,
9013 DialectType::Presto
9014 | DialectType::Trino
9015 | DialectType::DuckDB
9016 | DialectType::PostgreSQL
9017 ) =>
9018 {
9019 Action::CountDistinctMultiArg
9020 }
9021 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
9022 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
9023 Action::JsonToGetPath
9024 }
9025 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
9026 Expression::Struct(_)
9027 if matches!(
9028 target,
9029 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9030 ) && matches!(source, DialectType::DuckDB) =>
9031 {
9032 Action::StructToRow
9033 }
9034 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
9035 Expression::MapFunc(ref m)
9036 if m.curly_brace_syntax
9037 && matches!(
9038 target,
9039 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9040 )
9041 && matches!(source, DialectType::DuckDB) =>
9042 {
9043 Action::StructToRow
9044 }
9045 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino/Athena
9046 Expression::ApproxCountDistinct(_)
9047 if matches!(
9048 target,
9049 DialectType::Presto | DialectType::Trino | DialectType::Athena
9050 ) =>
9051 {
9052 Action::ApproxCountDistinctToApproxDistinct
9053 }
9054 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
9055 Expression::ArrayContains(_)
9056 if matches!(
9057 target,
9058 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
9059 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
9060 {
9061 Action::ArrayContainsConvert
9062 }
9063 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
9064 Expression::ArrayContains(_)
9065 if matches!(target, DialectType::DuckDB)
9066 && matches!(source, DialectType::Snowflake) =>
9067 {
9068 Action::ArrayContainsDuckDBConvert
9069 }
9070 // ARRAY_EXCEPT -> target-specific conversion
9071 Expression::ArrayExcept(_)
9072 if matches!(
9073 target,
9074 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
9075 ) =>
9076 {
9077 Action::ArrayExceptConvert
9078 }
9079 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
9080 Expression::ArrayPosition(_)
9081 if matches!(target, DialectType::Snowflake)
9082 && !matches!(source, DialectType::Snowflake) =>
9083 {
9084 Action::ArrayPositionSnowflakeSwap
9085 }
9086 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
9087 Expression::ArrayPosition(_)
9088 if matches!(target, DialectType::DuckDB)
9089 && matches!(source, DialectType::Snowflake) =>
9090 {
9091 Action::SnowflakeArrayPositionToDuckDB
9092 }
9093 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
9094 Expression::ArrayDistinct(_)
9095 if matches!(target, DialectType::ClickHouse) =>
9096 {
9097 Action::ArrayDistinctClickHouse
9098 }
9099 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
9100 Expression::ArrayDistinct(_)
9101 if matches!(target, DialectType::DuckDB)
9102 && matches!(source, DialectType::Snowflake) =>
9103 {
9104 Action::ArrayDistinctConvert
9105 }
9106 // StrPosition with position -> complex expansion for Presto/Trino/Athena/DuckDB
9107 // STRPOS doesn't support a position arg in these dialects
9108 Expression::StrPosition(ref sp)
9109 if sp.position.is_some()
9110 && matches!(
9111 target,
9112 DialectType::Presto
9113 | DialectType::Trino
9114 | DialectType::Athena
9115 | DialectType::DuckDB
9116 ) =>
9117 {
9118 Action::StrPositionExpand
9119 }
9120 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
9121 Expression::First(ref f)
9122 if f.ignore_nulls == Some(true)
9123 && matches!(target, DialectType::DuckDB) =>
9124 {
9125 Action::FirstToAnyValue
9126 }
9127 // BEGIN -> START TRANSACTION for Presto/Trino/Athena
9128 Expression::Command(ref cmd)
9129 if cmd.this.eq_ignore_ascii_case("BEGIN")
9130 && matches!(
9131 target,
9132 DialectType::Presto | DialectType::Trino | DialectType::Athena
9133 ) =>
9134 {
9135 // Handled inline below
9136 Action::None // We'll handle it directly
9137 }
9138 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
9139 // PostgreSQL # is parsed as BitwiseXor (which is correct).
9140 // a || b (Concat operator) -> CONCAT function for Presto/Trino
9141 Expression::Concat(ref _op)
9142 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9143 && matches!(target, DialectType::Presto | DialectType::Trino) =>
9144 {
9145 Action::PipeConcatToConcat
9146 }
9147 _ => Action::None,
9148 }
9149 };
9150
9151 match action {
9152 Action::None => {
9153 // Handle inline transforms that don't need a dedicated action
9154 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
9155 if let Some(rewritten) = Self::rewrite_tsql_interval_arithmetic(&e) {
9156 return Ok(rewritten);
9157 }
9158 }
9159
9160 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
9161 if let Expression::Between(ref b) = e {
9162 if let Some(sym) = b.symmetric {
9163 let keeps_symmetric =
9164 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
9165 if !keeps_symmetric {
9166 if sym {
9167 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
9168 let b = if let Expression::Between(b) = e {
9169 *b
9170 } else {
9171 unreachable!()
9172 };
9173 let between1 = Expression::Between(Box::new(
9174 crate::expressions::Between {
9175 this: b.this.clone(),
9176 low: b.low.clone(),
9177 high: b.high.clone(),
9178 not: b.not,
9179 symmetric: None,
9180 },
9181 ));
9182 let between2 = Expression::Between(Box::new(
9183 crate::expressions::Between {
9184 this: b.this,
9185 low: b.high,
9186 high: b.low,
9187 not: b.not,
9188 symmetric: None,
9189 },
9190 ));
9191 return Ok(Expression::Paren(Box::new(
9192 crate::expressions::Paren {
9193 this: Expression::Or(Box::new(
9194 crate::expressions::BinaryOp::new(
9195 between1, between2,
9196 ),
9197 )),
9198 trailing_comments: vec![],
9199 },
9200 )));
9201 } else {
9202 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
9203 let b = if let Expression::Between(b) = e {
9204 *b
9205 } else {
9206 unreachable!()
9207 };
9208 return Ok(Expression::Between(Box::new(
9209 crate::expressions::Between {
9210 this: b.this,
9211 low: b.low,
9212 high: b.high,
9213 not: b.not,
9214 symmetric: None,
9215 },
9216 )));
9217 }
9218 }
9219 }
9220 }
9221
9222 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
9223 if let Expression::ILike(ref _like) = e {
9224 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
9225 let like = if let Expression::ILike(l) = e {
9226 *l
9227 } else {
9228 unreachable!()
9229 };
9230 let lower_left = Expression::Function(Box::new(Function::new(
9231 "LOWER".to_string(),
9232 vec![like.left],
9233 )));
9234 let lower_right = Expression::Function(Box::new(Function::new(
9235 "LOWER".to_string(),
9236 vec![like.right],
9237 )));
9238 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
9239 left: lower_left,
9240 right: lower_right,
9241 escape: like.escape,
9242 quantifier: like.quantifier,
9243 inferred_type: None,
9244 })));
9245 }
9246 }
9247
9248 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL/Redshift/DuckDB/SQLite, kept as DBMS_RANDOM.VALUE for Oracle, RAND() for others
9249 if let Expression::MethodCall(ref mc) = e {
9250 if matches!(source, DialectType::Oracle)
9251 && mc.method.name.eq_ignore_ascii_case("VALUE")
9252 && mc.args.is_empty()
9253 {
9254 let is_dbms_random = match &mc.this {
9255 Expression::Identifier(id) => {
9256 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
9257 }
9258 Expression::Column(col) => {
9259 col.table.is_none()
9260 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
9261 }
9262 _ => false,
9263 };
9264 if is_dbms_random {
9265 let func_name = match target {
9266 DialectType::PostgreSQL
9267 | DialectType::Redshift
9268 | DialectType::DuckDB
9269 | DialectType::SQLite => "RANDOM",
9270 DialectType::Oracle => "DBMS_RANDOM.VALUE",
9271 _ => "RAND",
9272 };
9273 return Ok(Expression::Function(Box::new(Function::new(
9274 func_name.to_string(),
9275 vec![],
9276 ))));
9277 }
9278 }
9279 }
9280 // TRIM without explicit position -> add BOTH for ClickHouse
9281 if let Expression::Trim(ref trim) = e {
9282 if matches!(target, DialectType::ClickHouse)
9283 && trim.sql_standard_syntax
9284 && trim.characters.is_some()
9285 && !trim.position_explicit
9286 {
9287 let mut new_trim = (**trim).clone();
9288 new_trim.position_explicit = true;
9289 return Ok(Expression::Trim(Box::new(new_trim)));
9290 }
9291 }
9292 // BEGIN -> START TRANSACTION for Presto/Trino/Athena
9293 if let Expression::Transaction(ref txn) = e {
9294 if matches!(
9295 target,
9296 DialectType::Presto | DialectType::Trino | DialectType::Athena
9297 ) {
9298 // Convert BEGIN to START TRANSACTION by setting mark to "START"
9299 let mut txn = txn.clone();
9300 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
9301 "START".to_string(),
9302 ))));
9303 return Ok(Expression::Transaction(Box::new(*txn)));
9304 }
9305 }
9306 // IS TRUE/FALSE -> simplified forms for Presto/Trino/Athena
9307 if matches!(
9308 target,
9309 DialectType::Presto | DialectType::Trino | DialectType::Athena
9310 ) {
9311 match &e {
9312 Expression::IsTrue(itf) if !itf.not => {
9313 // x IS TRUE -> x
9314 return Ok(itf.this.clone());
9315 }
9316 Expression::IsTrue(itf) if itf.not => {
9317 // x IS NOT TRUE -> NOT x
9318 return Ok(Expression::Not(Box::new(
9319 crate::expressions::UnaryOp {
9320 this: itf.this.clone(),
9321 inferred_type: None,
9322 },
9323 )));
9324 }
9325 Expression::IsFalse(itf) if !itf.not => {
9326 // x IS FALSE -> NOT x
9327 return Ok(Expression::Not(Box::new(
9328 crate::expressions::UnaryOp {
9329 this: itf.this.clone(),
9330 inferred_type: None,
9331 },
9332 )));
9333 }
9334 Expression::IsFalse(itf) if itf.not => {
9335 // x IS NOT FALSE -> NOT NOT x
9336 let not_x =
9337 Expression::Not(Box::new(crate::expressions::UnaryOp {
9338 this: itf.this.clone(),
9339 inferred_type: None,
9340 }));
9341 return Ok(Expression::Not(Box::new(
9342 crate::expressions::UnaryOp {
9343 this: not_x,
9344 inferred_type: None,
9345 },
9346 )));
9347 }
9348 _ => {}
9349 }
9350 }
9351 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
9352 if matches!(target, DialectType::Redshift) {
9353 if let Expression::IsFalse(ref itf) = e {
9354 if itf.not {
9355 return Ok(Expression::Not(Box::new(
9356 crate::expressions::UnaryOp {
9357 this: Expression::IsFalse(Box::new(
9358 crate::expressions::IsTrueFalse {
9359 this: itf.this.clone(),
9360 not: false,
9361 },
9362 )),
9363 inferred_type: None,
9364 },
9365 )));
9366 }
9367 }
9368 }
9369 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
9370 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
9371 if let Expression::Function(ref f) = e {
9372 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
9373 && matches!(source, DialectType::Snowflake)
9374 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
9375 {
9376 if f.args.len() == 3 {
9377 let mut args = f.args.clone();
9378 args.push(Expression::string("g"));
9379 return Ok(Expression::Function(Box::new(Function::new(
9380 "REGEXP_REPLACE".to_string(),
9381 args,
9382 ))));
9383 } else if f.args.len() == 4 {
9384 // 4th arg might be position, add 'g' as 5th
9385 let mut args = f.args.clone();
9386 args.push(Expression::string("g"));
9387 return Ok(Expression::Function(Box::new(Function::new(
9388 "REGEXP_REPLACE".to_string(),
9389 args,
9390 ))));
9391 }
9392 }
9393 }
9394 Ok(e)
9395 }
9396
9397 Action::GreatestLeastNull => {
9398 let f = if let Expression::Function(f) = e {
9399 *f
9400 } else {
9401 unreachable!("action only triggered for Function expressions")
9402 };
9403 let mut null_checks: Vec<Expression> = f
9404 .args
9405 .iter()
9406 .map(|a| {
9407 Expression::IsNull(Box::new(IsNull {
9408 this: a.clone(),
9409 not: false,
9410 postfix_form: false,
9411 }))
9412 })
9413 .collect();
9414 let condition = if null_checks.len() == 1 {
9415 null_checks.remove(0)
9416 } else {
9417 let first = null_checks.remove(0);
9418 null_checks.into_iter().fold(first, |acc, check| {
9419 Expression::Or(Box::new(BinaryOp::new(acc, check)))
9420 })
9421 };
9422 Ok(Expression::Case(Box::new(Case {
9423 operand: None,
9424 whens: vec![(condition, Expression::Null(Null))],
9425 else_: Some(Expression::Function(Box::new(Function::new(
9426 f.name, f.args,
9427 )))),
9428 comments: Vec::new(),
9429 inferred_type: None,
9430 })))
9431 }
9432
9433 Action::ArrayGenerateRange => {
9434 let f = if let Expression::Function(f) = e {
9435 *f
9436 } else {
9437 unreachable!("action only triggered for Function expressions")
9438 };
9439 let start = f.args[0].clone();
9440 let end = f.args[1].clone();
9441 let step = f.args.get(2).cloned();
9442
9443 // Helper: compute end - 1 for converting exclusive→inclusive end.
9444 // When end is a literal number, simplify to a computed literal.
9445 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
9446 // Try to simplify literal numbers
9447 match end {
9448 Expression::Literal(lit)
9449 if matches!(lit.as_ref(), Literal::Number(_)) =>
9450 {
9451 let Literal::Number(n) = lit.as_ref() else {
9452 unreachable!()
9453 };
9454 if let Ok(val) = n.parse::<i64>() {
9455 return Expression::number(val - 1);
9456 }
9457 }
9458 Expression::Neg(u) => {
9459 if let Expression::Literal(lit) = &u.this {
9460 if let Literal::Number(n) = lit.as_ref() {
9461 if let Ok(val) = n.parse::<i64>() {
9462 return Expression::number(-val - 1);
9463 }
9464 }
9465 }
9466 }
9467 _ => {}
9468 }
9469 // Non-literal: produce end - 1 expression
9470 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
9471 }
9472
9473 match target {
9474 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
9475 // so no adjustment needed — just rename the function.
9476 DialectType::Snowflake => {
9477 let mut args = vec![start, end];
9478 if let Some(s) = step {
9479 args.push(s);
9480 }
9481 Ok(Expression::Function(Box::new(Function::new(
9482 "ARRAY_GENERATE_RANGE".to_string(),
9483 args,
9484 ))))
9485 }
9486 DialectType::DuckDB => {
9487 let mut args = vec![start, end];
9488 if let Some(s) = step {
9489 args.push(s);
9490 }
9491 Ok(Expression::Function(Box::new(Function::new(
9492 "RANGE".to_string(),
9493 args,
9494 ))))
9495 }
9496 // These dialects use inclusive end, so convert exclusive→inclusive.
9497 // Presto/Trino: simplify literal numbers (3 → 2).
9498 DialectType::Presto | DialectType::Trino => {
9499 let end_inclusive = exclusive_to_inclusive_end(&end);
9500 let mut args = vec![start, end_inclusive];
9501 if let Some(s) = step {
9502 args.push(s);
9503 }
9504 Ok(Expression::Function(Box::new(Function::new(
9505 "SEQUENCE".to_string(),
9506 args,
9507 ))))
9508 }
9509 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
9510 DialectType::PostgreSQL | DialectType::Redshift => {
9511 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9512 end.clone(),
9513 Expression::number(1),
9514 )));
9515 let mut args = vec![start, end_minus_1];
9516 if let Some(s) = step {
9517 args.push(s);
9518 }
9519 Ok(Expression::Function(Box::new(Function::new(
9520 "GENERATE_SERIES".to_string(),
9521 args,
9522 ))))
9523 }
9524 DialectType::BigQuery => {
9525 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9526 end.clone(),
9527 Expression::number(1),
9528 )));
9529 let mut args = vec![start, end_minus_1];
9530 if let Some(s) = step {
9531 args.push(s);
9532 }
9533 Ok(Expression::Function(Box::new(Function::new(
9534 "GENERATE_ARRAY".to_string(),
9535 args,
9536 ))))
9537 }
9538 _ => Ok(Expression::Function(Box::new(Function::new(
9539 f.name, f.args,
9540 )))),
9541 }
9542 }
9543
9544 Action::Div0TypedDivision => {
9545 let if_func = if let Expression::IfFunc(f) = e {
9546 *f
9547 } else {
9548 unreachable!("action only triggered for IfFunc expressions")
9549 };
9550 if let Some(Expression::Div(div)) = if_func.false_value {
9551 let cast_type = if matches!(target, DialectType::SQLite) {
9552 DataType::Float {
9553 precision: None,
9554 scale: None,
9555 real_spelling: true,
9556 }
9557 } else {
9558 DataType::Double {
9559 precision: None,
9560 scale: None,
9561 }
9562 };
9563 let casted_left = Expression::Cast(Box::new(Cast {
9564 this: div.left,
9565 to: cast_type,
9566 trailing_comments: vec![],
9567 double_colon_syntax: false,
9568 format: None,
9569 default: None,
9570 inferred_type: None,
9571 }));
9572 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9573 condition: if_func.condition,
9574 true_value: if_func.true_value,
9575 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
9576 casted_left,
9577 div.right,
9578 )))),
9579 original_name: if_func.original_name,
9580 inferred_type: None,
9581 })))
9582 } else {
9583 // Not actually a Div, reconstruct
9584 Ok(Expression::IfFunc(Box::new(if_func)))
9585 }
9586 }
9587
9588 Action::ArrayAggCollectList => {
9589 let agg = if let Expression::ArrayAgg(a) = e {
9590 *a
9591 } else {
9592 unreachable!("action only triggered for ArrayAgg expressions")
9593 };
9594 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9595 name: Some("COLLECT_LIST".to_string()),
9596 ..agg
9597 })))
9598 }
9599
9600 Action::ArrayAggToGroupConcat => {
9601 let agg = if let Expression::ArrayAgg(a) = e {
9602 *a
9603 } else {
9604 unreachable!("action only triggered for ArrayAgg expressions")
9605 };
9606 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9607 name: Some("GROUP_CONCAT".to_string()),
9608 ..agg
9609 })))
9610 }
9611
9612 Action::ArrayAggWithinGroupFilter => {
9613 let wg = if let Expression::WithinGroup(w) = e {
9614 *w
9615 } else {
9616 unreachable!("action only triggered for WithinGroup expressions")
9617 };
9618 if let Expression::ArrayAgg(inner_agg) = wg.this {
9619 let col = inner_agg.this.clone();
9620 let filter = Expression::IsNull(Box::new(IsNull {
9621 this: col,
9622 not: true,
9623 postfix_form: false,
9624 }));
9625 // For DuckDB, add explicit NULLS FIRST for DESC ordering
9626 let order_by = if matches!(target, DialectType::DuckDB) {
9627 wg.order_by
9628 .into_iter()
9629 .map(|mut o| {
9630 if o.desc && o.nulls_first.is_none() {
9631 o.nulls_first = Some(true);
9632 }
9633 o
9634 })
9635 .collect()
9636 } else {
9637 wg.order_by
9638 };
9639 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9640 this: inner_agg.this,
9641 distinct: inner_agg.distinct,
9642 filter: Some(filter),
9643 order_by,
9644 name: inner_agg.name,
9645 ignore_nulls: inner_agg.ignore_nulls,
9646 having_max: inner_agg.having_max,
9647 limit: inner_agg.limit,
9648 inferred_type: None,
9649 })))
9650 } else {
9651 Ok(Expression::WithinGroup(Box::new(wg)))
9652 }
9653 }
9654
9655 Action::ArrayAggFilter => {
9656 let agg = if let Expression::ArrayAgg(a) = e {
9657 *a
9658 } else {
9659 unreachable!("action only triggered for ArrayAgg expressions")
9660 };
9661 let col = agg.this.clone();
9662 let filter = Expression::IsNull(Box::new(IsNull {
9663 this: col,
9664 not: true,
9665 postfix_form: false,
9666 }));
9667 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9668 filter: Some(filter),
9669 ..agg
9670 })))
9671 }
9672
9673 Action::ArrayAggNullFilter => {
9674 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
9675 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
9676 let agg = if let Expression::ArrayAgg(a) = e {
9677 *a
9678 } else {
9679 unreachable!("action only triggered for ArrayAgg expressions")
9680 };
9681 let col = agg.this.clone();
9682 let not_null = Expression::IsNull(Box::new(IsNull {
9683 this: col,
9684 not: true,
9685 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
9686 }));
9687 let new_filter = if let Some(existing_filter) = agg.filter {
9688 // AND the NOT IS NULL with existing filter
9689 Expression::And(Box::new(crate::expressions::BinaryOp::new(
9690 existing_filter,
9691 not_null,
9692 )))
9693 } else {
9694 not_null
9695 };
9696 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9697 filter: Some(new_filter),
9698 ..agg
9699 })))
9700 }
9701
9702 Action::BigQueryArraySelectAsStructToSnowflake => {
9703 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
9704 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
9705 if let Expression::Function(mut f) = e {
9706 let is_match = f.args.len() == 1
9707 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
9708 if is_match {
9709 let inner_select = match f.args.remove(0) {
9710 Expression::Select(s) => *s,
9711 _ => unreachable!(
9712 "argument already verified to be a Select expression"
9713 ),
9714 };
9715 // Build OBJECT_CONSTRUCT args from SELECT expressions
9716 let mut oc_args = Vec::new();
9717 for expr in &inner_select.expressions {
9718 match expr {
9719 Expression::Alias(a) => {
9720 let key = Expression::Literal(Box::new(Literal::String(
9721 a.alias.name.clone(),
9722 )));
9723 let value = a.this.clone();
9724 oc_args.push(key);
9725 oc_args.push(value);
9726 }
9727 Expression::Column(c) => {
9728 let key = Expression::Literal(Box::new(Literal::String(
9729 c.name.name.clone(),
9730 )));
9731 oc_args.push(key);
9732 oc_args.push(expr.clone());
9733 }
9734 _ => {
9735 oc_args.push(expr.clone());
9736 }
9737 }
9738 }
9739 let object_construct = Expression::Function(Box::new(Function::new(
9740 "OBJECT_CONSTRUCT".to_string(),
9741 oc_args,
9742 )));
9743 let array_agg = Expression::Function(Box::new(Function::new(
9744 "ARRAY_AGG".to_string(),
9745 vec![object_construct],
9746 )));
9747 let mut new_select = crate::expressions::Select::new();
9748 new_select.expressions = vec![array_agg];
9749 new_select.from = inner_select.from.clone();
9750 new_select.where_clause = inner_select.where_clause.clone();
9751 new_select.group_by = inner_select.group_by.clone();
9752 new_select.having = inner_select.having.clone();
9753 new_select.joins = inner_select.joins.clone();
9754 Ok(Expression::Subquery(Box::new(
9755 crate::expressions::Subquery {
9756 this: Expression::Select(Box::new(new_select)),
9757 alias: None,
9758 column_aliases: Vec::new(),
9759 order_by: None,
9760 limit: None,
9761 offset: None,
9762 distribute_by: None,
9763 sort_by: None,
9764 cluster_by: None,
9765 lateral: false,
9766 modifiers_inside: false,
9767 trailing_comments: Vec::new(),
9768 inferred_type: None,
9769 },
9770 )))
9771 } else {
9772 Ok(Expression::Function(f))
9773 }
9774 } else {
9775 Ok(e)
9776 }
9777 }
9778
9779 Action::BigQueryPercentileContToDuckDB => {
9780 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
9781 if let Expression::AggregateFunction(mut af) = e {
9782 af.name = "QUANTILE_CONT".to_string();
9783 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
9784 // Keep only first 2 args
9785 if af.args.len() > 2 {
9786 af.args.truncate(2);
9787 }
9788 Ok(Expression::AggregateFunction(af))
9789 } else {
9790 Ok(e)
9791 }
9792 }
9793
9794 Action::ArrayAggIgnoreNullsDuckDB => {
9795 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
9796 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
9797 let mut agg = if let Expression::ArrayAgg(a) = e {
9798 *a
9799 } else {
9800 unreachable!("action only triggered for ArrayAgg expressions")
9801 };
9802 agg.ignore_nulls = None; // Strip IGNORE NULLS
9803 if !agg.order_by.is_empty() {
9804 agg.order_by[0].nulls_first = Some(true);
9805 }
9806 Ok(Expression::ArrayAgg(Box::new(agg)))
9807 }
9808
9809 Action::CountDistinctMultiArg => {
9810 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
9811 if let Expression::Count(c) = e {
9812 if let Some(Expression::Tuple(t)) = c.this {
9813 let args = t.expressions;
9814 // Build CASE expression:
9815 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
9816 let mut whens = Vec::new();
9817 for arg in &args {
9818 whens.push((
9819 Expression::IsNull(Box::new(IsNull {
9820 this: arg.clone(),
9821 not: false,
9822 postfix_form: false,
9823 })),
9824 Expression::Null(crate::expressions::Null),
9825 ));
9826 }
9827 // Build the tuple for ELSE
9828 let tuple_expr =
9829 Expression::Tuple(Box::new(crate::expressions::Tuple {
9830 expressions: args,
9831 }));
9832 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
9833 operand: None,
9834 whens,
9835 else_: Some(tuple_expr),
9836 comments: Vec::new(),
9837 inferred_type: None,
9838 }));
9839 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
9840 this: Some(case_expr),
9841 star: false,
9842 distinct: true,
9843 filter: c.filter,
9844 ignore_nulls: c.ignore_nulls,
9845 original_name: c.original_name,
9846 inferred_type: None,
9847 })))
9848 } else {
9849 Ok(Expression::Count(c))
9850 }
9851 } else {
9852 Ok(e)
9853 }
9854 }
9855
9856 Action::CastTimestampToDatetime => {
9857 let c = if let Expression::Cast(c) = e {
9858 *c
9859 } else {
9860 unreachable!("action only triggered for Cast expressions")
9861 };
9862 Ok(Expression::Cast(Box::new(Cast {
9863 to: DataType::Custom {
9864 name: "DATETIME".to_string(),
9865 },
9866 ..c
9867 })))
9868 }
9869
9870 Action::CastTimestampStripTz => {
9871 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
9872 let c = if let Expression::Cast(c) = e {
9873 *c
9874 } else {
9875 unreachable!("action only triggered for Cast expressions")
9876 };
9877 Ok(Expression::Cast(Box::new(Cast {
9878 to: DataType::Timestamp {
9879 precision: None,
9880 timezone: false,
9881 },
9882 ..c
9883 })))
9884 }
9885
9886 Action::CastTimestamptzToFunc => {
9887 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
9888 let c = if let Expression::Cast(c) = e {
9889 *c
9890 } else {
9891 unreachable!("action only triggered for Cast expressions")
9892 };
9893 Ok(Expression::Function(Box::new(Function::new(
9894 "TIMESTAMP".to_string(),
9895 vec![c.this],
9896 ))))
9897 }
9898
9899 Action::ToDateToCast => {
9900 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
9901 if let Expression::Function(f) = e {
9902 let arg = f.args.into_iter().next().unwrap();
9903 Ok(Expression::Cast(Box::new(Cast {
9904 this: arg,
9905 to: DataType::Date,
9906 double_colon_syntax: false,
9907 trailing_comments: vec![],
9908 format: None,
9909 default: None,
9910 inferred_type: None,
9911 })))
9912 } else {
9913 Ok(e)
9914 }
9915 }
9916 Action::DateTruncWrapCast => {
9917 // Handle both Expression::DateTrunc/TimestampTrunc and
9918 // Expression::Function("DATE_TRUNC", [unit, expr])
9919 match e {
9920 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
9921 let input_type = match &d.this {
9922 Expression::Cast(c) => Some(c.to.clone()),
9923 _ => None,
9924 };
9925 if let Some(cast_type) = input_type {
9926 let is_time = matches!(cast_type, DataType::Time { .. });
9927 if is_time {
9928 let date_expr = Expression::Cast(Box::new(Cast {
9929 this: Expression::Literal(Box::new(
9930 crate::expressions::Literal::String(
9931 "1970-01-01".to_string(),
9932 ),
9933 )),
9934 to: DataType::Date,
9935 double_colon_syntax: false,
9936 trailing_comments: vec![],
9937 format: None,
9938 default: None,
9939 inferred_type: None,
9940 }));
9941 let add_expr =
9942 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
9943 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
9944 this: add_expr,
9945 unit: d.unit,
9946 }));
9947 Ok(Expression::Cast(Box::new(Cast {
9948 this: inner,
9949 to: cast_type,
9950 double_colon_syntax: false,
9951 trailing_comments: vec![],
9952 format: None,
9953 default: None,
9954 inferred_type: None,
9955 })))
9956 } else {
9957 let inner = Expression::DateTrunc(Box::new(*d));
9958 Ok(Expression::Cast(Box::new(Cast {
9959 this: inner,
9960 to: cast_type,
9961 double_colon_syntax: false,
9962 trailing_comments: vec![],
9963 format: None,
9964 default: None,
9965 inferred_type: None,
9966 })))
9967 }
9968 } else {
9969 Ok(Expression::DateTrunc(d))
9970 }
9971 }
9972 Expression::Function(f) if f.args.len() == 2 => {
9973 // Function-based DATE_TRUNC(unit, expr)
9974 let input_type = match &f.args[1] {
9975 Expression::Cast(c) => Some(c.to.clone()),
9976 _ => None,
9977 };
9978 if let Some(cast_type) = input_type {
9979 let is_time = matches!(cast_type, DataType::Time { .. });
9980 if is_time {
9981 let date_expr = Expression::Cast(Box::new(Cast {
9982 this: Expression::Literal(Box::new(
9983 crate::expressions::Literal::String(
9984 "1970-01-01".to_string(),
9985 ),
9986 )),
9987 to: DataType::Date,
9988 double_colon_syntax: false,
9989 trailing_comments: vec![],
9990 format: None,
9991 default: None,
9992 inferred_type: None,
9993 }));
9994 let mut args = f.args;
9995 let unit_arg = args.remove(0);
9996 let time_expr = args.remove(0);
9997 let add_expr = Expression::Add(Box::new(BinaryOp::new(
9998 date_expr, time_expr,
9999 )));
10000 let inner = Expression::Function(Box::new(Function::new(
10001 "DATE_TRUNC".to_string(),
10002 vec![unit_arg, add_expr],
10003 )));
10004 Ok(Expression::Cast(Box::new(Cast {
10005 this: inner,
10006 to: cast_type,
10007 double_colon_syntax: false,
10008 trailing_comments: vec![],
10009 format: None,
10010 default: None,
10011 inferred_type: None,
10012 })))
10013 } else {
10014 // Wrap the function in CAST
10015 Ok(Expression::Cast(Box::new(Cast {
10016 this: Expression::Function(f),
10017 to: cast_type,
10018 double_colon_syntax: false,
10019 trailing_comments: vec![],
10020 format: None,
10021 default: None,
10022 inferred_type: None,
10023 })))
10024 }
10025 } else {
10026 Ok(Expression::Function(f))
10027 }
10028 }
10029 other => Ok(other),
10030 }
10031 }
10032
10033 Action::RegexpReplaceSnowflakeToDuckDB => {
10034 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
10035 if let Expression::Function(f) = e {
10036 let mut args = f.args;
10037 let subject = args.remove(0);
10038 let pattern = args.remove(0);
10039 let replacement = args.remove(0);
10040 Ok(Expression::Function(Box::new(Function::new(
10041 "REGEXP_REPLACE".to_string(),
10042 vec![
10043 subject,
10044 pattern,
10045 replacement,
10046 Expression::Literal(Box::new(crate::expressions::Literal::String(
10047 "g".to_string(),
10048 ))),
10049 ],
10050 ))))
10051 } else {
10052 Ok(e)
10053 }
10054 }
10055
10056 Action::RegexpReplacePositionSnowflakeToDuckDB => {
10057 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
10058 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
10059 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
10060 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
10061 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
10062 if let Expression::Function(f) = e {
10063 let mut args = f.args;
10064 let subject = args.remove(0);
10065 let pattern = args.remove(0);
10066 let replacement = args.remove(0);
10067 let position = args.remove(0);
10068 let occurrence = args.remove(0);
10069
10070 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10071 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10072 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10073
10074 if is_pos_1 && is_occ_1 {
10075 // REGEXP_REPLACE(s, p, r) - single replace, no flags
10076 Ok(Expression::Function(Box::new(Function::new(
10077 "REGEXP_REPLACE".to_string(),
10078 vec![subject, pattern, replacement],
10079 ))))
10080 } else if is_pos_1 && is_occ_0 {
10081 // REGEXP_REPLACE(s, p, r, 'g') - global replace
10082 Ok(Expression::Function(Box::new(Function::new(
10083 "REGEXP_REPLACE".to_string(),
10084 vec![
10085 subject,
10086 pattern,
10087 replacement,
10088 Expression::Literal(Box::new(Literal::String("g".to_string()))),
10089 ],
10090 ))))
10091 } else {
10092 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
10093 // Pre-compute pos-1 when position is a numeric literal
10094 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
10095 if let Literal::Number(ref n) = lit.as_ref() {
10096 if let Ok(val) = n.parse::<i64>() {
10097 Expression::number(val - 1)
10098 } else {
10099 Expression::Sub(Box::new(BinaryOp::new(
10100 position.clone(),
10101 Expression::number(1),
10102 )))
10103 }
10104 } else {
10105 position.clone()
10106 }
10107 } else {
10108 Expression::Sub(Box::new(BinaryOp::new(
10109 position.clone(),
10110 Expression::number(1),
10111 )))
10112 };
10113 let prefix = Expression::Function(Box::new(Function::new(
10114 "SUBSTRING".to_string(),
10115 vec![subject.clone(), Expression::number(1), pos_minus_1],
10116 )));
10117 let suffix_subject = Expression::Function(Box::new(Function::new(
10118 "SUBSTRING".to_string(),
10119 vec![subject, position],
10120 )));
10121 let mut replace_args = vec![suffix_subject, pattern, replacement];
10122 if is_occ_0 {
10123 replace_args.push(Expression::Literal(Box::new(Literal::String(
10124 "g".to_string(),
10125 ))));
10126 }
10127 let replace_expr = Expression::Function(Box::new(Function::new(
10128 "REGEXP_REPLACE".to_string(),
10129 replace_args,
10130 )));
10131 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10132 this: Box::new(prefix),
10133 expression: Box::new(replace_expr),
10134 safe: None,
10135 })))
10136 }
10137 } else {
10138 Ok(e)
10139 }
10140 }
10141
10142 Action::RegexpSubstrSnowflakeToDuckDB => {
10143 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
10144 if let Expression::Function(f) = e {
10145 let mut args = f.args;
10146 let arg_count = args.len();
10147 match arg_count {
10148 // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
10149 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10150 "REGEXP_EXTRACT".to_string(),
10151 args,
10152 )))),
10153 // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
10154 3 => {
10155 let subject = args.remove(0);
10156 let pattern = args.remove(0);
10157 let position = args.remove(0);
10158 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10159 if is_pos_1 {
10160 Ok(Expression::Function(Box::new(Function::new(
10161 "REGEXP_EXTRACT".to_string(),
10162 vec![subject, pattern],
10163 ))))
10164 } else {
10165 let substring_expr =
10166 Expression::Function(Box::new(Function::new(
10167 "SUBSTRING".to_string(),
10168 vec![subject, position],
10169 )));
10170 let nullif_expr =
10171 Expression::Function(Box::new(Function::new(
10172 "NULLIF".to_string(),
10173 vec![
10174 substring_expr,
10175 Expression::Literal(Box::new(Literal::String(
10176 String::new(),
10177 ))),
10178 ],
10179 )));
10180 Ok(Expression::Function(Box::new(Function::new(
10181 "REGEXP_EXTRACT".to_string(),
10182 vec![nullif_expr, pattern],
10183 ))))
10184 }
10185 }
10186 // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
10187 4 => {
10188 let subject = args.remove(0);
10189 let pattern = args.remove(0);
10190 let position = args.remove(0);
10191 let occurrence = args.remove(0);
10192 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10193 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10194
10195 let effective_subject = if is_pos_1 {
10196 subject
10197 } else {
10198 let substring_expr =
10199 Expression::Function(Box::new(Function::new(
10200 "SUBSTRING".to_string(),
10201 vec![subject, position],
10202 )));
10203 Expression::Function(Box::new(Function::new(
10204 "NULLIF".to_string(),
10205 vec![
10206 substring_expr,
10207 Expression::Literal(Box::new(Literal::String(
10208 String::new(),
10209 ))),
10210 ],
10211 )))
10212 };
10213
10214 if is_occ_1 {
10215 Ok(Expression::Function(Box::new(Function::new(
10216 "REGEXP_EXTRACT".to_string(),
10217 vec![effective_subject, pattern],
10218 ))))
10219 } else {
10220 // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
10221 let extract_all =
10222 Expression::Function(Box::new(Function::new(
10223 "REGEXP_EXTRACT_ALL".to_string(),
10224 vec![effective_subject, pattern],
10225 )));
10226 Ok(Expression::Function(Box::new(Function::new(
10227 "ARRAY_EXTRACT".to_string(),
10228 vec![extract_all, occurrence],
10229 ))))
10230 }
10231 }
10232 // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
10233 5 => {
10234 let subject = args.remove(0);
10235 let pattern = args.remove(0);
10236 let _position = args.remove(0);
10237 let _occurrence = args.remove(0);
10238 let _flags = args.remove(0);
10239 // Strip 'e' flag, convert to REGEXP_EXTRACT
10240 Ok(Expression::Function(Box::new(Function::new(
10241 "REGEXP_EXTRACT".to_string(),
10242 vec![subject, pattern],
10243 ))))
10244 }
10245 // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
10246 _ => {
10247 let subject = args.remove(0);
10248 let pattern = args.remove(0);
10249 let _position = args.remove(0);
10250 let _occurrence = args.remove(0);
10251 let _flags = args.remove(0);
10252 let group = args.remove(0);
10253 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10254 if is_group_0 {
10255 // Strip group=0 (default)
10256 Ok(Expression::Function(Box::new(Function::new(
10257 "REGEXP_EXTRACT".to_string(),
10258 vec![subject, pattern],
10259 ))))
10260 } else {
10261 Ok(Expression::Function(Box::new(Function::new(
10262 "REGEXP_EXTRACT".to_string(),
10263 vec![subject, pattern, group],
10264 ))))
10265 }
10266 }
10267 }
10268 } else {
10269 Ok(e)
10270 }
10271 }
10272
10273 Action::RegexpSubstrSnowflakeIdentity => {
10274 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
10275 // Strip trailing group=0
10276 if let Expression::Function(f) = e {
10277 let func_name = f.name.clone();
10278 let mut args = f.args;
10279 if args.len() == 6 {
10280 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10281 if is_group_0 {
10282 args.truncate(5);
10283 }
10284 }
10285 Ok(Expression::Function(Box::new(Function::new(
10286 func_name, args,
10287 ))))
10288 } else {
10289 Ok(e)
10290 }
10291 }
10292
10293 Action::RegexpSubstrAllSnowflakeToDuckDB => {
10294 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
10295 if let Expression::Function(f) = e {
10296 let mut args = f.args;
10297 let arg_count = args.len();
10298 match arg_count {
10299 // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
10300 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10301 "REGEXP_EXTRACT_ALL".to_string(),
10302 args,
10303 )))),
10304 // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
10305 3 => {
10306 let subject = args.remove(0);
10307 let pattern = args.remove(0);
10308 let position = args.remove(0);
10309 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10310 if is_pos_1 {
10311 Ok(Expression::Function(Box::new(Function::new(
10312 "REGEXP_EXTRACT_ALL".to_string(),
10313 vec![subject, pattern],
10314 ))))
10315 } else {
10316 let substring_expr =
10317 Expression::Function(Box::new(Function::new(
10318 "SUBSTRING".to_string(),
10319 vec![subject, position],
10320 )));
10321 Ok(Expression::Function(Box::new(Function::new(
10322 "REGEXP_EXTRACT_ALL".to_string(),
10323 vec![substring_expr, pattern],
10324 ))))
10325 }
10326 }
10327 // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
10328 4 => {
10329 let subject = args.remove(0);
10330 let pattern = args.remove(0);
10331 let position = args.remove(0);
10332 let occurrence = args.remove(0);
10333 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10334 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10335
10336 let effective_subject = if is_pos_1 {
10337 subject
10338 } else {
10339 Expression::Function(Box::new(Function::new(
10340 "SUBSTRING".to_string(),
10341 vec![subject, position],
10342 )))
10343 };
10344
10345 if is_occ_1 {
10346 Ok(Expression::Function(Box::new(Function::new(
10347 "REGEXP_EXTRACT_ALL".to_string(),
10348 vec![effective_subject, pattern],
10349 ))))
10350 } else {
10351 // REGEXP_EXTRACT_ALL(s, p)[occ:]
10352 let extract_all =
10353 Expression::Function(Box::new(Function::new(
10354 "REGEXP_EXTRACT_ALL".to_string(),
10355 vec![effective_subject, pattern],
10356 )));
10357 Ok(Expression::ArraySlice(Box::new(
10358 crate::expressions::ArraySlice {
10359 this: extract_all,
10360 start: Some(occurrence),
10361 end: None,
10362 },
10363 )))
10364 }
10365 }
10366 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
10367 5 => {
10368 let subject = args.remove(0);
10369 let pattern = args.remove(0);
10370 let _position = args.remove(0);
10371 let _occurrence = args.remove(0);
10372 let _flags = args.remove(0);
10373 Ok(Expression::Function(Box::new(Function::new(
10374 "REGEXP_EXTRACT_ALL".to_string(),
10375 vec![subject, pattern],
10376 ))))
10377 }
10378 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
10379 _ => {
10380 let subject = args.remove(0);
10381 let pattern = args.remove(0);
10382 let _position = args.remove(0);
10383 let _occurrence = args.remove(0);
10384 let _flags = args.remove(0);
10385 let group = args.remove(0);
10386 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10387 if is_group_0 {
10388 Ok(Expression::Function(Box::new(Function::new(
10389 "REGEXP_EXTRACT_ALL".to_string(),
10390 vec![subject, pattern],
10391 ))))
10392 } else {
10393 Ok(Expression::Function(Box::new(Function::new(
10394 "REGEXP_EXTRACT_ALL".to_string(),
10395 vec![subject, pattern, group],
10396 ))))
10397 }
10398 }
10399 }
10400 } else {
10401 Ok(e)
10402 }
10403 }
10404
10405 Action::RegexpCountSnowflakeToDuckDB => {
10406 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
10407 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10408 if let Expression::Function(f) = e {
10409 let mut args = f.args;
10410 let arg_count = args.len();
10411 let subject = args.remove(0);
10412 let pattern = args.remove(0);
10413
10414 // Handle position arg
10415 let effective_subject = if arg_count >= 3 {
10416 let position = args.remove(0);
10417 Expression::Function(Box::new(Function::new(
10418 "SUBSTRING".to_string(),
10419 vec![subject, position],
10420 )))
10421 } else {
10422 subject
10423 };
10424
10425 // Handle flags arg -> embed as (?flags) prefix in pattern
10426 let effective_pattern = if arg_count >= 4 {
10427 let flags = args.remove(0);
10428 match &flags {
10429 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
10430 {
10431 let Literal::String(f_str) = lit.as_ref() else {
10432 unreachable!()
10433 };
10434 // Always use concatenation: '(?flags)' || pattern
10435 let prefix = Expression::Literal(Box::new(Literal::String(
10436 format!("(?{})", f_str),
10437 )));
10438 Expression::DPipe(Box::new(crate::expressions::DPipe {
10439 this: Box::new(prefix),
10440 expression: Box::new(pattern.clone()),
10441 safe: None,
10442 }))
10443 }
10444 _ => pattern.clone(),
10445 }
10446 } else {
10447 pattern.clone()
10448 };
10449
10450 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10451 let extract_all = Expression::Function(Box::new(Function::new(
10452 "REGEXP_EXTRACT_ALL".to_string(),
10453 vec![effective_subject, effective_pattern.clone()],
10454 )));
10455 let length_expr =
10456 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10457 this: extract_all,
10458 original_name: None,
10459 inferred_type: None,
10460 }));
10461 let condition = Expression::Eq(Box::new(BinaryOp::new(
10462 effective_pattern,
10463 Expression::Literal(Box::new(Literal::String(String::new()))),
10464 )));
10465 Ok(Expression::Case(Box::new(Case {
10466 operand: None,
10467 whens: vec![(condition, Expression::number(0))],
10468 else_: Some(length_expr),
10469 comments: vec![],
10470 inferred_type: None,
10471 })))
10472 } else {
10473 Ok(e)
10474 }
10475 }
10476
10477 Action::RegexpInstrSnowflakeToDuckDB => {
10478 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
10479 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
10480 // WHEN p = '' THEN 0
10481 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10482 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
10483 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
10484 // + pos_offset
10485 // END
10486 if let Expression::Function(f) = e {
10487 let mut args = f.args;
10488 let subject = args.remove(0);
10489 let pattern = if !args.is_empty() {
10490 args.remove(0)
10491 } else {
10492 Expression::Literal(Box::new(Literal::String(String::new())))
10493 };
10494
10495 // Collect all original args for NULL checks
10496 let position = if !args.is_empty() {
10497 Some(args.remove(0))
10498 } else {
10499 None
10500 };
10501 let occurrence = if !args.is_empty() {
10502 Some(args.remove(0))
10503 } else {
10504 None
10505 };
10506 let option = if !args.is_empty() {
10507 Some(args.remove(0))
10508 } else {
10509 None
10510 };
10511 let flags = if !args.is_empty() {
10512 Some(args.remove(0))
10513 } else {
10514 None
10515 };
10516 let _group = if !args.is_empty() {
10517 Some(args.remove(0))
10518 } else {
10519 None
10520 };
10521
10522 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
10523 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
10524
10525 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
10526 let mut null_checks: Vec<Expression> = vec![
10527 Expression::Is(Box::new(BinaryOp::new(
10528 subject.clone(),
10529 Expression::Null(Null),
10530 ))),
10531 Expression::Is(Box::new(BinaryOp::new(
10532 pattern.clone(),
10533 Expression::Null(Null),
10534 ))),
10535 ];
10536 // Add NULL checks for all provided optional args
10537 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
10538 if let Some(arg) = opt_arg {
10539 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
10540 (*arg).clone(),
10541 Expression::Null(Null),
10542 ))));
10543 }
10544 }
10545 // Chain with OR
10546 let null_condition = null_checks
10547 .into_iter()
10548 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
10549 .unwrap();
10550
10551 // Effective subject (apply position offset)
10552 let effective_subject = if is_pos_1 {
10553 subject.clone()
10554 } else {
10555 let pos = position.clone().unwrap_or(Expression::number(1));
10556 Expression::Function(Box::new(Function::new(
10557 "SUBSTRING".to_string(),
10558 vec![subject.clone(), pos],
10559 )))
10560 };
10561
10562 // Effective pattern (apply flags if present)
10563 let effective_pattern = if let Some(ref fl) = flags {
10564 if let Expression::Literal(lit) = fl {
10565 if let Literal::String(f_str) = lit.as_ref() {
10566 if !f_str.is_empty() {
10567 let prefix = Expression::Literal(Box::new(
10568 Literal::String(format!("(?{})", f_str)),
10569 ));
10570 Expression::DPipe(Box::new(crate::expressions::DPipe {
10571 this: Box::new(prefix),
10572 expression: Box::new(pattern.clone()),
10573 safe: None,
10574 }))
10575 } else {
10576 pattern.clone()
10577 }
10578 } else {
10579 fl.clone()
10580 }
10581 } else {
10582 pattern.clone()
10583 }
10584 } else {
10585 pattern.clone()
10586 };
10587
10588 // WHEN pattern = '' THEN 0
10589 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
10590 effective_pattern.clone(),
10591 Expression::Literal(Box::new(Literal::String(String::new()))),
10592 )));
10593
10594 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10595 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
10596 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10597 this: Expression::Function(Box::new(Function::new(
10598 "REGEXP_EXTRACT_ALL".to_string(),
10599 vec![effective_subject.clone(), effective_pattern.clone()],
10600 ))),
10601 original_name: None,
10602 inferred_type: None,
10603 })),
10604 occurrence_expr.clone(),
10605 )));
10606
10607 // Helper: build LENGTH lambda for LIST_TRANSFORM
10608 let make_len_lambda = || {
10609 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
10610 parameters: vec![crate::expressions::Identifier::new("x")],
10611 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
10612 this: Expression::Identifier(
10613 crate::expressions::Identifier::new("x"),
10614 ),
10615 original_name: None,
10616 inferred_type: None,
10617 })),
10618 colon: false,
10619 parameter_types: vec![],
10620 }))
10621 };
10622
10623 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
10624 let split_sliced =
10625 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10626 this: Expression::Function(Box::new(Function::new(
10627 "STRING_SPLIT_REGEX".to_string(),
10628 vec![effective_subject.clone(), effective_pattern.clone()],
10629 ))),
10630 start: Some(Expression::number(1)),
10631 end: Some(occurrence_expr.clone()),
10632 }));
10633 let split_sum = Expression::Function(Box::new(Function::new(
10634 "COALESCE".to_string(),
10635 vec![
10636 Expression::Function(Box::new(Function::new(
10637 "LIST_SUM".to_string(),
10638 vec![Expression::Function(Box::new(Function::new(
10639 "LIST_TRANSFORM".to_string(),
10640 vec![split_sliced, make_len_lambda()],
10641 )))],
10642 ))),
10643 Expression::number(0),
10644 ],
10645 )));
10646
10647 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
10648 let extract_sliced =
10649 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10650 this: Expression::Function(Box::new(Function::new(
10651 "REGEXP_EXTRACT_ALL".to_string(),
10652 vec![effective_subject.clone(), effective_pattern.clone()],
10653 ))),
10654 start: Some(Expression::number(1)),
10655 end: Some(Expression::Sub(Box::new(BinaryOp::new(
10656 occurrence_expr.clone(),
10657 Expression::number(1),
10658 )))),
10659 }));
10660 let extract_sum = Expression::Function(Box::new(Function::new(
10661 "COALESCE".to_string(),
10662 vec![
10663 Expression::Function(Box::new(Function::new(
10664 "LIST_SUM".to_string(),
10665 vec![Expression::Function(Box::new(Function::new(
10666 "LIST_TRANSFORM".to_string(),
10667 vec![extract_sliced, make_len_lambda()],
10668 )))],
10669 ))),
10670 Expression::number(0),
10671 ],
10672 )));
10673
10674 // Position offset: pos - 1 when pos > 1, else 0
10675 let pos_offset: Expression = if !is_pos_1 {
10676 let pos = position.clone().unwrap_or(Expression::number(1));
10677 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
10678 } else {
10679 Expression::number(0)
10680 };
10681
10682 // ELSE: 1 + split_sum + extract_sum + pos_offset
10683 let else_expr = Expression::Add(Box::new(BinaryOp::new(
10684 Expression::Add(Box::new(BinaryOp::new(
10685 Expression::Add(Box::new(BinaryOp::new(
10686 Expression::number(1),
10687 split_sum,
10688 ))),
10689 extract_sum,
10690 ))),
10691 pos_offset,
10692 )));
10693
10694 Ok(Expression::Case(Box::new(Case {
10695 operand: None,
10696 whens: vec![
10697 (null_condition, Expression::Null(Null)),
10698 (empty_pattern_check, Expression::number(0)),
10699 (match_count_check, Expression::number(0)),
10700 ],
10701 else_: Some(else_expr),
10702 comments: vec![],
10703 inferred_type: None,
10704 })))
10705 } else {
10706 Ok(e)
10707 }
10708 }
10709
10710 Action::RlikeSnowflakeToDuckDB => {
10711 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
10712 // Both do full-string matching, so no anchoring needed
10713 let (subject, pattern, flags) = match e {
10714 Expression::RegexpLike(ref rl) => {
10715 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
10716 }
10717 Expression::Function(ref f) if f.args.len() >= 2 => {
10718 let s = f.args[0].clone();
10719 let p = f.args[1].clone();
10720 let fl = f.args.get(2).cloned();
10721 (s, p, fl)
10722 }
10723 _ => return Ok(e),
10724 };
10725
10726 let mut result_args = vec![subject, pattern];
10727 if let Some(fl) = flags {
10728 result_args.push(fl);
10729 }
10730 Ok(Expression::Function(Box::new(Function::new(
10731 "REGEXP_FULL_MATCH".to_string(),
10732 result_args,
10733 ))))
10734 }
10735
10736 Action::RegexpExtractAllToSnowflake => {
10737 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
10738 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
10739 if let Expression::Function(f) = e {
10740 let mut args = f.args;
10741 if args.len() >= 2 {
10742 let str_expr = args.remove(0);
10743 let pattern = args.remove(0);
10744
10745 let has_groups = match &pattern {
10746 Expression::Literal(lit)
10747 if matches!(lit.as_ref(), Literal::String(_)) =>
10748 {
10749 let Literal::String(s) = lit.as_ref() else {
10750 unreachable!()
10751 };
10752 s.contains('(') && s.contains(')')
10753 }
10754 _ => false,
10755 };
10756
10757 if has_groups {
10758 Ok(Expression::Function(Box::new(Function::new(
10759 "REGEXP_SUBSTR_ALL".to_string(),
10760 vec![
10761 str_expr,
10762 pattern,
10763 Expression::number(1),
10764 Expression::number(1),
10765 Expression::Literal(Box::new(Literal::String(
10766 "c".to_string(),
10767 ))),
10768 Expression::number(1),
10769 ],
10770 ))))
10771 } else {
10772 Ok(Expression::Function(Box::new(Function::new(
10773 "REGEXP_SUBSTR_ALL".to_string(),
10774 vec![str_expr, pattern],
10775 ))))
10776 }
10777 } else {
10778 Ok(Expression::Function(Box::new(Function::new(
10779 "REGEXP_SUBSTR_ALL".to_string(),
10780 args,
10781 ))))
10782 }
10783 } else {
10784 Ok(e)
10785 }
10786 }
10787
10788 Action::SetToVariable => {
10789 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
10790 if let Expression::SetStatement(mut s) = e {
10791 for item in &mut s.items {
10792 if item.kind.is_none() {
10793 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
10794 let already_variable = match &item.name {
10795 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
10796 _ => false,
10797 };
10798 if already_variable {
10799 // Extract the actual name and set kind
10800 if let Expression::Identifier(ref mut id) = item.name {
10801 let actual_name = id.name["VARIABLE ".len()..].to_string();
10802 id.name = actual_name;
10803 }
10804 }
10805 item.kind = Some("VARIABLE".to_string());
10806 }
10807 }
10808 Ok(Expression::SetStatement(s))
10809 } else {
10810 Ok(e)
10811 }
10812 }
10813
10814 Action::ConvertTimezoneToExpr => {
10815 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
10816 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
10817 if let Expression::Function(f) = e {
10818 if f.args.len() == 2 {
10819 let mut args = f.args;
10820 let target_tz = args.remove(0);
10821 let timestamp = args.remove(0);
10822 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
10823 source_tz: None,
10824 target_tz: Some(Box::new(target_tz)),
10825 timestamp: Some(Box::new(timestamp)),
10826 options: vec![],
10827 })))
10828 } else if f.args.len() == 3 {
10829 let mut args = f.args;
10830 let source_tz = args.remove(0);
10831 let target_tz = args.remove(0);
10832 let timestamp = args.remove(0);
10833 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
10834 source_tz: Some(Box::new(source_tz)),
10835 target_tz: Some(Box::new(target_tz)),
10836 timestamp: Some(Box::new(timestamp)),
10837 options: vec![],
10838 })))
10839 } else {
10840 Ok(Expression::Function(f))
10841 }
10842 } else {
10843 Ok(e)
10844 }
10845 }
10846
10847 Action::BigQueryCastType => {
10848 // Convert BigQuery types to standard SQL types
10849 if let Expression::DataType(dt) = e {
10850 match dt {
10851 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
10852 Ok(Expression::DataType(DataType::BigInt { length: None }))
10853 }
10854 DataType::Custom { ref name }
10855 if name.eq_ignore_ascii_case("FLOAT64") =>
10856 {
10857 Ok(Expression::DataType(DataType::Double {
10858 precision: None,
10859 scale: None,
10860 }))
10861 }
10862 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
10863 Ok(Expression::DataType(DataType::Boolean))
10864 }
10865 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
10866 Ok(Expression::DataType(DataType::VarBinary { length: None }))
10867 }
10868 DataType::Custom { ref name }
10869 if name.eq_ignore_ascii_case("NUMERIC") =>
10870 {
10871 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
10872 // default precision (18, 3) being added to bare DECIMAL
10873 if matches!(target, DialectType::DuckDB) {
10874 Ok(Expression::DataType(DataType::Custom {
10875 name: "DECIMAL".to_string(),
10876 }))
10877 } else {
10878 Ok(Expression::DataType(DataType::Decimal {
10879 precision: None,
10880 scale: None,
10881 }))
10882 }
10883 }
10884 DataType::Custom { ref name }
10885 if name.eq_ignore_ascii_case("STRING") =>
10886 {
10887 Ok(Expression::DataType(DataType::String { length: None }))
10888 }
10889 DataType::Custom { ref name }
10890 if name.eq_ignore_ascii_case("DATETIME") =>
10891 {
10892 Ok(Expression::DataType(DataType::Timestamp {
10893 precision: None,
10894 timezone: false,
10895 }))
10896 }
10897 _ => Ok(Expression::DataType(dt)),
10898 }
10899 } else {
10900 Ok(e)
10901 }
10902 }
10903
10904 Action::BigQuerySafeDivide => {
10905 // Convert SafeDivide expression to IF/CASE form for most targets
10906 if let Expression::SafeDivide(sd) = e {
10907 let x = *sd.this;
10908 let y = *sd.expression;
10909 // Wrap x and y in parens if they're complex expressions
10910 let y_ref = match &y {
10911 Expression::Column(_)
10912 | Expression::Literal(_)
10913 | Expression::Identifier(_) => y.clone(),
10914 _ => Expression::Paren(Box::new(Paren {
10915 this: y.clone(),
10916 trailing_comments: vec![],
10917 })),
10918 };
10919 let x_ref = match &x {
10920 Expression::Column(_)
10921 | Expression::Literal(_)
10922 | Expression::Identifier(_) => x.clone(),
10923 _ => Expression::Paren(Box::new(Paren {
10924 this: x.clone(),
10925 trailing_comments: vec![],
10926 })),
10927 };
10928 let condition = Expression::Neq(Box::new(BinaryOp::new(
10929 y_ref.clone(),
10930 Expression::number(0),
10931 )));
10932 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
10933
10934 if matches!(target, DialectType::Spark | DialectType::Databricks) {
10935 Ok(Expression::Function(Box::new(Function::new(
10936 "TRY_DIVIDE".to_string(),
10937 vec![x, y],
10938 ))))
10939 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
10940 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
10941 let cast_x = Expression::Cast(Box::new(Cast {
10942 this: match &x {
10943 Expression::Column(_)
10944 | Expression::Literal(_)
10945 | Expression::Identifier(_) => x,
10946 _ => Expression::Paren(Box::new(Paren {
10947 this: x,
10948 trailing_comments: vec![],
10949 })),
10950 },
10951 to: DataType::Double {
10952 precision: None,
10953 scale: None,
10954 },
10955 trailing_comments: vec![],
10956 double_colon_syntax: false,
10957 format: None,
10958 default: None,
10959 inferred_type: None,
10960 }));
10961 let cast_div = Expression::Div(Box::new(BinaryOp::new(
10962 cast_x,
10963 match &y {
10964 Expression::Column(_)
10965 | Expression::Literal(_)
10966 | Expression::Identifier(_) => y,
10967 _ => Expression::Paren(Box::new(Paren {
10968 this: y,
10969 trailing_comments: vec![],
10970 })),
10971 },
10972 )));
10973 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
10974 condition,
10975 true_value: cast_div,
10976 false_value: Some(Expression::Null(Null)),
10977 original_name: None,
10978 inferred_type: None,
10979 })))
10980 } else if matches!(target, DialectType::PostgreSQL) {
10981 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
10982 let cast_x = Expression::Cast(Box::new(Cast {
10983 this: match &x {
10984 Expression::Column(_)
10985 | Expression::Literal(_)
10986 | Expression::Identifier(_) => x,
10987 _ => Expression::Paren(Box::new(Paren {
10988 this: x,
10989 trailing_comments: vec![],
10990 })),
10991 },
10992 to: DataType::Custom {
10993 name: "DOUBLE PRECISION".to_string(),
10994 },
10995 trailing_comments: vec![],
10996 double_colon_syntax: false,
10997 format: None,
10998 default: None,
10999 inferred_type: None,
11000 }));
11001 let y_paren = match &y {
11002 Expression::Column(_)
11003 | Expression::Literal(_)
11004 | Expression::Identifier(_) => y,
11005 _ => Expression::Paren(Box::new(Paren {
11006 this: y,
11007 trailing_comments: vec![],
11008 })),
11009 };
11010 let cast_div =
11011 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
11012 Ok(Expression::Case(Box::new(Case {
11013 operand: None,
11014 whens: vec![(condition, cast_div)],
11015 else_: Some(Expression::Null(Null)),
11016 comments: Vec::new(),
11017 inferred_type: None,
11018 })))
11019 } else if matches!(target, DialectType::DuckDB) {
11020 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
11021 Ok(Expression::Case(Box::new(Case {
11022 operand: None,
11023 whens: vec![(condition, div_expr)],
11024 else_: Some(Expression::Null(Null)),
11025 comments: Vec::new(),
11026 inferred_type: None,
11027 })))
11028 } else if matches!(target, DialectType::Snowflake) {
11029 // Snowflake: IFF(y <> 0, x / y, NULL)
11030 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11031 condition,
11032 true_value: div_expr,
11033 false_value: Some(Expression::Null(Null)),
11034 original_name: Some("IFF".to_string()),
11035 inferred_type: None,
11036 })))
11037 } else {
11038 // All others: IF(y <> 0, x / y, NULL)
11039 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11040 condition,
11041 true_value: div_expr,
11042 false_value: Some(Expression::Null(Null)),
11043 original_name: None,
11044 inferred_type: None,
11045 })))
11046 }
11047 } else {
11048 Ok(e)
11049 }
11050 }
11051
11052 Action::BigQueryLastDayStripUnit => {
11053 if let Expression::LastDay(mut ld) = e {
11054 ld.unit = None; // Strip the unit (MONTH is default)
11055 match target {
11056 DialectType::PostgreSQL => {
11057 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11058 let date_trunc = Expression::Function(Box::new(Function::new(
11059 "DATE_TRUNC".to_string(),
11060 vec![
11061 Expression::Literal(Box::new(
11062 crate::expressions::Literal::String(
11063 "MONTH".to_string(),
11064 ),
11065 )),
11066 ld.this.clone(),
11067 ],
11068 )));
11069 let plus_month =
11070 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11071 date_trunc,
11072 Expression::Interval(Box::new(
11073 crate::expressions::Interval {
11074 this: Some(Expression::Literal(Box::new(
11075 crate::expressions::Literal::String(
11076 "1 MONTH".to_string(),
11077 ),
11078 ))),
11079 unit: None,
11080 },
11081 )),
11082 )));
11083 let minus_day =
11084 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
11085 plus_month,
11086 Expression::Interval(Box::new(
11087 crate::expressions::Interval {
11088 this: Some(Expression::Literal(Box::new(
11089 crate::expressions::Literal::String(
11090 "1 DAY".to_string(),
11091 ),
11092 ))),
11093 unit: None,
11094 },
11095 )),
11096 )));
11097 Ok(Expression::Cast(Box::new(Cast {
11098 this: minus_day,
11099 to: DataType::Date,
11100 trailing_comments: vec![],
11101 double_colon_syntax: false,
11102 format: None,
11103 default: None,
11104 inferred_type: None,
11105 })))
11106 }
11107 DialectType::Presto => {
11108 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
11109 Ok(Expression::Function(Box::new(Function::new(
11110 "LAST_DAY_OF_MONTH".to_string(),
11111 vec![ld.this],
11112 ))))
11113 }
11114 DialectType::ClickHouse => {
11115 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
11116 // Need to wrap the DATE type in Nullable
11117 let nullable_date = match ld.this {
11118 Expression::Cast(mut c) => {
11119 c.to = DataType::Nullable {
11120 inner: Box::new(DataType::Date),
11121 };
11122 Expression::Cast(c)
11123 }
11124 other => other,
11125 };
11126 ld.this = nullable_date;
11127 Ok(Expression::LastDay(ld))
11128 }
11129 _ => Ok(Expression::LastDay(ld)),
11130 }
11131 } else {
11132 Ok(e)
11133 }
11134 }
11135
11136 Action::BigQueryCastFormat => {
11137 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
11138 // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
11139 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
11140 let (this, to, format_expr, is_safe) = match e {
11141 Expression::Cast(ref c) if c.format.is_some() => (
11142 c.this.clone(),
11143 c.to.clone(),
11144 c.format.as_ref().unwrap().as_ref().clone(),
11145 false,
11146 ),
11147 Expression::SafeCast(ref c) if c.format.is_some() => (
11148 c.this.clone(),
11149 c.to.clone(),
11150 c.format.as_ref().unwrap().as_ref().clone(),
11151 true,
11152 ),
11153 _ => return Ok(e),
11154 };
11155 // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
11156 if matches!(target, DialectType::BigQuery) {
11157 match &to {
11158 DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
11159 // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
11160 return Ok(e);
11161 }
11162 _ => {}
11163 }
11164 }
11165 // Extract timezone from format if AT TIME ZONE is present
11166 let (actual_format_expr, timezone) = match &format_expr {
11167 Expression::AtTimeZone(ref atz) => {
11168 (atz.this.clone(), Some(atz.zone.clone()))
11169 }
11170 _ => (format_expr.clone(), None),
11171 };
11172 let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
11173 match target {
11174 DialectType::BigQuery => {
11175 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
11176 // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
11177 let func_name = match &to {
11178 DataType::Date => "PARSE_DATE",
11179 DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
11180 DataType::Time { .. } => "PARSE_TIMESTAMP",
11181 _ => "PARSE_TIMESTAMP",
11182 };
11183 let mut func_args = vec![strftime_fmt, this];
11184 if let Some(tz) = timezone {
11185 func_args.push(tz);
11186 }
11187 Ok(Expression::Function(Box::new(Function::new(
11188 func_name.to_string(),
11189 func_args,
11190 ))))
11191 }
11192 DialectType::DuckDB => {
11193 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
11194 // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
11195 let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
11196 let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
11197 let parse_call = Expression::Function(Box::new(Function::new(
11198 parse_fn_name.to_string(),
11199 vec![this, duck_fmt],
11200 )));
11201 Ok(Expression::Cast(Box::new(Cast {
11202 this: parse_call,
11203 to,
11204 trailing_comments: vec![],
11205 double_colon_syntax: false,
11206 format: None,
11207 default: None,
11208 inferred_type: None,
11209 })))
11210 }
11211 _ => Ok(e),
11212 }
11213 }
11214
11215 Action::BigQueryFunctionNormalize => {
11216 Self::normalize_bigquery_function(e, source, target)
11217 }
11218
11219 Action::BigQueryToHexBare => {
11220 // Not used anymore - handled directly in normalize_bigquery_function
11221 Ok(e)
11222 }
11223
11224 Action::BigQueryToHexLower => {
11225 if let Expression::Lower(uf) = e {
11226 match uf.this {
11227 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
11228 Expression::Function(f)
11229 if matches!(target, DialectType::BigQuery)
11230 && f.name == "TO_HEX" =>
11231 {
11232 Ok(Expression::Function(f))
11233 }
11234 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
11235 Expression::Lower(inner_uf) => {
11236 if matches!(target, DialectType::BigQuery) {
11237 // BQ->BQ: extract TO_HEX
11238 if let Expression::Function(f) = inner_uf.this {
11239 Ok(Expression::Function(Box::new(Function::new(
11240 "TO_HEX".to_string(),
11241 f.args,
11242 ))))
11243 } else {
11244 Ok(Expression::Lower(inner_uf))
11245 }
11246 } else {
11247 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
11248 Ok(Expression::Lower(inner_uf))
11249 }
11250 }
11251 other => {
11252 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
11253 this: other,
11254 original_name: None,
11255 inferred_type: None,
11256 })))
11257 }
11258 }
11259 } else {
11260 Ok(e)
11261 }
11262 }
11263
11264 Action::BigQueryToHexUpper => {
11265 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
11266 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
11267 if let Expression::Upper(uf) = e {
11268 if let Expression::Lower(inner_uf) = uf.this {
11269 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
11270 if matches!(target, DialectType::BigQuery) {
11271 // Restore TO_HEX name in inner function
11272 if let Expression::Function(f) = inner_uf.this {
11273 let restored = Expression::Function(Box::new(Function::new(
11274 "TO_HEX".to_string(),
11275 f.args,
11276 )));
11277 Ok(Expression::Upper(Box::new(
11278 crate::expressions::UnaryFunc::new(restored),
11279 )))
11280 } else {
11281 Ok(Expression::Upper(inner_uf))
11282 }
11283 } else {
11284 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
11285 Ok(inner_uf.this)
11286 }
11287 } else {
11288 Ok(Expression::Upper(uf))
11289 }
11290 } else {
11291 Ok(e)
11292 }
11293 }
11294
11295 Action::BigQueryAnyValueHaving => {
11296 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
11297 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
11298 if let Expression::AnyValue(agg) = e {
11299 if let Some((having_expr, is_max)) = agg.having_max {
11300 let func_name = if is_max {
11301 "ARG_MAX_NULL"
11302 } else {
11303 "ARG_MIN_NULL"
11304 };
11305 Ok(Expression::Function(Box::new(Function::new(
11306 func_name.to_string(),
11307 vec![agg.this, *having_expr],
11308 ))))
11309 } else {
11310 Ok(Expression::AnyValue(agg))
11311 }
11312 } else {
11313 Ok(e)
11314 }
11315 }
11316
11317 Action::BigQueryApproxQuantiles => {
11318 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
11319 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
11320 if let Expression::AggregateFunction(agg) = e {
11321 if agg.args.len() >= 2 {
11322 let x_expr = agg.args[0].clone();
11323 let n_expr = &agg.args[1];
11324
11325 // Extract the numeric value from n_expr
11326 let n = match n_expr {
11327 Expression::Literal(lit)
11328 if matches!(
11329 lit.as_ref(),
11330 crate::expressions::Literal::Number(_)
11331 ) =>
11332 {
11333 let crate::expressions::Literal::Number(s) = lit.as_ref()
11334 else {
11335 unreachable!()
11336 };
11337 s.parse::<usize>().unwrap_or(2)
11338 }
11339 _ => 2,
11340 };
11341
11342 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
11343 let mut quantiles = Vec::new();
11344 for i in 0..=n {
11345 let q = i as f64 / n as f64;
11346 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
11347 if q == 0.0 {
11348 quantiles.push(Expression::number(0));
11349 } else if q == 1.0 {
11350 quantiles.push(Expression::number(1));
11351 } else {
11352 quantiles.push(Expression::Literal(Box::new(
11353 crate::expressions::Literal::Number(format!("{}", q)),
11354 )));
11355 }
11356 }
11357
11358 let array_expr =
11359 Expression::Array(Box::new(crate::expressions::Array {
11360 expressions: quantiles,
11361 }));
11362
11363 // Preserve DISTINCT modifier
11364 let mut new_func = Function::new(
11365 "APPROX_QUANTILE".to_string(),
11366 vec![x_expr, array_expr],
11367 );
11368 new_func.distinct = agg.distinct;
11369 Ok(Expression::Function(Box::new(new_func)))
11370 } else {
11371 Ok(Expression::AggregateFunction(agg))
11372 }
11373 } else {
11374 Ok(e)
11375 }
11376 }
11377
11378 Action::GenericFunctionNormalize => {
11379 // Helper closure to convert ARBITRARY to target-specific function
11380 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
11381 let name = match target {
11382 DialectType::ClickHouse => "any",
11383 DialectType::TSQL | DialectType::SQLite => "MAX",
11384 DialectType::Hive => "FIRST",
11385 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11386 "ARBITRARY"
11387 }
11388 _ => "ANY_VALUE",
11389 };
11390 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
11391 }
11392
11393 if let Expression::Function(f) = e {
11394 let name = f.name.to_ascii_uppercase();
11395 match name.as_str() {
11396 "ARBITRARY" if f.args.len() == 1 => {
11397 let arg = f.args.into_iter().next().unwrap();
11398 Ok(convert_arbitrary(arg, target))
11399 }
11400 "TO_NUMBER" if f.args.len() == 1 => {
11401 let arg = f.args.into_iter().next().unwrap();
11402 match target {
11403 DialectType::Oracle | DialectType::Snowflake => {
11404 Ok(Expression::Function(Box::new(Function::new(
11405 "TO_NUMBER".to_string(),
11406 vec![arg],
11407 ))))
11408 }
11409 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11410 this: arg,
11411 to: crate::expressions::DataType::Double {
11412 precision: None,
11413 scale: None,
11414 },
11415 double_colon_syntax: false,
11416 trailing_comments: Vec::new(),
11417 format: None,
11418 default: None,
11419 inferred_type: None,
11420 }))),
11421 }
11422 }
11423 "AGGREGATE" if f.args.len() >= 3 => match target {
11424 DialectType::DuckDB
11425 | DialectType::Hive
11426 | DialectType::Presto
11427 | DialectType::Trino => Ok(Expression::Function(Box::new(
11428 Function::new("REDUCE".to_string(), f.args),
11429 ))),
11430 _ => Ok(Expression::Function(f)),
11431 },
11432 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
11433 "REGEXP_MATCHES" if f.args.len() >= 2 => {
11434 if matches!(target, DialectType::DuckDB) {
11435 Ok(Expression::Function(f))
11436 } else {
11437 let mut args = f.args;
11438 let this = args.remove(0);
11439 let pattern = args.remove(0);
11440 let flags = if args.is_empty() {
11441 None
11442 } else {
11443 Some(args.remove(0))
11444 };
11445 Ok(Expression::RegexpLike(Box::new(
11446 crate::expressions::RegexpFunc {
11447 this,
11448 pattern,
11449 flags,
11450 },
11451 )))
11452 }
11453 }
11454 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
11455 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
11456 if matches!(target, DialectType::DuckDB) {
11457 Ok(Expression::Function(f))
11458 } else {
11459 let mut args = f.args;
11460 let this = args.remove(0);
11461 let pattern = args.remove(0);
11462 let flags = if args.is_empty() {
11463 None
11464 } else {
11465 Some(args.remove(0))
11466 };
11467 Ok(Expression::RegexpLike(Box::new(
11468 crate::expressions::RegexpFunc {
11469 this,
11470 pattern,
11471 flags,
11472 },
11473 )))
11474 }
11475 }
11476 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
11477 "STRUCT_EXTRACT" if f.args.len() == 2 => {
11478 let mut args = f.args;
11479 let this = args.remove(0);
11480 let field_expr = args.remove(0);
11481 // Extract string literal to get field name
11482 let field_name = match &field_expr {
11483 Expression::Literal(lit)
11484 if matches!(
11485 lit.as_ref(),
11486 crate::expressions::Literal::String(_)
11487 ) =>
11488 {
11489 let crate::expressions::Literal::String(s) = lit.as_ref()
11490 else {
11491 unreachable!()
11492 };
11493 s.clone()
11494 }
11495 Expression::Identifier(id) => id.name.clone(),
11496 _ => {
11497 return Ok(Expression::Function(Box::new(Function::new(
11498 "STRUCT_EXTRACT".to_string(),
11499 vec![this, field_expr],
11500 ))))
11501 }
11502 };
11503 Ok(Expression::StructExtract(Box::new(
11504 crate::expressions::StructExtractFunc {
11505 this,
11506 field: crate::expressions::Identifier::new(field_name),
11507 },
11508 )))
11509 }
11510 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
11511 "LIST_FILTER" if f.args.len() == 2 => {
11512 let name = match target {
11513 DialectType::DuckDB => "LIST_FILTER",
11514 _ => "FILTER",
11515 };
11516 Ok(Expression::Function(Box::new(Function::new(
11517 name.to_string(),
11518 f.args,
11519 ))))
11520 }
11521 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
11522 "LIST_TRANSFORM" if f.args.len() == 2 => {
11523 let name = match target {
11524 DialectType::DuckDB => "LIST_TRANSFORM",
11525 _ => "TRANSFORM",
11526 };
11527 Ok(Expression::Function(Box::new(Function::new(
11528 name.to_string(),
11529 f.args,
11530 ))))
11531 }
11532 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
11533 "LIST_SORT" if f.args.len() >= 1 => {
11534 let name = match target {
11535 DialectType::DuckDB => "LIST_SORT",
11536 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
11537 _ => "SORT_ARRAY",
11538 };
11539 Ok(Expression::Function(Box::new(Function::new(
11540 name.to_string(),
11541 f.args,
11542 ))))
11543 }
11544 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
11545 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
11546 match target {
11547 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11548 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
11549 ))),
11550 DialectType::Spark
11551 | DialectType::Databricks
11552 | DialectType::Hive => {
11553 let mut args = f.args;
11554 args.push(Expression::Identifier(
11555 crate::expressions::Identifier::new("FALSE"),
11556 ));
11557 Ok(Expression::Function(Box::new(Function::new(
11558 "SORT_ARRAY".to_string(),
11559 args,
11560 ))))
11561 }
11562 DialectType::Presto
11563 | DialectType::Trino
11564 | DialectType::Athena => {
11565 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
11566 let arr = f.args.into_iter().next().unwrap();
11567 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
11568 parameters: vec![
11569 crate::expressions::Identifier::new("a"),
11570 crate::expressions::Identifier::new("b"),
11571 ],
11572 body: Expression::Case(Box::new(Case {
11573 operand: None,
11574 whens: vec![
11575 (
11576 Expression::Lt(Box::new(BinaryOp::new(
11577 Expression::Identifier(crate::expressions::Identifier::new("a")),
11578 Expression::Identifier(crate::expressions::Identifier::new("b")),
11579 ))),
11580 Expression::number(1),
11581 ),
11582 (
11583 Expression::Gt(Box::new(BinaryOp::new(
11584 Expression::Identifier(crate::expressions::Identifier::new("a")),
11585 Expression::Identifier(crate::expressions::Identifier::new("b")),
11586 ))),
11587 Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
11588 ),
11589 ],
11590 else_: Some(Expression::number(0)),
11591 comments: Vec::new(),
11592 inferred_type: None,
11593 })),
11594 colon: false,
11595 parameter_types: Vec::new(),
11596 }));
11597 Ok(Expression::Function(Box::new(Function::new(
11598 "ARRAY_SORT".to_string(),
11599 vec![arr, lambda],
11600 ))))
11601 }
11602 _ => Ok(Expression::Function(Box::new(Function::new(
11603 "LIST_REVERSE_SORT".to_string(),
11604 f.args,
11605 )))),
11606 }
11607 }
11608 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
11609 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
11610 let mut args = f.args;
11611 args.push(Expression::string(","));
11612 let name = match target {
11613 DialectType::DuckDB => "STR_SPLIT",
11614 DialectType::Presto | DialectType::Trino => "SPLIT",
11615 DialectType::Spark
11616 | DialectType::Databricks
11617 | DialectType::Hive => "SPLIT",
11618 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11619 DialectType::Redshift => "SPLIT_TO_ARRAY",
11620 _ => "SPLIT",
11621 };
11622 Ok(Expression::Function(Box::new(Function::new(
11623 name.to_string(),
11624 args,
11625 ))))
11626 }
11627 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
11628 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
11629 let name = match target {
11630 DialectType::DuckDB => "STR_SPLIT",
11631 DialectType::Presto | DialectType::Trino => "SPLIT",
11632 DialectType::Spark
11633 | DialectType::Databricks
11634 | DialectType::Hive => "SPLIT",
11635 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11636 DialectType::Redshift => "SPLIT_TO_ARRAY",
11637 _ => "SPLIT",
11638 };
11639 Ok(Expression::Function(Box::new(Function::new(
11640 name.to_string(),
11641 f.args,
11642 ))))
11643 }
11644 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
11645 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
11646 let name = match target {
11647 DialectType::DuckDB => "STR_SPLIT",
11648 DialectType::Presto | DialectType::Trino => "SPLIT",
11649 DialectType::Spark
11650 | DialectType::Databricks
11651 | DialectType::Hive => "SPLIT",
11652 DialectType::Doris | DialectType::StarRocks => {
11653 "SPLIT_BY_STRING"
11654 }
11655 DialectType::PostgreSQL | DialectType::Redshift => {
11656 "STRING_TO_ARRAY"
11657 }
11658 _ => "SPLIT",
11659 };
11660 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
11661 if matches!(
11662 target,
11663 DialectType::Spark
11664 | DialectType::Databricks
11665 | DialectType::Hive
11666 ) {
11667 let mut args = f.args;
11668 let x = args.remove(0);
11669 let sep = args.remove(0);
11670 // Wrap separator in CONCAT('\\Q', sep, '\\E')
11671 let escaped_sep =
11672 Expression::Function(Box::new(Function::new(
11673 "CONCAT".to_string(),
11674 vec![
11675 Expression::string("\\Q"),
11676 sep,
11677 Expression::string("\\E"),
11678 ],
11679 )));
11680 Ok(Expression::Function(Box::new(Function::new(
11681 name.to_string(),
11682 vec![x, escaped_sep],
11683 ))))
11684 } else {
11685 Ok(Expression::Function(Box::new(Function::new(
11686 name.to_string(),
11687 f.args,
11688 ))))
11689 }
11690 }
11691 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
11692 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
11693 let name = match target {
11694 DialectType::DuckDB => "STR_SPLIT_REGEX",
11695 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
11696 DialectType::Spark
11697 | DialectType::Databricks
11698 | DialectType::Hive => "SPLIT",
11699 _ => "REGEXP_SPLIT",
11700 };
11701 Ok(Expression::Function(Box::new(Function::new(
11702 name.to_string(),
11703 f.args,
11704 ))))
11705 }
11706 // SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
11707 "SPLIT"
11708 if f.args.len() == 2
11709 && matches!(source, DialectType::Snowflake)
11710 && matches!(target, DialectType::DuckDB) =>
11711 {
11712 let mut args = f.args;
11713 let str_arg = args.remove(0);
11714 let delim_arg = args.remove(0);
11715
11716 // STR_SPLIT(str, delim) as the base
11717 let base_func = Expression::Function(Box::new(Function::new(
11718 "STR_SPLIT".to_string(),
11719 vec![str_arg.clone(), delim_arg.clone()],
11720 )));
11721
11722 // [str] - array with single element
11723 let array_with_input =
11724 Expression::Array(Box::new(crate::expressions::Array {
11725 expressions: vec![str_arg],
11726 }));
11727
11728 // CASE
11729 // WHEN delim IS NULL THEN NULL
11730 // WHEN delim = '' THEN [str]
11731 // ELSE STR_SPLIT(str, delim)
11732 // END
11733 Ok(Expression::Case(Box::new(Case {
11734 operand: None,
11735 whens: vec![
11736 (
11737 Expression::Is(Box::new(BinaryOp {
11738 left: delim_arg.clone(),
11739 right: Expression::Null(Null),
11740 left_comments: vec![],
11741 operator_comments: vec![],
11742 trailing_comments: vec![],
11743 inferred_type: None,
11744 })),
11745 Expression::Null(Null),
11746 ),
11747 (
11748 Expression::Eq(Box::new(BinaryOp {
11749 left: delim_arg,
11750 right: Expression::string(""),
11751 left_comments: vec![],
11752 operator_comments: vec![],
11753 trailing_comments: vec![],
11754 inferred_type: None,
11755 })),
11756 array_with_input,
11757 ),
11758 ],
11759 else_: Some(base_func),
11760 comments: vec![],
11761 inferred_type: None,
11762 })))
11763 }
11764 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
11765 "SPLIT"
11766 if f.args.len() == 2
11767 && matches!(
11768 source,
11769 DialectType::Presto
11770 | DialectType::Trino
11771 | DialectType::Athena
11772 | DialectType::StarRocks
11773 | DialectType::Doris
11774 )
11775 && matches!(
11776 target,
11777 DialectType::Spark
11778 | DialectType::Databricks
11779 | DialectType::Hive
11780 ) =>
11781 {
11782 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
11783 let mut args = f.args;
11784 let x = args.remove(0);
11785 let sep = args.remove(0);
11786 let escaped_sep = Expression::Function(Box::new(Function::new(
11787 "CONCAT".to_string(),
11788 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
11789 )));
11790 Ok(Expression::Function(Box::new(Function::new(
11791 "SPLIT".to_string(),
11792 vec![x, escaped_sep],
11793 ))))
11794 }
11795 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
11796 // For ClickHouse target, preserve original name to maintain camelCase
11797 "SUBSTRINGINDEX" => {
11798 let name = if matches!(target, DialectType::ClickHouse) {
11799 f.name.clone()
11800 } else {
11801 "SUBSTRING_INDEX".to_string()
11802 };
11803 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
11804 }
11805 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
11806 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
11807 // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
11808 if name == "CARDINALITY"
11809 && matches!(source, DialectType::DuckDB)
11810 && matches!(target, DialectType::DuckDB)
11811 {
11812 return Ok(Expression::Function(f));
11813 }
11814 // Get the array argument (first arg, drop dimension args)
11815 let mut args = f.args;
11816 let arr = if args.is_empty() {
11817 return Ok(Expression::Function(Box::new(Function::new(
11818 name.to_string(),
11819 args,
11820 ))));
11821 } else {
11822 args.remove(0)
11823 };
11824 let name =
11825 match target {
11826 DialectType::Spark
11827 | DialectType::Databricks
11828 | DialectType::Hive => "SIZE",
11829 DialectType::Presto | DialectType::Trino => "CARDINALITY",
11830 DialectType::BigQuery => "ARRAY_LENGTH",
11831 DialectType::DuckDB => {
11832 // DuckDB: use ARRAY_LENGTH with all args
11833 let mut all_args = vec![arr];
11834 all_args.extend(args);
11835 return Ok(Expression::Function(Box::new(
11836 Function::new("ARRAY_LENGTH".to_string(), all_args),
11837 )));
11838 }
11839 DialectType::PostgreSQL | DialectType::Redshift => {
11840 // Keep ARRAY_LENGTH with dimension arg
11841 let mut all_args = vec![arr];
11842 all_args.extend(args);
11843 return Ok(Expression::Function(Box::new(
11844 Function::new("ARRAY_LENGTH".to_string(), all_args),
11845 )));
11846 }
11847 DialectType::ClickHouse => "LENGTH",
11848 _ => "ARRAY_LENGTH",
11849 };
11850 Ok(Expression::Function(Box::new(Function::new(
11851 name.to_string(),
11852 vec![arr],
11853 ))))
11854 }
11855 // TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
11856 "TO_VARIANT" if f.args.len() == 1 => match target {
11857 DialectType::DuckDB => {
11858 let arg = f.args.into_iter().next().unwrap();
11859 Ok(Expression::Cast(Box::new(Cast {
11860 this: arg,
11861 to: DataType::Custom {
11862 name: "VARIANT".to_string(),
11863 },
11864 double_colon_syntax: false,
11865 trailing_comments: Vec::new(),
11866 format: None,
11867 default: None,
11868 inferred_type: None,
11869 })))
11870 }
11871 _ => Ok(Expression::Function(f)),
11872 },
11873 // JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
11874 "JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
11875 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11876 Function::new("JSON_AGG".to_string(), f.args),
11877 ))),
11878 _ => Ok(Expression::Function(f)),
11879 },
11880 // JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
11881 "JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
11882 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11883 Function::new("JSON_OBJECT_AGG".to_string(), f.args),
11884 ))),
11885 _ => Ok(Expression::Function(f)),
11886 },
11887 // UNICODE(x) -> target-specific codepoint function
11888 "UNICODE" if f.args.len() == 1 => {
11889 match target {
11890 DialectType::SQLite | DialectType::DuckDB => {
11891 Ok(Expression::Function(Box::new(Function::new(
11892 "UNICODE".to_string(),
11893 f.args,
11894 ))))
11895 }
11896 DialectType::Oracle => {
11897 // ASCII(UNISTR(x))
11898 let inner = Expression::Function(Box::new(Function::new(
11899 "UNISTR".to_string(),
11900 f.args,
11901 )));
11902 Ok(Expression::Function(Box::new(Function::new(
11903 "ASCII".to_string(),
11904 vec![inner],
11905 ))))
11906 }
11907 DialectType::MySQL => {
11908 // ORD(CONVERT(x USING utf32))
11909 let arg = f.args.into_iter().next().unwrap();
11910 let convert_expr = Expression::ConvertToCharset(Box::new(
11911 crate::expressions::ConvertToCharset {
11912 this: Box::new(arg),
11913 dest: Some(Box::new(Expression::Identifier(
11914 crate::expressions::Identifier::new("utf32"),
11915 ))),
11916 source: None,
11917 },
11918 ));
11919 Ok(Expression::Function(Box::new(Function::new(
11920 "ORD".to_string(),
11921 vec![convert_expr],
11922 ))))
11923 }
11924 _ => Ok(Expression::Function(Box::new(Function::new(
11925 "ASCII".to_string(),
11926 f.args,
11927 )))),
11928 }
11929 }
11930 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
11931 "XOR" if f.args.len() >= 2 => {
11932 match target {
11933 DialectType::ClickHouse => {
11934 // ClickHouse: keep as xor() function with lowercase name
11935 Ok(Expression::Function(Box::new(Function::new(
11936 "xor".to_string(),
11937 f.args,
11938 ))))
11939 }
11940 DialectType::Presto | DialectType::Trino => {
11941 if f.args.len() == 2 {
11942 Ok(Expression::Function(Box::new(Function::new(
11943 "BITWISE_XOR".to_string(),
11944 f.args,
11945 ))))
11946 } else {
11947 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
11948 let mut args = f.args;
11949 let first = args.remove(0);
11950 let second = args.remove(0);
11951 let mut result =
11952 Expression::Function(Box::new(Function::new(
11953 "BITWISE_XOR".to_string(),
11954 vec![first, second],
11955 )));
11956 for arg in args {
11957 result =
11958 Expression::Function(Box::new(Function::new(
11959 "BITWISE_XOR".to_string(),
11960 vec![result, arg],
11961 )));
11962 }
11963 Ok(result)
11964 }
11965 }
11966 DialectType::MySQL
11967 | DialectType::SingleStore
11968 | DialectType::Doris
11969 | DialectType::StarRocks => {
11970 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
11971 let args = f.args;
11972 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
11973 this: None,
11974 expression: None,
11975 expressions: args,
11976 })))
11977 }
11978 DialectType::PostgreSQL | DialectType::Redshift => {
11979 // PostgreSQL: a # b (hash operator for XOR)
11980 let mut args = f.args;
11981 let first = args.remove(0);
11982 let second = args.remove(0);
11983 let mut result = Expression::BitwiseXor(Box::new(
11984 BinaryOp::new(first, second),
11985 ));
11986 for arg in args {
11987 result = Expression::BitwiseXor(Box::new(
11988 BinaryOp::new(result, arg),
11989 ));
11990 }
11991 Ok(result)
11992 }
11993 DialectType::DuckDB => {
11994 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
11995 Ok(Expression::Function(Box::new(Function::new(
11996 "XOR".to_string(),
11997 f.args,
11998 ))))
11999 }
12000 DialectType::BigQuery => {
12001 // BigQuery: a ^ b (caret operator for XOR)
12002 let mut args = f.args;
12003 let first = args.remove(0);
12004 let second = args.remove(0);
12005 let mut result = Expression::BitwiseXor(Box::new(
12006 BinaryOp::new(first, second),
12007 ));
12008 for arg in args {
12009 result = Expression::BitwiseXor(Box::new(
12010 BinaryOp::new(result, arg),
12011 ));
12012 }
12013 Ok(result)
12014 }
12015 _ => Ok(Expression::Function(Box::new(Function::new(
12016 "XOR".to_string(),
12017 f.args,
12018 )))),
12019 }
12020 }
12021 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
12022 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
12023 match target {
12024 DialectType::Spark
12025 | DialectType::Databricks
12026 | DialectType::Hive => {
12027 let mut args = f.args;
12028 args.push(Expression::Identifier(
12029 crate::expressions::Identifier::new("FALSE"),
12030 ));
12031 Ok(Expression::Function(Box::new(Function::new(
12032 "SORT_ARRAY".to_string(),
12033 args,
12034 ))))
12035 }
12036 DialectType::Presto
12037 | DialectType::Trino
12038 | DialectType::Athena => {
12039 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
12040 let arr = f.args.into_iter().next().unwrap();
12041 let lambda = Expression::Lambda(Box::new(
12042 crate::expressions::LambdaExpr {
12043 parameters: vec![
12044 Identifier::new("a"),
12045 Identifier::new("b"),
12046 ],
12047 colon: false,
12048 parameter_types: Vec::new(),
12049 body: Expression::Case(Box::new(Case {
12050 operand: None,
12051 whens: vec![
12052 (
12053 Expression::Lt(Box::new(
12054 BinaryOp::new(
12055 Expression::Identifier(
12056 Identifier::new("a"),
12057 ),
12058 Expression::Identifier(
12059 Identifier::new("b"),
12060 ),
12061 ),
12062 )),
12063 Expression::number(1),
12064 ),
12065 (
12066 Expression::Gt(Box::new(
12067 BinaryOp::new(
12068 Expression::Identifier(
12069 Identifier::new("a"),
12070 ),
12071 Expression::Identifier(
12072 Identifier::new("b"),
12073 ),
12074 ),
12075 )),
12076 Expression::Neg(Box::new(
12077 crate::expressions::UnaryOp {
12078 this: Expression::number(1),
12079 inferred_type: None,
12080 },
12081 )),
12082 ),
12083 ],
12084 else_: Some(Expression::number(0)),
12085 comments: Vec::new(),
12086 inferred_type: None,
12087 })),
12088 },
12089 ));
12090 Ok(Expression::Function(Box::new(Function::new(
12091 "ARRAY_SORT".to_string(),
12092 vec![arr, lambda],
12093 ))))
12094 }
12095 _ => Ok(Expression::Function(Box::new(Function::new(
12096 "ARRAY_REVERSE_SORT".to_string(),
12097 f.args,
12098 )))),
12099 }
12100 }
12101 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
12102 "ENCODE" if f.args.len() == 1 => match target {
12103 DialectType::Spark
12104 | DialectType::Databricks
12105 | DialectType::Hive => {
12106 let mut args = f.args;
12107 args.push(Expression::string("utf-8"));
12108 Ok(Expression::Function(Box::new(Function::new(
12109 "ENCODE".to_string(),
12110 args,
12111 ))))
12112 }
12113 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12114 Ok(Expression::Function(Box::new(Function::new(
12115 "TO_UTF8".to_string(),
12116 f.args,
12117 ))))
12118 }
12119 _ => Ok(Expression::Function(Box::new(Function::new(
12120 "ENCODE".to_string(),
12121 f.args,
12122 )))),
12123 },
12124 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
12125 "DECODE" if f.args.len() == 1 => match target {
12126 DialectType::Spark
12127 | DialectType::Databricks
12128 | DialectType::Hive => {
12129 let mut args = f.args;
12130 args.push(Expression::string("utf-8"));
12131 Ok(Expression::Function(Box::new(Function::new(
12132 "DECODE".to_string(),
12133 args,
12134 ))))
12135 }
12136 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12137 Ok(Expression::Function(Box::new(Function::new(
12138 "FROM_UTF8".to_string(),
12139 f.args,
12140 ))))
12141 }
12142 _ => Ok(Expression::Function(Box::new(Function::new(
12143 "DECODE".to_string(),
12144 f.args,
12145 )))),
12146 },
12147 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
12148 "QUANTILE" if f.args.len() == 2 => {
12149 let name = match target {
12150 DialectType::Spark
12151 | DialectType::Databricks
12152 | DialectType::Hive => "PERCENTILE",
12153 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
12154 DialectType::BigQuery => "PERCENTILE_CONT",
12155 _ => "QUANTILE",
12156 };
12157 Ok(Expression::Function(Box::new(Function::new(
12158 name.to_string(),
12159 f.args,
12160 ))))
12161 }
12162 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
12163 "QUANTILE_CONT" if f.args.len() == 2 => {
12164 let mut args = f.args;
12165 let column = args.remove(0);
12166 let quantile = args.remove(0);
12167 match target {
12168 DialectType::DuckDB => {
12169 Ok(Expression::Function(Box::new(Function::new(
12170 "QUANTILE_CONT".to_string(),
12171 vec![column, quantile],
12172 ))))
12173 }
12174 DialectType::PostgreSQL
12175 | DialectType::Redshift
12176 | DialectType::Snowflake => {
12177 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
12178 let inner = Expression::PercentileCont(Box::new(
12179 crate::expressions::PercentileFunc {
12180 this: column.clone(),
12181 percentile: quantile,
12182 order_by: None,
12183 filter: None,
12184 },
12185 ));
12186 Ok(Expression::WithinGroup(Box::new(
12187 crate::expressions::WithinGroup {
12188 this: inner,
12189 order_by: vec![crate::expressions::Ordered {
12190 this: column,
12191 desc: false,
12192 nulls_first: None,
12193 explicit_asc: false,
12194 with_fill: None,
12195 }],
12196 },
12197 )))
12198 }
12199 _ => Ok(Expression::Function(Box::new(Function::new(
12200 "QUANTILE_CONT".to_string(),
12201 vec![column, quantile],
12202 )))),
12203 }
12204 }
12205 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Redshift/Snowflake
12206 "QUANTILE_DISC" if f.args.len() == 2 => {
12207 let mut args = f.args;
12208 let column = args.remove(0);
12209 let quantile = args.remove(0);
12210 match target {
12211 DialectType::DuckDB => {
12212 Ok(Expression::Function(Box::new(Function::new(
12213 "QUANTILE_DISC".to_string(),
12214 vec![column, quantile],
12215 ))))
12216 }
12217 DialectType::PostgreSQL
12218 | DialectType::Redshift
12219 | DialectType::Snowflake => {
12220 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
12221 let inner = Expression::PercentileDisc(Box::new(
12222 crate::expressions::PercentileFunc {
12223 this: column.clone(),
12224 percentile: quantile,
12225 order_by: None,
12226 filter: None,
12227 },
12228 ));
12229 Ok(Expression::WithinGroup(Box::new(
12230 crate::expressions::WithinGroup {
12231 this: inner,
12232 order_by: vec![crate::expressions::Ordered {
12233 this: column,
12234 desc: false,
12235 nulls_first: None,
12236 explicit_asc: false,
12237 with_fill: None,
12238 }],
12239 },
12240 )))
12241 }
12242 _ => Ok(Expression::Function(Box::new(Function::new(
12243 "QUANTILE_DISC".to_string(),
12244 vec![column, quantile],
12245 )))),
12246 }
12247 }
12248 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
12249 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
12250 let name = match target {
12251 DialectType::Presto
12252 | DialectType::Trino
12253 | DialectType::Athena => "APPROX_PERCENTILE",
12254 DialectType::Spark
12255 | DialectType::Databricks
12256 | DialectType::Hive => "PERCENTILE_APPROX",
12257 DialectType::DuckDB => "APPROX_QUANTILE",
12258 DialectType::PostgreSQL | DialectType::Redshift => {
12259 "PERCENTILE_CONT"
12260 }
12261 _ => &f.name,
12262 };
12263 Ok(Expression::Function(Box::new(Function::new(
12264 name.to_string(),
12265 f.args,
12266 ))))
12267 }
12268 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Databricks/Hive, TO_UNIXTIME(x) for Presto/Trino
12269 "EPOCH" if f.args.len() == 1 => {
12270 let name = match target {
12271 DialectType::Spark
12272 | DialectType::Databricks
12273 | DialectType::Hive => "UNIX_TIMESTAMP",
12274 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
12275 _ => "EPOCH",
12276 };
12277 Ok(Expression::Function(Box::new(Function::new(
12278 name.to_string(),
12279 f.args,
12280 ))))
12281 }
12282 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
12283 "EPOCH_MS" if f.args.len() == 1 => {
12284 match target {
12285 DialectType::Spark | DialectType::Databricks => {
12286 Ok(Expression::Function(Box::new(Function::new(
12287 "TIMESTAMP_MILLIS".to_string(),
12288 f.args,
12289 ))))
12290 }
12291 DialectType::Hive => {
12292 // Hive: FROM_UNIXTIME(x / 1000)
12293 let arg = f.args.into_iter().next().unwrap();
12294 let div_expr = Expression::Div(Box::new(
12295 crate::expressions::BinaryOp::new(
12296 arg,
12297 Expression::number(1000),
12298 ),
12299 ));
12300 Ok(Expression::Function(Box::new(Function::new(
12301 "FROM_UNIXTIME".to_string(),
12302 vec![div_expr],
12303 ))))
12304 }
12305 DialectType::Presto | DialectType::Trino => {
12306 Ok(Expression::Function(Box::new(Function::new(
12307 "FROM_UNIXTIME".to_string(),
12308 vec![Expression::Div(Box::new(
12309 crate::expressions::BinaryOp::new(
12310 f.args.into_iter().next().unwrap(),
12311 Expression::number(1000),
12312 ),
12313 ))],
12314 ))))
12315 }
12316 _ => Ok(Expression::Function(Box::new(Function::new(
12317 "EPOCH_MS".to_string(),
12318 f.args,
12319 )))),
12320 }
12321 }
12322 // HASHBYTES('algorithm', x) -> target-specific hash function
12323 "HASHBYTES" if f.args.len() == 2 => {
12324 // Keep HASHBYTES as-is for TSQL target
12325 if matches!(target, DialectType::TSQL) {
12326 return Ok(Expression::Function(f));
12327 }
12328 let algo_expr = &f.args[0];
12329 let algo = match algo_expr {
12330 Expression::Literal(lit)
12331 if matches!(
12332 lit.as_ref(),
12333 crate::expressions::Literal::String(_)
12334 ) =>
12335 {
12336 let crate::expressions::Literal::String(s) = lit.as_ref()
12337 else {
12338 unreachable!()
12339 };
12340 s.to_ascii_uppercase()
12341 }
12342 _ => return Ok(Expression::Function(f)),
12343 };
12344 let data_arg = f.args.into_iter().nth(1).unwrap();
12345 match algo.as_str() {
12346 "SHA1" => {
12347 let name = match target {
12348 DialectType::Spark | DialectType::Databricks => "SHA",
12349 DialectType::Hive => "SHA1",
12350 _ => "SHA1",
12351 };
12352 Ok(Expression::Function(Box::new(Function::new(
12353 name.to_string(),
12354 vec![data_arg],
12355 ))))
12356 }
12357 "SHA2_256" => {
12358 Ok(Expression::Function(Box::new(Function::new(
12359 "SHA2".to_string(),
12360 vec![data_arg, Expression::number(256)],
12361 ))))
12362 }
12363 "SHA2_512" => {
12364 Ok(Expression::Function(Box::new(Function::new(
12365 "SHA2".to_string(),
12366 vec![data_arg, Expression::number(512)],
12367 ))))
12368 }
12369 "MD5" => Ok(Expression::Function(Box::new(Function::new(
12370 "MD5".to_string(),
12371 vec![data_arg],
12372 )))),
12373 _ => Ok(Expression::Function(Box::new(Function::new(
12374 "HASHBYTES".to_string(),
12375 vec![Expression::string(&algo), data_arg],
12376 )))),
12377 }
12378 }
12379 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
12380 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
12381 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
12382 let mut args = f.args;
12383 let json_expr = args.remove(0);
12384 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0] (a key that is not a string literal contributes `.?`)
12385 let mut json_path = "$".to_string();
12386 for a in &args {
12387 match a {
12388 Expression::Literal(lit)
12389 if matches!(
12390 lit.as_ref(),
12391 crate::expressions::Literal::String(_)
12392 ) =>
12393 {
12394 let crate::expressions::Literal::String(s) =
12395 lit.as_ref()
12396 else {
12397 unreachable!()
12398 };
12399 // Numeric string keys become array indices: [0]
12400 if s.chars().all(|c| c.is_ascii_digit()) {
12401 json_path.push('[');
12402 json_path.push_str(s);
12403 json_path.push(']');
12404 } else {
12405 json_path.push('.');
12406 json_path.push_str(s);
12407 }
12408 }
12409 _ => {
12410 json_path.push_str(".?");
12411 }
12412 }
12413 }
12414 match target {
12415 DialectType::Spark
12416 | DialectType::Databricks
12417 | DialectType::Hive => {
12418 Ok(Expression::Function(Box::new(Function::new(
12419 "GET_JSON_OBJECT".to_string(),
12420 vec![json_expr, Expression::string(&json_path)],
12421 ))))
12422 }
12423 DialectType::Presto | DialectType::Trino => {
12424 let func_name = if is_text {
12425 "JSON_EXTRACT_SCALAR"
12426 } else {
12427 "JSON_EXTRACT"
12428 };
12429 Ok(Expression::Function(Box::new(Function::new(
12430 func_name.to_string(),
12431 vec![json_expr, Expression::string(&json_path)],
12432 ))))
12433 }
12434 DialectType::BigQuery | DialectType::MySQL => {
12435 let func_name = if is_text {
12436 "JSON_EXTRACT_SCALAR"
12437 } else {
12438 "JSON_EXTRACT"
12439 };
12440 Ok(Expression::Function(Box::new(Function::new(
12441 func_name.to_string(),
12442 vec![json_expr, Expression::string(&json_path)],
12443 ))))
12444 }
12445 DialectType::PostgreSQL | DialectType::Materialize => {
12446 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
12447 let func_name = if is_text {
12448 "JSON_EXTRACT_PATH_TEXT"
12449 } else {
12450 "JSON_EXTRACT_PATH"
12451 };
12452 let mut new_args = vec![json_expr];
12453 new_args.extend(args);
12454 Ok(Expression::Function(Box::new(Function::new(
12455 func_name.to_string(),
12456 new_args,
12457 ))))
12458 }
12459 DialectType::DuckDB | DialectType::SQLite => {
12460 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
12461 if is_text {
12462 Ok(Expression::JsonExtractScalar(Box::new(
12463 crate::expressions::JsonExtractFunc {
12464 this: json_expr,
12465 path: Expression::string(&json_path),
12466 returning: None,
12467 arrow_syntax: true,
12468 hash_arrow_syntax: false,
12469 wrapper_option: None,
12470 quotes_option: None,
12471 on_scalar_string: false,
12472 on_error: None,
12473 },
12474 )))
12475 } else {
12476 Ok(Expression::JsonExtract(Box::new(
12477 crate::expressions::JsonExtractFunc {
12478 this: json_expr,
12479 path: Expression::string(&json_path),
12480 returning: None,
12481 arrow_syntax: true,
12482 hash_arrow_syntax: false,
12483 wrapper_option: None,
12484 quotes_option: None,
12485 on_scalar_string: false,
12486 on_error: None,
12487 },
12488 )))
12489 }
12490 }
12491 DialectType::Redshift => {
12492 // Redshift: always emit JSON_EXTRACT_PATH_TEXT with the original key arguments, even when the source call was JSON_EXTRACT_PATH
12493 let mut new_args = vec![json_expr];
12494 new_args.extend(args);
12495 Ok(Expression::Function(Box::new(Function::new(
12496 "JSON_EXTRACT_PATH_TEXT".to_string(),
12497 new_args,
12498 ))))
12499 }
12500 DialectType::TSQL => {
12501 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
12502 let jq = Expression::Function(Box::new(Function::new(
12503 "JSON_QUERY".to_string(),
12504 vec![json_expr.clone(), Expression::string(&json_path)],
12505 )));
12506 let jv = Expression::Function(Box::new(Function::new(
12507 "JSON_VALUE".to_string(),
12508 vec![json_expr, Expression::string(&json_path)],
12509 )));
12510 Ok(Expression::Function(Box::new(Function::new(
12511 "ISNULL".to_string(),
12512 vec![jq, jv],
12513 ))))
12514 }
12515 DialectType::ClickHouse => {
12516 let func_name = if is_text {
12517 "JSONExtractString"
12518 } else {
12519 "JSONExtractRaw"
12520 };
12521 let mut new_args = vec![json_expr];
12522 new_args.extend(args);
12523 Ok(Expression::Function(Box::new(Function::new(
12524 func_name.to_string(),
12525 new_args,
12526 ))))
12527 }
12528 _ => {
12529 let func_name = if is_text {
12530 "JSON_EXTRACT_SCALAR"
12531 } else {
12532 "JSON_EXTRACT"
12533 };
12534 Ok(Expression::Function(Box::new(Function::new(
12535 func_name.to_string(),
12536 vec![json_expr, Expression::string(&json_path)],
12537 ))))
12538 }
12539 }
12540 }
12541 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
12542 "APPROX_DISTINCT" if f.args.len() >= 1 => {
12543 let name = match target {
12544 DialectType::Spark
12545 | DialectType::Databricks
12546 | DialectType::Hive
12547 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
12548 _ => "APPROX_DISTINCT",
12549 };
12550 let mut args = f.args;
12551 // Hive doesn't support the accuracy parameter
12552 if name == "APPROX_COUNT_DISTINCT"
12553 && matches!(target, DialectType::Hive)
12554 {
12555 args.truncate(1);
12556 }
12557 Ok(Expression::Function(Box::new(Function::new(
12558 name.to_string(),
12559 args,
12560 ))))
12561 }
12562 // REGEXP_EXTRACT(x, pattern) - normalize default group index
12563 "REGEXP_EXTRACT" if f.args.len() == 2 => {
12564 // Determine source default group index
12565 let source_default = match source {
12566 DialectType::Presto
12567 | DialectType::Trino
12568 | DialectType::DuckDB => 0,
12569 _ => 1, // Hive/Spark/Databricks default = 1
12570 };
12571 // Determine target default group index
12572 let target_default = match target {
12573 DialectType::Presto
12574 | DialectType::Trino
12575 | DialectType::DuckDB
12576 | DialectType::BigQuery => 0,
12577 DialectType::Snowflake => {
12578 // Snowflake uses REGEXP_SUBSTR
12579 return Ok(Expression::Function(Box::new(Function::new(
12580 "REGEXP_SUBSTR".to_string(),
12581 f.args,
12582 ))));
12583 }
12584 _ => 1, // Hive/Spark/Databricks default = 1
12585 };
12586 if source_default != target_default {
12587 let mut args = f.args;
12588 args.push(Expression::number(source_default));
12589 Ok(Expression::Function(Box::new(Function::new(
12590 "REGEXP_EXTRACT".to_string(),
12591 args,
12592 ))))
12593 } else {
12594 Ok(Expression::Function(Box::new(Function::new(
12595 "REGEXP_EXTRACT".to_string(),
12596 f.args,
12597 ))))
12598 }
12599 }
12600 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
12601 "RLIKE" if f.args.len() == 2 => {
12602 let mut args = f.args;
12603 let str_expr = args.remove(0);
12604 let pattern = args.remove(0);
12605 match target {
12606 DialectType::DuckDB => {
12607 // REGEXP_MATCHES(str, pattern)
12608 Ok(Expression::Function(Box::new(Function::new(
12609 "REGEXP_MATCHES".to_string(),
12610 vec![str_expr, pattern],
12611 ))))
12612 }
12613 _ => {
12614 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
12615 Ok(Expression::RegexpLike(Box::new(
12616 crate::expressions::RegexpFunc {
12617 this: str_expr,
12618 pattern,
12619 flags: None,
12620 },
12621 )))
12622 }
12623 }
12624 }
12625 // EOMONTH(date[, month_offset]) -> target-specific
12626 "EOMONTH" if f.args.len() >= 1 => {
12627 let mut args = f.args;
12628 let date_arg = args.remove(0);
12629 let month_offset = if !args.is_empty() {
12630 Some(args.remove(0))
12631 } else {
12632 None
12633 };
12634
12635 // Helper: wrap date in CAST to DATE
12636 let cast_to_date = |e: Expression| -> Expression {
12637 Expression::Cast(Box::new(Cast {
12638 this: e,
12639 to: DataType::Date,
12640 trailing_comments: vec![],
12641 double_colon_syntax: false,
12642 format: None,
12643 default: None,
12644 inferred_type: None,
12645 }))
12646 };
12647
12648 match target {
12649 DialectType::TSQL | DialectType::Fabric => {
12650 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
12651 let date = cast_to_date(date_arg);
12652 let date = if let Some(offset) = month_offset {
12653 Expression::Function(Box::new(Function::new(
12654 "DATEADD".to_string(),
12655 vec![
12656 Expression::Identifier(Identifier::new(
12657 "MONTH",
12658 )),
12659 offset,
12660 date,
12661 ],
12662 )))
12663 } else {
12664 date
12665 };
12666 Ok(Expression::Function(Box::new(Function::new(
12667 "EOMONTH".to_string(),
12668 vec![date],
12669 ))))
12670 }
12671 DialectType::Presto
12672 | DialectType::Trino
12673 | DialectType::Athena => {
12674 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
12675 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
12676 let cast_ts = Expression::Cast(Box::new(Cast {
12677 this: date_arg,
12678 to: DataType::Timestamp {
12679 timezone: false,
12680 precision: None,
12681 },
12682 trailing_comments: vec![],
12683 double_colon_syntax: false,
12684 format: None,
12685 default: None,
12686 inferred_type: None,
12687 }));
12688 let date = cast_to_date(cast_ts);
12689 let date = if let Some(offset) = month_offset {
12690 Expression::Function(Box::new(Function::new(
12691 "DATE_ADD".to_string(),
12692 vec![Expression::string("MONTH"), offset, date],
12693 )))
12694 } else {
12695 date
12696 };
12697 Ok(Expression::Function(Box::new(Function::new(
12698 "LAST_DAY_OF_MONTH".to_string(),
12699 vec![date],
12700 ))))
12701 }
12702 DialectType::PostgreSQL => {
12703 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
12704 let date = cast_to_date(date_arg);
12705 let date = if let Some(offset) = month_offset {
12706 let interval_str = format!(
12707 "{} MONTH",
12708 Self::expr_to_string_static(&offset)
12709 );
12710 Expression::Add(Box::new(
12711 crate::expressions::BinaryOp::new(
12712 date,
12713 Expression::Interval(Box::new(
12714 crate::expressions::Interval {
12715 this: Some(Expression::string(
12716 &interval_str,
12717 )),
12718 unit: None,
12719 },
12720 )),
12721 ),
12722 ))
12723 } else {
12724 date
12725 };
12726 let truncated =
12727 Expression::Function(Box::new(Function::new(
12728 "DATE_TRUNC".to_string(),
12729 vec![Expression::string("MONTH"), date],
12730 )));
12731 let plus_month = Expression::Add(Box::new(
12732 crate::expressions::BinaryOp::new(
12733 truncated,
12734 Expression::Interval(Box::new(
12735 crate::expressions::Interval {
12736 this: Some(Expression::string("1 MONTH")),
12737 unit: None,
12738 },
12739 )),
12740 ),
12741 ));
12742 let minus_day = Expression::Sub(Box::new(
12743 crate::expressions::BinaryOp::new(
12744 plus_month,
12745 Expression::Interval(Box::new(
12746 crate::expressions::Interval {
12747 this: Some(Expression::string("1 DAY")),
12748 unit: None,
12749 },
12750 )),
12751 ),
12752 ));
12753 Ok(Expression::Cast(Box::new(Cast {
12754 this: minus_day,
12755 to: DataType::Date,
12756 trailing_comments: vec![],
12757 double_colon_syntax: false,
12758 format: None,
12759 default: None,
12760 inferred_type: None,
12761 })))
12762 }
12763 DialectType::DuckDB => {
12764 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
12765 let date = cast_to_date(date_arg);
12766 let date = if let Some(offset) = month_offset {
12767 // Wrap negative numbers in parentheses for DuckDB INTERVAL
12768 let interval_val =
12769 if matches!(&offset, Expression::Neg(_)) {
12770 Expression::Paren(Box::new(
12771 crate::expressions::Paren {
12772 this: offset,
12773 trailing_comments: Vec::new(),
12774 },
12775 ))
12776 } else {
12777 offset
12778 };
12779 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
12780 date,
12781 Expression::Interval(Box::new(crate::expressions::Interval {
12782 this: Some(interval_val),
12783 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
12784 unit: crate::expressions::IntervalUnit::Month,
12785 use_plural: false,
12786 }),
12787 })),
12788 )))
12789 } else {
12790 date
12791 };
12792 Ok(Expression::Function(Box::new(Function::new(
12793 "LAST_DAY".to_string(),
12794 vec![date],
12795 ))))
12796 }
12797 DialectType::Snowflake | DialectType::Redshift => {
12798 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
12799 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
12800 let date = if matches!(target, DialectType::Snowflake) {
12801 Expression::Function(Box::new(Function::new(
12802 "TO_DATE".to_string(),
12803 vec![date_arg],
12804 )))
12805 } else {
12806 cast_to_date(date_arg)
12807 };
12808 let date = if let Some(offset) = month_offset {
12809 Expression::Function(Box::new(Function::new(
12810 "DATEADD".to_string(),
12811 vec![
12812 Expression::Identifier(Identifier::new(
12813 "MONTH",
12814 )),
12815 offset,
12816 date,
12817 ],
12818 )))
12819 } else {
12820 date
12821 };
12822 Ok(Expression::Function(Box::new(Function::new(
12823 "LAST_DAY".to_string(),
12824 vec![date],
12825 ))))
12826 }
12827 DialectType::Spark | DialectType::Databricks => {
12828 // Spark: LAST_DAY(TO_DATE(date))
12829 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
12830 let date = Expression::Function(Box::new(Function::new(
12831 "TO_DATE".to_string(),
12832 vec![date_arg],
12833 )));
12834 let date = if let Some(offset) = month_offset {
12835 Expression::Function(Box::new(Function::new(
12836 "ADD_MONTHS".to_string(),
12837 vec![date, offset],
12838 )))
12839 } else {
12840 date
12841 };
12842 Ok(Expression::Function(Box::new(Function::new(
12843 "LAST_DAY".to_string(),
12844 vec![date],
12845 ))))
12846 }
12847 DialectType::MySQL => {
12848 // MySQL: LAST_DAY(DATE(date)) - no offset
12849 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
12850 let date = if let Some(offset) = month_offset {
12851 let iu = crate::expressions::IntervalUnit::Month;
12852 Expression::DateAdd(Box::new(
12853 crate::expressions::DateAddFunc {
12854 this: date_arg,
12855 interval: offset,
12856 unit: iu,
12857 },
12858 ))
12859 } else {
12860 Expression::Function(Box::new(Function::new(
12861 "DATE".to_string(),
12862 vec![date_arg],
12863 )))
12864 };
12865 Ok(Expression::Function(Box::new(Function::new(
12866 "LAST_DAY".to_string(),
12867 vec![date],
12868 ))))
12869 }
12870 DialectType::BigQuery => {
12871 // BigQuery: LAST_DAY(CAST(date AS DATE))
12872 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
12873 let date = cast_to_date(date_arg);
12874 let date = if let Some(offset) = month_offset {
12875 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
12876 this: Some(offset),
12877 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
12878 unit: crate::expressions::IntervalUnit::Month,
12879 use_plural: false,
12880 }),
12881 }));
12882 Expression::Function(Box::new(Function::new(
12883 "DATE_ADD".to_string(),
12884 vec![date, interval],
12885 )))
12886 } else {
12887 date
12888 };
12889 Ok(Expression::Function(Box::new(Function::new(
12890 "LAST_DAY".to_string(),
12891 vec![date],
12892 ))))
12893 }
12894 DialectType::ClickHouse => {
12895 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
12896 let date = Expression::Cast(Box::new(Cast {
12897 this: date_arg,
12898 to: DataType::Nullable {
12899 inner: Box::new(DataType::Date),
12900 },
12901 trailing_comments: vec![],
12902 double_colon_syntax: false,
12903 format: None,
12904 default: None,
12905 inferred_type: None,
12906 }));
12907 let date = if let Some(offset) = month_offset {
12908 Expression::Function(Box::new(Function::new(
12909 "DATE_ADD".to_string(),
12910 vec![
12911 Expression::Identifier(Identifier::new(
12912 "MONTH",
12913 )),
12914 offset,
12915 date,
12916 ],
12917 )))
12918 } else {
12919 date
12920 };
12921 Ok(Expression::Function(Box::new(Function::new(
12922 "LAST_DAY".to_string(),
12923 vec![date],
12924 ))))
12925 }
12926 DialectType::Hive => {
12927 // Hive: LAST_DAY(date)
12928 let date = if let Some(offset) = month_offset {
12929 Expression::Function(Box::new(Function::new(
12930 "ADD_MONTHS".to_string(),
12931 vec![date_arg, offset],
12932 )))
12933 } else {
12934 date_arg
12935 };
12936 Ok(Expression::Function(Box::new(Function::new(
12937 "LAST_DAY".to_string(),
12938 vec![date],
12939 ))))
12940 }
12941 _ => {
12942 // Default: LAST_DAY(date)
12943 let date = if let Some(offset) = month_offset {
12944 let unit =
12945 Expression::Identifier(Identifier::new("MONTH"));
12946 Expression::Function(Box::new(Function::new(
12947 "DATEADD".to_string(),
12948 vec![unit, offset, date_arg],
12949 )))
12950 } else {
12951 date_arg
12952 };
12953 Ok(Expression::Function(Box::new(Function::new(
12954 "LAST_DAY".to_string(),
12955 vec![date],
12956 ))))
12957 }
12958 }
12959 }
12960 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
12961 "LAST_DAY" | "LAST_DAY_OF_MONTH"
12962 if !matches!(source, DialectType::BigQuery)
12963 && f.args.len() >= 1 =>
12964 {
12965 let first_arg = f.args.into_iter().next().unwrap();
12966 match target {
12967 DialectType::TSQL | DialectType::Fabric => {
12968 Ok(Expression::Function(Box::new(Function::new(
12969 "EOMONTH".to_string(),
12970 vec![first_arg],
12971 ))))
12972 }
12973 DialectType::Presto
12974 | DialectType::Trino
12975 | DialectType::Athena => {
12976 Ok(Expression::Function(Box::new(Function::new(
12977 "LAST_DAY_OF_MONTH".to_string(),
12978 vec![first_arg],
12979 ))))
12980 }
12981 _ => Ok(Expression::Function(Box::new(Function::new(
12982 "LAST_DAY".to_string(),
12983 vec![first_arg],
12984 )))),
12985 }
12986 }
12987 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
12988 "MAP"
12989 if f.args.len() == 2
12990 && matches!(
12991 source,
12992 DialectType::Presto
12993 | DialectType::Trino
12994 | DialectType::Athena
12995 ) =>
12996 {
12997 let keys_arg = f.args[0].clone();
12998 let vals_arg = f.args[1].clone();
12999
13000 // Helper: borrow the element list of an array-like expression. Returns `Some` for `Expression::Array`, `Expression::ArrayFunc`, or a `Function` named "ARRAY" (ASCII case-insensitive); returns `None` for anything else.
13001 fn extract_array_elements(
13002 expr: &Expression,
13003 ) -> Option<&Vec<Expression>> {
13004 match expr {
13005 Expression::Array(arr) => Some(&arr.expressions), // elements of an `Array` node
13006 Expression::ArrayFunc(arr) => Some(&arr.expressions), // elements of an `ArrayFunc` node
13007 Expression::Function(f)
13008 if f.name.eq_ignore_ascii_case("ARRAY") => // generic function call spelled ARRAY in any ASCII case
13009 {
13010 Some(&f.args) // its call arguments are treated as the elements
13011 }
13012 _ => None, // not an array-like expression
13013 }
13014 }
13015
13016 match target {
13017 DialectType::Spark | DialectType::Databricks => {
13018 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
13019 Ok(Expression::Function(Box::new(Function::new(
13020 "MAP_FROM_ARRAYS".to_string(),
13021 f.args,
13022 ))))
13023 }
13024 DialectType::Hive => {
13025 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
13026 if let (Some(keys), Some(vals)) = (
13027 extract_array_elements(&keys_arg),
13028 extract_array_elements(&vals_arg),
13029 ) {
13030 if keys.len() == vals.len() {
13031 let mut interleaved = Vec::new();
13032 for (k, v) in keys.iter().zip(vals.iter()) {
13033 interleaved.push(k.clone());
13034 interleaved.push(v.clone());
13035 }
13036 Ok(Expression::Function(Box::new(Function::new(
13037 "MAP".to_string(),
13038 interleaved,
13039 ))))
13040 } else {
13041 Ok(Expression::Function(Box::new(Function::new(
13042 "MAP".to_string(),
13043 f.args,
13044 ))))
13045 }
13046 } else {
13047 Ok(Expression::Function(Box::new(Function::new(
13048 "MAP".to_string(),
13049 f.args,
13050 ))))
13051 }
13052 }
13053 DialectType::Snowflake => {
13054 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
13055 if let (Some(keys), Some(vals)) = (
13056 extract_array_elements(&keys_arg),
13057 extract_array_elements(&vals_arg),
13058 ) {
13059 if keys.len() == vals.len() {
13060 let mut interleaved = Vec::new();
13061 for (k, v) in keys.iter().zip(vals.iter()) {
13062 interleaved.push(k.clone());
13063 interleaved.push(v.clone());
13064 }
13065 Ok(Expression::Function(Box::new(Function::new(
13066 "OBJECT_CONSTRUCT".to_string(),
13067 interleaved,
13068 ))))
13069 } else {
13070 Ok(Expression::Function(Box::new(Function::new(
13071 "MAP".to_string(),
13072 f.args,
13073 ))))
13074 }
13075 } else {
13076 Ok(Expression::Function(Box::new(Function::new(
13077 "MAP".to_string(),
13078 f.args,
13079 ))))
13080 }
13081 }
13082 _ => Ok(Expression::Function(f)),
13083 }
13084 }
13085 // MAP() with 0 args from Hive/Spark/Databricks -> MAP(ARRAY[], ARRAY[]) for Presto/Trino/Athena
13086 "MAP"
13087 if f.args.is_empty()
13088 && matches!(
13089 source,
13090 DialectType::Hive
13091 | DialectType::Spark
13092 | DialectType::Databricks
13093 )
13094 && matches!(
13095 target,
13096 DialectType::Presto
13097 | DialectType::Trino
13098 | DialectType::Athena
13099 ) =>
13100 {
13101 let empty_keys =
13102 Expression::Array(Box::new(crate::expressions::Array {
13103 expressions: vec![],
13104 }));
13105 let empty_vals =
13106 Expression::Array(Box::new(crate::expressions::Array {
13107 expressions: vec![],
13108 }));
13109 Ok(Expression::Function(Box::new(Function::new(
13110 "MAP".to_string(),
13111 vec![empty_keys, empty_vals],
13112 ))))
13113 }
13114 // MAP(k1, v1, k2, v2, ...) from Hive/Spark/Databricks/ClickHouse -> target-specific
13115 "MAP"
13116 if f.args.len() >= 2
13117 && f.args.len() % 2 == 0
13118 && matches!(
13119 source,
13120 DialectType::Hive
13121 | DialectType::Spark
13122 | DialectType::Databricks
13123 | DialectType::ClickHouse
13124 ) =>
13125 {
13126 let args = f.args;
13127 match target {
13128 DialectType::DuckDB => {
13129 // MAP([k1, k2], [v1, v2])
13130 let mut keys = Vec::new();
13131 let mut vals = Vec::new();
13132 for (i, arg) in args.into_iter().enumerate() {
13133 if i % 2 == 0 {
13134 keys.push(arg);
13135 } else {
13136 vals.push(arg);
13137 }
13138 }
13139 let keys_arr = Expression::Array(Box::new(
13140 crate::expressions::Array { expressions: keys },
13141 ));
13142 let vals_arr = Expression::Array(Box::new(
13143 crate::expressions::Array { expressions: vals },
13144 ));
13145 Ok(Expression::Function(Box::new(Function::new(
13146 "MAP".to_string(),
13147 vec![keys_arr, vals_arr],
13148 ))))
13149 }
13150 DialectType::Presto | DialectType::Trino => {
13151 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
13152 let mut keys = Vec::new();
13153 let mut vals = Vec::new();
13154 for (i, arg) in args.into_iter().enumerate() {
13155 if i % 2 == 0 {
13156 keys.push(arg);
13157 } else {
13158 vals.push(arg);
13159 }
13160 }
13161 let keys_arr = Expression::Array(Box::new(
13162 crate::expressions::Array { expressions: keys },
13163 ));
13164 let vals_arr = Expression::Array(Box::new(
13165 crate::expressions::Array { expressions: vals },
13166 ));
13167 Ok(Expression::Function(Box::new(Function::new(
13168 "MAP".to_string(),
13169 vec![keys_arr, vals_arr],
13170 ))))
13171 }
13172 DialectType::Snowflake => Ok(Expression::Function(Box::new(
13173 Function::new("OBJECT_CONSTRUCT".to_string(), args),
13174 ))),
13175 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
13176 Function::new("map".to_string(), args),
13177 ))),
13178 _ => Ok(Expression::Function(Box::new(Function::new(
13179 "MAP".to_string(),
13180 args,
13181 )))),
13182 }
13183 }
13184 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
13185 "COLLECT_LIST" if f.args.len() >= 1 => {
13186 let name = match target {
13187 DialectType::Spark
13188 | DialectType::Databricks
13189 | DialectType::Hive => "COLLECT_LIST",
13190 DialectType::DuckDB
13191 | DialectType::PostgreSQL
13192 | DialectType::Redshift
13193 | DialectType::Snowflake
13194 | DialectType::BigQuery => "ARRAY_AGG",
13195 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
13196 _ => "ARRAY_AGG",
13197 };
13198 Ok(Expression::Function(Box::new(Function::new(
13199 name.to_string(),
13200 f.args,
13201 ))))
13202 }
13203 // COLLECT_SET(x) -> target-specific distinct array aggregation
13204 "COLLECT_SET" if f.args.len() >= 1 => {
13205 let name = match target {
13206 DialectType::Spark
13207 | DialectType::Databricks
13208 | DialectType::Hive => "COLLECT_SET",
13209 DialectType::Presto
13210 | DialectType::Trino
13211 | DialectType::Athena => "SET_AGG",
13212 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
13213 _ => "ARRAY_AGG",
13214 };
13215 Ok(Expression::Function(Box::new(Function::new(
13216 name.to_string(),
13217 f.args,
13218 ))))
13219 }
13220 // ISNAN(x) / IS_NAN(x) - normalize
13221 "ISNAN" | "IS_NAN" => {
13222 let name = match target {
13223 DialectType::Spark
13224 | DialectType::Databricks
13225 | DialectType::Hive => "ISNAN",
13226 DialectType::Presto
13227 | DialectType::Trino
13228 | DialectType::Athena => "IS_NAN",
13229 DialectType::BigQuery
13230 | DialectType::PostgreSQL
13231 | DialectType::Redshift => "IS_NAN",
13232 DialectType::ClickHouse => "IS_NAN",
13233 _ => "ISNAN",
13234 };
13235 Ok(Expression::Function(Box::new(Function::new(
13236 name.to_string(),
13237 f.args,
13238 ))))
13239 }
13240 // SPLIT_PART(str, delim, index) -> target-specific
13241 "SPLIT_PART" if f.args.len() == 3 => {
13242 match target {
13243 DialectType::Spark | DialectType::Databricks => {
13244 // Keep as SPLIT_PART (Spark 3.4+)
13245 Ok(Expression::Function(Box::new(Function::new(
13246 "SPLIT_PART".to_string(),
13247 f.args,
13248 ))))
13249 }
13250 DialectType::DuckDB
13251 if matches!(source, DialectType::Snowflake) =>
13252 {
13253 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
13254 // - part_index 0 treated as 1
13255 // - empty delimiter: return whole string if index 1 or -1, else ''
13256 let mut args = f.args;
13257 let str_arg = args.remove(0);
13258 let delim_arg = args.remove(0);
13259 let idx_arg = args.remove(0);
13260
13261 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
13262 let adjusted_idx = Expression::Paren(Box::new(Paren {
13263 this: Expression::Case(Box::new(Case {
13264 operand: None,
13265 whens: vec![(
13266 Expression::Eq(Box::new(BinaryOp {
13267 left: idx_arg.clone(),
13268 right: Expression::number(0),
13269 left_comments: vec![],
13270 operator_comments: vec![],
13271 trailing_comments: vec![],
13272 inferred_type: None,
13273 })),
13274 Expression::number(1),
13275 )],
13276 else_: Some(idx_arg.clone()),
13277 comments: vec![],
13278 inferred_type: None,
13279 })),
13280 trailing_comments: vec![],
13281 }));
13282
13283 // SPLIT_PART(str, delim, adjusted_idx)
13284 let base_func =
13285 Expression::Function(Box::new(Function::new(
13286 "SPLIT_PART".to_string(),
13287 vec![
13288 str_arg.clone(),
13289 delim_arg.clone(),
13290 adjusted_idx.clone(),
13291 ],
13292 )));
13293
13294 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
13295 let empty_delim_case = Expression::Paren(Box::new(Paren {
13296 this: Expression::Case(Box::new(Case {
13297 operand: None,
13298 whens: vec![(
13299 Expression::Or(Box::new(BinaryOp {
13300 left: Expression::Eq(Box::new(BinaryOp {
13301 left: adjusted_idx.clone(),
13302 right: Expression::number(1),
13303 left_comments: vec![],
13304 operator_comments: vec![],
13305 trailing_comments: vec![],
13306 inferred_type: None,
13307 })),
13308 right: Expression::Eq(Box::new(BinaryOp {
13309 left: adjusted_idx,
13310 right: Expression::number(-1),
13311 left_comments: vec![],
13312 operator_comments: vec![],
13313 trailing_comments: vec![],
13314 inferred_type: None,
13315 })),
13316 left_comments: vec![],
13317 operator_comments: vec![],
13318 trailing_comments: vec![],
13319 inferred_type: None,
13320 })),
13321 str_arg,
13322 )],
13323 else_: Some(Expression::string("")),
13324 comments: vec![],
13325 inferred_type: None,
13326 })),
13327 trailing_comments: vec![],
13328 }));
13329
13330 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
13331 Ok(Expression::Case(Box::new(Case {
13332 operand: None,
13333 whens: vec![(
13334 Expression::Eq(Box::new(BinaryOp {
13335 left: delim_arg,
13336 right: Expression::string(""),
13337 left_comments: vec![],
13338 operator_comments: vec![],
13339 trailing_comments: vec![],
13340 inferred_type: None,
13341 })),
13342 empty_delim_case,
13343 )],
13344 else_: Some(base_func),
13345 comments: vec![],
13346 inferred_type: None,
13347 })))
13348 }
13349 DialectType::DuckDB
13350 | DialectType::PostgreSQL
13351 | DialectType::Snowflake
13352 | DialectType::Redshift
13353 | DialectType::Trino
13354 | DialectType::Presto => Ok(Expression::Function(Box::new(
13355 Function::new("SPLIT_PART".to_string(), f.args),
13356 ))),
13357 DialectType::Hive => {
13358 // SPLIT(str, delim)[index]
13359 // Complex conversion, just keep as-is for now
13360 Ok(Expression::Function(Box::new(Function::new(
13361 "SPLIT_PART".to_string(),
13362 f.args,
13363 ))))
13364 }
13365 _ => Ok(Expression::Function(Box::new(Function::new(
13366 "SPLIT_PART".to_string(),
13367 f.args,
13368 )))),
13369 }
13370 }
13371 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
13372 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
13373 let is_scalar = name == "JSON_EXTRACT_SCALAR";
13374 match target {
13375 DialectType::Spark
13376 | DialectType::Databricks
13377 | DialectType::Hive => {
13378 let mut args = f.args;
13379 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
13380 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
13381 if let Some(Expression::Function(inner)) = args.first() {
13382 if inner.name.eq_ignore_ascii_case("TRY")
13383 && inner.args.len() == 1
13384 {
13385 let mut inner_args = inner.args.clone();
13386 args[0] = inner_args.remove(0);
13387 }
13388 }
13389 Ok(Expression::Function(Box::new(Function::new(
13390 "GET_JSON_OBJECT".to_string(),
13391 args,
13392 ))))
13393 }
13394 DialectType::DuckDB | DialectType::SQLite => {
13395 // json -> path syntax
13396 let mut args = f.args;
13397 let json_expr = args.remove(0);
13398 let path = args.remove(0);
13399 Ok(Expression::JsonExtract(Box::new(
13400 crate::expressions::JsonExtractFunc {
13401 this: json_expr,
13402 path,
13403 returning: None,
13404 arrow_syntax: true,
13405 hash_arrow_syntax: false,
13406 wrapper_option: None,
13407 quotes_option: None,
13408 on_scalar_string: false,
13409 on_error: None,
13410 },
13411 )))
13412 }
13413 DialectType::TSQL => {
13414 let func_name = if is_scalar {
13415 "JSON_VALUE"
13416 } else {
13417 "JSON_QUERY"
13418 };
13419 Ok(Expression::Function(Box::new(Function::new(
13420 func_name.to_string(),
13421 f.args,
13422 ))))
13423 }
13424 DialectType::PostgreSQL | DialectType::Redshift => {
13425 let func_name = if is_scalar {
13426 "JSON_EXTRACT_PATH_TEXT"
13427 } else {
13428 "JSON_EXTRACT_PATH"
13429 };
13430 Ok(Expression::Function(Box::new(Function::new(
13431 func_name.to_string(),
13432 f.args,
13433 ))))
13434 }
13435 _ => Ok(Expression::Function(Box::new(Function::new(
13436 name.to_string(),
13437 f.args,
13438 )))),
13439 }
13440 }
13441 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
13442 "JSON_SEARCH"
13443 if matches!(target, DialectType::DuckDB)
13444 && (3..=5).contains(&f.args.len()) =>
13445 {
13446 let args = &f.args;
13447
13448 // Only rewrite deterministic modes and NULL/no escape-char variant.
13449 let mode = match &args[1] {
13450 Expression::Literal(lit)
13451 if matches!(
13452 lit.as_ref(),
13453 crate::expressions::Literal::String(_)
13454 ) =>
13455 {
13456 let crate::expressions::Literal::String(s) = lit.as_ref()
13457 else {
13458 unreachable!()
13459 };
13460 s.to_ascii_lowercase()
13461 }
13462 _ => return Ok(Expression::Function(f)),
13463 };
13464 if mode != "one" && mode != "all" {
13465 return Ok(Expression::Function(f));
13466 }
13467 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
13468 return Ok(Expression::Function(f));
13469 }
13470
13471 let json_doc_sql = match Generator::sql(&args[0]) {
13472 Ok(sql) => sql,
13473 Err(_) => return Ok(Expression::Function(f)),
13474 };
13475 let search_sql = match Generator::sql(&args[2]) {
13476 Ok(sql) => sql,
13477 Err(_) => return Ok(Expression::Function(f)),
13478 };
13479 let path_sql = if args.len() == 5 {
13480 match Generator::sql(&args[4]) {
13481 Ok(sql) => sql,
13482 Err(_) => return Ok(Expression::Function(f)),
13483 }
13484 } else {
13485 "'$'".to_string()
13486 };
13487
13488 let rewrite_sql = if mode == "all" {
13489 format!(
13490 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
13491 json_doc_sql, path_sql, search_sql
13492 )
13493 } else {
13494 format!(
13495 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
13496 json_doc_sql, path_sql, search_sql
13497 )
13498 };
13499
13500 Ok(Expression::Raw(crate::expressions::Raw {
13501 sql: rewrite_sql,
13502 }))
13503 }
13504 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
13505 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
13506 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
13507 if f.args.len() >= 2
13508 && matches!(source, DialectType::SingleStore) =>
13509 {
13510 let is_bson = name == "BSON_EXTRACT_BSON";
13511 let mut args = f.args;
13512 let json_expr = args.remove(0);
13513
13514 // Build JSONPath from remaining arguments
13515 let mut path = String::from("$");
13516 for arg in &args {
13517 if let Expression::Literal(lit) = arg {
13518 if let crate::expressions::Literal::String(s) = lit.as_ref()
13519 {
13520 // Check if it's a numeric string (array index)
13521 if s.parse::<i64>().is_ok() {
13522 path.push('[');
13523 path.push_str(s);
13524 path.push(']');
13525 } else {
13526 path.push('.');
13527 path.push_str(s);
13528 }
13529 }
13530 }
13531 }
13532
13533 let target_func = if is_bson {
13534 "JSONB_EXTRACT"
13535 } else {
13536 "JSON_EXTRACT"
13537 };
13538 Ok(Expression::Function(Box::new(Function::new(
13539 target_func.to_string(),
13540 vec![json_expr, Expression::string(&path)],
13541 ))))
13542 }
13543 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
13544 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
13545 Ok(Expression::Function(Box::new(Function {
13546 name: "arraySum".to_string(),
13547 args: f.args,
13548 distinct: f.distinct,
13549 trailing_comments: f.trailing_comments,
13550 use_bracket_syntax: f.use_bracket_syntax,
13551 no_parens: f.no_parens,
13552 quoted: f.quoted,
13553 span: None,
13554 inferred_type: None,
13555 })))
13556 }
13557 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
13558 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
13559 // and is handled by JsonQueryValueConvert action. This handles the case where
13560 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
13561 "JSON_QUERY" | "JSON_VALUE"
13562 if f.args.len() == 2
13563 && matches!(
13564 source,
13565 DialectType::TSQL | DialectType::Fabric
13566 ) =>
13567 {
13568 match target {
13569 DialectType::Spark
13570 | DialectType::Databricks
13571 | DialectType::Hive => Ok(Expression::Function(Box::new(
13572 Function::new("GET_JSON_OBJECT".to_string(), f.args),
13573 ))),
13574 _ => Ok(Expression::Function(Box::new(Function::new(
13575 name.to_string(),
13576 f.args,
13577 )))),
13578 }
13579 }
13580 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
13581 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
13582 let arg = f.args.into_iter().next().unwrap();
13583 let is_hive_source = matches!(
13584 source,
13585 DialectType::Hive
13586 | DialectType::Spark
13587 | DialectType::Databricks
13588 );
13589 match target {
13590 DialectType::DuckDB if is_hive_source => {
13591 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
13592 let strptime =
13593 Expression::Function(Box::new(Function::new(
13594 "STRPTIME".to_string(),
13595 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
13596 )));
13597 Ok(Expression::Function(Box::new(Function::new(
13598 "EPOCH".to_string(),
13599 vec![strptime],
13600 ))))
13601 }
13602 DialectType::Presto | DialectType::Trino if is_hive_source => {
13603 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
13604 let cast_varchar =
13605 Expression::Cast(Box::new(crate::expressions::Cast {
13606 this: arg.clone(),
13607 to: DataType::VarChar {
13608 length: None,
13609 parenthesized_length: false,
13610 },
13611 trailing_comments: vec![],
13612 double_colon_syntax: false,
13613 format: None,
13614 default: None,
13615 inferred_type: None,
13616 }));
13617 let date_parse =
13618 Expression::Function(Box::new(Function::new(
13619 "DATE_PARSE".to_string(),
13620 vec![
13621 cast_varchar,
13622 Expression::string("%Y-%m-%d %T"),
13623 ],
13624 )));
13625 let try_expr = Expression::Function(Box::new(
13626 Function::new("TRY".to_string(), vec![date_parse]),
13627 ));
13628 let date_format =
13629 Expression::Function(Box::new(Function::new(
13630 "DATE_FORMAT".to_string(),
13631 vec![arg, Expression::string("%Y-%m-%d %T")],
13632 )));
13633 let parse_datetime =
13634 Expression::Function(Box::new(Function::new(
13635 "PARSE_DATETIME".to_string(),
13636 vec![
13637 date_format,
13638 Expression::string("yyyy-MM-dd HH:mm:ss"),
13639 ],
13640 )));
13641 let coalesce =
13642 Expression::Function(Box::new(Function::new(
13643 "COALESCE".to_string(),
13644 vec![try_expr, parse_datetime],
13645 )));
13646 Ok(Expression::Function(Box::new(Function::new(
13647 "TO_UNIXTIME".to_string(),
13648 vec![coalesce],
13649 ))))
13650 }
13651 DialectType::Presto | DialectType::Trino => {
13652 Ok(Expression::Function(Box::new(Function::new(
13653 "TO_UNIXTIME".to_string(),
13654 vec![arg],
13655 ))))
13656 }
13657 _ => Ok(Expression::Function(Box::new(Function::new(
13658 "UNIX_TIMESTAMP".to_string(),
13659 vec![arg],
13660 )))),
13661 }
13662 }
13663 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
13664 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
13665 DialectType::Spark
13666 | DialectType::Databricks
13667 | DialectType::Hive => Ok(Expression::Function(Box::new(
13668 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
13669 ))),
13670 _ => Ok(Expression::Function(Box::new(Function::new(
13671 "TO_UNIX_TIMESTAMP".to_string(),
13672 f.args,
13673 )))),
13674 },
13675 // CURDATE() -> CURRENT_DATE
13676 "CURDATE" => {
13677 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
13678 }
13679 // CURTIME() -> CURRENT_TIME
13680 "CURTIME" => {
13681 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
13682 precision: None,
13683 }))
13684 }
13685 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
13686 "ARRAY_SORT" if f.args.len() >= 1 => {
13687 match target {
13688 DialectType::Hive => {
13689 let mut args = f.args;
13690 args.truncate(1); // Drop lambda comparator
13691 Ok(Expression::Function(Box::new(Function::new(
13692 "SORT_ARRAY".to_string(),
13693 args,
13694 ))))
13695 }
13696 DialectType::DuckDB
13697 if matches!(source, DialectType::Snowflake) =>
13698 {
13699 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
13700 let mut args_iter = f.args.into_iter();
13701 let arr = args_iter.next().unwrap();
13702 let asc_arg = args_iter.next();
13703 let nulls_first_arg = args_iter.next();
13704
13705 let is_asc_bool = asc_arg
13706 .as_ref()
13707 .map(|a| matches!(a, Expression::Boolean(_)))
13708 .unwrap_or(false);
13709 let is_nf_bool = nulls_first_arg
13710 .as_ref()
13711 .map(|a| matches!(a, Expression::Boolean(_)))
13712 .unwrap_or(false);
13713
13714 // No boolean args: pass through as-is
13715 if !is_asc_bool && !is_nf_bool {
13716 let mut result_args = vec![arr];
13717 if let Some(asc) = asc_arg {
13718 result_args.push(asc);
13719 if let Some(nf) = nulls_first_arg {
13720 result_args.push(nf);
13721 }
13722 }
13723 Ok(Expression::Function(Box::new(Function::new(
13724 "LIST_SORT".to_string(),
13725 result_args,
13726 ))))
13727 } else {
13728 // Has boolean args: convert to DuckDB LIST_SORT format
13729 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
13730
13731 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
13732 let nulls_are_first = match &nulls_first_arg {
13733 Some(Expression::Boolean(b)) => b.value,
13734 None if is_asc_bool => descending, // Snowflake default
13735 _ => false,
13736 };
13737 let nulls_first_sql = if nulls_are_first {
13738 Some(Expression::string("NULLS FIRST"))
13739 } else {
13740 None
13741 };
13742
13743 if !is_asc_bool {
13744 // asc is non-boolean expression, nulls_first is boolean
13745 let mut result_args = vec![arr];
13746 if let Some(asc) = asc_arg {
13747 result_args.push(asc);
13748 }
13749 if let Some(nf) = nulls_first_sql {
13750 result_args.push(nf);
13751 }
13752 Ok(Expression::Function(Box::new(Function::new(
13753 "LIST_SORT".to_string(),
13754 result_args,
13755 ))))
13756 } else {
13757 if !descending && !nulls_are_first {
13758 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
13759 Ok(Expression::Function(Box::new(
13760 Function::new(
13761 "LIST_SORT".to_string(),
13762 vec![arr],
13763 ),
13764 )))
13765 } else if descending && !nulls_are_first {
13766 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
13767 Ok(Expression::Function(Box::new(
13768 Function::new(
13769 "ARRAY_REVERSE_SORT".to_string(),
13770 vec![arr],
13771 ),
13772 )))
13773 } else {
13774 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
13775 let order_str =
13776 if descending { "DESC" } else { "ASC" };
13777 Ok(Expression::Function(Box::new(
13778 Function::new(
13779 "LIST_SORT".to_string(),
13780 vec![
13781 arr,
13782 Expression::string(order_str),
13783 Expression::string("NULLS FIRST"),
13784 ],
13785 ),
13786 )))
13787 }
13788 }
13789 }
13790 }
13791 DialectType::DuckDB => {
13792 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
13793 let mut args = f.args;
13794 args.truncate(1); // Drop lambda comparator for DuckDB
13795 Ok(Expression::Function(Box::new(Function::new(
13796 "ARRAY_SORT".to_string(),
13797 args,
13798 ))))
13799 }
13800 _ => Ok(Expression::Function(f)),
13801 }
13802 }
13803 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
13804 "SORT_ARRAY" if f.args.len() == 1 => match target {
13805 DialectType::Hive
13806 | DialectType::Spark
13807 | DialectType::Databricks => Ok(Expression::Function(f)),
13808 DialectType::DuckDB => Ok(Expression::Function(Box::new(
13809 Function::new("LIST_SORT".to_string(), f.args),
13810 ))),
13811 _ => Ok(Expression::Function(Box::new(Function::new(
13812 "ARRAY_SORT".to_string(),
13813 f.args,
13814 )))),
13815 },
13816 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
13817 "SORT_ARRAY" if f.args.len() == 2 => {
13818 let is_desc =
13819 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
13820 if is_desc {
13821 match target {
13822 DialectType::DuckDB => {
13823 Ok(Expression::Function(Box::new(Function::new(
13824 "ARRAY_REVERSE_SORT".to_string(),
13825 vec![f.args.into_iter().next().unwrap()],
13826 ))))
13827 }
13828 DialectType::Presto | DialectType::Trino => {
13829 let arr_arg = f.args.into_iter().next().unwrap();
13830 let a = Expression::Column(Box::new(
13831 crate::expressions::Column {
13832 name: crate::expressions::Identifier::new("a"),
13833 table: None,
13834 join_mark: false,
13835 trailing_comments: Vec::new(),
13836 span: None,
13837 inferred_type: None,
13838 },
13839 ));
13840 let b = Expression::Column(Box::new(
13841 crate::expressions::Column {
13842 name: crate::expressions::Identifier::new("b"),
13843 table: None,
13844 join_mark: false,
13845 trailing_comments: Vec::new(),
13846 span: None,
13847 inferred_type: None,
13848 },
13849 ));
13850 let case_expr = Expression::Case(Box::new(
13851 crate::expressions::Case {
13852 operand: None,
13853 whens: vec![
13854 (
13855 Expression::Lt(Box::new(
13856 BinaryOp::new(a.clone(), b.clone()),
13857 )),
13858 Expression::Literal(Box::new(
13859 Literal::Number("1".to_string()),
13860 )),
13861 ),
13862 (
13863 Expression::Gt(Box::new(
13864 BinaryOp::new(a.clone(), b.clone()),
13865 )),
13866 Expression::Literal(Box::new(
13867 Literal::Number("-1".to_string()),
13868 )),
13869 ),
13870 ],
13871 else_: Some(Expression::Literal(Box::new(
13872 Literal::Number("0".to_string()),
13873 ))),
13874 comments: Vec::new(),
13875 inferred_type: None,
13876 },
13877 ));
13878 let lambda = Expression::Lambda(Box::new(
13879 crate::expressions::LambdaExpr {
13880 parameters: vec![
13881 crate::expressions::Identifier::new("a"),
13882 crate::expressions::Identifier::new("b"),
13883 ],
13884 body: case_expr,
13885 colon: false,
13886 parameter_types: Vec::new(),
13887 },
13888 ));
13889 Ok(Expression::Function(Box::new(Function::new(
13890 "ARRAY_SORT".to_string(),
13891 vec![arr_arg, lambda],
13892 ))))
13893 }
13894 _ => Ok(Expression::Function(f)),
13895 }
13896 } else {
13897 // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
13898 match target {
13899 DialectType::Hive => Ok(Expression::Function(f)),
13900 DialectType::DuckDB => {
13901 Ok(Expression::Function(Box::new(Function::new(
13902 "LIST_SORT".to_string(),
13903 vec![f.args.into_iter().next().unwrap()],
13904 ))))
13905 }
13906 _ => Ok(Expression::Function(Box::new(Function::new(
13907 "ARRAY_SORT".to_string(),
13908 vec![f.args.into_iter().next().unwrap()],
13909 )))),
13910 }
13911 }
13912 }
13913 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
13914 "LEFT" if f.args.len() == 2 => {
13915 match target {
13916 DialectType::Hive
13917 | DialectType::Presto
13918 | DialectType::Trino
13919 | DialectType::Athena => {
13920 let x = f.args[0].clone();
13921 let n = f.args[1].clone();
13922 Ok(Expression::Function(Box::new(Function::new(
13923 "SUBSTRING".to_string(),
13924 vec![x, Expression::number(1), n],
13925 ))))
13926 }
13927 DialectType::Spark | DialectType::Databricks
13928 if matches!(
13929 source,
13930 DialectType::TSQL | DialectType::Fabric
13931 ) =>
13932 {
13933 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
13934 let x = f.args[0].clone();
13935 let n = f.args[1].clone();
13936 let cast_x = Expression::Cast(Box::new(Cast {
13937 this: x,
13938 to: DataType::VarChar {
13939 length: None,
13940 parenthesized_length: false,
13941 },
13942 double_colon_syntax: false,
13943 trailing_comments: Vec::new(),
13944 format: None,
13945 default: None,
13946 inferred_type: None,
13947 }));
13948 Ok(Expression::Function(Box::new(Function::new(
13949 "LEFT".to_string(),
13950 vec![cast_x, n],
13951 ))))
13952 }
13953 _ => Ok(Expression::Function(f)),
13954 }
13955 }
13956 "RIGHT" if f.args.len() == 2 => {
13957 match target {
13958 DialectType::Hive
13959 | DialectType::Presto
13960 | DialectType::Trino
13961 | DialectType::Athena => {
13962 let x = f.args[0].clone();
13963 let n = f.args[1].clone();
13964 // SUBSTRING(x, LENGTH(x) - (n - 1))
13965 let len_x = Expression::Function(Box::new(Function::new(
13966 "LENGTH".to_string(),
13967 vec![x.clone()],
13968 )));
13969 let n_minus_1 = Expression::Sub(Box::new(
13970 crate::expressions::BinaryOp::new(
13971 n,
13972 Expression::number(1),
13973 ),
13974 ));
13975 let n_minus_1_paren = Expression::Paren(Box::new(
13976 crate::expressions::Paren {
13977 this: n_minus_1,
13978 trailing_comments: Vec::new(),
13979 },
13980 ));
13981 let offset = Expression::Sub(Box::new(
13982 crate::expressions::BinaryOp::new(
13983 len_x,
13984 n_minus_1_paren,
13985 ),
13986 ));
13987 Ok(Expression::Function(Box::new(Function::new(
13988 "SUBSTRING".to_string(),
13989 vec![x, offset],
13990 ))))
13991 }
13992 DialectType::Spark | DialectType::Databricks
13993 if matches!(
13994 source,
13995 DialectType::TSQL | DialectType::Fabric
13996 ) =>
13997 {
13998 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
13999 let x = f.args[0].clone();
14000 let n = f.args[1].clone();
14001 let cast_x = Expression::Cast(Box::new(Cast {
14002 this: x,
14003 to: DataType::VarChar {
14004 length: None,
14005 parenthesized_length: false,
14006 },
14007 double_colon_syntax: false,
14008 trailing_comments: Vec::new(),
14009 format: None,
14010 default: None,
14011 inferred_type: None,
14012 }));
14013 Ok(Expression::Function(Box::new(Function::new(
14014 "RIGHT".to_string(),
14015 vec![cast_x, n],
14016 ))))
14017 }
14018 _ => Ok(Expression::Function(f)),
14019 }
14020 }
14021 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
14022 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
14023 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14024 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
14025 ))),
14026 DialectType::Spark | DialectType::Databricks => {
14027 Ok(Expression::Function(Box::new(Function::new(
14028 "MAP_FROM_ARRAYS".to_string(),
14029 f.args,
14030 ))))
14031 }
14032 _ => Ok(Expression::Function(Box::new(Function::new(
14033 "MAP".to_string(),
14034 f.args,
14035 )))),
14036 },
14037 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
14038 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
14039 "LIKE" if f.args.len() >= 2 => {
14040 let (this, pattern) = if matches!(source, DialectType::SQLite) {
14041 // SQLite: LIKE(pattern, string) -> string LIKE pattern
14042 (f.args[1].clone(), f.args[0].clone())
14043 } else {
14044 // Standard: LIKE(string, pattern) -> string LIKE pattern
14045 (f.args[0].clone(), f.args[1].clone())
14046 };
14047 let escape = if f.args.len() >= 3 {
14048 Some(f.args[2].clone())
14049 } else {
14050 None
14051 };
14052 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
14053 left: this,
14054 right: pattern,
14055 escape,
14056 quantifier: None,
14057 inferred_type: None,
14058 })))
14059 }
14060 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
14061 "ILIKE" if f.args.len() >= 2 => {
14062 let this = f.args[0].clone();
14063 let pattern = f.args[1].clone();
14064 let escape = if f.args.len() >= 3 {
14065 Some(f.args[2].clone())
14066 } else {
14067 None
14068 };
14069 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
14070 left: this,
14071 right: pattern,
14072 escape,
14073 quantifier: None,
14074 inferred_type: None,
14075 })))
14076 }
14077 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
14078 "CHAR" if f.args.len() == 1 => match target {
14079 DialectType::MySQL
14080 | DialectType::SingleStore
14081 | DialectType::TSQL => Ok(Expression::Function(f)),
14082 _ => Ok(Expression::Function(Box::new(Function::new(
14083 "CHR".to_string(),
14084 f.args,
14085 )))),
14086 },
14087 // CONCAT(a, b) -> a || b for PostgreSQL
14088 "CONCAT"
14089 if f.args.len() == 2
14090 && matches!(target, DialectType::PostgreSQL)
14091 && matches!(
14092 source,
14093 DialectType::ClickHouse | DialectType::MySQL
14094 ) =>
14095 {
14096 let mut args = f.args;
14097 let right = args.pop().unwrap();
14098 let left = args.pop().unwrap();
14099 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
14100 this: Box::new(left),
14101 expression: Box::new(right),
14102 safe: None,
14103 })))
14104 }
14105 // ARRAY_TO_STRING(arr, delim) -> target-specific
14106 "ARRAY_TO_STRING"
14107 if f.args.len() == 2
14108 && matches!(target, DialectType::DuckDB)
14109 && matches!(source, DialectType::Snowflake) =>
14110 {
14111 let mut args = f.args;
14112 let arr = args.remove(0);
14113 let sep = args.remove(0);
14114 // sep IS NULL
14115 let sep_is_null = Expression::IsNull(Box::new(IsNull {
14116 this: sep.clone(),
14117 not: false,
14118 postfix_form: false,
14119 }));
14120 // COALESCE(CAST(x AS TEXT), '')
14121 let cast_x = Expression::Cast(Box::new(Cast {
14122 this: Expression::Identifier(Identifier::new("x")),
14123 to: DataType::Text,
14124 trailing_comments: Vec::new(),
14125 double_colon_syntax: false,
14126 format: None,
14127 default: None,
14128 inferred_type: None,
14129 }));
14130 let coalesce = Expression::Coalesce(Box::new(
14131 crate::expressions::VarArgFunc {
14132 original_name: None,
14133 expressions: vec![
14134 cast_x,
14135 Expression::Literal(Box::new(Literal::String(
14136 String::new(),
14137 ))),
14138 ],
14139 inferred_type: None,
14140 },
14141 ));
14142 let lambda =
14143 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
14144 parameters: vec![Identifier::new("x")],
14145 body: coalesce,
14146 colon: false,
14147 parameter_types: Vec::new(),
14148 }));
14149 let list_transform = Expression::Function(Box::new(Function::new(
14150 "LIST_TRANSFORM".to_string(),
14151 vec![arr, lambda],
14152 )));
14153 let array_to_string =
14154 Expression::Function(Box::new(Function::new(
14155 "ARRAY_TO_STRING".to_string(),
14156 vec![list_transform, sep],
14157 )));
14158 Ok(Expression::Case(Box::new(Case {
14159 operand: None,
14160 whens: vec![(sep_is_null, Expression::Null(Null))],
14161 else_: Some(array_to_string),
14162 comments: Vec::new(),
14163 inferred_type: None,
14164 })))
14165 }
14166 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
14167 DialectType::Presto | DialectType::Trino => {
14168 Ok(Expression::Function(Box::new(Function::new(
14169 "ARRAY_JOIN".to_string(),
14170 f.args,
14171 ))))
14172 }
14173 DialectType::TSQL => Ok(Expression::Function(Box::new(
14174 Function::new("STRING_AGG".to_string(), f.args),
14175 ))),
14176 _ => Ok(Expression::Function(f)),
14177 },
14178 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
14179 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
14180 DialectType::Spark
14181 | DialectType::Databricks
14182 | DialectType::Hive => Ok(Expression::Function(Box::new(
14183 Function::new("CONCAT".to_string(), f.args),
14184 ))),
14185 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14186 Function::new("ARRAY_CAT".to_string(), f.args),
14187 ))),
14188 DialectType::Redshift => Ok(Expression::Function(Box::new(
14189 Function::new("ARRAY_CONCAT".to_string(), f.args),
14190 ))),
14191 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14192 Function::new("ARRAY_CAT".to_string(), f.args),
14193 ))),
14194 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14195 Function::new("LIST_CONCAT".to_string(), f.args),
14196 ))),
14197 DialectType::Presto | DialectType::Trino => {
14198 Ok(Expression::Function(Box::new(Function::new(
14199 "CONCAT".to_string(),
14200 f.args,
14201 ))))
14202 }
14203 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14204 Function::new("ARRAY_CONCAT".to_string(), f.args),
14205 ))),
14206 _ => Ok(Expression::Function(f)),
14207 },
14208 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
14209 "HAS" if f.args.len() == 2 => match target {
14210 DialectType::Spark
14211 | DialectType::Databricks
14212 | DialectType::Hive => Ok(Expression::Function(Box::new(
14213 Function::new("ARRAY_CONTAINS".to_string(), f.args),
14214 ))),
14215 DialectType::Presto | DialectType::Trino => {
14216 Ok(Expression::Function(Box::new(Function::new(
14217 "CONTAINS".to_string(),
14218 f.args,
14219 ))))
14220 }
14221 _ => Ok(Expression::Function(f)),
14222 },
14223 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
14224 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
14225 Function::new("COALESCE".to_string(), f.args),
14226 ))),
14227 // ISNULL(x) in MySQL -> (x IS NULL)
14228 "ISNULL"
14229 if f.args.len() == 1
14230 && matches!(source, DialectType::MySQL)
14231 && matches!(target, DialectType::MySQL) =>
14232 {
14233 let arg = f.args.into_iter().next().unwrap();
14234 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
14235 this: Expression::IsNull(Box::new(
14236 crate::expressions::IsNull {
14237 this: arg,
14238 not: false,
14239 postfix_form: false,
14240 },
14241 )),
14242 trailing_comments: Vec::new(),
14243 })))
14244 }
14245 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
14246 "MONTHNAME"
14247 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
14248 {
14249 let arg = f.args.into_iter().next().unwrap();
14250 Ok(Expression::Function(Box::new(Function::new(
14251 "DATE_FORMAT".to_string(),
14252 vec![arg, Expression::string("%M")],
14253 ))))
14254 }
14255 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
14256 "SPLITBYSTRING" if f.args.len() == 2 => {
14257 let sep = f.args[0].clone();
14258 let str_arg = f.args[1].clone();
14259 match target {
14260 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14261 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
14262 ))),
14263 DialectType::Doris => {
14264 Ok(Expression::Function(Box::new(Function::new(
14265 "SPLIT_BY_STRING".to_string(),
14266 vec![str_arg, sep],
14267 ))))
14268 }
14269 DialectType::Hive
14270 | DialectType::Spark
14271 | DialectType::Databricks => {
14272 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
14273 let escaped =
14274 Expression::Function(Box::new(Function::new(
14275 "CONCAT".to_string(),
14276 vec![
14277 Expression::string("\\Q"),
14278 sep,
14279 Expression::string("\\E"),
14280 ],
14281 )));
14282 Ok(Expression::Function(Box::new(Function::new(
14283 "SPLIT".to_string(),
14284 vec![str_arg, escaped],
14285 ))))
14286 }
14287 _ => Ok(Expression::Function(f)),
14288 }
14289 }
14290 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
14291 "SPLITBYREGEXP" if f.args.len() == 2 => {
14292 let sep = f.args[0].clone();
14293 let str_arg = f.args[1].clone();
14294 match target {
14295 DialectType::DuckDB => {
14296 Ok(Expression::Function(Box::new(Function::new(
14297 "STR_SPLIT_REGEX".to_string(),
14298 vec![str_arg, sep],
14299 ))))
14300 }
14301 DialectType::Hive
14302 | DialectType::Spark
14303 | DialectType::Databricks => {
14304 Ok(Expression::Function(Box::new(Function::new(
14305 "SPLIT".to_string(),
14306 vec![str_arg, sep],
14307 ))))
14308 }
14309 _ => Ok(Expression::Function(f)),
14310 }
14311 }
14312 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
14313 "TOMONDAY" => {
14314 if f.args.len() == 1 {
14315 let arg = f.args.into_iter().next().unwrap();
14316 match target {
14317 DialectType::Doris => {
14318 Ok(Expression::Function(Box::new(Function::new(
14319 "DATE_TRUNC".to_string(),
14320 vec![arg, Expression::string("WEEK")],
14321 ))))
14322 }
14323 _ => Ok(Expression::Function(Box::new(Function::new(
14324 "DATE_TRUNC".to_string(),
14325 vec![Expression::string("WEEK"), arg],
14326 )))),
14327 }
14328 } else {
14329 Ok(Expression::Function(f))
14330 }
14331 }
14332 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
14333 "COLLECT_LIST" if f.args.len() == 1 => match target {
14334 DialectType::Spark
14335 | DialectType::Databricks
14336 | DialectType::Hive => Ok(Expression::Function(f)),
14337 _ => Ok(Expression::Function(Box::new(Function::new(
14338 "ARRAY_AGG".to_string(),
14339 f.args,
14340 )))),
14341 },
14342 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
14343 "TO_CHAR"
14344 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
14345 {
14346 let arg = f.args.into_iter().next().unwrap();
14347 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14348 this: arg,
14349 to: DataType::Custom {
14350 name: "STRING".to_string(),
14351 },
14352 double_colon_syntax: false,
14353 trailing_comments: Vec::new(),
14354 format: None,
14355 default: None,
14356 inferred_type: None,
14357 })))
14358 }
14359 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
14360 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
14361 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14362 Function::new("RANDOM".to_string(), vec![]),
14363 ))),
14364 _ => Ok(Expression::Function(f)),
14365 },
14366 // ClickHouse formatDateTime -> target-specific
14367 "FORMATDATETIME" if f.args.len() >= 2 => match target {
14368 DialectType::MySQL => Ok(Expression::Function(Box::new(
14369 Function::new("DATE_FORMAT".to_string(), f.args),
14370 ))),
14371 _ => Ok(Expression::Function(f)),
14372 },
14373 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
14374 "REPLICATE" if f.args.len() == 2 => match target {
14375 DialectType::TSQL => Ok(Expression::Function(f)),
14376 _ => Ok(Expression::Function(Box::new(Function::new(
14377 "REPEAT".to_string(),
14378 f.args,
14379 )))),
14380 },
14381 // LEN(x) -> LENGTH(x) for non-TSQL targets
14382 // No CAST needed when arg is already a string literal
14383 "LEN" if f.args.len() == 1 => {
14384 match target {
14385 DialectType::TSQL => Ok(Expression::Function(f)),
14386 DialectType::Spark | DialectType::Databricks => {
14387 let arg = f.args.into_iter().next().unwrap();
14388 // Don't wrap string literals with CAST - they're already strings
14389 let is_string = matches!(
14390 &arg,
14391 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
14392 );
14393 let final_arg = if is_string {
14394 arg
14395 } else {
14396 Expression::Cast(Box::new(Cast {
14397 this: arg,
14398 to: DataType::VarChar {
14399 length: None,
14400 parenthesized_length: false,
14401 },
14402 double_colon_syntax: false,
14403 trailing_comments: Vec::new(),
14404 format: None,
14405 default: None,
14406 inferred_type: None,
14407 }))
14408 };
14409 Ok(Expression::Function(Box::new(Function::new(
14410 "LENGTH".to_string(),
14411 vec![final_arg],
14412 ))))
14413 }
14414 _ => {
14415 let arg = f.args.into_iter().next().unwrap();
14416 Ok(Expression::Function(Box::new(Function::new(
14417 "LENGTH".to_string(),
14418 vec![arg],
14419 ))))
14420 }
14421 }
14422 }
14423 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
14424 "COUNT_BIG" if f.args.len() == 1 => match target {
14425 DialectType::TSQL => Ok(Expression::Function(f)),
14426 _ => Ok(Expression::Function(Box::new(Function::new(
14427 "COUNT".to_string(),
14428 f.args,
14429 )))),
14430 },
14431 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
14432 "DATEFROMPARTS" if f.args.len() == 3 => match target {
14433 DialectType::TSQL => Ok(Expression::Function(f)),
14434 _ => Ok(Expression::Function(Box::new(Function::new(
14435 "MAKE_DATE".to_string(),
14436 f.args,
14437 )))),
14438 },
14439 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
14440 "REGEXP_LIKE" if f.args.len() >= 2 => {
14441 let str_expr = f.args[0].clone();
14442 let pattern = f.args[1].clone();
14443 let flags = if f.args.len() >= 3 {
14444 Some(f.args[2].clone())
14445 } else {
14446 None
14447 };
14448 match target {
14449 DialectType::DuckDB => {
14450 let mut new_args = vec![str_expr, pattern];
14451 if let Some(fl) = flags {
14452 new_args.push(fl);
14453 }
14454 Ok(Expression::Function(Box::new(Function::new(
14455 "REGEXP_MATCHES".to_string(),
14456 new_args,
14457 ))))
14458 }
14459 _ => Ok(Expression::RegexpLike(Box::new(
14460 crate::expressions::RegexpFunc {
14461 this: str_expr,
14462 pattern,
14463 flags,
14464 },
14465 ))),
14466 }
14467 }
14468 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
14469 "ARRAYJOIN" if f.args.len() == 1 => match target {
14470 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14471 Function::new("UNNEST".to_string(), f.args),
14472 ))),
14473 _ => Ok(Expression::Function(f)),
14474 },
14475 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
14476 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
14477 match target {
14478 DialectType::TSQL => Ok(Expression::Function(f)),
14479 DialectType::DuckDB => {
14480 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
14481 let mut args = f.args;
14482 let ms = args.pop().unwrap();
14483 let s = args.pop().unwrap();
14484 // s + (ms / 1000.0)
14485 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
14486 ms,
14487 Expression::Literal(Box::new(
14488 crate::expressions::Literal::Number(
14489 "1000.0".to_string(),
14490 ),
14491 )),
14492 )));
14493 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
14494 s,
14495 Expression::Paren(Box::new(Paren {
14496 this: ms_frac,
14497 trailing_comments: vec![],
14498 })),
14499 )));
14500 args.push(s_with_ms);
14501 Ok(Expression::Function(Box::new(Function::new(
14502 "MAKE_TIMESTAMP".to_string(),
14503 args,
14504 ))))
14505 }
14506 DialectType::Snowflake => {
14507 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
14508 let mut args = f.args;
14509 let ms = args.pop().unwrap();
14510 // ms * 1000000
14511 let ns = Expression::Mul(Box::new(BinaryOp::new(
14512 ms,
14513 Expression::number(1000000),
14514 )));
14515 args.push(ns);
14516 Ok(Expression::Function(Box::new(Function::new(
14517 "TIMESTAMP_FROM_PARTS".to_string(),
14518 args,
14519 ))))
14520 }
14521 _ => {
14522 // Default: keep function name for other targets
14523 Ok(Expression::Function(Box::new(Function::new(
14524 "DATETIMEFROMPARTS".to_string(),
14525 f.args,
14526 ))))
14527 }
14528 }
14529 }
14530 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
14531 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
14532 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
14533 let is_try = name == "TRY_CONVERT";
14534 let type_expr = f.args[0].clone();
14535 let value_expr = f.args[1].clone();
14536 let style = if f.args.len() >= 3 {
14537 Some(&f.args[2])
14538 } else {
14539 None
14540 };
14541
14542 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
14543 if matches!(target, DialectType::TSQL) {
14544 let normalized_type = match &type_expr {
14545 Expression::DataType(dt) => {
14546 let new_dt = match dt {
14547 DataType::Int { .. } => DataType::Custom {
14548 name: "INTEGER".to_string(),
14549 },
14550 _ => dt.clone(),
14551 };
14552 Expression::DataType(new_dt)
14553 }
14554 Expression::Identifier(id) => {
14555 if id.name.eq_ignore_ascii_case("INT") {
14556 Expression::Identifier(
14557 crate::expressions::Identifier::new("INTEGER"),
14558 )
14559 } else {
14560 let upper = id.name.to_ascii_uppercase();
14561 Expression::Identifier(
14562 crate::expressions::Identifier::new(upper),
14563 )
14564 }
14565 }
14566 Expression::Column(col) => {
14567 if col.name.name.eq_ignore_ascii_case("INT") {
14568 Expression::Identifier(
14569 crate::expressions::Identifier::new("INTEGER"),
14570 )
14571 } else {
14572 let upper = col.name.name.to_ascii_uppercase();
14573 Expression::Identifier(
14574 crate::expressions::Identifier::new(upper),
14575 )
14576 }
14577 }
14578 _ => type_expr.clone(),
14579 };
14580 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
14581 let mut new_args = vec![normalized_type, value_expr];
14582 if let Some(s) = style {
14583 new_args.push(s.clone());
14584 }
14585 return Ok(Expression::Function(Box::new(Function::new(
14586 func_name.to_string(),
14587 new_args,
14588 ))));
14589 }
14590
14591 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
14592 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
14593 match e {
14594 Expression::DataType(dt) => {
14595 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
14596 match dt {
14597 DataType::Custom { name }
14598 if name.starts_with("NVARCHAR(")
14599 || name.starts_with("NCHAR(") =>
14600 {
14601 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
14602 let inner = &name[name.find('(').unwrap() + 1
14603 ..name.len() - 1];
14604 if inner.eq_ignore_ascii_case("MAX") {
14605 Some(DataType::Text)
14606 } else if let Ok(len) = inner.parse::<u32>() {
14607 if name.starts_with("NCHAR") {
14608 Some(DataType::Char {
14609 length: Some(len),
14610 })
14611 } else {
14612 Some(DataType::VarChar {
14613 length: Some(len),
14614 parenthesized_length: false,
14615 })
14616 }
14617 } else {
14618 Some(dt.clone())
14619 }
14620 }
14621 DataType::Custom { name } if name == "NVARCHAR" => {
14622 Some(DataType::VarChar {
14623 length: None,
14624 parenthesized_length: false,
14625 })
14626 }
14627 DataType::Custom { name } if name == "NCHAR" => {
14628 Some(DataType::Char { length: None })
14629 }
14630 DataType::Custom { name }
14631 if name == "NVARCHAR(MAX)"
14632 || name == "VARCHAR(MAX)" =>
14633 {
14634 Some(DataType::Text)
14635 }
14636 _ => Some(dt.clone()),
14637 }
14638 }
14639 Expression::Identifier(id) => {
14640 let name = id.name.to_ascii_uppercase();
14641 match name.as_str() {
14642 "INT" | "INTEGER" => Some(DataType::Int {
14643 length: None,
14644 integer_spelling: false,
14645 }),
14646 "BIGINT" => Some(DataType::BigInt { length: None }),
14647 "SMALLINT" => {
14648 Some(DataType::SmallInt { length: None })
14649 }
14650 "TINYINT" => {
14651 Some(DataType::TinyInt { length: None })
14652 }
14653 "FLOAT" => Some(DataType::Float {
14654 precision: None,
14655 scale: None,
14656 real_spelling: false,
14657 }),
14658 "REAL" => Some(DataType::Float {
14659 precision: None,
14660 scale: None,
14661 real_spelling: true,
14662 }),
14663 "DATETIME" | "DATETIME2" => {
14664 Some(DataType::Timestamp {
14665 timezone: false,
14666 precision: None,
14667 })
14668 }
14669 "DATE" => Some(DataType::Date),
14670 "BIT" => Some(DataType::Boolean),
14671 "TEXT" => Some(DataType::Text),
14672 "NUMERIC" => Some(DataType::Decimal {
14673 precision: None,
14674 scale: None,
14675 }),
14676 "MONEY" => Some(DataType::Decimal {
14677 precision: Some(15),
14678 scale: Some(4),
14679 }),
14680 "SMALLMONEY" => Some(DataType::Decimal {
14681 precision: Some(6),
14682 scale: Some(4),
14683 }),
14684 "VARCHAR" => Some(DataType::VarChar {
14685 length: None,
14686 parenthesized_length: false,
14687 }),
14688 "NVARCHAR" => Some(DataType::VarChar {
14689 length: None,
14690 parenthesized_length: false,
14691 }),
14692 "CHAR" => Some(DataType::Char { length: None }),
14693 "NCHAR" => Some(DataType::Char { length: None }),
14694 _ => Some(DataType::Custom { name }),
14695 }
14696 }
14697 Expression::Column(col) => {
14698 let name = col.name.name.to_ascii_uppercase();
14699 match name.as_str() {
14700 "INT" | "INTEGER" => Some(DataType::Int {
14701 length: None,
14702 integer_spelling: false,
14703 }),
14704 "BIGINT" => Some(DataType::BigInt { length: None }),
14705 "FLOAT" => Some(DataType::Float {
14706 precision: None,
14707 scale: None,
14708 real_spelling: false,
14709 }),
14710 "DATETIME" | "DATETIME2" => {
14711 Some(DataType::Timestamp {
14712 timezone: false,
14713 precision: None,
14714 })
14715 }
14716 "DATE" => Some(DataType::Date),
14717 "NUMERIC" => Some(DataType::Decimal {
14718 precision: None,
14719 scale: None,
14720 }),
14721 "VARCHAR" => Some(DataType::VarChar {
14722 length: None,
14723 parenthesized_length: false,
14724 }),
14725 "NVARCHAR" => Some(DataType::VarChar {
14726 length: None,
14727 parenthesized_length: false,
14728 }),
14729 "CHAR" => Some(DataType::Char { length: None }),
14730 "NCHAR" => Some(DataType::Char { length: None }),
14731 _ => Some(DataType::Custom { name }),
14732 }
14733 }
14734 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
14735 Expression::Function(f) => {
14736 let fname = f.name.to_ascii_uppercase();
14737 match fname.as_str() {
14738 "VARCHAR" | "NVARCHAR" => {
14739 let len = f.args.first().and_then(|a| {
14740 if let Expression::Literal(lit) = a
14741 {
14742 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14743 n.parse::<u32>().ok()
14744 } else { None }
14745 } else if let Expression::Identifier(id) = a
14746 {
14747 if id.name.eq_ignore_ascii_case("MAX") {
14748 None
14749 } else {
14750 None
14751 }
14752 } else {
14753 None
14754 }
14755 });
14756 // Check for VARCHAR(MAX) -> TEXT
14757 let is_max = f.args.first().map_or(false, |a| {
14758 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
14759 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
14760 });
14761 if is_max {
14762 Some(DataType::Text)
14763 } else {
14764 Some(DataType::VarChar {
14765 length: len,
14766 parenthesized_length: false,
14767 })
14768 }
14769 }
14770 "NCHAR" | "CHAR" => {
14771 let len = f.args.first().and_then(|a| {
14772 if let Expression::Literal(lit) = a
14773 {
14774 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14775 n.parse::<u32>().ok()
14776 } else { None }
14777 } else {
14778 None
14779 }
14780 });
14781 Some(DataType::Char { length: len })
14782 }
14783 "NUMERIC" | "DECIMAL" => {
14784 let precision = f.args.first().and_then(|a| {
14785 if let Expression::Literal(lit) = a
14786 {
14787 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14788 n.parse::<u32>().ok()
14789 } else { None }
14790 } else {
14791 None
14792 }
14793 });
14794 let scale = f.args.get(1).and_then(|a| {
14795 if let Expression::Literal(lit) = a
14796 {
14797 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14798 n.parse::<u32>().ok()
14799 } else { None }
14800 } else {
14801 None
14802 }
14803 });
14804 Some(DataType::Decimal { precision, scale })
14805 }
14806 _ => None,
14807 }
14808 }
14809 _ => None,
14810 }
14811 }
14812
14813 if let Some(mut dt) = expr_to_datatype(&type_expr) {
14814 // For TSQL source: VARCHAR/CHAR without length defaults to 30
14815 let is_tsql_source =
14816 matches!(source, DialectType::TSQL | DialectType::Fabric);
14817 if is_tsql_source {
14818 match &dt {
14819 DataType::VarChar { length: None, .. } => {
14820 dt = DataType::VarChar {
14821 length: Some(30),
14822 parenthesized_length: false,
14823 };
14824 }
14825 DataType::Char { length: None } => {
14826 dt = DataType::Char { length: Some(30) };
14827 }
14828 _ => {}
14829 }
14830 }
14831
14832 // Determine if this is a string type
14833 let is_string_type = matches!(
14834 dt,
14835 DataType::VarChar { .. }
14836 | DataType::Char { .. }
14837 | DataType::Text
14838 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
14839 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
14840 || name.starts_with("VARCHAR(") || name == "VARCHAR"
14841 || name == "STRING");
14842
14843 // Determine if this is a date/time type
14844 let is_datetime_type = matches!(
14845 dt,
14846 DataType::Timestamp { .. } | DataType::Date
14847 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
14848 || name == "DATETIME2" || name == "SMALLDATETIME");
14849
14850 // Check for date conversion with style
14851 if style.is_some() {
14852 let style_num = style.and_then(|s| {
14853 if let Expression::Literal(lit) = s {
14854 if let crate::expressions::Literal::Number(n) =
14855 lit.as_ref()
14856 {
14857 n.parse::<u32>().ok()
14858 } else {
14859 None
14860 }
14861 } else {
14862 None
14863 }
14864 });
14865
14866 // TSQL CONVERT date styles (Java format)
14867 let format_str = style_num.and_then(|n| match n {
14868 101 => Some("MM/dd/yyyy"),
14869 102 => Some("yyyy.MM.dd"),
14870 103 => Some("dd/MM/yyyy"),
14871 104 => Some("dd.MM.yyyy"),
14872 105 => Some("dd-MM-yyyy"),
14873 108 => Some("HH:mm:ss"),
14874 110 => Some("MM-dd-yyyy"),
14875 112 => Some("yyyyMMdd"),
14876 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
14877 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
14878 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
14879 _ => None,
14880 });
14881
14882 // Non-string, non-datetime types with style: just CAST, ignore the style
14883 if !is_string_type && !is_datetime_type {
14884 let cast_expr = if is_try {
14885 Expression::TryCast(Box::new(
14886 crate::expressions::Cast {
14887 this: value_expr,
14888 to: dt,
14889 trailing_comments: Vec::new(),
14890 double_colon_syntax: false,
14891 format: None,
14892 default: None,
14893 inferred_type: None,
14894 },
14895 ))
14896 } else {
14897 Expression::Cast(Box::new(
14898 crate::expressions::Cast {
14899 this: value_expr,
14900 to: dt,
14901 trailing_comments: Vec::new(),
14902 double_colon_syntax: false,
14903 format: None,
14904 default: None,
14905 inferred_type: None,
14906 },
14907 ))
14908 };
14909 return Ok(cast_expr);
14910 }
14911
14912 if let Some(java_fmt) = format_str {
14913 let c_fmt = java_fmt
14914 .replace("yyyy", "%Y")
14915 .replace("MM", "%m")
14916 .replace("dd", "%d")
14917 .replace("HH", "%H")
14918 .replace("mm", "%M")
14919 .replace("ss", "%S")
14920 .replace("SSSSSS", "%f")
14921 .replace("SSS", "%f")
14922 .replace("'T'", "T");
14923
14924 // For datetime target types: style is the INPUT format for parsing strings -> dates
14925 if is_datetime_type {
14926 match target {
14927 DialectType::DuckDB => {
14928 return Ok(Expression::Function(Box::new(
14929 Function::new(
14930 "STRPTIME".to_string(),
14931 vec![
14932 value_expr,
14933 Expression::string(&c_fmt),
14934 ],
14935 ),
14936 )));
14937 }
14938 DialectType::Spark
14939 | DialectType::Databricks => {
14940 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
14941 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
14942 let func_name =
14943 if matches!(dt, DataType::Date) {
14944 "TO_DATE"
14945 } else {
14946 "TO_TIMESTAMP"
14947 };
14948 return Ok(Expression::Function(Box::new(
14949 Function::new(
14950 func_name.to_string(),
14951 vec![
14952 value_expr,
14953 Expression::string(java_fmt),
14954 ],
14955 ),
14956 )));
14957 }
14958 DialectType::Hive => {
14959 return Ok(Expression::Function(Box::new(
14960 Function::new(
14961 "TO_TIMESTAMP".to_string(),
14962 vec![
14963 value_expr,
14964 Expression::string(java_fmt),
14965 ],
14966 ),
14967 )));
14968 }
14969 _ => {
14970 return Ok(Expression::Cast(Box::new(
14971 crate::expressions::Cast {
14972 this: value_expr,
14973 to: dt,
14974 trailing_comments: Vec::new(),
14975 double_colon_syntax: false,
14976 format: None,
14977 default: None,
14978 inferred_type: None,
14979 },
14980 )));
14981 }
14982 }
14983 }
14984
14985 // For string target types: style is the OUTPUT format for dates -> strings
14986 match target {
14987 DialectType::DuckDB => Ok(Expression::Function(
14988 Box::new(Function::new(
14989 "STRPTIME".to_string(),
14990 vec![
14991 value_expr,
14992 Expression::string(&c_fmt),
14993 ],
14994 )),
14995 )),
14996 DialectType::Spark | DialectType::Databricks => {
14997 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
14998 // Determine the target string type
14999 let string_dt = match &dt {
15000 DataType::VarChar {
15001 length: Some(l),
15002 ..
15003 } => DataType::VarChar {
15004 length: Some(*l),
15005 parenthesized_length: false,
15006 },
15007 DataType::Text => DataType::Custom {
15008 name: "STRING".to_string(),
15009 },
15010 _ => DataType::Custom {
15011 name: "STRING".to_string(),
15012 },
15013 };
15014 let date_format_expr = Expression::Function(
15015 Box::new(Function::new(
15016 "DATE_FORMAT".to_string(),
15017 vec![
15018 value_expr,
15019 Expression::string(java_fmt),
15020 ],
15021 )),
15022 );
15023 let cast_expr = if is_try {
15024 Expression::TryCast(Box::new(
15025 crate::expressions::Cast {
15026 this: date_format_expr,
15027 to: string_dt,
15028 trailing_comments: Vec::new(),
15029 double_colon_syntax: false,
15030 format: None,
15031 default: None,
15032 inferred_type: None,
15033 },
15034 ))
15035 } else {
15036 Expression::Cast(Box::new(
15037 crate::expressions::Cast {
15038 this: date_format_expr,
15039 to: string_dt,
15040 trailing_comments: Vec::new(),
15041 double_colon_syntax: false,
15042 format: None,
15043 default: None,
15044 inferred_type: None,
15045 },
15046 ))
15047 };
15048 Ok(cast_expr)
15049 }
15050 DialectType::MySQL | DialectType::SingleStore => {
15051 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
15052 let mysql_fmt = java_fmt
15053 .replace("yyyy", "%Y")
15054 .replace("MM", "%m")
15055 .replace("dd", "%d")
15056 .replace("HH:mm:ss.SSSSSS", "%T")
15057 .replace("HH:mm:ss", "%T")
15058 .replace("HH", "%H")
15059 .replace("mm", "%i")
15060 .replace("ss", "%S");
15061 let date_format_expr = Expression::Function(
15062 Box::new(Function::new(
15063 "DATE_FORMAT".to_string(),
15064 vec![
15065 value_expr,
15066 Expression::string(&mysql_fmt),
15067 ],
15068 )),
15069 );
15070 // MySQL uses CHAR for string casts
15071 let mysql_dt = match &dt {
15072 DataType::VarChar { length, .. } => {
15073 DataType::Char { length: *length }
15074 }
15075 _ => dt,
15076 };
15077 Ok(Expression::Cast(Box::new(
15078 crate::expressions::Cast {
15079 this: date_format_expr,
15080 to: mysql_dt,
15081 trailing_comments: Vec::new(),
15082 double_colon_syntax: false,
15083 format: None,
15084 default: None,
15085 inferred_type: None,
15086 },
15087 )))
15088 }
15089 DialectType::Hive => {
15090 let func_name = "TO_TIMESTAMP";
15091 Ok(Expression::Function(Box::new(
15092 Function::new(
15093 func_name.to_string(),
15094 vec![
15095 value_expr,
15096 Expression::string(java_fmt),
15097 ],
15098 ),
15099 )))
15100 }
15101 _ => Ok(Expression::Cast(Box::new(
15102 crate::expressions::Cast {
15103 this: value_expr,
15104 to: dt,
15105 trailing_comments: Vec::new(),
15106 double_colon_syntax: false,
15107 format: None,
15108 default: None,
15109 inferred_type: None,
15110 },
15111 ))),
15112 }
15113 } else {
15114 // Unknown style, just CAST
15115 let cast_expr = if is_try {
15116 Expression::TryCast(Box::new(
15117 crate::expressions::Cast {
15118 this: value_expr,
15119 to: dt,
15120 trailing_comments: Vec::new(),
15121 double_colon_syntax: false,
15122 format: None,
15123 default: None,
15124 inferred_type: None,
15125 },
15126 ))
15127 } else {
15128 Expression::Cast(Box::new(
15129 crate::expressions::Cast {
15130 this: value_expr,
15131 to: dt,
15132 trailing_comments: Vec::new(),
15133 double_colon_syntax: false,
15134 format: None,
15135 default: None,
15136 inferred_type: None,
15137 },
15138 ))
15139 };
15140 Ok(cast_expr)
15141 }
15142 } else {
15143 // No style - simple CAST
15144 let final_dt = if matches!(
15145 target,
15146 DialectType::MySQL | DialectType::SingleStore
15147 ) {
15148 match &dt {
15149 DataType::Int { .. }
15150 | DataType::BigInt { .. }
15151 | DataType::SmallInt { .. }
15152 | DataType::TinyInt { .. } => DataType::Custom {
15153 name: "SIGNED".to_string(),
15154 },
15155 DataType::VarChar { length, .. } => {
15156 DataType::Char { length: *length }
15157 }
15158 _ => dt,
15159 }
15160 } else {
15161 dt
15162 };
15163 let cast_expr = if is_try {
15164 Expression::TryCast(Box::new(
15165 crate::expressions::Cast {
15166 this: value_expr,
15167 to: final_dt,
15168 trailing_comments: Vec::new(),
15169 double_colon_syntax: false,
15170 format: None,
15171 default: None,
15172 inferred_type: None,
15173 },
15174 ))
15175 } else {
15176 Expression::Cast(Box::new(crate::expressions::Cast {
15177 this: value_expr,
15178 to: final_dt,
15179 trailing_comments: Vec::new(),
15180 double_colon_syntax: false,
15181 format: None,
15182 default: None,
15183 inferred_type: None,
15184 }))
15185 };
15186 Ok(cast_expr)
15187 }
15188 } else {
15189 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
15190 Ok(Expression::Function(f))
15191 }
15192 }
15193 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
15194 "STRFTIME" if f.args.len() == 2 => {
15195 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
15196 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
15197 // SQLite: args[0] = format, args[1] = value
15198 (f.args[1].clone(), &f.args[0])
15199 } else {
15200 // DuckDB and others: args[0] = value, args[1] = format
15201 (f.args[0].clone(), &f.args[1])
15202 };
15203
/// Rewrites a C/strftime-style format string (`%Y-%m-%d`) into the Java-style pattern
/// letters used by the DATE_FORMAT / FORMAT targets (`yyyy-MM-dd`).
///
/// The substitutions are plain substring replacements applied one after another in table
/// order; specifiers that are not listed are left in the string untouched.
fn c_to_java_format(fmt: &str) -> String {
    const SPECIFIERS: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    let mut rendered = fmt.to_string();
    for &(c_spec, java_spec) in SPECIFIERS {
        rendered = rendered.replace(c_spec, java_spec);
    }
    rendered
}
15226
15227 // Helper: recursively convert format strings within expressions (handles CONCAT)
15228 fn convert_fmt_expr(
15229 expr: &Expression,
15230 converter: &dyn Fn(&str) -> String,
15231 ) -> Expression {
15232 match expr {
15233 Expression::Literal(lit)
15234 if matches!(
15235 lit.as_ref(),
15236 crate::expressions::Literal::String(_)
15237 ) =>
15238 {
15239 let crate::expressions::Literal::String(s) =
15240 lit.as_ref()
15241 else {
15242 unreachable!()
15243 };
15244 Expression::string(&converter(s))
15245 }
15246 Expression::Function(func)
15247 if func.name.eq_ignore_ascii_case("CONCAT") =>
15248 {
15249 let new_args: Vec<Expression> = func
15250 .args
15251 .iter()
15252 .map(|a| convert_fmt_expr(a, converter))
15253 .collect();
15254 Expression::Function(Box::new(Function::new(
15255 "CONCAT".to_string(),
15256 new_args,
15257 )))
15258 }
15259 other => other.clone(),
15260 }
15261 }
15262
15263 match target {
15264 DialectType::DuckDB => {
15265 if matches!(source, DialectType::SQLite) {
15266 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
15267 let cast_val = Expression::Cast(Box::new(Cast {
15268 this: val,
15269 to: crate::expressions::DataType::Timestamp {
15270 precision: None,
15271 timezone: false,
15272 },
15273 trailing_comments: Vec::new(),
15274 double_colon_syntax: false,
15275 format: None,
15276 default: None,
15277 inferred_type: None,
15278 }));
15279 Ok(Expression::Function(Box::new(Function::new(
15280 "STRFTIME".to_string(),
15281 vec![cast_val, fmt_expr.clone()],
15282 ))))
15283 } else {
15284 Ok(Expression::Function(f))
15285 }
15286 }
15287 DialectType::Spark
15288 | DialectType::Databricks
15289 | DialectType::Hive => {
15290 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
15291 let converted_fmt =
15292 convert_fmt_expr(fmt_expr, &c_to_java_format);
15293 Ok(Expression::Function(Box::new(Function::new(
15294 "DATE_FORMAT".to_string(),
15295 vec![val, converted_fmt],
15296 ))))
15297 }
15298 DialectType::TSQL | DialectType::Fabric => {
15299 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
15300 let converted_fmt =
15301 convert_fmt_expr(fmt_expr, &c_to_java_format);
15302 Ok(Expression::Function(Box::new(Function::new(
15303 "FORMAT".to_string(),
15304 vec![val, converted_fmt],
15305 ))))
15306 }
15307 DialectType::Presto
15308 | DialectType::Trino
15309 | DialectType::Athena => {
15310 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
15311 if let Expression::Literal(lit) = fmt_expr {
15312 if let crate::expressions::Literal::String(s) =
15313 lit.as_ref()
15314 {
15315 let presto_fmt = duckdb_to_presto_format(s);
15316 Ok(Expression::Function(Box::new(Function::new(
15317 "DATE_FORMAT".to_string(),
15318 vec![val, Expression::string(&presto_fmt)],
15319 ))))
15320 } else {
15321 Ok(Expression::Function(Box::new(Function::new(
15322 "DATE_FORMAT".to_string(),
15323 vec![val, fmt_expr.clone()],
15324 ))))
15325 }
15326 } else {
15327 Ok(Expression::Function(Box::new(Function::new(
15328 "DATE_FORMAT".to_string(),
15329 vec![val, fmt_expr.clone()],
15330 ))))
15331 }
15332 }
15333 DialectType::BigQuery => {
15334 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
15335 if let Expression::Literal(lit) = fmt_expr {
15336 if let crate::expressions::Literal::String(s) =
15337 lit.as_ref()
15338 {
15339 let bq_fmt = duckdb_to_bigquery_format(s);
15340 Ok(Expression::Function(Box::new(Function::new(
15341 "FORMAT_DATE".to_string(),
15342 vec![Expression::string(&bq_fmt), val],
15343 ))))
15344 } else {
15345 Ok(Expression::Function(Box::new(Function::new(
15346 "FORMAT_DATE".to_string(),
15347 vec![fmt_expr.clone(), val],
15348 ))))
15349 }
15350 } else {
15351 Ok(Expression::Function(Box::new(Function::new(
15352 "FORMAT_DATE".to_string(),
15353 vec![fmt_expr.clone(), val],
15354 ))))
15355 }
15356 }
15357 DialectType::PostgreSQL | DialectType::Redshift => {
15358 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
15359 if let Expression::Literal(lit) = fmt_expr {
15360 if let crate::expressions::Literal::String(s) =
15361 lit.as_ref()
15362 {
15363 let pg_fmt = s
15364 .replace("%Y", "YYYY")
15365 .replace("%m", "MM")
15366 .replace("%d", "DD")
15367 .replace("%H", "HH24")
15368 .replace("%M", "MI")
15369 .replace("%S", "SS")
15370 .replace("%y", "YY")
15371 .replace("%-m", "FMMM")
15372 .replace("%-d", "FMDD")
15373 .replace("%-H", "FMHH24")
15374 .replace("%-I", "FMHH12")
15375 .replace("%p", "AM")
15376 .replace("%F", "YYYY-MM-DD")
15377 .replace("%T", "HH24:MI:SS");
15378 Ok(Expression::Function(Box::new(Function::new(
15379 "TO_CHAR".to_string(),
15380 vec![val, Expression::string(&pg_fmt)],
15381 ))))
15382 } else {
15383 Ok(Expression::Function(Box::new(Function::new(
15384 "TO_CHAR".to_string(),
15385 vec![val, fmt_expr.clone()],
15386 ))))
15387 }
15388 } else {
15389 Ok(Expression::Function(Box::new(Function::new(
15390 "TO_CHAR".to_string(),
15391 vec![val, fmt_expr.clone()],
15392 ))))
15393 }
15394 }
15395 _ => Ok(Expression::Function(f)),
15396 }
15397 }
15398 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
15399 "STRPTIME" if f.args.len() == 2 => {
15400 let val = f.args[0].clone();
15401 let fmt_expr = &f.args[1];
15402
/// Rewrites a C/strptime-style format string into Java-style pattern letters for the
/// parsing targets (TO_TIMESTAMP / UNIX_TIMESTAMP).
///
/// Plain substring replacements, applied in table order. Specifiers not listed here
/// (including `%a`, `%b` and `%j`) are left in the string untouched.
fn c_to_java_format_parse(fmt: &str) -> String {
    const SPECIFIERS: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    let mut rendered = fmt.to_string();
    for &(c_spec, java_spec) in SPECIFIERS {
        rendered = rendered.replace(c_spec, java_spec);
    }
    rendered
}
15421
15422 match target {
15423 DialectType::DuckDB => Ok(Expression::Function(f)),
15424 DialectType::Spark | DialectType::Databricks => {
15425 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
15426 if let Expression::Literal(lit) = fmt_expr {
15427 if let crate::expressions::Literal::String(s) =
15428 lit.as_ref()
15429 {
15430 let java_fmt = c_to_java_format_parse(s);
15431 Ok(Expression::Function(Box::new(Function::new(
15432 "TO_TIMESTAMP".to_string(),
15433 vec![val, Expression::string(&java_fmt)],
15434 ))))
15435 } else {
15436 Ok(Expression::Function(Box::new(Function::new(
15437 "TO_TIMESTAMP".to_string(),
15438 vec![val, fmt_expr.clone()],
15439 ))))
15440 }
15441 } else {
15442 Ok(Expression::Function(Box::new(Function::new(
15443 "TO_TIMESTAMP".to_string(),
15444 vec![val, fmt_expr.clone()],
15445 ))))
15446 }
15447 }
15448 DialectType::Hive => {
15449 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
15450 if let Expression::Literal(lit) = fmt_expr {
15451 if let crate::expressions::Literal::String(s) =
15452 lit.as_ref()
15453 {
15454 let java_fmt = c_to_java_format_parse(s);
15455 let unix_ts =
15456 Expression::Function(Box::new(Function::new(
15457 "UNIX_TIMESTAMP".to_string(),
15458 vec![val, Expression::string(&java_fmt)],
15459 )));
15460 let from_unix =
15461 Expression::Function(Box::new(Function::new(
15462 "FROM_UNIXTIME".to_string(),
15463 vec![unix_ts],
15464 )));
15465 Ok(Expression::Cast(Box::new(
15466 crate::expressions::Cast {
15467 this: from_unix,
15468 to: DataType::Timestamp {
15469 timezone: false,
15470 precision: None,
15471 },
15472 trailing_comments: Vec::new(),
15473 double_colon_syntax: false,
15474 format: None,
15475 default: None,
15476 inferred_type: None,
15477 },
15478 )))
15479 } else {
15480 Ok(Expression::Function(f))
15481 }
15482 } else {
15483 Ok(Expression::Function(f))
15484 }
15485 }
15486 DialectType::Presto
15487 | DialectType::Trino
15488 | DialectType::Athena => {
15489 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
15490 if let Expression::Literal(lit) = fmt_expr {
15491 if let crate::expressions::Literal::String(s) =
15492 lit.as_ref()
15493 {
15494 let presto_fmt = duckdb_to_presto_format(s);
15495 Ok(Expression::Function(Box::new(Function::new(
15496 "DATE_PARSE".to_string(),
15497 vec![val, Expression::string(&presto_fmt)],
15498 ))))
15499 } else {
15500 Ok(Expression::Function(Box::new(Function::new(
15501 "DATE_PARSE".to_string(),
15502 vec![val, fmt_expr.clone()],
15503 ))))
15504 }
15505 } else {
15506 Ok(Expression::Function(Box::new(Function::new(
15507 "DATE_PARSE".to_string(),
15508 vec![val, fmt_expr.clone()],
15509 ))))
15510 }
15511 }
15512 DialectType::BigQuery => {
15513 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
15514 if let Expression::Literal(lit) = fmt_expr {
15515 if let crate::expressions::Literal::String(s) =
15516 lit.as_ref()
15517 {
15518 let bq_fmt = duckdb_to_bigquery_format(s);
15519 Ok(Expression::Function(Box::new(Function::new(
15520 "PARSE_TIMESTAMP".to_string(),
15521 vec![Expression::string(&bq_fmt), val],
15522 ))))
15523 } else {
15524 Ok(Expression::Function(Box::new(Function::new(
15525 "PARSE_TIMESTAMP".to_string(),
15526 vec![fmt_expr.clone(), val],
15527 ))))
15528 }
15529 } else {
15530 Ok(Expression::Function(Box::new(Function::new(
15531 "PARSE_TIMESTAMP".to_string(),
15532 vec![fmt_expr.clone(), val],
15533 ))))
15534 }
15535 }
15536 _ => Ok(Expression::Function(f)),
15537 }
15538 }
15539 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
15540 "DATE_FORMAT"
15541 if f.args.len() >= 2
15542 && matches!(
15543 source,
15544 DialectType::Presto
15545 | DialectType::Trino
15546 | DialectType::Athena
15547 ) =>
15548 {
15549 let val = f.args[0].clone();
15550 let fmt_expr = &f.args[1];
15551
15552 match target {
15553 DialectType::Presto
15554 | DialectType::Trino
15555 | DialectType::Athena => {
15556 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
15557 if let Expression::Literal(lit) = fmt_expr {
15558 if let crate::expressions::Literal::String(s) =
15559 lit.as_ref()
15560 {
15561 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15562 Ok(Expression::Function(Box::new(Function::new(
15563 "DATE_FORMAT".to_string(),
15564 vec![val, Expression::string(&normalized)],
15565 ))))
15566 } else {
15567 Ok(Expression::Function(f))
15568 }
15569 } else {
15570 Ok(Expression::Function(f))
15571 }
15572 }
15573 DialectType::Hive
15574 | DialectType::Spark
15575 | DialectType::Databricks => {
15576 // Convert Presto C-style to Java-style format
15577 if let Expression::Literal(lit) = fmt_expr {
15578 if let crate::expressions::Literal::String(s) =
15579 lit.as_ref()
15580 {
15581 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15582 Ok(Expression::Function(Box::new(Function::new(
15583 "DATE_FORMAT".to_string(),
15584 vec![val, Expression::string(&java_fmt)],
15585 ))))
15586 } else {
15587 Ok(Expression::Function(f))
15588 }
15589 } else {
15590 Ok(Expression::Function(f))
15591 }
15592 }
15593 DialectType::DuckDB => {
15594 // Convert to STRFTIME(val, duckdb_fmt)
15595 if let Expression::Literal(lit) = fmt_expr {
15596 if let crate::expressions::Literal::String(s) =
15597 lit.as_ref()
15598 {
15599 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15600 Ok(Expression::Function(Box::new(Function::new(
15601 "STRFTIME".to_string(),
15602 vec![val, Expression::string(&duckdb_fmt)],
15603 ))))
15604 } else {
15605 Ok(Expression::Function(Box::new(Function::new(
15606 "STRFTIME".to_string(),
15607 vec![val, fmt_expr.clone()],
15608 ))))
15609 }
15610 } else {
15611 Ok(Expression::Function(Box::new(Function::new(
15612 "STRFTIME".to_string(),
15613 vec![val, fmt_expr.clone()],
15614 ))))
15615 }
15616 }
15617 DialectType::BigQuery => {
15618 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
15619 if let Expression::Literal(lit) = fmt_expr {
15620 if let crate::expressions::Literal::String(s) =
15621 lit.as_ref()
15622 {
15623 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
15624 Ok(Expression::Function(Box::new(Function::new(
15625 "FORMAT_DATE".to_string(),
15626 vec![Expression::string(&bq_fmt), val],
15627 ))))
15628 } else {
15629 Ok(Expression::Function(Box::new(Function::new(
15630 "FORMAT_DATE".to_string(),
15631 vec![fmt_expr.clone(), val],
15632 ))))
15633 }
15634 } else {
15635 Ok(Expression::Function(Box::new(Function::new(
15636 "FORMAT_DATE".to_string(),
15637 vec![fmt_expr.clone(), val],
15638 ))))
15639 }
15640 }
15641 _ => Ok(Expression::Function(f)),
15642 }
15643 }
15644 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
15645 "DATE_PARSE"
15646 if f.args.len() >= 2
15647 && matches!(
15648 source,
15649 DialectType::Presto
15650 | DialectType::Trino
15651 | DialectType::Athena
15652 ) =>
15653 {
15654 let val = f.args[0].clone();
15655 let fmt_expr = &f.args[1];
15656
15657 match target {
15658 DialectType::Presto
15659 | DialectType::Trino
15660 | DialectType::Athena => {
15661 // Presto -> Presto: normalize format
15662 if let Expression::Literal(lit) = fmt_expr {
15663 if let crate::expressions::Literal::String(s) =
15664 lit.as_ref()
15665 {
15666 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15667 Ok(Expression::Function(Box::new(Function::new(
15668 "DATE_PARSE".to_string(),
15669 vec![val, Expression::string(&normalized)],
15670 ))))
15671 } else {
15672 Ok(Expression::Function(f))
15673 }
15674 } else {
15675 Ok(Expression::Function(f))
15676 }
15677 }
15678 DialectType::Hive => {
15679 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
15680 if let Expression::Literal(lit) = fmt_expr {
15681 if let crate::expressions::Literal::String(s) =
15682 lit.as_ref()
15683 {
15684 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
15685 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
15686 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15687 this: val,
15688 to: DataType::Timestamp { timezone: false, precision: None },
15689 trailing_comments: Vec::new(),
15690 double_colon_syntax: false,
15691 format: None,
15692 default: None,
15693 inferred_type: None,
15694 })))
15695 } else {
15696 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15697 Ok(Expression::Function(Box::new(Function::new(
15698 "TO_TIMESTAMP".to_string(),
15699 vec![val, Expression::string(&java_fmt)],
15700 ))))
15701 }
15702 } else {
15703 Ok(Expression::Function(f))
15704 }
15705 } else {
15706 Ok(Expression::Function(f))
15707 }
15708 }
15709 DialectType::Spark | DialectType::Databricks => {
15710 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
15711 if let Expression::Literal(lit) = fmt_expr {
15712 if let crate::expressions::Literal::String(s) =
15713 lit.as_ref()
15714 {
15715 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15716 Ok(Expression::Function(Box::new(Function::new(
15717 "TO_TIMESTAMP".to_string(),
15718 vec![val, Expression::string(&java_fmt)],
15719 ))))
15720 } else {
15721 Ok(Expression::Function(f))
15722 }
15723 } else {
15724 Ok(Expression::Function(f))
15725 }
15726 }
15727 DialectType::DuckDB => {
15728 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
15729 if let Expression::Literal(lit) = fmt_expr {
15730 if let crate::expressions::Literal::String(s) =
15731 lit.as_ref()
15732 {
15733 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15734 Ok(Expression::Function(Box::new(Function::new(
15735 "STRPTIME".to_string(),
15736 vec![val, Expression::string(&duckdb_fmt)],
15737 ))))
15738 } else {
15739 Ok(Expression::Function(Box::new(Function::new(
15740 "STRPTIME".to_string(),
15741 vec![val, fmt_expr.clone()],
15742 ))))
15743 }
15744 } else {
15745 Ok(Expression::Function(Box::new(Function::new(
15746 "STRPTIME".to_string(),
15747 vec![val, fmt_expr.clone()],
15748 ))))
15749 }
15750 }
15751 _ => Ok(Expression::Function(f)),
15752 }
15753 }
15754 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
15755 "FROM_BASE64"
15756 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15757 {
15758 Ok(Expression::Function(Box::new(Function::new(
15759 "UNBASE64".to_string(),
15760 f.args,
15761 ))))
15762 }
15763 "TO_BASE64"
15764 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15765 {
15766 Ok(Expression::Function(Box::new(Function::new(
15767 "BASE64".to_string(),
15768 f.args,
15769 ))))
15770 }
15771 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
15772 "FROM_UNIXTIME"
15773 if f.args.len() == 1
15774 && matches!(
15775 source,
15776 DialectType::Presto
15777 | DialectType::Trino
15778 | DialectType::Athena
15779 )
15780 && matches!(
15781 target,
15782 DialectType::Spark | DialectType::Databricks
15783 ) =>
15784 {
15785 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
15786 let from_unix = Expression::Function(Box::new(Function::new(
15787 "FROM_UNIXTIME".to_string(),
15788 f.args,
15789 )));
15790 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15791 this: from_unix,
15792 to: DataType::Timestamp {
15793 timezone: false,
15794 precision: None,
15795 },
15796 trailing_comments: Vec::new(),
15797 double_colon_syntax: false,
15798 format: None,
15799 default: None,
15800 inferred_type: None,
15801 })))
15802 }
15803 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
15804 "DATE_FORMAT"
15805 if f.args.len() >= 2
15806 && !matches!(
15807 target,
15808 DialectType::Hive
15809 | DialectType::Spark
15810 | DialectType::Databricks
15811 | DialectType::MySQL
15812 | DialectType::SingleStore
15813 ) =>
15814 {
15815 let val = f.args[0].clone();
15816 let fmt_expr = &f.args[1];
15817 let is_hive_source = matches!(
15818 source,
15819 DialectType::Hive
15820 | DialectType::Spark
15821 | DialectType::Databricks
15822 );
15823
/// Rewrites a Java-style date pattern into strftime-style specifiers.
///
/// Works in two passes:
/// 1. The multi-character Java tokens are substituted in a fixed order
///    (longer tokens before the shorter ones they contain).
/// 2. The single-character timezone letters are mapped (`z` -> `%Z`,
///    `Z` -> `%z`) while every `%x` pair produced by the first pass is
///    copied through untouched, so an already-emitted specifier is never
///    rewritten a second time.
fn java_to_c_format(fmt: &str) -> String {
    // (Java token, strftime specifier) pairs; order is significant.
    const MULTI_CHAR_RULES: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];

    let first_pass = MULTI_CHAR_RULES
        .iter()
        .fold(fmt.to_owned(), |acc, &(java, c_spec)| acc.replace(java, c_spec));

    let mut converted = String::with_capacity(first_pass.len());
    let mut remaining = first_pass.chars();
    while let Some(ch) = remaining.next() {
        match ch {
            '%' => {
                // Existing specifier: copy '%' plus the following char (if
                // any) verbatim. A trailing lone '%' is kept as-is.
                converted.push('%');
                if let Some(spec) = remaining.next() {
                    converted.push(spec);
                }
            }
            'z' => converted.push_str("%Z"),
            'Z' => converted.push_str("%z"),
            other => converted.push(other),
        }
    }
    converted
}
15863
15864 fn java_to_presto_format(fmt: &str) -> String {
15865 // Presto uses %T for HH:MM:SS
15866 let c_fmt = java_to_c_format(fmt);
15867 c_fmt.replace("%H:%M:%S", "%T")
15868 }
15869
15870 fn java_to_bq_format(fmt: &str) -> String {
15871 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
15872 let c_fmt = java_to_c_format(fmt);
15873 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
15874 }
15875
15876 // For Hive source, CAST string literals to appropriate type
15877 let cast_val = if is_hive_source {
15878 match &val {
15879 Expression::Literal(lit)
15880 if matches!(
15881 lit.as_ref(),
15882 crate::expressions::Literal::String(_)
15883 ) =>
15884 {
15885 match target {
15886 DialectType::DuckDB
15887 | DialectType::Presto
15888 | DialectType::Trino
15889 | DialectType::Athena => {
15890 Self::ensure_cast_timestamp(val.clone())
15891 }
15892 DialectType::BigQuery => {
15893 // BigQuery: CAST(val AS DATETIME)
15894 Expression::Cast(Box::new(
15895 crate::expressions::Cast {
15896 this: val.clone(),
15897 to: DataType::Custom {
15898 name: "DATETIME".to_string(),
15899 },
15900 trailing_comments: vec![],
15901 double_colon_syntax: false,
15902 format: None,
15903 default: None,
15904 inferred_type: None,
15905 },
15906 ))
15907 }
15908 _ => val.clone(),
15909 }
15910 }
15911 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
15912 Expression::Cast(c)
15913 if matches!(c.to, DataType::Date)
15914 && matches!(
15915 target,
15916 DialectType::Presto
15917 | DialectType::Trino
15918 | DialectType::Athena
15919 ) =>
15920 {
15921 Expression::Cast(Box::new(crate::expressions::Cast {
15922 this: val.clone(),
15923 to: DataType::Timestamp {
15924 timezone: false,
15925 precision: None,
15926 },
15927 trailing_comments: vec![],
15928 double_colon_syntax: false,
15929 format: None,
15930 default: None,
15931 inferred_type: None,
15932 }))
15933 }
15934 Expression::Literal(lit)
15935 if matches!(
15936 lit.as_ref(),
15937 crate::expressions::Literal::Date(_)
15938 ) && matches!(
15939 target,
15940 DialectType::Presto
15941 | DialectType::Trino
15942 | DialectType::Athena
15943 ) =>
15944 {
15945 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
15946 let cast_date = Self::date_literal_to_cast(val.clone());
15947 Expression::Cast(Box::new(crate::expressions::Cast {
15948 this: cast_date,
15949 to: DataType::Timestamp {
15950 timezone: false,
15951 precision: None,
15952 },
15953 trailing_comments: vec![],
15954 double_colon_syntax: false,
15955 format: None,
15956 default: None,
15957 inferred_type: None,
15958 }))
15959 }
15960 _ => val.clone(),
15961 }
15962 } else {
15963 val.clone()
15964 };
15965
15966 match target {
15967 DialectType::DuckDB => {
15968 if let Expression::Literal(lit) = fmt_expr {
15969 if let crate::expressions::Literal::String(s) =
15970 lit.as_ref()
15971 {
15972 let c_fmt = if is_hive_source {
15973 java_to_c_format(s)
15974 } else {
15975 s.clone()
15976 };
15977 Ok(Expression::Function(Box::new(Function::new(
15978 "STRFTIME".to_string(),
15979 vec![cast_val, Expression::string(&c_fmt)],
15980 ))))
15981 } else {
15982 Ok(Expression::Function(Box::new(Function::new(
15983 "STRFTIME".to_string(),
15984 vec![cast_val, fmt_expr.clone()],
15985 ))))
15986 }
15987 } else {
15988 Ok(Expression::Function(Box::new(Function::new(
15989 "STRFTIME".to_string(),
15990 vec![cast_val, fmt_expr.clone()],
15991 ))))
15992 }
15993 }
15994 DialectType::Presto
15995 | DialectType::Trino
15996 | DialectType::Athena => {
15997 if is_hive_source {
15998 if let Expression::Literal(lit) = fmt_expr {
15999 if let crate::expressions::Literal::String(s) =
16000 lit.as_ref()
16001 {
16002 let p_fmt = java_to_presto_format(s);
16003 Ok(Expression::Function(Box::new(
16004 Function::new(
16005 "DATE_FORMAT".to_string(),
16006 vec![
16007 cast_val,
16008 Expression::string(&p_fmt),
16009 ],
16010 ),
16011 )))
16012 } else {
16013 Ok(Expression::Function(Box::new(
16014 Function::new(
16015 "DATE_FORMAT".to_string(),
16016 vec![cast_val, fmt_expr.clone()],
16017 ),
16018 )))
16019 }
16020 } else {
16021 Ok(Expression::Function(Box::new(Function::new(
16022 "DATE_FORMAT".to_string(),
16023 vec![cast_val, fmt_expr.clone()],
16024 ))))
16025 }
16026 } else {
16027 Ok(Expression::Function(Box::new(Function::new(
16028 "DATE_FORMAT".to_string(),
16029 f.args,
16030 ))))
16031 }
16032 }
16033 DialectType::BigQuery => {
16034 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
16035 if let Expression::Literal(lit) = fmt_expr {
16036 if let crate::expressions::Literal::String(s) =
16037 lit.as_ref()
16038 {
16039 let bq_fmt = if is_hive_source {
16040 java_to_bq_format(s)
16041 } else {
16042 java_to_c_format(s)
16043 };
16044 Ok(Expression::Function(Box::new(Function::new(
16045 "FORMAT_DATE".to_string(),
16046 vec![Expression::string(&bq_fmt), cast_val],
16047 ))))
16048 } else {
16049 Ok(Expression::Function(Box::new(Function::new(
16050 "FORMAT_DATE".to_string(),
16051 vec![fmt_expr.clone(), cast_val],
16052 ))))
16053 }
16054 } else {
16055 Ok(Expression::Function(Box::new(Function::new(
16056 "FORMAT_DATE".to_string(),
16057 vec![fmt_expr.clone(), cast_val],
16058 ))))
16059 }
16060 }
16061 DialectType::PostgreSQL | DialectType::Redshift => {
16062 if let Expression::Literal(lit) = fmt_expr {
16063 if let crate::expressions::Literal::String(s) =
16064 lit.as_ref()
16065 {
16066 let pg_fmt = s
16067 .replace("yyyy", "YYYY")
16068 .replace("MM", "MM")
16069 .replace("dd", "DD")
16070 .replace("HH", "HH24")
16071 .replace("mm", "MI")
16072 .replace("ss", "SS")
16073 .replace("yy", "YY");
16074 Ok(Expression::Function(Box::new(Function::new(
16075 "TO_CHAR".to_string(),
16076 vec![val, Expression::string(&pg_fmt)],
16077 ))))
16078 } else {
16079 Ok(Expression::Function(Box::new(Function::new(
16080 "TO_CHAR".to_string(),
16081 vec![val, fmt_expr.clone()],
16082 ))))
16083 }
16084 } else {
16085 Ok(Expression::Function(Box::new(Function::new(
16086 "TO_CHAR".to_string(),
16087 vec![val, fmt_expr.clone()],
16088 ))))
16089 }
16090 }
16091 _ => Ok(Expression::Function(f)),
16092 }
16093 }
16094 // DATEDIFF(unit, start, end) - 3-arg form
16095 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
16096 "DATEDIFF" if f.args.len() == 3 => {
16097 let mut args = f.args;
16098 // SQLite source: args = (date1, date2, unit_string)
16099 // Standard source: args = (unit, start, end)
16100 let (_arg0, arg1, arg2, unit_str) =
16101 if matches!(source, DialectType::SQLite) {
16102 let date1 = args.remove(0);
16103 let date2 = args.remove(0);
16104 let unit_expr = args.remove(0);
16105 let unit_s = Self::get_unit_str_static(&unit_expr);
16106
16107 // For SQLite target, generate JULIANDAY arithmetic directly
16108 if matches!(target, DialectType::SQLite) {
16109 let jd_first = Expression::Function(Box::new(
16110 Function::new("JULIANDAY".to_string(), vec![date1]),
16111 ));
16112 let jd_second = Expression::Function(Box::new(
16113 Function::new("JULIANDAY".to_string(), vec![date2]),
16114 ));
16115 let diff = Expression::Sub(Box::new(
16116 crate::expressions::BinaryOp::new(
16117 jd_first, jd_second,
16118 ),
16119 ));
16120 let paren_diff = Expression::Paren(Box::new(
16121 crate::expressions::Paren {
16122 this: diff,
16123 trailing_comments: Vec::new(),
16124 },
16125 ));
16126 let adjusted = match unit_s.as_str() {
16127 "HOUR" => Expression::Mul(Box::new(
16128 crate::expressions::BinaryOp::new(
16129 paren_diff,
16130 Expression::Literal(Box::new(
16131 Literal::Number("24.0".to_string()),
16132 )),
16133 ),
16134 )),
16135 "MINUTE" => Expression::Mul(Box::new(
16136 crate::expressions::BinaryOp::new(
16137 paren_diff,
16138 Expression::Literal(Box::new(
16139 Literal::Number("1440.0".to_string()),
16140 )),
16141 ),
16142 )),
16143 "SECOND" => Expression::Mul(Box::new(
16144 crate::expressions::BinaryOp::new(
16145 paren_diff,
16146 Expression::Literal(Box::new(
16147 Literal::Number("86400.0".to_string()),
16148 )),
16149 ),
16150 )),
16151 "MONTH" => Expression::Div(Box::new(
16152 crate::expressions::BinaryOp::new(
16153 paren_diff,
16154 Expression::Literal(Box::new(
16155 Literal::Number("30.0".to_string()),
16156 )),
16157 ),
16158 )),
16159 "YEAR" => Expression::Div(Box::new(
16160 crate::expressions::BinaryOp::new(
16161 paren_diff,
16162 Expression::Literal(Box::new(
16163 Literal::Number("365.0".to_string()),
16164 )),
16165 ),
16166 )),
16167 _ => paren_diff,
16168 };
16169 return Ok(Expression::Cast(Box::new(Cast {
16170 this: adjusted,
16171 to: DataType::Int {
16172 length: None,
16173 integer_spelling: true,
16174 },
16175 trailing_comments: vec![],
16176 double_colon_syntax: false,
16177 format: None,
16178 default: None,
16179 inferred_type: None,
16180 })));
16181 }
16182
16183 // For other targets, remap to standard (unit, start, end) form
16184 let unit_ident =
16185 Expression::Identifier(Identifier::new(&unit_s));
16186 (unit_ident, date1, date2, unit_s)
16187 } else {
16188 let arg0 = args.remove(0);
16189 let arg1 = args.remove(0);
16190 let arg2 = args.remove(0);
16191 let unit_s = Self::get_unit_str_static(&arg0);
16192 (arg0, arg1, arg2, unit_s)
16193 };
16194
16195 // For Hive/Spark source, string literal dates need to be cast
16196 // Note: Databricks is excluded - it handles string args like standard SQL
16197 let is_hive_spark =
16198 matches!(source, DialectType::Hive | DialectType::Spark);
16199
16200 match target {
16201 DialectType::Snowflake => {
16202 let unit =
16203 Expression::Identifier(Identifier::new(&unit_str));
16204 // Use ensure_to_date_preserved to add TO_DATE with a marker
16205 // that prevents the Snowflake TO_DATE handler from converting it to CAST
16206 let d1 = if is_hive_spark {
16207 Self::ensure_to_date_preserved(arg1)
16208 } else {
16209 arg1
16210 };
16211 let d2 = if is_hive_spark {
16212 Self::ensure_to_date_preserved(arg2)
16213 } else {
16214 arg2
16215 };
16216 Ok(Expression::Function(Box::new(Function::new(
16217 "DATEDIFF".to_string(),
16218 vec![unit, d1, d2],
16219 ))))
16220 }
16221 DialectType::Redshift => {
16222 let unit =
16223 Expression::Identifier(Identifier::new(&unit_str));
16224 let d1 = if is_hive_spark {
16225 Self::ensure_cast_date(arg1)
16226 } else {
16227 arg1
16228 };
16229 let d2 = if is_hive_spark {
16230 Self::ensure_cast_date(arg2)
16231 } else {
16232 arg2
16233 };
16234 Ok(Expression::Function(Box::new(Function::new(
16235 "DATEDIFF".to_string(),
16236 vec![unit, d1, d2],
16237 ))))
16238 }
16239 DialectType::TSQL => {
16240 let unit =
16241 Expression::Identifier(Identifier::new(&unit_str));
16242 Ok(Expression::Function(Box::new(Function::new(
16243 "DATEDIFF".to_string(),
16244 vec![unit, arg1, arg2],
16245 ))))
16246 }
16247 DialectType::DuckDB => {
16248 let is_redshift_tsql = matches!(
16249 source,
16250 DialectType::Redshift | DialectType::TSQL
16251 );
16252 if is_hive_spark {
16253 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
16254 let d1 = Self::ensure_cast_date(arg1);
16255 let d2 = Self::ensure_cast_date(arg2);
16256 Ok(Expression::Function(Box::new(Function::new(
16257 "DATE_DIFF".to_string(),
16258 vec![Expression::string(&unit_str), d1, d2],
16259 ))))
16260 } else if matches!(source, DialectType::Snowflake) {
16261 // For Snowflake source: special handling per unit
16262 match unit_str.as_str() {
16263 "NANOSECOND" => {
16264 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
16265 fn cast_to_timestamp_ns(
16266 expr: Expression,
16267 ) -> Expression
16268 {
16269 Expression::Cast(Box::new(Cast {
16270 this: expr,
16271 to: DataType::Custom {
16272 name: "TIMESTAMP_NS".to_string(),
16273 },
16274 trailing_comments: vec![],
16275 double_colon_syntax: false,
16276 format: None,
16277 default: None,
16278 inferred_type: None,
16279 }))
16280 }
16281 let epoch_end = Expression::Function(Box::new(
16282 Function::new(
16283 "EPOCH_NS".to_string(),
16284 vec![cast_to_timestamp_ns(arg2)],
16285 ),
16286 ));
16287 let epoch_start = Expression::Function(
16288 Box::new(Function::new(
16289 "EPOCH_NS".to_string(),
16290 vec![cast_to_timestamp_ns(arg1)],
16291 )),
16292 );
16293 Ok(Expression::Sub(Box::new(BinaryOp::new(
16294 epoch_end,
16295 epoch_start,
16296 ))))
16297 }
16298 "WEEK" => {
16299 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
16300 let d1 = Self::force_cast_date(arg1);
16301 let d2 = Self::force_cast_date(arg2);
16302 let dt1 = Expression::Function(Box::new(
16303 Function::new(
16304 "DATE_TRUNC".to_string(),
16305 vec![Expression::string("WEEK"), d1],
16306 ),
16307 ));
16308 let dt2 = Expression::Function(Box::new(
16309 Function::new(
16310 "DATE_TRUNC".to_string(),
16311 vec![Expression::string("WEEK"), d2],
16312 ),
16313 ));
16314 Ok(Expression::Function(Box::new(
16315 Function::new(
16316 "DATE_DIFF".to_string(),
16317 vec![
16318 Expression::string(&unit_str),
16319 dt1,
16320 dt2,
16321 ],
16322 ),
16323 )))
16324 }
16325 _ => {
16326 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
16327 let d1 = Self::force_cast_date(arg1);
16328 let d2 = Self::force_cast_date(arg2);
16329 Ok(Expression::Function(Box::new(
16330 Function::new(
16331 "DATE_DIFF".to_string(),
16332 vec![
16333 Expression::string(&unit_str),
16334 d1,
16335 d2,
16336 ],
16337 ),
16338 )))
16339 }
16340 }
16341 } else if is_redshift_tsql {
16342 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
16343 let d1 = Self::force_cast_timestamp(arg1);
16344 let d2 = Self::force_cast_timestamp(arg2);
16345 Ok(Expression::Function(Box::new(Function::new(
16346 "DATE_DIFF".to_string(),
16347 vec![Expression::string(&unit_str), d1, d2],
16348 ))))
16349 } else {
16350 // Keep as DATEDIFF so DuckDB's transform_datediff handles
16351 // DATE_TRUNC for WEEK, CAST for string literals, etc.
16352 let unit =
16353 Expression::Identifier(Identifier::new(&unit_str));
16354 Ok(Expression::Function(Box::new(Function::new(
16355 "DATEDIFF".to_string(),
16356 vec![unit, arg1, arg2],
16357 ))))
16358 }
16359 }
16360 DialectType::BigQuery => {
16361 let is_redshift_tsql = matches!(
16362 source,
16363 DialectType::Redshift
16364 | DialectType::TSQL
16365 | DialectType::Snowflake
16366 );
16367 let cast_d1 = if is_hive_spark {
16368 Self::ensure_cast_date(arg1)
16369 } else if is_redshift_tsql {
16370 Self::force_cast_datetime(arg1)
16371 } else {
16372 Self::ensure_cast_datetime(arg1)
16373 };
16374 let cast_d2 = if is_hive_spark {
16375 Self::ensure_cast_date(arg2)
16376 } else if is_redshift_tsql {
16377 Self::force_cast_datetime(arg2)
16378 } else {
16379 Self::ensure_cast_datetime(arg2)
16380 };
16381 let unit =
16382 Expression::Identifier(Identifier::new(&unit_str));
16383 Ok(Expression::Function(Box::new(Function::new(
16384 "DATE_DIFF".to_string(),
16385 vec![cast_d2, cast_d1, unit],
16386 ))))
16387 }
16388 DialectType::Presto
16389 | DialectType::Trino
16390 | DialectType::Athena => {
16391 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
16392 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
16393 let is_redshift_tsql = matches!(
16394 source,
16395 DialectType::Redshift
16396 | DialectType::TSQL
16397 | DialectType::Snowflake
16398 );
16399 let d1 = if is_hive_spark {
16400 Self::double_cast_timestamp_date(arg1)
16401 } else if is_redshift_tsql {
16402 Self::force_cast_timestamp(arg1)
16403 } else {
16404 arg1
16405 };
16406 let d2 = if is_hive_spark {
16407 Self::double_cast_timestamp_date(arg2)
16408 } else if is_redshift_tsql {
16409 Self::force_cast_timestamp(arg2)
16410 } else {
16411 arg2
16412 };
16413 Ok(Expression::Function(Box::new(Function::new(
16414 "DATE_DIFF".to_string(),
16415 vec![Expression::string(&unit_str), d1, d2],
16416 ))))
16417 }
16418 DialectType::Hive => match unit_str.as_str() {
16419 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
16420 this: Expression::Function(Box::new(Function::new(
16421 "MONTHS_BETWEEN".to_string(),
16422 vec![arg2, arg1],
16423 ))),
16424 to: DataType::Int {
16425 length: None,
16426 integer_spelling: false,
16427 },
16428 trailing_comments: vec![],
16429 double_colon_syntax: false,
16430 format: None,
16431 default: None,
16432 inferred_type: None,
16433 }))),
16434 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
16435 this: Expression::Div(Box::new(
16436 crate::expressions::BinaryOp::new(
16437 Expression::Function(Box::new(Function::new(
16438 "DATEDIFF".to_string(),
16439 vec![arg2, arg1],
16440 ))),
16441 Expression::number(7),
16442 ),
16443 )),
16444 to: DataType::Int {
16445 length: None,
16446 integer_spelling: false,
16447 },
16448 trailing_comments: vec![],
16449 double_colon_syntax: false,
16450 format: None,
16451 default: None,
16452 inferred_type: None,
16453 }))),
16454 _ => Ok(Expression::Function(Box::new(Function::new(
16455 "DATEDIFF".to_string(),
16456 vec![arg2, arg1],
16457 )))),
16458 },
16459 DialectType::Spark | DialectType::Databricks => {
16460 let unit =
16461 Expression::Identifier(Identifier::new(&unit_str));
16462 Ok(Expression::Function(Box::new(Function::new(
16463 "DATEDIFF".to_string(),
16464 vec![unit, arg1, arg2],
16465 ))))
16466 }
16467 _ => {
16468 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
16469 let d1 = if is_hive_spark {
16470 Self::ensure_cast_date(arg1)
16471 } else {
16472 arg1
16473 };
16474 let d2 = if is_hive_spark {
16475 Self::ensure_cast_date(arg2)
16476 } else {
16477 arg2
16478 };
16479 let unit =
16480 Expression::Identifier(Identifier::new(&unit_str));
16481 Ok(Expression::Function(Box::new(Function::new(
16482 "DATEDIFF".to_string(),
16483 vec![unit, d1, d2],
16484 ))))
16485 }
16486 }
16487 }
16488 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
16489 "DATEDIFF" if f.args.len() == 2 => {
16490 let mut args = f.args;
16491 let arg0 = args.remove(0);
16492 let arg1 = args.remove(0);
16493
16494 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
16495 // Also recognizes TryCast/Cast to DATE that may have been produced by
16496 // cross-dialect TO_DATE -> TRY_CAST conversion
16497 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
16498 if let Expression::Function(ref f) = e {
16499 if f.name.eq_ignore_ascii_case("TO_DATE")
16500 && f.args.len() == 1
16501 {
16502 return (f.args[0].clone(), true);
16503 }
16504 }
16505 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
16506 if let Expression::TryCast(ref c) = e {
16507 if matches!(c.to, DataType::Date) {
16508 return (e, true); // Already properly cast, return as-is
16509 }
16510 }
16511 (e, false)
16512 };
16513
16514 match target {
16515 DialectType::DuckDB => {
16516 // For Hive source, always CAST to DATE
16517 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
16518 let cast_d0 = if matches!(
16519 source,
16520 DialectType::Hive
16521 | DialectType::Spark
16522 | DialectType::Databricks
16523 ) {
16524 let (inner, was_to_date) = unwrap_to_date(arg1);
16525 if was_to_date {
16526 // Already a date expression, use directly
16527 if matches!(&inner, Expression::TryCast(_)) {
16528 inner // Already TRY_CAST(x AS DATE)
16529 } else {
16530 Self::try_cast_date(inner)
16531 }
16532 } else {
16533 Self::force_cast_date(inner)
16534 }
16535 } else {
16536 Self::ensure_cast_date(arg1)
16537 };
16538 let cast_d1 = if matches!(
16539 source,
16540 DialectType::Hive
16541 | DialectType::Spark
16542 | DialectType::Databricks
16543 ) {
16544 let (inner, was_to_date) = unwrap_to_date(arg0);
16545 if was_to_date {
16546 if matches!(&inner, Expression::TryCast(_)) {
16547 inner
16548 } else {
16549 Self::try_cast_date(inner)
16550 }
16551 } else {
16552 Self::force_cast_date(inner)
16553 }
16554 } else {
16555 Self::ensure_cast_date(arg0)
16556 };
16557 Ok(Expression::Function(Box::new(Function::new(
16558 "DATE_DIFF".to_string(),
16559 vec![Expression::string("DAY"), cast_d0, cast_d1],
16560 ))))
16561 }
16562 DialectType::Presto
16563 | DialectType::Trino
16564 | DialectType::Athena => {
16565 // For Hive/Spark source, apply double_cast_timestamp_date
16566 // For other sources (MySQL etc.), just swap args without casting
16567 if matches!(
16568 source,
16569 DialectType::Hive
16570 | DialectType::Spark
16571 | DialectType::Databricks
16572 ) {
16573 let cast_fn = |e: Expression| -> Expression {
16574 let (inner, was_to_date) = unwrap_to_date(e);
16575 if was_to_date {
16576 let first_cast =
16577 Self::double_cast_timestamp_date(inner);
16578 Self::double_cast_timestamp_date(first_cast)
16579 } else {
16580 Self::double_cast_timestamp_date(inner)
16581 }
16582 };
16583 Ok(Expression::Function(Box::new(Function::new(
16584 "DATE_DIFF".to_string(),
16585 vec![
16586 Expression::string("DAY"),
16587 cast_fn(arg1),
16588 cast_fn(arg0),
16589 ],
16590 ))))
16591 } else {
16592 Ok(Expression::Function(Box::new(Function::new(
16593 "DATE_DIFF".to_string(),
16594 vec![Expression::string("DAY"), arg1, arg0],
16595 ))))
16596 }
16597 }
16598 DialectType::Redshift => {
16599 let unit = Expression::Identifier(Identifier::new("DAY"));
16600 Ok(Expression::Function(Box::new(Function::new(
16601 "DATEDIFF".to_string(),
16602 vec![unit, arg1, arg0],
16603 ))))
16604 }
16605 _ => Ok(Expression::Function(Box::new(Function::new(
16606 "DATEDIFF".to_string(),
16607 vec![arg0, arg1],
16608 )))),
16609 }
16610 }
16611 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
16612 "DATE_DIFF" if f.args.len() == 3 => {
16613 let mut args = f.args;
16614 let arg0 = args.remove(0);
16615 let arg1 = args.remove(0);
16616 let arg2 = args.remove(0);
16617 let unit_str = Self::get_unit_str_static(&arg0);
16618
16619 match target {
16620 DialectType::DuckDB => {
16621 // DuckDB: DATE_DIFF('UNIT', start, end)
16622 Ok(Expression::Function(Box::new(Function::new(
16623 "DATE_DIFF".to_string(),
16624 vec![Expression::string(&unit_str), arg1, arg2],
16625 ))))
16626 }
16627 DialectType::Presto
16628 | DialectType::Trino
16629 | DialectType::Athena => {
16630 Ok(Expression::Function(Box::new(Function::new(
16631 "DATE_DIFF".to_string(),
16632 vec![Expression::string(&unit_str), arg1, arg2],
16633 ))))
16634 }
16635 DialectType::ClickHouse => {
16636 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
16637 let unit =
16638 Expression::Identifier(Identifier::new(&unit_str));
16639 Ok(Expression::Function(Box::new(Function::new(
16640 "DATE_DIFF".to_string(),
16641 vec![unit, arg1, arg2],
16642 ))))
16643 }
16644 DialectType::Snowflake | DialectType::Redshift => {
16645 let unit =
16646 Expression::Identifier(Identifier::new(&unit_str));
16647 Ok(Expression::Function(Box::new(Function::new(
16648 "DATEDIFF".to_string(),
16649 vec![unit, arg1, arg2],
16650 ))))
16651 }
16652 _ => {
16653 let unit =
16654 Expression::Identifier(Identifier::new(&unit_str));
16655 Ok(Expression::Function(Box::new(Function::new(
16656 "DATEDIFF".to_string(),
16657 vec![unit, arg1, arg2],
16658 ))))
16659 }
16660 }
16661 }
16662 // DATEADD(unit, val, date) - 3-arg form
16663 "DATEADD" if f.args.len() == 3 => {
16664 let mut args = f.args;
16665 let arg0 = args.remove(0);
16666 let arg1 = args.remove(0);
16667 let arg2 = args.remove(0);
16668 let unit_str = Self::get_unit_str_static(&arg0);
16669
16670 // Normalize TSQL unit abbreviations to standard names
16671 let unit_str = match unit_str.as_str() {
16672 "YY" | "YYYY" => "YEAR".to_string(),
16673 "QQ" | "Q" => "QUARTER".to_string(),
16674 "MM" | "M" => "MONTH".to_string(),
16675 "WK" | "WW" => "WEEK".to_string(),
16676 "DD" | "D" | "DY" => "DAY".to_string(),
16677 "HH" => "HOUR".to_string(),
16678 "MI" | "N" => "MINUTE".to_string(),
16679 "SS" | "S" => "SECOND".to_string(),
16680 "MS" => "MILLISECOND".to_string(),
16681 "MCS" | "US" => "MICROSECOND".to_string(),
16682 _ => unit_str,
16683 };
16684 match target {
16685 DialectType::Snowflake => {
16686 let unit =
16687 Expression::Identifier(Identifier::new(&unit_str));
16688 // Cast string literal to TIMESTAMP, but not for Snowflake source
16689 // (Snowflake natively accepts string literals in DATEADD)
16690 let arg2 = if matches!(
16691 &arg2,
16692 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16693 ) && !matches!(source, DialectType::Snowflake)
16694 {
16695 Expression::Cast(Box::new(Cast {
16696 this: arg2,
16697 to: DataType::Timestamp {
16698 precision: None,
16699 timezone: false,
16700 },
16701 trailing_comments: Vec::new(),
16702 double_colon_syntax: false,
16703 format: None,
16704 default: None,
16705 inferred_type: None,
16706 }))
16707 } else {
16708 arg2
16709 };
16710 Ok(Expression::Function(Box::new(Function::new(
16711 "DATEADD".to_string(),
16712 vec![unit, arg1, arg2],
16713 ))))
16714 }
16715 DialectType::TSQL => {
16716 let unit =
16717 Expression::Identifier(Identifier::new(&unit_str));
16718 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
16719 let arg2 = if matches!(
16720 &arg2,
16721 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16722 ) && !matches!(
16723 source,
16724 DialectType::Spark
16725 | DialectType::Databricks
16726 | DialectType::Hive
16727 ) {
16728 Expression::Cast(Box::new(Cast {
16729 this: arg2,
16730 to: DataType::Custom {
16731 name: "DATETIME2".to_string(),
16732 },
16733 trailing_comments: Vec::new(),
16734 double_colon_syntax: false,
16735 format: None,
16736 default: None,
16737 inferred_type: None,
16738 }))
16739 } else {
16740 arg2
16741 };
16742 Ok(Expression::Function(Box::new(Function::new(
16743 "DATEADD".to_string(),
16744 vec![unit, arg1, arg2],
16745 ))))
16746 }
16747 DialectType::Redshift => {
16748 let unit =
16749 Expression::Identifier(Identifier::new(&unit_str));
16750 Ok(Expression::Function(Box::new(Function::new(
16751 "DATEADD".to_string(),
16752 vec![unit, arg1, arg2],
16753 ))))
16754 }
16755 DialectType::Databricks => {
16756 let unit =
16757 Expression::Identifier(Identifier::new(&unit_str));
16758 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
16759 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
16760 let func_name = if matches!(
16761 source,
16762 DialectType::TSQL
16763 | DialectType::Fabric
16764 | DialectType::Databricks
16765 | DialectType::Snowflake
16766 ) {
16767 "DATEADD"
16768 } else {
16769 "DATE_ADD"
16770 };
16771 Ok(Expression::Function(Box::new(Function::new(
16772 func_name.to_string(),
16773 vec![unit, arg1, arg2],
16774 ))))
16775 }
16776 DialectType::DuckDB => {
16777 // Special handling for NANOSECOND from Snowflake
16778 if unit_str == "NANOSECOND"
16779 && matches!(source, DialectType::Snowflake)
16780 {
16781 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
16782 let cast_ts = Expression::Cast(Box::new(Cast {
16783 this: arg2,
16784 to: DataType::Custom {
16785 name: "TIMESTAMP_NS".to_string(),
16786 },
16787 trailing_comments: vec![],
16788 double_colon_syntax: false,
16789 format: None,
16790 default: None,
16791 inferred_type: None,
16792 }));
16793 let epoch_ns =
16794 Expression::Function(Box::new(Function::new(
16795 "EPOCH_NS".to_string(),
16796 vec![cast_ts],
16797 )));
16798 let sum = Expression::Add(Box::new(BinaryOp::new(
16799 epoch_ns, arg1,
16800 )));
16801 Ok(Expression::Function(Box::new(Function::new(
16802 "MAKE_TIMESTAMP_NS".to_string(),
16803 vec![sum],
16804 ))))
16805 } else {
16806 // DuckDB: convert to date + INTERVAL syntax with CAST
16807 let iu = Self::parse_interval_unit_static(&unit_str);
16808 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16809 this: Some(arg1),
16810 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
16811 }));
16812 // Cast string literal to TIMESTAMP
16813 let arg2 = if matches!(
16814 &arg2,
16815 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16816 ) {
16817 Expression::Cast(Box::new(Cast {
16818 this: arg2,
16819 to: DataType::Timestamp {
16820 precision: None,
16821 timezone: false,
16822 },
16823 trailing_comments: Vec::new(),
16824 double_colon_syntax: false,
16825 format: None,
16826 default: None,
16827 inferred_type: None,
16828 }))
16829 } else {
16830 arg2
16831 };
16832 Ok(Expression::Add(Box::new(
16833 crate::expressions::BinaryOp::new(arg2, interval),
16834 )))
16835 }
16836 }
16837 DialectType::Spark => {
16838 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
16839 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
16840 if matches!(source, DialectType::TSQL | DialectType::Fabric)
16841 {
16842 fn multiply_expr_spark(
16843 expr: Expression,
16844 factor: i64,
16845 ) -> Expression
16846 {
16847 if let Expression::Literal(lit) = &expr {
16848 if let crate::expressions::Literal::Number(n) =
16849 lit.as_ref()
16850 {
16851 if let Ok(val) = n.parse::<i64>() {
16852 return Expression::Literal(Box::new(
16853 crate::expressions::Literal::Number(
16854 (val * factor).to_string(),
16855 ),
16856 ));
16857 }
16858 }
16859 }
16860 Expression::Mul(Box::new(
16861 crate::expressions::BinaryOp::new(
16862 expr,
16863 Expression::Literal(Box::new(
16864 crate::expressions::Literal::Number(
16865 factor.to_string(),
16866 ),
16867 )),
16868 ),
16869 ))
16870 }
16871 let normalized_unit = match unit_str.as_str() {
16872 "YEAR" | "YY" | "YYYY" => "YEAR",
16873 "QUARTER" | "QQ" | "Q" => "QUARTER",
16874 "MONTH" | "MM" | "M" => "MONTH",
16875 "WEEK" | "WK" | "WW" => "WEEK",
16876 "DAY" | "DD" | "D" | "DY" => "DAY",
16877 _ => &unit_str,
16878 };
16879 match normalized_unit {
16880 "YEAR" => {
16881 let months = multiply_expr_spark(arg1, 12);
16882 Ok(Expression::Function(Box::new(
16883 Function::new(
16884 "ADD_MONTHS".to_string(),
16885 vec![arg2, months],
16886 ),
16887 )))
16888 }
16889 "QUARTER" => {
16890 let months = multiply_expr_spark(arg1, 3);
16891 Ok(Expression::Function(Box::new(
16892 Function::new(
16893 "ADD_MONTHS".to_string(),
16894 vec![arg2, months],
16895 ),
16896 )))
16897 }
16898 "MONTH" => Ok(Expression::Function(Box::new(
16899 Function::new(
16900 "ADD_MONTHS".to_string(),
16901 vec![arg2, arg1],
16902 ),
16903 ))),
16904 "WEEK" => {
16905 let days = multiply_expr_spark(arg1, 7);
16906 Ok(Expression::Function(Box::new(
16907 Function::new(
16908 "DATE_ADD".to_string(),
16909 vec![arg2, days],
16910 ),
16911 )))
16912 }
16913 "DAY" => Ok(Expression::Function(Box::new(
16914 Function::new(
16915 "DATE_ADD".to_string(),
16916 vec![arg2, arg1],
16917 ),
16918 ))),
16919 _ => {
16920 let unit = Expression::Identifier(
16921 Identifier::new(&unit_str),
16922 );
16923 Ok(Expression::Function(Box::new(
16924 Function::new(
16925 "DATE_ADD".to_string(),
16926 vec![unit, arg1, arg2],
16927 ),
16928 )))
16929 }
16930 }
16931 } else {
16932 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
16933 let unit =
16934 Expression::Identifier(Identifier::new(&unit_str));
16935 Ok(Expression::Function(Box::new(Function::new(
16936 "DATE_ADD".to_string(),
16937 vec![unit, arg1, arg2],
16938 ))))
16939 }
16940 }
16941 DialectType::Hive => match unit_str.as_str() {
16942 "MONTH" => {
16943 Ok(Expression::Function(Box::new(Function::new(
16944 "ADD_MONTHS".to_string(),
16945 vec![arg2, arg1],
16946 ))))
16947 }
16948 _ => Ok(Expression::Function(Box::new(Function::new(
16949 "DATE_ADD".to_string(),
16950 vec![arg2, arg1],
16951 )))),
16952 },
16953 DialectType::Presto
16954 | DialectType::Trino
16955 | DialectType::Athena => {
16956 // Cast string literal date to TIMESTAMP
16957 let arg2 = if matches!(
16958 &arg2,
16959 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16960 ) {
16961 Expression::Cast(Box::new(Cast {
16962 this: arg2,
16963 to: DataType::Timestamp {
16964 precision: None,
16965 timezone: false,
16966 },
16967 trailing_comments: Vec::new(),
16968 double_colon_syntax: false,
16969 format: None,
16970 default: None,
16971 inferred_type: None,
16972 }))
16973 } else {
16974 arg2
16975 };
16976 Ok(Expression::Function(Box::new(Function::new(
16977 "DATE_ADD".to_string(),
16978 vec![Expression::string(&unit_str), arg1, arg2],
16979 ))))
16980 }
16981 DialectType::MySQL => {
16982 let iu = Self::parse_interval_unit_static(&unit_str);
16983 Ok(Expression::DateAdd(Box::new(
16984 crate::expressions::DateAddFunc {
16985 this: arg2,
16986 interval: arg1,
16987 unit: iu,
16988 },
16989 )))
16990 }
16991 DialectType::PostgreSQL => {
16992 // Cast string literal date to TIMESTAMP
16993 let arg2 = if matches!(
16994 &arg2,
16995 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16996 ) {
16997 Expression::Cast(Box::new(Cast {
16998 this: arg2,
16999 to: DataType::Timestamp {
17000 precision: None,
17001 timezone: false,
17002 },
17003 trailing_comments: Vec::new(),
17004 double_colon_syntax: false,
17005 format: None,
17006 default: None,
17007 inferred_type: None,
17008 }))
17009 } else {
17010 arg2
17011 };
17012 let interval = Expression::Interval(Box::new(
17013 crate::expressions::Interval {
17014 this: Some(Expression::string(&format!(
17015 "{} {}",
17016 Self::expr_to_string_static(&arg1),
17017 unit_str
17018 ))),
17019 unit: None,
17020 },
17021 ));
17022 Ok(Expression::Add(Box::new(
17023 crate::expressions::BinaryOp::new(arg2, interval),
17024 )))
17025 }
17026 DialectType::BigQuery => {
17027 let iu = Self::parse_interval_unit_static(&unit_str);
17028 let interval = Expression::Interval(Box::new(
17029 crate::expressions::Interval {
17030 this: Some(arg1),
17031 unit: Some(
17032 crate::expressions::IntervalUnitSpec::Simple {
17033 unit: iu,
17034 use_plural: false,
17035 },
17036 ),
17037 },
17038 ));
17039 // Non-TSQL sources: CAST string literal to DATETIME
17040 let arg2 = if !matches!(
17041 source,
17042 DialectType::TSQL | DialectType::Fabric
17043 ) && matches!(
17044 &arg2,
17045 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17046 ) {
17047 Expression::Cast(Box::new(Cast {
17048 this: arg2,
17049 to: DataType::Custom {
17050 name: "DATETIME".to_string(),
17051 },
17052 trailing_comments: Vec::new(),
17053 double_colon_syntax: false,
17054 format: None,
17055 default: None,
17056 inferred_type: None,
17057 }))
17058 } else {
17059 arg2
17060 };
17061 Ok(Expression::Function(Box::new(Function::new(
17062 "DATE_ADD".to_string(),
17063 vec![arg2, interval],
17064 ))))
17065 }
17066 _ => {
17067 let unit =
17068 Expression::Identifier(Identifier::new(&unit_str));
17069 Ok(Expression::Function(Box::new(Function::new(
17070 "DATEADD".to_string(),
17071 vec![unit, arg1, arg2],
17072 ))))
17073 }
17074 }
17075 }
17076 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
17077 // or (date, val, 'UNIT') from Generic canonical form
17078 "DATE_ADD" if f.args.len() == 3 => {
17079 let mut args = f.args;
17080 let arg0 = args.remove(0);
17081 let arg1 = args.remove(0);
17082 let arg2 = args.remove(0);
17083 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
17084 // where arg2 is a string literal matching a unit name
17085 let arg2_unit = match &arg2 {
17086 Expression::Literal(lit)
17087 if matches!(lit.as_ref(), Literal::String(_)) =>
17088 {
17089 let Literal::String(s) = lit.as_ref() else {
17090 unreachable!()
17091 };
17092 let u = s.to_ascii_uppercase();
17093 if matches!(
17094 u.as_str(),
17095 "DAY"
17096 | "MONTH"
17097 | "YEAR"
17098 | "HOUR"
17099 | "MINUTE"
17100 | "SECOND"
17101 | "WEEK"
17102 | "QUARTER"
17103 | "MILLISECOND"
17104 | "MICROSECOND"
17105 ) {
17106 Some(u)
17107 } else {
17108 None
17109 }
17110 }
17111 _ => None,
17112 };
17113 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
17114 let (unit_str, val, date) = if let Some(u) = arg2_unit {
17115 (u, arg1, arg0)
17116 } else {
17117 (Self::get_unit_str_static(&arg0), arg1, arg2)
17118 };
17119 // Alias for backward compat with the rest of the match
17120 let arg1 = val;
17121 let arg2 = date;
17122
17123 match target {
17124 DialectType::Presto
17125 | DialectType::Trino
17126 | DialectType::Athena => {
17127 Ok(Expression::Function(Box::new(Function::new(
17128 "DATE_ADD".to_string(),
17129 vec![Expression::string(&unit_str), arg1, arg2],
17130 ))))
17131 }
17132 DialectType::DuckDB => {
17133 let iu = Self::parse_interval_unit_static(&unit_str);
17134 let interval = Expression::Interval(Box::new(
17135 crate::expressions::Interval {
17136 this: Some(arg1),
17137 unit: Some(
17138 crate::expressions::IntervalUnitSpec::Simple {
17139 unit: iu,
17140 use_plural: false,
17141 },
17142 ),
17143 },
17144 ));
17145 Ok(Expression::Add(Box::new(
17146 crate::expressions::BinaryOp::new(arg2, interval),
17147 )))
17148 }
17149 DialectType::PostgreSQL
17150 | DialectType::Materialize
17151 | DialectType::RisingWave => {
17152 // PostgreSQL: x + INTERVAL '1 DAY'
17153 let amount_str = Self::expr_to_string_static(&arg1);
17154 let interval = Expression::Interval(Box::new(
17155 crate::expressions::Interval {
17156 this: Some(Expression::string(&format!(
17157 "{} {}",
17158 amount_str, unit_str
17159 ))),
17160 unit: None,
17161 },
17162 ));
17163 Ok(Expression::Add(Box::new(
17164 crate::expressions::BinaryOp::new(arg2, interval),
17165 )))
17166 }
17167 DialectType::Snowflake
17168 | DialectType::TSQL
17169 | DialectType::Redshift => {
17170 let unit =
17171 Expression::Identifier(Identifier::new(&unit_str));
17172 Ok(Expression::Function(Box::new(Function::new(
17173 "DATEADD".to_string(),
17174 vec![unit, arg1, arg2],
17175 ))))
17176 }
17177 DialectType::BigQuery
17178 | DialectType::MySQL
17179 | DialectType::Doris
17180 | DialectType::StarRocks
17181 | DialectType::Drill => {
17182 // DATE_ADD(date, INTERVAL amount UNIT)
17183 let iu = Self::parse_interval_unit_static(&unit_str);
17184 let interval = Expression::Interval(Box::new(
17185 crate::expressions::Interval {
17186 this: Some(arg1),
17187 unit: Some(
17188 crate::expressions::IntervalUnitSpec::Simple {
17189 unit: iu,
17190 use_plural: false,
17191 },
17192 ),
17193 },
17194 ));
17195 Ok(Expression::Function(Box::new(Function::new(
17196 "DATE_ADD".to_string(),
17197 vec![arg2, interval],
17198 ))))
17199 }
17200 DialectType::SQLite => {
17201 // SQLite: DATE(x, '1 DAY')
17202 // Build the string '1 DAY' from amount and unit
17203 let amount_str = match &arg1 {
17204 Expression::Literal(lit)
17205 if matches!(lit.as_ref(), Literal::Number(_)) =>
17206 {
17207 let Literal::Number(n) = lit.as_ref() else {
17208 unreachable!()
17209 };
17210 n.clone()
17211 }
17212 _ => "1".to_string(),
17213 };
17214 Ok(Expression::Function(Box::new(Function::new(
17215 "DATE".to_string(),
17216 vec![
17217 arg2,
17218 Expression::string(format!(
17219 "{} {}",
17220 amount_str, unit_str
17221 )),
17222 ],
17223 ))))
17224 }
17225 DialectType::Dremio => {
17226 // Dremio: DATE_ADD(date, amount) - drops unit
17227 Ok(Expression::Function(Box::new(Function::new(
17228 "DATE_ADD".to_string(),
17229 vec![arg2, arg1],
17230 ))))
17231 }
17232 DialectType::Spark => {
17233 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
17234 if unit_str == "DAY" {
17235 Ok(Expression::Function(Box::new(Function::new(
17236 "DATE_ADD".to_string(),
17237 vec![arg2, arg1],
17238 ))))
17239 } else {
17240 let unit =
17241 Expression::Identifier(Identifier::new(&unit_str));
17242 Ok(Expression::Function(Box::new(Function::new(
17243 "DATE_ADD".to_string(),
17244 vec![unit, arg1, arg2],
17245 ))))
17246 }
17247 }
17248 DialectType::Databricks => {
17249 let unit =
17250 Expression::Identifier(Identifier::new(&unit_str));
17251 Ok(Expression::Function(Box::new(Function::new(
17252 "DATE_ADD".to_string(),
17253 vec![unit, arg1, arg2],
17254 ))))
17255 }
17256 DialectType::Hive => {
17257 // Hive: DATE_ADD(date, val) for DAY
17258 Ok(Expression::Function(Box::new(Function::new(
17259 "DATE_ADD".to_string(),
17260 vec![arg2, arg1],
17261 ))))
17262 }
17263 _ => {
17264 let unit =
17265 Expression::Identifier(Identifier::new(&unit_str));
17266 Ok(Expression::Function(Box::new(Function::new(
17267 "DATE_ADD".to_string(),
17268 vec![unit, arg1, arg2],
17269 ))))
17270 }
17271 }
17272 }
17273 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
17274 "DATE_ADD"
17275 if f.args.len() == 2
17276 && matches!(
17277 source,
17278 DialectType::Hive
17279 | DialectType::Spark
17280 | DialectType::Databricks
17281 | DialectType::Generic
17282 ) =>
17283 {
17284 let mut args = f.args;
17285 let date = args.remove(0);
17286 let days = args.remove(0);
17287 match target {
17288 DialectType::Hive | DialectType::Spark => {
17289 // Keep as DATE_ADD(date, days) for Hive/Spark
17290 Ok(Expression::Function(Box::new(Function::new(
17291 "DATE_ADD".to_string(),
17292 vec![date, days],
17293 ))))
17294 }
17295 DialectType::Databricks => {
17296 // Databricks: DATEADD(DAY, days, date)
17297 Ok(Expression::Function(Box::new(Function::new(
17298 "DATEADD".to_string(),
17299 vec![
17300 Expression::Identifier(Identifier::new("DAY")),
17301 days,
17302 date,
17303 ],
17304 ))))
17305 }
17306 DialectType::DuckDB => {
17307 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
17308 let cast_date = Self::ensure_cast_date(date);
17309 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
17310 let interval_val = if matches!(
17311 days,
17312 Expression::Mul(_)
17313 | Expression::Sub(_)
17314 | Expression::Add(_)
17315 ) {
17316 Expression::Paren(Box::new(crate::expressions::Paren {
17317 this: days,
17318 trailing_comments: vec![],
17319 }))
17320 } else {
17321 days
17322 };
17323 let interval = Expression::Interval(Box::new(
17324 crate::expressions::Interval {
17325 this: Some(interval_val),
17326 unit: Some(
17327 crate::expressions::IntervalUnitSpec::Simple {
17328 unit: crate::expressions::IntervalUnit::Day,
17329 use_plural: false,
17330 },
17331 ),
17332 },
17333 ));
17334 Ok(Expression::Add(Box::new(
17335 crate::expressions::BinaryOp::new(cast_date, interval),
17336 )))
17337 }
17338 DialectType::Snowflake => {
17339 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17340 let cast_date = if matches!(
17341 source,
17342 DialectType::Hive
17343 | DialectType::Spark
17344 | DialectType::Databricks
17345 ) {
17346 if matches!(
17347 date,
17348 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17349 ) {
17350 Self::double_cast_timestamp_date(date)
17351 } else {
17352 date
17353 }
17354 } else {
17355 date
17356 };
17357 Ok(Expression::Function(Box::new(Function::new(
17358 "DATEADD".to_string(),
17359 vec![
17360 Expression::Identifier(Identifier::new("DAY")),
17361 days,
17362 cast_date,
17363 ],
17364 ))))
17365 }
17366 DialectType::Redshift => {
17367 Ok(Expression::Function(Box::new(Function::new(
17368 "DATEADD".to_string(),
17369 vec![
17370 Expression::Identifier(Identifier::new("DAY")),
17371 days,
17372 date,
17373 ],
17374 ))))
17375 }
17376 DialectType::TSQL | DialectType::Fabric => {
17377 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
17378 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
17379 let cast_date = if matches!(
17380 source,
17381 DialectType::Hive | DialectType::Spark
17382 ) {
17383 if matches!(
17384 date,
17385 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17386 ) {
17387 Self::double_cast_datetime2_date(date)
17388 } else {
17389 date
17390 }
17391 } else {
17392 date
17393 };
17394 Ok(Expression::Function(Box::new(Function::new(
17395 "DATEADD".to_string(),
17396 vec![
17397 Expression::Identifier(Identifier::new("DAY")),
17398 days,
17399 cast_date,
17400 ],
17401 ))))
17402 }
17403 DialectType::Presto
17404 | DialectType::Trino
17405 | DialectType::Athena => {
17406 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17407 let cast_date = if matches!(
17408 source,
17409 DialectType::Hive
17410 | DialectType::Spark
17411 | DialectType::Databricks
17412 ) {
17413 if matches!(
17414 date,
17415 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17416 ) {
17417 Self::double_cast_timestamp_date(date)
17418 } else {
17419 date
17420 }
17421 } else {
17422 date
17423 };
17424 Ok(Expression::Function(Box::new(Function::new(
17425 "DATE_ADD".to_string(),
17426 vec![Expression::string("DAY"), days, cast_date],
17427 ))))
17428 }
17429 DialectType::BigQuery => {
17430 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
17431 let cast_date = if matches!(
17432 source,
17433 DialectType::Hive
17434 | DialectType::Spark
17435 | DialectType::Databricks
17436 ) {
17437 Self::double_cast_datetime_date(date)
17438 } else {
17439 date
17440 };
17441 // Wrap complex expressions in Paren for interval
17442 let interval_val = if matches!(
17443 days,
17444 Expression::Mul(_)
17445 | Expression::Sub(_)
17446 | Expression::Add(_)
17447 ) {
17448 Expression::Paren(Box::new(crate::expressions::Paren {
17449 this: days,
17450 trailing_comments: vec![],
17451 }))
17452 } else {
17453 days
17454 };
17455 let interval = Expression::Interval(Box::new(
17456 crate::expressions::Interval {
17457 this: Some(interval_val),
17458 unit: Some(
17459 crate::expressions::IntervalUnitSpec::Simple {
17460 unit: crate::expressions::IntervalUnit::Day,
17461 use_plural: false,
17462 },
17463 ),
17464 },
17465 ));
17466 Ok(Expression::Function(Box::new(Function::new(
17467 "DATE_ADD".to_string(),
17468 vec![cast_date, interval],
17469 ))))
17470 }
17471 DialectType::MySQL => {
17472 let iu = crate::expressions::IntervalUnit::Day;
17473 Ok(Expression::DateAdd(Box::new(
17474 crate::expressions::DateAddFunc {
17475 this: date,
17476 interval: days,
17477 unit: iu,
17478 },
17479 )))
17480 }
17481 DialectType::PostgreSQL => {
17482 let interval = Expression::Interval(Box::new(
17483 crate::expressions::Interval {
17484 this: Some(Expression::string(&format!(
17485 "{} DAY",
17486 Self::expr_to_string_static(&days)
17487 ))),
17488 unit: None,
17489 },
17490 ));
17491 Ok(Expression::Add(Box::new(
17492 crate::expressions::BinaryOp::new(date, interval),
17493 )))
17494 }
17495 DialectType::Doris
17496 | DialectType::StarRocks
17497 | DialectType::Drill => {
17498 // DATE_ADD(date, INTERVAL days DAY)
17499 let interval = Expression::Interval(Box::new(
17500 crate::expressions::Interval {
17501 this: Some(days),
17502 unit: Some(
17503 crate::expressions::IntervalUnitSpec::Simple {
17504 unit: crate::expressions::IntervalUnit::Day,
17505 use_plural: false,
17506 },
17507 ),
17508 },
17509 ));
17510 Ok(Expression::Function(Box::new(Function::new(
17511 "DATE_ADD".to_string(),
17512 vec![date, interval],
17513 ))))
17514 }
17515 _ => Ok(Expression::Function(Box::new(Function::new(
17516 "DATE_ADD".to_string(),
17517 vec![date, days],
17518 )))),
17519 }
17520 }
17521 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
17522 "DATE_ADD"
17523 if f.args.len() == 2
17524 && matches!(
17525 source,
17526 DialectType::MySQL | DialectType::SingleStore
17527 )
17528 && matches!(&f.args[1], Expression::Interval(_)) =>
17529 {
17530 let mut args = f.args;
17531 let date = args.remove(0);
17532 let interval_expr = args.remove(0);
17533 let (val, unit) = Self::extract_interval_parts(&interval_expr);
17534 let unit_str = Self::interval_unit_to_string(&unit);
17535 let is_literal = matches!(&val,
17536 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
17537 );
17538
17539 match target {
17540 DialectType::MySQL | DialectType::SingleStore => {
17541 // Keep as DATE_ADD(date, INTERVAL val UNIT)
17542 Ok(Expression::Function(Box::new(Function::new(
17543 "DATE_ADD".to_string(),
17544 vec![date, interval_expr],
17545 ))))
17546 }
17547 DialectType::PostgreSQL => {
17548 if is_literal {
17549 // Literal: date + INTERVAL 'val UNIT'
17550 let interval = Expression::Interval(Box::new(
17551 crate::expressions::Interval {
17552 this: Some(Expression::Literal(Box::new(
17553 Literal::String(format!(
17554 "{} {}",
17555 Self::expr_to_string(&val),
17556 unit_str
17557 )),
17558 ))),
17559 unit: None,
17560 },
17561 ));
17562 Ok(Expression::Add(Box::new(
17563 crate::expressions::BinaryOp::new(date, interval),
17564 )))
17565 } else {
17566 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
17567 let interval_one = Expression::Interval(Box::new(
17568 crate::expressions::Interval {
17569 this: Some(Expression::Literal(Box::new(
17570 Literal::String(format!("1 {}", unit_str)),
17571 ))),
17572 unit: None,
17573 },
17574 ));
17575 let mul = Expression::Mul(Box::new(
17576 crate::expressions::BinaryOp::new(
17577 interval_one,
17578 val,
17579 ),
17580 ));
17581 Ok(Expression::Add(Box::new(
17582 crate::expressions::BinaryOp::new(date, mul),
17583 )))
17584 }
17585 }
17586 _ => {
17587 // Default: keep as DATE_ADD(date, interval)
17588 Ok(Expression::Function(Box::new(Function::new(
17589 "DATE_ADD".to_string(),
17590 vec![date, interval_expr],
17591 ))))
17592 }
17593 }
17594 }
17595 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
17596 "DATE_SUB"
17597 if f.args.len() == 2
17598 && matches!(
17599 source,
17600 DialectType::Hive
17601 | DialectType::Spark
17602 | DialectType::Databricks
17603 ) =>
17604 {
17605 let mut args = f.args;
17606 let date = args.remove(0);
17607 let days = args.remove(0);
17608 // Helper to create days * -1
17609 let make_neg_days = |d: Expression| -> Expression {
17610 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
17611 d,
17612 Expression::Literal(Box::new(Literal::Number(
17613 "-1".to_string(),
17614 ))),
17615 )))
17616 };
17617 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
17618 match target {
17619 DialectType::Hive
17620 | DialectType::Spark
17621 | DialectType::Databricks => {
17622 // Keep as DATE_SUB(date, days) for Hive/Spark
17623 Ok(Expression::Function(Box::new(Function::new(
17624 "DATE_SUB".to_string(),
17625 vec![date, days],
17626 ))))
17627 }
17628 DialectType::DuckDB => {
17629 let cast_date = Self::ensure_cast_date(date);
17630 let neg = make_neg_days(days);
17631 let interval = Expression::Interval(Box::new(
17632 crate::expressions::Interval {
17633 this: Some(Expression::Paren(Box::new(
17634 crate::expressions::Paren {
17635 this: neg,
17636 trailing_comments: vec![],
17637 },
17638 ))),
17639 unit: Some(
17640 crate::expressions::IntervalUnitSpec::Simple {
17641 unit: crate::expressions::IntervalUnit::Day,
17642 use_plural: false,
17643 },
17644 ),
17645 },
17646 ));
17647 Ok(Expression::Add(Box::new(
17648 crate::expressions::BinaryOp::new(cast_date, interval),
17649 )))
17650 }
17651 DialectType::Snowflake => {
17652 let cast_date = if is_string_literal {
17653 Self::double_cast_timestamp_date(date)
17654 } else {
17655 date
17656 };
17657 let neg = make_neg_days(days);
17658 Ok(Expression::Function(Box::new(Function::new(
17659 "DATEADD".to_string(),
17660 vec![
17661 Expression::Identifier(Identifier::new("DAY")),
17662 neg,
17663 cast_date,
17664 ],
17665 ))))
17666 }
17667 DialectType::Redshift => {
17668 let neg = make_neg_days(days);
17669 Ok(Expression::Function(Box::new(Function::new(
17670 "DATEADD".to_string(),
17671 vec![
17672 Expression::Identifier(Identifier::new("DAY")),
17673 neg,
17674 date,
17675 ],
17676 ))))
17677 }
17678 DialectType::TSQL | DialectType::Fabric => {
17679 let cast_date = if is_string_literal {
17680 Self::double_cast_datetime2_date(date)
17681 } else {
17682 date
17683 };
17684 let neg = make_neg_days(days);
17685 Ok(Expression::Function(Box::new(Function::new(
17686 "DATEADD".to_string(),
17687 vec![
17688 Expression::Identifier(Identifier::new("DAY")),
17689 neg,
17690 cast_date,
17691 ],
17692 ))))
17693 }
17694 DialectType::Presto
17695 | DialectType::Trino
17696 | DialectType::Athena => {
17697 let cast_date = if is_string_literal {
17698 Self::double_cast_timestamp_date(date)
17699 } else {
17700 date
17701 };
17702 let neg = make_neg_days(days);
17703 Ok(Expression::Function(Box::new(Function::new(
17704 "DATE_ADD".to_string(),
17705 vec![Expression::string("DAY"), neg, cast_date],
17706 ))))
17707 }
17708 DialectType::BigQuery => {
17709 let cast_date = if is_string_literal {
17710 Self::double_cast_datetime_date(date)
17711 } else {
17712 date
17713 };
17714 let neg = make_neg_days(days);
17715 let interval = Expression::Interval(Box::new(
17716 crate::expressions::Interval {
17717 this: Some(Expression::Paren(Box::new(
17718 crate::expressions::Paren {
17719 this: neg,
17720 trailing_comments: vec![],
17721 },
17722 ))),
17723 unit: Some(
17724 crate::expressions::IntervalUnitSpec::Simple {
17725 unit: crate::expressions::IntervalUnit::Day,
17726 use_plural: false,
17727 },
17728 ),
17729 },
17730 ));
17731 Ok(Expression::Function(Box::new(Function::new(
17732 "DATE_ADD".to_string(),
17733 vec![cast_date, interval],
17734 ))))
17735 }
17736 _ => Ok(Expression::Function(Box::new(Function::new(
17737 "DATE_SUB".to_string(),
17738 vec![date, days],
17739 )))),
17740 }
17741 }
17742 // ADD_MONTHS(date, val) -> target-specific
17743 "ADD_MONTHS" if f.args.len() == 2 => {
17744 let mut args = f.args;
17745 let date = args.remove(0);
17746 let val = args.remove(0);
17747 match target {
17748 DialectType::TSQL => {
17749 let cast_date = Self::ensure_cast_datetime2(date);
17750 Ok(Expression::Function(Box::new(Function::new(
17751 "DATEADD".to_string(),
17752 vec![
17753 Expression::Identifier(Identifier::new("MONTH")),
17754 val,
17755 cast_date,
17756 ],
17757 ))))
17758 }
17759 DialectType::DuckDB => {
17760 let interval = Expression::Interval(Box::new(
17761 crate::expressions::Interval {
17762 this: Some(val),
17763 unit: Some(
17764 crate::expressions::IntervalUnitSpec::Simple {
17765 unit:
17766 crate::expressions::IntervalUnit::Month,
17767 use_plural: false,
17768 },
17769 ),
17770 },
17771 ));
17772 Ok(Expression::Add(Box::new(
17773 crate::expressions::BinaryOp::new(date, interval),
17774 )))
17775 }
17776 DialectType::Snowflake => {
17777 // Keep ADD_MONTHS when source is Snowflake
17778 if matches!(source, DialectType::Snowflake) {
17779 Ok(Expression::Function(Box::new(Function::new(
17780 "ADD_MONTHS".to_string(),
17781 vec![date, val],
17782 ))))
17783 } else {
17784 Ok(Expression::Function(Box::new(Function::new(
17785 "DATEADD".to_string(),
17786 vec![
17787 Expression::Identifier(Identifier::new(
17788 "MONTH",
17789 )),
17790 val,
17791 date,
17792 ],
17793 ))))
17794 }
17795 }
17796 DialectType::Redshift => {
17797 Ok(Expression::Function(Box::new(Function::new(
17798 "DATEADD".to_string(),
17799 vec![
17800 Expression::Identifier(Identifier::new("MONTH")),
17801 val,
17802 date,
17803 ],
17804 ))))
17805 }
17806 DialectType::Presto
17807 | DialectType::Trino
17808 | DialectType::Athena => {
17809 Ok(Expression::Function(Box::new(Function::new(
17810 "DATE_ADD".to_string(),
17811 vec![Expression::string("MONTH"), val, date],
17812 ))))
17813 }
17814 DialectType::BigQuery => {
17815 let interval = Expression::Interval(Box::new(
17816 crate::expressions::Interval {
17817 this: Some(val),
17818 unit: Some(
17819 crate::expressions::IntervalUnitSpec::Simple {
17820 unit:
17821 crate::expressions::IntervalUnit::Month,
17822 use_plural: false,
17823 },
17824 ),
17825 },
17826 ));
17827 Ok(Expression::Function(Box::new(Function::new(
17828 "DATE_ADD".to_string(),
17829 vec![date, interval],
17830 ))))
17831 }
17832 _ => Ok(Expression::Function(Box::new(Function::new(
17833 "ADD_MONTHS".to_string(),
17834 vec![date, val],
17835 )))),
17836 }
17837 }
17838 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
17839 "DATETRUNC" if f.args.len() == 2 => {
17840 let mut args = f.args;
17841 let arg0 = args.remove(0);
17842 let arg1 = args.remove(0);
17843 let unit_str = Self::get_unit_str_static(&arg0);
17844 match target {
17845 DialectType::TSQL | DialectType::Fabric => {
17846 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
17847 Ok(Expression::Function(Box::new(Function::new(
17848 "DATETRUNC".to_string(),
17849 vec![
17850 Expression::Identifier(Identifier::new(&unit_str)),
17851 arg1,
17852 ],
17853 ))))
17854 }
17855 DialectType::DuckDB => {
17856 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
17857 let date = Self::ensure_cast_timestamp(arg1);
17858 Ok(Expression::Function(Box::new(Function::new(
17859 "DATE_TRUNC".to_string(),
17860 vec![Expression::string(&unit_str), date],
17861 ))))
17862 }
17863 DialectType::ClickHouse => {
17864 // ClickHouse: dateTrunc('UNIT', expr)
17865 Ok(Expression::Function(Box::new(Function::new(
17866 "dateTrunc".to_string(),
17867 vec![Expression::string(&unit_str), arg1],
17868 ))))
17869 }
17870 _ => {
17871 // Standard: DATE_TRUNC('UNIT', expr)
17872 let unit = Expression::string(&unit_str);
17873 Ok(Expression::Function(Box::new(Function::new(
17874 "DATE_TRUNC".to_string(),
17875 vec![unit, arg1],
17876 ))))
17877 }
17878 }
17879 }
17880 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
17881 "GETDATE" if f.args.is_empty() => match target {
17882 DialectType::TSQL => Ok(Expression::Function(f)),
17883 DialectType::Redshift => Ok(Expression::Function(Box::new(
17884 Function::new("GETDATE".to_string(), vec![]),
17885 ))),
17886 _ => Ok(Expression::CurrentTimestamp(
17887 crate::expressions::CurrentTimestamp {
17888 precision: None,
17889 sysdate: false,
17890 },
17891 )),
17892 },
17893 // TO_HEX(x) / HEX(x) -> target-specific hex function
17894 "TO_HEX" | "HEX" if f.args.len() == 1 => {
17895 let name = match target {
17896 DialectType::Presto | DialectType::Trino => "TO_HEX",
17897 DialectType::Spark
17898 | DialectType::Databricks
17899 | DialectType::Hive => "HEX",
17900 DialectType::DuckDB
17901 | DialectType::PostgreSQL
17902 | DialectType::Redshift => "TO_HEX",
17903 _ => &f.name,
17904 };
17905 Ok(Expression::Function(Box::new(Function::new(
17906 name.to_string(),
17907 f.args,
17908 ))))
17909 }
17910 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
17911 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
17912 match target {
17913 DialectType::BigQuery => {
17914 // BigQuery: UNHEX(x) -> FROM_HEX(x)
17915 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
17916 // because BigQuery MD5 returns BYTES, not hex string
17917 let arg = &f.args[0];
17918 let wrapped_arg = match arg {
17919 Expression::Function(inner_f)
17920 if inner_f.name.eq_ignore_ascii_case("MD5")
17921 || inner_f
17922 .name
17923 .eq_ignore_ascii_case("SHA1")
17924 || inner_f
17925 .name
17926 .eq_ignore_ascii_case("SHA256")
17927 || inner_f
17928 .name
17929 .eq_ignore_ascii_case("SHA512") =>
17930 {
17931 // Wrap hash function in TO_HEX for BigQuery
17932 Expression::Function(Box::new(Function::new(
17933 "TO_HEX".to_string(),
17934 vec![arg.clone()],
17935 )))
17936 }
17937 _ => f.args.into_iter().next().unwrap(),
17938 };
17939 Ok(Expression::Function(Box::new(Function::new(
17940 "FROM_HEX".to_string(),
17941 vec![wrapped_arg],
17942 ))))
17943 }
17944 _ => {
17945 let name = match target {
17946 DialectType::Presto | DialectType::Trino => "FROM_HEX",
17947 DialectType::Spark
17948 | DialectType::Databricks
17949 | DialectType::Hive => "UNHEX",
17950 _ => &f.name,
17951 };
17952 Ok(Expression::Function(Box::new(Function::new(
17953 name.to_string(),
17954 f.args,
17955 ))))
17956 }
17957 }
17958 }
17959 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
17960 "TO_UTF8" if f.args.len() == 1 => match target {
17961 DialectType::Spark | DialectType::Databricks => {
17962 let mut args = f.args;
17963 args.push(Expression::string("utf-8"));
17964 Ok(Expression::Function(Box::new(Function::new(
17965 "ENCODE".to_string(),
17966 args,
17967 ))))
17968 }
17969 _ => Ok(Expression::Function(f)),
17970 },
17971 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
17972 "FROM_UTF8" if f.args.len() == 1 => match target {
17973 DialectType::Spark | DialectType::Databricks => {
17974 let mut args = f.args;
17975 args.push(Expression::string("utf-8"));
17976 Ok(Expression::Function(Box::new(Function::new(
17977 "DECODE".to_string(),
17978 args,
17979 ))))
17980 }
17981 _ => Ok(Expression::Function(f)),
17982 },
17983 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
17984 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
17985 let name = match target {
17986 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
17987 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
17988 DialectType::PostgreSQL | DialectType::Redshift => {
17989 "STARTS_WITH"
17990 }
17991 _ => &f.name,
17992 };
17993 Ok(Expression::Function(Box::new(Function::new(
17994 name.to_string(),
17995 f.args,
17996 ))))
17997 }
17998 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
17999 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
18000 let name = match target {
18001 DialectType::Presto
18002 | DialectType::Trino
18003 | DialectType::Athena => "APPROX_DISTINCT",
18004 _ => "APPROX_COUNT_DISTINCT",
18005 };
18006 Ok(Expression::Function(Box::new(Function::new(
18007 name.to_string(),
18008 f.args,
18009 ))))
18010 }
18011 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
18012 "JSON_EXTRACT"
18013 if f.args.len() == 2
18014 && !matches!(source, DialectType::BigQuery)
18015 && matches!(
18016 target,
18017 DialectType::Spark
18018 | DialectType::Databricks
18019 | DialectType::Hive
18020 ) =>
18021 {
18022 Ok(Expression::Function(Box::new(Function::new(
18023 "GET_JSON_OBJECT".to_string(),
18024 f.args,
18025 ))))
18026 }
18027 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
18028 "JSON_EXTRACT"
18029 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
18030 {
18031 let mut args = f.args;
18032 let path = args.remove(1);
18033 let this = args.remove(0);
18034 Ok(Expression::JsonExtract(Box::new(
18035 crate::expressions::JsonExtractFunc {
18036 this,
18037 path,
18038 returning: None,
18039 arrow_syntax: true,
18040 hash_arrow_syntax: false,
18041 wrapper_option: None,
18042 quotes_option: None,
18043 on_scalar_string: false,
18044 on_error: None,
18045 },
18046 )))
18047 }
18048 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
18049 "JSON_FORMAT" if f.args.len() == 1 => {
18050 match target {
18051 DialectType::Spark | DialectType::Databricks => {
18052 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
18053 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
18054 if matches!(
18055 source,
18056 DialectType::Presto
18057 | DialectType::Trino
18058 | DialectType::Athena
18059 ) {
18060 if let Some(Expression::ParseJson(pj)) = f.args.first()
18061 {
18062 if let Expression::Literal(lit) = &pj.this {
18063 if let Literal::String(s) = lit.as_ref() {
18064 let wrapped =
18065 Expression::Literal(Box::new(
18066 Literal::String(format!("[{}]", s)),
18067 ));
18068 let schema_of_json = Expression::Function(
18069 Box::new(Function::new(
18070 "SCHEMA_OF_JSON".to_string(),
18071 vec![wrapped.clone()],
18072 )),
18073 );
18074 let from_json = Expression::Function(
18075 Box::new(Function::new(
18076 "FROM_JSON".to_string(),
18077 vec![wrapped, schema_of_json],
18078 )),
18079 );
18080 let to_json = Expression::Function(
18081 Box::new(Function::new(
18082 "TO_JSON".to_string(),
18083 vec![from_json],
18084 )),
18085 );
18086 return Ok(Expression::Function(Box::new(
18087 Function::new(
18088 "REGEXP_EXTRACT".to_string(),
18089 vec![
18090 to_json,
18091 Expression::Literal(Box::new(
18092 Literal::String(
18093 "^.(.*).$".to_string(),
18094 ),
18095 )),
18096 Expression::Literal(Box::new(
18097 Literal::Number(
18098 "1".to_string(),
18099 ),
18100 )),
18101 ],
18102 ),
18103 )));
18104 }
18105 }
18106 }
18107 }
18108
18109 // Strip inner CAST(... AS JSON) or TO_JSON() if present
18110 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
18111 let mut args = f.args;
18112 if let Some(Expression::Cast(ref c)) = args.first() {
18113 if matches!(&c.to, DataType::Json | DataType::JsonB) {
18114 args = vec![c.this.clone()];
18115 }
18116 } else if let Some(Expression::Function(ref inner_f)) =
18117 args.first()
18118 {
18119 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
18120 && inner_f.args.len() == 1
18121 {
18122 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
18123 args = inner_f.args.clone();
18124 }
18125 }
18126 Ok(Expression::Function(Box::new(Function::new(
18127 "TO_JSON".to_string(),
18128 args,
18129 ))))
18130 }
18131 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18132 Function::new("TO_JSON_STRING".to_string(), f.args),
18133 ))),
18134 DialectType::DuckDB => {
18135 // CAST(TO_JSON(x) AS TEXT)
18136 let to_json = Expression::Function(Box::new(
18137 Function::new("TO_JSON".to_string(), f.args),
18138 ));
18139 Ok(Expression::Cast(Box::new(Cast {
18140 this: to_json,
18141 to: DataType::Text,
18142 trailing_comments: Vec::new(),
18143 double_colon_syntax: false,
18144 format: None,
18145 default: None,
18146 inferred_type: None,
18147 })))
18148 }
18149 _ => Ok(Expression::Function(f)),
18150 }
18151 }
18152 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
18153 "SYSDATE" if f.args.is_empty() => {
18154 match target {
18155 DialectType::Oracle | DialectType::Redshift => {
18156 Ok(Expression::Function(f))
18157 }
18158 DialectType::Snowflake => {
18159 // Snowflake uses SYSDATE() with parens
18160 let mut f = *f;
18161 f.no_parens = false;
18162 Ok(Expression::Function(Box::new(f)))
18163 }
18164 DialectType::DuckDB => {
18165 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
18166 Ok(Expression::AtTimeZone(Box::new(
18167 crate::expressions::AtTimeZone {
18168 this: Expression::CurrentTimestamp(
18169 crate::expressions::CurrentTimestamp {
18170 precision: None,
18171 sysdate: false,
18172 },
18173 ),
18174 zone: Expression::Literal(Box::new(
18175 Literal::String("UTC".to_string()),
18176 )),
18177 },
18178 )))
18179 }
18180 _ => Ok(Expression::CurrentTimestamp(
18181 crate::expressions::CurrentTimestamp {
18182 precision: None,
18183 sysdate: true,
18184 },
18185 )),
18186 }
18187 }
18188 // LOGICAL_OR(x) -> BOOL_OR(x)
18189 "LOGICAL_OR" if f.args.len() == 1 => {
18190 let name = match target {
18191 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
18192 _ => &f.name,
18193 };
18194 Ok(Expression::Function(Box::new(Function::new(
18195 name.to_string(),
18196 f.args,
18197 ))))
18198 }
18199 // LOGICAL_AND(x) -> BOOL_AND(x)
18200 "LOGICAL_AND" if f.args.len() == 1 => {
18201 let name = match target {
18202 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
18203 _ => &f.name,
18204 };
18205 Ok(Expression::Function(Box::new(Function::new(
18206 name.to_string(),
18207 f.args,
18208 ))))
18209 }
18210 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
18211 "MONTHS_ADD" if f.args.len() == 2 => match target {
18212 DialectType::Oracle => Ok(Expression::Function(Box::new(
18213 Function::new("ADD_MONTHS".to_string(), f.args),
18214 ))),
18215 _ => Ok(Expression::Function(f)),
18216 },
18217 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
18218 "ARRAY_JOIN" if f.args.len() >= 2 => {
18219 match target {
18220 DialectType::Spark | DialectType::Databricks => {
18221 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
18222 Ok(Expression::Function(f))
18223 }
18224 DialectType::Hive => {
18225 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
18226 let mut args = f.args;
18227 let arr = args.remove(0);
18228 let sep = args.remove(0);
18229 // Drop any remaining args (null_replacement)
18230 Ok(Expression::Function(Box::new(Function::new(
18231 "CONCAT_WS".to_string(),
18232 vec![sep, arr],
18233 ))))
18234 }
18235 DialectType::Presto | DialectType::Trino => {
18236 Ok(Expression::Function(f))
18237 }
18238 _ => Ok(Expression::Function(f)),
18239 }
18240 }
18241 // LOCATE(substr, str, pos) 3-arg -> target-specific
18242 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
18243 "LOCATE"
18244 if f.args.len() == 3
18245 && matches!(
18246 target,
18247 DialectType::Presto
18248 | DialectType::Trino
18249 | DialectType::Athena
18250 | DialectType::DuckDB
18251 ) =>
18252 {
18253 let mut args = f.args;
18254 let substr = args.remove(0);
18255 let string = args.remove(0);
18256 let pos = args.remove(0);
18257 // STRPOS(SUBSTRING(string, pos), substr)
18258 let substring_call = Expression::Function(Box::new(Function::new(
18259 "SUBSTRING".to_string(),
18260 vec![string.clone(), pos.clone()],
18261 )));
18262 let strpos_call = Expression::Function(Box::new(Function::new(
18263 "STRPOS".to_string(),
18264 vec![substring_call, substr.clone()],
18265 )));
18266 // STRPOS(...) + pos - 1
18267 let pos_adjusted =
18268 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
18269 Expression::Add(Box::new(
18270 crate::expressions::BinaryOp::new(
18271 strpos_call.clone(),
18272 pos.clone(),
18273 ),
18274 )),
18275 Expression::number(1),
18276 )));
18277 // STRPOS(...) = 0
18278 let is_zero =
18279 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
18280 strpos_call.clone(),
18281 Expression::number(0),
18282 )));
18283
18284 match target {
18285 DialectType::Presto
18286 | DialectType::Trino
18287 | DialectType::Athena => {
18288 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
18289 Ok(Expression::Function(Box::new(Function::new(
18290 "IF".to_string(),
18291 vec![is_zero, Expression::number(0), pos_adjusted],
18292 ))))
18293 }
18294 DialectType::DuckDB => {
18295 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
18296 Ok(Expression::Case(Box::new(crate::expressions::Case {
18297 operand: None,
18298 whens: vec![(is_zero, Expression::number(0))],
18299 else_: Some(pos_adjusted),
18300 comments: Vec::new(),
18301 inferred_type: None,
18302 })))
18303 }
18304 _ => Ok(Expression::Function(Box::new(Function::new(
18305 "LOCATE".to_string(),
18306 vec![substr, string, pos],
18307 )))),
18308 }
18309 }
18310 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
18311 "STRPOS"
18312 if f.args.len() == 3
18313 && matches!(
18314 target,
18315 DialectType::BigQuery
18316 | DialectType::Oracle
18317 | DialectType::Teradata
18318 ) =>
18319 {
18320 let mut args = f.args;
18321 let haystack = args.remove(0);
18322 let needle = args.remove(0);
18323 let occurrence = args.remove(0);
18324 Ok(Expression::Function(Box::new(Function::new(
18325 "INSTR".to_string(),
18326 vec![haystack, needle, Expression::number(1), occurrence],
18327 ))))
18328 }
18329 // SCHEMA_NAME(id) -> target-specific
18330 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
18331 DialectType::MySQL | DialectType::SingleStore => {
18332 Ok(Expression::Function(Box::new(Function::new(
18333 "SCHEMA".to_string(),
18334 vec![],
18335 ))))
18336 }
18337 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
18338 crate::expressions::CurrentSchema { this: None },
18339 ))),
18340 DialectType::SQLite => Ok(Expression::string("main")),
18341 _ => Ok(Expression::Function(f)),
18342 },
18343 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
18344 "STRTOL" if f.args.len() == 2 => match target {
18345 DialectType::Presto | DialectType::Trino => {
18346 Ok(Expression::Function(Box::new(Function::new(
18347 "FROM_BASE".to_string(),
18348 f.args,
18349 ))))
18350 }
18351 _ => Ok(Expression::Function(f)),
18352 },
18353 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
18354 "EDITDIST3" if f.args.len() == 2 => match target {
18355 DialectType::Spark | DialectType::Databricks => {
18356 Ok(Expression::Function(Box::new(Function::new(
18357 "LEVENSHTEIN".to_string(),
18358 f.args,
18359 ))))
18360 }
18361 _ => Ok(Expression::Function(f)),
18362 },
18363 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
18364 "FORMAT"
18365 if f.args.len() == 2
18366 && matches!(
18367 source,
18368 DialectType::MySQL | DialectType::SingleStore
18369 )
18370 && matches!(target, DialectType::DuckDB) =>
18371 {
18372 let mut args = f.args;
18373 let num_expr = args.remove(0);
18374 let decimals_expr = args.remove(0);
18375 // Extract decimal count
18376 let dec_count = match &decimals_expr {
18377 Expression::Literal(lit)
18378 if matches!(lit.as_ref(), Literal::Number(_)) =>
18379 {
18380 let Literal::Number(n) = lit.as_ref() else {
18381 unreachable!()
18382 };
18383 n.clone()
18384 }
18385 _ => "0".to_string(),
18386 };
18387 let fmt_str = format!("{{:,.{}f}}", dec_count);
18388 Ok(Expression::Function(Box::new(Function::new(
18389 "FORMAT".to_string(),
18390 vec![Expression::string(&fmt_str), num_expr],
18391 ))))
18392 }
18393 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
18394 "FORMAT"
18395 if f.args.len() == 2
18396 && matches!(
18397 source,
18398 DialectType::TSQL | DialectType::Fabric
18399 ) =>
18400 {
18401 let val_expr = f.args[0].clone();
18402 let fmt_expr = f.args[1].clone();
18403 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
18404 // Only expand shortcodes that are NOT also valid numeric format specifiers.
18405 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
18406 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
18407 let (expanded_fmt, is_shortcode) = match &fmt_expr {
18408 Expression::Literal(lit)
18409 if matches!(
18410 lit.as_ref(),
18411 crate::expressions::Literal::String(_)
18412 ) =>
18413 {
18414 let crate::expressions::Literal::String(s) = lit.as_ref()
18415 else {
18416 unreachable!()
18417 };
18418 match s.as_str() {
18419 "m" | "M" => (Expression::string("MMMM d"), true),
18420 "t" => (Expression::string("h:mm tt"), true),
18421 "T" => (Expression::string("h:mm:ss tt"), true),
18422 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
18423 _ => (fmt_expr.clone(), false),
18424 }
18425 }
18426 _ => (fmt_expr.clone(), false),
18427 };
18428 // Check if the format looks like a date format
18429 let is_date_format = is_shortcode
18430 || match &expanded_fmt {
18431 Expression::Literal(lit)
18432 if matches!(
18433 lit.as_ref(),
18434 crate::expressions::Literal::String(_)
18435 ) =>
18436 {
18437 let crate::expressions::Literal::String(s) =
18438 lit.as_ref()
18439 else {
18440 unreachable!()
18441 };
18442 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
18443 s.contains("yyyy")
18444 || s.contains("YYYY")
18445 || s.contains("MM")
18446 || s.contains("dd")
18447 || s.contains("MMMM")
18448 || s.contains("HH")
18449 || s.contains("hh")
18450 || s.contains("ss")
18451 }
18452 _ => false,
18453 };
18454 match target {
18455 DialectType::Spark | DialectType::Databricks => {
18456 let func_name = if is_date_format {
18457 "DATE_FORMAT"
18458 } else {
18459 "FORMAT_NUMBER"
18460 };
18461 Ok(Expression::Function(Box::new(Function::new(
18462 func_name.to_string(),
18463 vec![val_expr, expanded_fmt],
18464 ))))
18465 }
18466 _ => {
18467 // For TSQL and other targets, expand shortcodes but keep FORMAT
18468 if is_shortcode {
18469 Ok(Expression::Function(Box::new(Function::new(
18470 "FORMAT".to_string(),
18471 vec![val_expr, expanded_fmt],
18472 ))))
18473 } else {
18474 Ok(Expression::Function(f))
18475 }
18476 }
18477 }
18478 }
18479 // FORMAT('%s', x) from Trino/Presto -> target-specific
18480 "FORMAT"
18481 if f.args.len() >= 2
18482 && matches!(
18483 source,
18484 DialectType::Trino
18485 | DialectType::Presto
18486 | DialectType::Athena
18487 ) =>
18488 {
18489 let fmt_expr = f.args[0].clone();
18490 let value_args: Vec<Expression> = f.args[1..].to_vec();
18491 match target {
18492 // DuckDB: replace %s with {} in format string
18493 DialectType::DuckDB => {
18494 let new_fmt = match &fmt_expr {
18495 Expression::Literal(lit)
18496 if matches!(lit.as_ref(), Literal::String(_)) =>
18497 {
18498 let Literal::String(s) = lit.as_ref() else {
18499 unreachable!()
18500 };
18501 Expression::Literal(Box::new(Literal::String(
18502 s.replace("%s", "{}"),
18503 )))
18504 }
18505 _ => fmt_expr,
18506 };
18507 let mut args = vec![new_fmt];
18508 args.extend(value_args);
18509 Ok(Expression::Function(Box::new(Function::new(
18510 "FORMAT".to_string(),
18511 args,
18512 ))))
18513 }
18514 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
18515 DialectType::Snowflake => match &fmt_expr {
18516 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
18517 {
18518 let Literal::String(_) = lit.as_ref() else {
18519 unreachable!()
18520 };
18521 Ok(Expression::Function(Box::new(Function::new(
18522 "TO_CHAR".to_string(),
18523 value_args,
18524 ))))
18525 }
18526 _ => Ok(Expression::Function(f)),
18527 },
18528 // Default: keep FORMAT as-is
18529 _ => Ok(Expression::Function(f)),
18530 }
18531 }
18532 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
18533 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
18534 if f.args.len() == 2 =>
18535 {
18536 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
18537 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
18538 if matches!(target, DialectType::DuckDB)
18539 && matches!(source, DialectType::Snowflake)
18540 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
18541 {
18542 let value = f.args[0].clone();
18543 let array = f.args[1].clone();
18544
18545 // value IS NULL
18546 let value_is_null =
18547 Expression::IsNull(Box::new(crate::expressions::IsNull {
18548 this: value.clone(),
18549 not: false,
18550 postfix_form: false,
18551 }));
18552
18553 // ARRAY_LENGTH(array)
18554 let array_length =
18555 Expression::Function(Box::new(Function::new(
18556 "ARRAY_LENGTH".to_string(),
18557 vec![array.clone()],
18558 )));
18559 // LIST_COUNT(array)
18560 let list_count = Expression::Function(Box::new(Function::new(
18561 "LIST_COUNT".to_string(),
18562 vec![array.clone()],
18563 )));
18564 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
18565 let neq =
18566 Expression::Neq(Box::new(crate::expressions::BinaryOp {
18567 left: array_length,
18568 right: list_count,
18569 left_comments: vec![],
18570 operator_comments: vec![],
18571 trailing_comments: vec![],
18572 inferred_type: None,
18573 }));
18574 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
18575 let nullif =
18576 Expression::Nullif(Box::new(crate::expressions::Nullif {
18577 this: Box::new(neq),
18578 expression: Box::new(Expression::Boolean(
18579 crate::expressions::BooleanLiteral { value: false },
18580 )),
18581 }));
18582
18583 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
18584 let array_contains =
18585 Expression::Function(Box::new(Function::new(
18586 "ARRAY_CONTAINS".to_string(),
18587 vec![array, value],
18588 )));
18589
18590 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
18591 return Ok(Expression::Case(Box::new(Case {
18592 operand: None,
18593 whens: vec![(value_is_null, nullif)],
18594 else_: Some(array_contains),
18595 comments: Vec::new(),
18596 inferred_type: None,
18597 })));
18598 }
18599 match target {
18600 DialectType::PostgreSQL | DialectType::Redshift => {
18601 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
18602 let arr = f.args[0].clone();
18603 let needle = f.args[1].clone();
18604 // Convert [] to ARRAY[] for PostgreSQL
18605 let pg_arr = match arr {
18606 Expression::Array(a) => Expression::ArrayFunc(
18607 Box::new(crate::expressions::ArrayConstructor {
18608 expressions: a.expressions,
18609 bracket_notation: false,
18610 use_list_keyword: false,
18611 }),
18612 ),
18613 _ => arr,
18614 };
18615 // needle = ANY(arr) using the Any quantified expression
18616 let any_expr = Expression::Any(Box::new(
18617 crate::expressions::QuantifiedExpr {
18618 this: needle.clone(),
18619 subquery: pg_arr,
18620 op: Some(crate::expressions::QuantifiedOp::Eq),
18621 },
18622 ));
18623 let coalesce = Expression::Coalesce(Box::new(
18624 crate::expressions::VarArgFunc {
18625 expressions: vec![
18626 any_expr,
18627 Expression::Boolean(
18628 crate::expressions::BooleanLiteral {
18629 value: false,
18630 },
18631 ),
18632 ],
18633 original_name: None,
18634 inferred_type: None,
18635 },
18636 ));
18637 let is_null_check = Expression::IsNull(Box::new(
18638 crate::expressions::IsNull {
18639 this: needle,
18640 not: false,
18641 postfix_form: false,
18642 },
18643 ));
18644 Ok(Expression::Case(Box::new(Case {
18645 operand: None,
18646 whens: vec![(
18647 is_null_check,
18648 Expression::Null(crate::expressions::Null),
18649 )],
18650 else_: Some(coalesce),
18651 comments: Vec::new(),
18652 inferred_type: None,
18653 })))
18654 }
18655 _ => Ok(Expression::Function(Box::new(Function::new(
18656 "ARRAY_CONTAINS".to_string(),
18657 f.args,
18658 )))),
18659 }
18660 }
18661 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
18662 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
18663 match target {
18664 DialectType::PostgreSQL | DialectType::Redshift => {
18665 // arr1 && arr2 with ARRAY[] syntax
18666 let mut args = f.args;
18667 let arr1 = args.remove(0);
18668 let arr2 = args.remove(0);
18669 let pg_arr1 = match arr1 {
18670 Expression::Array(a) => Expression::ArrayFunc(
18671 Box::new(crate::expressions::ArrayConstructor {
18672 expressions: a.expressions,
18673 bracket_notation: false,
18674 use_list_keyword: false,
18675 }),
18676 ),
18677 _ => arr1,
18678 };
18679 let pg_arr2 = match arr2 {
18680 Expression::Array(a) => Expression::ArrayFunc(
18681 Box::new(crate::expressions::ArrayConstructor {
18682 expressions: a.expressions,
18683 bracket_notation: false,
18684 use_list_keyword: false,
18685 }),
18686 ),
18687 _ => arr2,
18688 };
18689 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18690 pg_arr1, pg_arr2,
18691 ))))
18692 }
18693 DialectType::DuckDB => {
18694 // DuckDB: arr1 && arr2 (native support)
18695 let mut args = f.args;
18696 let arr1 = args.remove(0);
18697 let arr2 = args.remove(0);
18698 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18699 arr1, arr2,
18700 ))))
18701 }
18702 _ => Ok(Expression::Function(Box::new(Function::new(
18703 "LIST_HAS_ANY".to_string(),
18704 f.args,
18705 )))),
18706 }
18707 }
18708 // APPROX_QUANTILE(x, q) -> target-specific
18709 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
18710 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18711 Function::new("APPROX_PERCENTILE".to_string(), f.args),
18712 ))),
18713 DialectType::DuckDB => Ok(Expression::Function(f)),
18714 _ => Ok(Expression::Function(f)),
18715 },
18716 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
18717 "MAKE_DATE" if f.args.len() == 3 => match target {
18718 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18719 Function::new("DATE".to_string(), f.args),
18720 ))),
18721 _ => Ok(Expression::Function(f)),
18722 },
18723 // RANGE(start, end[, step]) -> target-specific
18724 "RANGE"
18725 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
18726 {
18727 let start = f.args[0].clone();
18728 let end = f.args[1].clone();
18729 let step = f.args.get(2).cloned();
18730 match target {
18731 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
18732 // so just rename without adjusting the end argument.
18733 DialectType::Snowflake => {
18734 let mut args = vec![start, end];
18735 if let Some(s) = step {
18736 args.push(s);
18737 }
18738 Ok(Expression::Function(Box::new(Function::new(
18739 "ARRAY_GENERATE_RANGE".to_string(),
18740 args,
18741 ))))
18742 }
18743 DialectType::Spark | DialectType::Databricks => {
18744 // RANGE(start, end) -> SEQUENCE(start, end-1)
18745 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
18746 // RANGE(start, start) -> ARRAY() (empty)
18747 // RANGE(start, end, 0) -> ARRAY() (empty)
18748 // When end is variable: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
18749
18750 // Check for constant args
18751 fn extract_i64(e: &Expression) -> Option<i64> {
18752 match e {
18753 Expression::Literal(lit)
18754 if matches!(
18755 lit.as_ref(),
18756 Literal::Number(_)
18757 ) =>
18758 {
18759 let Literal::Number(n) = lit.as_ref() else {
18760 unreachable!()
18761 };
18762 n.parse::<i64>().ok()
18763 }
18764 Expression::Neg(u) => {
18765 if let Expression::Literal(lit) = &u.this {
18766 if let Literal::Number(n) = lit.as_ref() {
18767 n.parse::<i64>().ok().map(|v| -v)
18768 } else {
18769 None
18770 }
18771 } else {
18772 None
18773 }
18774 }
18775 _ => None,
18776 }
18777 }
18778 let start_val = extract_i64(&start);
18779 let end_val = extract_i64(&end);
18780 let step_val = step.as_ref().and_then(|s| extract_i64(s));
18781
18782 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
18783 if step_val == Some(0) {
18784 return Ok(Expression::Function(Box::new(
18785 Function::new("ARRAY".to_string(), vec![]),
18786 )));
18787 }
18788 if let (Some(s), Some(e_val)) = (start_val, end_val) {
18789 if s == e_val {
18790 return Ok(Expression::Function(Box::new(
18791 Function::new("ARRAY".to_string(), vec![]),
18792 )));
18793 }
18794 }
18795
18796 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
18797 // All constants - compute new end = end - step (if step provided) or end - 1
18798 match step_val {
18799 Some(st) if st < 0 => {
18800 // Negative step: SEQUENCE(start, end - step, step)
18801 let new_end = e_val - st; // end - step (= end + |step|)
18802 let mut args =
18803 vec![start, Expression::number(new_end)];
18804 if let Some(s) = step {
18805 args.push(s);
18806 }
18807 Ok(Expression::Function(Box::new(
18808 Function::new("SEQUENCE".to_string(), args),
18809 )))
18810 }
18811 Some(st) => {
18812 let new_end = e_val - st;
18813 let mut args =
18814 vec![start, Expression::number(new_end)];
18815 if let Some(s) = step {
18816 args.push(s);
18817 }
18818 Ok(Expression::Function(Box::new(
18819 Function::new("SEQUENCE".to_string(), args),
18820 )))
18821 }
18822 None => {
18823 // No step: SEQUENCE(start, end - 1)
18824 let new_end = e_val - 1;
18825 Ok(Expression::Function(Box::new(
18826 Function::new(
18827 "SEQUENCE".to_string(),
18828 vec![
18829 start,
18830 Expression::number(new_end),
18831 ],
18832 ),
18833 )))
18834 }
18835 }
18836 } else {
18837 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
18838 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
18839 end.clone(),
18840 Expression::number(1),
18841 )));
18842 let cond = Expression::Lt(Box::new(BinaryOp::new(
18843 Expression::Paren(Box::new(Paren {
18844 this: end_m1.clone(),
18845 trailing_comments: Vec::new(),
18846 })),
18847 start.clone(),
18848 )));
18849 let empty = Expression::Function(Box::new(
18850 Function::new("ARRAY".to_string(), vec![]),
18851 ));
18852 let mut seq_args = vec![
18853 start,
18854 Expression::Paren(Box::new(Paren {
18855 this: end_m1,
18856 trailing_comments: Vec::new(),
18857 })),
18858 ];
18859 if let Some(s) = step {
18860 seq_args.push(s);
18861 }
18862 let seq = Expression::Function(Box::new(
18863 Function::new("SEQUENCE".to_string(), seq_args),
18864 ));
18865 Ok(Expression::IfFunc(Box::new(
18866 crate::expressions::IfFunc {
18867 condition: cond,
18868 true_value: empty,
18869 false_value: Some(seq),
18870 original_name: None,
18871 inferred_type: None,
18872 },
18873 )))
18874 }
18875 }
18876 DialectType::SQLite => {
18877 // RANGE(start, end) -> GENERATE_SERIES(start, end)
18878 // The subquery wrapping is handled at the Alias level
18879 let mut args = vec![start, end];
18880 if let Some(s) = step {
18881 args.push(s);
18882 }
18883 Ok(Expression::Function(Box::new(Function::new(
18884 "GENERATE_SERIES".to_string(),
18885 args,
18886 ))))
18887 }
18888 _ => Ok(Expression::Function(f)),
18889 }
18890 }
18891 // ARRAY_REVERSE_SORT -> target-specific
18892 // (handled above as well, but also need DuckDB self-normalization)
18893 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
18894 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
18895 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18896 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
18897 ))),
18898 DialectType::Spark | DialectType::Databricks => {
18899 Ok(Expression::Function(Box::new(Function::new(
18900 "MAP_FROM_ARRAYS".to_string(),
18901 f.args,
18902 ))))
18903 }
18904 _ => Ok(Expression::Function(Box::new(Function::new(
18905 "MAP".to_string(),
18906 f.args,
18907 )))),
18908 },
18909 // VARIANCE(x) -> varSamp(x) for ClickHouse
18910 "VARIANCE" if f.args.len() == 1 => match target {
18911 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
18912 Function::new("varSamp".to_string(), f.args),
18913 ))),
18914 _ => Ok(Expression::Function(f)),
18915 },
18916 // STDDEV(x) -> stddevSamp(x) for ClickHouse
18917 "STDDEV" if f.args.len() == 1 => match target {
18918 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
18919 Function::new("stddevSamp".to_string(), f.args),
18920 ))),
18921 _ => Ok(Expression::Function(f)),
18922 },
18923 // ISINF(x) -> IS_INF(x) for BigQuery
18924 "ISINF" if f.args.len() == 1 => match target {
18925 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18926 Function::new("IS_INF".to_string(), f.args),
18927 ))),
18928 _ => Ok(Expression::Function(f)),
18929 },
18930 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
18931 "CONTAINS" if f.args.len() == 2 => match target {
18932 DialectType::Spark
18933 | DialectType::Databricks
18934 | DialectType::Hive => Ok(Expression::Function(Box::new(
18935 Function::new("ARRAY_CONTAINS".to_string(), f.args),
18936 ))),
18937 _ => Ok(Expression::Function(f)),
18938 },
18939 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
18940 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
18941 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18942 Ok(Expression::Function(Box::new(Function::new(
18943 "CONTAINS".to_string(),
18944 f.args,
18945 ))))
18946 }
18947 DialectType::DuckDB => Ok(Expression::Function(Box::new(
18948 Function::new("ARRAY_CONTAINS".to_string(), f.args),
18949 ))),
18950 _ => Ok(Expression::Function(f)),
18951 },
18952 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
18953 "TO_UNIXTIME" if f.args.len() == 1 => match target {
18954 DialectType::Hive
18955 | DialectType::Spark
18956 | DialectType::Databricks => Ok(Expression::Function(Box::new(
18957 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
18958 ))),
18959 _ => Ok(Expression::Function(f)),
18960 },
18961 // FROM_UNIXTIME(x) -> target-specific
18962 "FROM_UNIXTIME" if f.args.len() == 1 => {
18963 match target {
18964 DialectType::Hive
18965 | DialectType::Spark
18966 | DialectType::Databricks
18967 | DialectType::Presto
18968 | DialectType::Trino => Ok(Expression::Function(f)),
18969 DialectType::DuckDB => {
18970 // DuckDB: TO_TIMESTAMP(x)
18971 let arg = f.args.into_iter().next().unwrap();
18972 Ok(Expression::Function(Box::new(Function::new(
18973 "TO_TIMESTAMP".to_string(),
18974 vec![arg],
18975 ))))
18976 }
18977 DialectType::PostgreSQL => {
18978 // PG: TO_TIMESTAMP(col)
18979 let arg = f.args.into_iter().next().unwrap();
18980 Ok(Expression::Function(Box::new(Function::new(
18981 "TO_TIMESTAMP".to_string(),
18982 vec![arg],
18983 ))))
18984 }
18985 DialectType::Redshift => {
18986 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
18987 let arg = f.args.into_iter().next().unwrap();
18988 let epoch_ts = Expression::Literal(Box::new(
18989 Literal::Timestamp("epoch".to_string()),
18990 ));
18991 let interval = Expression::Interval(Box::new(
18992 crate::expressions::Interval {
18993 this: Some(Expression::string("1 SECOND")),
18994 unit: None,
18995 },
18996 ));
18997 let mul =
18998 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
18999 let add =
19000 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
19001 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
19002 this: add,
19003 trailing_comments: Vec::new(),
19004 })))
19005 }
19006 _ => Ok(Expression::Function(f)),
19007 }
19008 }
19009 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
19010 "FROM_UNIXTIME"
19011 if f.args.len() == 2
19012 && matches!(
19013 source,
19014 DialectType::Hive
19015 | DialectType::Spark
19016 | DialectType::Databricks
19017 ) =>
19018 {
19019 let mut args = f.args;
19020 let unix_ts = args.remove(0);
19021 let fmt_expr = args.remove(0);
19022 match target {
19023 DialectType::DuckDB => {
19024 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
19025 let to_ts = Expression::Function(Box::new(Function::new(
19026 "TO_TIMESTAMP".to_string(),
19027 vec![unix_ts],
19028 )));
19029 if let Expression::Literal(lit) = &fmt_expr {
19030 if let crate::expressions::Literal::String(s) =
19031 lit.as_ref()
19032 {
19033 let c_fmt = Self::hive_format_to_c_format(s);
19034 Ok(Expression::Function(Box::new(Function::new(
19035 "STRFTIME".to_string(),
19036 vec![to_ts, Expression::string(&c_fmt)],
19037 ))))
19038 } else {
19039 Ok(Expression::Function(Box::new(Function::new(
19040 "STRFTIME".to_string(),
19041 vec![to_ts, fmt_expr],
19042 ))))
19043 }
19044 } else {
19045 Ok(Expression::Function(Box::new(Function::new(
19046 "STRFTIME".to_string(),
19047 vec![to_ts, fmt_expr],
19048 ))))
19049 }
19050 }
19051 DialectType::Presto
19052 | DialectType::Trino
19053 | DialectType::Athena => {
19054 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
19055 let from_unix =
19056 Expression::Function(Box::new(Function::new(
19057 "FROM_UNIXTIME".to_string(),
19058 vec![unix_ts],
19059 )));
19060 if let Expression::Literal(lit) = &fmt_expr {
19061 if let crate::expressions::Literal::String(s) =
19062 lit.as_ref()
19063 {
19064 let p_fmt = Self::hive_format_to_presto_format(s);
19065 Ok(Expression::Function(Box::new(Function::new(
19066 "DATE_FORMAT".to_string(),
19067 vec![from_unix, Expression::string(&p_fmt)],
19068 ))))
19069 } else {
19070 Ok(Expression::Function(Box::new(Function::new(
19071 "DATE_FORMAT".to_string(),
19072 vec![from_unix, fmt_expr],
19073 ))))
19074 }
19075 } else {
19076 Ok(Expression::Function(Box::new(Function::new(
19077 "DATE_FORMAT".to_string(),
19078 vec![from_unix, fmt_expr],
19079 ))))
19080 }
19081 }
19082 _ => {
19083 // Keep as FROM_UNIXTIME(x, fmt) for other targets
19084 Ok(Expression::Function(Box::new(Function::new(
19085 "FROM_UNIXTIME".to_string(),
19086 vec![unix_ts, fmt_expr],
19087 ))))
19088 }
19089 }
19090 }
19091 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
19092 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
19093 let unit_str = Self::get_unit_str_static(&f.args[0]);
19094 // Get the raw unit text preserving original case
19095 let raw_unit = match &f.args[0] {
19096 Expression::Identifier(id) => id.name.clone(),
19097 Expression::Var(v) => v.this.clone(),
19098 Expression::Literal(lit)
19099 if matches!(
19100 lit.as_ref(),
19101 crate::expressions::Literal::String(_)
19102 ) =>
19103 {
19104 let crate::expressions::Literal::String(s) = lit.as_ref()
19105 else {
19106 unreachable!()
19107 };
19108 s.clone()
19109 }
19110 Expression::Column(col) => col.name.name.clone(),
19111 _ => unit_str.clone(),
19112 };
19113 match target {
19114 DialectType::TSQL | DialectType::Fabric => {
19115 // Preserve original case of unit for TSQL
19116 let unit_name = match unit_str.as_str() {
19117 "YY" | "YYYY" => "YEAR".to_string(),
19118 "QQ" | "Q" => "QUARTER".to_string(),
19119 "MM" | "M" => "MONTH".to_string(),
19120 "WK" | "WW" => "WEEK".to_string(),
19121 "DD" | "D" | "DY" => "DAY".to_string(),
19122 "HH" => "HOUR".to_string(),
19123 "MI" | "N" => "MINUTE".to_string(),
19124 "SS" | "S" => "SECOND".to_string(),
19125 _ => raw_unit.clone(), // preserve original case
19126 };
19127 let mut args = f.args;
19128 args[0] =
19129 Expression::Identifier(Identifier::new(&unit_name));
19130 Ok(Expression::Function(Box::new(Function::new(
19131 "DATEPART".to_string(),
19132 args,
19133 ))))
19134 }
19135 DialectType::Spark | DialectType::Databricks => {
19136 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
19137 // Preserve original case for non-abbreviation units
19138 let unit = match unit_str.as_str() {
19139 "YY" | "YYYY" => "YEAR".to_string(),
19140 "QQ" | "Q" => "QUARTER".to_string(),
19141 "MM" | "M" => "MONTH".to_string(),
19142 "WK" | "WW" => "WEEK".to_string(),
19143 "DD" | "D" | "DY" => "DAY".to_string(),
19144 "HH" => "HOUR".to_string(),
19145 "MI" | "N" => "MINUTE".to_string(),
19146 "SS" | "S" => "SECOND".to_string(),
19147 _ => raw_unit, // preserve original case
19148 };
19149 Ok(Expression::Extract(Box::new(
19150 crate::expressions::ExtractFunc {
19151 this: f.args[1].clone(),
19152 field: crate::expressions::DateTimeField::Custom(
19153 unit,
19154 ),
19155 },
19156 )))
19157 }
19158 _ => Ok(Expression::Function(Box::new(Function::new(
19159 "DATE_PART".to_string(),
19160 f.args,
19161 )))),
19162 }
19163 }
19164 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
19165 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
19166 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
19167 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
19168 "DATENAME" if f.args.len() == 2 => {
19169 let unit_str = Self::get_unit_str_static(&f.args[0]);
19170 let date_expr = f.args[1].clone();
19171 match unit_str.as_str() {
19172 "MM" | "M" | "MONTH" => match target {
19173 DialectType::TSQL => {
19174 let cast_date = Expression::Cast(Box::new(
19175 crate::expressions::Cast {
19176 this: date_expr,
19177 to: DataType::Custom {
19178 name: "DATETIME2".to_string(),
19179 },
19180 trailing_comments: Vec::new(),
19181 double_colon_syntax: false,
19182 format: None,
19183 default: None,
19184 inferred_type: None,
19185 },
19186 ));
19187 Ok(Expression::Function(Box::new(Function::new(
19188 "FORMAT".to_string(),
19189 vec![cast_date, Expression::string("MMMM")],
19190 ))))
19191 }
19192 DialectType::Spark | DialectType::Databricks => {
19193 let cast_date = Expression::Cast(Box::new(
19194 crate::expressions::Cast {
19195 this: date_expr,
19196 to: DataType::Timestamp {
19197 timezone: false,
19198 precision: None,
19199 },
19200 trailing_comments: Vec::new(),
19201 double_colon_syntax: false,
19202 format: None,
19203 default: None,
19204 inferred_type: None,
19205 },
19206 ));
19207 Ok(Expression::Function(Box::new(Function::new(
19208 "DATE_FORMAT".to_string(),
19209 vec![cast_date, Expression::string("MMMM")],
19210 ))))
19211 }
19212 _ => Ok(Expression::Function(f)),
19213 },
19214 "DW" | "WEEKDAY" => match target {
19215 DialectType::TSQL => {
19216 let cast_date = Expression::Cast(Box::new(
19217 crate::expressions::Cast {
19218 this: date_expr,
19219 to: DataType::Custom {
19220 name: "DATETIME2".to_string(),
19221 },
19222 trailing_comments: Vec::new(),
19223 double_colon_syntax: false,
19224 format: None,
19225 default: None,
19226 inferred_type: None,
19227 },
19228 ));
19229 Ok(Expression::Function(Box::new(Function::new(
19230 "FORMAT".to_string(),
19231 vec![cast_date, Expression::string("dddd")],
19232 ))))
19233 }
19234 DialectType::Spark | DialectType::Databricks => {
19235 let cast_date = Expression::Cast(Box::new(
19236 crate::expressions::Cast {
19237 this: date_expr,
19238 to: DataType::Timestamp {
19239 timezone: false,
19240 precision: None,
19241 },
19242 trailing_comments: Vec::new(),
19243 double_colon_syntax: false,
19244 format: None,
19245 default: None,
19246 inferred_type: None,
19247 },
19248 ));
19249 Ok(Expression::Function(Box::new(Function::new(
19250 "DATE_FORMAT".to_string(),
19251 vec![cast_date, Expression::string("EEEE")],
19252 ))))
19253 }
19254 _ => Ok(Expression::Function(f)),
19255 },
19256 _ => Ok(Expression::Function(f)),
19257 }
19258 }
19259 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
19260 "STRING_AGG" if f.args.len() >= 2 => {
19261 let x = f.args[0].clone();
19262 let sep = f.args[1].clone();
19263 match target {
19264 DialectType::MySQL
19265 | DialectType::SingleStore
19266 | DialectType::Doris
19267 | DialectType::StarRocks => Ok(Expression::GroupConcat(
19268 Box::new(crate::expressions::GroupConcatFunc {
19269 this: x,
19270 separator: Some(sep),
19271 order_by: None,
19272 distinct: false,
19273 filter: None,
19274 limit: None,
19275 inferred_type: None,
19276 }),
19277 )),
19278 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
19279 crate::expressions::GroupConcatFunc {
19280 this: x,
19281 separator: Some(sep),
19282 order_by: None,
19283 distinct: false,
19284 filter: None,
19285 limit: None,
19286 inferred_type: None,
19287 },
19288 ))),
19289 DialectType::PostgreSQL | DialectType::Redshift => {
19290 Ok(Expression::StringAgg(Box::new(
19291 crate::expressions::StringAggFunc {
19292 this: x,
19293 separator: Some(sep),
19294 order_by: None,
19295 distinct: false,
19296 filter: None,
19297 limit: None,
19298 inferred_type: None,
19299 },
19300 )))
19301 }
19302 _ => Ok(Expression::Function(f)),
19303 }
19304 }
19305 "TRY_DIVIDE" if f.args.len() == 2 => {
19306 let mut args = f.args;
19307 let x = args.remove(0);
19308 let y = args.remove(0);
19309 match target {
19310 DialectType::Spark | DialectType::Databricks => {
19311 Ok(Expression::Function(Box::new(Function::new(
19312 "TRY_DIVIDE".to_string(),
19313 vec![x, y],
19314 ))))
19315 }
19316 DialectType::Snowflake => {
19317 let y_ref = match &y {
19318 Expression::Column(_)
19319 | Expression::Literal(_)
19320 | Expression::Identifier(_) => y.clone(),
19321 _ => Expression::Paren(Box::new(Paren {
19322 this: y.clone(),
19323 trailing_comments: vec![],
19324 })),
19325 };
19326 let x_ref = match &x {
19327 Expression::Column(_)
19328 | Expression::Literal(_)
19329 | Expression::Identifier(_) => x.clone(),
19330 _ => Expression::Paren(Box::new(Paren {
19331 this: x.clone(),
19332 trailing_comments: vec![],
19333 })),
19334 };
19335 let condition = Expression::Neq(Box::new(
19336 crate::expressions::BinaryOp::new(
19337 y_ref.clone(),
19338 Expression::number(0),
19339 ),
19340 ));
19341 let div_expr = Expression::Div(Box::new(
19342 crate::expressions::BinaryOp::new(x_ref, y_ref),
19343 ));
19344 Ok(Expression::IfFunc(Box::new(
19345 crate::expressions::IfFunc {
19346 condition,
19347 true_value: div_expr,
19348 false_value: Some(Expression::Null(Null)),
19349 original_name: Some("IFF".to_string()),
19350 inferred_type: None,
19351 },
19352 )))
19353 }
19354 DialectType::DuckDB => {
19355 let y_ref = match &y {
19356 Expression::Column(_)
19357 | Expression::Literal(_)
19358 | Expression::Identifier(_) => y.clone(),
19359 _ => Expression::Paren(Box::new(Paren {
19360 this: y.clone(),
19361 trailing_comments: vec![],
19362 })),
19363 };
19364 let x_ref = match &x {
19365 Expression::Column(_)
19366 | Expression::Literal(_)
19367 | Expression::Identifier(_) => x.clone(),
19368 _ => Expression::Paren(Box::new(Paren {
19369 this: x.clone(),
19370 trailing_comments: vec![],
19371 })),
19372 };
19373 let condition = Expression::Neq(Box::new(
19374 crate::expressions::BinaryOp::new(
19375 y_ref.clone(),
19376 Expression::number(0),
19377 ),
19378 ));
19379 let div_expr = Expression::Div(Box::new(
19380 crate::expressions::BinaryOp::new(x_ref, y_ref),
19381 ));
19382 Ok(Expression::Case(Box::new(Case {
19383 operand: None,
19384 whens: vec![(condition, div_expr)],
19385 else_: Some(Expression::Null(Null)),
19386 comments: Vec::new(),
19387 inferred_type: None,
19388 })))
19389 }
19390 _ => Ok(Expression::Function(Box::new(Function::new(
19391 "TRY_DIVIDE".to_string(),
19392 vec![x, y],
19393 )))),
19394 }
19395 }
19396 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
19397 "JSON_ARRAYAGG" => match target {
19398 DialectType::PostgreSQL => {
19399 Ok(Expression::Function(Box::new(Function {
19400 name: "JSON_AGG".to_string(),
19401 ..(*f)
19402 })))
19403 }
19404 _ => Ok(Expression::Function(f)),
19405 },
19406 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
19407 "SCHEMA_NAME" => match target {
19408 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
19409 crate::expressions::CurrentSchema { this: None },
19410 ))),
19411 DialectType::SQLite => Ok(Expression::string("main")),
19412 _ => Ok(Expression::Function(f)),
19413 },
19414 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
19415 "TO_TIMESTAMP"
19416 if f.args.len() == 2
19417 && matches!(
19418 source,
19419 DialectType::Spark
19420 | DialectType::Databricks
19421 | DialectType::Hive
19422 )
19423 && matches!(target, DialectType::DuckDB) =>
19424 {
19425 let mut args = f.args;
19426 let val = args.remove(0);
19427 let fmt_expr = args.remove(0);
19428 if let Expression::Literal(ref lit) = fmt_expr {
19429 if let Literal::String(ref s) = lit.as_ref() {
19430 // Convert Java/Spark format to C strptime format
/// Rewrites a Java/Spark datetime pattern into a C `strptime`-style format.
///
/// Works in two passes:
/// 1. Multi-letter Java tokens are substituted in a fixed order (longer
///    tokens first, so `yyyy` is consumed before `yy`).
/// 2. The result is scanned once: every `%x` pair produced by pass 1 is
///    copied through untouched, while a bare `z` becomes `%Z` and a bare
///    `Z` becomes `%z`. All other characters are copied as-is.
///
/// NOTE(review): `EEEE` is mapped to `%W`; in C-style formats `%W` is
/// usually the week number and `%A` the full weekday name. Confirm this is
/// the intended directive.
fn java_to_c_fmt(fmt: &str) -> String {
    // Order is significant: each rule runs over the output of the previous one.
    const TOKEN_RULES: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let substituted = TOKEN_RULES
        .iter()
        .fold(fmt.to_string(), |acc, &(java_token, c_directive)| {
            acc.replace(java_token, c_directive)
        });

    let mut converted = String::with_capacity(substituted.len());
    let mut remaining = substituted.chars();
    while let Some(ch) = remaining.next() {
        match ch {
            '%' => {
                // Copy an existing directive verbatim so its letter is not
                // re-interpreted as a timezone token below. A trailing lone
                // '%' is simply kept.
                converted.push('%');
                if let Some(directive) = remaining.next() {
                    converted.push(directive);
                }
            }
            'z' => converted.push_str("%Z"),
            'Z' => converted.push_str("%z"),
            plain => converted.push(plain),
        }
    }
    converted
}
19463 let c_fmt = java_to_c_fmt(s);
19464 Ok(Expression::Function(Box::new(Function::new(
19465 "STRPTIME".to_string(),
19466 vec![val, Expression::string(&c_fmt)],
19467 ))))
19468 } else {
19469 Ok(Expression::Function(Box::new(Function::new(
19470 "STRPTIME".to_string(),
19471 vec![val, fmt_expr],
19472 ))))
19473 }
19474 } else {
19475 Ok(Expression::Function(Box::new(Function::new(
19476 "STRPTIME".to_string(),
19477 vec![val, fmt_expr],
19478 ))))
19479 }
19480 }
19481 // TO_DATE(x) 1-arg from Doris/StarRocks: date conversion
19482 "TO_DATE"
19483 if f.args.len() == 1
19484 && matches!(
19485 source,
19486 DialectType::Doris | DialectType::StarRocks
19487 ) =>
19488 {
19489 let arg = f.args.into_iter().next().unwrap();
19490 match target {
19491 DialectType::Oracle
19492 | DialectType::DuckDB
19493 | DialectType::TSQL => {
19494 // CAST(x AS DATE)
19495 Ok(Expression::Cast(Box::new(Cast {
19496 this: arg,
19497 to: DataType::Date,
19498 double_colon_syntax: false,
19499 trailing_comments: vec![],
19500 format: None,
19501 default: None,
19502 inferred_type: None,
19503 })))
19504 }
19505 DialectType::MySQL | DialectType::SingleStore => {
19506 // DATE(x)
19507 Ok(Expression::Function(Box::new(Function::new(
19508 "DATE".to_string(),
19509 vec![arg],
19510 ))))
19511 }
19512 _ => {
19513 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
19514 Ok(Expression::Function(Box::new(Function::new(
19515 "TO_DATE".to_string(),
19516 vec![arg],
19517 ))))
19518 }
19519 }
19520 }
19521 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
19522 "TO_DATE"
19523 if f.args.len() == 1
19524 && matches!(
19525 source,
19526 DialectType::Spark
19527 | DialectType::Databricks
19528 | DialectType::Hive
19529 ) =>
19530 {
19531 let arg = f.args.into_iter().next().unwrap();
19532 match target {
19533 DialectType::DuckDB => {
19534 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
19535 Ok(Expression::TryCast(Box::new(Cast {
19536 this: arg,
19537 to: DataType::Date,
19538 double_colon_syntax: false,
19539 trailing_comments: vec![],
19540 format: None,
19541 default: None,
19542 inferred_type: None,
19543 })))
19544 }
19545 DialectType::Presto
19546 | DialectType::Trino
19547 | DialectType::Athena => {
19548 // CAST(CAST(x AS TIMESTAMP) AS DATE)
19549 Ok(Self::double_cast_timestamp_date(arg))
19550 }
19551 DialectType::Snowflake => {
19552 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
19553 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
19554 Ok(Expression::Function(Box::new(Function::new(
19555 "TRY_TO_DATE".to_string(),
19556 vec![arg, Expression::string("yyyy-mm-DD")],
19557 ))))
19558 }
19559 _ => {
19560 // Default: keep as TO_DATE(x)
19561 Ok(Expression::Function(Box::new(Function::new(
19562 "TO_DATE".to_string(),
19563 vec![arg],
19564 ))))
19565 }
19566 }
19567 }
19568 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
19569 "TO_DATE"
19570 if f.args.len() == 2
19571 && matches!(
19572 source,
19573 DialectType::Spark
19574 | DialectType::Databricks
19575 | DialectType::Hive
19576 ) =>
19577 {
19578 let mut args = f.args;
19579 let val = args.remove(0);
19580 let fmt_expr = args.remove(0);
19581 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
19582
19583 if is_default_format {
19584 // Default format: same as 1-arg form
19585 match target {
19586 DialectType::DuckDB => {
19587 Ok(Expression::TryCast(Box::new(Cast {
19588 this: val,
19589 to: DataType::Date,
19590 double_colon_syntax: false,
19591 trailing_comments: vec![],
19592 format: None,
19593 default: None,
19594 inferred_type: None,
19595 })))
19596 }
19597 DialectType::Presto
19598 | DialectType::Trino
19599 | DialectType::Athena => {
19600 Ok(Self::double_cast_timestamp_date(val))
19601 }
19602 DialectType::Snowflake => {
19603 // TRY_TO_DATE(x, format) with Snowflake format mapping
19604 let sf_fmt = "yyyy-MM-dd"
19605 .replace("yyyy", "yyyy")
19606 .replace("MM", "mm")
19607 .replace("dd", "DD");
19608 Ok(Expression::Function(Box::new(Function::new(
19609 "TRY_TO_DATE".to_string(),
19610 vec![val, Expression::string(&sf_fmt)],
19611 ))))
19612 }
19613 _ => Ok(Expression::Function(Box::new(Function::new(
19614 "TO_DATE".to_string(),
19615 vec![val],
19616 )))),
19617 }
19618 } else {
19619 // Non-default format: use format-based parsing
19620 if let Expression::Literal(ref lit) = fmt_expr {
19621 if let Literal::String(ref s) = lit.as_ref() {
19622 match target {
19623 DialectType::DuckDB => {
19624 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Converts a Java/Spark date pattern into a C `strptime`-style format
/// string for use with `TRY_STRPTIME`.
///
/// Multi-letter Java tokens are replaced first, in a fixed order where the
/// four-letter year precedes the two-letter year. A single left-to-right
/// scan then turns a bare `z` into `%Z` and a bare `Z` into `%z`, skipping
/// over `%x` pairs that were already emitted.
///
/// NOTE(review): `EEEE` becomes `%W` here; `%W` is normally the week-number
/// directive in C-style formats (`%A` is the weekday name). Confirm.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // Applied sequentially; each replacement sees the previous one's output.
    let rules: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let mut staged = fmt.to_owned();
    for &(java_token, c_directive) in rules.iter() {
        staged = staged.replace(java_token, c_directive);
    }

    let mut rendered = String::new();
    let mut pending = staged.chars();
    loop {
        let Some(current) = pending.next() else {
            break;
        };
        if current == '%' {
            // Keep "%x" intact; a '%' with nothing after it is copied alone.
            rendered.push(current);
            if let Some(following) = pending.next() {
                rendered.push(following);
            }
            continue;
        }
        match current {
            'z' => rendered.push_str("%Z"),
            'Z' => rendered.push_str("%z"),
            _ => rendered.push(current),
        }
    }
    rendered
}
19660 let c_fmt = java_to_c_fmt_todate(s);
19661 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
19662 let try_strptime = Expression::Function(
19663 Box::new(Function::new(
19664 "TRY_STRPTIME".to_string(),
19665 vec![val, Expression::string(&c_fmt)],
19666 )),
19667 );
19668 let cast_ts =
19669 Expression::Cast(Box::new(Cast {
19670 this: try_strptime,
19671 to: DataType::Timestamp {
19672 precision: None,
19673 timezone: false,
19674 },
19675 double_colon_syntax: false,
19676 trailing_comments: vec![],
19677 format: None,
19678 default: None,
19679 inferred_type: None,
19680 }));
19681 Ok(Expression::Cast(Box::new(Cast {
19682 this: cast_ts,
19683 to: DataType::Date,
19684 double_colon_syntax: false,
19685 trailing_comments: vec![],
19686 format: None,
19687 default: None,
19688 inferred_type: None,
19689 })))
19690 }
19691 DialectType::Presto
19692 | DialectType::Trino
19693 | DialectType::Athena => {
19694 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
19695 let p_fmt = s
19696 .replace("yyyy", "%Y")
19697 .replace("SSSSSS", "%f")
19698 .replace("MM", "%m")
19699 .replace("dd", "%d")
19700 .replace("HH", "%H")
19701 .replace("mm", "%M")
19702 .replace("ss", "%S")
19703 .replace("yy", "%y");
19704 let date_parse = Expression::Function(
19705 Box::new(Function::new(
19706 "DATE_PARSE".to_string(),
19707 vec![val, Expression::string(&p_fmt)],
19708 )),
19709 );
19710 Ok(Expression::Cast(Box::new(Cast {
19711 this: date_parse,
19712 to: DataType::Date,
19713 double_colon_syntax: false,
19714 trailing_comments: vec![],
19715 format: None,
19716 default: None,
19717 inferred_type: None,
19718 })))
19719 }
19720 DialectType::Snowflake => {
19721 // TRY_TO_DATE(x, snowflake_fmt)
19722 Ok(Expression::Function(Box::new(
19723 Function::new(
19724 "TRY_TO_DATE".to_string(),
19725 vec![val, Expression::string(s)],
19726 ),
19727 )))
19728 }
19729 _ => Ok(Expression::Function(Box::new(
19730 Function::new(
19731 "TO_DATE".to_string(),
19732 vec![val, fmt_expr],
19733 ),
19734 ))),
19735 }
19736 } else {
19737 Ok(Expression::Function(Box::new(Function::new(
19738 "TO_DATE".to_string(),
19739 vec![val, fmt_expr],
19740 ))))
19741 }
19742 } else {
19743 Ok(Expression::Function(Box::new(Function::new(
19744 "TO_DATE".to_string(),
19745 vec![val, fmt_expr],
19746 ))))
19747 }
19748 }
19749 }
19750 // TO_TIMESTAMP(x) 1-arg: epoch conversion
19751 "TO_TIMESTAMP"
19752 if f.args.len() == 1
19753 && matches!(source, DialectType::DuckDB)
19754 && matches!(
19755 target,
19756 DialectType::BigQuery
19757 | DialectType::Presto
19758 | DialectType::Trino
19759 | DialectType::Hive
19760 | DialectType::Spark
19761 | DialectType::Databricks
19762 | DialectType::Athena
19763 ) =>
19764 {
19765 let arg = f.args.into_iter().next().unwrap();
19766 let func_name = match target {
19767 DialectType::BigQuery => "TIMESTAMP_SECONDS",
19768 DialectType::Presto
19769 | DialectType::Trino
19770 | DialectType::Athena
19771 | DialectType::Hive
19772 | DialectType::Spark
19773 | DialectType::Databricks => "FROM_UNIXTIME",
19774 _ => "TO_TIMESTAMP",
19775 };
19776 Ok(Expression::Function(Box::new(Function::new(
19777 func_name.to_string(),
19778 vec![arg],
19779 ))))
19780 }
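19781 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark/Databricks, CAST(x AS VARCHAR) for Presto/Trino/Athena, bare x for TSQL, unchanged CONCAT(x) otherwise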
19782 "CONCAT" if f.args.len() == 1 => {
19783 let arg = f.args.into_iter().next().unwrap();
19784 match target {
19785 DialectType::Presto
19786 | DialectType::Trino
19787 | DialectType::Athena => {
19788 // CONCAT(a) -> CAST(a AS VARCHAR)
19789 Ok(Expression::Cast(Box::new(Cast {
19790 this: arg,
19791 to: DataType::VarChar {
19792 length: None,
19793 parenthesized_length: false,
19794 },
19795 trailing_comments: vec![],
19796 double_colon_syntax: false,
19797 format: None,
19798 default: None,
19799 inferred_type: None,
19800 })))
19801 }
19802 DialectType::TSQL => {
19803 // CONCAT(a) -> a
19804 Ok(arg)
19805 }
19806 DialectType::DuckDB => {
19807 // Keep CONCAT(a) for DuckDB (native support)
19808 Ok(Expression::Function(Box::new(Function::new(
19809 "CONCAT".to_string(),
19810 vec![arg],
19811 ))))
19812 }
19813 DialectType::Spark | DialectType::Databricks => {
19814 let coalesced = Expression::Coalesce(Box::new(
19815 crate::expressions::VarArgFunc {
19816 expressions: vec![arg, Expression::string("")],
19817 original_name: None,
19818 inferred_type: None,
19819 },
19820 ));
19821 Ok(Expression::Function(Box::new(Function::new(
19822 "CONCAT".to_string(),
19823 vec![coalesced],
19824 ))))
19825 }
19826 _ => Ok(Expression::Function(Box::new(Function::new(
19827 "CONCAT".to_string(),
19828 vec![arg],
19829 )))),
19830 }
19831 }
19832 // REGEXP_EXTRACT(a, p, group) 3-arg for BigQuery: drop the group argument when it is the literal 0, leaving REGEXP_EXTRACT(a, p)
19833 "REGEXP_EXTRACT"
19834 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
19835 {
19836 // If group_index is 0, drop it
19837 let drop_group = match &f.args[2] {
19838 Expression::Literal(lit)
19839 if matches!(lit.as_ref(), Literal::Number(_)) =>
19840 {
19841 let Literal::Number(n) = lit.as_ref() else {
19842 unreachable!()
19843 };
19844 n == "0"
19845 }
19846 _ => false,
19847 };
19848 if drop_group {
19849 let mut args = f.args;
19850 args.truncate(2);
19851 Ok(Expression::Function(Box::new(Function::new(
19852 "REGEXP_EXTRACT".to_string(),
19853 args,
19854 ))))
19855 } else {
19856 Ok(Expression::Function(f))
19857 }
19858 }
19859 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
19860 "REGEXP_EXTRACT"
19861 if f.args.len() == 4
19862 && matches!(target, DialectType::Snowflake) =>
19863 {
19864 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
19865 let mut args = f.args;
19866 let this = args.remove(0);
19867 let pattern = args.remove(0);
19868 let group = args.remove(0);
19869 let flags = args.remove(0);
19870 Ok(Expression::Function(Box::new(Function::new(
19871 "REGEXP_SUBSTR".to_string(),
19872 vec![
19873 this,
19874 pattern,
19875 Expression::number(1),
19876 Expression::number(1),
19877 flags,
19878 group,
19879 ],
19880 ))))
19881 }
19882 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
19883 "REGEXP_SUBSTR"
19884 if f.args.len() == 3
19885 && matches!(
19886 target,
19887 DialectType::DuckDB
19888 | DialectType::Presto
19889 | DialectType::Trino
19890 | DialectType::Spark
19891 | DialectType::Databricks
19892 ) =>
19893 {
19894 let mut args = f.args;
19895 let this = args.remove(0);
19896 let pattern = args.remove(0);
19897 let position = args.remove(0);
19898 // Wrap subject in SUBSTRING(this, position) to apply the offset
19899 let substring_expr = Expression::Function(Box::new(Function::new(
19900 "SUBSTRING".to_string(),
19901 vec![this, position],
19902 )));
19903 let target_name = match target {
19904 DialectType::DuckDB => "REGEXP_EXTRACT",
19905 _ => "REGEXP_EXTRACT",
19906 };
19907 Ok(Expression::Function(Box::new(Function::new(
19908 target_name.to_string(),
19909 vec![substring_expr, pattern],
19910 ))))
19911 }
19912 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
19913 "TO_DAYS" if f.args.len() == 1 => {
19914 let x = f.args.into_iter().next().unwrap();
19915 let epoch = Expression::string("0000-01-01");
19916 // Build the final target-specific expression directly
19917 let datediff_expr = match target {
19918 DialectType::MySQL | DialectType::SingleStore => {
19919 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
19920 Expression::Function(Box::new(Function::new(
19921 "DATEDIFF".to_string(),
19922 vec![x, epoch],
19923 )))
19924 }
19925 DialectType::DuckDB => {
19926 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
19927 let cast_epoch = Expression::Cast(Box::new(Cast {
19928 this: epoch,
19929 to: DataType::Date,
19930 trailing_comments: Vec::new(),
19931 double_colon_syntax: false,
19932 format: None,
19933 default: None,
19934 inferred_type: None,
19935 }));
19936 let cast_x = Expression::Cast(Box::new(Cast {
19937 this: x,
19938 to: DataType::Date,
19939 trailing_comments: Vec::new(),
19940 double_colon_syntax: false,
19941 format: None,
19942 default: None,
19943 inferred_type: None,
19944 }));
19945 Expression::Function(Box::new(Function::new(
19946 "DATE_DIFF".to_string(),
19947 vec![Expression::string("DAY"), cast_epoch, cast_x],
19948 )))
19949 }
19950 DialectType::Presto
19951 | DialectType::Trino
19952 | DialectType::Athena => {
19953 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
19954 let cast_epoch = Self::double_cast_timestamp_date(epoch);
19955 let cast_x = Self::double_cast_timestamp_date(x);
19956 Expression::Function(Box::new(Function::new(
19957 "DATE_DIFF".to_string(),
19958 vec![Expression::string("DAY"), cast_epoch, cast_x],
19959 )))
19960 }
19961 _ => {
19962 // Default: (DATEDIFF(x, '0000-01-01') + 1)
19963 Expression::Function(Box::new(Function::new(
19964 "DATEDIFF".to_string(),
19965 vec![x, epoch],
19966 )))
19967 }
19968 };
19969 let add_one = Expression::Add(Box::new(BinaryOp::new(
19970 datediff_expr,
19971 Expression::number(1),
19972 )));
19973 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
19974 this: add_one,
19975 trailing_comments: Vec::new(),
19976 })))
19977 }
19978 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
19979 "STR_TO_DATE"
19980 if f.args.len() == 2
19981 && matches!(
19982 target,
19983 DialectType::Presto | DialectType::Trino
19984 ) =>
19985 {
19986 let mut args = f.args;
19987 let x = args.remove(0);
19988 let format_expr = args.remove(0);
19989 // Check if the format contains time components
19990 let has_time = if let Expression::Literal(ref lit) = format_expr {
19991 if let Literal::String(ref fmt) = lit.as_ref() {
19992 fmt.contains("%H")
19993 || fmt.contains("%T")
19994 || fmt.contains("%M")
19995 || fmt.contains("%S")
19996 || fmt.contains("%I")
19997 || fmt.contains("%p")
19998 } else {
19999 false
20000 }
20001 } else {
20002 false
20003 };
20004 let date_parse = Expression::Function(Box::new(Function::new(
20005 "DATE_PARSE".to_string(),
20006 vec![x, format_expr],
20007 )));
20008 if has_time {
20009 // Has time components: just DATE_PARSE
20010 Ok(date_parse)
20011 } else {
20012 // Date-only: CAST(DATE_PARSE(...) AS DATE)
20013 Ok(Expression::Cast(Box::new(Cast {
20014 this: date_parse,
20015 to: DataType::Date,
20016 trailing_comments: Vec::new(),
20017 double_colon_syntax: false,
20018 format: None,
20019 default: None,
20020 inferred_type: None,
20021 })))
20022 }
20023 }
20024 "STR_TO_DATE"
20025 if f.args.len() == 2
20026 && matches!(
20027 target,
20028 DialectType::PostgreSQL | DialectType::Redshift
20029 ) =>
20030 {
20031 let mut args = f.args;
20032 let x = args.remove(0);
20033 let fmt = args.remove(0);
20034 let pg_fmt = match fmt {
20035 Expression::Literal(lit)
20036 if matches!(lit.as_ref(), Literal::String(_)) =>
20037 {
20038 let Literal::String(s) = lit.as_ref() else {
20039 unreachable!()
20040 };
20041 Expression::string(
20042 &s.replace("%Y", "YYYY")
20043 .replace("%m", "MM")
20044 .replace("%d", "DD")
20045 .replace("%H", "HH24")
20046 .replace("%M", "MI")
20047 .replace("%S", "SS"),
20048 )
20049 }
20050 other => other,
20051 };
20052 let to_date = Expression::Function(Box::new(Function::new(
20053 "TO_DATE".to_string(),
20054 vec![x, pg_fmt],
20055 )));
20056 Ok(Expression::Cast(Box::new(Cast {
20057 this: to_date,
20058 to: DataType::Timestamp {
20059 timezone: false,
20060 precision: None,
20061 },
20062 trailing_comments: Vec::new(),
20063 double_colon_syntax: false,
20064 format: None,
20065 default: None,
20066 inferred_type: None,
20067 })))
20068 }
20069 // RANGE(start, end) -> GENERATE_SERIES for SQLite
20070 "RANGE"
20071 if (f.args.len() == 1 || f.args.len() == 2)
20072 && matches!(target, DialectType::SQLite) =>
20073 {
20074 if f.args.len() == 2 {
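20075 // RANGE(start, end) -> GENERATE_SERIES(start, end); this arm only renames the call and does not build the (SELECT value AS col_alias FROM ...) wrapper
20076 // NOTE(review): RANGE is exclusive on end while SQLite's GENERATE_SERIES is inclusive, yet end is passed through unchanged here - confirm the bound is adjusted elsewhere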
20077 let mut args = f.args;
20078 let start = args.remove(0);
20079 let end = args.remove(0);
20080 Ok(Expression::Function(Box::new(Function::new(
20081 "GENERATE_SERIES".to_string(),
20082 vec![start, end],
20083 ))))
20084 } else {
20085 Ok(Expression::Function(f))
20086 }
20087 }
20088 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
20089 // When source is Snowflake, keep as-is (args already in correct form)
20090 "UNIFORM"
20091 if matches!(target, DialectType::Snowflake)
20092 && (f.args.len() == 2 || f.args.len() == 3) =>
20093 {
20094 if matches!(source, DialectType::Snowflake) {
20095 // Snowflake -> Snowflake: keep as-is
20096 Ok(Expression::Function(f))
20097 } else {
20098 let mut args = f.args;
20099 let low = args.remove(0);
20100 let high = args.remove(0);
20101 let random = if !args.is_empty() {
20102 let seed = args.remove(0);
20103 Expression::Function(Box::new(Function::new(
20104 "RANDOM".to_string(),
20105 vec![seed],
20106 )))
20107 } else {
20108 Expression::Function(Box::new(Function::new(
20109 "RANDOM".to_string(),
20110 vec![],
20111 )))
20112 };
20113 Ok(Expression::Function(Box::new(Function::new(
20114 "UNIFORM".to_string(),
20115 vec![low, high, random],
20116 ))))
20117 }
20118 }
20119 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20120 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
20121 let mut args = f.args;
20122 let ts_arg = args.remove(0);
20123 let tz_arg = args.remove(0);
20124 // Cast string literal to TIMESTAMP for all targets
20125 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20126 {
20127 Expression::Cast(Box::new(Cast {
20128 this: ts_arg,
20129 to: DataType::Timestamp {
20130 timezone: false,
20131 precision: None,
20132 },
20133 trailing_comments: vec![],
20134 double_colon_syntax: false,
20135 format: None,
20136 default: None,
20137 inferred_type: None,
20138 }))
20139 } else {
20140 ts_arg
20141 };
20142 match target {
20143 DialectType::Spark | DialectType::Databricks => {
20144 Ok(Expression::Function(Box::new(Function::new(
20145 "TO_UTC_TIMESTAMP".to_string(),
20146 vec![ts_cast, tz_arg],
20147 ))))
20148 }
20149 DialectType::Snowflake => {
20150 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
20151 Ok(Expression::Function(Box::new(Function::new(
20152 "CONVERT_TIMEZONE".to_string(),
20153 vec![tz_arg, Expression::string("UTC"), ts_cast],
20154 ))))
20155 }
20156 DialectType::Presto
20157 | DialectType::Trino
20158 | DialectType::Athena => {
20159 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
20160 let wtz = Expression::Function(Box::new(Function::new(
20161 "WITH_TIMEZONE".to_string(),
20162 vec![ts_cast, tz_arg],
20163 )));
20164 Ok(Expression::AtTimeZone(Box::new(
20165 crate::expressions::AtTimeZone {
20166 this: wtz,
20167 zone: Expression::string("UTC"),
20168 },
20169 )))
20170 }
20171 DialectType::BigQuery => {
20172 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
20173 let cast_dt = Expression::Cast(Box::new(Cast {
20174 this: if let Expression::Cast(c) = ts_cast {
20175 c.this
20176 } else {
20177 ts_cast.clone()
20178 },
20179 to: DataType::Custom {
20180 name: "DATETIME".to_string(),
20181 },
20182 trailing_comments: vec![],
20183 double_colon_syntax: false,
20184 format: None,
20185 default: None,
20186 inferred_type: None,
20187 }));
20188 let ts_func =
20189 Expression::Function(Box::new(Function::new(
20190 "TIMESTAMP".to_string(),
20191 vec![cast_dt, tz_arg],
20192 )));
20193 Ok(Expression::Function(Box::new(Function::new(
20194 "DATETIME".to_string(),
20195 vec![ts_func, Expression::string("UTC")],
20196 ))))
20197 }
20198 _ => {
20199 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
20200 let atz1 = Expression::AtTimeZone(Box::new(
20201 crate::expressions::AtTimeZone {
20202 this: ts_cast,
20203 zone: tz_arg,
20204 },
20205 ));
20206 Ok(Expression::AtTimeZone(Box::new(
20207 crate::expressions::AtTimeZone {
20208 this: atz1,
20209 zone: Expression::string("UTC"),
20210 },
20211 )))
20212 }
20213 }
20214 }
20215 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20216 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
20217 let mut args = f.args;
20218 let ts_arg = args.remove(0);
20219 let tz_arg = args.remove(0);
20220 // Cast string literal to TIMESTAMP
20221 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20222 {
20223 Expression::Cast(Box::new(Cast {
20224 this: ts_arg,
20225 to: DataType::Timestamp {
20226 timezone: false,
20227 precision: None,
20228 },
20229 trailing_comments: vec![],
20230 double_colon_syntax: false,
20231 format: None,
20232 default: None,
20233 inferred_type: None,
20234 }))
20235 } else {
20236 ts_arg
20237 };
20238 match target {
20239 DialectType::Spark | DialectType::Databricks => {
20240 Ok(Expression::Function(Box::new(Function::new(
20241 "FROM_UTC_TIMESTAMP".to_string(),
20242 vec![ts_cast, tz_arg],
20243 ))))
20244 }
20245 DialectType::Presto
20246 | DialectType::Trino
20247 | DialectType::Athena => {
20248 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
20249 Ok(Expression::Function(Box::new(Function::new(
20250 "AT_TIMEZONE".to_string(),
20251 vec![ts_cast, tz_arg],
20252 ))))
20253 }
20254 DialectType::Snowflake => {
20255 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
20256 Ok(Expression::Function(Box::new(Function::new(
20257 "CONVERT_TIMEZONE".to_string(),
20258 vec![Expression::string("UTC"), tz_arg, ts_cast],
20259 ))))
20260 }
20261 _ => {
20262 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
20263 Ok(Expression::AtTimeZone(Box::new(
20264 crate::expressions::AtTimeZone {
20265 this: ts_cast,
20266 zone: tz_arg,
20267 },
20268 )))
20269 }
20270 }
20271 }
20272 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
20273 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
20274 let name = match target {
20275 DialectType::Snowflake => "OBJECT_CONSTRUCT",
20276 _ => "MAP",
20277 };
20278 Ok(Expression::Function(Box::new(Function::new(
20279 name.to_string(),
20280 f.args,
20281 ))))
20282 }
20283 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
20284 "STR_TO_MAP" if f.args.len() >= 1 => match target {
20285 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20286 Ok(Expression::Function(Box::new(Function::new(
20287 "SPLIT_TO_MAP".to_string(),
20288 f.args,
20289 ))))
20290 }
20291 _ => Ok(Expression::Function(f)),
20292 },
20293 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
20294 "TIME_TO_STR" if f.args.len() == 2 => {
20295 let mut args = f.args;
20296 let this = args.remove(0);
20297 let fmt_expr = args.remove(0);
20298 let format = if let Expression::Literal(lit) = fmt_expr {
20299 if let Literal::String(s) = lit.as_ref() {
20300 s.clone()
20301 } else {
20302 String::new()
20303 }
20304 } else {
20305 "%Y-%m-%d %H:%M:%S".to_string()
20306 };
20307 Ok(Expression::TimeToStr(Box::new(
20308 crate::expressions::TimeToStr {
20309 this: Box::new(this),
20310 format,
20311 culture: None,
20312 zone: None,
20313 },
20314 )))
20315 }
20316 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
20317 "STR_TO_TIME" if f.args.len() == 2 => {
20318 let mut args = f.args;
20319 let this = args.remove(0);
20320 let fmt_expr = args.remove(0);
20321 let format = if let Expression::Literal(lit) = fmt_expr {
20322 if let Literal::String(s) = lit.as_ref() {
20323 s.clone()
20324 } else {
20325 String::new()
20326 }
20327 } else {
20328 "%Y-%m-%d %H:%M:%S".to_string()
20329 };
20330 Ok(Expression::StrToTime(Box::new(
20331 crate::expressions::StrToTime {
20332 this: Box::new(this),
20333 format,
20334 zone: None,
20335 safe: None,
20336 target_type: None,
20337 },
20338 )))
20339 }
20340 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
20341 "STR_TO_UNIX" if f.args.len() >= 1 => {
20342 let mut args = f.args;
20343 let this = args.remove(0);
20344 let format = if !args.is_empty() {
20345 if let Expression::Literal(lit) = args.remove(0) {
20346 if let Literal::String(s) = lit.as_ref() {
20347 Some(s.clone())
20348 } else {
20349 None
20350 }
20351 } else {
20352 None
20353 }
20354 } else {
20355 None
20356 };
20357 Ok(Expression::StrToUnix(Box::new(
20358 crate::expressions::StrToUnix {
20359 this: Some(Box::new(this)),
20360 format,
20361 },
20362 )))
20363 }
20364 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
20365 "TIME_TO_UNIX" if f.args.len() == 1 => {
20366 let mut args = f.args;
20367 let this = args.remove(0);
20368 Ok(Expression::TimeToUnix(Box::new(
20369 crate::expressions::UnaryFunc {
20370 this,
20371 original_name: None,
20372 inferred_type: None,
20373 },
20374 )))
20375 }
20376 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
20377 "UNIX_TO_STR" if f.args.len() >= 1 => {
20378 let mut args = f.args;
20379 let this = args.remove(0);
20380 let format = if !args.is_empty() {
20381 if let Expression::Literal(lit) = args.remove(0) {
20382 if let Literal::String(s) = lit.as_ref() {
20383 Some(s.clone())
20384 } else {
20385 None
20386 }
20387 } else {
20388 None
20389 }
20390 } else {
20391 None
20392 };
20393 Ok(Expression::UnixToStr(Box::new(
20394 crate::expressions::UnixToStr {
20395 this: Box::new(this),
20396 format,
20397 },
20398 )))
20399 }
20400 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
20401 "UNIX_TO_TIME" if f.args.len() == 1 => {
20402 let mut args = f.args;
20403 let this = args.remove(0);
20404 Ok(Expression::UnixToTime(Box::new(
20405 crate::expressions::UnixToTime {
20406 this: Box::new(this),
20407 scale: None,
20408 zone: None,
20409 hours: None,
20410 minutes: None,
20411 format: None,
20412 target_type: None,
20413 },
20414 )))
20415 }
20416 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
20417 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
20418 let mut args = f.args;
20419 let this = args.remove(0);
20420 Ok(Expression::TimeStrToDate(Box::new(
20421 crate::expressions::UnaryFunc {
20422 this,
20423 original_name: None,
20424 inferred_type: None,
20425 },
20426 )))
20427 }
20428 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
20429 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
20430 let mut args = f.args;
20431 let this = args.remove(0);
20432 Ok(Expression::TimeStrToTime(Box::new(
20433 crate::expressions::TimeStrToTime {
20434 this: Box::new(this),
20435 zone: None,
20436 },
20437 )))
20438 }
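20439 // MONTHS_BETWEEN(end, start) 2-arg -> DATE_DIFF + fractional-day CASE expansion for DuckDB, DATEDIFF(MONTH, start, end) for Snowflake/Redshift, DATE_DIFF('MONTH', start, end) for Presto/Trino/Athena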
20440 "MONTHS_BETWEEN" if f.args.len() == 2 => {
20441 match target {
20442 DialectType::DuckDB => {
20443 let mut args = f.args;
20444 let end_date = args.remove(0);
20445 let start_date = args.remove(0);
20446 let cast_end = Self::ensure_cast_date(end_date);
20447 let cast_start = Self::ensure_cast_date(start_date);
20448 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
20449 let dd = Expression::Function(Box::new(Function::new(
20450 "DATE_DIFF".to_string(),
20451 vec![
20452 Expression::string("MONTH"),
20453 cast_start.clone(),
20454 cast_end.clone(),
20455 ],
20456 )));
20457 let day_end =
20458 Expression::Function(Box::new(Function::new(
20459 "DAY".to_string(),
20460 vec![cast_end.clone()],
20461 )));
20462 let day_start =
20463 Expression::Function(Box::new(Function::new(
20464 "DAY".to_string(),
20465 vec![cast_start.clone()],
20466 )));
20467 let last_day_end =
20468 Expression::Function(Box::new(Function::new(
20469 "LAST_DAY".to_string(),
20470 vec![cast_end.clone()],
20471 )));
20472 let last_day_start =
20473 Expression::Function(Box::new(Function::new(
20474 "LAST_DAY".to_string(),
20475 vec![cast_start.clone()],
20476 )));
20477 let day_last_end = Expression::Function(Box::new(
20478 Function::new("DAY".to_string(), vec![last_day_end]),
20479 ));
20480 let day_last_start = Expression::Function(Box::new(
20481 Function::new("DAY".to_string(), vec![last_day_start]),
20482 ));
20483 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
20484 day_end.clone(),
20485 day_last_end,
20486 )));
20487 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
20488 day_start.clone(),
20489 day_last_start,
20490 )));
20491 let both_cond =
20492 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
20493 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
20494 day_end, day_start,
20495 )));
20496 let day_diff_paren = Expression::Paren(Box::new(
20497 crate::expressions::Paren {
20498 this: day_diff,
20499 trailing_comments: Vec::new(),
20500 },
20501 ));
20502 let frac = Expression::Div(Box::new(BinaryOp::new(
20503 day_diff_paren,
20504 Expression::Literal(Box::new(Literal::Number(
20505 "31.0".to_string(),
20506 ))),
20507 )));
20508 let case_expr = Expression::Case(Box::new(Case {
20509 operand: None,
20510 whens: vec![(both_cond, Expression::number(0))],
20511 else_: Some(frac),
20512 comments: Vec::new(),
20513 inferred_type: None,
20514 }));
20515 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
20516 }
20517 DialectType::Snowflake | DialectType::Redshift => {
20518 let mut args = f.args;
20519 let end_date = args.remove(0);
20520 let start_date = args.remove(0);
20521 let unit = Expression::Identifier(Identifier::new("MONTH"));
20522 Ok(Expression::Function(Box::new(Function::new(
20523 "DATEDIFF".to_string(),
20524 vec![unit, start_date, end_date],
20525 ))))
20526 }
20527 DialectType::Presto
20528 | DialectType::Trino
20529 | DialectType::Athena => {
20530 let mut args = f.args;
20531 let end_date = args.remove(0);
20532 let start_date = args.remove(0);
20533 Ok(Expression::Function(Box::new(Function::new(
20534 "DATE_DIFF".to_string(),
20535 vec![Expression::string("MONTH"), start_date, end_date],
20536 ))))
20537 }
20538 _ => Ok(Expression::Function(f)),
20539 }
20540 }
20541 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
20542 // Drop the roundOff arg for non-Spark targets, keep it for Spark
20543 "MONTHS_BETWEEN" if f.args.len() == 3 => {
20544 match target {
20545 DialectType::Spark | DialectType::Databricks => {
20546 Ok(Expression::Function(f))
20547 }
20548 _ => {
20549 // Drop the 3rd arg and delegate to the 2-arg logic
20550 let mut args = f.args;
20551 let end_date = args.remove(0);
20552 let start_date = args.remove(0);
20553 // Re-create as 2-arg and process
20554 let f2 = Function::new(
20555 "MONTHS_BETWEEN".to_string(),
20556 vec![end_date, start_date],
20557 );
20558 let e2 = Expression::Function(Box::new(f2));
20559 Self::cross_dialect_normalize(e2, source, target)
20560 }
20561 }
20562 }
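20563 // TO_TIMESTAMP(x) with 1 arg from a Spark/Databricks/Hive source -> CAST(x AS TIMESTAMP); the guard checks only the source, so this applies to every target that reaches this arm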
20564 "TO_TIMESTAMP"
20565 if f.args.len() == 1
20566 && matches!(
20567 source,
20568 DialectType::Spark
20569 | DialectType::Databricks
20570 | DialectType::Hive
20571 ) =>
20572 {
20573 let arg = f.args.into_iter().next().unwrap();
20574 Ok(Expression::Cast(Box::new(Cast {
20575 this: arg,
20576 to: DataType::Timestamp {
20577 timezone: false,
20578 precision: None,
20579 },
20580 trailing_comments: vec![],
20581 double_colon_syntax: false,
20582 format: None,
20583 default: None,
20584 inferred_type: None,
20585 })))
20586 }
20587 // STRING(x) from a Spark/Databricks source -> CAST(x AS STRING) for Spark/Databricks/Hive targets, CAST to DataType::Text for any other target
20588 "STRING"
20589 if f.args.len() == 1
20590 && matches!(
20591 source,
20592 DialectType::Spark | DialectType::Databricks
20593 ) =>
20594 {
20595 let arg = f.args.into_iter().next().unwrap();
20596 let dt = match target {
20597 DialectType::Spark
20598 | DialectType::Databricks
20599 | DialectType::Hive => DataType::Custom {
20600 name: "STRING".to_string(),
20601 },
20602 _ => DataType::Text,
20603 };
20604 Ok(Expression::Cast(Box::new(Cast {
20605 this: arg,
20606 to: dt,
20607 trailing_comments: vec![],
20608 double_colon_syntax: false,
20609 format: None,
20610 default: None,
20611 inferred_type: None,
20612 })))
20613 }
20614 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
20615 "LOGICAL_OR" if f.args.len() == 1 => {
20616 let name = match target {
20617 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
20618 _ => "LOGICAL_OR",
20619 };
20620 Ok(Expression::Function(Box::new(Function::new(
20621 name.to_string(),
20622 f.args,
20623 ))))
20624 }
20625 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
20626 "SPLIT"
20627 if f.args.len() == 2
20628 && matches!(
20629 source,
20630 DialectType::Spark
20631 | DialectType::Databricks
20632 | DialectType::Hive
20633 ) =>
20634 {
20635 let name = match target {
20636 DialectType::DuckDB => "STR_SPLIT_REGEX",
20637 DialectType::Presto
20638 | DialectType::Trino
20639 | DialectType::Athena => "REGEXP_SPLIT",
20640 DialectType::Spark
20641 | DialectType::Databricks
20642 | DialectType::Hive => "SPLIT",
20643 _ => "SPLIT",
20644 };
20645 Ok(Expression::Function(Box::new(Function::new(
20646 name.to_string(),
20647 f.args,
20648 ))))
20649 }
20650 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
20651 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
20652 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20653 Ok(Expression::Function(Box::new(Function::new(
20654 "ELEMENT_AT".to_string(),
20655 f.args,
20656 ))))
20657 }
20658 DialectType::DuckDB => {
20659 let mut args = f.args;
20660 let arr = args.remove(0);
20661 let idx = args.remove(0);
20662 Ok(Expression::Subscript(Box::new(
20663 crate::expressions::Subscript {
20664 this: arr,
20665 index: idx,
20666 },
20667 )))
20668 }
20669 _ => Ok(Expression::Function(f)),
20670 },
20671 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
20672 "ARRAY_FILTER" if f.args.len() == 2 => {
20673 let name = match target {
20674 DialectType::DuckDB => "LIST_FILTER",
20675 DialectType::StarRocks => "ARRAY_FILTER",
20676 _ => "FILTER",
20677 };
20678 Ok(Expression::Function(Box::new(Function::new(
20679 name.to_string(),
20680 f.args,
20681 ))))
20682 }
20683 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
20684 "FILTER" if f.args.len() == 2 => {
20685 let name = match target {
20686 DialectType::DuckDB => "LIST_FILTER",
20687 DialectType::StarRocks => "ARRAY_FILTER",
20688 _ => "FILTER",
20689 };
20690 Ok(Expression::Function(Box::new(Function::new(
20691 name.to_string(),
20692 f.args,
20693 ))))
20694 }
20695 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
20696 "REDUCE" if f.args.len() >= 3 => {
20697 let name = match target {
20698 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
20699 _ => "REDUCE",
20700 };
20701 Ok(Expression::Function(Box::new(Function::new(
20702 name.to_string(),
20703 f.args,
20704 ))))
20705 }
20706 // CURRENT_SCHEMA() -> dialect-specific
20707 "CURRENT_SCHEMA" => {
20708 match target {
20709 DialectType::PostgreSQL => {
20710 // PostgreSQL: CURRENT_SCHEMA (no parens)
20711 Ok(Expression::Function(Box::new(Function {
20712 name: "CURRENT_SCHEMA".to_string(),
20713 args: vec![],
20714 distinct: false,
20715 trailing_comments: vec![],
20716 use_bracket_syntax: false,
20717 no_parens: true,
20718 quoted: false,
20719 span: None,
20720 inferred_type: None,
20721 })))
20722 }
20723 DialectType::MySQL
20724 | DialectType::Doris
20725 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
20726 Function::new("SCHEMA".to_string(), vec![]),
20727 ))),
20728 DialectType::TSQL => Ok(Expression::Function(Box::new(
20729 Function::new("SCHEMA_NAME".to_string(), vec![]),
20730 ))),
20731 DialectType::SQLite => Ok(Expression::Literal(Box::new(
20732 Literal::String("main".to_string()),
20733 ))),
20734 _ => Ok(Expression::Function(f)),
20735 }
20736 }
20737 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20738 "LTRIM" if f.args.len() == 2 => match target {
20739 DialectType::Spark
20740 | DialectType::Hive
20741 | DialectType::Databricks
20742 | DialectType::ClickHouse => {
20743 let mut args = f.args;
20744 let str_expr = args.remove(0);
20745 let chars = args.remove(0);
20746 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20747 this: str_expr,
20748 characters: Some(chars),
20749 position: crate::expressions::TrimPosition::Leading,
20750 sql_standard_syntax: true,
20751 position_explicit: true,
20752 })))
20753 }
20754 _ => Ok(Expression::Function(f)),
20755 },
20756 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20757 "RTRIM" if f.args.len() == 2 => match target {
20758 DialectType::Spark
20759 | DialectType::Hive
20760 | DialectType::Databricks
20761 | DialectType::ClickHouse => {
20762 let mut args = f.args;
20763 let str_expr = args.remove(0);
20764 let chars = args.remove(0);
20765 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20766 this: str_expr,
20767 characters: Some(chars),
20768 position: crate::expressions::TrimPosition::Trailing,
20769 sql_standard_syntax: true,
20770 position_explicit: true,
20771 })))
20772 }
20773 _ => Ok(Expression::Function(f)),
20774 },
20775 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
20776 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
20777 DialectType::ClickHouse => {
20778 let mut new_f = *f;
20779 new_f.name = "arrayReverse".to_string();
20780 Ok(Expression::Function(Box::new(new_f)))
20781 }
20782 _ => Ok(Expression::Function(f)),
20783 },
20784 // UUID() -> NEWID() for TSQL
20785 "UUID" if f.args.is_empty() => match target {
20786 DialectType::TSQL | DialectType::Fabric => {
20787 Ok(Expression::Function(Box::new(Function::new(
20788 "NEWID".to_string(),
20789 vec![],
20790 ))))
20791 }
20792 _ => Ok(Expression::Function(f)),
20793 },
20794 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
20795 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
20796 DialectType::ClickHouse => {
20797 let mut new_f = *f;
20798 new_f.name = "farmFingerprint64".to_string();
20799 Ok(Expression::Function(Box::new(new_f)))
20800 }
20801 DialectType::Redshift => {
20802 let mut new_f = *f;
20803 new_f.name = "FARMFINGERPRINT64".to_string();
20804 Ok(Expression::Function(Box::new(new_f)))
20805 }
20806 _ => Ok(Expression::Function(f)),
20807 },
20808 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
20809 "JSON_KEYS" => match target {
20810 DialectType::Databricks | DialectType::Spark => {
20811 let mut new_f = *f;
20812 new_f.name = "JSON_OBJECT_KEYS".to_string();
20813 Ok(Expression::Function(Box::new(new_f)))
20814 }
20815 DialectType::Snowflake => {
20816 let mut new_f = *f;
20817 new_f.name = "OBJECT_KEYS".to_string();
20818 Ok(Expression::Function(Box::new(new_f)))
20819 }
20820 _ => Ok(Expression::Function(f)),
20821 },
20822 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
20823 "WEEKOFYEAR" => match target {
20824 DialectType::Snowflake => {
20825 let mut new_f = *f;
20826 new_f.name = "WEEKISO".to_string();
20827 Ok(Expression::Function(Box::new(new_f)))
20828 }
20829 _ => Ok(Expression::Function(f)),
20830 },
20831 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks/Spark (only when the source dialect is Generic)
20832 "FORMAT"
20833 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
20834 {
20835 match target {
20836 DialectType::Databricks | DialectType::Spark => {
20837 let mut new_f = *f;
20838 new_f.name = "FORMAT_STRING".to_string();
20839 Ok(Expression::Function(Box::new(new_f)))
20840 }
20841 _ => Ok(Expression::Function(f)),
20842 }
20843 }
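20844 // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino/Athena; CASE WHEN <any arg, separator included, IS NULL> THEN NULL ELSE CONCAT_WS(...) END for DuckDB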
20845 "CONCAT_WS" if f.args.len() >= 2 => match target {
20846 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20847 let mut args = f.args;
20848 let sep = args.remove(0);
20849 let cast_args: Vec<Expression> = args
20850 .into_iter()
20851 .map(|a| {
20852 Expression::Cast(Box::new(Cast {
20853 this: a,
20854 to: DataType::VarChar {
20855 length: None,
20856 parenthesized_length: false,
20857 },
20858 double_colon_syntax: false,
20859 trailing_comments: Vec::new(),
20860 format: None,
20861 default: None,
20862 inferred_type: None,
20863 }))
20864 })
20865 .collect();
20866 let mut new_args = vec![sep];
20867 new_args.extend(cast_args);
20868 Ok(Expression::Function(Box::new(Function::new(
20869 "CONCAT_WS".to_string(),
20870 new_args,
20871 ))))
20872 }
20873 DialectType::DuckDB => {
20874 let args = f.args;
20875 let mut null_checks = args.iter().cloned().map(|arg| {
20876 Expression::IsNull(Box::new(crate::expressions::IsNull {
20877 this: arg,
20878 not: false,
20879 postfix_form: false,
20880 }))
20881 });
20882 let first_null_check = null_checks
20883 .next()
20884 .expect("CONCAT_WS with >= 2 args must yield a null check");
20885 let null_check =
20886 null_checks.fold(first_null_check, |left, right| {
20887 Expression::Or(Box::new(BinaryOp {
20888 left,
20889 right,
20890 left_comments: Vec::new(),
20891 operator_comments: Vec::new(),
20892 trailing_comments: Vec::new(),
20893 inferred_type: None,
20894 }))
20895 });
20896 Ok(Expression::Case(Box::new(Case {
20897 operand: None,
20898 whens: vec![(null_check, Expression::Null(Null))],
20899 else_: Some(Expression::Function(Box::new(Function::new(
20900 "CONCAT_WS".to_string(),
20901 args,
20902 )))),
20903 comments: vec![],
20904 inferred_type: None,
20905 })))
20906 }
20907 _ => Ok(Expression::Function(f)),
20908 },
20909 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Athena/Databricks/Spark, arraySlice for ClickHouse; 3-arg Snowflake -> DuckDB keeps ARRAY_SLICE but adjusts start/end
20910 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
20911 DialectType::DuckDB
20912 if f.args.len() == 3
20913 && matches!(source, DialectType::Snowflake) =>
20914 {
20915 // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
20916 // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
20917 let mut args = f.args;
20918 let arr = args.remove(0);
20919 let start = args.remove(0);
20920 let end = args.remove(0);
20921
20922 // CASE WHEN start >= 0 THEN start + 1 ELSE start END
20923 let adjusted_start = Expression::Case(Box::new(Case {
20924 operand: None,
20925 whens: vec![(
20926 Expression::Gte(Box::new(BinaryOp {
20927 left: start.clone(),
20928 right: Expression::number(0),
20929 left_comments: vec![],
20930 operator_comments: vec![],
20931 trailing_comments: vec![],
20932 inferred_type: None,
20933 })),
20934 Expression::Add(Box::new(BinaryOp {
20935 left: start.clone(),
20936 right: Expression::number(1),
20937 left_comments: vec![],
20938 operator_comments: vec![],
20939 trailing_comments: vec![],
20940 inferred_type: None,
20941 })),
20942 )],
20943 else_: Some(start),
20944 comments: vec![],
20945 inferred_type: None,
20946 }));
20947
20948 // CASE WHEN end < 0 THEN end - 1 ELSE end END
20949 let adjusted_end = Expression::Case(Box::new(Case {
20950 operand: None,
20951 whens: vec![(
20952 Expression::Lt(Box::new(BinaryOp {
20953 left: end.clone(),
20954 right: Expression::number(0),
20955 left_comments: vec![],
20956 operator_comments: vec![],
20957 trailing_comments: vec![],
20958 inferred_type: None,
20959 })),
20960 Expression::Sub(Box::new(BinaryOp {
20961 left: end.clone(),
20962 right: Expression::number(1),
20963 left_comments: vec![],
20964 operator_comments: vec![],
20965 trailing_comments: vec![],
20966 inferred_type: None,
20967 })),
20968 )],
20969 else_: Some(end),
20970 comments: vec![],
20971 inferred_type: None,
20972 }));
20973
20974 Ok(Expression::Function(Box::new(Function::new(
20975 "ARRAY_SLICE".to_string(),
20976 vec![arr, adjusted_start, adjusted_end],
20977 ))))
20978 }
20979 DialectType::Presto
20980 | DialectType::Trino
20981 | DialectType::Athena
20982 | DialectType::Databricks
20983 | DialectType::Spark => {
20984 let mut new_f = *f;
20985 new_f.name = "SLICE".to_string();
20986 Ok(Expression::Function(Box::new(new_f)))
20987 }
20988 DialectType::ClickHouse => {
20989 let mut new_f = *f;
20990 new_f.name = "arraySlice".to_string();
20991 Ok(Expression::Function(Box::new(new_f)))
20992 }
20993 _ => Ok(Expression::Function(f)),
20994 },
20995 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
20996 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
20997 DialectType::DuckDB => {
20998 let mut args = f.args;
20999 let arr = args.remove(0);
21000 let val = args.remove(0);
21001 Ok(Expression::Function(Box::new(Function::new(
21002 "LIST_PREPEND".to_string(),
21003 vec![val, arr],
21004 ))))
21005 }
21006 _ => Ok(Expression::Function(f)),
21007 },
21008 // ARRAY_REMOVE(arr, target) -> dialect-specific
21009 "ARRAY_REMOVE" if f.args.len() == 2 => {
21010 match target {
21011 DialectType::DuckDB => {
21012 let mut args = f.args;
21013 let arr = args.remove(0);
21014 let target_val = args.remove(0);
21015 let u_id = crate::expressions::Identifier::new("_u");
21016 // LIST_FILTER(arr, _u -> _u <> target)
21017 let lambda = Expression::Lambda(Box::new(
21018 crate::expressions::LambdaExpr {
21019 parameters: vec![u_id.clone()],
21020 body: Expression::Neq(Box::new(BinaryOp {
21021 left: Expression::Identifier(u_id),
21022 right: target_val,
21023 left_comments: Vec::new(),
21024 operator_comments: Vec::new(),
21025 trailing_comments: Vec::new(),
21026 inferred_type: None,
21027 })),
21028 colon: false,
21029 parameter_types: Vec::new(),
21030 },
21031 ));
21032 Ok(Expression::Function(Box::new(Function::new(
21033 "LIST_FILTER".to_string(),
21034 vec![arr, lambda],
21035 ))))
21036 }
21037 DialectType::ClickHouse => {
21038 let mut args = f.args;
21039 let arr = args.remove(0);
21040 let target_val = args.remove(0);
21041 let u_id = crate::expressions::Identifier::new("_u");
21042 // arrayFilter(_u -> _u <> target, arr)
21043 let lambda = Expression::Lambda(Box::new(
21044 crate::expressions::LambdaExpr {
21045 parameters: vec![u_id.clone()],
21046 body: Expression::Neq(Box::new(BinaryOp {
21047 left: Expression::Identifier(u_id),
21048 right: target_val,
21049 left_comments: Vec::new(),
21050 operator_comments: Vec::new(),
21051 trailing_comments: Vec::new(),
21052 inferred_type: None,
21053 })),
21054 colon: false,
21055 parameter_types: Vec::new(),
21056 },
21057 ));
21058 Ok(Expression::Function(Box::new(Function::new(
21059 "arrayFilter".to_string(),
21060 vec![lambda, arr],
21061 ))))
21062 }
21063 DialectType::BigQuery => {
21064 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
21065 let mut args = f.args;
21066 let arr = args.remove(0);
21067 let target_val = args.remove(0);
21068 let u_id = crate::expressions::Identifier::new("_u");
21069 let u_col = Expression::Column(Box::new(
21070 crate::expressions::Column {
21071 name: u_id.clone(),
21072 table: None,
21073 join_mark: false,
21074 trailing_comments: Vec::new(),
21075 span: None,
21076 inferred_type: None,
21077 },
21078 ));
21079 // UNNEST(the_array) AS _u
21080 let unnest_expr = Expression::Unnest(Box::new(
21081 crate::expressions::UnnestFunc {
21082 this: arr,
21083 expressions: Vec::new(),
21084 with_ordinality: false,
21085 alias: None,
21086 offset_alias: None,
21087 },
21088 ));
21089 let aliased_unnest = Expression::Alias(Box::new(
21090 crate::expressions::Alias {
21091 this: unnest_expr,
21092 alias: u_id.clone(),
21093 column_aliases: Vec::new(),
21094 pre_alias_comments: Vec::new(),
21095 trailing_comments: Vec::new(),
21096 inferred_type: None,
21097 },
21098 ));
21099 // _u <> target
21100 let where_cond = Expression::Neq(Box::new(BinaryOp {
21101 left: u_col.clone(),
21102 right: target_val,
21103 left_comments: Vec::new(),
21104 operator_comments: Vec::new(),
21105 trailing_comments: Vec::new(),
21106 inferred_type: None,
21107 }));
21108 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
21109 let subquery = Expression::Select(Box::new(
21110 crate::expressions::Select::new()
21111 .column(u_col)
21112 .from(aliased_unnest)
21113 .where_(where_cond),
21114 ));
21115 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
21116 Ok(Expression::ArrayFunc(Box::new(
21117 crate::expressions::ArrayConstructor {
21118 expressions: vec![subquery],
21119 bracket_notation: false,
21120 use_list_keyword: false,
21121 },
21122 )))
21123 }
21124 _ => Ok(Expression::Function(f)),
21125 }
21126 }
21127 // PARSE_JSON(str) -> str for SQLite/Doris/MySQL/StarRocks (strip the wrapper and keep the inner argument)
21128 "PARSE_JSON" if f.args.len() == 1 => {
21129 match target {
21130 DialectType::SQLite
21131 | DialectType::Doris
21132 | DialectType::MySQL
21133 | DialectType::StarRocks => {
21134 // Strip PARSE_JSON, return the inner argument
21135 Ok(f.args.into_iter().next().unwrap())
21136 }
21137 _ => Ok(Expression::Function(f)),
21138 }
21139 }
21140 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
21141 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
21142 "JSON_REMOVE" => Ok(Expression::Function(f)),
21143 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
21144 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
21145 "JSON_SET" => Ok(Expression::Function(f)),
21146 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
21147 // Behavior per search value type:
21148 // NULL literal -> CASE WHEN x IS NULL THEN result
21149 // Literal (number, string, bool; Neg nodes are also treated as literals) -> CASE WHEN x = literal THEN result
21150 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
21151 "DECODE" if f.args.len() >= 3 => {
21152 // Keep as DECODE for targets that support it natively
21153 let keep_as_decode = matches!(
21154 target,
21155 DialectType::Oracle
21156 | DialectType::Snowflake
21157 | DialectType::Redshift
21158 | DialectType::Teradata
21159 | DialectType::Spark
21160 | DialectType::Databricks
21161 );
21162 if keep_as_decode {
21163 return Ok(Expression::Function(f));
21164 }
21165
21166 let mut args = f.args;
21167 let this_expr = args.remove(0);
21168 let mut pairs = Vec::new();
21169 let mut default = None;
21170 let mut i = 0;
21171 while i + 1 < args.len() {
21172 pairs.push((args[i].clone(), args[i + 1].clone()));
21173 i += 2;
21174 }
21175 if i < args.len() {
21176 default = Some(args[i].clone());
21177 }
21178 // Helper: check if expression is a literal value
21179 fn is_literal(e: &Expression) -> bool {
21180 matches!(
21181 e,
21182 Expression::Literal(_)
21183 | Expression::Boolean(_)
21184 | Expression::Neg(_)
21185 )
21186 }
21187 let whens: Vec<(Expression, Expression)> = pairs
21188 .into_iter()
21189 .map(|(search, result)| {
21190 if matches!(&search, Expression::Null(_)) {
21191 // NULL search -> IS NULL
21192 let condition = Expression::Is(Box::new(BinaryOp {
21193 left: this_expr.clone(),
21194 right: Expression::Null(crate::expressions::Null),
21195 left_comments: Vec::new(),
21196 operator_comments: Vec::new(),
21197 trailing_comments: Vec::new(),
21198 inferred_type: None,
21199 }));
21200 (condition, result)
21201 } else if is_literal(&search) {
21202 // Literal search -> simple equality
21203 let eq = Expression::Eq(Box::new(BinaryOp {
21204 left: this_expr.clone(),
21205 right: search,
21206 left_comments: Vec::new(),
21207 operator_comments: Vec::new(),
21208 trailing_comments: Vec::new(),
21209 inferred_type: None,
21210 }));
21211 (eq, result)
21212 } else {
21213 // Non-literal (column ref, expression) -> null-safe comparison
21214 let needs_paren = matches!(
21215 &search,
21216 Expression::Eq(_)
21217 | Expression::Neq(_)
21218 | Expression::Gt(_)
21219 | Expression::Gte(_)
21220 | Expression::Lt(_)
21221 | Expression::Lte(_)
21222 );
21223 let search_for_eq = if needs_paren {
21224 Expression::Paren(Box::new(
21225 crate::expressions::Paren {
21226 this: search.clone(),
21227 trailing_comments: Vec::new(),
21228 },
21229 ))
21230 } else {
21231 search.clone()
21232 };
21233 let eq = Expression::Eq(Box::new(BinaryOp {
21234 left: this_expr.clone(),
21235 right: search_for_eq,
21236 left_comments: Vec::new(),
21237 operator_comments: Vec::new(),
21238 trailing_comments: Vec::new(),
21239 inferred_type: None,
21240 }));
21241 let search_for_null = if needs_paren {
21242 Expression::Paren(Box::new(
21243 crate::expressions::Paren {
21244 this: search.clone(),
21245 trailing_comments: Vec::new(),
21246 },
21247 ))
21248 } else {
21249 search.clone()
21250 };
21251 let x_is_null = Expression::Is(Box::new(BinaryOp {
21252 left: this_expr.clone(),
21253 right: Expression::Null(crate::expressions::Null),
21254 left_comments: Vec::new(),
21255 operator_comments: Vec::new(),
21256 trailing_comments: Vec::new(),
21257 inferred_type: None,
21258 }));
21259 let s_is_null = Expression::Is(Box::new(BinaryOp {
21260 left: search_for_null,
21261 right: Expression::Null(crate::expressions::Null),
21262 left_comments: Vec::new(),
21263 operator_comments: Vec::new(),
21264 trailing_comments: Vec::new(),
21265 inferred_type: None,
21266 }));
21267 let both_null = Expression::And(Box::new(BinaryOp {
21268 left: x_is_null,
21269 right: s_is_null,
21270 left_comments: Vec::new(),
21271 operator_comments: Vec::new(),
21272 trailing_comments: Vec::new(),
21273 inferred_type: None,
21274 }));
21275 let condition = Expression::Or(Box::new(BinaryOp {
21276 left: eq,
21277 right: Expression::Paren(Box::new(
21278 crate::expressions::Paren {
21279 this: both_null,
21280 trailing_comments: Vec::new(),
21281 },
21282 )),
21283 left_comments: Vec::new(),
21284 operator_comments: Vec::new(),
21285 trailing_comments: Vec::new(),
21286 inferred_type: None,
21287 }));
21288 (condition, result)
21289 }
21290 })
21291 .collect();
21292 Ok(Expression::Case(Box::new(Case {
21293 operand: None,
21294 whens,
21295 else_: default,
21296 comments: Vec::new(),
21297 inferred_type: None,
21298 })))
21299 }
21300 // LEVENSHTEIN(a, b, ...) -> dialect-specific
21301 "LEVENSHTEIN" => {
21302 match target {
21303 DialectType::BigQuery => {
21304 let mut new_f = *f;
21305 new_f.name = "EDIT_DISTANCE".to_string();
21306 Ok(Expression::Function(Box::new(new_f)))
21307 }
21308 DialectType::Drill => {
21309 let mut new_f = *f;
21310 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
21311 Ok(Expression::Function(Box::new(new_f)))
21312 }
21313 DialectType::PostgreSQL if f.args.len() == 6 => {
21314 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
21315 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
21316 let mut new_f = *f;
21317 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
21318 Ok(Expression::Function(Box::new(new_f)))
21319 }
21320 _ => Ok(Expression::Function(f)),
21321 }
21322 }
21323 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
21324 "ARRAY_MAX" => {
21325 let name = match target {
21326 DialectType::ClickHouse => "arrayMax",
21327 DialectType::DuckDB => "LIST_MAX",
21328 _ => "ARRAY_MAX",
21329 };
21330 let mut new_f = *f;
21331 new_f.name = name.to_string();
21332 Ok(Expression::Function(Box::new(new_f)))
21333 }
21334 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
21335 "ARRAY_MIN" => {
21336 let name = match target {
21337 DialectType::ClickHouse => "arrayMin",
21338 DialectType::DuckDB => "LIST_MIN",
21339 _ => "ARRAY_MIN",
21340 };
21341 let mut new_f = *f;
21342 new_f.name = name.to_string();
21343 Ok(Expression::Function(Box::new(new_f)))
21344 }
21345 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
21346 // -> CAST(JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) * 100 AS INT) for DuckDB
21347 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
21348 let mut args = f.args;
21349 let b = args.pop().unwrap();
21350 let a = args.pop().unwrap();
21351 match target {
21352 DialectType::ClickHouse => {
21353 let upper_a = Expression::Upper(Box::new(
21354 crate::expressions::UnaryFunc::new(a),
21355 ));
21356 let upper_b = Expression::Upper(Box::new(
21357 crate::expressions::UnaryFunc::new(b),
21358 ));
21359 Ok(Expression::Function(Box::new(Function::new(
21360 "jaroWinklerSimilarity".to_string(),
21361 vec![upper_a, upper_b],
21362 ))))
21363 }
21364 DialectType::DuckDB => {
21365 let upper_a = Expression::Upper(Box::new(
21366 crate::expressions::UnaryFunc::new(a),
21367 ));
21368 let upper_b = Expression::Upper(Box::new(
21369 crate::expressions::UnaryFunc::new(b),
21370 ));
21371 let score = Expression::Function(Box::new(Function::new(
21372 "JARO_WINKLER_SIMILARITY".to_string(),
21373 vec![upper_a, upper_b],
21374 )));
21375 let scaled = Expression::Mul(Box::new(BinaryOp {
21376 left: score,
21377 right: Expression::number(100),
21378 left_comments: Vec::new(),
21379 operator_comments: Vec::new(),
21380 trailing_comments: Vec::new(),
21381 inferred_type: None,
21382 }));
21383 Ok(Expression::Cast(Box::new(Cast {
21384 this: scaled,
21385 to: DataType::Int {
21386 length: None,
21387 integer_spelling: false,
21388 },
21389 trailing_comments: Vec::new(),
21390 double_colon_syntax: false,
21391 format: None,
21392 default: None,
21393 inferred_type: None,
21394 })))
21395 }
21396 _ => Ok(Expression::Function(Box::new(Function::new(
21397 "JAROWINKLER_SIMILARITY".to_string(),
21398 vec![a, b],
21399 )))),
21400 }
21401 }
21402 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
21403 "CURRENT_SCHEMAS" => match target {
21404 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21405 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
21406 ))),
21407 _ => Ok(Expression::Function(f)),
21408 },
21409 // TRUNC/TRUNCATE (numeric) -> dialect-specific
21410 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
21411 match target {
21412 DialectType::TSQL | DialectType::Fabric => {
21413 // ROUND(x, decimals, 1) - the 1 flag means truncation
21414 let mut args = f.args;
21415 let this = if args.is_empty() {
21416 return Ok(Expression::Function(Box::new(
21417 Function::new("TRUNC".to_string(), args),
21418 )));
21419 } else {
21420 args.remove(0)
21421 };
21422 let decimals = if args.is_empty() {
21423 Expression::Literal(Box::new(Literal::Number(
21424 "0".to_string(),
21425 )))
21426 } else {
21427 args.remove(0)
21428 };
21429 Ok(Expression::Function(Box::new(Function::new(
21430 "ROUND".to_string(),
21431 vec![
21432 this,
21433 decimals,
21434 Expression::Literal(Box::new(Literal::Number(
21435 "1".to_string(),
21436 ))),
21437 ],
21438 ))))
21439 }
21440 DialectType::Presto
21441 | DialectType::Trino
21442 | DialectType::Athena => {
21443 // TRUNCATE(x, decimals)
21444 let mut new_f = *f;
21445 new_f.name = "TRUNCATE".to_string();
21446 Ok(Expression::Function(Box::new(new_f)))
21447 }
21448 DialectType::MySQL
21449 | DialectType::SingleStore
21450 | DialectType::TiDB => {
21451 // TRUNCATE(x, decimals)
21452 let mut new_f = *f;
21453 new_f.name = "TRUNCATE".to_string();
21454 Ok(Expression::Function(Box::new(new_f)))
21455 }
21456 DialectType::DuckDB => {
21457 // DuckDB supports TRUNC(x, decimals) — preserve both args
21458 let mut args = f.args;
21459 // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
21460 if args.len() == 2
21461 && matches!(source, DialectType::Snowflake)
21462 {
21463 let decimals = args.remove(1);
21464 let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
21465 || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
21466 let wrapped = if !is_int {
21467 Expression::Cast(Box::new(
21468 crate::expressions::Cast {
21469 this: decimals,
21470 to: DataType::Int {
21471 length: None,
21472 integer_spelling: false,
21473 },
21474 double_colon_syntax: false,
21475 trailing_comments: Vec::new(),
21476 format: None,
21477 default: None,
21478 inferred_type: None,
21479 },
21480 ))
21481 } else {
21482 decimals
21483 };
21484 args.push(wrapped);
21485 }
21486 Ok(Expression::Function(Box::new(Function::new(
21487 "TRUNC".to_string(),
21488 args,
21489 ))))
21490 }
21491 DialectType::ClickHouse => {
21492 // trunc(x, decimals) - lowercase
21493 let mut new_f = *f;
21494 new_f.name = "trunc".to_string();
21495 Ok(Expression::Function(Box::new(new_f)))
21496 }
21497 DialectType::Spark | DialectType::Databricks => {
21498 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
21499 let this = f.args.into_iter().next().unwrap_or(
21500 Expression::Literal(Box::new(Literal::Number(
21501 "0".to_string(),
21502 ))),
21503 );
21504 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
21505 this,
21506 to: crate::expressions::DataType::BigInt {
21507 length: None,
21508 },
21509 double_colon_syntax: false,
21510 trailing_comments: Vec::new(),
21511 format: None,
21512 default: None,
21513 inferred_type: None,
21514 })))
21515 }
21516 _ => {
21517 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
21518 let mut new_f = *f;
21519 new_f.name = "TRUNC".to_string();
21520 Ok(Expression::Function(Box::new(new_f)))
21521 }
21522 }
21523 }
21524 // CURRENT_VERSION() -> VERSION() for most dialects
21525 "CURRENT_VERSION" => match target {
21526 DialectType::Snowflake
21527 | DialectType::Databricks
21528 | DialectType::StarRocks => Ok(Expression::Function(f)),
21529 DialectType::SQLite => {
21530 let mut new_f = *f;
21531 new_f.name = "SQLITE_VERSION".to_string();
21532 Ok(Expression::Function(Box::new(new_f)))
21533 }
21534 _ => {
21535 let mut new_f = *f;
21536 new_f.name = "VERSION".to_string();
21537 Ok(Expression::Function(Box::new(new_f)))
21538 }
21539 },
21540 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
21541 "ARRAY_REVERSE" => match target {
21542 DialectType::ClickHouse => {
21543 let mut new_f = *f;
21544 new_f.name = "arrayReverse".to_string();
21545 Ok(Expression::Function(Box::new(new_f)))
21546 }
21547 _ => Ok(Expression::Function(f)),
21548 },
21549 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
21550 "GENERATE_DATE_ARRAY" => {
21551 let mut args = f.args;
21552 if matches!(target, DialectType::BigQuery) {
21553 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
21554 if args.len() == 2 {
21555 let default_interval = Expression::Interval(Box::new(
21556 crate::expressions::Interval {
21557 this: Some(Expression::Literal(Box::new(
21558 Literal::String("1".to_string()),
21559 ))),
21560 unit: Some(
21561 crate::expressions::IntervalUnitSpec::Simple {
21562 unit: crate::expressions::IntervalUnit::Day,
21563 use_plural: false,
21564 },
21565 ),
21566 },
21567 ));
21568 args.push(default_interval);
21569 }
21570 Ok(Expression::Function(Box::new(Function::new(
21571 "GENERATE_DATE_ARRAY".to_string(),
21572 args,
21573 ))))
21574 } else if matches!(target, DialectType::DuckDB) {
21575 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
21576 let start = args.get(0).cloned();
21577 let end = args.get(1).cloned();
21578 let step = args.get(2).cloned().or_else(|| {
21579 Some(Expression::Interval(Box::new(
21580 crate::expressions::Interval {
21581 this: Some(Expression::Literal(Box::new(
21582 Literal::String("1".to_string()),
21583 ))),
21584 unit: Some(
21585 crate::expressions::IntervalUnitSpec::Simple {
21586 unit: crate::expressions::IntervalUnit::Day,
21587 use_plural: false,
21588 },
21589 ),
21590 },
21591 )))
21592 });
21593 let gen_series = Expression::GenerateSeries(Box::new(
21594 crate::expressions::GenerateSeries {
21595 start: start.map(Box::new),
21596 end: end.map(Box::new),
21597 step: step.map(Box::new),
21598 is_end_exclusive: None,
21599 },
21600 ));
21601 Ok(Expression::Cast(Box::new(Cast {
21602 this: gen_series,
21603 to: DataType::Array {
21604 element_type: Box::new(DataType::Date),
21605 dimension: None,
21606 },
21607 trailing_comments: vec![],
21608 double_colon_syntax: false,
21609 format: None,
21610 default: None,
21611 inferred_type: None,
21612 })))
21613 } else if matches!(
21614 target,
21615 DialectType::Presto | DialectType::Trino | DialectType::Athena
21616 ) {
21617 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
21618 let start = args.get(0).cloned();
21619 let end = args.get(1).cloned();
21620 let step = args.get(2).cloned().or_else(|| {
21621 Some(Expression::Interval(Box::new(
21622 crate::expressions::Interval {
21623 this: Some(Expression::Literal(Box::new(
21624 Literal::String("1".to_string()),
21625 ))),
21626 unit: Some(
21627 crate::expressions::IntervalUnitSpec::Simple {
21628 unit: crate::expressions::IntervalUnit::Day,
21629 use_plural: false,
21630 },
21631 ),
21632 },
21633 )))
21634 });
21635 let gen_series = Expression::GenerateSeries(Box::new(
21636 crate::expressions::GenerateSeries {
21637 start: start.map(Box::new),
21638 end: end.map(Box::new),
21639 step: step.map(Box::new),
21640 is_end_exclusive: None,
21641 },
21642 ));
21643 Ok(gen_series)
21644 } else if matches!(
21645 target,
21646 DialectType::Spark | DialectType::Databricks
21647 ) {
21648 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
21649 let start = args.get(0).cloned();
21650 let end = args.get(1).cloned();
21651 let step = args.get(2).cloned().or_else(|| {
21652 Some(Expression::Interval(Box::new(
21653 crate::expressions::Interval {
21654 this: Some(Expression::Literal(Box::new(
21655 Literal::String("1".to_string()),
21656 ))),
21657 unit: Some(
21658 crate::expressions::IntervalUnitSpec::Simple {
21659 unit: crate::expressions::IntervalUnit::Day,
21660 use_plural: false,
21661 },
21662 ),
21663 },
21664 )))
21665 });
21666 let gen_series = Expression::GenerateSeries(Box::new(
21667 crate::expressions::GenerateSeries {
21668 start: start.map(Box::new),
21669 end: end.map(Box::new),
21670 step: step.map(Box::new),
21671 is_end_exclusive: None,
21672 },
21673 ));
21674 Ok(gen_series)
21675 } else if matches!(target, DialectType::Snowflake) {
21676 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
21677 if args.len() == 2 {
21678 let default_interval = Expression::Interval(Box::new(
21679 crate::expressions::Interval {
21680 this: Some(Expression::Literal(Box::new(
21681 Literal::String("1".to_string()),
21682 ))),
21683 unit: Some(
21684 crate::expressions::IntervalUnitSpec::Simple {
21685 unit: crate::expressions::IntervalUnit::Day,
21686 use_plural: false,
21687 },
21688 ),
21689 },
21690 ));
21691 args.push(default_interval);
21692 }
21693 Ok(Expression::Function(Box::new(Function::new(
21694 "GENERATE_DATE_ARRAY".to_string(),
21695 args,
21696 ))))
21697 } else if matches!(
21698 target,
21699 DialectType::MySQL
21700 | DialectType::TSQL
21701 | DialectType::Fabric
21702 | DialectType::Redshift
21703 ) {
21704 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
21705 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
21706 Ok(Expression::Function(Box::new(Function::new(
21707 "GENERATE_DATE_ARRAY".to_string(),
21708 args,
21709 ))))
21710 } else {
21711 // PostgreSQL/others: convert to GenerateSeries
21712 let start = args.get(0).cloned();
21713 let end = args.get(1).cloned();
21714 let step = args.get(2).cloned().or_else(|| {
21715 Some(Expression::Interval(Box::new(
21716 crate::expressions::Interval {
21717 this: Some(Expression::Literal(Box::new(
21718 Literal::String("1".to_string()),
21719 ))),
21720 unit: Some(
21721 crate::expressions::IntervalUnitSpec::Simple {
21722 unit: crate::expressions::IntervalUnit::Day,
21723 use_plural: false,
21724 },
21725 ),
21726 },
21727 )))
21728 });
21729 Ok(Expression::GenerateSeries(Box::new(
21730 crate::expressions::GenerateSeries {
21731 start: start.map(Box::new),
21732 end: end.map(Box::new),
21733 step: step.map(Box::new),
21734 is_end_exclusive: None,
21735 },
21736 )))
21737 }
21738 }
21739 // ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
21740 // (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
21741 "ARRAYS_OVERLAP"
21742 if f.args.len() == 2
21743 && matches!(source, DialectType::Snowflake)
21744 && matches!(target, DialectType::DuckDB) =>
21745 {
21746 let mut args = f.args;
21747 let arr1 = args.remove(0);
21748 let arr2 = args.remove(0);
21749
21750 // (arr1 && arr2)
21751 let overlap = Expression::Paren(Box::new(Paren {
21752 this: Expression::ArrayOverlaps(Box::new(BinaryOp {
21753 left: arr1.clone(),
21754 right: arr2.clone(),
21755 left_comments: vec![],
21756 operator_comments: vec![],
21757 trailing_comments: vec![],
21758 inferred_type: None,
21759 })),
21760 trailing_comments: vec![],
21761 }));
21762
21763 // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
21764 let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
21765 left: Expression::Function(Box::new(Function::new(
21766 "ARRAY_LENGTH".to_string(),
21767 vec![arr1.clone()],
21768 ))),
21769 right: Expression::Function(Box::new(Function::new(
21770 "LIST_COUNT".to_string(),
21771 vec![arr1],
21772 ))),
21773 left_comments: vec![],
21774 operator_comments: vec![],
21775 trailing_comments: vec![],
21776 inferred_type: None,
21777 }));
21778
21779 // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
21780 let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
21781 left: Expression::Function(Box::new(Function::new(
21782 "ARRAY_LENGTH".to_string(),
21783 vec![arr2.clone()],
21784 ))),
21785 right: Expression::Function(Box::new(Function::new(
21786 "LIST_COUNT".to_string(),
21787 vec![arr2],
21788 ))),
21789 left_comments: vec![],
21790 operator_comments: vec![],
21791 trailing_comments: vec![],
21792 inferred_type: None,
21793 }));
21794
21795 // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
21796 let null_check = Expression::Paren(Box::new(Paren {
21797 this: Expression::And(Box::new(BinaryOp {
21798 left: arr1_has_null,
21799 right: arr2_has_null,
21800 left_comments: vec![],
21801 operator_comments: vec![],
21802 trailing_comments: vec![],
21803 inferred_type: None,
21804 })),
21805 trailing_comments: vec![],
21806 }));
21807
21808 // (arr1 && arr2) OR (null_check)
21809 Ok(Expression::Or(Box::new(BinaryOp {
21810 left: overlap,
21811 right: null_check,
21812 left_comments: vec![],
21813 operator_comments: vec![],
21814 trailing_comments: vec![],
21815 inferred_type: None,
21816 })))
21817 }
21818 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
21819 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
21820 "ARRAY_INTERSECTION"
21821 if f.args.len() == 2
21822 && matches!(source, DialectType::Snowflake)
21823 && matches!(target, DialectType::DuckDB) =>
21824 {
21825 let mut args = f.args;
21826 let arr1 = args.remove(0);
21827 let arr2 = args.remove(0);
21828
21829 // Build: arr1 IS NULL
21830 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
21831 this: arr1.clone(),
21832 not: false,
21833 postfix_form: false,
21834 }));
21835 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
21836 this: arr2.clone(),
21837 not: false,
21838 postfix_form: false,
21839 }));
21840 let null_check = Expression::Or(Box::new(BinaryOp {
21841 left: arr1_is_null,
21842 right: arr2_is_null,
21843 left_comments: vec![],
21844 operator_comments: vec![],
21845 trailing_comments: vec![],
21846 inferred_type: None,
21847 }));
21848
21849 // GENERATE_SERIES(1, LENGTH(arr1))
21850 let gen_series = Expression::Function(Box::new(Function::new(
21851 "GENERATE_SERIES".to_string(),
21852 vec![
21853 Expression::number(1),
21854 Expression::Function(Box::new(Function::new(
21855 "LENGTH".to_string(),
21856 vec![arr1.clone()],
21857 ))),
21858 ],
21859 )));
21860
21861 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
21862 let list_zip = Expression::Function(Box::new(Function::new(
21863 "LIST_ZIP".to_string(),
21864 vec![arr1.clone(), gen_series],
21865 )));
21866
21867 // pair[1] and pair[2]
21868 let pair_col = Expression::column("pair");
21869 let pair_1 = Expression::Subscript(Box::new(
21870 crate::expressions::Subscript {
21871 this: pair_col.clone(),
21872 index: Expression::number(1),
21873 },
21874 ));
21875 let pair_2 = Expression::Subscript(Box::new(
21876 crate::expressions::Subscript {
21877 this: pair_col.clone(),
21878 index: Expression::number(2),
21879 },
21880 ));
21881
21882 // arr1[1:pair[2]]
21883 let arr1_slice = Expression::ArraySlice(Box::new(
21884 crate::expressions::ArraySlice {
21885 this: arr1.clone(),
21886 start: Some(Expression::number(1)),
21887 end: Some(pair_2),
21888 },
21889 ));
21890
21891 // e IS NOT DISTINCT FROM pair[1]
21892 let e_col = Expression::column("e");
21893 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
21894 left: e_col.clone(),
21895 right: pair_1.clone(),
21896 left_comments: vec![],
21897 operator_comments: vec![],
21898 trailing_comments: vec![],
21899 inferred_type: None,
21900 }));
21901
21902 // e -> e IS NOT DISTINCT FROM pair[1]
21903 let inner_lambda1 =
21904 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21905 parameters: vec![crate::expressions::Identifier::new("e")],
21906 body: is_not_distinct,
21907 colon: false,
21908 parameter_types: vec![],
21909 }));
21910
21911 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
21912 let inner_filter1 = Expression::Function(Box::new(Function::new(
21913 "LIST_FILTER".to_string(),
21914 vec![arr1_slice, inner_lambda1],
21915 )));
21916
21917 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
21918 let len1 = Expression::Function(Box::new(Function::new(
21919 "LENGTH".to_string(),
21920 vec![inner_filter1],
21921 )));
21922
21923 // e -> e IS NOT DISTINCT FROM pair[1]
21924 let inner_lambda2 =
21925 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21926 parameters: vec![crate::expressions::Identifier::new("e")],
21927 body: Expression::NullSafeEq(Box::new(BinaryOp {
21928 left: e_col,
21929 right: pair_1.clone(),
21930 left_comments: vec![],
21931 operator_comments: vec![],
21932 trailing_comments: vec![],
21933 inferred_type: None,
21934 })),
21935 colon: false,
21936 parameter_types: vec![],
21937 }));
21938
21939 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
21940 let inner_filter2 = Expression::Function(Box::new(Function::new(
21941 "LIST_FILTER".to_string(),
21942 vec![arr2.clone(), inner_lambda2],
21943 )));
21944
21945 // LENGTH(LIST_FILTER(arr2, ...))
21946 let len2 = Expression::Function(Box::new(Function::new(
21947 "LENGTH".to_string(),
21948 vec![inner_filter2],
21949 )));
21950
21951 // LENGTH(...) <= LENGTH(...)
21952 let cond = Expression::Paren(Box::new(Paren {
21953 this: Expression::Lte(Box::new(BinaryOp {
21954 left: len1,
21955 right: len2,
21956 left_comments: vec![],
21957 operator_comments: vec![],
21958 trailing_comments: vec![],
21959 inferred_type: None,
21960 })),
21961 trailing_comments: vec![],
21962 }));
21963
21964 // pair -> (condition)
21965 let filter_lambda =
21966 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21967 parameters: vec![crate::expressions::Identifier::new(
21968 "pair",
21969 )],
21970 body: cond,
21971 colon: false,
21972 parameter_types: vec![],
21973 }));
21974
21975 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
21976 let outer_filter = Expression::Function(Box::new(Function::new(
21977 "LIST_FILTER".to_string(),
21978 vec![list_zip, filter_lambda],
21979 )));
21980
21981 // pair -> pair[1]
21982 let transform_lambda =
21983 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21984 parameters: vec![crate::expressions::Identifier::new(
21985 "pair",
21986 )],
21987 body: pair_1,
21988 colon: false,
21989 parameter_types: vec![],
21990 }));
21991
21992 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
21993 let list_transform = Expression::Function(Box::new(Function::new(
21994 "LIST_TRANSFORM".to_string(),
21995 vec![outer_filter, transform_lambda],
21996 )));
21997
21998 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
21999 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
22000 // END
22001 Ok(Expression::Case(Box::new(Case {
22002 operand: None,
22003 whens: vec![(null_check, Expression::Null(Null))],
22004 else_: Some(list_transform),
22005 comments: vec![],
22006 inferred_type: None,
22007 })))
22008 }
22009 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
22010 "ARRAY_CONSTRUCT" => {
22011 if matches!(target, DialectType::Snowflake) {
22012 Ok(Expression::Function(f))
22013 } else {
22014 Ok(Expression::Array(Box::new(crate::expressions::Array {
22015 expressions: f.args,
22016 })))
22017 }
22018 }
22019 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
22020 "ARRAY"
22021 if !f.args.iter().any(|a| {
22022 matches!(a, Expression::Select(_) | Expression::Subquery(_))
22023 }) =>
22024 {
22025 match target {
22026 DialectType::DuckDB
22027 | DialectType::Snowflake
22028 | DialectType::Presto
22029 | DialectType::Trino
22030 | DialectType::Athena => {
22031 Ok(Expression::Array(Box::new(crate::expressions::Array {
22032 expressions: f.args,
22033 })))
22034 }
22035 _ => Ok(Expression::Function(f)),
22036 }
22037 }
22038 _ => Ok(Expression::Function(f)),
22039 }
22040 } else if let Expression::AggregateFunction(mut af) = e {
22041 let name = af.name.to_ascii_uppercase();
22042 match name.as_str() {
22043 "ARBITRARY" if af.args.len() == 1 => {
22044 let arg = af.args.into_iter().next().unwrap();
22045 Ok(convert_arbitrary(arg, target))
22046 }
22047 "JSON_ARRAYAGG" => {
22048 match target {
22049 DialectType::PostgreSQL => {
22050 af.name = "JSON_AGG".to_string();
22051 // Add NULLS FIRST to ORDER BY items for PostgreSQL
22052 for ordered in af.order_by.iter_mut() {
22053 if ordered.nulls_first.is_none() {
22054 ordered.nulls_first = Some(true);
22055 }
22056 }
22057 Ok(Expression::AggregateFunction(af))
22058 }
22059 _ => Ok(Expression::AggregateFunction(af)),
22060 }
22061 }
22062 _ => Ok(Expression::AggregateFunction(af)),
22063 }
22064 } else if let Expression::JSONArrayAgg(ja) = e {
22065 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
22066 match target {
22067 DialectType::PostgreSQL => {
22068 let mut order_by = Vec::new();
22069 if let Some(order_expr) = ja.order {
22070 if let Expression::OrderBy(ob) = *order_expr {
22071 for mut ordered in ob.expressions {
22072 if ordered.nulls_first.is_none() {
22073 ordered.nulls_first = Some(true);
22074 }
22075 order_by.push(ordered);
22076 }
22077 }
22078 }
22079 Ok(Expression::AggregateFunction(Box::new(
22080 crate::expressions::AggregateFunction {
22081 name: "JSON_AGG".to_string(),
22082 args: vec![*ja.this],
22083 distinct: false,
22084 filter: None,
22085 order_by,
22086 limit: None,
22087 ignore_nulls: None,
22088 inferred_type: None,
22089 },
22090 )))
22091 }
22092 _ => Ok(Expression::JSONArrayAgg(ja)),
22093 }
22094 } else if let Expression::JSONArray(ja) = e {
22095 match target {
22096 DialectType::Snowflake
22097 if ja.null_handling.is_none()
22098 && ja.return_type.is_none()
22099 && ja.strict.is_none() =>
22100 {
22101 let array_construct = Expression::ArrayFunc(Box::new(
22102 crate::expressions::ArrayConstructor {
22103 expressions: ja.expressions,
22104 bracket_notation: false,
22105 use_list_keyword: false,
22106 },
22107 ));
22108 Ok(Expression::Function(Box::new(Function::new(
22109 "TO_VARIANT".to_string(),
22110 vec![array_construct],
22111 ))))
22112 }
22113 _ => Ok(Expression::JSONArray(ja)),
22114 }
22115 } else if let Expression::JsonArray(f) = e {
22116 match target {
22117 DialectType::Snowflake => {
22118 let array_construct = Expression::ArrayFunc(Box::new(
22119 crate::expressions::ArrayConstructor {
22120 expressions: f.expressions,
22121 bracket_notation: false,
22122 use_list_keyword: false,
22123 },
22124 ));
22125 Ok(Expression::Function(Box::new(Function::new(
22126 "TO_VARIANT".to_string(),
22127 vec![array_construct],
22128 ))))
22129 }
22130 _ => Ok(Expression::JsonArray(f)),
22131 }
22132 } else if let Expression::CombinedParameterizedAgg(cpa) = e {
22133 let function_name = match cpa.this.as_ref() {
22134 Expression::Identifier(ident) => Some(ident.name.as_str()),
22135 _ => None,
22136 };
22137 match function_name {
22138 Some(name)
22139 if name.eq_ignore_ascii_case("groupConcat")
22140 && cpa.expressions.len() == 1 =>
22141 {
22142 match target {
22143 DialectType::MySQL | DialectType::SingleStore => {
22144 let this = cpa.expressions[0].clone();
22145 let separator = cpa.params.first().cloned();
22146 Ok(Expression::GroupConcat(Box::new(
22147 crate::expressions::GroupConcatFunc {
22148 this,
22149 separator,
22150 order_by: None,
22151 distinct: false,
22152 filter: None,
22153 limit: None,
22154 inferred_type: None,
22155 },
22156 )))
22157 }
22158 DialectType::DuckDB => Ok(Expression::ListAgg(Box::new({
22159 let this = cpa.expressions[0].clone();
22160 let separator = cpa.params.first().cloned();
22161 crate::expressions::ListAggFunc {
22162 this,
22163 separator,
22164 on_overflow: None,
22165 order_by: None,
22166 distinct: false,
22167 filter: None,
22168 inferred_type: None,
22169 }
22170 }))),
22171 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22172 }
22173 }
22174 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22175 }
22176 } else if let Expression::ToNumber(tn) = e {
22177 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
22178 let arg = *tn.this;
22179 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
22180 this: arg,
22181 to: crate::expressions::DataType::Double {
22182 precision: None,
22183 scale: None,
22184 },
22185 double_colon_syntax: false,
22186 trailing_comments: Vec::new(),
22187 format: None,
22188 default: None,
22189 inferred_type: None,
22190 })))
22191 } else {
22192 Ok(e)
22193 }
22194 }
22195
22196 Action::RegexpLikeToDuckDB => {
22197 if let Expression::RegexpLike(f) = e {
22198 let mut args = vec![f.this, f.pattern];
22199 if let Some(flags) = f.flags {
22200 args.push(flags);
22201 }
22202 Ok(Expression::Function(Box::new(Function::new(
22203 "REGEXP_MATCHES".to_string(),
22204 args,
22205 ))))
22206 } else {
22207 Ok(e)
22208 }
22209 }
22210 Action::EpochConvert => {
22211 if let Expression::Epoch(f) = e {
22212 let arg = f.this;
22213 let name = match target {
22214 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
22215 "UNIX_TIMESTAMP"
22216 }
22217 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
22218 DialectType::BigQuery => "TIME_TO_UNIX",
22219 _ => "EPOCH",
22220 };
22221 Ok(Expression::Function(Box::new(Function::new(
22222 name.to_string(),
22223 vec![arg],
22224 ))))
22225 } else {
22226 Ok(e)
22227 }
22228 }
22229 Action::EpochMsConvert => {
22230 use crate::expressions::{BinaryOp, Cast};
22231 if let Expression::EpochMs(f) = e {
22232 let arg = f.this;
22233 match target {
22234 DialectType::Spark | DialectType::Databricks => {
22235 Ok(Expression::Function(Box::new(Function::new(
22236 "TIMESTAMP_MILLIS".to_string(),
22237 vec![arg],
22238 ))))
22239 }
22240 DialectType::BigQuery => Ok(Expression::Function(Box::new(
22241 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
22242 ))),
22243 DialectType::Presto | DialectType::Trino => {
22244 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
22245 let cast_arg = Expression::Cast(Box::new(Cast {
22246 this: arg,
22247 to: DataType::Double {
22248 precision: None,
22249 scale: None,
22250 },
22251 trailing_comments: Vec::new(),
22252 double_colon_syntax: false,
22253 format: None,
22254 default: None,
22255 inferred_type: None,
22256 }));
22257 let div = Expression::Div(Box::new(BinaryOp::new(
22258 cast_arg,
22259 Expression::Function(Box::new(Function::new(
22260 "POW".to_string(),
22261 vec![Expression::number(10), Expression::number(3)],
22262 ))),
22263 )));
22264 Ok(Expression::Function(Box::new(Function::new(
22265 "FROM_UNIXTIME".to_string(),
22266 vec![div],
22267 ))))
22268 }
22269 DialectType::MySQL => {
22270 // FROM_UNIXTIME(x / POWER(10, 3))
22271 let div = Expression::Div(Box::new(BinaryOp::new(
22272 arg,
22273 Expression::Function(Box::new(Function::new(
22274 "POWER".to_string(),
22275 vec![Expression::number(10), Expression::number(3)],
22276 ))),
22277 )));
22278 Ok(Expression::Function(Box::new(Function::new(
22279 "FROM_UNIXTIME".to_string(),
22280 vec![div],
22281 ))))
22282 }
22283 DialectType::PostgreSQL | DialectType::Redshift => {
22284 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
22285 let cast_arg = Expression::Cast(Box::new(Cast {
22286 this: arg,
22287 to: DataType::Custom {
22288 name: "DOUBLE PRECISION".to_string(),
22289 },
22290 trailing_comments: Vec::new(),
22291 double_colon_syntax: false,
22292 format: None,
22293 default: None,
22294 inferred_type: None,
22295 }));
22296 let div = Expression::Div(Box::new(BinaryOp::new(
22297 cast_arg,
22298 Expression::Function(Box::new(Function::new(
22299 "POWER".to_string(),
22300 vec![Expression::number(10), Expression::number(3)],
22301 ))),
22302 )));
22303 Ok(Expression::Function(Box::new(Function::new(
22304 "TO_TIMESTAMP".to_string(),
22305 vec![div],
22306 ))))
22307 }
22308 DialectType::ClickHouse => {
22309 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
22310 let cast_arg = Expression::Cast(Box::new(Cast {
22311 this: arg,
22312 to: DataType::Nullable {
22313 inner: Box::new(DataType::BigInt { length: None }),
22314 },
22315 trailing_comments: Vec::new(),
22316 double_colon_syntax: false,
22317 format: None,
22318 default: None,
22319 inferred_type: None,
22320 }));
22321 Ok(Expression::Function(Box::new(Function::new(
22322 "fromUnixTimestamp64Milli".to_string(),
22323 vec![cast_arg],
22324 ))))
22325 }
22326 _ => Ok(Expression::Function(Box::new(Function::new(
22327 "EPOCH_MS".to_string(),
22328 vec![arg],
22329 )))),
22330 }
22331 } else {
22332 Ok(e)
22333 }
22334 }
22335 Action::TSQLTypeNormalize => {
22336 if let Expression::DataType(dt) = e {
22337 let new_dt = match &dt {
22338 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
22339 DataType::Decimal {
22340 precision: Some(15),
22341 scale: Some(4),
22342 }
22343 }
22344 DataType::Custom { name }
22345 if name.eq_ignore_ascii_case("SMALLMONEY") =>
22346 {
22347 DataType::Decimal {
22348 precision: Some(6),
22349 scale: Some(4),
22350 }
22351 }
22352 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
22353 DataType::Timestamp {
22354 timezone: false,
22355 precision: None,
22356 }
22357 }
22358 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
22359 DataType::Float {
22360 precision: None,
22361 scale: None,
22362 real_spelling: false,
22363 }
22364 }
22365 DataType::Float {
22366 real_spelling: true,
22367 ..
22368 } => DataType::Float {
22369 precision: None,
22370 scale: None,
22371 real_spelling: false,
22372 },
22373 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
22374 DataType::Custom {
22375 name: "BLOB".to_string(),
22376 }
22377 }
22378 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
22379 DataType::Boolean
22380 }
22381 DataType::Custom { name }
22382 if name.eq_ignore_ascii_case("ROWVERSION") =>
22383 {
22384 DataType::Custom {
22385 name: "BINARY".to_string(),
22386 }
22387 }
22388 DataType::Custom { name }
22389 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
22390 {
22391 match target {
22392 DialectType::Spark
22393 | DialectType::Databricks
22394 | DialectType::Hive => DataType::Custom {
22395 name: "STRING".to_string(),
22396 },
22397 _ => DataType::VarChar {
22398 length: Some(36),
22399 parenthesized_length: true,
22400 },
22401 }
22402 }
22403 DataType::Custom { name }
22404 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
22405 {
22406 match target {
22407 DialectType::Spark
22408 | DialectType::Databricks
22409 | DialectType::Hive => DataType::Timestamp {
22410 timezone: false,
22411 precision: None,
22412 },
22413 _ => DataType::Timestamp {
22414 timezone: true,
22415 precision: None,
22416 },
22417 }
22418 }
22419 DataType::Custom { ref name }
22420 if name.len() >= 10
22421 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
22422 {
22423 // DATETIME2(n) -> TIMESTAMP
22424 DataType::Timestamp {
22425 timezone: false,
22426 precision: None,
22427 }
22428 }
22429 DataType::Custom { ref name }
22430 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
22431 {
22432 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
22433 match target {
22434 DialectType::Spark
22435 | DialectType::Databricks
22436 | DialectType::Hive => DataType::Timestamp {
22437 timezone: false,
22438 precision: None,
22439 },
22440 _ => return Ok(Expression::DataType(dt)),
22441 }
22442 }
22443 DataType::Custom { ref name }
22444 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
22445 {
22446 // Parse NUMERIC(p,s) back to Decimal(p,s)
22447 let upper = name.to_ascii_uppercase();
22448 if let Some(inner) = upper
22449 .strip_prefix("NUMERIC(")
22450 .and_then(|s| s.strip_suffix(')'))
22451 {
22452 let parts: Vec<&str> = inner.split(',').collect();
22453 let precision =
22454 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
22455 let scale =
22456 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
22457 DataType::Decimal { precision, scale }
22458 } else if upper == "NUMERIC" {
22459 DataType::Decimal {
22460 precision: None,
22461 scale: None,
22462 }
22463 } else {
22464 return Ok(Expression::DataType(dt));
22465 }
22466 }
22467 DataType::Float {
22468 precision: Some(p), ..
22469 } => {
22470 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
22471 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
22472 let boundary = match target {
22473 DialectType::Hive
22474 | DialectType::Spark
22475 | DialectType::Databricks => 32,
22476 _ => 24,
22477 };
22478 if *p <= boundary {
22479 DataType::Float {
22480 precision: None,
22481 scale: None,
22482 real_spelling: false,
22483 }
22484 } else {
22485 DataType::Double {
22486 precision: None,
22487 scale: None,
22488 }
22489 }
22490 }
22491 DataType::TinyInt { .. } => match target {
22492 DialectType::DuckDB => DataType::Custom {
22493 name: "UTINYINT".to_string(),
22494 },
22495 DialectType::Hive
22496 | DialectType::Spark
22497 | DialectType::Databricks => DataType::SmallInt { length: None },
22498 _ => return Ok(Expression::DataType(dt)),
22499 },
22500 // INTEGER -> INT for Spark/Databricks
22501 DataType::Int {
22502 length,
22503 integer_spelling: true,
22504 } => DataType::Int {
22505 length: *length,
22506 integer_spelling: false,
22507 },
22508 _ => return Ok(Expression::DataType(dt)),
22509 };
22510 Ok(Expression::DataType(new_dt))
22511 } else {
22512 Ok(e)
22513 }
22514 }
22515 Action::MySQLSafeDivide => {
22516 use crate::expressions::{BinaryOp, Cast};
22517 if let Expression::Div(op) = e {
22518 let left = op.left;
22519 let right = op.right;
22520 // For SQLite: CAST left as REAL but NO NULLIF wrapping
22521 if matches!(target, DialectType::SQLite) {
22522 let new_left = Expression::Cast(Box::new(Cast {
22523 this: left,
22524 to: DataType::Float {
22525 precision: None,
22526 scale: None,
22527 real_spelling: true,
22528 },
22529 trailing_comments: Vec::new(),
22530 double_colon_syntax: false,
22531 format: None,
22532 default: None,
22533 inferred_type: None,
22534 }));
22535 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
22536 }
22537 // Wrap right in NULLIF(right, 0)
22538 let nullif_right = Expression::Function(Box::new(Function::new(
22539 "NULLIF".to_string(),
22540 vec![right, Expression::number(0)],
22541 )));
22542 // For some dialects, also CAST the left side
22543 let new_left = match target {
22544 DialectType::PostgreSQL
22545 | DialectType::Redshift
22546 | DialectType::Teradata
22547 | DialectType::Materialize
22548 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
22549 this: left,
22550 to: DataType::Custom {
22551 name: "DOUBLE PRECISION".to_string(),
22552 },
22553 trailing_comments: Vec::new(),
22554 double_colon_syntax: false,
22555 format: None,
22556 default: None,
22557 inferred_type: None,
22558 })),
22559 DialectType::Drill
22560 | DialectType::Trino
22561 | DialectType::Presto
22562 | DialectType::Athena => Expression::Cast(Box::new(Cast {
22563 this: left,
22564 to: DataType::Double {
22565 precision: None,
22566 scale: None,
22567 },
22568 trailing_comments: Vec::new(),
22569 double_colon_syntax: false,
22570 format: None,
22571 default: None,
22572 inferred_type: None,
22573 })),
22574 DialectType::TSQL => Expression::Cast(Box::new(Cast {
22575 this: left,
22576 to: DataType::Float {
22577 precision: None,
22578 scale: None,
22579 real_spelling: false,
22580 },
22581 trailing_comments: Vec::new(),
22582 double_colon_syntax: false,
22583 format: None,
22584 default: None,
22585 inferred_type: None,
22586 })),
22587 _ => left,
22588 };
22589 Ok(Expression::Div(Box::new(BinaryOp::new(
22590 new_left,
22591 nullif_right,
22592 ))))
22593 } else {
22594 Ok(e)
22595 }
22596 }
22597 Action::AlterTableRenameStripSchema => {
22598 if let Expression::AlterTable(mut at) = e {
22599 if let Some(crate::expressions::AlterTableAction::RenameTable(
22600 ref mut new_tbl,
22601 )) = at.actions.first_mut()
22602 {
22603 new_tbl.schema = None;
22604 new_tbl.catalog = None;
22605 }
22606 Ok(Expression::AlterTable(at))
22607 } else {
22608 Ok(e)
22609 }
22610 }
22611 Action::NullsOrdering => {
22612 // Fill in the source dialect's implied null ordering default.
22613 // This makes implicit null ordering explicit so the target generator
22614 // can correctly strip or keep it.
22615 //
22616 // Dialect null ordering categories:
22617 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
22618 // ASC -> NULLS LAST, DESC -> NULLS FIRST
22619 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
22620 // ASC -> NULLS FIRST, DESC -> NULLS LAST
22621 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
22622 // NULLS LAST always (both ASC and DESC)
22623 if let Expression::Ordered(mut o) = e {
22624 let is_asc = !o.desc;
22625
22626 let is_source_nulls_large = matches!(
22627 source,
22628 DialectType::Oracle
22629 | DialectType::PostgreSQL
22630 | DialectType::Redshift
22631 | DialectType::Snowflake
22632 );
22633 let is_source_nulls_last = matches!(
22634 source,
22635 DialectType::DuckDB
22636 | DialectType::Presto
22637 | DialectType::Trino
22638 | DialectType::Dremio
22639 | DialectType::Athena
22640 | DialectType::ClickHouse
22641 | DialectType::Drill
22642 | DialectType::Exasol
22643 | DialectType::DataFusion
22644 );
22645
22646 // Determine target category to check if default matches
22647 let is_target_nulls_large = matches!(
22648 target,
22649 DialectType::Oracle
22650 | DialectType::PostgreSQL
22651 | DialectType::Redshift
22652 | DialectType::Snowflake
22653 );
22654 let is_target_nulls_last = matches!(
22655 target,
22656 DialectType::DuckDB
22657 | DialectType::Presto
22658 | DialectType::Trino
22659 | DialectType::Dremio
22660 | DialectType::Athena
22661 | DialectType::ClickHouse
22662 | DialectType::Drill
22663 | DialectType::Exasol
22664 | DialectType::DataFusion
22665 );
22666
22667 // Compute the implied nulls_first for source
22668 let source_nulls_first = if is_source_nulls_large {
22669 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
22670 } else if is_source_nulls_last {
22671 false // NULLS LAST always
22672 } else {
22673 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
22674 };
22675
22676 // Compute the target's default
22677 let target_nulls_first = if is_target_nulls_large {
22678 !is_asc
22679 } else if is_target_nulls_last {
22680 false
22681 } else {
22682 is_asc
22683 };
22684
22685 // Only add explicit nulls ordering if source and target defaults differ
22686 if source_nulls_first != target_nulls_first {
22687 o.nulls_first = Some(source_nulls_first);
22688 }
22689 // If they match, leave nulls_first as None so the generator won't output it
22690
22691 Ok(Expression::Ordered(o))
22692 } else {
22693 Ok(e)
22694 }
22695 }
22696 Action::StringAggConvert => {
22697 match e {
22698 Expression::WithinGroup(wg) => {
22699 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
22700 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
22701 let (x_opt, sep_opt, distinct) = match wg.this {
22702 Expression::AggregateFunction(ref af)
22703 if af.name.eq_ignore_ascii_case("STRING_AGG")
22704 && af.args.len() >= 2 =>
22705 {
22706 (
22707 Some(af.args[0].clone()),
22708 Some(af.args[1].clone()),
22709 af.distinct,
22710 )
22711 }
22712 Expression::Function(ref f)
22713 if f.name.eq_ignore_ascii_case("STRING_AGG")
22714 && f.args.len() >= 2 =>
22715 {
22716 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
22717 }
22718 Expression::StringAgg(ref sa) => {
22719 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
22720 }
22721 _ => (None, None, false),
22722 };
22723 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
22724 let order_by = wg.order_by;
22725
22726 match target {
22727 DialectType::TSQL | DialectType::Fabric => {
22728 // Keep as WithinGroup(StringAgg) for TSQL
22729 Ok(Expression::WithinGroup(Box::new(
22730 crate::expressions::WithinGroup {
22731 this: Expression::StringAgg(Box::new(
22732 crate::expressions::StringAggFunc {
22733 this: x,
22734 separator: Some(sep),
22735 order_by: None, // order_by goes in WithinGroup, not StringAgg
22736 distinct,
22737 filter: None,
22738 limit: None,
22739 inferred_type: None,
22740 },
22741 )),
22742 order_by,
22743 },
22744 )))
22745 }
22746 DialectType::MySQL
22747 | DialectType::SingleStore
22748 | DialectType::Doris
22749 | DialectType::StarRocks => {
22750 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
22751 Ok(Expression::GroupConcat(Box::new(
22752 crate::expressions::GroupConcatFunc {
22753 this: x,
22754 separator: Some(sep),
22755 order_by: Some(order_by),
22756 distinct,
22757 filter: None,
22758 limit: None,
22759 inferred_type: None,
22760 },
22761 )))
22762 }
22763 DialectType::SQLite => {
22764 // GROUP_CONCAT(x, sep) - no ORDER BY support
22765 Ok(Expression::GroupConcat(Box::new(
22766 crate::expressions::GroupConcatFunc {
22767 this: x,
22768 separator: Some(sep),
22769 order_by: None,
22770 distinct,
22771 filter: None,
22772 limit: None,
22773 inferred_type: None,
22774 },
22775 )))
22776 }
22777 DialectType::PostgreSQL | DialectType::Redshift => {
22778 // STRING_AGG(x, sep ORDER BY z)
22779 Ok(Expression::StringAgg(Box::new(
22780 crate::expressions::StringAggFunc {
22781 this: x,
22782 separator: Some(sep),
22783 order_by: Some(order_by),
22784 distinct,
22785 filter: None,
22786 limit: None,
22787 inferred_type: None,
22788 },
22789 )))
22790 }
22791 _ => {
22792 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
22793 Ok(Expression::StringAgg(Box::new(
22794 crate::expressions::StringAggFunc {
22795 this: x,
22796 separator: Some(sep),
22797 order_by: Some(order_by),
22798 distinct,
22799 filter: None,
22800 limit: None,
22801 inferred_type: None,
22802 },
22803 )))
22804 }
22805 }
22806 } else {
22807 Ok(Expression::WithinGroup(wg))
22808 }
22809 }
22810 Expression::StringAgg(sa) => {
22811 match target {
22812 DialectType::MySQL
22813 | DialectType::SingleStore
22814 | DialectType::Doris
22815 | DialectType::StarRocks => {
22816 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
22817 Ok(Expression::GroupConcat(Box::new(
22818 crate::expressions::GroupConcatFunc {
22819 this: sa.this,
22820 separator: sa.separator,
22821 order_by: sa.order_by,
22822 distinct: sa.distinct,
22823 filter: sa.filter,
22824 limit: None,
22825 inferred_type: None,
22826 },
22827 )))
22828 }
22829 DialectType::SQLite => {
22830 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
22831 Ok(Expression::GroupConcat(Box::new(
22832 crate::expressions::GroupConcatFunc {
22833 this: sa.this,
22834 separator: sa.separator,
22835 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
22836 distinct: sa.distinct,
22837 filter: sa.filter,
22838 limit: None,
22839 inferred_type: None,
22840 },
22841 )))
22842 }
22843 DialectType::Spark | DialectType::Databricks => {
22844 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
22845 Ok(Expression::ListAgg(Box::new(
22846 crate::expressions::ListAggFunc {
22847 this: sa.this,
22848 separator: sa.separator,
22849 on_overflow: None,
22850 order_by: sa.order_by,
22851 distinct: sa.distinct,
22852 filter: None,
22853 inferred_type: None,
22854 },
22855 )))
22856 }
22857 _ => Ok(Expression::StringAgg(sa)),
22858 }
22859 }
22860 _ => Ok(e),
22861 }
22862 }
22863 Action::GroupConcatConvert => {
22864 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
22865 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
22866 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
22867 if let Expression::Function(ref f) = expr {
22868 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22869 let mut result = f.args[0].clone();
22870 for arg in &f.args[1..] {
22871 result = Expression::Concat(Box::new(BinaryOp {
22872 left: result,
22873 right: arg.clone(),
22874 left_comments: vec![],
22875 operator_comments: vec![],
22876 trailing_comments: vec![],
22877 inferred_type: None,
22878 }));
22879 }
22880 return result;
22881 }
22882 }
22883 expr
22884 }
22885 fn expand_concat_to_plus(expr: Expression) -> Expression {
22886 if let Expression::Function(ref f) = expr {
22887 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22888 let mut result = f.args[0].clone();
22889 for arg in &f.args[1..] {
22890 result = Expression::Add(Box::new(BinaryOp {
22891 left: result,
22892 right: arg.clone(),
22893 left_comments: vec![],
22894 operator_comments: vec![],
22895 trailing_comments: vec![],
22896 inferred_type: None,
22897 }));
22898 }
22899 return result;
22900 }
22901 }
22902 expr
22903 }
22904 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
22905 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
22906 if let Expression::Function(ref f) = expr {
22907 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22908 let new_args: Vec<Expression> = f
22909 .args
22910 .iter()
22911 .map(|arg| {
22912 Expression::Cast(Box::new(crate::expressions::Cast {
22913 this: arg.clone(),
22914 to: crate::expressions::DataType::VarChar {
22915 length: None,
22916 parenthesized_length: false,
22917 },
22918 trailing_comments: Vec::new(),
22919 double_colon_syntax: false,
22920 format: None,
22921 default: None,
22922 inferred_type: None,
22923 }))
22924 })
22925 .collect();
22926 return Expression::Function(Box::new(
22927 crate::expressions::Function::new(
22928 "CONCAT".to_string(),
22929 new_args,
22930 ),
22931 ));
22932 }
22933 }
22934 expr
22935 }
22936 if let Expression::GroupConcat(gc) = e {
22937 match target {
22938 DialectType::Presto => {
22939 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
22940 let sep = gc.separator.unwrap_or(Expression::string(","));
22941 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
22942 let this = wrap_concat_args_in_varchar_cast(gc.this);
22943 let array_agg =
22944 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
22945 this,
22946 distinct: gc.distinct,
22947 filter: gc.filter,
22948 order_by: gc.order_by.unwrap_or_default(),
22949 name: None,
22950 ignore_nulls: None,
22951 having_max: None,
22952 limit: None,
22953 inferred_type: None,
22954 }));
22955 Ok(Expression::ArrayJoin(Box::new(
22956 crate::expressions::ArrayJoinFunc {
22957 this: array_agg,
22958 separator: sep,
22959 null_replacement: None,
22960 },
22961 )))
22962 }
22963 DialectType::Trino => {
22964 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
22965 let sep = gc.separator.unwrap_or(Expression::string(","));
22966 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
22967 let this = wrap_concat_args_in_varchar_cast(gc.this);
22968 Ok(Expression::ListAgg(Box::new(
22969 crate::expressions::ListAggFunc {
22970 this,
22971 separator: Some(sep),
22972 on_overflow: None,
22973 order_by: gc.order_by,
22974 distinct: gc.distinct,
22975 filter: gc.filter,
22976 inferred_type: None,
22977 },
22978 )))
22979 }
22980 DialectType::PostgreSQL
22981 | DialectType::Redshift
22982 | DialectType::Snowflake
22983 | DialectType::DuckDB
22984 | DialectType::Hive
22985 | DialectType::ClickHouse => {
22986 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
22987 let sep = gc.separator.unwrap_or(Expression::string(","));
22988 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
22989 let this = expand_concat_to_dpipe(gc.this);
22990 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
22991 let order_by = if target == DialectType::PostgreSQL {
22992 gc.order_by.map(|ords| {
22993 ords.into_iter()
22994 .map(|mut o| {
22995 if o.nulls_first.is_none() {
22996 if o.desc {
22997 o.nulls_first = Some(false);
22998 // NULLS LAST
22999 } else {
23000 o.nulls_first = Some(true);
23001 // NULLS FIRST
23002 }
23003 }
23004 o
23005 })
23006 .collect()
23007 })
23008 } else {
23009 gc.order_by
23010 };
23011 Ok(Expression::StringAgg(Box::new(
23012 crate::expressions::StringAggFunc {
23013 this,
23014 separator: Some(sep),
23015 order_by,
23016 distinct: gc.distinct,
23017 filter: gc.filter,
23018 limit: None,
23019 inferred_type: None,
23020 },
23021 )))
23022 }
23023 DialectType::TSQL => {
23024 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
23025 // TSQL doesn't support DISTINCT in STRING_AGG
23026 let sep = gc.separator.unwrap_or(Expression::string(","));
23027 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
23028 let this = expand_concat_to_plus(gc.this);
23029 Ok(Expression::StringAgg(Box::new(
23030 crate::expressions::StringAggFunc {
23031 this,
23032 separator: Some(sep),
23033 order_by: gc.order_by,
23034 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
23035 filter: gc.filter,
23036 limit: None,
23037 inferred_type: None,
23038 },
23039 )))
23040 }
23041 DialectType::SQLite => {
23042 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
23043 // SQLite GROUP_CONCAT doesn't support ORDER BY
23044 // Expand CONCAT(a,b,c) -> a || b || c
23045 let this = expand_concat_to_dpipe(gc.this);
23046 Ok(Expression::GroupConcat(Box::new(
23047 crate::expressions::GroupConcatFunc {
23048 this,
23049 separator: gc.separator,
23050 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
23051 distinct: gc.distinct,
23052 filter: gc.filter,
23053 limit: None,
23054 inferred_type: None,
23055 },
23056 )))
23057 }
23058 DialectType::Spark | DialectType::Databricks => {
23059 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
23060 let sep = gc.separator.unwrap_or(Expression::string(","));
23061 Ok(Expression::ListAgg(Box::new(
23062 crate::expressions::ListAggFunc {
23063 this: gc.this,
23064 separator: Some(sep),
23065 on_overflow: None,
23066 order_by: gc.order_by,
23067 distinct: gc.distinct,
23068 filter: None,
23069 inferred_type: None,
23070 },
23071 )))
23072 }
23073 DialectType::MySQL
23074 | DialectType::SingleStore
23075 | DialectType::StarRocks => {
23076 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
23077 if gc.separator.is_none() {
23078 let mut gc = gc;
23079 gc.separator = Some(Expression::string(","));
23080 Ok(Expression::GroupConcat(gc))
23081 } else {
23082 Ok(Expression::GroupConcat(gc))
23083 }
23084 }
23085 _ => Ok(Expression::GroupConcat(gc)),
23086 }
23087 } else {
23088 Ok(e)
23089 }
23090 }
23091 Action::TempTableHash => {
23092 match e {
23093 Expression::CreateTable(mut ct) => {
23094 // TSQL #table -> TEMPORARY TABLE with # stripped from name
23095 let name = &ct.name.name.name;
23096 if name.starts_with('#') {
23097 ct.name.name.name = name.trim_start_matches('#').to_string();
23098 }
23099 // Set temporary flag
23100 ct.temporary = true;
23101 Ok(Expression::CreateTable(ct))
23102 }
23103 Expression::Table(mut tr) => {
23104 // Strip # from table references
23105 let name = &tr.name.name;
23106 if name.starts_with('#') {
23107 tr.name.name = name.trim_start_matches('#').to_string();
23108 }
23109 Ok(Expression::Table(tr))
23110 }
23111 Expression::DropTable(mut dt) => {
23112 // Strip # from DROP TABLE names
23113 for table_ref in &mut dt.names {
23114 if table_ref.name.name.starts_with('#') {
23115 table_ref.name.name =
23116 table_ref.name.name.trim_start_matches('#').to_string();
23117 }
23118 }
23119 Ok(Expression::DropTable(dt))
23120 }
23121 _ => Ok(e),
23122 }
23123 }
23124 Action::NvlClearOriginal => {
23125 if let Expression::Nvl(mut f) = e {
23126 f.original_name = None;
23127 Ok(Expression::Nvl(f))
23128 } else {
23129 Ok(e)
23130 }
23131 }
23132 Action::HiveCastToTryCast => {
23133 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
23134 if let Expression::Cast(mut c) = e {
23135 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
23136 // (Spark's TIMESTAMP is always timezone-aware)
23137 if matches!(target, DialectType::DuckDB)
23138 && matches!(source, DialectType::Spark | DialectType::Databricks)
23139 && matches!(
23140 c.to,
23141 DataType::Timestamp {
23142 timezone: false,
23143 ..
23144 }
23145 )
23146 {
23147 c.to = DataType::Custom {
23148 name: "TIMESTAMPTZ".to_string(),
23149 };
23150 }
23151 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
23152 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
23153 if matches!(target, DialectType::Databricks | DialectType::Spark)
23154 && matches!(
23155 source,
23156 DialectType::Spark | DialectType::Databricks | DialectType::Hive
23157 )
23158 && Self::has_varchar_char_type(&c.to)
23159 {
23160 c.to = Self::normalize_varchar_to_string(c.to);
23161 }
23162 Ok(Expression::TryCast(c))
23163 } else {
23164 Ok(e)
23165 }
23166 }
23167 Action::XorExpand => {
23168 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
23169 // Snowflake: use BOOLXOR(a, b) instead
23170 if let Expression::Xor(xor) = e {
23171 // Collect all XOR operands
23172 let mut operands = Vec::new();
23173 if let Some(this) = xor.this {
23174 operands.push(*this);
23175 }
23176 if let Some(expr) = xor.expression {
23177 operands.push(*expr);
23178 }
23179 operands.extend(xor.expressions);
23180
23181 // Snowflake: use BOOLXOR(a, b)
23182 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
23183 let a = operands.remove(0);
23184 let b = operands.remove(0);
23185 return Ok(Expression::Function(Box::new(Function::new(
23186 "BOOLXOR".to_string(),
23187 vec![a, b],
23188 ))));
23189 }
23190
23191 // Helper to build (a AND NOT b) OR (NOT a AND b)
23192 let make_xor = |a: Expression, b: Expression| -> Expression {
23193 let not_b = Expression::Not(Box::new(
23194 crate::expressions::UnaryOp::new(b.clone()),
23195 ));
23196 let not_a = Expression::Not(Box::new(
23197 crate::expressions::UnaryOp::new(a.clone()),
23198 ));
23199 let left_and = Expression::And(Box::new(BinaryOp {
23200 left: a,
23201 right: Expression::Paren(Box::new(Paren {
23202 this: not_b,
23203 trailing_comments: Vec::new(),
23204 })),
23205 left_comments: Vec::new(),
23206 operator_comments: Vec::new(),
23207 trailing_comments: Vec::new(),
23208 inferred_type: None,
23209 }));
23210 let right_and = Expression::And(Box::new(BinaryOp {
23211 left: Expression::Paren(Box::new(Paren {
23212 this: not_a,
23213 trailing_comments: Vec::new(),
23214 })),
23215 right: b,
23216 left_comments: Vec::new(),
23217 operator_comments: Vec::new(),
23218 trailing_comments: Vec::new(),
23219 inferred_type: None,
23220 }));
23221 Expression::Or(Box::new(BinaryOp {
23222 left: Expression::Paren(Box::new(Paren {
23223 this: left_and,
23224 trailing_comments: Vec::new(),
23225 })),
23226 right: Expression::Paren(Box::new(Paren {
23227 this: right_and,
23228 trailing_comments: Vec::new(),
23229 })),
23230 left_comments: Vec::new(),
23231 operator_comments: Vec::new(),
23232 trailing_comments: Vec::new(),
23233 inferred_type: None,
23234 }))
23235 };
23236
23237 if operands.len() >= 2 {
23238 let mut result = make_xor(operands.remove(0), operands.remove(0));
23239 for operand in operands {
23240 result = make_xor(result, operand);
23241 }
23242 Ok(result)
23243 } else if operands.len() == 1 {
23244 Ok(operands.remove(0))
23245 } else {
23246 // No operands - return FALSE (shouldn't happen)
23247 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
23248 value: false,
23249 }))
23250 }
23251 } else {
23252 Ok(e)
23253 }
23254 }
23255 Action::DatePartUnquote => {
23256 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
23257 // Convert the quoted string first arg to a bare Column/Identifier
23258 if let Expression::Function(mut f) = e {
23259 if let Some(Expression::Literal(lit)) = f.args.first() {
23260 if let crate::expressions::Literal::String(s) = lit.as_ref() {
23261 let bare_name = s.to_ascii_lowercase();
23262 f.args[0] =
23263 Expression::Column(Box::new(crate::expressions::Column {
23264 name: Identifier::new(bare_name),
23265 table: None,
23266 join_mark: false,
23267 trailing_comments: Vec::new(),
23268 span: None,
23269 inferred_type: None,
23270 }));
23271 }
23272 }
23273 Ok(Expression::Function(f))
23274 } else {
23275 Ok(e)
23276 }
23277 }
23278 Action::ArrayLengthConvert => {
23279 // Extract the argument from the expression
23280 let arg = match e {
23281 Expression::Cardinality(ref f) => f.this.clone(),
23282 Expression::ArrayLength(ref f) => f.this.clone(),
23283 Expression::ArraySize(ref f) => f.this.clone(),
23284 _ => return Ok(e),
23285 };
23286 match target {
23287 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23288 Ok(Expression::Function(Box::new(Function::new(
23289 "SIZE".to_string(),
23290 vec![arg],
23291 ))))
23292 }
23293 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23294 Ok(Expression::Cardinality(Box::new(
23295 crate::expressions::UnaryFunc::new(arg),
23296 )))
23297 }
23298 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
23299 crate::expressions::UnaryFunc::new(arg),
23300 ))),
23301 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
23302 crate::expressions::UnaryFunc::new(arg),
23303 ))),
23304 DialectType::PostgreSQL | DialectType::Redshift => {
23305 // PostgreSQL ARRAY_LENGTH requires dimension arg
23306 Ok(Expression::Function(Box::new(Function::new(
23307 "ARRAY_LENGTH".to_string(),
23308 vec![arg, Expression::number(1)],
23309 ))))
23310 }
23311 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
23312 crate::expressions::UnaryFunc::new(arg),
23313 ))),
23314 _ => Ok(e), // Keep original
23315 }
23316 }
23317
23318 Action::JsonExtractToArrow => {
23319 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
23320 if let Expression::JsonExtract(mut f) = e {
23321 f.arrow_syntax = true;
23322 // Transform path: convert bracket notation to dot notation
23323 // SQLite strips wildcards, DuckDB preserves them
23324 if let Expression::Literal(ref lit) = f.path {
23325 if let Literal::String(ref s) = lit.as_ref() {
23326 let mut transformed = s.clone();
23327 if matches!(target, DialectType::SQLite) {
23328 transformed = Self::strip_json_wildcards(&transformed);
23329 }
23330 transformed = Self::bracket_to_dot_notation(&transformed);
23331 if transformed != *s {
23332 f.path = Expression::string(&transformed);
23333 }
23334 }
23335 }
23336 Ok(Expression::JsonExtract(f))
23337 } else {
23338 Ok(e)
23339 }
23340 }
23341
23342 Action::JsonExtractToGetJsonObject => {
23343 if let Expression::JsonExtract(f) = e {
23344 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
23345 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
23346 // Use proper decomposition that handles brackets
23347 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
23348 if let Literal::String(ref s) = lit.as_ref() {
23349 let parts = Self::decompose_json_path(s);
23350 parts.into_iter().map(|k| Expression::string(&k)).collect()
23351 } else {
23352 vec![]
23353 }
23354 } else {
23355 vec![f.path]
23356 };
23357 let func_name = if matches!(target, DialectType::Redshift) {
23358 "JSON_EXTRACT_PATH_TEXT"
23359 } else {
23360 "JSON_EXTRACT_PATH"
23361 };
23362 let mut args = vec![f.this];
23363 args.extend(keys);
23364 Ok(Expression::Function(Box::new(Function::new(
23365 func_name.to_string(),
23366 args,
23367 ))))
23368 } else {
23369 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23370 // Convert bracket double quotes to single quotes
23371 let path = if let Expression::Literal(ref lit) = f.path {
23372 if let Literal::String(ref s) = lit.as_ref() {
23373 let normalized = Self::bracket_to_single_quotes(s);
23374 if normalized != *s {
23375 Expression::string(&normalized)
23376 } else {
23377 f.path.clone()
23378 }
23379 } else {
23380 f.path.clone()
23381 }
23382 } else {
23383 f.path.clone()
23384 };
23385 Ok(Expression::Function(Box::new(Function::new(
23386 "GET_JSON_OBJECT".to_string(),
23387 vec![f.this, path],
23388 ))))
23389 }
23390 } else {
23391 Ok(e)
23392 }
23393 }
23394
23395 Action::JsonExtractScalarToGetJsonObject => {
23396 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23397 if let Expression::JsonExtractScalar(f) = e {
23398 Ok(Expression::Function(Box::new(Function::new(
23399 "GET_JSON_OBJECT".to_string(),
23400 vec![f.this, f.path],
23401 ))))
23402 } else {
23403 Ok(e)
23404 }
23405 }
23406
23407 Action::JsonExtractToTsql => {
23408 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
23409 let (this, path) = match e {
23410 Expression::JsonExtract(f) => (f.this, f.path),
23411 Expression::JsonExtractScalar(f) => (f.this, f.path),
23412 _ => return Ok(e),
23413 };
23414 // Transform path: strip wildcards, convert bracket notation to dot notation
23415 let transformed_path = if let Expression::Literal(ref lit) = path {
23416 if let Literal::String(ref s) = lit.as_ref() {
23417 let stripped = Self::strip_json_wildcards(s);
23418 let dotted = Self::bracket_to_dot_notation(&stripped);
23419 Expression::string(&dotted)
23420 } else {
23421 path.clone()
23422 }
23423 } else {
23424 path
23425 };
23426 let json_query = Expression::Function(Box::new(Function::new(
23427 "JSON_QUERY".to_string(),
23428 vec![this.clone(), transformed_path.clone()],
23429 )));
23430 let json_value = Expression::Function(Box::new(Function::new(
23431 "JSON_VALUE".to_string(),
23432 vec![this, transformed_path],
23433 )));
23434 Ok(Expression::Function(Box::new(Function::new(
23435 "ISNULL".to_string(),
23436 vec![json_query, json_value],
23437 ))))
23438 }
23439
23440 Action::JsonExtractToClickHouse => {
23441 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
23442 let (this, path) = match e {
23443 Expression::JsonExtract(f) => (f.this, f.path),
23444 Expression::JsonExtractScalar(f) => (f.this, f.path),
23445 _ => return Ok(e),
23446 };
23447 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
23448 if let Literal::String(ref s) = lit.as_ref() {
23449 let parts = Self::decompose_json_path(s);
23450 let mut result = vec![this];
23451 for part in parts {
23452 // ClickHouse uses 1-based integer indices for array access
23453 if let Ok(idx) = part.parse::<i64>() {
23454 result.push(Expression::number(idx + 1));
23455 } else {
23456 result.push(Expression::string(&part));
23457 }
23458 }
23459 result
23460 } else {
23461 vec![]
23462 }
23463 } else {
23464 vec![this, path]
23465 };
23466 Ok(Expression::Function(Box::new(Function::new(
23467 "JSONExtractString".to_string(),
23468 args,
23469 ))))
23470 }
23471
23472 Action::JsonExtractScalarConvert => {
23473 // JSON_EXTRACT_SCALAR -> target-specific
23474 if let Expression::JsonExtractScalar(f) = e {
23475 match target {
23476 DialectType::PostgreSQL | DialectType::Redshift => {
23477 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
23478 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
23479 {
23480 if let Literal::String(ref s) = lit.as_ref() {
23481 let parts = Self::decompose_json_path(s);
23482 parts.into_iter().map(|k| Expression::string(&k)).collect()
23483 } else {
23484 vec![]
23485 }
23486 } else {
23487 vec![f.path]
23488 };
23489 let mut args = vec![f.this];
23490 args.extend(keys);
23491 Ok(Expression::Function(Box::new(Function::new(
23492 "JSON_EXTRACT_PATH_TEXT".to_string(),
23493 args,
23494 ))))
23495 }
23496 DialectType::Snowflake => {
23497 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
23498 let stripped_path = if let Expression::Literal(ref lit) = f.path {
23499 if let Literal::String(ref s) = lit.as_ref() {
23500 let stripped = Self::strip_json_dollar_prefix(s);
23501 Expression::string(&stripped)
23502 } else {
23503 f.path.clone()
23504 }
23505 } else {
23506 f.path
23507 };
23508 Ok(Expression::Function(Box::new(Function::new(
23509 "JSON_EXTRACT_PATH_TEXT".to_string(),
23510 vec![f.this, stripped_path],
23511 ))))
23512 }
23513 DialectType::SQLite | DialectType::DuckDB => {
23514 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
23515 Ok(Expression::JsonExtractScalar(Box::new(
23516 crate::expressions::JsonExtractFunc {
23517 this: f.this,
23518 path: f.path,
23519 returning: f.returning,
23520 arrow_syntax: true,
23521 hash_arrow_syntax: false,
23522 wrapper_option: None,
23523 quotes_option: None,
23524 on_scalar_string: false,
23525 on_error: None,
23526 },
23527 )))
23528 }
23529 _ => Ok(Expression::JsonExtractScalar(f)),
23530 }
23531 } else {
23532 Ok(e)
23533 }
23534 }
23535
23536 Action::JsonPathNormalize => {
23537 // Normalize JSON path format for BigQuery, MySQL, etc.
23538 if let Expression::JsonExtract(mut f) = e {
23539 if let Expression::Literal(ref lit) = f.path {
23540 if let Literal::String(ref s) = lit.as_ref() {
23541 let mut normalized = s.clone();
23542 // Convert bracket notation and handle wildcards per dialect
23543 match target {
23544 DialectType::BigQuery => {
23545 // BigQuery strips wildcards and uses single quotes in brackets
23546 normalized = Self::strip_json_wildcards(&normalized);
23547 normalized = Self::bracket_to_single_quotes(&normalized);
23548 }
23549 DialectType::MySQL => {
23550 // MySQL preserves wildcards, converts brackets to dot notation
23551 normalized = Self::bracket_to_dot_notation(&normalized);
23552 }
23553 _ => {}
23554 }
23555 if normalized != *s {
23556 f.path = Expression::string(&normalized);
23557 }
23558 }
23559 }
23560 Ok(Expression::JsonExtract(f))
23561 } else {
23562 Ok(e)
23563 }
23564 }
23565
23566 Action::JsonQueryValueConvert => {
23567 // JsonQuery/JsonValue -> target-specific
23568 let (f, is_query) = match e {
23569 Expression::JsonQuery(f) => (f, true),
23570 Expression::JsonValue(f) => (f, false),
23571 _ => return Ok(e),
23572 };
23573 match target {
23574 DialectType::TSQL | DialectType::Fabric => {
23575 // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
23576 let json_query = Expression::Function(Box::new(Function::new(
23577 "JSON_QUERY".to_string(),
23578 vec![f.this.clone(), f.path.clone()],
23579 )));
23580 let json_value = Expression::Function(Box::new(Function::new(
23581 "JSON_VALUE".to_string(),
23582 vec![f.this, f.path],
23583 )));
23584 Ok(Expression::Function(Box::new(Function::new(
23585 "ISNULL".to_string(),
23586 vec![json_query, json_value],
23587 ))))
23588 }
23589 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23590 Ok(Expression::Function(Box::new(Function::new(
23591 "GET_JSON_OBJECT".to_string(),
23592 vec![f.this, f.path],
23593 ))))
23594 }
23595 DialectType::PostgreSQL | DialectType::Redshift => {
23596 Ok(Expression::Function(Box::new(Function::new(
23597 "JSON_EXTRACT_PATH_TEXT".to_string(),
23598 vec![f.this, f.path],
23599 ))))
23600 }
23601 DialectType::DuckDB | DialectType::SQLite => {
23602 // json -> path arrow syntax
23603 Ok(Expression::JsonExtract(Box::new(
23604 crate::expressions::JsonExtractFunc {
23605 this: f.this,
23606 path: f.path,
23607 returning: f.returning,
23608 arrow_syntax: true,
23609 hash_arrow_syntax: false,
23610 wrapper_option: f.wrapper_option,
23611 quotes_option: f.quotes_option,
23612 on_scalar_string: f.on_scalar_string,
23613 on_error: f.on_error,
23614 },
23615 )))
23616 }
23617 DialectType::Snowflake => {
23618 // GET_PATH(PARSE_JSON(json), 'path')
23619 // Strip $. prefix from path
23620 // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
23621 let json_expr = match &f.this {
23622 Expression::Function(ref inner_f)
23623 if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
23624 {
23625 f.this
23626 }
23627 Expression::ParseJson(_) => {
23628 // Already a ParseJson expression, which generates as PARSE_JSON(...)
23629 f.this
23630 }
23631 _ => Expression::Function(Box::new(Function::new(
23632 "PARSE_JSON".to_string(),
23633 vec![f.this],
23634 ))),
23635 };
23636 let path_str = match &f.path {
23637 Expression::Literal(lit)
23638 if matches!(lit.as_ref(), Literal::String(_)) =>
23639 {
23640 let Literal::String(s) = lit.as_ref() else {
23641 unreachable!()
23642 };
23643 let stripped = s.strip_prefix("$.").unwrap_or(s);
23644 Expression::Literal(Box::new(Literal::String(
23645 stripped.to_string(),
23646 )))
23647 }
23648 other => other.clone(),
23649 };
23650 Ok(Expression::Function(Box::new(Function::new(
23651 "GET_PATH".to_string(),
23652 vec![json_expr, path_str],
23653 ))))
23654 }
23655 _ => {
23656 // Default: keep as JSON_QUERY/JSON_VALUE function
23657 let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
23658 Ok(Expression::Function(Box::new(Function::new(
23659 func_name.to_string(),
23660 vec![f.this, f.path],
23661 ))))
23662 }
23663 }
23664 }
23665
23666 Action::JsonLiteralToJsonParse => {
23667 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
23668 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
23669 if let Expression::Cast(c) = e {
23670 let func_name = if matches!(target, DialectType::Snowflake) {
23671 "PARSE_JSON"
23672 } else {
23673 "JSON_PARSE"
23674 };
23675 Ok(Expression::Function(Box::new(Function::new(
23676 func_name.to_string(),
23677 vec![c.this],
23678 ))))
23679 } else {
23680 Ok(e)
23681 }
23682 }
23683
23684 Action::DuckDBCastJsonToVariant => {
23685 if let Expression::Cast(c) = e {
23686 Ok(Expression::Cast(Box::new(Cast {
23687 this: c.this,
23688 to: DataType::Custom {
23689 name: "VARIANT".to_string(),
23690 },
23691 trailing_comments: c.trailing_comments,
23692 double_colon_syntax: false,
23693 format: None,
23694 default: None,
23695 inferred_type: None,
23696 })))
23697 } else {
23698 Ok(e)
23699 }
23700 }
23701
23702 Action::DuckDBTryCastJsonToTryJsonParse => {
23703 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
23704 if let Expression::TryCast(c) = e {
23705 let json_parse = Expression::Function(Box::new(Function::new(
23706 "JSON_PARSE".to_string(),
23707 vec![c.this],
23708 )));
23709 Ok(Expression::Function(Box::new(Function::new(
23710 "TRY".to_string(),
23711 vec![json_parse],
23712 ))))
23713 } else {
23714 Ok(e)
23715 }
23716 }
23717
23718 Action::DuckDBJsonFuncToJsonParse => {
23719 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
23720 if let Expression::Function(f) = e {
23721 let args = f.args;
23722 Ok(Expression::Function(Box::new(Function::new(
23723 "JSON_PARSE".to_string(),
23724 args,
23725 ))))
23726 } else {
23727 Ok(e)
23728 }
23729 }
23730
23731 Action::DuckDBJsonValidToIsJson => {
23732 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
23733 if let Expression::Function(mut f) = e {
23734 let arg = f.args.remove(0);
23735 Ok(Expression::IsJson(Box::new(crate::expressions::IsJson {
23736 this: arg,
23737 json_type: None,
23738 unique_keys: None,
23739 negated: false,
23740 })))
23741 } else {
23742 Ok(e)
23743 }
23744 }
23745
23746 Action::AtTimeZoneConvert => {
23747 // AT TIME ZONE -> target-specific conversion
23748 if let Expression::AtTimeZone(atz) = e {
23749 match target {
23750 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23751 Ok(Expression::Function(Box::new(Function::new(
23752 "AT_TIMEZONE".to_string(),
23753 vec![atz.this, atz.zone],
23754 ))))
23755 }
23756 DialectType::Spark | DialectType::Databricks => {
23757 Ok(Expression::Function(Box::new(Function::new(
23758 "FROM_UTC_TIMESTAMP".to_string(),
23759 vec![atz.this, atz.zone],
23760 ))))
23761 }
23762 DialectType::Snowflake => {
23763 // CONVERT_TIMEZONE('zone', expr)
23764 Ok(Expression::Function(Box::new(Function::new(
23765 "CONVERT_TIMEZONE".to_string(),
23766 vec![atz.zone, atz.this],
23767 ))))
23768 }
23769 DialectType::BigQuery => {
23770 // TIMESTAMP(DATETIME(expr, 'zone'))
23771 let datetime_call = Expression::Function(Box::new(Function::new(
23772 "DATETIME".to_string(),
23773 vec![atz.this, atz.zone],
23774 )));
23775 Ok(Expression::Function(Box::new(Function::new(
23776 "TIMESTAMP".to_string(),
23777 vec![datetime_call],
23778 ))))
23779 }
23780 _ => Ok(Expression::Function(Box::new(Function::new(
23781 "AT_TIMEZONE".to_string(),
23782 vec![atz.this, atz.zone],
23783 )))),
23784 }
23785 } else {
23786 Ok(e)
23787 }
23788 }
23789
23790 Action::DayOfWeekConvert => {
23791 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
23792 if let Expression::DayOfWeek(f) = e {
23793 match target {
23794 DialectType::DuckDB => Ok(Expression::Function(Box::new(
23795 Function::new("ISODOW".to_string(), vec![f.this]),
23796 ))),
23797 DialectType::Spark | DialectType::Databricks => {
23798 // ((DAYOFWEEK(x) % 7) + 1)
23799 let dayofweek = Expression::Function(Box::new(Function::new(
23800 "DAYOFWEEK".to_string(),
23801 vec![f.this],
23802 )));
23803 let modulo = Expression::Mod(Box::new(BinaryOp {
23804 left: dayofweek,
23805 right: Expression::number(7),
23806 left_comments: Vec::new(),
23807 operator_comments: Vec::new(),
23808 trailing_comments: Vec::new(),
23809 inferred_type: None,
23810 }));
23811 let paren_mod = Expression::Paren(Box::new(Paren {
23812 this: modulo,
23813 trailing_comments: Vec::new(),
23814 }));
23815 let add_one = Expression::Add(Box::new(BinaryOp {
23816 left: paren_mod,
23817 right: Expression::number(1),
23818 left_comments: Vec::new(),
23819 operator_comments: Vec::new(),
23820 trailing_comments: Vec::new(),
23821 inferred_type: None,
23822 }));
23823 Ok(Expression::Paren(Box::new(Paren {
23824 this: add_one,
23825 trailing_comments: Vec::new(),
23826 })))
23827 }
23828 _ => Ok(Expression::DayOfWeek(f)),
23829 }
23830 } else {
23831 Ok(e)
23832 }
23833 }
23834
23835 Action::MaxByMinByConvert => {
23836 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
23837 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
23838 // Handle both Expression::Function and Expression::AggregateFunction
23839 let (is_max, args) = match &e {
23840 Expression::Function(f) => {
23841 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
23842 }
23843 Expression::AggregateFunction(af) => {
23844 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
23845 }
23846 _ => return Ok(e),
23847 };
23848 match target {
23849 DialectType::ClickHouse => {
23850 let name = if is_max { "argMax" } else { "argMin" };
23851 let mut args = args;
23852 args.truncate(2);
23853 Ok(Expression::Function(Box::new(Function::new(
23854 name.to_string(),
23855 args,
23856 ))))
23857 }
23858 DialectType::DuckDB => {
23859 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
23860 Ok(Expression::Function(Box::new(Function::new(
23861 name.to_string(),
23862 args,
23863 ))))
23864 }
23865 DialectType::Spark | DialectType::Databricks => {
23866 let mut args = args;
23867 args.truncate(2);
23868 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
23869 Ok(Expression::Function(Box::new(Function::new(
23870 name.to_string(),
23871 args,
23872 ))))
23873 }
23874 _ => Ok(e),
23875 }
23876 }
23877
23878 Action::ElementAtConvert => {
23879 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
23880 let (arr, idx) = if let Expression::ElementAt(bf) = e {
23881 (bf.this, bf.expression)
23882 } else if let Expression::Function(ref f) = e {
23883 if f.args.len() >= 2 {
23884 if let Expression::Function(f) = e {
23885 let mut args = f.args;
23886 let arr = args.remove(0);
23887 let idx = args.remove(0);
23888 (arr, idx)
23889 } else {
23890 unreachable!("outer condition already matched Expression::Function")
23891 }
23892 } else {
23893 return Ok(e);
23894 }
23895 } else {
23896 return Ok(e);
23897 };
23898 match target {
23899 DialectType::PostgreSQL => {
23900 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
23901 let arr_expr = Expression::Paren(Box::new(Paren {
23902 this: arr,
23903 trailing_comments: vec![],
23904 }));
23905 Ok(Expression::Subscript(Box::new(
23906 crate::expressions::Subscript {
23907 this: arr_expr,
23908 index: idx,
23909 },
23910 )))
23911 }
23912 DialectType::BigQuery => {
23913 // BigQuery: convert ARRAY[...] to bare [...] for subscript
23914 let arr_expr = match arr {
23915 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
23916 crate::expressions::ArrayConstructor {
23917 expressions: af.expressions,
23918 bracket_notation: true,
23919 use_list_keyword: false,
23920 },
23921 )),
23922 other => other,
23923 };
23924 let safe_ordinal = Expression::Function(Box::new(Function::new(
23925 "SAFE_ORDINAL".to_string(),
23926 vec![idx],
23927 )));
23928 Ok(Expression::Subscript(Box::new(
23929 crate::expressions::Subscript {
23930 this: arr_expr,
23931 index: safe_ordinal,
23932 },
23933 )))
23934 }
23935 _ => Ok(Expression::Function(Box::new(Function::new(
23936 "ELEMENT_AT".to_string(),
23937 vec![arr, idx],
23938 )))),
23939 }
23940 }
23941
23942 Action::CurrentUserParens => {
23943 // CURRENT_USER -> CURRENT_USER() for Snowflake
23944 Ok(Expression::Function(Box::new(Function::new(
23945 "CURRENT_USER".to_string(),
23946 vec![],
23947 ))))
23948 }
23949
23950 Action::ArrayAggToCollectList => {
23951 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
23952 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
23953 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
23954 match e {
23955 Expression::AggregateFunction(mut af) => {
23956 let is_simple =
23957 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
23958 let args = if af.args.is_empty() {
23959 vec![]
23960 } else {
23961 vec![af.args[0].clone()]
23962 };
23963 af.name = "COLLECT_LIST".to_string();
23964 af.args = args;
23965 if is_simple {
23966 af.order_by = Vec::new();
23967 }
23968 Ok(Expression::AggregateFunction(af))
23969 }
23970 Expression::ArrayAgg(agg) => {
23971 let is_simple =
23972 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
23973 Ok(Expression::AggregateFunction(Box::new(
23974 crate::expressions::AggregateFunction {
23975 name: "COLLECT_LIST".to_string(),
23976 args: vec![agg.this.clone()],
23977 distinct: agg.distinct,
23978 filter: agg.filter.clone(),
23979 order_by: if is_simple {
23980 Vec::new()
23981 } else {
23982 agg.order_by.clone()
23983 },
23984 limit: agg.limit.clone(),
23985 ignore_nulls: agg.ignore_nulls,
23986 inferred_type: None,
23987 },
23988 )))
23989 }
23990 _ => Ok(e),
23991 }
23992 }
23993
23994 Action::ArraySyntaxConvert => {
23995 match e {
23996 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
23997 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
23998 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
23999 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
24000 expressions: arr.expressions,
24001 bracket_notation: true,
24002 use_list_keyword: false,
24003 })),
24004 ),
24005 // ARRAY(y) function style -> ArrayFunc for target dialect
24006 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
24007 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
24008 let bracket = matches!(
24009 target,
24010 DialectType::BigQuery
24011 | DialectType::DuckDB
24012 | DialectType::Snowflake
24013 | DialectType::ClickHouse
24014 | DialectType::StarRocks
24015 );
24016 Ok(Expression::ArrayFunc(Box::new(
24017 crate::expressions::ArrayConstructor {
24018 expressions: f.args,
24019 bracket_notation: bracket,
24020 use_list_keyword: false,
24021 },
24022 )))
24023 }
24024 _ => Ok(e),
24025 }
24026 }
24027
24028 Action::CastToJsonForSpark => {
24029 // CAST(x AS JSON) -> TO_JSON(x) for Spark
24030 if let Expression::Cast(c) = e {
24031 Ok(Expression::Function(Box::new(Function::new(
24032 "TO_JSON".to_string(),
24033 vec![c.this],
24034 ))))
24035 } else {
24036 Ok(e)
24037 }
24038 }
24039
24040 Action::CastJsonToFromJson => {
24041 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
24042 if let Expression::Cast(c) = e {
24043 // Extract the string literal from ParseJson
24044 let literal_expr = if let Expression::ParseJson(pj) = c.this {
24045 pj.this
24046 } else {
24047 c.this
24048 };
24049 // Convert the target DataType to Spark's type string format
24050 let type_str = Self::data_type_to_spark_string(&c.to);
24051 Ok(Expression::Function(Box::new(Function::new(
24052 "FROM_JSON".to_string(),
24053 vec![
24054 literal_expr,
24055 Expression::Literal(Box::new(Literal::String(type_str))),
24056 ],
24057 ))))
24058 } else {
24059 Ok(e)
24060 }
24061 }
24062
24063 Action::ToJsonConvert => {
24064 // TO_JSON(x) -> target-specific conversion
24065 if let Expression::ToJson(f) = e {
24066 let arg = f.this;
24067 match target {
24068 DialectType::Presto | DialectType::Trino => {
24069 // JSON_FORMAT(CAST(x AS JSON))
24070 let cast_json = Expression::Cast(Box::new(Cast {
24071 this: arg,
24072 to: DataType::Custom {
24073 name: "JSON".to_string(),
24074 },
24075 trailing_comments: vec![],
24076 double_colon_syntax: false,
24077 format: None,
24078 default: None,
24079 inferred_type: None,
24080 }));
24081 Ok(Expression::Function(Box::new(Function::new(
24082 "JSON_FORMAT".to_string(),
24083 vec![cast_json],
24084 ))))
24085 }
24086 DialectType::BigQuery => Ok(Expression::Function(Box::new(
24087 Function::new("TO_JSON_STRING".to_string(), vec![arg]),
24088 ))),
24089 DialectType::DuckDB => {
24090 // CAST(TO_JSON(x) AS TEXT)
24091 let to_json =
24092 Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
24093 this: arg,
24094 original_name: None,
24095 inferred_type: None,
24096 }));
24097 Ok(Expression::Cast(Box::new(Cast {
24098 this: to_json,
24099 to: DataType::Text,
24100 trailing_comments: vec![],
24101 double_colon_syntax: false,
24102 format: None,
24103 default: None,
24104 inferred_type: None,
24105 })))
24106 }
24107 _ => Ok(Expression::ToJson(Box::new(
24108 crate::expressions::UnaryFunc {
24109 this: arg,
24110 original_name: None,
24111 inferred_type: None,
24112 },
24113 ))),
24114 }
24115 } else {
24116 Ok(e)
24117 }
24118 }
24119
24120 Action::VarianceToClickHouse => {
24121 if let Expression::Variance(f) = e {
24122 Ok(Expression::Function(Box::new(Function::new(
24123 "varSamp".to_string(),
24124 vec![f.this],
24125 ))))
24126 } else {
24127 Ok(e)
24128 }
24129 }
24130
24131 Action::StddevToClickHouse => {
24132 if let Expression::Stddev(f) = e {
24133 Ok(Expression::Function(Box::new(Function::new(
24134 "stddevSamp".to_string(),
24135 vec![f.this],
24136 ))))
24137 } else {
24138 Ok(e)
24139 }
24140 }
24141
24142 Action::ApproxQuantileConvert => {
24143 if let Expression::ApproxQuantile(aq) = e {
24144 let mut args = vec![*aq.this];
24145 if let Some(q) = aq.quantile {
24146 args.push(*q);
24147 }
24148 Ok(Expression::Function(Box::new(Function::new(
24149 "APPROX_PERCENTILE".to_string(),
24150 args,
24151 ))))
24152 } else {
24153 Ok(e)
24154 }
24155 }
24156
24157 Action::DollarParamConvert => {
24158 if let Expression::Parameter(p) = e {
24159 Ok(Expression::Parameter(Box::new(
24160 crate::expressions::Parameter {
24161 name: p.name,
24162 index: p.index,
24163 style: crate::expressions::ParameterStyle::At,
24164 quoted: p.quoted,
24165 string_quoted: p.string_quoted,
24166 expression: p.expression,
24167 },
24168 )))
24169 } else {
24170 Ok(e)
24171 }
24172 }
24173
24174 Action::EscapeStringNormalize => {
24175 if let Expression::Literal(ref lit) = e {
24176 if let Literal::EscapeString(s) = lit.as_ref() {
24177 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
24178 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
24179 s[2..].to_string()
24180 } else {
24181 s.clone()
24182 };
24183 let normalized = stripped
24184 .replace('\n', "\\n")
24185 .replace('\r', "\\r")
24186 .replace('\t', "\\t");
24187 match target {
24188 DialectType::BigQuery => {
24189 // BigQuery: e'...' -> CAST(b'...' AS STRING)
24190 // Use Raw for the b'...' part to avoid double-escaping
24191 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
24192 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
24193 }
24194 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
24195 normalized,
24196 )))),
24197 }
24198 } else {
24199 Ok(e)
24200 }
24201 } else {
24202 Ok(e)
24203 }
24204 }
24205
24206 Action::StraightJoinCase => {
24207 // straight_join: keep lowercase for DuckDB, quote for MySQL
24208 if let Expression::Column(col) = e {
24209 if col.name.name == "STRAIGHT_JOIN" {
24210 let mut new_col = col;
24211 new_col.name.name = "straight_join".to_string();
24212 if matches!(target, DialectType::MySQL) {
24213 // MySQL: needs quoting since it's a reserved keyword
24214 new_col.name.quoted = true;
24215 }
24216 Ok(Expression::Column(new_col))
24217 } else {
24218 Ok(Expression::Column(col))
24219 }
24220 } else {
24221 Ok(e)
24222 }
24223 }
24224
24225 Action::TablesampleReservoir => {
24226 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
24227 if let Expression::TableSample(mut ts) = e {
24228 if let Some(ref mut sample) = ts.sample {
24229 sample.method = crate::expressions::SampleMethod::Reservoir;
24230 sample.explicit_method = true;
24231 }
24232 Ok(Expression::TableSample(ts))
24233 } else {
24234 Ok(e)
24235 }
24236 }
24237
24238 Action::TablesampleSnowflakeStrip => {
24239 // Strip method and PERCENT for Snowflake target from non-Snowflake source
24240 match e {
24241 Expression::TableSample(mut ts) => {
24242 if let Some(ref mut sample) = ts.sample {
24243 sample.suppress_method_output = true;
24244 sample.unit_after_size = false;
24245 sample.is_percent = false;
24246 }
24247 Ok(Expression::TableSample(ts))
24248 }
24249 Expression::Table(mut t) => {
24250 if let Some(ref mut sample) = t.table_sample {
24251 sample.suppress_method_output = true;
24252 sample.unit_after_size = false;
24253 sample.is_percent = false;
24254 }
24255 Ok(Expression::Table(t))
24256 }
24257 _ => Ok(e),
24258 }
24259 }
24260
24261 Action::FirstToAnyValue => {
24262 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
24263 if let Expression::First(mut agg) = e {
24264 agg.ignore_nulls = None;
24265 agg.name = Some("ANY_VALUE".to_string());
24266 Ok(Expression::AnyValue(agg))
24267 } else {
24268 Ok(e)
24269 }
24270 }
24271
24272 Action::ArrayIndexConvert => {
24273 // Subscript index: 1-based to 0-based for BigQuery
24274 if let Expression::Subscript(mut sub) = e {
24275 if let Expression::Literal(ref lit) = sub.index {
24276 if let Literal::Number(ref n) = lit.as_ref() {
24277 if let Ok(val) = n.parse::<i64>() {
24278 sub.index = Expression::Literal(Box::new(Literal::Number(
24279 (val - 1).to_string(),
24280 )));
24281 }
24282 }
24283 }
24284 Ok(Expression::Subscript(sub))
24285 } else {
24286 Ok(e)
24287 }
24288 }
24289
24290 Action::AnyValueIgnoreNulls => {
24291 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
24292 if let Expression::AnyValue(mut av) = e {
24293 if av.ignore_nulls.is_none() {
24294 av.ignore_nulls = Some(true);
24295 }
24296 Ok(Expression::AnyValue(av))
24297 } else {
24298 Ok(e)
24299 }
24300 }
24301
24302 Action::BigQueryNullsOrdering => {
24303 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
24304 if let Expression::WindowFunction(mut wf) = e {
24305 for o in &mut wf.over.order_by {
24306 o.nulls_first = None;
24307 }
24308 Ok(Expression::WindowFunction(wf))
24309 } else if let Expression::Ordered(mut o) = e {
24310 o.nulls_first = None;
24311 Ok(Expression::Ordered(o))
24312 } else {
24313 Ok(e)
24314 }
24315 }
24316
24317 Action::SnowflakeFloatProtect => {
24318 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
24319 // Snowflake's target transform from converting it to DOUBLE.
24320 // Non-Snowflake sources should keep their FLOAT spelling.
24321 if let Expression::DataType(DataType::Float { .. }) = e {
24322 Ok(Expression::DataType(DataType::Custom {
24323 name: "FLOAT".to_string(),
24324 }))
24325 } else {
24326 Ok(e)
24327 }
24328 }
24329
24330 Action::MysqlNullsOrdering => {
24331 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
24332 if let Expression::Ordered(mut o) = e {
24333 let nulls_last = o.nulls_first == Some(false);
24334 let desc = o.desc;
24335 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
24336 // If requested ordering matches default, just strip NULLS clause
24337 let matches_default = if desc {
24338 // DESC default is NULLS FIRST, so nulls_first=true matches
24339 o.nulls_first == Some(true)
24340 } else {
24341 // ASC default is NULLS LAST, so nulls_first=false matches
24342 nulls_last
24343 };
24344 if matches_default {
24345 o.nulls_first = None;
24346 Ok(Expression::Ordered(o))
24347 } else {
24348 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
24349 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
24350 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
24351 let null_val = if desc { 1 } else { 0 };
24352 let non_null_val = if desc { 0 } else { 1 };
24353 let _case_expr = Expression::Case(Box::new(Case {
24354 operand: None,
24355 whens: vec![(
24356 Expression::IsNull(Box::new(crate::expressions::IsNull {
24357 this: o.this.clone(),
24358 not: false,
24359 postfix_form: false,
24360 })),
24361 Expression::number(null_val),
24362 )],
24363 else_: Some(Expression::number(non_null_val)),
24364 comments: Vec::new(),
24365 inferred_type: None,
24366 }));
24367 o.nulls_first = None;
24368 // Return a tuple of [case_expr, ordered_expr]
24369 // We need to return both as part of the ORDER BY
24370 // But since transform_recursive processes individual expressions,
24371 // we can't easily add extra ORDER BY items here.
24372 // Instead, strip the nulls_first
24373 o.nulls_first = None;
24374 Ok(Expression::Ordered(o))
24375 }
24376 } else {
24377 Ok(e)
24378 }
24379 }
24380
24381 Action::MysqlNullsLastRewrite => {
24382 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
24383 // to simulate NULLS LAST for ASC ordering
24384 if let Expression::WindowFunction(mut wf) = e {
24385 let mut new_order_by = Vec::new();
24386 for o in wf.over.order_by {
24387 if !o.desc {
24388 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
24389 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
24390 let case_expr = Expression::Case(Box::new(Case {
24391 operand: None,
24392 whens: vec![(
24393 Expression::IsNull(Box::new(crate::expressions::IsNull {
24394 this: o.this.clone(),
24395 not: false,
24396 postfix_form: false,
24397 })),
24398 Expression::Literal(Box::new(Literal::Number(
24399 "1".to_string(),
24400 ))),
24401 )],
24402 else_: Some(Expression::Literal(Box::new(Literal::Number(
24403 "0".to_string(),
24404 )))),
24405 comments: Vec::new(),
24406 inferred_type: None,
24407 }));
24408 new_order_by.push(crate::expressions::Ordered {
24409 this: case_expr,
24410 desc: false,
24411 nulls_first: None,
24412 explicit_asc: false,
24413 with_fill: None,
24414 });
24415 let mut ordered = o;
24416 ordered.nulls_first = None;
24417 new_order_by.push(ordered);
24418 } else {
24419 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
24420 // No change needed
24421 let mut ordered = o;
24422 ordered.nulls_first = None;
24423 new_order_by.push(ordered);
24424 }
24425 }
24426 wf.over.order_by = new_order_by;
24427 Ok(Expression::WindowFunction(wf))
24428 } else {
24429 Ok(e)
24430 }
24431 }
24432
24433 Action::RespectNullsConvert => {
24434 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
24435 if let Expression::WindowFunction(mut wf) = e {
24436 match &mut wf.this {
24437 Expression::FirstValue(ref mut vf) => {
24438 if vf.ignore_nulls == Some(false) {
24439 vf.ignore_nulls = None;
24440 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
24441 // but that's handled by the generator's NULLS ordering
24442 }
24443 }
24444 Expression::LastValue(ref mut vf) => {
24445 if vf.ignore_nulls == Some(false) {
24446 vf.ignore_nulls = None;
24447 }
24448 }
24449 _ => {}
24450 }
24451 Ok(Expression::WindowFunction(wf))
24452 } else {
24453 Ok(e)
24454 }
24455 }
24456
24457 Action::SnowflakeWindowFrameStrip => {
24458 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24459 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
24460 if let Expression::WindowFunction(mut wf) = e {
24461 wf.over.frame = None;
24462 Ok(Expression::WindowFunction(wf))
24463 } else {
24464 Ok(e)
24465 }
24466 }
24467
24468 Action::SnowflakeWindowFrameAdd => {
24469 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24470 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
24471 if let Expression::WindowFunction(mut wf) = e {
24472 wf.over.frame = Some(crate::expressions::WindowFrame {
24473 kind: crate::expressions::WindowFrameKind::Rows,
24474 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
24475 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
24476 exclude: None,
24477 kind_text: None,
24478 start_side_text: None,
24479 end_side_text: None,
24480 });
24481 Ok(Expression::WindowFunction(wf))
24482 } else {
24483 Ok(e)
24484 }
24485 }
24486
24487 Action::CreateTableStripComment => {
24488 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
24489 if let Expression::CreateTable(mut ct) = e {
24490 for col in &mut ct.columns {
24491 col.comment = None;
24492 col.constraints.retain(|c| {
24493 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
24494 });
24495 // Also remove Comment from constraint_order
24496 col.constraint_order.retain(|c| {
24497 !matches!(c, crate::expressions::ConstraintType::Comment)
24498 });
24499 }
24500 // Strip properties (USING, PARTITIONED BY, etc.)
24501 ct.properties.clear();
24502 Ok(Expression::CreateTable(ct))
24503 } else {
24504 Ok(e)
24505 }
24506 }
24507
24508 Action::AlterTableToSpRename => {
24509 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
24510 if let Expression::AlterTable(ref at) = e {
24511 if let Some(crate::expressions::AlterTableAction::RenameTable(
24512 ref new_tbl,
24513 )) = at.actions.first()
24514 {
24515 // Build the old table name using TSQL bracket quoting
24516 let old_name = if let Some(ref schema) = at.name.schema {
24517 if at.name.name.quoted || schema.quoted {
24518 format!("[{}].[{}]", schema.name, at.name.name.name)
24519 } else {
24520 format!("{}.{}", schema.name, at.name.name.name)
24521 }
24522 } else {
24523 if at.name.name.quoted {
24524 format!("[{}]", at.name.name.name)
24525 } else {
24526 at.name.name.name.clone()
24527 }
24528 };
24529 let new_name = new_tbl.name.name.clone();
24530 // EXEC sp_rename 'old_name', 'new_name'
24531 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
24532 Ok(Expression::Raw(crate::expressions::Raw { sql }))
24533 } else {
24534 Ok(e)
24535 }
24536 } else {
24537 Ok(e)
24538 }
24539 }
24540
24541 Action::SnowflakeIntervalFormat => {
24542 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
24543 if let Expression::Interval(mut iv) = e {
24544 if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
24545 (&iv.this, &iv.unit)
24546 {
24547 if let Literal::String(ref val) = lit.as_ref() {
24548 let unit_str = match unit_spec {
24549 crate::expressions::IntervalUnitSpec::Simple {
24550 unit, ..
24551 } => match unit {
24552 crate::expressions::IntervalUnit::Year => "YEAR",
24553 crate::expressions::IntervalUnit::Quarter => "QUARTER",
24554 crate::expressions::IntervalUnit::Month => "MONTH",
24555 crate::expressions::IntervalUnit::Week => "WEEK",
24556 crate::expressions::IntervalUnit::Day => "DAY",
24557 crate::expressions::IntervalUnit::Hour => "HOUR",
24558 crate::expressions::IntervalUnit::Minute => "MINUTE",
24559 crate::expressions::IntervalUnit::Second => "SECOND",
24560 crate::expressions::IntervalUnit::Millisecond => {
24561 "MILLISECOND"
24562 }
24563 crate::expressions::IntervalUnit::Microsecond => {
24564 "MICROSECOND"
24565 }
24566 crate::expressions::IntervalUnit::Nanosecond => {
24567 "NANOSECOND"
24568 }
24569 },
24570 _ => "",
24571 };
24572 if !unit_str.is_empty() {
24573 let combined = format!("{} {}", val, unit_str);
24574 iv.this = Some(Expression::Literal(Box::new(Literal::String(
24575 combined,
24576 ))));
24577 iv.unit = None;
24578 }
24579 }
24580 }
24581 Ok(Expression::Interval(iv))
24582 } else {
24583 Ok(e)
24584 }
24585 }
24586
24587 Action::ArrayConcatBracketConvert => {
24588 // Expression::Array/ArrayFunc -> target-specific
24589 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
24590 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
24591 match e {
24592 Expression::Array(arr) => {
24593 if matches!(target, DialectType::Redshift) {
24594 Ok(Expression::Function(Box::new(Function::new(
24595 "ARRAY".to_string(),
24596 arr.expressions,
24597 ))))
24598 } else {
24599 Ok(Expression::ArrayFunc(Box::new(
24600 crate::expressions::ArrayConstructor {
24601 expressions: arr.expressions,
24602 bracket_notation: false,
24603 use_list_keyword: false,
24604 },
24605 )))
24606 }
24607 }
24608 Expression::ArrayFunc(arr) => {
24609 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
24610 if matches!(target, DialectType::Redshift) {
24611 Ok(Expression::Function(Box::new(Function::new(
24612 "ARRAY".to_string(),
24613 arr.expressions,
24614 ))))
24615 } else {
24616 Ok(Expression::ArrayFunc(arr))
24617 }
24618 }
24619 _ => Ok(e),
24620 }
24621 }
24622
24623 Action::BitAggFloatCast => {
24624 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
24625 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24626 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24627 let int_type = DataType::Int {
24628 length: None,
24629 integer_spelling: false,
24630 };
24631 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
24632 if let Expression::Cast(c) = agg_this {
24633 match &c.to {
24634 DataType::Float { .. }
24635 | DataType::Double { .. }
24636 | DataType::Custom { .. } => {
24637 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24638 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
24639 let inner_type = match &c.to {
24640 DataType::Float {
24641 precision, scale, ..
24642 } => DataType::Float {
24643 precision: *precision,
24644 scale: *scale,
24645 real_spelling: true,
24646 },
24647 other => other.clone(),
24648 };
24649 let inner_cast =
24650 Expression::Cast(Box::new(crate::expressions::Cast {
24651 this: c.this.clone(),
24652 to: inner_type,
24653 trailing_comments: Vec::new(),
24654 double_colon_syntax: false,
24655 format: None,
24656 default: None,
24657 inferred_type: None,
24658 }));
24659 let rounded = Expression::Function(Box::new(Function::new(
24660 "ROUND".to_string(),
24661 vec![inner_cast],
24662 )));
24663 Expression::Cast(Box::new(crate::expressions::Cast {
24664 this: rounded,
24665 to: int_dt,
24666 trailing_comments: Vec::new(),
24667 double_colon_syntax: false,
24668 format: None,
24669 default: None,
24670 inferred_type: None,
24671 }))
24672 }
24673 DataType::Decimal { .. } => {
24674 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24675 Expression::Cast(Box::new(crate::expressions::Cast {
24676 this: Expression::Cast(c),
24677 to: int_dt,
24678 trailing_comments: Vec::new(),
24679 double_colon_syntax: false,
24680 format: None,
24681 default: None,
24682 inferred_type: None,
24683 }))
24684 }
24685 _ => Expression::Cast(c),
24686 }
24687 } else {
24688 agg_this
24689 }
24690 };
24691 match e {
24692 Expression::BitwiseOrAgg(mut f) => {
24693 f.this = wrap_agg(f.this, int_type);
24694 Ok(Expression::BitwiseOrAgg(f))
24695 }
24696 Expression::BitwiseAndAgg(mut f) => {
24697 let int_type = DataType::Int {
24698 length: None,
24699 integer_spelling: false,
24700 };
24701 f.this = wrap_agg(f.this, int_type);
24702 Ok(Expression::BitwiseAndAgg(f))
24703 }
24704 Expression::BitwiseXorAgg(mut f) => {
24705 let int_type = DataType::Int {
24706 length: None,
24707 integer_spelling: false,
24708 };
24709 f.this = wrap_agg(f.this, int_type);
24710 Ok(Expression::BitwiseXorAgg(f))
24711 }
24712 _ => Ok(e),
24713 }
24714 }
24715
24716 Action::BitAggSnowflakeRename => {
24717 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
24718 match e {
24719 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
24720 Function::new("BITORAGG".to_string(), vec![f.this]),
24721 ))),
24722 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
24723 Function::new("BITANDAGG".to_string(), vec![f.this]),
24724 ))),
24725 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
24726 Function::new("BITXORAGG".to_string(), vec![f.this]),
24727 ))),
24728 _ => Ok(e),
24729 }
24730 }
24731
24732 Action::StrftimeCastTimestamp => {
24733 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
24734 if let Expression::Cast(mut c) = e {
24735 if matches!(
24736 c.to,
24737 DataType::Timestamp {
24738 timezone: false,
24739 ..
24740 }
24741 ) {
24742 c.to = DataType::Custom {
24743 name: "TIMESTAMP_NTZ".to_string(),
24744 };
24745 }
24746 Ok(Expression::Cast(c))
24747 } else {
24748 Ok(e)
24749 }
24750 }
24751
24752 Action::DecimalDefaultPrecision => {
24753 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
24754 if let Expression::Cast(mut c) = e {
24755 if matches!(
24756 c.to,
24757 DataType::Decimal {
24758 precision: None,
24759 ..
24760 }
24761 ) {
24762 c.to = DataType::Decimal {
24763 precision: Some(18),
24764 scale: Some(3),
24765 };
24766 }
24767 Ok(Expression::Cast(c))
24768 } else {
24769 Ok(e)
24770 }
24771 }
24772
24773 Action::FilterToIff => {
24774 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
24775 if let Expression::Filter(f) = e {
24776 let condition = *f.expression;
24777 let agg = *f.this;
24778 // Strip WHERE from condition
24779 let cond = match condition {
24780 Expression::Where(w) => w.this,
24781 other => other,
24782 };
24783 // Extract the aggregate function and its argument
24784 // We want AVG(IFF(condition, x, NULL))
24785 match agg {
24786 Expression::Function(mut func) => {
24787 if !func.args.is_empty() {
24788 let orig_arg = func.args[0].clone();
24789 let iff_call = Expression::Function(Box::new(Function::new(
24790 "IFF".to_string(),
24791 vec![cond, orig_arg, Expression::Null(Null)],
24792 )));
24793 func.args[0] = iff_call;
24794 Ok(Expression::Function(func))
24795 } else {
24796 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
24797 this: Box::new(Expression::Function(func)),
24798 expression: Box::new(cond),
24799 })))
24800 }
24801 }
24802 Expression::Avg(mut avg) => {
24803 let iff_call = Expression::Function(Box::new(Function::new(
24804 "IFF".to_string(),
24805 vec![cond, avg.this.clone(), Expression::Null(Null)],
24806 )));
24807 avg.this = iff_call;
24808 Ok(Expression::Avg(avg))
24809 }
24810 Expression::Sum(mut s) => {
24811 let iff_call = Expression::Function(Box::new(Function::new(
24812 "IFF".to_string(),
24813 vec![cond, s.this.clone(), Expression::Null(Null)],
24814 )));
24815 s.this = iff_call;
24816 Ok(Expression::Sum(s))
24817 }
24818 Expression::Count(mut c) => {
24819 if let Some(ref this_expr) = c.this {
24820 let iff_call = Expression::Function(Box::new(Function::new(
24821 "IFF".to_string(),
24822 vec![cond, this_expr.clone(), Expression::Null(Null)],
24823 )));
24824 c.this = Some(iff_call);
24825 }
24826 Ok(Expression::Count(c))
24827 }
24828 other => {
24829 // Fallback: keep as Filter
24830 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
24831 this: Box::new(other),
24832 expression: Box::new(cond),
24833 })))
24834 }
24835 }
24836 } else {
24837 Ok(e)
24838 }
24839 }
24840
24841 Action::AggFilterToIff => {
24842 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
24843 // Helper macro to handle the common AggFunc case
24844 macro_rules! handle_agg_filter_to_iff {
24845 ($variant:ident, $agg:expr) => {{
24846 let mut agg = $agg;
24847 if let Some(filter_cond) = agg.filter.take() {
24848 let iff_call = Expression::Function(Box::new(Function::new(
24849 "IFF".to_string(),
24850 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
24851 )));
24852 agg.this = iff_call;
24853 }
24854 Ok(Expression::$variant(agg))
24855 }};
24856 }
24857
24858 match e {
24859 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
24860 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
24861 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
24862 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
24863 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
24864 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
24865 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
24866 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
24867 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
24868 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
24869 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
24870 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
24871 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
24872 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
24873 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
24874 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
24875 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
24876 Expression::ApproxDistinct(agg) => {
24877 handle_agg_filter_to_iff!(ApproxDistinct, agg)
24878 }
24879 Expression::Count(mut c) => {
24880 if let Some(filter_cond) = c.filter.take() {
24881 if let Some(ref this_expr) = c.this {
24882 let iff_call = Expression::Function(Box::new(Function::new(
24883 "IFF".to_string(),
24884 vec![
24885 filter_cond,
24886 this_expr.clone(),
24887 Expression::Null(Null),
24888 ],
24889 )));
24890 c.this = Some(iff_call);
24891 }
24892 }
24893 Ok(Expression::Count(c))
24894 }
24895 other => Ok(other),
24896 }
24897 }
24898
24899 Action::JsonToGetPath => {
24900 // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
24901 if let Expression::JsonExtract(je) = e {
24902 // Convert to PARSE_JSON() wrapper:
24903 // - JSON(x) -> PARSE_JSON(x)
24904 // - PARSE_JSON(x) -> keep as-is
24905 // - anything else -> wrap in PARSE_JSON()
24906 let this = match &je.this {
24907 Expression::Function(f)
24908 if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
24909 {
24910 Expression::Function(Box::new(Function::new(
24911 "PARSE_JSON".to_string(),
24912 f.args.clone(),
24913 )))
24914 }
24915 Expression::Function(f)
24916 if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
24917 {
24918 je.this.clone()
24919 }
24920 // GET_PATH result is already JSON, don't wrap
24921 Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
24922 je.this.clone()
24923 }
24924 other => {
24925 // Wrap non-JSON expressions in PARSE_JSON()
24926 Expression::Function(Box::new(Function::new(
24927 "PARSE_JSON".to_string(),
24928 vec![other.clone()],
24929 )))
24930 }
24931 };
24932 // Convert path: extract key from JSONPath or strip $. prefix from string
24933 let path = match &je.path {
24934 Expression::JSONPath(jp) => {
24935 // Extract the key from JSONPath: $root.key -> 'key'
24936 let mut key_parts = Vec::new();
24937 for expr in &jp.expressions {
24938 match expr {
24939 Expression::JSONPathRoot(_) => {} // skip root
24940 Expression::JSONPathKey(k) => {
24941 if let Expression::Literal(lit) = &*k.this {
24942 if let Literal::String(s) = lit.as_ref() {
24943 key_parts.push(s.clone());
24944 }
24945 }
24946 }
24947 _ => {}
24948 }
24949 }
24950 if !key_parts.is_empty() {
24951 Expression::Literal(Box::new(Literal::String(
24952 key_parts.join("."),
24953 )))
24954 } else {
24955 je.path.clone()
24956 }
24957 }
24958 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
24959 {
24960 let Literal::String(s) = lit.as_ref() else {
24961 unreachable!()
24962 };
24963 let stripped = Self::strip_json_wildcards(&s[2..].to_string());
24964 Expression::Literal(Box::new(Literal::String(stripped)))
24965 }
24966 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
24967 {
24968 let Literal::String(s) = lit.as_ref() else {
24969 unreachable!()
24970 };
24971 let stripped = Self::strip_json_wildcards(&s[1..].to_string());
24972 Expression::Literal(Box::new(Literal::String(stripped)))
24973 }
24974 _ => je.path.clone(),
24975 };
24976 Ok(Expression::Function(Box::new(Function::new(
24977 "GET_PATH".to_string(),
24978 vec![this, path],
24979 ))))
24980 } else {
24981 Ok(e)
24982 }
24983 }
24984
24985 Action::StructToRow => {
24986 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
24987 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
24988
24989 // Extract key-value pairs from either Struct or MapFunc
24990 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
24991 Expression::Struct(s) => Some(
24992 s.fields
24993 .iter()
24994 .map(|(opt_name, field_expr)| {
24995 if let Some(name) = opt_name {
24996 (name.clone(), field_expr.clone())
24997 } else if let Expression::NamedArgument(na) = field_expr {
24998 (na.name.name.clone(), na.value.clone())
24999 } else {
25000 (String::new(), field_expr.clone())
25001 }
25002 })
25003 .collect(),
25004 ),
25005 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25006 m.keys
25007 .iter()
25008 .zip(m.values.iter())
25009 .map(|(key, value)| {
25010 let key_name = match key {
25011 Expression::Literal(lit)
25012 if matches!(lit.as_ref(), Literal::String(_)) =>
25013 {
25014 let Literal::String(s) = lit.as_ref() else {
25015 unreachable!()
25016 };
25017 s.clone()
25018 }
25019 Expression::Identifier(id) => id.name.clone(),
25020 _ => String::new(),
25021 };
25022 (key_name, value.clone())
25023 })
25024 .collect(),
25025 ),
25026 _ => None,
25027 };
25028
25029 if let Some(pairs) = kv_pairs {
25030 let mut named_args = Vec::new();
25031 for (key_name, value) in pairs {
25032 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
25033 named_args.push(Expression::Alias(Box::new(
25034 crate::expressions::Alias::new(
25035 value,
25036 Identifier::new(key_name),
25037 ),
25038 )));
25039 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25040 named_args.push(value);
25041 } else {
25042 named_args.push(value);
25043 }
25044 }
25045
25046 if matches!(target, DialectType::BigQuery) {
25047 Ok(Expression::Function(Box::new(Function::new(
25048 "STRUCT".to_string(),
25049 named_args,
25050 ))))
25051 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25052 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
25053 let row_func = Expression::Function(Box::new(Function::new(
25054 "ROW".to_string(),
25055 named_args,
25056 )));
25057
25058 // Try to infer types for each pair
25059 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
25060 Expression::Struct(s) => Some(
25061 s.fields
25062 .iter()
25063 .map(|(opt_name, field_expr)| {
25064 if let Some(name) = opt_name {
25065 (name.clone(), field_expr.clone())
25066 } else if let Expression::NamedArgument(na) = field_expr
25067 {
25068 (na.name.name.clone(), na.value.clone())
25069 } else {
25070 (String::new(), field_expr.clone())
25071 }
25072 })
25073 .collect(),
25074 ),
25075 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25076 m.keys
25077 .iter()
25078 .zip(m.values.iter())
25079 .map(|(key, value)| {
25080 let key_name = match key {
25081 Expression::Literal(lit)
25082 if matches!(
25083 lit.as_ref(),
25084 Literal::String(_)
25085 ) =>
25086 {
25087 let Literal::String(s) = lit.as_ref() else {
25088 unreachable!()
25089 };
25090 s.clone()
25091 }
25092 Expression::Identifier(id) => id.name.clone(),
25093 _ => String::new(),
25094 };
25095 (key_name, value.clone())
25096 })
25097 .collect(),
25098 ),
25099 _ => None,
25100 };
25101
25102 if let Some(pairs) = kv_pairs_again {
25103 // Infer types for all values
25104 let mut all_inferred = true;
25105 let mut fields = Vec::new();
25106 for (name, value) in &pairs {
25107 let inferred_type = match value {
25108 Expression::Literal(lit)
25109 if matches!(lit.as_ref(), Literal::Number(_)) =>
25110 {
25111 let Literal::Number(n) = lit.as_ref() else {
25112 unreachable!()
25113 };
25114 if n.contains('.') {
25115 Some(DataType::Double {
25116 precision: None,
25117 scale: None,
25118 })
25119 } else {
25120 Some(DataType::Int {
25121 length: None,
25122 integer_spelling: true,
25123 })
25124 }
25125 }
25126 Expression::Literal(lit)
25127 if matches!(lit.as_ref(), Literal::String(_)) =>
25128 {
25129 Some(DataType::VarChar {
25130 length: None,
25131 parenthesized_length: false,
25132 })
25133 }
25134 Expression::Boolean(_) => Some(DataType::Boolean),
25135 _ => None,
25136 };
25137 if let Some(dt) = inferred_type {
25138 fields.push(crate::expressions::StructField::new(
25139 name.clone(),
25140 dt,
25141 ));
25142 } else {
25143 all_inferred = false;
25144 break;
25145 }
25146 }
25147
25148 if all_inferred && !fields.is_empty() {
25149 let row_type = DataType::Struct {
25150 fields,
25151 nested: true,
25152 };
25153 Ok(Expression::Cast(Box::new(Cast {
25154 this: row_func,
25155 to: row_type,
25156 trailing_comments: Vec::new(),
25157 double_colon_syntax: false,
25158 format: None,
25159 default: None,
25160 inferred_type: None,
25161 })))
25162 } else {
25163 Ok(row_func)
25164 }
25165 } else {
25166 Ok(row_func)
25167 }
25168 } else {
25169 Ok(Expression::Function(Box::new(Function::new(
25170 "ROW".to_string(),
25171 named_args,
25172 ))))
25173 }
25174 } else {
25175 Ok(e)
25176 }
25177 }
25178
25179 Action::SparkStructConvert => {
25180 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
25181 // or DuckDB {'name': val, ...}
25182 if let Expression::Function(f) = e {
25183 // Extract name-value pairs from aliased args
25184 let mut pairs: Vec<(String, Expression)> = Vec::new();
25185 for arg in &f.args {
25186 match arg {
25187 Expression::Alias(a) => {
25188 pairs.push((a.alias.name.clone(), a.this.clone()));
25189 }
25190 _ => {
25191 pairs.push((String::new(), arg.clone()));
25192 }
25193 }
25194 }
25195
25196 match target {
25197 DialectType::DuckDB => {
25198 // Convert to DuckDB struct literal {'name': value, ...}
25199 let mut keys = Vec::new();
25200 let mut values = Vec::new();
25201 for (name, value) in &pairs {
25202 keys.push(Expression::Literal(Box::new(Literal::String(
25203 name.clone(),
25204 ))));
25205 values.push(value.clone());
25206 }
25207 Ok(Expression::MapFunc(Box::new(
25208 crate::expressions::MapConstructor {
25209 keys,
25210 values,
25211 curly_brace_syntax: true,
25212 with_map_keyword: false,
25213 },
25214 )))
25215 }
25216 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25217 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
25218 let row_args: Vec<Expression> =
25219 pairs.iter().map(|(_, v)| v.clone()).collect();
25220 let row_func = Expression::Function(Box::new(Function::new(
25221 "ROW".to_string(),
25222 row_args,
25223 )));
25224
25225 // Infer types
25226 let mut all_inferred = true;
25227 let mut fields = Vec::new();
25228 for (name, value) in &pairs {
25229 let inferred_type = match value {
25230 Expression::Literal(lit)
25231 if matches!(lit.as_ref(), Literal::Number(_)) =>
25232 {
25233 let Literal::Number(n) = lit.as_ref() else {
25234 unreachable!()
25235 };
25236 if n.contains('.') {
25237 Some(DataType::Double {
25238 precision: None,
25239 scale: None,
25240 })
25241 } else {
25242 Some(DataType::Int {
25243 length: None,
25244 integer_spelling: true,
25245 })
25246 }
25247 }
25248 Expression::Literal(lit)
25249 if matches!(lit.as_ref(), Literal::String(_)) =>
25250 {
25251 Some(DataType::VarChar {
25252 length: None,
25253 parenthesized_length: false,
25254 })
25255 }
25256 Expression::Boolean(_) => Some(DataType::Boolean),
25257 _ => None,
25258 };
25259 if let Some(dt) = inferred_type {
25260 fields.push(crate::expressions::StructField::new(
25261 name.clone(),
25262 dt,
25263 ));
25264 } else {
25265 all_inferred = false;
25266 break;
25267 }
25268 }
25269
25270 if all_inferred && !fields.is_empty() {
25271 let row_type = DataType::Struct {
25272 fields,
25273 nested: true,
25274 };
25275 Ok(Expression::Cast(Box::new(Cast {
25276 this: row_func,
25277 to: row_type,
25278 trailing_comments: Vec::new(),
25279 double_colon_syntax: false,
25280 format: None,
25281 default: None,
25282 inferred_type: None,
25283 })))
25284 } else {
25285 Ok(row_func)
25286 }
25287 }
25288 _ => Ok(Expression::Function(f)),
25289 }
25290 } else {
25291 Ok(e)
25292 }
25293 }
25294
25295 Action::ApproxCountDistinctToApproxDistinct => {
25296 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
25297 if let Expression::ApproxCountDistinct(f) = e {
25298 Ok(Expression::ApproxDistinct(f))
25299 } else {
25300 Ok(e)
25301 }
25302 }
25303
25304 Action::CollectListToArrayAgg => {
25305 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
25306 if let Expression::AggregateFunction(f) = e {
25307 let filter_expr = if !f.args.is_empty() {
25308 let arg = f.args[0].clone();
25309 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
25310 this: arg,
25311 not: true,
25312 postfix_form: false,
25313 })))
25314 } else {
25315 None
25316 };
25317 let agg = crate::expressions::AggFunc {
25318 this: if f.args.is_empty() {
25319 Expression::Null(crate::expressions::Null)
25320 } else {
25321 f.args[0].clone()
25322 },
25323 distinct: f.distinct,
25324 order_by: f.order_by.clone(),
25325 filter: filter_expr,
25326 ignore_nulls: None,
25327 name: None,
25328 having_max: None,
25329 limit: None,
25330 inferred_type: None,
25331 };
25332 Ok(Expression::ArrayAgg(Box::new(agg)))
25333 } else {
25334 Ok(e)
25335 }
25336 }
25337
25338 Action::CollectSetConvert => {
25339 // COLLECT_SET(x) -> target-specific
25340 if let Expression::AggregateFunction(f) = e {
25341 match target {
25342 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
25343 crate::expressions::AggregateFunction {
25344 name: "SET_AGG".to_string(),
25345 args: f.args,
25346 distinct: false,
25347 order_by: f.order_by,
25348 filter: f.filter,
25349 limit: f.limit,
25350 ignore_nulls: f.ignore_nulls,
25351 inferred_type: None,
25352 },
25353 ))),
25354 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
25355 crate::expressions::AggregateFunction {
25356 name: "ARRAY_UNIQUE_AGG".to_string(),
25357 args: f.args,
25358 distinct: false,
25359 order_by: f.order_by,
25360 filter: f.filter,
25361 limit: f.limit,
25362 ignore_nulls: f.ignore_nulls,
25363 inferred_type: None,
25364 },
25365 ))),
25366 DialectType::Trino | DialectType::DuckDB => {
25367 let agg = crate::expressions::AggFunc {
25368 this: if f.args.is_empty() {
25369 Expression::Null(crate::expressions::Null)
25370 } else {
25371 f.args[0].clone()
25372 },
25373 distinct: true,
25374 order_by: Vec::new(),
25375 filter: None,
25376 ignore_nulls: None,
25377 name: None,
25378 having_max: None,
25379 limit: None,
25380 inferred_type: None,
25381 };
25382 Ok(Expression::ArrayAgg(Box::new(agg)))
25383 }
25384 _ => Ok(Expression::AggregateFunction(f)),
25385 }
25386 } else {
25387 Ok(e)
25388 }
25389 }
25390
25391 Action::PercentileConvert => {
25392 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
25393 if let Expression::AggregateFunction(f) = e {
25394 let name = match target {
25395 DialectType::DuckDB => "QUANTILE",
25396 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
25397 _ => "PERCENTILE",
25398 };
25399 Ok(Expression::AggregateFunction(Box::new(
25400 crate::expressions::AggregateFunction {
25401 name: name.to_string(),
25402 args: f.args,
25403 distinct: f.distinct,
25404 order_by: f.order_by,
25405 filter: f.filter,
25406 limit: f.limit,
25407 ignore_nulls: f.ignore_nulls,
25408 inferred_type: None,
25409 },
25410 )))
25411 } else {
25412 Ok(e)
25413 }
25414 }
25415
25416 Action::CorrIsnanWrap => {
25417 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
25418 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
25419 let corr_clone = e.clone();
25420 let isnan = Expression::Function(Box::new(Function::new(
25421 "ISNAN".to_string(),
25422 vec![corr_clone.clone()],
25423 )));
25424 let case_expr = Expression::Case(Box::new(Case {
25425 operand: None,
25426 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
25427 else_: Some(corr_clone),
25428 comments: Vec::new(),
25429 inferred_type: None,
25430 }));
25431 Ok(case_expr)
25432 }
25433
25434 Action::TruncToDateTrunc => {
25435 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
25436 if let Expression::Function(f) = e {
25437 if f.args.len() == 2 {
25438 let timestamp = f.args[0].clone();
25439 let unit_expr = f.args[1].clone();
25440
25441 if matches!(target, DialectType::ClickHouse) {
25442 // For ClickHouse, produce Expression::DateTrunc which the generator
25443 // outputs as DATE_TRUNC(...) without going through the ClickHouse
25444 // target transform that would convert it to dateTrunc
25445 let unit_str = Self::get_unit_str_static(&unit_expr);
25446 let dt_field = match unit_str.as_str() {
25447 "YEAR" => DateTimeField::Year,
25448 "MONTH" => DateTimeField::Month,
25449 "DAY" => DateTimeField::Day,
25450 "HOUR" => DateTimeField::Hour,
25451 "MINUTE" => DateTimeField::Minute,
25452 "SECOND" => DateTimeField::Second,
25453 "WEEK" => DateTimeField::Week,
25454 "QUARTER" => DateTimeField::Quarter,
25455 _ => DateTimeField::Custom(unit_str),
25456 };
25457 Ok(Expression::DateTrunc(Box::new(
25458 crate::expressions::DateTruncFunc {
25459 this: timestamp,
25460 unit: dt_field,
25461 },
25462 )))
25463 } else {
25464 let new_args = vec![unit_expr, timestamp];
25465 Ok(Expression::Function(Box::new(Function::new(
25466 "DATE_TRUNC".to_string(),
25467 new_args,
25468 ))))
25469 }
25470 } else {
25471 Ok(Expression::Function(f))
25472 }
25473 } else {
25474 Ok(e)
25475 }
25476 }
25477
25478 Action::ArrayContainsConvert => {
25479 if let Expression::ArrayContains(f) = e {
25480 match target {
25481 DialectType::Presto | DialectType::Trino => {
25482 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
25483 Ok(Expression::Function(Box::new(Function::new(
25484 "CONTAINS".to_string(),
25485 vec![f.this, f.expression],
25486 ))))
25487 }
25488 DialectType::Snowflake => {
25489 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
25490 let cast_val =
25491 Expression::Cast(Box::new(crate::expressions::Cast {
25492 this: f.expression,
25493 to: crate::expressions::DataType::Custom {
25494 name: "VARIANT".to_string(),
25495 },
25496 trailing_comments: Vec::new(),
25497 double_colon_syntax: false,
25498 format: None,
25499 default: None,
25500 inferred_type: None,
25501 }));
25502 Ok(Expression::Function(Box::new(Function::new(
25503 "ARRAY_CONTAINS".to_string(),
25504 vec![cast_val, f.this],
25505 ))))
25506 }
25507 _ => Ok(Expression::ArrayContains(f)),
25508 }
25509 } else {
25510 Ok(e)
25511 }
25512 }
25513
25514 Action::ArrayExceptConvert => {
25515 if let Expression::ArrayExcept(f) = e {
25516 let source_arr = f.this;
25517 let exclude_arr = f.expression;
25518 match target {
25519 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
25520 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
25521 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25522 // ELSE LIST_TRANSFORM(LIST_FILTER(
25523 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
25524 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
25525 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
25526 // pair -> pair[1])
25527 // END
25528
25529 // Build null check
25530 let source_is_null =
25531 Expression::IsNull(Box::new(crate::expressions::IsNull {
25532 this: source_arr.clone(),
25533 not: false,
25534 postfix_form: false,
25535 }));
25536 let exclude_is_null =
25537 Expression::IsNull(Box::new(crate::expressions::IsNull {
25538 this: exclude_arr.clone(),
25539 not: false,
25540 postfix_form: false,
25541 }));
25542 let null_check =
25543 Expression::Or(Box::new(crate::expressions::BinaryOp {
25544 left: source_is_null,
25545 right: exclude_is_null,
25546 left_comments: vec![],
25547 operator_comments: vec![],
25548 trailing_comments: vec![],
25549 inferred_type: None,
25550 }));
25551
25552 // GENERATE_SERIES(1, LENGTH(source))
25553 let gen_series = Expression::Function(Box::new(Function::new(
25554 "GENERATE_SERIES".to_string(),
25555 vec![
25556 Expression::number(1),
25557 Expression::Function(Box::new(Function::new(
25558 "LENGTH".to_string(),
25559 vec![source_arr.clone()],
25560 ))),
25561 ],
25562 )));
25563
25564 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
25565 let list_zip = Expression::Function(Box::new(Function::new(
25566 "LIST_ZIP".to_string(),
25567 vec![source_arr.clone(), gen_series],
25568 )));
25569
25570 // pair[1] and pair[2]
25571 let pair_col = Expression::column("pair");
25572 let pair_1 = Expression::Subscript(Box::new(
25573 crate::expressions::Subscript {
25574 this: pair_col.clone(),
25575 index: Expression::number(1),
25576 },
25577 ));
25578 let pair_2 = Expression::Subscript(Box::new(
25579 crate::expressions::Subscript {
25580 this: pair_col.clone(),
25581 index: Expression::number(2),
25582 },
25583 ));
25584
25585 // source[1:pair[2]]
25586 let source_slice = Expression::ArraySlice(Box::new(
25587 crate::expressions::ArraySlice {
25588 this: source_arr.clone(),
25589 start: Some(Expression::number(1)),
25590 end: Some(pair_2),
25591 },
25592 ));
25593
25594 let e_col = Expression::column("e");
25595
25596 // e -> e IS NOT DISTINCT FROM pair[1]
25597 let inner_lambda1 =
25598 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25599 parameters: vec![crate::expressions::Identifier::new("e")],
25600 body: Expression::NullSafeEq(Box::new(
25601 crate::expressions::BinaryOp {
25602 left: e_col.clone(),
25603 right: pair_1.clone(),
25604 left_comments: vec![],
25605 operator_comments: vec![],
25606 trailing_comments: vec![],
25607 inferred_type: None,
25608 },
25609 )),
25610 colon: false,
25611 parameter_types: vec![],
25612 }));
25613
25614 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
25615 let inner_filter1 = Expression::Function(Box::new(Function::new(
25616 "LIST_FILTER".to_string(),
25617 vec![source_slice, inner_lambda1],
25618 )));
25619
25620 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
25621 let len1 = Expression::Function(Box::new(Function::new(
25622 "LENGTH".to_string(),
25623 vec![inner_filter1],
25624 )));
25625
25626 // e -> e IS NOT DISTINCT FROM pair[1]
25627 let inner_lambda2 =
25628 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25629 parameters: vec![crate::expressions::Identifier::new("e")],
25630 body: Expression::NullSafeEq(Box::new(
25631 crate::expressions::BinaryOp {
25632 left: e_col,
25633 right: pair_1.clone(),
25634 left_comments: vec![],
25635 operator_comments: vec![],
25636 trailing_comments: vec![],
25637 inferred_type: None,
25638 },
25639 )),
25640 colon: false,
25641 parameter_types: vec![],
25642 }));
25643
25644 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
25645 let inner_filter2 = Expression::Function(Box::new(Function::new(
25646 "LIST_FILTER".to_string(),
25647 vec![exclude_arr.clone(), inner_lambda2],
25648 )));
25649
25650 // LENGTH(LIST_FILTER(exclude, ...))
25651 let len2 = Expression::Function(Box::new(Function::new(
25652 "LENGTH".to_string(),
25653 vec![inner_filter2],
25654 )));
25655
25656 // (LENGTH(...) > LENGTH(...))
25657 let cond = Expression::Paren(Box::new(Paren {
25658 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
25659 left: len1,
25660 right: len2,
25661 left_comments: vec![],
25662 operator_comments: vec![],
25663 trailing_comments: vec![],
25664 inferred_type: None,
25665 })),
25666 trailing_comments: vec![],
25667 }));
25668
25669 // pair -> (condition)
25670 let filter_lambda =
25671 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25672 parameters: vec![crate::expressions::Identifier::new(
25673 "pair",
25674 )],
25675 body: cond,
25676 colon: false,
25677 parameter_types: vec![],
25678 }));
25679
25680 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
25681 let outer_filter = Expression::Function(Box::new(Function::new(
25682 "LIST_FILTER".to_string(),
25683 vec![list_zip, filter_lambda],
25684 )));
25685
25686 // pair -> pair[1]
25687 let transform_lambda =
25688 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25689 parameters: vec![crate::expressions::Identifier::new(
25690 "pair",
25691 )],
25692 body: pair_1,
25693 colon: false,
25694 parameter_types: vec![],
25695 }));
25696
25697 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
25698 let list_transform = Expression::Function(Box::new(Function::new(
25699 "LIST_TRANSFORM".to_string(),
25700 vec![outer_filter, transform_lambda],
25701 )));
25702
25703 Ok(Expression::Case(Box::new(Case {
25704 operand: None,
25705 whens: vec![(null_check, Expression::Null(Null))],
25706 else_: Some(list_transform),
25707 comments: Vec::new(),
25708 inferred_type: None,
25709 })))
25710 }
25711 DialectType::DuckDB => {
25712 // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
25713 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25714 // ELSE LIST_FILTER(LIST_DISTINCT(source),
25715 // e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
25716 // END
25717
25718 // Build: source IS NULL
25719 let source_is_null =
25720 Expression::IsNull(Box::new(crate::expressions::IsNull {
25721 this: source_arr.clone(),
25722 not: false,
25723 postfix_form: false,
25724 }));
25725 // Build: exclude IS NULL
25726 let exclude_is_null =
25727 Expression::IsNull(Box::new(crate::expressions::IsNull {
25728 this: exclude_arr.clone(),
25729 not: false,
25730 postfix_form: false,
25731 }));
25732 // source IS NULL OR exclude IS NULL
25733 let null_check =
25734 Expression::Or(Box::new(crate::expressions::BinaryOp {
25735 left: source_is_null,
25736 right: exclude_is_null,
25737 left_comments: vec![],
25738 operator_comments: vec![],
25739 trailing_comments: vec![],
25740 inferred_type: None,
25741 }));
25742
25743 // LIST_DISTINCT(source)
25744 let list_distinct = Expression::Function(Box::new(Function::new(
25745 "LIST_DISTINCT".to_string(),
25746 vec![source_arr.clone()],
25747 )));
25748
25749 // x IS NOT DISTINCT FROM e
25750 let x_col = Expression::column("x");
25751 let e_col = Expression::column("e");
25752 let is_not_distinct = Expression::NullSafeEq(Box::new(
25753 crate::expressions::BinaryOp {
25754 left: x_col,
25755 right: e_col.clone(),
25756 left_comments: vec![],
25757 operator_comments: vec![],
25758 trailing_comments: vec![],
25759 inferred_type: None,
25760 },
25761 ));
25762
25763 // x -> x IS NOT DISTINCT FROM e
25764 let inner_lambda =
25765 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25766 parameters: vec![crate::expressions::Identifier::new("x")],
25767 body: is_not_distinct,
25768 colon: false,
25769 parameter_types: vec![],
25770 }));
25771
25772 // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
25773 let inner_list_filter =
25774 Expression::Function(Box::new(Function::new(
25775 "LIST_FILTER".to_string(),
25776 vec![exclude_arr.clone(), inner_lambda],
25777 )));
25778
25779 // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
25780 let len_inner = Expression::Function(Box::new(Function::new(
25781 "LENGTH".to_string(),
25782 vec![inner_list_filter],
25783 )));
25784
25785 // LENGTH(...) = 0
25786 let eq_zero =
25787 Expression::Eq(Box::new(crate::expressions::BinaryOp {
25788 left: len_inner,
25789 right: Expression::number(0),
25790 left_comments: vec![],
25791 operator_comments: vec![],
25792 trailing_comments: vec![],
25793 inferred_type: None,
25794 }));
25795
25796 // e -> LENGTH(LIST_FILTER(...)) = 0
25797 let outer_lambda =
25798 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25799 parameters: vec![crate::expressions::Identifier::new("e")],
25800 body: eq_zero,
25801 colon: false,
25802 parameter_types: vec![],
25803 }));
25804
25805 // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
25806 let outer_list_filter =
25807 Expression::Function(Box::new(Function::new(
25808 "LIST_FILTER".to_string(),
25809 vec![list_distinct, outer_lambda],
25810 )));
25811
25812 // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
25813 Ok(Expression::Case(Box::new(Case {
25814 operand: None,
25815 whens: vec![(null_check, Expression::Null(Null))],
25816 else_: Some(outer_list_filter),
25817 comments: Vec::new(),
25818 inferred_type: None,
25819 })))
25820 }
25821 DialectType::Snowflake => {
25822 // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
25823 Ok(Expression::ArrayExcept(Box::new(
25824 crate::expressions::BinaryFunc {
25825 this: source_arr,
25826 expression: exclude_arr,
25827 original_name: None,
25828 inferred_type: None,
25829 },
25830 )))
25831 }
25832 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25833 // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
25834 Ok(Expression::Function(Box::new(Function::new(
25835 "ARRAY_EXCEPT".to_string(),
25836 vec![source_arr, exclude_arr],
25837 ))))
25838 }
25839 _ => Ok(Expression::ArrayExcept(Box::new(
25840 crate::expressions::BinaryFunc {
25841 this: source_arr,
25842 expression: exclude_arr,
25843 original_name: None,
25844 inferred_type: None,
25845 },
25846 ))),
25847 }
25848 } else {
25849 Ok(e)
25850 }
25851 }
25852
25853 Action::RegexpLikeExasolAnchor => {
25854 // RegexpLike -> Exasol: wrap pattern with .*...*
25855 // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
25856 // dialects does partial match, so we need to anchor with .* on both sides
25857 if let Expression::RegexpLike(mut f) = e {
25858 match &f.pattern {
25859 Expression::Literal(lit)
25860 if matches!(lit.as_ref(), Literal::String(_)) =>
25861 {
25862 let Literal::String(s) = lit.as_ref() else {
25863 unreachable!()
25864 };
25865 // String literal: wrap with .*...*
25866 f.pattern = Expression::Literal(Box::new(Literal::String(
25867 format!(".*{}.*", s),
25868 )));
25869 }
25870 _ => {
25871 // Non-literal: wrap with CONCAT('.*', pattern, '.*')
25872 f.pattern =
25873 Expression::Paren(Box::new(crate::expressions::Paren {
25874 this: Expression::Concat(Box::new(
25875 crate::expressions::BinaryOp {
25876 left: Expression::Concat(Box::new(
25877 crate::expressions::BinaryOp {
25878 left: Expression::Literal(Box::new(
25879 Literal::String(".*".to_string()),
25880 )),
25881 right: f.pattern,
25882 left_comments: vec![],
25883 operator_comments: vec![],
25884 trailing_comments: vec![],
25885 inferred_type: None,
25886 },
25887 )),
25888 right: Expression::Literal(Box::new(
25889 Literal::String(".*".to_string()),
25890 )),
25891 left_comments: vec![],
25892 operator_comments: vec![],
25893 trailing_comments: vec![],
25894 inferred_type: None,
25895 },
25896 )),
25897 trailing_comments: vec![],
25898 }));
25899 }
25900 }
25901 Ok(Expression::RegexpLike(f))
25902 } else {
25903 Ok(e)
25904 }
25905 }
25906
25907 Action::ArrayPositionSnowflakeSwap => {
25908 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
25909 if let Expression::ArrayPosition(f) = e {
25910 Ok(Expression::ArrayPosition(Box::new(
25911 crate::expressions::BinaryFunc {
25912 this: f.expression,
25913 expression: f.this,
25914 original_name: f.original_name,
25915 inferred_type: f.inferred_type,
25916 },
25917 )))
25918 } else {
25919 Ok(e)
25920 }
25921 }
25922
25923 Action::SnowflakeArrayPositionToDuckDB => {
25924 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
25925 // Snowflake uses 0-based indexing, DuckDB uses 1-based
25926 // The parser has this=value, expression=array (Snowflake order)
25927 if let Expression::ArrayPosition(f) = e {
25928 // Create ARRAY_POSITION(array, value) in standard order
25929 let standard_pos =
25930 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
25931 this: f.expression, // array
25932 expression: f.this, // value
25933 original_name: f.original_name,
25934 inferred_type: f.inferred_type,
25935 }));
25936 // Subtract 1 for zero-based indexing
25937 Ok(Expression::Sub(Box::new(BinaryOp {
25938 left: standard_pos,
25939 right: Expression::number(1),
25940 left_comments: vec![],
25941 operator_comments: vec![],
25942 trailing_comments: vec![],
25943 inferred_type: None,
25944 })))
25945 } else {
25946 Ok(e)
25947 }
25948 }
25949
25950 Action::ArrayDistinctConvert => {
25951 // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
25952 // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
25953 // THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
25954 // ELSE LIST_DISTINCT(arr)
25955 // END
25956 if let Expression::ArrayDistinct(f) = e {
25957 let arr = f.this;
25958
25959 // ARRAY_LENGTH(arr)
25960 let array_length = Expression::Function(Box::new(Function::new(
25961 "ARRAY_LENGTH".to_string(),
25962 vec![arr.clone()],
25963 )));
25964 // LIST_COUNT(arr)
25965 let list_count = Expression::Function(Box::new(Function::new(
25966 "LIST_COUNT".to_string(),
25967 vec![arr.clone()],
25968 )));
25969 // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
25970 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
25971 left: array_length,
25972 right: list_count,
25973 left_comments: vec![],
25974 operator_comments: vec![],
25975 trailing_comments: vec![],
25976 inferred_type: None,
25977 }));
25978
25979 // _u column
25980 let u_col = Expression::column("_u");
25981 // NOT _u IS NULL
25982 let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
25983 this: u_col.clone(),
25984 not: false,
25985 postfix_form: false,
25986 }));
25987 let not_u_is_null =
25988 Expression::Not(Box::new(crate::expressions::UnaryOp {
25989 this: u_is_null,
25990 inferred_type: None,
25991 }));
25992 // _u -> NOT _u IS NULL
25993 let filter_lambda =
25994 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25995 parameters: vec![crate::expressions::Identifier::new("_u")],
25996 body: not_u_is_null,
25997 colon: false,
25998 parameter_types: vec![],
25999 }));
26000 // LIST_FILTER(arr, _u -> NOT _u IS NULL)
26001 let list_filter = Expression::Function(Box::new(Function::new(
26002 "LIST_FILTER".to_string(),
26003 vec![arr.clone(), filter_lambda],
26004 )));
26005 // LIST_DISTINCT(LIST_FILTER(arr, ...))
26006 let list_distinct_filtered = Expression::Function(Box::new(Function::new(
26007 "LIST_DISTINCT".to_string(),
26008 vec![list_filter],
26009 )));
26010 // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
26011 let list_append = Expression::Function(Box::new(Function::new(
26012 "LIST_APPEND".to_string(),
26013 vec![list_distinct_filtered, Expression::Null(Null)],
26014 )));
26015
26016 // LIST_DISTINCT(arr)
26017 let list_distinct = Expression::Function(Box::new(Function::new(
26018 "LIST_DISTINCT".to_string(),
26019 vec![arr],
26020 )));
26021
26022 // CASE WHEN neq THEN list_append ELSE list_distinct END
26023 Ok(Expression::Case(Box::new(Case {
26024 operand: None,
26025 whens: vec![(neq, list_append)],
26026 else_: Some(list_distinct),
26027 comments: Vec::new(),
26028 inferred_type: None,
26029 })))
26030 } else {
26031 Ok(e)
26032 }
26033 }
26034
26035 Action::ArrayDistinctClickHouse => {
26036 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
26037 if let Expression::ArrayDistinct(f) = e {
26038 Ok(Expression::Function(Box::new(Function::new(
26039 "arrayDistinct".to_string(),
26040 vec![f.this],
26041 ))))
26042 } else {
26043 Ok(e)
26044 }
26045 }
26046
26047 Action::ArrayContainsDuckDBConvert => {
26048 // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
26049 // CASE WHEN value IS NULL
26050 // THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26051 // ELSE ARRAY_CONTAINS(array, value)
26052 // END
26053 // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
26054 if let Expression::ArrayContains(f) = e {
26055 let value = f.this;
26056 let array = f.expression;
26057
26058 // value IS NULL
26059 let value_is_null =
26060 Expression::IsNull(Box::new(crate::expressions::IsNull {
26061 this: value.clone(),
26062 not: false,
26063 postfix_form: false,
26064 }));
26065
26066 // ARRAY_LENGTH(array)
26067 let array_length = Expression::Function(Box::new(Function::new(
26068 "ARRAY_LENGTH".to_string(),
26069 vec![array.clone()],
26070 )));
26071 // LIST_COUNT(array)
26072 let list_count = Expression::Function(Box::new(Function::new(
26073 "LIST_COUNT".to_string(),
26074 vec![array.clone()],
26075 )));
26076 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
26077 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
26078 left: array_length,
26079 right: list_count,
26080 left_comments: vec![],
26081 operator_comments: vec![],
26082 trailing_comments: vec![],
26083 inferred_type: None,
26084 }));
26085 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26086 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
26087 this: Box::new(neq),
26088 expression: Box::new(Expression::Boolean(
26089 crate::expressions::BooleanLiteral { value: false },
26090 )),
26091 }));
26092
26093 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
26094 let array_contains = Expression::Function(Box::new(Function::new(
26095 "ARRAY_CONTAINS".to_string(),
26096 vec![array, value],
26097 )));
26098
26099 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
26100 Ok(Expression::Case(Box::new(Case {
26101 operand: None,
26102 whens: vec![(value_is_null, nullif)],
26103 else_: Some(array_contains),
26104 comments: Vec::new(),
26105 inferred_type: None,
26106 })))
26107 } else {
26108 Ok(e)
26109 }
26110 }
26111
26112 Action::StrPositionExpand => {
26113 // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
26114 // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26115 // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26116 if let Expression::StrPosition(sp) = e {
26117 let crate::expressions::StrPosition {
26118 this,
26119 substr,
26120 position,
26121 occurrence,
26122 } = *sp;
26123 let string = *this;
26124 let substr_expr = match substr {
26125 Some(s) => *s,
26126 None => Expression::Null(Null),
26127 };
26128 let pos = match position {
26129 Some(p) => *p,
26130 None => Expression::number(1),
26131 };
26132
26133 // SUBSTRING(string, pos)
26134 let substring_call = Expression::Function(Box::new(Function::new(
26135 "SUBSTRING".to_string(),
26136 vec![string.clone(), pos.clone()],
26137 )));
26138 // STRPOS(SUBSTRING(string, pos), substr)
26139 let strpos_call = Expression::Function(Box::new(Function::new(
26140 "STRPOS".to_string(),
26141 vec![substring_call, substr_expr.clone()],
26142 )));
26143 // STRPOS(...) + pos - 1
26144 let pos_adjusted =
26145 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
26146 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
26147 strpos_call.clone(),
26148 pos.clone(),
26149 ))),
26150 Expression::number(1),
26151 )));
26152 // STRPOS(...) = 0
26153 let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
26154 strpos_call.clone(),
26155 Expression::number(0),
26156 )));
26157
26158 match target {
26159 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26160 // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26161 Ok(Expression::Function(Box::new(Function::new(
26162 "IF".to_string(),
26163 vec![is_zero, Expression::number(0), pos_adjusted],
26164 ))))
26165 }
26166 DialectType::DuckDB => {
26167 // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26168 Ok(Expression::Case(Box::new(Case {
26169 operand: None,
26170 whens: vec![(is_zero, Expression::number(0))],
26171 else_: Some(pos_adjusted),
26172 comments: Vec::new(),
26173 inferred_type: None,
26174 })))
26175 }
26176 _ => {
26177 // Reconstruct StrPosition
26178 Ok(Expression::StrPosition(Box::new(
26179 crate::expressions::StrPosition {
26180 this: Box::new(string),
26181 substr: Some(Box::new(substr_expr)),
26182 position: Some(Box::new(pos)),
26183 occurrence,
26184 },
26185 )))
26186 }
26187 }
26188 } else {
26189 Ok(e)
26190 }
26191 }
26192
26193 Action::MonthsBetweenConvert => {
26194 if let Expression::MonthsBetween(mb) = e {
26195 let crate::expressions::BinaryFunc {
26196 this: end_date,
26197 expression: start_date,
26198 ..
26199 } = *mb;
26200 match target {
26201 DialectType::DuckDB => {
26202 let cast_end = Self::ensure_cast_date(end_date);
26203 let cast_start = Self::ensure_cast_date(start_date);
26204 let dd = Expression::Function(Box::new(Function::new(
26205 "DATE_DIFF".to_string(),
26206 vec![
26207 Expression::string("MONTH"),
26208 cast_start.clone(),
26209 cast_end.clone(),
26210 ],
26211 )));
26212 let day_end = Expression::Function(Box::new(Function::new(
26213 "DAY".to_string(),
26214 vec![cast_end.clone()],
26215 )));
26216 let day_start = Expression::Function(Box::new(Function::new(
26217 "DAY".to_string(),
26218 vec![cast_start.clone()],
26219 )));
26220 let last_day_end = Expression::Function(Box::new(Function::new(
26221 "LAST_DAY".to_string(),
26222 vec![cast_end.clone()],
26223 )));
26224 let last_day_start = Expression::Function(Box::new(Function::new(
26225 "LAST_DAY".to_string(),
26226 vec![cast_start.clone()],
26227 )));
26228 let day_last_end = Expression::Function(Box::new(Function::new(
26229 "DAY".to_string(),
26230 vec![last_day_end],
26231 )));
26232 let day_last_start = Expression::Function(Box::new(Function::new(
26233 "DAY".to_string(),
26234 vec![last_day_start],
26235 )));
26236 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
26237 day_end.clone(),
26238 day_last_end,
26239 )));
26240 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
26241 day_start.clone(),
26242 day_last_start,
26243 )));
26244 let both_cond =
26245 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
26246 let day_diff =
26247 Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
26248 let day_diff_paren =
26249 Expression::Paren(Box::new(crate::expressions::Paren {
26250 this: day_diff,
26251 trailing_comments: Vec::new(),
26252 }));
26253 let frac = Expression::Div(Box::new(BinaryOp::new(
26254 day_diff_paren,
26255 Expression::Literal(Box::new(Literal::Number(
26256 "31.0".to_string(),
26257 ))),
26258 )));
26259 let case_expr = Expression::Case(Box::new(Case {
26260 operand: None,
26261 whens: vec![(both_cond, Expression::number(0))],
26262 else_: Some(frac),
26263 comments: Vec::new(),
26264 inferred_type: None,
26265 }));
26266 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
26267 }
26268 DialectType::Snowflake | DialectType::Redshift => {
26269 let unit = Expression::Identifier(Identifier::new("MONTH"));
26270 Ok(Expression::Function(Box::new(Function::new(
26271 "DATEDIFF".to_string(),
26272 vec![unit, start_date, end_date],
26273 ))))
26274 }
26275 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26276 Ok(Expression::Function(Box::new(Function::new(
26277 "DATE_DIFF".to_string(),
26278 vec![Expression::string("MONTH"), start_date, end_date],
26279 ))))
26280 }
26281 _ => Ok(Expression::MonthsBetween(Box::new(
26282 crate::expressions::BinaryFunc {
26283 this: end_date,
26284 expression: start_date,
26285 original_name: None,
26286 inferred_type: None,
26287 },
26288 ))),
26289 }
26290 } else {
26291 Ok(e)
26292 }
26293 }
26294
26295 Action::AddMonthsConvert => {
26296 if let Expression::AddMonths(am) = e {
26297 let date = am.this;
26298 let val = am.expression;
26299 match target {
26300 DialectType::TSQL | DialectType::Fabric => {
26301 let cast_date = Self::ensure_cast_datetime2(date);
26302 Ok(Expression::Function(Box::new(Function::new(
26303 "DATEADD".to_string(),
26304 vec![
26305 Expression::Identifier(Identifier::new("MONTH")),
26306 val,
26307 cast_date,
26308 ],
26309 ))))
26310 }
26311 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
26312 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26313 // Optionally wrapped in CAST(... AS type) if the input had a specific type
26314
26315 // Determine the cast type from the date expression
26316 let (cast_date, return_type) = match &date {
26317 Expression::Literal(lit)
26318 if matches!(lit.as_ref(), Literal::String(_)) =>
26319 {
26320 // String literal: CAST(str AS TIMESTAMP), no outer CAST
26321 (
26322 Expression::Cast(Box::new(Cast {
26323 this: date.clone(),
26324 to: DataType::Timestamp {
26325 precision: None,
26326 timezone: false,
26327 },
26328 trailing_comments: Vec::new(),
26329 double_colon_syntax: false,
26330 format: None,
26331 default: None,
26332 inferred_type: None,
26333 })),
26334 None,
26335 )
26336 }
26337 Expression::Cast(c) => {
26338 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
26339 (date.clone(), Some(c.to.clone()))
26340 }
26341 _ => {
26342 // Expression or NULL::TYPE - keep as-is, check for cast type
26343 if let Expression::Cast(c) = &date {
26344 (date.clone(), Some(c.to.clone()))
26345 } else {
26346 (date.clone(), None)
26347 }
26348 }
26349 };
26350
26351 // Build the interval expression
26352 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
26353 // For integer values, use INTERVAL val MONTH
26354 let is_non_integer_val = match &val {
26355 Expression::Literal(lit)
26356 if matches!(lit.as_ref(), Literal::Number(_)) =>
26357 {
26358 let Literal::Number(n) = lit.as_ref() else {
26359 unreachable!()
26360 };
26361 n.contains('.')
26362 }
26363 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
26364 Expression::Neg(n) => {
26365 if let Expression::Literal(lit) = &n.this {
26366 if let Literal::Number(s) = lit.as_ref() {
26367 s.contains('.')
26368 } else {
26369 false
26370 }
26371 } else {
26372 false
26373 }
26374 }
26375 _ => false,
26376 };
26377
26378 let add_interval = if is_non_integer_val {
26379 // TO_MONTHS(CAST(ROUND(val) AS INT))
26380 let round_val = Expression::Function(Box::new(Function::new(
26381 "ROUND".to_string(),
26382 vec![val.clone()],
26383 )));
26384 let cast_int = Expression::Cast(Box::new(Cast {
26385 this: round_val,
26386 to: DataType::Int {
26387 length: None,
26388 integer_spelling: false,
26389 },
26390 trailing_comments: Vec::new(),
26391 double_colon_syntax: false,
26392 format: None,
26393 default: None,
26394 inferred_type: None,
26395 }));
26396 Expression::Function(Box::new(Function::new(
26397 "TO_MONTHS".to_string(),
26398 vec![cast_int],
26399 )))
26400 } else {
26401 // INTERVAL val MONTH
26402 // For negative numbers, wrap in parens
26403 let interval_val = match &val {
26404 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
26405 {
26406 let Literal::Number(_) = lit.as_ref() else {
26407 unreachable!()
26408 };
26409 Expression::Paren(Box::new(Paren {
26410 this: val.clone(),
26411 trailing_comments: Vec::new(),
26412 }))
26413 }
26414 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
26415 this: val.clone(),
26416 trailing_comments: Vec::new(),
26417 })),
26418 Expression::Null(_) => Expression::Paren(Box::new(Paren {
26419 this: val.clone(),
26420 trailing_comments: Vec::new(),
26421 })),
26422 _ => val.clone(),
26423 };
26424 Expression::Interval(Box::new(crate::expressions::Interval {
26425 this: Some(interval_val),
26426 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26427 unit: crate::expressions::IntervalUnit::Month,
26428 use_plural: false,
26429 }),
26430 }))
26431 };
26432
26433 // Build: date + interval
26434 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
26435 cast_date.clone(),
26436 add_interval.clone(),
26437 )));
26438
26439 // Build LAST_DAY(date)
26440 let last_day_date = Expression::Function(Box::new(Function::new(
26441 "LAST_DAY".to_string(),
26442 vec![cast_date.clone()],
26443 )));
26444
26445 // Build LAST_DAY(date + interval)
26446 let last_day_date_plus =
26447 Expression::Function(Box::new(Function::new(
26448 "LAST_DAY".to_string(),
26449 vec![date_plus_interval.clone()],
26450 )));
26451
26452 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26453 let case_expr = Expression::Case(Box::new(Case {
26454 operand: None,
26455 whens: vec![(
26456 Expression::Eq(Box::new(BinaryOp::new(
26457 last_day_date,
26458 cast_date.clone(),
26459 ))),
26460 last_day_date_plus,
26461 )],
26462 else_: Some(date_plus_interval),
26463 comments: Vec::new(),
26464 inferred_type: None,
26465 }));
26466
26467 // Wrap in CAST(... AS type) if needed
26468 if let Some(dt) = return_type {
26469 Ok(Expression::Cast(Box::new(Cast {
26470 this: case_expr,
26471 to: dt,
26472 trailing_comments: Vec::new(),
26473 double_colon_syntax: false,
26474 format: None,
26475 default: None,
26476 inferred_type: None,
26477 })))
26478 } else {
26479 Ok(case_expr)
26480 }
26481 }
26482 DialectType::DuckDB => {
26483 // Non-Snowflake source: simple date + INTERVAL
26484 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26485 {
26486 Expression::Cast(Box::new(Cast {
26487 this: date,
26488 to: DataType::Timestamp {
26489 precision: None,
26490 timezone: false,
26491 },
26492 trailing_comments: Vec::new(),
26493 double_colon_syntax: false,
26494 format: None,
26495 default: None,
26496 inferred_type: None,
26497 }))
26498 } else {
26499 date
26500 };
26501 let interval =
26502 Expression::Interval(Box::new(crate::expressions::Interval {
26503 this: Some(val),
26504 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26505 unit: crate::expressions::IntervalUnit::Month,
26506 use_plural: false,
26507 }),
26508 }));
26509 Ok(Expression::Add(Box::new(BinaryOp::new(
26510 cast_date, interval,
26511 ))))
26512 }
26513 DialectType::Snowflake => {
26514 // Keep ADD_MONTHS when source is also Snowflake
26515 if matches!(source, DialectType::Snowflake) {
26516 Ok(Expression::Function(Box::new(Function::new(
26517 "ADD_MONTHS".to_string(),
26518 vec![date, val],
26519 ))))
26520 } else {
26521 Ok(Expression::Function(Box::new(Function::new(
26522 "DATEADD".to_string(),
26523 vec![
26524 Expression::Identifier(Identifier::new("MONTH")),
26525 val,
26526 date,
26527 ],
26528 ))))
26529 }
26530 }
26531 DialectType::Redshift => {
26532 Ok(Expression::Function(Box::new(Function::new(
26533 "DATEADD".to_string(),
26534 vec![
26535 Expression::Identifier(Identifier::new("MONTH")),
26536 val,
26537 date,
26538 ],
26539 ))))
26540 }
26541 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26542 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26543 {
26544 Expression::Cast(Box::new(Cast {
26545 this: date,
26546 to: DataType::Timestamp {
26547 precision: None,
26548 timezone: false,
26549 },
26550 trailing_comments: Vec::new(),
26551 double_colon_syntax: false,
26552 format: None,
26553 default: None,
26554 inferred_type: None,
26555 }))
26556 } else {
26557 date
26558 };
26559 Ok(Expression::Function(Box::new(Function::new(
26560 "DATE_ADD".to_string(),
26561 vec![Expression::string("MONTH"), val, cast_date],
26562 ))))
26563 }
26564 DialectType::BigQuery => {
26565 let interval =
26566 Expression::Interval(Box::new(crate::expressions::Interval {
26567 this: Some(val),
26568 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26569 unit: crate::expressions::IntervalUnit::Month,
26570 use_plural: false,
26571 }),
26572 }));
26573 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26574 {
26575 Expression::Cast(Box::new(Cast {
26576 this: date,
26577 to: DataType::Custom {
26578 name: "DATETIME".to_string(),
26579 },
26580 trailing_comments: Vec::new(),
26581 double_colon_syntax: false,
26582 format: None,
26583 default: None,
26584 inferred_type: None,
26585 }))
26586 } else {
26587 date
26588 };
26589 Ok(Expression::Function(Box::new(Function::new(
26590 "DATE_ADD".to_string(),
26591 vec![cast_date, interval],
26592 ))))
26593 }
26594 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
26595 Ok(Expression::Function(Box::new(Function::new(
26596 "ADD_MONTHS".to_string(),
26597 vec![date, val],
26598 ))))
26599 }
26600 _ => {
26601 // Default: keep as AddMonths expression
26602 Ok(Expression::AddMonths(Box::new(
26603 crate::expressions::BinaryFunc {
26604 this: date,
26605 expression: val,
26606 original_name: None,
26607 inferred_type: None,
26608 },
26609 )))
26610 }
26611 }
26612 } else {
26613 Ok(e)
26614 }
26615 }
26616
26617 Action::PercentileContConvert => {
26618 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
26619 // Presto/Trino: APPROX_PERCENTILE(col, p)
26620 // Spark/Databricks: PERCENTILE_APPROX(col, p)
26621 if let Expression::WithinGroup(wg) = e {
26622 // Extract percentile value and order by column
26623 let (percentile, _is_disc) = match &wg.this {
26624 Expression::Function(f) => {
26625 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26626 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
26627 Box::new(Literal::Number("0.5".to_string())),
26628 ));
26629 (pct, is_disc)
26630 }
26631 Expression::AggregateFunction(af) => {
26632 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26633 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
26634 Box::new(Literal::Number("0.5".to_string())),
26635 ));
26636 (pct, is_disc)
26637 }
26638 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
26639 _ => return Ok(Expression::WithinGroup(wg)),
26640 };
26641 let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
26642 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
26643 );
26644
26645 let func_name = match target {
26646 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26647 "APPROX_PERCENTILE"
26648 }
26649 _ => "PERCENTILE_APPROX", // Spark, Databricks
26650 };
26651 Ok(Expression::Function(Box::new(Function::new(
26652 func_name.to_string(),
26653 vec![col, percentile],
26654 ))))
26655 } else {
26656 Ok(e)
26657 }
26658 }
26659
26660 Action::CurrentUserSparkParens => {
26661 // CURRENT_USER -> CURRENT_USER() for Spark
26662 if let Expression::CurrentUser(_) = e {
26663 Ok(Expression::Function(Box::new(Function::new(
26664 "CURRENT_USER".to_string(),
26665 vec![],
26666 ))))
26667 } else {
26668 Ok(e)
26669 }
26670 }
26671
26672 Action::SparkDateFuncCast => {
26673 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
26674 let cast_arg = |arg: Expression| -> Expression {
26675 match target {
26676 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26677 Self::double_cast_timestamp_date(arg)
26678 }
26679 _ => {
26680 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
26681 Self::ensure_cast_date(arg)
26682 }
26683 }
26684 };
26685 match e {
26686 Expression::Month(f) => Ok(Expression::Month(Box::new(
26687 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26688 ))),
26689 Expression::Year(f) => Ok(Expression::Year(Box::new(
26690 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26691 ))),
26692 Expression::Day(f) => Ok(Expression::Day(Box::new(
26693 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26694 ))),
26695 other => Ok(other),
26696 }
26697 }
26698
26699 Action::MapFromArraysConvert => {
26700 // Expression::MapFromArrays -> target-specific
26701 if let Expression::MapFromArrays(mfa) = e {
26702 let keys = mfa.this;
26703 let values = mfa.expression;
26704 match target {
26705 DialectType::Snowflake => Ok(Expression::Function(Box::new(
26706 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
26707 ))),
26708 _ => {
26709 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
26710 Ok(Expression::Function(Box::new(Function::new(
26711 "MAP".to_string(),
26712 vec![keys, values],
26713 ))))
26714 }
26715 }
26716 } else {
26717 Ok(e)
26718 }
26719 }
26720
26721 Action::AnyToExists => {
26722 if let Expression::Any(q) = e {
26723 if let Some(op) = q.op.clone() {
26724 let lambda_param = crate::expressions::Identifier::new("x");
26725 let rhs = Expression::Identifier(lambda_param.clone());
26726 let body = match op {
26727 crate::expressions::QuantifiedOp::Eq => {
26728 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
26729 }
26730 crate::expressions::QuantifiedOp::Neq => {
26731 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
26732 }
26733 crate::expressions::QuantifiedOp::Lt => {
26734 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
26735 }
26736 crate::expressions::QuantifiedOp::Lte => {
26737 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
26738 }
26739 crate::expressions::QuantifiedOp::Gt => {
26740 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
26741 }
26742 crate::expressions::QuantifiedOp::Gte => {
26743 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
26744 }
26745 };
26746 let lambda =
26747 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26748 parameters: vec![lambda_param],
26749 body,
26750 colon: false,
26751 parameter_types: Vec::new(),
26752 }));
26753 Ok(Expression::Function(Box::new(Function::new(
26754 "EXISTS".to_string(),
26755 vec![q.subquery, lambda],
26756 ))))
26757 } else {
26758 Ok(Expression::Any(q))
26759 }
26760 } else {
26761 Ok(e)
26762 }
26763 }
26764
26765 Action::GenerateSeriesConvert => {
26766 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
26767 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
26768 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
26769 if let Expression::Function(f) = e {
26770 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
26771 let start = f.args[0].clone();
26772 let end = f.args[1].clone();
26773 let step = f.args.get(2).cloned();
26774
26775 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
26776 let step = step.map(|s| Self::normalize_interval_string(s, target));
26777
26778 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
26779 let maybe_cast_timestamp = |arg: Expression| -> Expression {
26780 if matches!(
26781 target,
26782 DialectType::Presto
26783 | DialectType::Trino
26784 | DialectType::Athena
26785 | DialectType::Spark
26786 | DialectType::Databricks
26787 | DialectType::Hive
26788 ) {
26789 match &arg {
26790 Expression::CurrentTimestamp(_) => {
26791 Expression::Cast(Box::new(Cast {
26792 this: arg,
26793 to: DataType::Timestamp {
26794 precision: None,
26795 timezone: false,
26796 },
26797 trailing_comments: Vec::new(),
26798 double_colon_syntax: false,
26799 format: None,
26800 default: None,
26801 inferred_type: None,
26802 }))
26803 }
26804 _ => arg,
26805 }
26806 } else {
26807 arg
26808 }
26809 };
26810
26811 let start = maybe_cast_timestamp(start);
26812 let end = maybe_cast_timestamp(end);
26813
26814 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
26815 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
26816 let mut gs_args = vec![start, end];
26817 if let Some(step) = step {
26818 gs_args.push(step);
26819 }
26820 return Ok(Expression::Function(Box::new(Function::new(
26821 "GENERATE_SERIES".to_string(),
26822 gs_args,
26823 ))));
26824 }
26825
26826 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
26827 if matches!(target, DialectType::DuckDB) {
26828 let mut gs_args = vec![start, end];
26829 if let Some(step) = step {
26830 gs_args.push(step);
26831 }
26832 let gs = Expression::Function(Box::new(Function::new(
26833 "GENERATE_SERIES".to_string(),
26834 gs_args,
26835 )));
26836 return Ok(Expression::Function(Box::new(Function::new(
26837 "UNNEST".to_string(),
26838 vec![gs],
26839 ))));
26840 }
26841
26842 let mut seq_args = vec![start, end];
26843 if let Some(step) = step {
26844 seq_args.push(step);
26845 }
26846
26847 let seq = Expression::Function(Box::new(Function::new(
26848 "SEQUENCE".to_string(),
26849 seq_args,
26850 )));
26851
26852 match target {
26853 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26854 // Wrap in UNNEST
26855 Ok(Expression::Function(Box::new(Function::new(
26856 "UNNEST".to_string(),
26857 vec![seq],
26858 ))))
26859 }
26860 DialectType::Spark
26861 | DialectType::Databricks
26862 | DialectType::Hive => {
26863 // Wrap in EXPLODE
26864 Ok(Expression::Function(Box::new(Function::new(
26865 "EXPLODE".to_string(),
26866 vec![seq],
26867 ))))
26868 }
26869 _ => {
26870 // Just SEQUENCE for others
26871 Ok(seq)
26872 }
26873 }
26874 } else {
26875 Ok(Expression::Function(f))
26876 }
26877 } else {
26878 Ok(e)
26879 }
26880 }
26881
26882 Action::ConcatCoalesceWrap => {
26883 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
26884 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
26885 if let Expression::Function(f) = e {
26886 if f.name.eq_ignore_ascii_case("CONCAT") {
26887 let new_args: Vec<Expression> = f
26888 .args
26889 .into_iter()
26890 .map(|arg| {
26891 let cast_arg = if matches!(
26892 target,
26893 DialectType::Presto
26894 | DialectType::Trino
26895 | DialectType::Athena
26896 ) {
26897 Expression::Cast(Box::new(Cast {
26898 this: arg,
26899 to: DataType::VarChar {
26900 length: None,
26901 parenthesized_length: false,
26902 },
26903 trailing_comments: Vec::new(),
26904 double_colon_syntax: false,
26905 format: None,
26906 default: None,
26907 inferred_type: None,
26908 }))
26909 } else {
26910 arg
26911 };
26912 Expression::Function(Box::new(Function::new(
26913 "COALESCE".to_string(),
26914 vec![cast_arg, Expression::string("")],
26915 )))
26916 })
26917 .collect();
26918 Ok(Expression::Function(Box::new(Function::new(
26919 "CONCAT".to_string(),
26920 new_args,
26921 ))))
26922 } else {
26923 Ok(Expression::Function(f))
26924 }
26925 } else {
26926 Ok(e)
26927 }
26928 }
26929
26930 Action::PipeConcatToConcat => {
26931 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
26932 if let Expression::Concat(op) = e {
26933 let cast_left = Expression::Cast(Box::new(Cast {
26934 this: op.left,
26935 to: DataType::VarChar {
26936 length: None,
26937 parenthesized_length: false,
26938 },
26939 trailing_comments: Vec::new(),
26940 double_colon_syntax: false,
26941 format: None,
26942 default: None,
26943 inferred_type: None,
26944 }));
26945 let cast_right = Expression::Cast(Box::new(Cast {
26946 this: op.right,
26947 to: DataType::VarChar {
26948 length: None,
26949 parenthesized_length: false,
26950 },
26951 trailing_comments: Vec::new(),
26952 double_colon_syntax: false,
26953 format: None,
26954 default: None,
26955 inferred_type: None,
26956 }));
26957 Ok(Expression::Function(Box::new(Function::new(
26958 "CONCAT".to_string(),
26959 vec![cast_left, cast_right],
26960 ))))
26961 } else {
26962 Ok(e)
26963 }
26964 }
26965
26966 Action::DivFuncConvert => {
26967 // DIV(a, b) -> target-specific integer division
26968 if let Expression::Function(f) = e {
26969 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
26970 let a = f.args[0].clone();
26971 let b = f.args[1].clone();
26972 match target {
26973 DialectType::DuckDB => {
26974 // DIV(a, b) -> CAST(a // b AS DECIMAL)
26975 let int_div = Expression::IntDiv(Box::new(
26976 crate::expressions::BinaryFunc {
26977 this: a,
26978 expression: b,
26979 original_name: None,
26980 inferred_type: None,
26981 },
26982 ));
26983 Ok(Expression::Cast(Box::new(Cast {
26984 this: int_div,
26985 to: DataType::Decimal {
26986 precision: None,
26987 scale: None,
26988 },
26989 trailing_comments: Vec::new(),
26990 double_colon_syntax: false,
26991 format: None,
26992 default: None,
26993 inferred_type: None,
26994 })))
26995 }
26996 DialectType::BigQuery => {
26997 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
26998 let div_func = Expression::Function(Box::new(Function::new(
26999 "DIV".to_string(),
27000 vec![a, b],
27001 )));
27002 Ok(Expression::Cast(Box::new(Cast {
27003 this: div_func,
27004 to: DataType::Custom {
27005 name: "NUMERIC".to_string(),
27006 },
27007 trailing_comments: Vec::new(),
27008 double_colon_syntax: false,
27009 format: None,
27010 default: None,
27011 inferred_type: None,
27012 })))
27013 }
27014 DialectType::SQLite => {
27015 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
27016 let cast_a = Expression::Cast(Box::new(Cast {
27017 this: a,
27018 to: DataType::Custom {
27019 name: "REAL".to_string(),
27020 },
27021 trailing_comments: Vec::new(),
27022 double_colon_syntax: false,
27023 format: None,
27024 default: None,
27025 inferred_type: None,
27026 }));
27027 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
27028 let cast_int = Expression::Cast(Box::new(Cast {
27029 this: div,
27030 to: DataType::Int {
27031 length: None,
27032 integer_spelling: true,
27033 },
27034 trailing_comments: Vec::new(),
27035 double_colon_syntax: false,
27036 format: None,
27037 default: None,
27038 inferred_type: None,
27039 }));
27040 Ok(Expression::Cast(Box::new(Cast {
27041 this: cast_int,
27042 to: DataType::Custom {
27043 name: "REAL".to_string(),
27044 },
27045 trailing_comments: Vec::new(),
27046 double_colon_syntax: false,
27047 format: None,
27048 default: None,
27049 inferred_type: None,
27050 })))
27051 }
27052 _ => Ok(Expression::Function(f)),
27053 }
27054 } else {
27055 Ok(Expression::Function(f))
27056 }
27057 } else {
27058 Ok(e)
27059 }
27060 }
27061
27062 Action::JsonObjectAggConvert => {
27063 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
27064 match e {
27065 Expression::Function(f) => Ok(Expression::Function(Box::new(
27066 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
27067 ))),
27068 Expression::AggregateFunction(af) => {
27069 // AggregateFunction stores all args in the `args` vec
27070 Ok(Expression::Function(Box::new(Function::new(
27071 "JSON_GROUP_OBJECT".to_string(),
27072 af.args,
27073 ))))
27074 }
27075 other => Ok(other),
27076 }
27077 }
27078
27079 Action::JsonbExistsConvert => {
27080 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
27081 if let Expression::Function(f) = e {
27082 if f.args.len() == 2 {
27083 let json_expr = f.args[0].clone();
27084 let key = match &f.args[1] {
27085 Expression::Literal(lit)
27086 if matches!(
27087 lit.as_ref(),
27088 crate::expressions::Literal::String(_)
27089 ) =>
27090 {
27091 let crate::expressions::Literal::String(s) = lit.as_ref()
27092 else {
27093 unreachable!()
27094 };
27095 format!("$.{}", s)
27096 }
27097 _ => return Ok(Expression::Function(f)),
27098 };
27099 Ok(Expression::Function(Box::new(Function::new(
27100 "JSON_EXISTS".to_string(),
27101 vec![json_expr, Expression::string(&key)],
27102 ))))
27103 } else {
27104 Ok(Expression::Function(f))
27105 }
27106 } else {
27107 Ok(e)
27108 }
27109 }
27110
27111 Action::DateBinConvert => {
27112 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
27113 if let Expression::Function(f) = e {
27114 Ok(Expression::Function(Box::new(Function::new(
27115 "TIME_BUCKET".to_string(),
27116 f.args,
27117 ))))
27118 } else {
27119 Ok(e)
27120 }
27121 }
27122
27123 Action::MysqlCastCharToText => {
27124 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
27125 if let Expression::Cast(mut c) = e {
27126 c.to = DataType::Text;
27127 Ok(Expression::Cast(c))
27128 } else {
27129 Ok(e)
27130 }
27131 }
27132
27133 Action::SparkCastVarcharToString => {
27134 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
27135 match e {
27136 Expression::Cast(mut c) => {
27137 c.to = Self::normalize_varchar_to_string(c.to);
27138 Ok(Expression::Cast(c))
27139 }
27140 Expression::TryCast(mut c) => {
27141 c.to = Self::normalize_varchar_to_string(c.to);
27142 Ok(Expression::TryCast(c))
27143 }
27144 _ => Ok(e),
27145 }
27146 }
27147
27148 Action::MinMaxToLeastGreatest => {
27149 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
27150 if let Expression::Function(f) = e {
27151 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
27152 "LEAST"
27153 } else if f.name.eq_ignore_ascii_case("MAX") {
27154 "GREATEST"
27155 } else {
27156 return Ok(Expression::Function(f));
27157 };
27158 Ok(Expression::Function(Box::new(Function::new(
27159 new_name.to_string(),
27160 f.args,
27161 ))))
27162 } else {
27163 Ok(e)
27164 }
27165 }
27166
27167 Action::ClickHouseUniqToApproxCountDistinct => {
27168 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
27169 if let Expression::Function(f) = e {
27170 Ok(Expression::Function(Box::new(Function::new(
27171 "APPROX_COUNT_DISTINCT".to_string(),
27172 f.args,
27173 ))))
27174 } else {
27175 Ok(e)
27176 }
27177 }
27178
27179 Action::ClickHouseAnyToAnyValue => {
27180 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
27181 if let Expression::Function(f) = e {
27182 Ok(Expression::Function(Box::new(Function::new(
27183 "ANY_VALUE".to_string(),
27184 f.args,
27185 ))))
27186 } else {
27187 Ok(e)
27188 }
27189 }
27190
27191 Action::OracleVarchar2ToVarchar => {
27192 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
27193 if let Expression::DataType(DataType::Custom { ref name }) = e {
27194 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
27195 let starts_varchar2 =
27196 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
27197 let starts_nvarchar2 =
27198 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
27199 let inner = if starts_varchar2 || starts_nvarchar2 {
27200 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
27201 let end = name.len() - 1; // skip trailing ")"
27202 Some(&name[start..end])
27203 } else {
27204 Option::None
27205 };
27206 if let Some(inner_str) = inner {
27207 // Parse the number part, ignoring BYTE/CHAR qualifier
27208 let num_str = inner_str.split_whitespace().next().unwrap_or("");
27209 if let Ok(n) = num_str.parse::<u32>() {
27210 Ok(Expression::DataType(DataType::VarChar {
27211 length: Some(n),
27212 parenthesized_length: false,
27213 }))
27214 } else {
27215 Ok(e)
27216 }
27217 } else {
27218 // Plain VARCHAR2 / NVARCHAR2 without parens
27219 Ok(Expression::DataType(DataType::VarChar {
27220 length: Option::None,
27221 parenthesized_length: false,
27222 }))
27223 }
27224 } else {
27225 Ok(e)
27226 }
27227 }
27228
27229 Action::Nvl2Expand => {
27230 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
27231 // But keep as NVL2 for dialects that support it natively
27232 let nvl2_native = matches!(
27233 target,
27234 DialectType::Oracle
27235 | DialectType::Snowflake
27236 | DialectType::Redshift
27237 | DialectType::Teradata
27238 | DialectType::Spark
27239 | DialectType::Databricks
27240 );
27241 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
27242 if nvl2_native {
27243 return Ok(Expression::Nvl2(nvl2));
27244 }
27245 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
27246 } else if let Expression::Function(f) = e {
27247 if nvl2_native {
27248 return Ok(Expression::Function(Box::new(Function::new(
27249 "NVL2".to_string(),
27250 f.args,
27251 ))));
27252 }
27253 if f.args.len() < 2 {
27254 return Ok(Expression::Function(f));
27255 }
27256 let mut args = f.args;
27257 let a = args.remove(0);
27258 let b = args.remove(0);
27259 let c = if !args.is_empty() {
27260 Some(args.remove(0))
27261 } else {
27262 Option::None
27263 };
27264 (a, b, c)
27265 } else {
27266 return Ok(e);
27267 };
27268 // Build: NOT (a IS NULL)
27269 let is_null = Expression::IsNull(Box::new(IsNull {
27270 this: a,
27271 not: false,
27272 postfix_form: false,
27273 }));
27274 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
27275 this: is_null,
27276 inferred_type: None,
27277 }));
27278 Ok(Expression::Case(Box::new(Case {
27279 operand: Option::None,
27280 whens: vec![(not_null, b)],
27281 else_: c,
27282 comments: Vec::new(),
27283 inferred_type: None,
27284 })))
27285 }
27286
27287 Action::IfnullToCoalesce => {
27288 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
27289 if let Expression::Coalesce(mut cf) = e {
27290 cf.original_name = Option::None;
27291 Ok(Expression::Coalesce(cf))
27292 } else if let Expression::Function(f) = e {
27293 Ok(Expression::Function(Box::new(Function::new(
27294 "COALESCE".to_string(),
27295 f.args,
27296 ))))
27297 } else {
27298 Ok(e)
27299 }
27300 }
27301
27302 Action::IsAsciiConvert => {
27303 // IS_ASCII(x) -> dialect-specific ASCII check
27304 if let Expression::Function(f) = e {
27305 let arg = f.args.into_iter().next().unwrap();
27306 match target {
27307 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
27308 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
27309 Ok(Expression::Function(Box::new(Function::new(
27310 "REGEXP_LIKE".to_string(),
27311 vec![
27312 arg,
27313 Expression::Literal(Box::new(Literal::String(
27314 "^[[:ascii:]]*$".to_string(),
27315 ))),
27316 ],
27317 ))))
27318 }
27319 DialectType::PostgreSQL
27320 | DialectType::Redshift
27321 | DialectType::Materialize
27322 | DialectType::RisingWave => {
27323 // (x ~ '^[[:ascii:]]*$')
27324 Ok(Expression::Paren(Box::new(Paren {
27325 this: Expression::RegexpLike(Box::new(
27326 crate::expressions::RegexpFunc {
27327 this: arg,
27328 pattern: Expression::Literal(Box::new(
27329 Literal::String("^[[:ascii:]]*$".to_string()),
27330 )),
27331 flags: Option::None,
27332 },
27333 )),
27334 trailing_comments: Vec::new(),
27335 })))
27336 }
27337 DialectType::SQLite => {
27338 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
27339 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
27340 "2a5b5e012d7f5d2a".to_string(),
27341 )));
27342 let cast_expr = Expression::Cast(Box::new(Cast {
27343 this: hex_lit,
27344 to: DataType::Text,
27345 trailing_comments: Vec::new(),
27346 double_colon_syntax: false,
27347 format: Option::None,
27348 default: Option::None,
27349 inferred_type: None,
27350 }));
27351 let glob = Expression::Glob(Box::new(BinaryOp {
27352 left: arg,
27353 right: cast_expr,
27354 left_comments: Vec::new(),
27355 operator_comments: Vec::new(),
27356 trailing_comments: Vec::new(),
27357 inferred_type: None,
27358 }));
27359 Ok(Expression::Paren(Box::new(Paren {
27360 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
27361 this: glob,
27362 inferred_type: None,
27363 })),
27364 trailing_comments: Vec::new(),
27365 })))
27366 }
27367 DialectType::TSQL | DialectType::Fabric => {
27368 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
27369 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
27370 "255b5e002d7f5d25".to_string(),
27371 )));
27372 let convert_expr = Expression::Convert(Box::new(
27373 crate::expressions::ConvertFunc {
27374 this: hex_lit,
27375 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
27376 style: None,
27377 },
27378 ));
27379 let collated = Expression::Collation(Box::new(
27380 crate::expressions::CollationExpr {
27381 this: convert_expr,
27382 collation: "Latin1_General_BIN".to_string(),
27383 quoted: false,
27384 double_quoted: false,
27385 },
27386 ));
27387 let patindex = Expression::Function(Box::new(Function::new(
27388 "PATINDEX".to_string(),
27389 vec![collated, arg],
27390 )));
27391 let zero =
27392 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27393 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27394 left: patindex,
27395 right: zero,
27396 left_comments: Vec::new(),
27397 operator_comments: Vec::new(),
27398 trailing_comments: Vec::new(),
27399 inferred_type: None,
27400 }));
27401 Ok(Expression::Paren(Box::new(Paren {
27402 this: eq_zero,
27403 trailing_comments: Vec::new(),
27404 })))
27405 }
27406 DialectType::Oracle => {
27407 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
27408 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27409 let s1 = Expression::Literal(Box::new(Literal::String(
27410 "^[".to_string(),
27411 )));
27412 let chr1 = Expression::Function(Box::new(Function::new(
27413 "CHR".to_string(),
27414 vec![Expression::Literal(Box::new(Literal::Number(
27415 "1".to_string(),
27416 )))],
27417 )));
27418 let dash =
27419 Expression::Literal(Box::new(Literal::String("-".to_string())));
27420 let chr127 = Expression::Function(Box::new(Function::new(
27421 "CHR".to_string(),
27422 vec![Expression::Literal(Box::new(Literal::Number(
27423 "127".to_string(),
27424 )))],
27425 )));
27426 let s2 = Expression::Literal(Box::new(Literal::String(
27427 "]*$".to_string(),
27428 )));
27429 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27430 let concat1 =
27431 Expression::DPipe(Box::new(crate::expressions::DPipe {
27432 this: Box::new(s1),
27433 expression: Box::new(chr1),
27434 safe: None,
27435 }));
27436 let concat2 =
27437 Expression::DPipe(Box::new(crate::expressions::DPipe {
27438 this: Box::new(concat1),
27439 expression: Box::new(dash),
27440 safe: None,
27441 }));
27442 let concat3 =
27443 Expression::DPipe(Box::new(crate::expressions::DPipe {
27444 this: Box::new(concat2),
27445 expression: Box::new(chr127),
27446 safe: None,
27447 }));
27448 let concat4 =
27449 Expression::DPipe(Box::new(crate::expressions::DPipe {
27450 this: Box::new(concat3),
27451 expression: Box::new(s2),
27452 safe: None,
27453 }));
27454 let regexp_like = Expression::Function(Box::new(Function::new(
27455 "REGEXP_LIKE".to_string(),
27456 vec![arg, concat4],
27457 )));
27458 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
27459 let true_expr =
27460 Expression::Column(Box::new(crate::expressions::Column {
27461 name: Identifier {
27462 name: "TRUE".to_string(),
27463 quoted: false,
27464 trailing_comments: Vec::new(),
27465 span: None,
27466 },
27467 table: None,
27468 join_mark: false,
27469 trailing_comments: Vec::new(),
27470 span: None,
27471 inferred_type: None,
27472 }));
27473 let nvl = Expression::Function(Box::new(Function::new(
27474 "NVL".to_string(),
27475 vec![regexp_like, true_expr],
27476 )));
27477 Ok(nvl)
27478 }
27479 _ => Ok(Expression::Function(Box::new(Function::new(
27480 "IS_ASCII".to_string(),
27481 vec![arg],
27482 )))),
27483 }
27484 } else {
27485 Ok(e)
27486 }
27487 }
27488
27489 Action::StrPositionConvert => {
27490 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
27491 if let Expression::Function(f) = e {
27492 if f.args.len() < 2 {
27493 return Ok(Expression::Function(f));
27494 }
27495 let mut args = f.args;
27496
27497 let haystack = args.remove(0);
27498 let needle = args.remove(0);
27499 let position = if !args.is_empty() {
27500 Some(args.remove(0))
27501 } else {
27502 Option::None
27503 };
27504 let occurrence = if !args.is_empty() {
27505 Some(args.remove(0))
27506 } else {
27507 Option::None
27508 };
27509
27510 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
27511 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
27512 fn build_position_expansion(
27513 haystack: Expression,
27514 needle: Expression,
27515 pos: Expression,
27516 occurrence: Option<Expression>,
27517 inner_func: &str,
27518 wrapper: &str, // "CASE", "IF", "IIF"
27519 ) -> Expression {
27520 let substr = Expression::Function(Box::new(Function::new(
27521 "SUBSTRING".to_string(),
27522 vec![haystack, pos.clone()],
27523 )));
27524 let mut inner_args = vec![substr, needle];
27525 if let Some(occ) = occurrence {
27526 inner_args.push(occ);
27527 }
27528 let inner_call = Expression::Function(Box::new(Function::new(
27529 inner_func.to_string(),
27530 inner_args,
27531 )));
27532 let zero =
27533 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27534 let one =
27535 Expression::Literal(Box::new(Literal::Number("1".to_string())));
27536 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27537 left: inner_call.clone(),
27538 right: zero.clone(),
27539 left_comments: Vec::new(),
27540 operator_comments: Vec::new(),
27541 trailing_comments: Vec::new(),
27542 inferred_type: None,
27543 }));
27544 let add_pos = Expression::Add(Box::new(BinaryOp {
27545 left: inner_call,
27546 right: pos,
27547 left_comments: Vec::new(),
27548 operator_comments: Vec::new(),
27549 trailing_comments: Vec::new(),
27550 inferred_type: None,
27551 }));
27552 let sub_one = Expression::Sub(Box::new(BinaryOp {
27553 left: add_pos,
27554 right: one,
27555 left_comments: Vec::new(),
27556 operator_comments: Vec::new(),
27557 trailing_comments: Vec::new(),
27558 inferred_type: None,
27559 }));
27560
27561 match wrapper {
27562 "CASE" => Expression::Case(Box::new(Case {
27563 operand: Option::None,
27564 whens: vec![(eq_zero, zero)],
27565 else_: Some(sub_one),
27566 comments: Vec::new(),
27567 inferred_type: None,
27568 })),
27569 "IIF" => Expression::Function(Box::new(Function::new(
27570 "IIF".to_string(),
27571 vec![eq_zero, zero, sub_one],
27572 ))),
27573 _ => Expression::Function(Box::new(Function::new(
27574 "IF".to_string(),
27575 vec![eq_zero, zero, sub_one],
27576 ))),
27577 }
27578 }
27579
27580 match target {
27581 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
27582 DialectType::Athena
27583 | DialectType::DuckDB
27584 | DialectType::Presto
27585 | DialectType::Trino
27586 | DialectType::Drill => {
27587 if let Some(pos) = position {
27588 let wrapper = if matches!(target, DialectType::DuckDB) {
27589 "CASE"
27590 } else {
27591 "IF"
27592 };
27593 let result = build_position_expansion(
27594 haystack, needle, pos, occurrence, "STRPOS", wrapper,
27595 );
27596 if matches!(target, DialectType::Drill) {
27597 // Drill uses backtick-quoted `IF`
27598 if let Expression::Function(mut f) = result {
27599 f.name = "`IF`".to_string();
27600 Ok(Expression::Function(f))
27601 } else {
27602 Ok(result)
27603 }
27604 } else {
27605 Ok(result)
27606 }
27607 } else {
27608 Ok(Expression::Function(Box::new(Function::new(
27609 "STRPOS".to_string(),
27610 vec![haystack, needle],
27611 ))))
27612 }
27613 }
27614 // SQLite: IIF wrapper
27615 DialectType::SQLite => {
27616 if let Some(pos) = position {
27617 Ok(build_position_expansion(
27618 haystack, needle, pos, occurrence, "INSTR", "IIF",
27619 ))
27620 } else {
27621 Ok(Expression::Function(Box::new(Function::new(
27622 "INSTR".to_string(),
27623 vec![haystack, needle],
27624 ))))
27625 }
27626 }
27627 // INSTR group: Teradata, BigQuery, Oracle
27628 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
27629 let mut a = vec![haystack, needle];
27630 if let Some(pos) = position {
27631 a.push(pos);
27632 }
27633 if let Some(occ) = occurrence {
27634 a.push(occ);
27635 }
27636 Ok(Expression::Function(Box::new(Function::new(
27637 "INSTR".to_string(),
27638 a,
27639 ))))
27640 }
27641 // CHARINDEX group: Snowflake, TSQL
27642 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
27643 let mut a = vec![needle, haystack];
27644 if let Some(pos) = position {
27645 a.push(pos);
27646 }
27647 Ok(Expression::Function(Box::new(Function::new(
27648 "CHARINDEX".to_string(),
27649 a,
27650 ))))
27651 }
27652 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
27653 DialectType::PostgreSQL
27654 | DialectType::Materialize
27655 | DialectType::RisingWave
27656 | DialectType::Redshift => {
27657 if let Some(pos) = position {
27658 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
27659 // ELSE POSITION(...) + pos - 1 END
27660 let substr = Expression::Substring(Box::new(
27661 crate::expressions::SubstringFunc {
27662 this: haystack,
27663 start: pos.clone(),
27664 length: Option::None,
27665 from_for_syntax: true,
27666 },
27667 ));
27668 let pos_in = Expression::StrPosition(Box::new(
27669 crate::expressions::StrPosition {
27670 this: Box::new(substr),
27671 substr: Some(Box::new(needle)),
27672 position: Option::None,
27673 occurrence: Option::None,
27674 },
27675 ));
27676 let zero = Expression::Literal(Box::new(Literal::Number(
27677 "0".to_string(),
27678 )));
27679 let one = Expression::Literal(Box::new(Literal::Number(
27680 "1".to_string(),
27681 )));
27682 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27683 left: pos_in.clone(),
27684 right: zero.clone(),
27685 left_comments: Vec::new(),
27686 operator_comments: Vec::new(),
27687 trailing_comments: Vec::new(),
27688 inferred_type: None,
27689 }));
27690 let add_pos = Expression::Add(Box::new(BinaryOp {
27691 left: pos_in,
27692 right: pos,
27693 left_comments: Vec::new(),
27694 operator_comments: Vec::new(),
27695 trailing_comments: Vec::new(),
27696 inferred_type: None,
27697 }));
27698 let sub_one = Expression::Sub(Box::new(BinaryOp {
27699 left: add_pos,
27700 right: one,
27701 left_comments: Vec::new(),
27702 operator_comments: Vec::new(),
27703 trailing_comments: Vec::new(),
27704 inferred_type: None,
27705 }));
27706 Ok(Expression::Case(Box::new(Case {
27707 operand: Option::None,
27708 whens: vec![(eq_zero, zero)],
27709 else_: Some(sub_one),
27710 comments: Vec::new(),
27711 inferred_type: None,
27712 })))
27713 } else {
27714 Ok(Expression::StrPosition(Box::new(
27715 crate::expressions::StrPosition {
27716 this: Box::new(haystack),
27717 substr: Some(Box::new(needle)),
27718 position: Option::None,
27719 occurrence: Option::None,
27720 },
27721 )))
27722 }
27723 }
27724 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
27725 DialectType::MySQL
27726 | DialectType::SingleStore
27727 | DialectType::TiDB
27728 | DialectType::Hive
27729 | DialectType::Spark
27730 | DialectType::Databricks
27731 | DialectType::Doris
27732 | DialectType::StarRocks => {
27733 let mut a = vec![needle, haystack];
27734 if let Some(pos) = position {
27735 a.push(pos);
27736 }
27737 Ok(Expression::Function(Box::new(Function::new(
27738 "LOCATE".to_string(),
27739 a,
27740 ))))
27741 }
27742 // ClickHouse: POSITION(haystack, needle[, position])
27743 DialectType::ClickHouse => {
27744 let mut a = vec![haystack, needle];
27745 if let Some(pos) = position {
27746 a.push(pos);
27747 }
27748 Ok(Expression::Function(Box::new(Function::new(
27749 "POSITION".to_string(),
27750 a,
27751 ))))
27752 }
27753 _ => {
27754 let mut a = vec![haystack, needle];
27755 if let Some(pos) = position {
27756 a.push(pos);
27757 }
27758 if let Some(occ) = occurrence {
27759 a.push(occ);
27760 }
27761 Ok(Expression::Function(Box::new(Function::new(
27762 "STR_POSITION".to_string(),
27763 a,
27764 ))))
27765 }
27766 }
27767 } else {
27768 Ok(e)
27769 }
27770 }
27771
27772 Action::ArraySumConvert => {
27773 // ARRAY_SUM(arr) -> dialect-specific
27774 if let Expression::Function(f) = e {
27775 let args = f.args;
27776 match target {
27777 DialectType::DuckDB => Ok(Expression::Function(Box::new(
27778 Function::new("LIST_SUM".to_string(), args),
27779 ))),
27780 DialectType::Spark | DialectType::Databricks => {
27781 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
27782 let arr = args.into_iter().next().unwrap();
27783 let zero =
27784 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27785 let acc_id = Identifier::new("acc");
27786 let x_id = Identifier::new("x");
27787 let acc = Expression::Identifier(acc_id.clone());
27788 let x = Expression::Identifier(x_id.clone());
27789 let add = Expression::Add(Box::new(BinaryOp {
27790 left: acc.clone(),
27791 right: x,
27792 left_comments: Vec::new(),
27793 operator_comments: Vec::new(),
27794 trailing_comments: Vec::new(),
27795 inferred_type: None,
27796 }));
27797 let lambda1 =
27798 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27799 parameters: vec![acc_id.clone(), x_id],
27800 body: add,
27801 colon: false,
27802 parameter_types: Vec::new(),
27803 }));
27804 let lambda2 =
27805 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27806 parameters: vec![acc_id],
27807 body: acc,
27808 colon: false,
27809 parameter_types: Vec::new(),
27810 }));
27811 Ok(Expression::Function(Box::new(Function::new(
27812 "AGGREGATE".to_string(),
27813 vec![arr, zero, lambda1, lambda2],
27814 ))))
27815 }
27816 DialectType::Presto | DialectType::Athena => {
27817 // Presto/Athena keep ARRAY_SUM natively
27818 Ok(Expression::Function(Box::new(Function::new(
27819 "ARRAY_SUM".to_string(),
27820 args,
27821 ))))
27822 }
27823 DialectType::Trino => {
27824 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
27825 if args.len() == 1 {
27826 let arr = args.into_iter().next().unwrap();
27827 let zero = Expression::Literal(Box::new(Literal::Number(
27828 "0".to_string(),
27829 )));
27830 let acc_id = Identifier::new("acc");
27831 let x_id = Identifier::new("x");
27832 let acc = Expression::Identifier(acc_id.clone());
27833 let x = Expression::Identifier(x_id.clone());
27834 let add = Expression::Add(Box::new(BinaryOp {
27835 left: acc.clone(),
27836 right: x,
27837 left_comments: Vec::new(),
27838 operator_comments: Vec::new(),
27839 trailing_comments: Vec::new(),
27840 inferred_type: None,
27841 }));
27842 let lambda1 = Expression::Lambda(Box::new(
27843 crate::expressions::LambdaExpr {
27844 parameters: vec![acc_id.clone(), x_id],
27845 body: add,
27846 colon: false,
27847 parameter_types: Vec::new(),
27848 },
27849 ));
27850 let lambda2 = Expression::Lambda(Box::new(
27851 crate::expressions::LambdaExpr {
27852 parameters: vec![acc_id],
27853 body: acc,
27854 colon: false,
27855 parameter_types: Vec::new(),
27856 },
27857 ));
27858 Ok(Expression::Function(Box::new(Function::new(
27859 "REDUCE".to_string(),
27860 vec![arr, zero, lambda1, lambda2],
27861 ))))
27862 } else {
27863 Ok(Expression::Function(Box::new(Function::new(
27864 "ARRAY_SUM".to_string(),
27865 args,
27866 ))))
27867 }
27868 }
27869 DialectType::ClickHouse => {
27870 // arraySum(lambda, arr) or arraySum(arr)
27871 Ok(Expression::Function(Box::new(Function::new(
27872 "arraySum".to_string(),
27873 args,
27874 ))))
27875 }
27876 _ => Ok(Expression::Function(Box::new(Function::new(
27877 "ARRAY_SUM".to_string(),
27878 args,
27879 )))),
27880 }
27881 } else {
27882 Ok(e)
27883 }
27884 }
27885
27886 Action::ArraySizeConvert => {
27887 if let Expression::Function(f) = e {
27888 Ok(Expression::Function(Box::new(Function::new(
27889 "REPEATED_COUNT".to_string(),
27890 f.args,
27891 ))))
27892 } else {
27893 Ok(e)
27894 }
27895 }
27896
27897 Action::ArrayAnyConvert => {
27898 if let Expression::Function(f) = e {
27899 let mut args = f.args;
27900 if args.len() == 2 {
27901 let arr = args.remove(0);
27902 let lambda = args.remove(0);
27903
27904 // Extract lambda parameter name and body
27905 let (param_name, pred_body) =
27906 if let Expression::Lambda(ref lam) = lambda {
27907 let name = if let Some(p) = lam.parameters.first() {
27908 p.name.clone()
27909 } else {
27910 "x".to_string()
27911 };
27912 (name, lam.body.clone())
27913 } else {
27914 ("x".to_string(), lambda.clone())
27915 };
27916
27917 // Helper: build a function call Expression
27918 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
27919 Expression::Function(Box::new(Function::new(
27920 name.to_string(),
27921 args,
27922 )))
27923 };
27924
27925 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
27926 let build_filter_pattern = |len_func: &str,
27927 len_args_extra: Vec<Expression>,
27928 filter_expr: Expression|
27929 -> Expression {
27930 // len_func(arr, ...extra) = 0
27931 let mut len_arr_args = vec![arr.clone()];
27932 len_arr_args.extend(len_args_extra.clone());
27933 let len_arr = make_func(len_func, len_arr_args);
27934 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
27935 len_arr,
27936 Expression::number(0),
27937 )));
27938
27939 // len_func(filter_expr, ...extra) <> 0
27940 let mut len_filter_args = vec![filter_expr];
27941 len_filter_args.extend(len_args_extra);
27942 let len_filter = make_func(len_func, len_filter_args);
27943 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
27944 len_filter,
27945 Expression::number(0),
27946 )));
27947
27948 // (eq_zero OR neq_zero)
27949 let or_expr =
27950 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
27951 Expression::Paren(Box::new(Paren {
27952 this: or_expr,
27953 trailing_comments: Vec::new(),
27954 }))
27955 };
27956
27957 match target {
27958 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
27959 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
27960 }
27961 DialectType::ClickHouse => {
27962 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
27963 // ClickHouse arrayFilter takes lambda first, then array
27964 let filter_expr =
27965 make_func("arrayFilter", vec![lambda, arr.clone()]);
27966 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
27967 }
27968 DialectType::Databricks | DialectType::Spark => {
27969 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
27970 let filter_expr =
27971 make_func("FILTER", vec![arr.clone(), lambda]);
27972 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
27973 }
27974 DialectType::DuckDB => {
27975 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
27976 let filter_expr =
27977 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
27978 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
27979 }
27980 DialectType::Teradata => {
27981 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
27982 let filter_expr =
27983 make_func("FILTER", vec![arr.clone(), lambda]);
27984 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
27985 }
27986 DialectType::BigQuery => {
27987 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
27988 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
27989 let param_col = Expression::column(¶m_name);
27990 let unnest_expr = Expression::Unnest(Box::new(
27991 crate::expressions::UnnestFunc {
27992 this: arr.clone(),
27993 expressions: vec![],
27994 with_ordinality: false,
27995 alias: Some(Identifier::new(¶m_name)),
27996 offset_alias: None,
27997 },
27998 ));
27999 let mut sel = crate::expressions::Select::default();
28000 sel.expressions = vec![param_col];
28001 sel.from = Some(crate::expressions::From {
28002 expressions: vec![unnest_expr],
28003 });
28004 sel.where_clause =
28005 Some(crate::expressions::Where { this: pred_body });
28006 let array_subquery =
28007 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28008 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
28009 }
28010 DialectType::PostgreSQL => {
28011 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
28012 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
28013 let param_col = Expression::column(¶m_name);
28014 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
28015 let unnest_with_alias =
28016 Expression::Alias(Box::new(crate::expressions::Alias {
28017 this: Expression::Unnest(Box::new(
28018 crate::expressions::UnnestFunc {
28019 this: arr.clone(),
28020 expressions: vec![],
28021 with_ordinality: false,
28022 alias: None,
28023 offset_alias: None,
28024 },
28025 )),
28026 alias: Identifier::new("_t0"),
28027 column_aliases: vec![Identifier::new(¶m_name)],
28028 pre_alias_comments: Vec::new(),
28029 trailing_comments: Vec::new(),
28030 inferred_type: None,
28031 }));
28032 let mut sel = crate::expressions::Select::default();
28033 sel.expressions = vec![param_col];
28034 sel.from = Some(crate::expressions::From {
28035 expressions: vec![unnest_with_alias],
28036 });
28037 sel.where_clause =
28038 Some(crate::expressions::Where { this: pred_body });
28039 let array_subquery =
28040 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28041 Ok(build_filter_pattern(
28042 "ARRAY_LENGTH",
28043 vec![Expression::number(1)],
28044 array_subquery,
28045 ))
28046 }
28047 _ => Ok(Expression::Function(Box::new(Function::new(
28048 "ARRAY_ANY".to_string(),
28049 vec![arr, lambda],
28050 )))),
28051 }
28052 } else {
28053 Ok(Expression::Function(Box::new(Function::new(
28054 "ARRAY_ANY".to_string(),
28055 args,
28056 ))))
28057 }
28058 } else {
28059 Ok(e)
28060 }
28061 }
28062
28063 Action::DecodeSimplify => {
28064 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
28065 // For literal search values: CASE WHEN x = search THEN result
28066 // For NULL search: CASE WHEN x IS NULL THEN result
28067 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
28068 fn is_decode_literal(e: &Expression) -> bool {
28069 matches!(
28070 e,
28071 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
28072 )
28073 }
28074
28075 let build_decode_case =
28076 |this_expr: Expression,
28077 pairs: Vec<(Expression, Expression)>,
28078 default: Option<Expression>| {
28079 let whens: Vec<(Expression, Expression)> = pairs
28080 .into_iter()
28081 .map(|(search, result)| {
28082 if matches!(&search, Expression::Null(_)) {
28083 // NULL search -> IS NULL
28084 let condition = Expression::Is(Box::new(BinaryOp {
28085 left: this_expr.clone(),
28086 right: Expression::Null(crate::expressions::Null),
28087 left_comments: Vec::new(),
28088 operator_comments: Vec::new(),
28089 trailing_comments: Vec::new(),
28090 inferred_type: None,
28091 }));
28092 (condition, result)
28093 } else if is_decode_literal(&search)
28094 || is_decode_literal(&this_expr)
28095 {
28096 // At least one side is a literal -> simple equality (no NULL check needed)
28097 let eq = Expression::Eq(Box::new(BinaryOp {
28098 left: this_expr.clone(),
28099 right: search,
28100 left_comments: Vec::new(),
28101 operator_comments: Vec::new(),
28102 trailing_comments: Vec::new(),
28103 inferred_type: None,
28104 }));
28105 (eq, result)
28106 } else {
28107 // Non-literal -> null-safe comparison
28108 let needs_paren = matches!(
28109 &search,
28110 Expression::Eq(_)
28111 | Expression::Neq(_)
28112 | Expression::Gt(_)
28113 | Expression::Gte(_)
28114 | Expression::Lt(_)
28115 | Expression::Lte(_)
28116 );
28117 let search_ref = if needs_paren {
28118 Expression::Paren(Box::new(crate::expressions::Paren {
28119 this: search.clone(),
28120 trailing_comments: Vec::new(),
28121 }))
28122 } else {
28123 search.clone()
28124 };
28125 // Build: x = search OR (x IS NULL AND search IS NULL)
28126 let eq = Expression::Eq(Box::new(BinaryOp {
28127 left: this_expr.clone(),
28128 right: search_ref,
28129 left_comments: Vec::new(),
28130 operator_comments: Vec::new(),
28131 trailing_comments: Vec::new(),
28132 inferred_type: None,
28133 }));
28134 let search_in_null = if needs_paren {
28135 Expression::Paren(Box::new(crate::expressions::Paren {
28136 this: search.clone(),
28137 trailing_comments: Vec::new(),
28138 }))
28139 } else {
28140 search.clone()
28141 };
28142 let x_is_null = Expression::Is(Box::new(BinaryOp {
28143 left: this_expr.clone(),
28144 right: Expression::Null(crate::expressions::Null),
28145 left_comments: Vec::new(),
28146 operator_comments: Vec::new(),
28147 trailing_comments: Vec::new(),
28148 inferred_type: None,
28149 }));
28150 let search_is_null = Expression::Is(Box::new(BinaryOp {
28151 left: search_in_null,
28152 right: Expression::Null(crate::expressions::Null),
28153 left_comments: Vec::new(),
28154 operator_comments: Vec::new(),
28155 trailing_comments: Vec::new(),
28156 inferred_type: None,
28157 }));
28158 let both_null = Expression::And(Box::new(BinaryOp {
28159 left: x_is_null,
28160 right: search_is_null,
28161 left_comments: Vec::new(),
28162 operator_comments: Vec::new(),
28163 trailing_comments: Vec::new(),
28164 inferred_type: None,
28165 }));
28166 let condition = Expression::Or(Box::new(BinaryOp {
28167 left: eq,
28168 right: Expression::Paren(Box::new(
28169 crate::expressions::Paren {
28170 this: both_null,
28171 trailing_comments: Vec::new(),
28172 },
28173 )),
28174 left_comments: Vec::new(),
28175 operator_comments: Vec::new(),
28176 trailing_comments: Vec::new(),
28177 inferred_type: None,
28178 }));
28179 (condition, result)
28180 }
28181 })
28182 .collect();
28183 Expression::Case(Box::new(Case {
28184 operand: None,
28185 whens,
28186 else_: default,
28187 comments: Vec::new(),
28188 inferred_type: None,
28189 }))
28190 };
28191
28192 if let Expression::Decode(decode) = e {
28193 Ok(build_decode_case(
28194 decode.this,
28195 decode.search_results,
28196 decode.default,
28197 ))
28198 } else if let Expression::DecodeCase(dc) = e {
28199 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
28200 let mut exprs = dc.expressions;
28201 if exprs.len() < 3 {
28202 return Ok(Expression::DecodeCase(Box::new(
28203 crate::expressions::DecodeCase { expressions: exprs },
28204 )));
28205 }
28206 let this_expr = exprs.remove(0);
28207 let mut pairs = Vec::new();
28208 let mut default = None;
28209 let mut i = 0;
28210 while i + 1 < exprs.len() {
28211 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
28212 i += 2;
28213 }
28214 if i < exprs.len() {
28215 // Odd remaining element is the default
28216 default = Some(exprs[i].clone());
28217 }
28218 Ok(build_decode_case(this_expr, pairs, default))
28219 } else {
28220 Ok(e)
28221 }
28222 }
28223
28224 Action::CreateTableLikeToCtas => {
28225 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
28226 if let Expression::CreateTable(ct) = e {
28227 let like_source = ct.constraints.iter().find_map(|c| {
28228 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28229 Some(source.clone())
28230 } else {
28231 None
28232 }
28233 });
28234 if let Some(source_table) = like_source {
28235 let mut new_ct = *ct;
28236 new_ct.constraints.clear();
28237 // Build: SELECT * FROM b LIMIT 0
28238 let select = Expression::Select(Box::new(crate::expressions::Select {
28239 expressions: vec![Expression::Star(crate::expressions::Star {
28240 table: None,
28241 except: None,
28242 replace: None,
28243 rename: None,
28244 trailing_comments: Vec::new(),
28245 span: None,
28246 })],
28247 from: Some(crate::expressions::From {
28248 expressions: vec![Expression::Table(Box::new(source_table))],
28249 }),
28250 limit: Some(crate::expressions::Limit {
28251 this: Expression::Literal(Box::new(Literal::Number(
28252 "0".to_string(),
28253 ))),
28254 percent: false,
28255 comments: Vec::new(),
28256 }),
28257 ..Default::default()
28258 }));
28259 new_ct.as_select = Some(select);
28260 Ok(Expression::CreateTable(Box::new(new_ct)))
28261 } else {
28262 Ok(Expression::CreateTable(ct))
28263 }
28264 } else {
28265 Ok(e)
28266 }
28267 }
28268
28269 Action::CreateTableLikeToSelectInto => {
28270 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
28271 if let Expression::CreateTable(ct) = e {
28272 let like_source = ct.constraints.iter().find_map(|c| {
28273 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28274 Some(source.clone())
28275 } else {
28276 None
28277 }
28278 });
28279 if let Some(source_table) = like_source {
28280 let mut aliased_source = source_table;
28281 aliased_source.alias = Some(Identifier::new("temp"));
28282 // Build: SELECT TOP 0 * INTO a FROM b AS temp
28283 let select = Expression::Select(Box::new(crate::expressions::Select {
28284 expressions: vec![Expression::Star(crate::expressions::Star {
28285 table: None,
28286 except: None,
28287 replace: None,
28288 rename: None,
28289 trailing_comments: Vec::new(),
28290 span: None,
28291 })],
28292 from: Some(crate::expressions::From {
28293 expressions: vec![Expression::Table(Box::new(aliased_source))],
28294 }),
28295 into: Some(crate::expressions::SelectInto {
28296 this: Expression::Table(Box::new(ct.name.clone())),
28297 temporary: false,
28298 unlogged: false,
28299 bulk_collect: false,
28300 expressions: Vec::new(),
28301 }),
28302 top: Some(crate::expressions::Top {
28303 this: Expression::Literal(Box::new(Literal::Number(
28304 "0".to_string(),
28305 ))),
28306 percent: false,
28307 with_ties: false,
28308 parenthesized: false,
28309 }),
28310 ..Default::default()
28311 }));
28312 Ok(select)
28313 } else {
28314 Ok(Expression::CreateTable(ct))
28315 }
28316 } else {
28317 Ok(e)
28318 }
28319 }
28320
28321 Action::CreateTableLikeToAs => {
28322 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
28323 if let Expression::CreateTable(ct) = e {
28324 let like_source = ct.constraints.iter().find_map(|c| {
28325 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28326 Some(source.clone())
28327 } else {
28328 None
28329 }
28330 });
28331 if let Some(source_table) = like_source {
28332 let mut new_ct = *ct;
28333 new_ct.constraints.clear();
28334 // AS b (just a table reference, not a SELECT)
28335 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
28336 Ok(Expression::CreateTable(Box::new(new_ct)))
28337 } else {
28338 Ok(Expression::CreateTable(ct))
28339 }
28340 } else {
28341 Ok(e)
28342 }
28343 }
28344
28345 Action::TsOrDsToDateConvert => {
28346 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
28347 if let Expression::Function(f) = e {
28348 let mut args = f.args;
28349 let this = args.remove(0);
28350 let fmt = if !args.is_empty() {
28351 match &args[0] {
28352 Expression::Literal(lit)
28353 if matches!(lit.as_ref(), Literal::String(_)) =>
28354 {
28355 let Literal::String(s) = lit.as_ref() else {
28356 unreachable!()
28357 };
28358 Some(s.clone())
28359 }
28360 _ => None,
28361 }
28362 } else {
28363 None
28364 };
28365 Ok(Expression::TsOrDsToDate(Box::new(
28366 crate::expressions::TsOrDsToDate {
28367 this: Box::new(this),
28368 format: fmt,
28369 safe: None,
28370 },
28371 )))
28372 } else {
28373 Ok(e)
28374 }
28375 }
28376
28377 Action::TsOrDsToDateStrConvert => {
28378 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
28379 if let Expression::Function(f) = e {
28380 let arg = f.args.into_iter().next().unwrap();
28381 let str_type = match target {
28382 DialectType::DuckDB
28383 | DialectType::PostgreSQL
28384 | DialectType::Materialize => DataType::Text,
28385 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28386 DataType::Custom {
28387 name: "STRING".to_string(),
28388 }
28389 }
28390 DialectType::Presto
28391 | DialectType::Trino
28392 | DialectType::Athena
28393 | DialectType::Drill => DataType::VarChar {
28394 length: None,
28395 parenthesized_length: false,
28396 },
28397 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
28398 DataType::Custom {
28399 name: "STRING".to_string(),
28400 }
28401 }
28402 _ => DataType::VarChar {
28403 length: None,
28404 parenthesized_length: false,
28405 },
28406 };
28407 let cast_expr = Expression::Cast(Box::new(Cast {
28408 this: arg,
28409 to: str_type,
28410 double_colon_syntax: false,
28411 trailing_comments: Vec::new(),
28412 format: None,
28413 default: None,
28414 inferred_type: None,
28415 }));
28416 Ok(Expression::Substring(Box::new(
28417 crate::expressions::SubstringFunc {
28418 this: cast_expr,
28419 start: Expression::number(1),
28420 length: Some(Expression::number(10)),
28421 from_for_syntax: false,
28422 },
28423 )))
28424 } else {
28425 Ok(e)
28426 }
28427 }
28428
28429 Action::DateStrToDateConvert => {
28430 // DATE_STR_TO_DATE(x) -> dialect-specific
28431 if let Expression::Function(f) = e {
28432 let arg = f.args.into_iter().next().unwrap();
28433 match target {
28434 DialectType::SQLite => {
28435 // SQLite: just the bare expression (dates are strings)
28436 Ok(arg)
28437 }
28438 _ => Ok(Expression::Cast(Box::new(Cast {
28439 this: arg,
28440 to: DataType::Date,
28441 double_colon_syntax: false,
28442 trailing_comments: Vec::new(),
28443 format: None,
28444 default: None,
28445 inferred_type: None,
28446 }))),
28447 }
28448 } else {
28449 Ok(e)
28450 }
28451 }
28452
28453 Action::TimeStrToDateConvert => {
28454 // TIME_STR_TO_DATE(x) -> dialect-specific
28455 if let Expression::Function(f) = e {
28456 let arg = f.args.into_iter().next().unwrap();
28457 match target {
28458 DialectType::Hive
28459 | DialectType::Doris
28460 | DialectType::StarRocks
28461 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
28462 Function::new("TO_DATE".to_string(), vec![arg]),
28463 ))),
28464 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28465 // Presto: CAST(x AS TIMESTAMP)
28466 Ok(Expression::Cast(Box::new(Cast {
28467 this: arg,
28468 to: DataType::Timestamp {
28469 timezone: false,
28470 precision: None,
28471 },
28472 double_colon_syntax: false,
28473 trailing_comments: Vec::new(),
28474 format: None,
28475 default: None,
28476 inferred_type: None,
28477 })))
28478 }
28479 _ => {
28480 // Default: CAST(x AS DATE)
28481 Ok(Expression::Cast(Box::new(Cast {
28482 this: arg,
28483 to: DataType::Date,
28484 double_colon_syntax: false,
28485 trailing_comments: Vec::new(),
28486 format: None,
28487 default: None,
28488 inferred_type: None,
28489 })))
28490 }
28491 }
28492 } else {
28493 Ok(e)
28494 }
28495 }
28496
28497 Action::TimeStrToTimeConvert => {
28498 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
28499 if let Expression::Function(f) = e {
28500 let mut args = f.args;
28501 let this = args.remove(0);
28502 let zone = if !args.is_empty() {
28503 match &args[0] {
28504 Expression::Literal(lit)
28505 if matches!(lit.as_ref(), Literal::String(_)) =>
28506 {
28507 let Literal::String(s) = lit.as_ref() else {
28508 unreachable!()
28509 };
28510 Some(s.clone())
28511 }
28512 _ => None,
28513 }
28514 } else {
28515 None
28516 };
28517 let has_zone = zone.is_some();
28518
28519 match target {
28520 DialectType::SQLite => {
28521 // SQLite: just the bare expression
28522 Ok(this)
28523 }
28524 DialectType::MySQL => {
28525 if has_zone {
28526 // MySQL with zone: TIMESTAMP(x)
28527 Ok(Expression::Function(Box::new(Function::new(
28528 "TIMESTAMP".to_string(),
28529 vec![this],
28530 ))))
28531 } else {
28532 // MySQL: CAST(x AS DATETIME) or with precision
28533 // Use DataType::Custom to avoid MySQL's transform_cast converting
28534 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
28535 let precision = if let Expression::Literal(ref lit) = this {
28536 if let Literal::String(ref s) = lit.as_ref() {
28537 if let Some(dot_pos) = s.rfind('.') {
28538 let frac = &s[dot_pos + 1..];
28539 let digit_count = frac
28540 .chars()
28541 .take_while(|c| c.is_ascii_digit())
28542 .count();
28543 if digit_count > 0 {
28544 Some(digit_count)
28545 } else {
28546 None
28547 }
28548 } else {
28549 None
28550 }
28551 } else {
28552 None
28553 }
28554 } else {
28555 None
28556 };
28557 let type_name = match precision {
28558 Some(p) => format!("DATETIME({})", p),
28559 None => "DATETIME".to_string(),
28560 };
28561 Ok(Expression::Cast(Box::new(Cast {
28562 this,
28563 to: DataType::Custom { name: type_name },
28564 double_colon_syntax: false,
28565 trailing_comments: Vec::new(),
28566 format: None,
28567 default: None,
28568 inferred_type: None,
28569 })))
28570 }
28571 }
28572 DialectType::ClickHouse => {
28573 if has_zone {
28574 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
28575 // We need to strip the timezone offset from the literal if present
28576 let clean_this = if let Expression::Literal(ref lit) = this {
28577 if let Literal::String(ref s) = lit.as_ref() {
28578 // Strip timezone offset like "-08:00" or "+00:00"
28579 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
28580 if let Some(offset_pos) = re_offset {
28581 if offset_pos > 10 {
28582 // After the date part
28583 let trimmed = s[..offset_pos].to_string();
28584 Expression::Literal(Box::new(Literal::String(
28585 trimmed,
28586 )))
28587 } else {
28588 this.clone()
28589 }
28590 } else {
28591 this.clone()
28592 }
28593 } else {
28594 this.clone()
28595 }
28596 } else {
28597 this.clone()
28598 };
28599 let zone_str = zone.unwrap();
28600 // Build: CAST(x AS DateTime64(6, 'zone'))
28601 let type_name = format!("DateTime64(6, '{}')", zone_str);
28602 Ok(Expression::Cast(Box::new(Cast {
28603 this: clean_this,
28604 to: DataType::Custom { name: type_name },
28605 double_colon_syntax: false,
28606 trailing_comments: Vec::new(),
28607 format: None,
28608 default: None,
28609 inferred_type: None,
28610 })))
28611 } else {
28612 Ok(Expression::Cast(Box::new(Cast {
28613 this,
28614 to: DataType::Custom {
28615 name: "DateTime64(6)".to_string(),
28616 },
28617 double_colon_syntax: false,
28618 trailing_comments: Vec::new(),
28619 format: None,
28620 default: None,
28621 inferred_type: None,
28622 })))
28623 }
28624 }
28625 DialectType::BigQuery => {
28626 if has_zone {
28627 // BigQuery with zone: CAST(x AS TIMESTAMP)
28628 Ok(Expression::Cast(Box::new(Cast {
28629 this,
28630 to: DataType::Timestamp {
28631 timezone: false,
28632 precision: None,
28633 },
28634 double_colon_syntax: false,
28635 trailing_comments: Vec::new(),
28636 format: None,
28637 default: None,
28638 inferred_type: None,
28639 })))
28640 } else {
28641 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
28642 Ok(Expression::Cast(Box::new(Cast {
28643 this,
28644 to: DataType::Custom {
28645 name: "DATETIME".to_string(),
28646 },
28647 double_colon_syntax: false,
28648 trailing_comments: Vec::new(),
28649 format: None,
28650 default: None,
28651 inferred_type: None,
28652 })))
28653 }
28654 }
28655 DialectType::Doris => {
28656 // Doris: CAST(x AS DATETIME)
28657 Ok(Expression::Cast(Box::new(Cast {
28658 this,
28659 to: DataType::Custom {
28660 name: "DATETIME".to_string(),
28661 },
28662 double_colon_syntax: false,
28663 trailing_comments: Vec::new(),
28664 format: None,
28665 default: None,
28666 inferred_type: None,
28667 })))
28668 }
28669 DialectType::TSQL | DialectType::Fabric => {
28670 if has_zone {
28671 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
28672 let cast_expr = Expression::Cast(Box::new(Cast {
28673 this,
28674 to: DataType::Custom {
28675 name: "DATETIMEOFFSET".to_string(),
28676 },
28677 double_colon_syntax: false,
28678 trailing_comments: Vec::new(),
28679 format: None,
28680 default: None,
28681 inferred_type: None,
28682 }));
28683 Ok(Expression::AtTimeZone(Box::new(
28684 crate::expressions::AtTimeZone {
28685 this: cast_expr,
28686 zone: Expression::Literal(Box::new(Literal::String(
28687 "UTC".to_string(),
28688 ))),
28689 },
28690 )))
28691 } else {
28692 // TSQL: CAST(x AS DATETIME2)
28693 Ok(Expression::Cast(Box::new(Cast {
28694 this,
28695 to: DataType::Custom {
28696 name: "DATETIME2".to_string(),
28697 },
28698 double_colon_syntax: false,
28699 trailing_comments: Vec::new(),
28700 format: None,
28701 default: None,
28702 inferred_type: None,
28703 })))
28704 }
28705 }
28706 DialectType::DuckDB => {
28707 if has_zone {
28708 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
28709 Ok(Expression::Cast(Box::new(Cast {
28710 this,
28711 to: DataType::Timestamp {
28712 timezone: true,
28713 precision: None,
28714 },
28715 double_colon_syntax: false,
28716 trailing_comments: Vec::new(),
28717 format: None,
28718 default: None,
28719 inferred_type: None,
28720 })))
28721 } else {
28722 // DuckDB: CAST(x AS TIMESTAMP)
28723 Ok(Expression::Cast(Box::new(Cast {
28724 this,
28725 to: DataType::Timestamp {
28726 timezone: false,
28727 precision: None,
28728 },
28729 double_colon_syntax: false,
28730 trailing_comments: Vec::new(),
28731 format: None,
28732 default: None,
28733 inferred_type: None,
28734 })))
28735 }
28736 }
28737 DialectType::PostgreSQL
28738 | DialectType::Materialize
28739 | DialectType::RisingWave => {
28740 if has_zone {
28741 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
28742 Ok(Expression::Cast(Box::new(Cast {
28743 this,
28744 to: DataType::Timestamp {
28745 timezone: true,
28746 precision: None,
28747 },
28748 double_colon_syntax: false,
28749 trailing_comments: Vec::new(),
28750 format: None,
28751 default: None,
28752 inferred_type: None,
28753 })))
28754 } else {
28755 // PostgreSQL: CAST(x AS TIMESTAMP)
28756 Ok(Expression::Cast(Box::new(Cast {
28757 this,
28758 to: DataType::Timestamp {
28759 timezone: false,
28760 precision: None,
28761 },
28762 double_colon_syntax: false,
28763 trailing_comments: Vec::new(),
28764 format: None,
28765 default: None,
28766 inferred_type: None,
28767 })))
28768 }
28769 }
28770 DialectType::Snowflake => {
28771 if has_zone {
28772 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
28773 Ok(Expression::Cast(Box::new(Cast {
28774 this,
28775 to: DataType::Timestamp {
28776 timezone: true,
28777 precision: None,
28778 },
28779 double_colon_syntax: false,
28780 trailing_comments: Vec::new(),
28781 format: None,
28782 default: None,
28783 inferred_type: None,
28784 })))
28785 } else {
28786 // Snowflake: CAST(x AS TIMESTAMP)
28787 Ok(Expression::Cast(Box::new(Cast {
28788 this,
28789 to: DataType::Timestamp {
28790 timezone: false,
28791 precision: None,
28792 },
28793 double_colon_syntax: false,
28794 trailing_comments: Vec::new(),
28795 format: None,
28796 default: None,
28797 inferred_type: None,
28798 })))
28799 }
28800 }
28801 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28802 if has_zone {
28803 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
28804 // Check for precision from sub-second digits
28805 let precision = if let Expression::Literal(ref lit) = this {
28806 if let Literal::String(ref s) = lit.as_ref() {
28807 if let Some(dot_pos) = s.rfind('.') {
28808 let frac = &s[dot_pos + 1..];
28809 let digit_count = frac
28810 .chars()
28811 .take_while(|c| c.is_ascii_digit())
28812 .count();
28813 if digit_count > 0
28814 && matches!(target, DialectType::Trino)
28815 {
28816 Some(digit_count as u32)
28817 } else {
28818 None
28819 }
28820 } else {
28821 None
28822 }
28823 } else {
28824 None
28825 }
28826 } else {
28827 None
28828 };
28829 let dt = if let Some(prec) = precision {
28830 DataType::Timestamp {
28831 timezone: true,
28832 precision: Some(prec),
28833 }
28834 } else {
28835 DataType::Timestamp {
28836 timezone: true,
28837 precision: None,
28838 }
28839 };
28840 Ok(Expression::Cast(Box::new(Cast {
28841 this,
28842 to: dt,
28843 double_colon_syntax: false,
28844 trailing_comments: Vec::new(),
28845 format: None,
28846 default: None,
28847 inferred_type: None,
28848 })))
28849 } else {
28850 // Check for sub-second precision for Trino
28851 let precision = if let Expression::Literal(ref lit) = this {
28852 if let Literal::String(ref s) = lit.as_ref() {
28853 if let Some(dot_pos) = s.rfind('.') {
28854 let frac = &s[dot_pos + 1..];
28855 let digit_count = frac
28856 .chars()
28857 .take_while(|c| c.is_ascii_digit())
28858 .count();
28859 if digit_count > 0
28860 && matches!(target, DialectType::Trino)
28861 {
28862 Some(digit_count as u32)
28863 } else {
28864 None
28865 }
28866 } else {
28867 None
28868 }
28869 } else {
28870 None
28871 }
28872 } else {
28873 None
28874 };
28875 let dt = DataType::Timestamp {
28876 timezone: false,
28877 precision,
28878 };
28879 Ok(Expression::Cast(Box::new(Cast {
28880 this,
28881 to: dt,
28882 double_colon_syntax: false,
28883 trailing_comments: Vec::new(),
28884 format: None,
28885 default: None,
28886 inferred_type: None,
28887 })))
28888 }
28889 }
28890 DialectType::Redshift => {
28891 if has_zone {
28892 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
28893 Ok(Expression::Cast(Box::new(Cast {
28894 this,
28895 to: DataType::Timestamp {
28896 timezone: true,
28897 precision: None,
28898 },
28899 double_colon_syntax: false,
28900 trailing_comments: Vec::new(),
28901 format: None,
28902 default: None,
28903 inferred_type: None,
28904 })))
28905 } else {
28906 // Redshift: CAST(x AS TIMESTAMP)
28907 Ok(Expression::Cast(Box::new(Cast {
28908 this,
28909 to: DataType::Timestamp {
28910 timezone: false,
28911 precision: None,
28912 },
28913 double_colon_syntax: false,
28914 trailing_comments: Vec::new(),
28915 format: None,
28916 default: None,
28917 inferred_type: None,
28918 })))
28919 }
28920 }
28921 _ => {
28922 // Default: CAST(x AS TIMESTAMP)
28923 Ok(Expression::Cast(Box::new(Cast {
28924 this,
28925 to: DataType::Timestamp {
28926 timezone: false,
28927 precision: None,
28928 },
28929 double_colon_syntax: false,
28930 trailing_comments: Vec::new(),
28931 format: None,
28932 default: None,
28933 inferred_type: None,
28934 })))
28935 }
28936 }
28937 } else {
28938 Ok(e)
28939 }
28940 }
28941
28942 Action::DateToDateStrConvert => {
28943 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
28944 if let Expression::Function(f) = e {
28945 let arg = f.args.into_iter().next().unwrap();
28946 let str_type = match target {
28947 DialectType::DuckDB => DataType::Text,
28948 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28949 DataType::Custom {
28950 name: "STRING".to_string(),
28951 }
28952 }
28953 DialectType::Presto
28954 | DialectType::Trino
28955 | DialectType::Athena
28956 | DialectType::Drill => DataType::VarChar {
28957 length: None,
28958 parenthesized_length: false,
28959 },
28960 _ => DataType::VarChar {
28961 length: None,
28962 parenthesized_length: false,
28963 },
28964 };
28965 Ok(Expression::Cast(Box::new(Cast {
28966 this: arg,
28967 to: str_type,
28968 double_colon_syntax: false,
28969 trailing_comments: Vec::new(),
28970 format: None,
28971 default: None,
28972 inferred_type: None,
28973 })))
28974 } else {
28975 Ok(e)
28976 }
28977 }
28978
28979 Action::DateToDiConvert => {
28980 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
28981 if let Expression::Function(f) = e {
28982 let arg = f.args.into_iter().next().unwrap();
28983 let inner = match target {
28984 DialectType::DuckDB => {
28985 // STRFTIME(x, '%Y%m%d')
28986 Expression::Function(Box::new(Function::new(
28987 "STRFTIME".to_string(),
28988 vec![arg, Expression::string("%Y%m%d")],
28989 )))
28990 }
28991 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28992 // DATE_FORMAT(x, 'yyyyMMdd')
28993 Expression::Function(Box::new(Function::new(
28994 "DATE_FORMAT".to_string(),
28995 vec![arg, Expression::string("yyyyMMdd")],
28996 )))
28997 }
28998 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28999 // DATE_FORMAT(x, '%Y%m%d')
29000 Expression::Function(Box::new(Function::new(
29001 "DATE_FORMAT".to_string(),
29002 vec![arg, Expression::string("%Y%m%d")],
29003 )))
29004 }
29005 DialectType::Drill => {
29006 // TO_DATE(x, 'yyyyMMdd')
29007 Expression::Function(Box::new(Function::new(
29008 "TO_DATE".to_string(),
29009 vec![arg, Expression::string("yyyyMMdd")],
29010 )))
29011 }
29012 _ => {
29013 // Default: STRFTIME(x, '%Y%m%d')
29014 Expression::Function(Box::new(Function::new(
29015 "STRFTIME".to_string(),
29016 vec![arg, Expression::string("%Y%m%d")],
29017 )))
29018 }
29019 };
29020 // Use INT (not INTEGER) for Presto/Trino
29021 let int_type = match target {
29022 DialectType::Presto
29023 | DialectType::Trino
29024 | DialectType::Athena
29025 | DialectType::TSQL
29026 | DialectType::Fabric
29027 | DialectType::SQLite
29028 | DialectType::Redshift => DataType::Custom {
29029 name: "INT".to_string(),
29030 },
29031 _ => DataType::Int {
29032 length: None,
29033 integer_spelling: false,
29034 },
29035 };
29036 Ok(Expression::Cast(Box::new(Cast {
29037 this: inner,
29038 to: int_type,
29039 double_colon_syntax: false,
29040 trailing_comments: Vec::new(),
29041 format: None,
29042 default: None,
29043 inferred_type: None,
29044 })))
29045 } else {
29046 Ok(e)
29047 }
29048 }
29049
29050 Action::DiToDateConvert => {
29051 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
29052 if let Expression::Function(f) = e {
29053 let arg = f.args.into_iter().next().unwrap();
29054 match target {
29055 DialectType::DuckDB => {
29056 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
29057 let cast_text = Expression::Cast(Box::new(Cast {
29058 this: arg,
29059 to: DataType::Text,
29060 double_colon_syntax: false,
29061 trailing_comments: Vec::new(),
29062 format: None,
29063 default: None,
29064 inferred_type: None,
29065 }));
29066 let strptime = Expression::Function(Box::new(Function::new(
29067 "STRPTIME".to_string(),
29068 vec![cast_text, Expression::string("%Y%m%d")],
29069 )));
29070 Ok(Expression::Cast(Box::new(Cast {
29071 this: strptime,
29072 to: DataType::Date,
29073 double_colon_syntax: false,
29074 trailing_comments: Vec::new(),
29075 format: None,
29076 default: None,
29077 inferred_type: None,
29078 })))
29079 }
29080 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29081 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
29082 let cast_str = Expression::Cast(Box::new(Cast {
29083 this: arg,
29084 to: DataType::Custom {
29085 name: "STRING".to_string(),
29086 },
29087 double_colon_syntax: false,
29088 trailing_comments: Vec::new(),
29089 format: None,
29090 default: None,
29091 inferred_type: None,
29092 }));
29093 Ok(Expression::Function(Box::new(Function::new(
29094 "TO_DATE".to_string(),
29095 vec![cast_str, Expression::string("yyyyMMdd")],
29096 ))))
29097 }
29098 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29099 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
29100 let cast_varchar = Expression::Cast(Box::new(Cast {
29101 this: arg,
29102 to: DataType::VarChar {
29103 length: None,
29104 parenthesized_length: false,
29105 },
29106 double_colon_syntax: false,
29107 trailing_comments: Vec::new(),
29108 format: None,
29109 default: None,
29110 inferred_type: None,
29111 }));
29112 let date_parse = Expression::Function(Box::new(Function::new(
29113 "DATE_PARSE".to_string(),
29114 vec![cast_varchar, Expression::string("%Y%m%d")],
29115 )));
29116 Ok(Expression::Cast(Box::new(Cast {
29117 this: date_parse,
29118 to: DataType::Date,
29119 double_colon_syntax: false,
29120 trailing_comments: Vec::new(),
29121 format: None,
29122 default: None,
29123 inferred_type: None,
29124 })))
29125 }
29126 DialectType::Drill => {
29127 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
29128 let cast_varchar = Expression::Cast(Box::new(Cast {
29129 this: arg,
29130 to: DataType::VarChar {
29131 length: None,
29132 parenthesized_length: false,
29133 },
29134 double_colon_syntax: false,
29135 trailing_comments: Vec::new(),
29136 format: None,
29137 default: None,
29138 inferred_type: None,
29139 }));
29140 Ok(Expression::Function(Box::new(Function::new(
29141 "TO_DATE".to_string(),
29142 vec![cast_varchar, Expression::string("yyyyMMdd")],
29143 ))))
29144 }
29145 _ => Ok(Expression::Function(Box::new(Function::new(
29146 "DI_TO_DATE".to_string(),
29147 vec![arg],
29148 )))),
29149 }
29150 } else {
29151 Ok(e)
29152 }
29153 }
29154
29155 Action::TsOrDiToDiConvert => {
29156 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
29157 if let Expression::Function(f) = e {
29158 let arg = f.args.into_iter().next().unwrap();
29159 let str_type = match target {
29160 DialectType::DuckDB => DataType::Text,
29161 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29162 DataType::Custom {
29163 name: "STRING".to_string(),
29164 }
29165 }
29166 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29167 DataType::VarChar {
29168 length: None,
29169 parenthesized_length: false,
29170 }
29171 }
29172 _ => DataType::VarChar {
29173 length: None,
29174 parenthesized_length: false,
29175 },
29176 };
29177 let cast_str = Expression::Cast(Box::new(Cast {
29178 this: arg,
29179 to: str_type,
29180 double_colon_syntax: false,
29181 trailing_comments: Vec::new(),
29182 format: None,
29183 default: None,
29184 inferred_type: None,
29185 }));
29186 let replace_expr = Expression::Function(Box::new(Function::new(
29187 "REPLACE".to_string(),
29188 vec![cast_str, Expression::string("-"), Expression::string("")],
29189 )));
29190 let substr_name = match target {
29191 DialectType::DuckDB
29192 | DialectType::Hive
29193 | DialectType::Spark
29194 | DialectType::Databricks => "SUBSTR",
29195 _ => "SUBSTR",
29196 };
29197 let substr = Expression::Function(Box::new(Function::new(
29198 substr_name.to_string(),
29199 vec![replace_expr, Expression::number(1), Expression::number(8)],
29200 )));
29201 // Use INT (not INTEGER) for Presto/Trino etc.
29202 let int_type = match target {
29203 DialectType::Presto
29204 | DialectType::Trino
29205 | DialectType::Athena
29206 | DialectType::TSQL
29207 | DialectType::Fabric
29208 | DialectType::SQLite
29209 | DialectType::Redshift => DataType::Custom {
29210 name: "INT".to_string(),
29211 },
29212 _ => DataType::Int {
29213 length: None,
29214 integer_spelling: false,
29215 },
29216 };
29217 Ok(Expression::Cast(Box::new(Cast {
29218 this: substr,
29219 to: int_type,
29220 double_colon_syntax: false,
29221 trailing_comments: Vec::new(),
29222 format: None,
29223 default: None,
29224 inferred_type: None,
29225 })))
29226 } else {
29227 Ok(e)
29228 }
29229 }
29230
29231 Action::UnixToStrConvert => {
29232 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
29233 if let Expression::Function(f) = e {
29234 let mut args = f.args;
29235 let this = args.remove(0);
29236 let fmt_expr = if !args.is_empty() {
29237 Some(args.remove(0))
29238 } else {
29239 None
29240 };
29241
29242 // Check if format is a string literal
29243 let fmt_str = fmt_expr.as_ref().and_then(|f| {
29244 if let Expression::Literal(lit) = f {
29245 if let Literal::String(s) = lit.as_ref() {
29246 Some(s.clone())
29247 } else {
29248 None
29249 }
29250 } else {
29251 None
29252 }
29253 });
29254
29255 if let Some(fmt_string) = fmt_str {
29256 // String literal format -> use UnixToStr expression (generator handles it)
29257 Ok(Expression::UnixToStr(Box::new(
29258 crate::expressions::UnixToStr {
29259 this: Box::new(this),
29260 format: Some(fmt_string),
29261 },
29262 )))
29263 } else if let Some(fmt_e) = fmt_expr {
29264 // Non-literal format (e.g., identifier `y`) -> build target expression directly
29265 match target {
29266 DialectType::DuckDB => {
29267 // STRFTIME(TO_TIMESTAMP(x), y)
29268 let to_ts = Expression::Function(Box::new(Function::new(
29269 "TO_TIMESTAMP".to_string(),
29270 vec![this],
29271 )));
29272 Ok(Expression::Function(Box::new(Function::new(
29273 "STRFTIME".to_string(),
29274 vec![to_ts, fmt_e],
29275 ))))
29276 }
29277 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29278 // DATE_FORMAT(FROM_UNIXTIME(x), y)
29279 let from_unix = Expression::Function(Box::new(Function::new(
29280 "FROM_UNIXTIME".to_string(),
29281 vec![this],
29282 )));
29283 Ok(Expression::Function(Box::new(Function::new(
29284 "DATE_FORMAT".to_string(),
29285 vec![from_unix, fmt_e],
29286 ))))
29287 }
29288 DialectType::Hive
29289 | DialectType::Spark
29290 | DialectType::Databricks
29291 | DialectType::Doris
29292 | DialectType::StarRocks => {
29293 // FROM_UNIXTIME(x, y)
29294 Ok(Expression::Function(Box::new(Function::new(
29295 "FROM_UNIXTIME".to_string(),
29296 vec![this, fmt_e],
29297 ))))
29298 }
29299 _ => {
29300 // Default: keep as UNIX_TO_STR(x, y)
29301 Ok(Expression::Function(Box::new(Function::new(
29302 "UNIX_TO_STR".to_string(),
29303 vec![this, fmt_e],
29304 ))))
29305 }
29306 }
29307 } else {
29308 Ok(Expression::UnixToStr(Box::new(
29309 crate::expressions::UnixToStr {
29310 this: Box::new(this),
29311 format: None,
29312 },
29313 )))
29314 }
29315 } else {
29316 Ok(e)
29317 }
29318 }
29319
29320 Action::UnixToTimeConvert => {
29321 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
29322 if let Expression::Function(f) = e {
29323 let arg = f.args.into_iter().next().unwrap();
29324 Ok(Expression::UnixToTime(Box::new(
29325 crate::expressions::UnixToTime {
29326 this: Box::new(arg),
29327 scale: None,
29328 zone: None,
29329 hours: None,
29330 minutes: None,
29331 format: None,
29332 target_type: None,
29333 },
29334 )))
29335 } else {
29336 Ok(e)
29337 }
29338 }
29339
29340 Action::UnixToTimeStrConvert => {
29341 // UNIX_TO_TIME_STR(x) -> dialect-specific
29342 if let Expression::Function(f) = e {
29343 let arg = f.args.into_iter().next().unwrap();
29344 match target {
29345 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29346 // FROM_UNIXTIME(x)
29347 Ok(Expression::Function(Box::new(Function::new(
29348 "FROM_UNIXTIME".to_string(),
29349 vec![arg],
29350 ))))
29351 }
29352 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29353 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
29354 let from_unix = Expression::Function(Box::new(Function::new(
29355 "FROM_UNIXTIME".to_string(),
29356 vec![arg],
29357 )));
29358 Ok(Expression::Cast(Box::new(Cast {
29359 this: from_unix,
29360 to: DataType::VarChar {
29361 length: None,
29362 parenthesized_length: false,
29363 },
29364 double_colon_syntax: false,
29365 trailing_comments: Vec::new(),
29366 format: None,
29367 default: None,
29368 inferred_type: None,
29369 })))
29370 }
29371 DialectType::DuckDB => {
29372 // CAST(TO_TIMESTAMP(x) AS TEXT)
29373 let to_ts = Expression::Function(Box::new(Function::new(
29374 "TO_TIMESTAMP".to_string(),
29375 vec![arg],
29376 )));
29377 Ok(Expression::Cast(Box::new(Cast {
29378 this: to_ts,
29379 to: DataType::Text,
29380 double_colon_syntax: false,
29381 trailing_comments: Vec::new(),
29382 format: None,
29383 default: None,
29384 inferred_type: None,
29385 })))
29386 }
29387 _ => Ok(Expression::Function(Box::new(Function::new(
29388 "UNIX_TO_TIME_STR".to_string(),
29389 vec![arg],
29390 )))),
29391 }
29392 } else {
29393 Ok(e)
29394 }
29395 }
29396
29397 Action::TimeToUnixConvert => {
29398 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
29399 if let Expression::Function(f) = e {
29400 let arg = f.args.into_iter().next().unwrap();
29401 Ok(Expression::TimeToUnix(Box::new(
29402 crate::expressions::UnaryFunc {
29403 this: arg,
29404 original_name: None,
29405 inferred_type: None,
29406 },
29407 )))
29408 } else {
29409 Ok(e)
29410 }
29411 }
29412
29413 Action::TimeToStrConvert => {
29414 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
29415 if let Expression::Function(f) = e {
29416 let mut args = f.args;
29417 let this = args.remove(0);
29418 let fmt = match args.remove(0) {
29419 Expression::Literal(lit)
29420 if matches!(lit.as_ref(), Literal::String(_)) =>
29421 {
29422 let Literal::String(s) = lit.as_ref() else {
29423 unreachable!()
29424 };
29425 s.clone()
29426 }
29427 other => {
29428 return Ok(Expression::Function(Box::new(Function::new(
29429 "TIME_TO_STR".to_string(),
29430 vec![this, other],
29431 ))));
29432 }
29433 };
29434 Ok(Expression::TimeToStr(Box::new(
29435 crate::expressions::TimeToStr {
29436 this: Box::new(this),
29437 format: fmt,
29438 culture: None,
29439 zone: None,
29440 },
29441 )))
29442 } else {
29443 Ok(e)
29444 }
29445 }
29446
29447 Action::StrToUnixConvert => {
29448 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
29449 if let Expression::Function(f) = e {
29450 let mut args = f.args;
29451 let this = args.remove(0);
29452 let fmt = match args.remove(0) {
29453 Expression::Literal(lit)
29454 if matches!(lit.as_ref(), Literal::String(_)) =>
29455 {
29456 let Literal::String(s) = lit.as_ref() else {
29457 unreachable!()
29458 };
29459 s.clone()
29460 }
29461 other => {
29462 return Ok(Expression::Function(Box::new(Function::new(
29463 "STR_TO_UNIX".to_string(),
29464 vec![this, other],
29465 ))));
29466 }
29467 };
29468 Ok(Expression::StrToUnix(Box::new(
29469 crate::expressions::StrToUnix {
29470 this: Some(Box::new(this)),
29471 format: Some(fmt),
29472 },
29473 )))
29474 } else {
29475 Ok(e)
29476 }
29477 }
29478
29479 Action::TimeStrToUnixConvert => {
29480 // TIME_STR_TO_UNIX(x) -> dialect-specific
29481 if let Expression::Function(f) = e {
29482 let arg = f.args.into_iter().next().unwrap();
29483 match target {
29484 DialectType::DuckDB => {
29485 // EPOCH(CAST(x AS TIMESTAMP))
29486 let cast_ts = Expression::Cast(Box::new(Cast {
29487 this: arg,
29488 to: DataType::Timestamp {
29489 timezone: false,
29490 precision: None,
29491 },
29492 double_colon_syntax: false,
29493 trailing_comments: Vec::new(),
29494 format: None,
29495 default: None,
29496 inferred_type: None,
29497 }));
29498 Ok(Expression::Function(Box::new(Function::new(
29499 "EPOCH".to_string(),
29500 vec![cast_ts],
29501 ))))
29502 }
29503 DialectType::Hive
29504 | DialectType::Doris
29505 | DialectType::StarRocks
29506 | DialectType::MySQL => {
29507 // UNIX_TIMESTAMP(x)
29508 Ok(Expression::Function(Box::new(Function::new(
29509 "UNIX_TIMESTAMP".to_string(),
29510 vec![arg],
29511 ))))
29512 }
29513 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29514 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
29515 let date_parse = Expression::Function(Box::new(Function::new(
29516 "DATE_PARSE".to_string(),
29517 vec![arg, Expression::string("%Y-%m-%d %T")],
29518 )));
29519 Ok(Expression::Function(Box::new(Function::new(
29520 "TO_UNIXTIME".to_string(),
29521 vec![date_parse],
29522 ))))
29523 }
29524 _ => Ok(Expression::Function(Box::new(Function::new(
29525 "TIME_STR_TO_UNIX".to_string(),
29526 vec![arg],
29527 )))),
29528 }
29529 } else {
29530 Ok(e)
29531 }
29532 }
29533
29534 Action::TimeToTimeStrConvert => {
29535 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
29536 if let Expression::Function(f) = e {
29537 let arg = f.args.into_iter().next().unwrap();
29538 let str_type = match target {
29539 DialectType::DuckDB => DataType::Text,
29540 DialectType::Hive
29541 | DialectType::Spark
29542 | DialectType::Databricks
29543 | DialectType::Doris
29544 | DialectType::StarRocks => DataType::Custom {
29545 name: "STRING".to_string(),
29546 },
29547 DialectType::Redshift => DataType::Custom {
29548 name: "VARCHAR(MAX)".to_string(),
29549 },
29550 _ => DataType::VarChar {
29551 length: None,
29552 parenthesized_length: false,
29553 },
29554 };
29555 Ok(Expression::Cast(Box::new(Cast {
29556 this: arg,
29557 to: str_type,
29558 double_colon_syntax: false,
29559 trailing_comments: Vec::new(),
29560 format: None,
29561 default: None,
29562 inferred_type: None,
29563 })))
29564 } else {
29565 Ok(e)
29566 }
29567 }
29568
29569 Action::DateTruncSwapArgs => {
29570 // DATE_TRUNC('unit', x) from Generic -> target-specific
29571 if let Expression::Function(f) = e {
29572 if f.args.len() == 2 {
29573 let unit_arg = f.args[0].clone();
29574 let expr_arg = f.args[1].clone();
29575 // Extract unit string from the first arg
29576 let unit_str = match &unit_arg {
29577 Expression::Literal(lit)
29578 if matches!(lit.as_ref(), Literal::String(_)) =>
29579 {
29580 let Literal::String(s) = lit.as_ref() else {
29581 unreachable!()
29582 };
29583 s.to_ascii_uppercase()
29584 }
29585 _ => return Ok(Expression::Function(f)),
29586 };
29587 match target {
29588 DialectType::BigQuery => {
29589 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
29590 let unit_ident =
29591 Expression::Column(Box::new(crate::expressions::Column {
29592 name: crate::expressions::Identifier::new(unit_str),
29593 table: None,
29594 join_mark: false,
29595 trailing_comments: Vec::new(),
29596 span: None,
29597 inferred_type: None,
29598 }));
29599 Ok(Expression::Function(Box::new(Function::new(
29600 "DATE_TRUNC".to_string(),
29601 vec![expr_arg, unit_ident],
29602 ))))
29603 }
29604 DialectType::Doris => {
29605 // Doris: DATE_TRUNC(x, 'UNIT')
29606 Ok(Expression::Function(Box::new(Function::new(
29607 "DATE_TRUNC".to_string(),
29608 vec![expr_arg, Expression::string(&unit_str)],
29609 ))))
29610 }
29611 DialectType::StarRocks => {
29612 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
29613 Ok(Expression::Function(Box::new(Function::new(
29614 "DATE_TRUNC".to_string(),
29615 vec![Expression::string(&unit_str), expr_arg],
29616 ))))
29617 }
29618 DialectType::Spark | DialectType::Databricks => {
29619 // Spark: TRUNC(x, 'UNIT')
29620 Ok(Expression::Function(Box::new(Function::new(
29621 "TRUNC".to_string(),
29622 vec![expr_arg, Expression::string(&unit_str)],
29623 ))))
29624 }
29625 DialectType::MySQL => {
29626 // MySQL: complex expansion based on unit
29627 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
29628 }
29629 _ => Ok(Expression::Function(f)),
29630 }
29631 } else {
29632 Ok(Expression::Function(f))
29633 }
29634 } else {
29635 Ok(e)
29636 }
29637 }
29638
29639 Action::TimestampTruncConvert => {
29640 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
29641 if let Expression::Function(f) = e {
29642 if f.args.len() >= 2 {
29643 let expr_arg = f.args[0].clone();
29644 let unit_arg = f.args[1].clone();
29645 let tz_arg = if f.args.len() >= 3 {
29646 Some(f.args[2].clone())
29647 } else {
29648 None
29649 };
29650 // Extract unit string
29651 let unit_str = match &unit_arg {
29652 Expression::Literal(lit)
29653 if matches!(lit.as_ref(), Literal::String(_)) =>
29654 {
29655 let Literal::String(s) = lit.as_ref() else {
29656 unreachable!()
29657 };
29658 s.to_ascii_uppercase()
29659 }
29660 Expression::Column(c) => c.name.name.to_ascii_uppercase(),
29661 _ => {
29662 return Ok(Expression::Function(f));
29663 }
29664 };
29665 match target {
29666 DialectType::Spark | DialectType::Databricks => {
29667 // Spark: DATE_TRUNC('UNIT', x)
29668 Ok(Expression::Function(Box::new(Function::new(
29669 "DATE_TRUNC".to_string(),
29670 vec![Expression::string(&unit_str), expr_arg],
29671 ))))
29672 }
29673 DialectType::Doris | DialectType::StarRocks => {
29674 // Doris: DATE_TRUNC(x, 'UNIT')
29675 Ok(Expression::Function(Box::new(Function::new(
29676 "DATE_TRUNC".to_string(),
29677 vec![expr_arg, Expression::string(&unit_str)],
29678 ))))
29679 }
29680 DialectType::BigQuery => {
29681 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
29682 let unit_ident =
29683 Expression::Column(Box::new(crate::expressions::Column {
29684 name: crate::expressions::Identifier::new(unit_str),
29685 table: None,
29686 join_mark: false,
29687 trailing_comments: Vec::new(),
29688 span: None,
29689 inferred_type: None,
29690 }));
29691 let mut args = vec![expr_arg, unit_ident];
29692 if let Some(tz) = tz_arg {
29693 args.push(tz);
29694 }
29695 Ok(Expression::Function(Box::new(Function::new(
29696 "TIMESTAMP_TRUNC".to_string(),
29697 args,
29698 ))))
29699 }
29700 DialectType::DuckDB => {
29701 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
29702 if let Some(tz) = tz_arg {
29703 let tz_str = match &tz {
29704 Expression::Literal(lit)
29705 if matches!(lit.as_ref(), Literal::String(_)) =>
29706 {
29707 let Literal::String(s) = lit.as_ref() else {
29708 unreachable!()
29709 };
29710 s.clone()
29711 }
29712 _ => "UTC".to_string(),
29713 };
29714 // x AT TIME ZONE 'tz'
29715 let at_tz = Expression::AtTimeZone(Box::new(
29716 crate::expressions::AtTimeZone {
29717 this: expr_arg,
29718 zone: Expression::string(&tz_str),
29719 },
29720 ));
29721 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
29722 let trunc = Expression::Function(Box::new(Function::new(
29723 "DATE_TRUNC".to_string(),
29724 vec![Expression::string(&unit_str), at_tz],
29725 )));
29726 // DATE_TRUNC(...) AT TIME ZONE 'tz'
29727 Ok(Expression::AtTimeZone(Box::new(
29728 crate::expressions::AtTimeZone {
29729 this: trunc,
29730 zone: Expression::string(&tz_str),
29731 },
29732 )))
29733 } else {
29734 Ok(Expression::Function(Box::new(Function::new(
29735 "DATE_TRUNC".to_string(),
29736 vec![Expression::string(&unit_str), expr_arg],
29737 ))))
29738 }
29739 }
29740 DialectType::Presto
29741 | DialectType::Trino
29742 | DialectType::Athena
29743 | DialectType::Snowflake => {
29744 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
29745 Ok(Expression::Function(Box::new(Function::new(
29746 "DATE_TRUNC".to_string(),
29747 vec![Expression::string(&unit_str), expr_arg],
29748 ))))
29749 }
29750 _ => {
29751 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
29752 let mut args = vec![Expression::string(&unit_str), expr_arg];
29753 if let Some(tz) = tz_arg {
29754 args.push(tz);
29755 }
29756 Ok(Expression::Function(Box::new(Function::new(
29757 "DATE_TRUNC".to_string(),
29758 args,
29759 ))))
29760 }
29761 }
29762 } else {
29763 Ok(Expression::Function(f))
29764 }
29765 } else {
29766 Ok(e)
29767 }
29768 }
29769
29770 Action::StrToDateConvert => {
29771 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
29772 if let Expression::Function(f) = e {
29773 if f.args.len() == 2 {
29774 let mut args = f.args;
29775 let this = args.remove(0);
29776 let fmt_expr = args.remove(0);
29777 let fmt_str = match &fmt_expr {
29778 Expression::Literal(lit)
29779 if matches!(lit.as_ref(), Literal::String(_)) =>
29780 {
29781 let Literal::String(s) = lit.as_ref() else {
29782 unreachable!()
29783 };
29784 Some(s.clone())
29785 }
29786 _ => None,
29787 };
29788 let default_date = "%Y-%m-%d";
29789 let default_time = "%Y-%m-%d %H:%M:%S";
29790 let is_default = fmt_str
29791 .as_ref()
29792 .map_or(false, |f| f == default_date || f == default_time);
29793
29794 if is_default {
29795 // Default format: handle per-dialect
29796 match target {
29797 DialectType::MySQL
29798 | DialectType::Doris
29799 | DialectType::StarRocks => {
29800 // Keep STR_TO_DATE(x, fmt) as-is
29801 Ok(Expression::Function(Box::new(Function::new(
29802 "STR_TO_DATE".to_string(),
29803 vec![this, fmt_expr],
29804 ))))
29805 }
29806 DialectType::Hive => {
29807 // Hive: CAST(x AS DATE)
29808 Ok(Expression::Cast(Box::new(Cast {
29809 this,
29810 to: DataType::Date,
29811 double_colon_syntax: false,
29812 trailing_comments: Vec::new(),
29813 format: None,
29814 default: None,
29815 inferred_type: None,
29816 })))
29817 }
29818 DialectType::Presto
29819 | DialectType::Trino
29820 | DialectType::Athena => {
29821 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
29822 let date_parse =
29823 Expression::Function(Box::new(Function::new(
29824 "DATE_PARSE".to_string(),
29825 vec![this, fmt_expr],
29826 )));
29827 Ok(Expression::Cast(Box::new(Cast {
29828 this: date_parse,
29829 to: DataType::Date,
29830 double_colon_syntax: false,
29831 trailing_comments: Vec::new(),
29832 format: None,
29833 default: None,
29834 inferred_type: None,
29835 })))
29836 }
29837 _ => {
29838 // Others: TsOrDsToDate (delegates to generator)
29839 Ok(Expression::TsOrDsToDate(Box::new(
29840 crate::expressions::TsOrDsToDate {
29841 this: Box::new(this),
29842 format: None,
29843 safe: None,
29844 },
29845 )))
29846 }
29847 }
29848 } else if let Some(fmt) = fmt_str {
29849 match target {
29850 DialectType::Doris
29851 | DialectType::StarRocks
29852 | DialectType::MySQL => {
29853 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
29854 let mut normalized = fmt.clone();
29855 normalized = normalized.replace("%-d", "%e");
29856 normalized = normalized.replace("%-m", "%c");
29857 normalized = normalized.replace("%H:%M:%S", "%T");
29858 Ok(Expression::Function(Box::new(Function::new(
29859 "STR_TO_DATE".to_string(),
29860 vec![this, Expression::string(&normalized)],
29861 ))))
29862 }
29863 DialectType::Hive => {
29864 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
29865 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
29866 let unix_ts =
29867 Expression::Function(Box::new(Function::new(
29868 "UNIX_TIMESTAMP".to_string(),
29869 vec![this, Expression::string(&java_fmt)],
29870 )));
29871 let from_unix =
29872 Expression::Function(Box::new(Function::new(
29873 "FROM_UNIXTIME".to_string(),
29874 vec![unix_ts],
29875 )));
29876 Ok(Expression::Cast(Box::new(Cast {
29877 this: from_unix,
29878 to: DataType::Date,
29879 double_colon_syntax: false,
29880 trailing_comments: Vec::new(),
29881 format: None,
29882 default: None,
29883 inferred_type: None,
29884 })))
29885 }
29886 DialectType::Spark | DialectType::Databricks => {
29887 // Spark: TO_DATE(x, java_fmt)
29888 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
29889 Ok(Expression::Function(Box::new(Function::new(
29890 "TO_DATE".to_string(),
29891 vec![this, Expression::string(&java_fmt)],
29892 ))))
29893 }
29894 DialectType::Drill => {
29895 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
29896 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
29897 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
29898 let java_fmt = java_fmt.replace('T', "'T'");
29899 Ok(Expression::Function(Box::new(Function::new(
29900 "TO_DATE".to_string(),
29901 vec![this, Expression::string(&java_fmt)],
29902 ))))
29903 }
29904 _ => {
29905 // For other dialects: use TsOrDsToDate which delegates to generator
29906 Ok(Expression::TsOrDsToDate(Box::new(
29907 crate::expressions::TsOrDsToDate {
29908 this: Box::new(this),
29909 format: Some(fmt),
29910 safe: None,
29911 },
29912 )))
29913 }
29914 }
29915 } else {
29916 // Non-string format - keep as-is
29917 let mut new_args = Vec::new();
29918 new_args.push(this);
29919 new_args.push(fmt_expr);
29920 Ok(Expression::Function(Box::new(Function::new(
29921 "STR_TO_DATE".to_string(),
29922 new_args,
29923 ))))
29924 }
29925 } else {
29926 Ok(Expression::Function(f))
29927 }
29928 } else {
29929 Ok(e)
29930 }
29931 }
29932
29933 Action::TsOrDsAddConvert => {
29934 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
29935 if let Expression::Function(f) = e {
29936 if f.args.len() == 3 {
29937 let mut args = f.args;
29938 let x = args.remove(0);
29939 let n = args.remove(0);
29940 let unit_expr = args.remove(0);
29941 let unit_str = match &unit_expr {
29942 Expression::Literal(lit)
29943 if matches!(lit.as_ref(), Literal::String(_)) =>
29944 {
29945 let Literal::String(s) = lit.as_ref() else {
29946 unreachable!()
29947 };
29948 s.to_ascii_uppercase()
29949 }
29950 _ => "DAY".to_string(),
29951 };
29952
29953 match target {
29954 DialectType::Hive
29955 | DialectType::Spark
29956 | DialectType::Databricks => {
29957 // DATE_ADD(x, n) - only supports DAY unit
29958 Ok(Expression::Function(Box::new(Function::new(
29959 "DATE_ADD".to_string(),
29960 vec![x, n],
29961 ))))
29962 }
29963 DialectType::MySQL => {
29964 // DATE_ADD(x, INTERVAL n UNIT)
29965 let iu = match unit_str.as_str() {
29966 "YEAR" => crate::expressions::IntervalUnit::Year,
29967 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
29968 "MONTH" => crate::expressions::IntervalUnit::Month,
29969 "WEEK" => crate::expressions::IntervalUnit::Week,
29970 "HOUR" => crate::expressions::IntervalUnit::Hour,
29971 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29972 "SECOND" => crate::expressions::IntervalUnit::Second,
29973 _ => crate::expressions::IntervalUnit::Day,
29974 };
29975 let interval = Expression::Interval(Box::new(
29976 crate::expressions::Interval {
29977 this: Some(n),
29978 unit: Some(
29979 crate::expressions::IntervalUnitSpec::Simple {
29980 unit: iu,
29981 use_plural: false,
29982 },
29983 ),
29984 },
29985 ));
29986 Ok(Expression::Function(Box::new(Function::new(
29987 "DATE_ADD".to_string(),
29988 vec![x, interval],
29989 ))))
29990 }
29991 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29992 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
29993 let cast_ts = Expression::Cast(Box::new(Cast {
29994 this: x,
29995 to: DataType::Timestamp {
29996 precision: None,
29997 timezone: false,
29998 },
29999 double_colon_syntax: false,
30000 trailing_comments: Vec::new(),
30001 format: None,
30002 default: None,
30003 inferred_type: None,
30004 }));
30005 let cast_date = Expression::Cast(Box::new(Cast {
30006 this: cast_ts,
30007 to: DataType::Date,
30008 double_colon_syntax: false,
30009 trailing_comments: Vec::new(),
30010 format: None,
30011 default: None,
30012 inferred_type: None,
30013 }));
30014 Ok(Expression::Function(Box::new(Function::new(
30015 "DATE_ADD".to_string(),
30016 vec![Expression::string(&unit_str), n, cast_date],
30017 ))))
30018 }
30019 DialectType::DuckDB => {
30020 // CAST(x AS DATE) + INTERVAL n UNIT
30021 let cast_date = Expression::Cast(Box::new(Cast {
30022 this: x,
30023 to: DataType::Date,
30024 double_colon_syntax: false,
30025 trailing_comments: Vec::new(),
30026 format: None,
30027 default: None,
30028 inferred_type: None,
30029 }));
30030 let iu = match unit_str.as_str() {
30031 "YEAR" => crate::expressions::IntervalUnit::Year,
30032 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30033 "MONTH" => crate::expressions::IntervalUnit::Month,
30034 "WEEK" => crate::expressions::IntervalUnit::Week,
30035 "HOUR" => crate::expressions::IntervalUnit::Hour,
30036 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30037 "SECOND" => crate::expressions::IntervalUnit::Second,
30038 _ => crate::expressions::IntervalUnit::Day,
30039 };
30040 let interval = Expression::Interval(Box::new(
30041 crate::expressions::Interval {
30042 this: Some(n),
30043 unit: Some(
30044 crate::expressions::IntervalUnitSpec::Simple {
30045 unit: iu,
30046 use_plural: false,
30047 },
30048 ),
30049 },
30050 ));
30051 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
30052 left: cast_date,
30053 right: interval,
30054 left_comments: Vec::new(),
30055 operator_comments: Vec::new(),
30056 trailing_comments: Vec::new(),
30057 inferred_type: None,
30058 })))
30059 }
30060 DialectType::Drill => {
30061 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
30062 let cast_date = Expression::Cast(Box::new(Cast {
30063 this: x,
30064 to: DataType::Date,
30065 double_colon_syntax: false,
30066 trailing_comments: Vec::new(),
30067 format: None,
30068 default: None,
30069 inferred_type: None,
30070 }));
30071 let iu = match unit_str.as_str() {
30072 "YEAR" => crate::expressions::IntervalUnit::Year,
30073 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30074 "MONTH" => crate::expressions::IntervalUnit::Month,
30075 "WEEK" => crate::expressions::IntervalUnit::Week,
30076 "HOUR" => crate::expressions::IntervalUnit::Hour,
30077 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30078 "SECOND" => crate::expressions::IntervalUnit::Second,
30079 _ => crate::expressions::IntervalUnit::Day,
30080 };
30081 let interval = Expression::Interval(Box::new(
30082 crate::expressions::Interval {
30083 this: Some(n),
30084 unit: Some(
30085 crate::expressions::IntervalUnitSpec::Simple {
30086 unit: iu,
30087 use_plural: false,
30088 },
30089 ),
30090 },
30091 ));
30092 Ok(Expression::Function(Box::new(Function::new(
30093 "DATE_ADD".to_string(),
30094 vec![cast_date, interval],
30095 ))))
30096 }
30097 _ => {
30098 // Default: keep as TS_OR_DS_ADD
30099 Ok(Expression::Function(Box::new(Function::new(
30100 "TS_OR_DS_ADD".to_string(),
30101 vec![x, n, unit_expr],
30102 ))))
30103 }
30104 }
30105 } else {
30106 Ok(Expression::Function(f))
30107 }
30108 } else {
30109 Ok(e)
30110 }
30111 }
30112
30113 Action::DateFromUnixDateConvert => {
30114 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30115 if let Expression::Function(f) = e {
30116 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
30117 if matches!(
30118 target,
30119 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
30120 ) {
30121 return Ok(Expression::Function(Box::new(Function::new(
30122 "DATE_FROM_UNIX_DATE".to_string(),
30123 f.args,
30124 ))));
30125 }
30126 let n = f.args.into_iter().next().unwrap();
30127 let epoch_date = Expression::Cast(Box::new(Cast {
30128 this: Expression::string("1970-01-01"),
30129 to: DataType::Date,
30130 double_colon_syntax: false,
30131 trailing_comments: Vec::new(),
30132 format: None,
30133 default: None,
30134 inferred_type: None,
30135 }));
30136 match target {
30137 DialectType::DuckDB => {
30138 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
30139 let interval =
30140 Expression::Interval(Box::new(crate::expressions::Interval {
30141 this: Some(n),
30142 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30143 unit: crate::expressions::IntervalUnit::Day,
30144 use_plural: false,
30145 }),
30146 }));
30147 Ok(Expression::Add(Box::new(
30148 crate::expressions::BinaryOp::new(epoch_date, interval),
30149 )))
30150 }
30151 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30152 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
30153 Ok(Expression::Function(Box::new(Function::new(
30154 "DATE_ADD".to_string(),
30155 vec![Expression::string("DAY"), n, epoch_date],
30156 ))))
30157 }
30158 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
30159 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30160 Ok(Expression::Function(Box::new(Function::new(
30161 "DATEADD".to_string(),
30162 vec![
30163 Expression::Identifier(Identifier::new("DAY")),
30164 n,
30165 epoch_date,
30166 ],
30167 ))))
30168 }
30169 DialectType::BigQuery => {
30170 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30171 let interval =
30172 Expression::Interval(Box::new(crate::expressions::Interval {
30173 this: Some(n),
30174 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30175 unit: crate::expressions::IntervalUnit::Day,
30176 use_plural: false,
30177 }),
30178 }));
30179 Ok(Expression::Function(Box::new(Function::new(
30180 "DATE_ADD".to_string(),
30181 vec![epoch_date, interval],
30182 ))))
30183 }
30184 DialectType::MySQL
30185 | DialectType::Doris
30186 | DialectType::StarRocks
30187 | DialectType::Drill => {
30188 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30189 let interval =
30190 Expression::Interval(Box::new(crate::expressions::Interval {
30191 this: Some(n),
30192 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30193 unit: crate::expressions::IntervalUnit::Day,
30194 use_plural: false,
30195 }),
30196 }));
30197 Ok(Expression::Function(Box::new(Function::new(
30198 "DATE_ADD".to_string(),
30199 vec![epoch_date, interval],
30200 ))))
30201 }
30202 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30203 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
30204 Ok(Expression::Function(Box::new(Function::new(
30205 "DATE_ADD".to_string(),
30206 vec![epoch_date, n],
30207 ))))
30208 }
30209 DialectType::PostgreSQL
30210 | DialectType::Materialize
30211 | DialectType::RisingWave => {
30212 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
30213 let n_str = match &n {
30214 Expression::Literal(lit)
30215 if matches!(lit.as_ref(), Literal::Number(_)) =>
30216 {
30217 let Literal::Number(s) = lit.as_ref() else {
30218 unreachable!()
30219 };
30220 s.clone()
30221 }
30222 _ => Self::expr_to_string_static(&n),
30223 };
30224 let interval =
30225 Expression::Interval(Box::new(crate::expressions::Interval {
30226 this: Some(Expression::string(&format!("{} DAY", n_str))),
30227 unit: None,
30228 }));
30229 Ok(Expression::Add(Box::new(
30230 crate::expressions::BinaryOp::new(epoch_date, interval),
30231 )))
30232 }
30233 _ => {
30234 // Default: keep as-is
30235 Ok(Expression::Function(Box::new(Function::new(
30236 "DATE_FROM_UNIX_DATE".to_string(),
30237 vec![n],
30238 ))))
30239 }
30240 }
30241 } else {
30242 Ok(e)
30243 }
30244 }
30245
30246 Action::ArrayRemoveConvert => {
30247 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
30248 if let Expression::ArrayRemove(bf) = e {
30249 let arr = bf.this;
30250 let target_val = bf.expression;
30251 match target {
30252 DialectType::DuckDB => {
30253 let u_id = crate::expressions::Identifier::new("_u");
30254 let lambda =
30255 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30256 parameters: vec![u_id.clone()],
30257 body: Expression::Neq(Box::new(BinaryOp {
30258 left: Expression::Identifier(u_id),
30259 right: target_val,
30260 left_comments: Vec::new(),
30261 operator_comments: Vec::new(),
30262 trailing_comments: Vec::new(),
30263 inferred_type: None,
30264 })),
30265 colon: false,
30266 parameter_types: Vec::new(),
30267 }));
30268 Ok(Expression::Function(Box::new(Function::new(
30269 "LIST_FILTER".to_string(),
30270 vec![arr, lambda],
30271 ))))
30272 }
30273 DialectType::ClickHouse => {
30274 let u_id = crate::expressions::Identifier::new("_u");
30275 let lambda =
30276 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30277 parameters: vec![u_id.clone()],
30278 body: Expression::Neq(Box::new(BinaryOp {
30279 left: Expression::Identifier(u_id),
30280 right: target_val,
30281 left_comments: Vec::new(),
30282 operator_comments: Vec::new(),
30283 trailing_comments: Vec::new(),
30284 inferred_type: None,
30285 })),
30286 colon: false,
30287 parameter_types: Vec::new(),
30288 }));
30289 Ok(Expression::Function(Box::new(Function::new(
30290 "arrayFilter".to_string(),
30291 vec![lambda, arr],
30292 ))))
30293 }
30294 DialectType::BigQuery => {
30295 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
30296 let u_id = crate::expressions::Identifier::new("_u");
30297 let u_col =
30298 Expression::Column(Box::new(crate::expressions::Column {
30299 name: u_id.clone(),
30300 table: None,
30301 join_mark: false,
30302 trailing_comments: Vec::new(),
30303 span: None,
30304 inferred_type: None,
30305 }));
30306 let unnest_expr =
30307 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
30308 this: arr,
30309 expressions: Vec::new(),
30310 with_ordinality: false,
30311 alias: None,
30312 offset_alias: None,
30313 }));
30314 let aliased_unnest =
30315 Expression::Alias(Box::new(crate::expressions::Alias {
30316 this: unnest_expr,
30317 alias: u_id.clone(),
30318 column_aliases: Vec::new(),
30319 pre_alias_comments: Vec::new(),
30320 trailing_comments: Vec::new(),
30321 inferred_type: None,
30322 }));
30323 let where_cond = Expression::Neq(Box::new(BinaryOp {
30324 left: u_col.clone(),
30325 right: target_val,
30326 left_comments: Vec::new(),
30327 operator_comments: Vec::new(),
30328 trailing_comments: Vec::new(),
30329 inferred_type: None,
30330 }));
30331 let subquery = Expression::Select(Box::new(
30332 crate::expressions::Select::new()
30333 .column(u_col)
30334 .from(aliased_unnest)
30335 .where_(where_cond),
30336 ));
30337 Ok(Expression::ArrayFunc(Box::new(
30338 crate::expressions::ArrayConstructor {
30339 expressions: vec![subquery],
30340 bracket_notation: false,
30341 use_list_keyword: false,
30342 },
30343 )))
30344 }
30345 _ => Ok(Expression::ArrayRemove(Box::new(
30346 crate::expressions::BinaryFunc {
30347 original_name: None,
30348 this: arr,
30349 expression: target_val,
30350 inferred_type: None,
30351 },
30352 ))),
30353 }
30354 } else {
30355 Ok(e)
30356 }
30357 }
30358
30359 Action::ArrayReverseConvert => {
30360 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
30361 if let Expression::ArrayReverse(af) = e {
30362 Ok(Expression::Function(Box::new(Function::new(
30363 "arrayReverse".to_string(),
30364 vec![af.this],
30365 ))))
30366 } else {
30367 Ok(e)
30368 }
30369 }
30370
30371 Action::JsonKeysConvert => {
30372 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
30373 if let Expression::JsonKeys(uf) = e {
30374 match target {
30375 DialectType::Spark | DialectType::Databricks => {
30376 Ok(Expression::Function(Box::new(Function::new(
30377 "JSON_OBJECT_KEYS".to_string(),
30378 vec![uf.this],
30379 ))))
30380 }
30381 DialectType::Snowflake => Ok(Expression::Function(Box::new(
30382 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
30383 ))),
30384 _ => Ok(Expression::JsonKeys(uf)),
30385 }
30386 } else {
30387 Ok(e)
30388 }
30389 }
30390
30391 Action::ParseJsonStrip => {
30392 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
30393 if let Expression::ParseJson(uf) = e {
30394 Ok(uf.this)
30395 } else {
30396 Ok(e)
30397 }
30398 }
30399
30400 Action::ArraySizeDrill => {
30401 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
30402 if let Expression::ArraySize(uf) = e {
30403 Ok(Expression::Function(Box::new(Function::new(
30404 "REPEATED_COUNT".to_string(),
30405 vec![uf.this],
30406 ))))
30407 } else {
30408 Ok(e)
30409 }
30410 }
30411
30412 Action::WeekOfYearToWeekIso => {
30413 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
30414 if let Expression::WeekOfYear(uf) = e {
30415 Ok(Expression::Function(Box::new(Function::new(
30416 "WEEKISO".to_string(),
30417 vec![uf.this],
30418 ))))
30419 } else {
30420 Ok(e)
30421 }
30422 }
30423 }
30424 })
30425 }
30426
30427 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
30428 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
30429 use crate::expressions::Function;
30430 match unit {
30431 "DAY" => {
30432 // DATE(x)
30433 Ok(Expression::Function(Box::new(Function::new(
30434 "DATE".to_string(),
30435 vec![expr.clone()],
30436 ))))
30437 }
30438 "WEEK" => {
30439 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
30440 let year_x = Expression::Function(Box::new(Function::new(
30441 "YEAR".to_string(),
30442 vec![expr.clone()],
30443 )));
30444 let week_x = Expression::Function(Box::new(Function::new(
30445 "WEEK".to_string(),
30446 vec![expr.clone(), Expression::number(1)],
30447 )));
30448 let concat_args = vec![
30449 year_x,
30450 Expression::string(" "),
30451 week_x,
30452 Expression::string(" 1"),
30453 ];
30454 let concat = Expression::Function(Box::new(Function::new(
30455 "CONCAT".to_string(),
30456 concat_args,
30457 )));
30458 Ok(Expression::Function(Box::new(Function::new(
30459 "STR_TO_DATE".to_string(),
30460 vec![concat, Expression::string("%Y %u %w")],
30461 ))))
30462 }
30463 "MONTH" => {
30464 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
30465 let year_x = Expression::Function(Box::new(Function::new(
30466 "YEAR".to_string(),
30467 vec![expr.clone()],
30468 )));
30469 let month_x = Expression::Function(Box::new(Function::new(
30470 "MONTH".to_string(),
30471 vec![expr.clone()],
30472 )));
30473 let concat_args = vec![
30474 year_x,
30475 Expression::string(" "),
30476 month_x,
30477 Expression::string(" 1"),
30478 ];
30479 let concat = Expression::Function(Box::new(Function::new(
30480 "CONCAT".to_string(),
30481 concat_args,
30482 )));
30483 Ok(Expression::Function(Box::new(Function::new(
30484 "STR_TO_DATE".to_string(),
30485 vec![concat, Expression::string("%Y %c %e")],
30486 ))))
30487 }
30488 "QUARTER" => {
30489 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
30490 let year_x = Expression::Function(Box::new(Function::new(
30491 "YEAR".to_string(),
30492 vec![expr.clone()],
30493 )));
30494 let quarter_x = Expression::Function(Box::new(Function::new(
30495 "QUARTER".to_string(),
30496 vec![expr.clone()],
30497 )));
30498 // QUARTER(x) * 3 - 2
30499 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
30500 left: quarter_x,
30501 right: Expression::number(3),
30502 left_comments: Vec::new(),
30503 operator_comments: Vec::new(),
30504 trailing_comments: Vec::new(),
30505 inferred_type: None,
30506 }));
30507 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
30508 left: mul,
30509 right: Expression::number(2),
30510 left_comments: Vec::new(),
30511 operator_comments: Vec::new(),
30512 trailing_comments: Vec::new(),
30513 inferred_type: None,
30514 }));
30515 let concat_args = vec![
30516 year_x,
30517 Expression::string(" "),
30518 sub,
30519 Expression::string(" 1"),
30520 ];
30521 let concat = Expression::Function(Box::new(Function::new(
30522 "CONCAT".to_string(),
30523 concat_args,
30524 )));
30525 Ok(Expression::Function(Box::new(Function::new(
30526 "STR_TO_DATE".to_string(),
30527 vec![concat, Expression::string("%Y %c %e")],
30528 ))))
30529 }
30530 "YEAR" => {
30531 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
30532 let year_x = Expression::Function(Box::new(Function::new(
30533 "YEAR".to_string(),
30534 vec![expr.clone()],
30535 )));
30536 let concat_args = vec![year_x, Expression::string(" 1 1")];
30537 let concat = Expression::Function(Box::new(Function::new(
30538 "CONCAT".to_string(),
30539 concat_args,
30540 )));
30541 Ok(Expression::Function(Box::new(Function::new(
30542 "STR_TO_DATE".to_string(),
30543 vec![concat, Expression::string("%Y %c %e")],
30544 ))))
30545 }
30546 _ => {
30547 // Unsupported unit -> keep as DATE_TRUNC
30548 Ok(Expression::Function(Box::new(Function::new(
30549 "DATE_TRUNC".to_string(),
30550 vec![Expression::string(unit), expr.clone()],
30551 ))))
30552 }
30553 }
30554 }
30555
30556 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
30557 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
30558 use crate::expressions::DataType;
30559 match dt {
30560 DataType::VarChar { .. } | DataType::Char { .. } => true,
30561 DataType::Struct { fields, .. } => fields
30562 .iter()
30563 .any(|f| Self::has_varchar_char_type(&f.data_type)),
30564 _ => false,
30565 }
30566 }
30567
30568 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
30569 fn normalize_varchar_to_string(
30570 dt: crate::expressions::DataType,
30571 ) -> crate::expressions::DataType {
30572 use crate::expressions::DataType;
30573 match dt {
30574 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
30575 name: "STRING".to_string(),
30576 },
30577 DataType::Struct { fields, nested } => {
30578 let fields = fields
30579 .into_iter()
30580 .map(|mut f| {
30581 f.data_type = Self::normalize_varchar_to_string(f.data_type);
30582 f
30583 })
30584 .collect();
30585 DataType::Struct { fields, nested }
30586 }
30587 other => other,
30588 }
30589 }
30590
30591 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
30592 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
30593 if let Expression::Literal(ref lit) = expr {
30594 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
30595 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
30596 let trimmed = s.trim();
30597
30598 // Find where digits end and unit text begins
30599 let digit_end = trimmed
30600 .find(|c: char| !c.is_ascii_digit())
30601 .unwrap_or(trimmed.len());
30602 if digit_end == 0 || digit_end == trimmed.len() {
30603 return expr;
30604 }
30605 let num = &trimmed[..digit_end];
30606 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
30607 if unit_text.is_empty() {
30608 return expr;
30609 }
30610
30611 let known_units = [
30612 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
30613 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
30614 ];
30615 if !known_units.contains(&unit_text.as_str()) {
30616 return expr;
30617 }
30618
30619 let unit_str = unit_text.clone();
30620 // Singularize
30621 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
30622 &unit_str[..unit_str.len() - 1]
30623 } else {
30624 &unit_str
30625 };
30626 let unit = unit_singular;
30627
30628 match target {
30629 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30630 // INTERVAL '2' DAY
30631 let iu = match unit {
30632 "DAY" => crate::expressions::IntervalUnit::Day,
30633 "HOUR" => crate::expressions::IntervalUnit::Hour,
30634 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30635 "SECOND" => crate::expressions::IntervalUnit::Second,
30636 "WEEK" => crate::expressions::IntervalUnit::Week,
30637 "MONTH" => crate::expressions::IntervalUnit::Month,
30638 "YEAR" => crate::expressions::IntervalUnit::Year,
30639 _ => return expr,
30640 };
30641 return Expression::Interval(Box::new(crate::expressions::Interval {
30642 this: Some(Expression::string(num)),
30643 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30644 unit: iu,
30645 use_plural: false,
30646 }),
30647 }));
30648 }
30649 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
30650 // INTERVAL '2 DAYS'
30651 let plural = if num != "1" && !unit_str.ends_with('S') {
30652 format!("{} {}S", num, unit)
30653 } else if unit_str.ends_with('S') {
30654 format!("{} {}", num, unit_str)
30655 } else {
30656 format!("{} {}", num, unit)
30657 };
30658 return Expression::Interval(Box::new(crate::expressions::Interval {
30659 this: Some(Expression::string(&plural)),
30660 unit: None,
30661 }));
30662 }
30663 _ => {
30664 // Spark/Databricks/Hive: INTERVAL '1' DAY
30665 let iu = match unit {
30666 "DAY" => crate::expressions::IntervalUnit::Day,
30667 "HOUR" => crate::expressions::IntervalUnit::Hour,
30668 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30669 "SECOND" => crate::expressions::IntervalUnit::Second,
30670 "WEEK" => crate::expressions::IntervalUnit::Week,
30671 "MONTH" => crate::expressions::IntervalUnit::Month,
30672 "YEAR" => crate::expressions::IntervalUnit::Year,
30673 _ => return expr,
30674 };
30675 return Expression::Interval(Box::new(crate::expressions::Interval {
30676 this: Some(Expression::string(num)),
30677 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30678 unit: iu,
30679 use_plural: false,
30680 }),
30681 }));
30682 }
30683 }
30684 }
30685 }
30686 // If it's already an INTERVAL expression, pass through
30687 expr
30688 }
30689
30690 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
30691 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
30692 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
30693 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
30694 fn rewrite_unnest_expansion(
30695 select: &crate::expressions::Select,
30696 target: DialectType,
30697 ) -> Option<crate::expressions::Select> {
30698 use crate::expressions::{
30699 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
30700 UnnestFunc,
30701 };
30702
30703 let index_offset: i64 = match target {
30704 DialectType::Presto | DialectType::Trino => 1,
30705 _ => 0, // BigQuery, Snowflake
30706 };
30707
30708 let if_func_name = match target {
30709 DialectType::Snowflake => "IFF",
30710 _ => "IF",
30711 };
30712
30713 let array_length_func = match target {
30714 DialectType::BigQuery => "ARRAY_LENGTH",
30715 DialectType::Presto | DialectType::Trino => "CARDINALITY",
30716 DialectType::Snowflake => "ARRAY_SIZE",
30717 _ => "ARRAY_LENGTH",
30718 };
30719
30720 let use_table_aliases = matches!(
30721 target,
30722 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
30723 );
30724 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
30725
30726 fn make_col(name: &str, table: Option<&str>) -> Expression {
30727 if let Some(tbl) = table {
30728 Expression::boxed_column(Column {
30729 name: Identifier::new(name.to_string()),
30730 table: Some(Identifier::new(tbl.to_string())),
30731 join_mark: false,
30732 trailing_comments: Vec::new(),
30733 span: None,
30734 inferred_type: None,
30735 })
30736 } else {
30737 Expression::Identifier(Identifier::new(name.to_string()))
30738 }
30739 }
30740
30741 fn make_join(this: Expression) -> Join {
30742 Join {
30743 this,
30744 on: None,
30745 using: Vec::new(),
30746 kind: JoinKind::Cross,
30747 use_inner_keyword: false,
30748 use_outer_keyword: false,
30749 deferred_condition: false,
30750 join_hint: None,
30751 match_condition: None,
30752 pivots: Vec::new(),
30753 comments: Vec::new(),
30754 nesting_group: 0,
30755 directed: false,
30756 }
30757 }
30758
30759 // Collect UNNEST info from SELECT expressions
30760 struct UnnestInfo {
30761 arr_expr: Expression,
30762 col_alias: String,
30763 pos_alias: String,
30764 source_alias: String,
30765 original_expr: Expression,
30766 has_outer_alias: Option<String>,
30767 }
30768
30769 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
30770 let mut col_counter = 0usize;
30771 let mut pos_counter = 1usize;
30772 let mut source_counter = 1usize;
30773
30774 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
30775 match expr {
30776 Expression::Unnest(u) => Some(u.this.clone()),
30777 Expression::Function(f)
30778 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
30779 {
30780 Some(f.args[0].clone())
30781 }
30782 Expression::Alias(a) => extract_unnest_arg(&a.this),
30783 Expression::Add(op)
30784 | Expression::Sub(op)
30785 | Expression::Mul(op)
30786 | Expression::Div(op) => {
30787 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
30788 }
30789 _ => None,
30790 }
30791 }
30792
30793 fn get_alias_name(expr: &Expression) -> Option<String> {
30794 if let Expression::Alias(a) = expr {
30795 Some(a.alias.name.clone())
30796 } else {
30797 None
30798 }
30799 }
30800
30801 for sel_expr in &select.expressions {
30802 if let Some(arr) = extract_unnest_arg(sel_expr) {
30803 col_counter += 1;
30804 pos_counter += 1;
30805 source_counter += 1;
30806
30807 let col_alias = if col_counter == 1 {
30808 "col".to_string()
30809 } else {
30810 format!("col_{}", col_counter)
30811 };
30812 let pos_alias = format!("pos_{}", pos_counter);
30813 let source_alias = format!("_u_{}", source_counter);
30814 let has_outer_alias = get_alias_name(sel_expr);
30815
30816 unnest_infos.push(UnnestInfo {
30817 arr_expr: arr,
30818 col_alias,
30819 pos_alias,
30820 source_alias,
30821 original_expr: sel_expr.clone(),
30822 has_outer_alias,
30823 });
30824 }
30825 }
30826
30827 if unnest_infos.is_empty() {
30828 return None;
30829 }
30830
30831 let series_alias = "pos".to_string();
30832 let series_source_alias = "_u".to_string();
30833 let tbl_ref = if use_table_aliases {
30834 Some(series_source_alias.as_str())
30835 } else {
30836 None
30837 };
30838
30839 // Build new SELECT expressions
30840 let mut new_select_exprs = Vec::new();
30841 for info in &unnest_infos {
30842 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
30843 let src_ref = if use_table_aliases {
30844 Some(info.source_alias.as_str())
30845 } else {
30846 None
30847 };
30848
30849 let pos_col = make_col(&series_alias, tbl_ref);
30850 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
30851 let col_ref = make_col(actual_col_name, src_ref);
30852
30853 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
30854 pos_col.clone(),
30855 unnest_pos_col.clone(),
30856 )));
30857 let mut if_args = vec![eq_cond, col_ref];
30858 if null_third_arg {
30859 if_args.push(Expression::Null(crate::expressions::Null));
30860 }
30861
30862 let if_expr =
30863 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
30864 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
30865
30866 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
30867 final_expr,
30868 Identifier::new(actual_col_name.clone()),
30869 ))));
30870 }
30871
30872 // Build array size expressions for GREATEST
30873 let size_exprs: Vec<Expression> = unnest_infos
30874 .iter()
30875 .map(|info| {
30876 Expression::Function(Box::new(Function::new(
30877 array_length_func.to_string(),
30878 vec![info.arr_expr.clone()],
30879 )))
30880 })
30881 .collect();
30882
30883 let greatest =
30884 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
30885
30886 let series_end = if index_offset == 0 {
30887 Expression::Sub(Box::new(BinaryOp::new(
30888 greatest,
30889 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
30890 )))
30891 } else {
30892 greatest
30893 };
30894
30895 // Build the position array source
30896 let series_unnest_expr = match target {
30897 DialectType::BigQuery => {
30898 let gen_array = Expression::Function(Box::new(Function::new(
30899 "GENERATE_ARRAY".to_string(),
30900 vec![
30901 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
30902 series_end,
30903 ],
30904 )));
30905 Expression::Unnest(Box::new(UnnestFunc {
30906 this: gen_array,
30907 expressions: Vec::new(),
30908 with_ordinality: false,
30909 alias: None,
30910 offset_alias: None,
30911 }))
30912 }
30913 DialectType::Presto | DialectType::Trino => {
30914 let sequence = Expression::Function(Box::new(Function::new(
30915 "SEQUENCE".to_string(),
30916 vec![
30917 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
30918 series_end,
30919 ],
30920 )));
30921 Expression::Unnest(Box::new(UnnestFunc {
30922 this: sequence,
30923 expressions: Vec::new(),
30924 with_ordinality: false,
30925 alias: None,
30926 offset_alias: None,
30927 }))
30928 }
30929 DialectType::Snowflake => {
30930 let range_end = Expression::Add(Box::new(BinaryOp::new(
30931 Expression::Paren(Box::new(crate::expressions::Paren {
30932 this: series_end,
30933 trailing_comments: Vec::new(),
30934 })),
30935 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
30936 )));
30937 let gen_range = Expression::Function(Box::new(Function::new(
30938 "ARRAY_GENERATE_RANGE".to_string(),
30939 vec![
30940 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
30941 range_end,
30942 ],
30943 )));
30944 let flatten_arg =
30945 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
30946 name: Identifier::new("INPUT".to_string()),
30947 value: gen_range,
30948 separator: crate::expressions::NamedArgSeparator::DArrow,
30949 }));
30950 let flatten = Expression::Function(Box::new(Function::new(
30951 "FLATTEN".to_string(),
30952 vec![flatten_arg],
30953 )));
30954 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
30955 }
30956 _ => return None,
30957 };
30958
30959 // Build series alias expression
30960 let series_alias_expr = if use_table_aliases {
30961 let col_aliases = if matches!(target, DialectType::Snowflake) {
30962 vec![
30963 Identifier::new("seq".to_string()),
30964 Identifier::new("key".to_string()),
30965 Identifier::new("path".to_string()),
30966 Identifier::new("index".to_string()),
30967 Identifier::new(series_alias.clone()),
30968 Identifier::new("this".to_string()),
30969 ]
30970 } else {
30971 vec![Identifier::new(series_alias.clone())]
30972 };
30973 Expression::Alias(Box::new(Alias {
30974 this: series_unnest_expr,
30975 alias: Identifier::new(series_source_alias.clone()),
30976 column_aliases: col_aliases,
30977 pre_alias_comments: Vec::new(),
30978 trailing_comments: Vec::new(),
30979 inferred_type: None,
30980 }))
30981 } else {
30982 Expression::Alias(Box::new(Alias::new(
30983 series_unnest_expr,
30984 Identifier::new(series_alias.clone()),
30985 )))
30986 };
30987
30988 // Build CROSS JOINs for each UNNEST
30989 let mut joins = Vec::new();
30990 for info in &unnest_infos {
30991 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
30992
30993 let unnest_join_expr = match target {
30994 DialectType::BigQuery => {
30995 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
30996 let unnest = UnnestFunc {
30997 this: info.arr_expr.clone(),
30998 expressions: Vec::new(),
30999 with_ordinality: true,
31000 alias: Some(Identifier::new(actual_col_name.clone())),
31001 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
31002 };
31003 Expression::Unnest(Box::new(unnest))
31004 }
31005 DialectType::Presto | DialectType::Trino => {
31006 let unnest = UnnestFunc {
31007 this: info.arr_expr.clone(),
31008 expressions: Vec::new(),
31009 with_ordinality: true,
31010 alias: None,
31011 offset_alias: None,
31012 };
31013 Expression::Alias(Box::new(Alias {
31014 this: Expression::Unnest(Box::new(unnest)),
31015 alias: Identifier::new(info.source_alias.clone()),
31016 column_aliases: vec![
31017 Identifier::new(actual_col_name.clone()),
31018 Identifier::new(info.pos_alias.clone()),
31019 ],
31020 pre_alias_comments: Vec::new(),
31021 trailing_comments: Vec::new(),
31022 inferred_type: None,
31023 }))
31024 }
31025 DialectType::Snowflake => {
31026 let flatten_arg =
31027 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
31028 name: Identifier::new("INPUT".to_string()),
31029 value: info.arr_expr.clone(),
31030 separator: crate::expressions::NamedArgSeparator::DArrow,
31031 }));
31032 let flatten = Expression::Function(Box::new(Function::new(
31033 "FLATTEN".to_string(),
31034 vec![flatten_arg],
31035 )));
31036 let table_fn = Expression::Function(Box::new(Function::new(
31037 "TABLE".to_string(),
31038 vec![flatten],
31039 )));
31040 Expression::Alias(Box::new(Alias {
31041 this: table_fn,
31042 alias: Identifier::new(info.source_alias.clone()),
31043 column_aliases: vec![
31044 Identifier::new("seq".to_string()),
31045 Identifier::new("key".to_string()),
31046 Identifier::new("path".to_string()),
31047 Identifier::new(info.pos_alias.clone()),
31048 Identifier::new(actual_col_name.clone()),
31049 Identifier::new("this".to_string()),
31050 ],
31051 pre_alias_comments: Vec::new(),
31052 trailing_comments: Vec::new(),
31053 inferred_type: None,
31054 }))
31055 }
31056 _ => return None,
31057 };
31058
31059 joins.push(make_join(unnest_join_expr));
31060 }
31061
31062 // Build WHERE clause
31063 let mut where_conditions: Vec<Expression> = Vec::new();
31064 for info in &unnest_infos {
31065 let src_ref = if use_table_aliases {
31066 Some(info.source_alias.as_str())
31067 } else {
31068 None
31069 };
31070 let pos_col = make_col(&series_alias, tbl_ref);
31071 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
31072
31073 let arr_size = Expression::Function(Box::new(Function::new(
31074 array_length_func.to_string(),
31075 vec![info.arr_expr.clone()],
31076 )));
31077
31078 let size_ref = if index_offset == 0 {
31079 Expression::Paren(Box::new(crate::expressions::Paren {
31080 this: Expression::Sub(Box::new(BinaryOp::new(
31081 arr_size,
31082 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31083 ))),
31084 trailing_comments: Vec::new(),
31085 }))
31086 } else {
31087 arr_size
31088 };
31089
31090 let eq = Expression::Eq(Box::new(BinaryOp::new(
31091 pos_col.clone(),
31092 unnest_pos_col.clone(),
31093 )));
31094 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
31095 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
31096 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
31097 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
31098 this: and_cond,
31099 trailing_comments: Vec::new(),
31100 }));
31101 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
31102
31103 where_conditions.push(or_cond);
31104 }
31105
31106 let where_expr = if where_conditions.len() == 1 {
31107 // Single condition: no parens needed
31108 where_conditions.into_iter().next().unwrap()
31109 } else {
31110 // Multiple conditions: wrap each OR in parens, then combine with AND
31111 let wrap = |e: Expression| {
31112 Expression::Paren(Box::new(crate::expressions::Paren {
31113 this: e,
31114 trailing_comments: Vec::new(),
31115 }))
31116 };
31117 let mut iter = where_conditions.into_iter();
31118 let first = wrap(iter.next().unwrap());
31119 let second = wrap(iter.next().unwrap());
31120 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
31121 this: Expression::And(Box::new(BinaryOp::new(first, second))),
31122 trailing_comments: Vec::new(),
31123 }));
31124 for cond in iter {
31125 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
31126 }
31127 combined
31128 };
31129
31130 // Build the new SELECT
31131 let mut new_select = select.clone();
31132 new_select.expressions = new_select_exprs;
31133
31134 if new_select.from.is_some() {
31135 let mut all_joins = vec![make_join(series_alias_expr)];
31136 all_joins.extend(joins);
31137 new_select.joins.extend(all_joins);
31138 } else {
31139 new_select.from = Some(From {
31140 expressions: vec![series_alias_expr],
31141 });
31142 new_select.joins.extend(joins);
31143 }
31144
31145 if let Some(ref existing_where) = new_select.where_clause {
31146 let combined = Expression::And(Box::new(BinaryOp::new(
31147 existing_where.this.clone(),
31148 where_expr,
31149 )));
31150 new_select.where_clause = Some(crate::expressions::Where { this: combined });
31151 } else {
31152 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
31153 }
31154
31155 Some(new_select)
31156 }
31157
31158 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
31159 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
31160 match original {
31161 Expression::Unnest(_) => replacement.clone(),
31162 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
31163 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
31164 Expression::Add(op) => {
31165 let left = Self::replace_unnest_with_if(&op.left, replacement);
31166 let right = Self::replace_unnest_with_if(&op.right, replacement);
31167 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
31168 }
31169 Expression::Sub(op) => {
31170 let left = Self::replace_unnest_with_if(&op.left, replacement);
31171 let right = Self::replace_unnest_with_if(&op.right, replacement);
31172 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
31173 }
31174 Expression::Mul(op) => {
31175 let left = Self::replace_unnest_with_if(&op.left, replacement);
31176 let right = Self::replace_unnest_with_if(&op.right, replacement);
31177 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
31178 }
31179 Expression::Div(op) => {
31180 let left = Self::replace_unnest_with_if(&op.left, replacement);
31181 let right = Self::replace_unnest_with_if(&op.right, replacement);
31182 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
31183 }
31184 _ => original.clone(),
31185 }
31186 }
31187
/// Split a JSON path such as `$.y[0].z` into its segments: `["y", "0", "z"]`.
///
/// A leading `$.` or `$` is removed first. Segments are separated by `.` or introduced by
/// `[...]`. Inside brackets, single- or double-quoted text is taken verbatim without the
/// quotes, so `$["a.b"]` yields the single segment `a.b`. A bracket whose content is
/// exactly `*` is dropped. Unterminated brackets or quotes simply run to the end of input.
fn decompose_json_path(path: &str) -> Vec<String> {
    let body = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);

    let mut segments: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut rest = body.chars();

    while let Some(ch) = rest.next() {
        match ch {
            '.' => {
                if !pending.is_empty() {
                    segments.push(std::mem::take(&mut pending));
                }
            }
            '[' => {
                if !pending.is_empty() {
                    segments.push(std::mem::take(&mut pending));
                }
                // Collect everything up to the closing `]`, unwrapping quoted runs.
                let mut inner = String::new();
                while let Some(c) = rest.next() {
                    match c {
                        ']' => break,
                        '"' | '\'' => {
                            for quoted in rest.by_ref() {
                                if quoted == c {
                                    break;
                                }
                                inner.push(quoted);
                            }
                        }
                        plain => inner.push(plain),
                    }
                }
                if inner != "*" {
                    segments.push(inner);
                }
            }
            other => pending.push(other),
        }
    }

    if !pending.is_empty() {
        segments.push(pending);
    }
    segments
}
31255
/// Drop the leading root marker from a JSON path and return the remainder.
///
/// `$.` is removed when present, otherwise a lone leading `$`:
/// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`. Paths without the marker are copied as-is.
fn strip_json_dollar_prefix(path: &str) -> String {
    let remainder = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);
    remainder.to_owned()
}
31267
/// Remove every `[*]` wildcard from a JSON path.
///
/// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`.
///
/// After the wildcards are removed, each `..` is collapsed to `.` in a single pass (this
/// tidies input like `$.y.[*].z`) and trailing dots are trimmed. Note that the collapse
/// applies to any `..` in the path, not only to ones left behind by a removed wildcard.
fn strip_json_wildcards(path: &str) -> String {
    let without_wildcards = path.replace("[*]", "");
    let single_dotted = without_wildcards.replace("..", ".");
    single_dotted.trim_end_matches('.').to_owned()
}
31276
/// Rewrite quoted bracket segments of a JSON path as quoted dot segments.
///
/// `$["a b"]` -> `$."a b"`, `$['key']` -> `$."key"`. The output always uses double quotes,
/// whichever quote character the input used.
///
/// Bracket segments without quotes (e.g. numeric indexes) stay in bracket form, and a
/// segment whose content is exactly `*` is emitted as `[*]`. An unterminated bracket is
/// read to the end of input and emitted as if it had been closed.
fn bracket_to_dot_notation(path: &str) -> String {
    let mut out = String::new();
    let mut rest = path.chars();

    while let Some(ch) = rest.next() {
        if ch != '[' {
            out.push(ch);
            continue;
        }

        // Read the bracket content up to `]`, remembering whether any part was quoted.
        let mut segment = String::new();
        let mut quoted = false;
        while let Some(inner) = rest.next() {
            match inner {
                ']' => break,
                '"' | '\'' => {
                    quoted = true;
                    for c in rest.by_ref() {
                        if c == inner {
                            break;
                        }
                        segment.push(c);
                    }
                }
                plain => segment.push(plain),
            }
        }

        if segment == "*" {
            // Keep wildcard as-is
            out.push_str("[*]");
        } else if quoted {
            // Quoted bracket -> dot notation with quotes
            out.push_str(".\"");
            out.push_str(&segment);
            out.push('"');
        } else {
            // Unquoted content (e.g. a numeric index) -> keep as bracket
            out.push('[');
            out.push_str(&segment);
            out.push(']');
        }
    }

    out
}
31332
/// Switch double-quoted bracket keys in a JSON path to single quotes.
///
/// `$["a b"]` -> `$['a b']`
///
/// Only a `[` immediately followed by `"` starts a rewrite; the key text is copied
/// verbatim up to the next `"`. Everything else, including keys that are already
/// single-quoted, passes through unchanged.
fn bracket_to_single_quotes(path: &str) -> String {
    let mut out = String::new();
    let mut rest = path.chars().peekable();

    while let Some(ch) = rest.next() {
        let opens_quoted_key = ch == '[' && rest.peek() == Some(&'"');
        if !opens_quoted_key {
            out.push(ch);
            continue;
        }

        rest.next(); // consume the opening double quote
        out.push_str("['");
        // Copy the key; the closing double quote is consumed but not emitted.
        for key_char in rest.by_ref() {
            if key_char == '"' {
                break;
            }
            out.push(key_char);
        }
        out.push('\'');
    }

    out
}
31359
31360 /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
31361 /// or PostgreSQL #temp -> TEMPORARY.
31362 /// Also strips # from INSERT INTO #table for non-TSQL targets.
31363 fn transform_select_into(
31364 expr: Expression,
31365 _source: DialectType,
31366 target: DialectType,
31367 ) -> Expression {
31368 use crate::expressions::{CreateTable, Expression, TableRef};
31369
31370 // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
31371 if let Expression::Insert(ref insert) = expr {
31372 if insert.table.name.name.starts_with('#')
31373 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
31374 {
31375 let mut new_insert = insert.clone();
31376 new_insert.table.name.name =
31377 insert.table.name.name.trim_start_matches('#').to_string();
31378 return Expression::Insert(new_insert);
31379 }
31380 return expr;
31381 }
31382
31383 if let Expression::Select(ref select) = expr {
31384 if let Some(ref into) = select.into {
31385 let table_name_raw = match &into.this {
31386 Expression::Table(tr) => tr.name.name.clone(),
31387 Expression::Identifier(id) => id.name.clone(),
31388 _ => String::new(),
31389 };
31390 let is_temp = table_name_raw.starts_with('#') || into.temporary;
31391 let clean_name = table_name_raw.trim_start_matches('#').to_string();
31392
31393 match target {
31394 DialectType::DuckDB | DialectType::Snowflake => {
31395 // SELECT INTO -> CREATE TABLE AS SELECT
31396 let mut new_select = select.clone();
31397 new_select.into = None;
31398 let ct = CreateTable {
31399 name: TableRef::new(clean_name),
31400 on_cluster: None,
31401 columns: Vec::new(),
31402 constraints: Vec::new(),
31403 if_not_exists: false,
31404 temporary: is_temp,
31405 or_replace: false,
31406 table_modifier: None,
31407 as_select: Some(Expression::Select(new_select)),
31408 as_select_parenthesized: false,
31409 on_commit: None,
31410 clone_source: None,
31411 clone_at_clause: None,
31412 shallow_clone: false,
31413 deep_clone: false,
31414 is_copy: false,
31415 leading_comments: Vec::new(),
31416 with_properties: Vec::new(),
31417 teradata_post_name_options: Vec::new(),
31418 with_data: None,
31419 with_statistics: None,
31420 teradata_indexes: Vec::new(),
31421 with_cte: None,
31422 properties: Vec::new(),
31423 partition_of: None,
31424 post_table_properties: Vec::new(),
31425 mysql_table_options: Vec::new(),
31426 inherits: Vec::new(),
31427 on_property: None,
31428 copy_grants: false,
31429 using_template: None,
31430 rollup: None,
31431 uuid: None,
31432 with_partition_columns: Vec::new(),
31433 with_connection: None,
31434 };
31435 return Expression::CreateTable(Box::new(ct));
31436 }
31437 DialectType::PostgreSQL | DialectType::Redshift => {
31438 // PostgreSQL: #foo -> INTO TEMPORARY foo
31439 if is_temp && !into.temporary {
31440 let mut new_select = select.clone();
31441 let mut new_into = into.clone();
31442 new_into.temporary = true;
31443 new_into.unlogged = false;
31444 new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
31445 new_select.into = Some(new_into);
31446 Expression::Select(new_select)
31447 } else {
31448 expr
31449 }
31450 }
31451 _ => expr,
31452 }
31453 } else {
31454 expr
31455 }
31456 } else {
31457 expr
31458 }
31459 }
31460
31461 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
31462 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
31463 fn transform_create_table_properties(
31464 ct: &mut crate::expressions::CreateTable,
31465 _source: DialectType,
31466 target: DialectType,
31467 ) {
31468 use crate::expressions::{
31469 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
31470 Properties,
31471 };
31472
31473 // Helper to convert a raw property value string to the correct Expression
31474 let value_to_expr = |v: &str| -> Expression {
31475 let trimmed = v.trim();
31476 // Check if it's a quoted string (starts and ends with ')
31477 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
31478 Expression::Literal(Box::new(Literal::String(
31479 trimmed[1..trimmed.len() - 1].to_string(),
31480 )))
31481 }
31482 // Check if it's a number
31483 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
31484 Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
31485 }
31486 // Check if it's ARRAY[...] or ARRAY(...)
31487 else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31488 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
31489 let inner = trimmed
31490 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
31491 .trim_start_matches('[')
31492 .trim_start_matches('(')
31493 .trim_end_matches(']')
31494 .trim_end_matches(')');
31495 let elements: Vec<Expression> = inner
31496 .split(',')
31497 .map(|e| {
31498 let elem = e.trim().trim_matches('\'');
31499 Expression::Literal(Box::new(Literal::String(elem.to_string())))
31500 })
31501 .collect();
31502 Expression::Function(Box::new(crate::expressions::Function::new(
31503 "ARRAY".to_string(),
31504 elements,
31505 )))
31506 }
31507 // Otherwise, just output as identifier (unquoted)
31508 else {
31509 Expression::Identifier(Identifier::new(trimmed.to_string()))
31510 }
31511 };
31512
31513 if ct.with_properties.is_empty() && ct.properties.is_empty() {
31514 return;
31515 }
31516
31517 // Handle Presto-style WITH properties
31518 if !ct.with_properties.is_empty() {
31519 // Extract FORMAT property and remaining properties
31520 let mut format_value: Option<String> = None;
31521 let mut partitioned_by: Option<String> = None;
31522 let mut other_props: Vec<(String, String)> = Vec::new();
31523
31524 for (key, value) in ct.with_properties.drain(..) {
31525 if key.eq_ignore_ascii_case("FORMAT") {
31526 // Strip surrounding quotes from value if present
31527 format_value = Some(value.trim_matches('\'').to_string());
31528 } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
31529 partitioned_by = Some(value);
31530 } else {
31531 other_props.push((key, value));
31532 }
31533 }
31534
31535 match target {
31536 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31537 // Presto: keep WITH properties but lowercase 'format' key
31538 if let Some(fmt) = format_value {
31539 ct.with_properties
31540 .push(("format".to_string(), format!("'{}'", fmt)));
31541 }
31542 if let Some(part) = partitioned_by {
31543 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
31544 let trimmed = part.trim();
31545 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
31546 // Also handle ARRAY['...'] format - keep as-is
31547 if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31548 ct.with_properties
31549 .push(("PARTITIONED_BY".to_string(), part));
31550 } else {
31551 // Parse column names from the parenthesized list
31552 let cols: Vec<&str> = inner
31553 .split(',')
31554 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
31555 .collect();
31556 let array_val = format!(
31557 "ARRAY[{}]",
31558 cols.iter()
31559 .map(|c| format!("'{}'", c))
31560 .collect::<Vec<_>>()
31561 .join(", ")
31562 );
31563 ct.with_properties
31564 .push(("PARTITIONED_BY".to_string(), array_val));
31565 }
31566 }
31567 ct.with_properties.extend(other_props);
31568 }
31569 DialectType::Hive => {
31570 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
31571 if let Some(fmt) = format_value {
31572 ct.properties.push(Expression::FileFormatProperty(Box::new(
31573 FileFormatProperty {
31574 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31575 expressions: vec![],
31576 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
31577 value: true,
31578 }))),
31579 },
31580 )));
31581 }
31582 if let Some(_part) = partitioned_by {
31583 // PARTITIONED_BY handling is complex - move columns to partitioned by
31584 // For now, the partition columns are extracted from the column list
31585 Self::apply_partitioned_by(ct, &_part, target);
31586 }
31587 if !other_props.is_empty() {
31588 let eq_exprs: Vec<Expression> = other_props
31589 .into_iter()
31590 .map(|(k, v)| {
31591 Expression::Eq(Box::new(BinaryOp::new(
31592 Expression::Literal(Box::new(Literal::String(k))),
31593 value_to_expr(&v),
31594 )))
31595 })
31596 .collect();
31597 ct.properties
31598 .push(Expression::Properties(Box::new(Properties {
31599 expressions: eq_exprs,
31600 })));
31601 }
31602 }
31603 DialectType::Spark | DialectType::Databricks => {
31604 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
31605 if let Some(fmt) = format_value {
31606 ct.properties.push(Expression::FileFormatProperty(Box::new(
31607 FileFormatProperty {
31608 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31609 expressions: vec![],
31610 hive_format: None, // None means USING syntax
31611 },
31612 )));
31613 }
31614 if let Some(_part) = partitioned_by {
31615 Self::apply_partitioned_by(ct, &_part, target);
31616 }
31617 if !other_props.is_empty() {
31618 let eq_exprs: Vec<Expression> = other_props
31619 .into_iter()
31620 .map(|(k, v)| {
31621 Expression::Eq(Box::new(BinaryOp::new(
31622 Expression::Literal(Box::new(Literal::String(k))),
31623 value_to_expr(&v),
31624 )))
31625 })
31626 .collect();
31627 ct.properties
31628 .push(Expression::Properties(Box::new(Properties {
31629 expressions: eq_exprs,
31630 })));
31631 }
31632 }
31633 DialectType::DuckDB => {
31634 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
31635 // Keep nothing
31636 }
31637 _ => {
31638 // For other dialects, keep WITH properties as-is
31639 if let Some(fmt) = format_value {
31640 ct.with_properties
31641 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
31642 }
31643 if let Some(part) = partitioned_by {
31644 ct.with_properties
31645 .push(("PARTITIONED_BY".to_string(), part));
31646 }
31647 ct.with_properties.extend(other_props);
31648 }
31649 }
31650 }
31651
31652 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
31653 // and Hive STORED AS -> Presto WITH (format=...) conversion
31654 if !ct.properties.is_empty() {
31655 let is_presto_target = matches!(
31656 target,
31657 DialectType::Presto | DialectType::Trino | DialectType::Athena
31658 );
31659 let is_duckdb_target = matches!(target, DialectType::DuckDB);
31660
31661 if is_presto_target || is_duckdb_target {
31662 let mut new_properties = Vec::new();
31663 for prop in ct.properties.drain(..) {
31664 match &prop {
31665 Expression::FileFormatProperty(ffp) => {
31666 if is_presto_target {
31667 // Convert STORED AS/USING to WITH (format=...)
31668 if let Some(ref fmt_expr) = ffp.this {
31669 let fmt_str = match fmt_expr.as_ref() {
31670 Expression::Identifier(id) => id.name.clone(),
31671 Expression::Literal(lit)
31672 if matches!(lit.as_ref(), Literal::String(_)) =>
31673 {
31674 let Literal::String(s) = lit.as_ref() else {
31675 unreachable!()
31676 };
31677 s.clone()
31678 }
31679 _ => {
31680 new_properties.push(prop);
31681 continue;
31682 }
31683 };
31684 ct.with_properties
31685 .push(("format".to_string(), format!("'{}'", fmt_str)));
31686 }
31687 }
31688 // DuckDB: just strip file format properties
31689 }
31690 // Convert TBLPROPERTIES to WITH properties for Presto target
31691 Expression::Properties(props) if is_presto_target => {
31692 for expr in &props.expressions {
31693 if let Expression::Eq(eq) = expr {
31694 // Extract key and value from the Eq expression
31695 let key = match &eq.left {
31696 Expression::Literal(lit)
31697 if matches!(lit.as_ref(), Literal::String(_)) =>
31698 {
31699 let Literal::String(s) = lit.as_ref() else {
31700 unreachable!()
31701 };
31702 s.clone()
31703 }
31704 Expression::Identifier(id) => id.name.clone(),
31705 _ => continue,
31706 };
31707 let value = match &eq.right {
31708 Expression::Literal(lit)
31709 if matches!(lit.as_ref(), Literal::String(_)) =>
31710 {
31711 let Literal::String(s) = lit.as_ref() else {
31712 unreachable!()
31713 };
31714 format!("'{}'", s)
31715 }
31716 Expression::Literal(lit)
31717 if matches!(lit.as_ref(), Literal::Number(_)) =>
31718 {
31719 let Literal::Number(n) = lit.as_ref() else {
31720 unreachable!()
31721 };
31722 n.clone()
31723 }
31724 Expression::Identifier(id) => id.name.clone(),
31725 _ => continue,
31726 };
31727 ct.with_properties.push((key, value));
31728 }
31729 }
31730 }
31731 // Convert PartitionedByProperty for Presto target
31732 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
31733 // Check if it contains ColumnDef expressions (Hive-style with types)
31734 if let Expression::Tuple(ref tuple) = *pbp.this {
31735 let mut col_names: Vec<String> = Vec::new();
31736 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
31737 let mut has_col_defs = false;
31738 for expr in &tuple.expressions {
31739 if let Expression::ColumnDef(ref cd) = expr {
31740 has_col_defs = true;
31741 col_names.push(cd.name.name.clone());
31742 col_defs.push(*cd.clone());
31743 } else if let Expression::Column(ref col) = expr {
31744 col_names.push(col.name.name.clone());
31745 } else if let Expression::Identifier(ref id) = expr {
31746 col_names.push(id.name.clone());
31747 } else {
31748 // For function expressions like MONTHS(y), serialize to SQL
31749 let generic = Dialect::get(DialectType::Generic);
31750 if let Ok(sql) = generic.generate(expr) {
31751 col_names.push(sql);
31752 }
31753 }
31754 }
31755 if has_col_defs {
31756 // Merge partition column defs into the main column list
31757 for cd in col_defs {
31758 ct.columns.push(cd);
31759 }
31760 }
31761 if !col_names.is_empty() {
31762 // Add PARTITIONED_BY property
31763 let array_val = format!(
31764 "ARRAY[{}]",
31765 col_names
31766 .iter()
31767 .map(|n| format!("'{}'", n))
31768 .collect::<Vec<_>>()
31769 .join(", ")
31770 );
31771 ct.with_properties
31772 .push(("PARTITIONED_BY".to_string(), array_val));
31773 }
31774 }
31775 // Skip - don't keep in properties
31776 }
31777 _ => {
31778 if !is_duckdb_target {
31779 new_properties.push(prop);
31780 }
31781 }
31782 }
31783 }
31784 ct.properties = new_properties;
31785 } else {
31786 // For Hive/Spark targets, unquote format names in STORED AS
31787 for prop in &mut ct.properties {
31788 if let Expression::FileFormatProperty(ref mut ffp) = prop {
31789 if let Some(ref mut fmt_expr) = ffp.this {
31790 if let Expression::Literal(lit) = fmt_expr.as_ref() {
31791 if let Literal::String(s) = lit.as_ref() {
31792 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
31793 let unquoted = s.clone();
31794 *fmt_expr =
31795 Box::new(Expression::Identifier(Identifier::new(unquoted)));
31796 }
31797 }
31798 }
31799 }
31800 }
31801 }
31802 }
31803 }
31804
31805 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
31806 fn apply_partitioned_by(
31807 ct: &mut crate::expressions::CreateTable,
31808 partitioned_by_value: &str,
31809 target: DialectType,
31810 ) {
31811 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
31812
31813 // Parse the ARRAY['col1', 'col2'] value to extract column names
31814 let mut col_names: Vec<String> = Vec::new();
31815 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
31816 let inner = partitioned_by_value
31817 .trim()
31818 .trim_start_matches("ARRAY")
31819 .trim_start_matches('[')
31820 .trim_start_matches('(')
31821 .trim_end_matches(']')
31822 .trim_end_matches(')');
31823 for part in inner.split(',') {
31824 let col = part.trim().trim_matches('\'').trim_matches('"');
31825 if !col.is_empty() {
31826 col_names.push(col.to_string());
31827 }
31828 }
31829
31830 if col_names.is_empty() {
31831 return;
31832 }
31833
31834 if matches!(target, DialectType::Hive) {
31835 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
31836 let mut partition_col_defs = Vec::new();
31837 for col_name in &col_names {
31838 // Find and remove from columns
31839 if let Some(pos) = ct
31840 .columns
31841 .iter()
31842 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
31843 {
31844 let col_def = ct.columns.remove(pos);
31845 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
31846 }
31847 }
31848 if !partition_col_defs.is_empty() {
31849 ct.properties
31850 .push(Expression::PartitionedByProperty(Box::new(
31851 PartitionedByProperty {
31852 this: Box::new(Expression::Tuple(Box::new(Tuple {
31853 expressions: partition_col_defs,
31854 }))),
31855 },
31856 )));
31857 }
31858 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
31859 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
31860 // Use quoted identifiers to match the quoting style of the original column definitions
31861 let partition_exprs: Vec<Expression> = col_names
31862 .iter()
31863 .map(|name| {
31864 // Check if the column exists in the column list and use its quoting
31865 let is_quoted = ct
31866 .columns
31867 .iter()
31868 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
31869 let ident = if is_quoted {
31870 Identifier::quoted(name.clone())
31871 } else {
31872 Identifier::new(name.clone())
31873 };
31874 Expression::boxed_column(Column {
31875 name: ident,
31876 table: None,
31877 join_mark: false,
31878 trailing_comments: Vec::new(),
31879 span: None,
31880 inferred_type: None,
31881 })
31882 })
31883 .collect();
31884 ct.properties
31885 .push(Expression::PartitionedByProperty(Box::new(
31886 PartitionedByProperty {
31887 this: Box::new(Expression::Tuple(Box::new(Tuple {
31888 expressions: partition_exprs,
31889 }))),
31890 },
31891 )));
31892 }
31893 // DuckDB: strip partitioned_by entirely (already handled)
31894 }
31895
31896 /// Convert a DataType to Spark's type string format (using angle brackets)
31897 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
31898 use crate::expressions::DataType;
31899 match dt {
31900 DataType::Int { .. } => "INT".to_string(),
31901 DataType::BigInt { .. } => "BIGINT".to_string(),
31902 DataType::SmallInt { .. } => "SMALLINT".to_string(),
31903 DataType::TinyInt { .. } => "TINYINT".to_string(),
31904 DataType::Float { .. } => "FLOAT".to_string(),
31905 DataType::Double { .. } => "DOUBLE".to_string(),
31906 DataType::Decimal {
31907 precision: Some(p),
31908 scale: Some(s),
31909 } => format!("DECIMAL({}, {})", p, s),
31910 DataType::Decimal {
31911 precision: Some(p), ..
31912 } => format!("DECIMAL({})", p),
31913 DataType::Decimal { .. } => "DECIMAL".to_string(),
31914 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
31915 "STRING".to_string()
31916 }
31917 DataType::Char { .. } => "STRING".to_string(),
31918 DataType::Boolean => "BOOLEAN".to_string(),
31919 DataType::Date => "DATE".to_string(),
31920 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
31921 DataType::Json | DataType::JsonB => "STRING".to_string(),
31922 DataType::Binary { .. } => "BINARY".to_string(),
31923 DataType::Array { element_type, .. } => {
31924 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
31925 }
31926 DataType::Map {
31927 key_type,
31928 value_type,
31929 } => format!(
31930 "MAP<{}, {}>",
31931 Self::data_type_to_spark_string(key_type),
31932 Self::data_type_to_spark_string(value_type)
31933 ),
31934 DataType::Struct { fields, .. } => {
31935 let field_strs: Vec<String> = fields
31936 .iter()
31937 .map(|f| {
31938 if f.name.is_empty() {
31939 Self::data_type_to_spark_string(&f.data_type)
31940 } else {
31941 format!(
31942 "{}: {}",
31943 f.name,
31944 Self::data_type_to_spark_string(&f.data_type)
31945 )
31946 }
31947 })
31948 .collect();
31949 format!("STRUCT<{}>", field_strs.join(", "))
31950 }
31951 DataType::Custom { name } => name.clone(),
31952 _ => format!("{:?}", dt),
31953 }
31954 }
31955
31956 /// Extract value and unit from an Interval expression
31957 /// Returns (value_expression, IntervalUnit)
31958 fn extract_interval_parts(
31959 interval_expr: &Expression,
31960 ) -> (Expression, crate::expressions::IntervalUnit) {
31961 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
31962
31963 if let Expression::Interval(iv) = interval_expr {
31964 let val = iv.this.clone().unwrap_or(Expression::number(0));
31965 let unit = match &iv.unit {
31966 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
31967 None => {
31968 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
31969 if let Expression::Literal(lit) = &val {
31970 if let crate::expressions::Literal::String(s) = lit.as_ref() {
31971 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
31972 if parts.len() == 2 {
31973 let unit_str = parts[1].trim().to_ascii_uppercase();
31974 let parsed_unit = match unit_str.as_str() {
31975 "YEAR" | "YEARS" => IntervalUnit::Year,
31976 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
31977 "MONTH" | "MONTHS" => IntervalUnit::Month,
31978 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
31979 "DAY" | "DAYS" => IntervalUnit::Day,
31980 "HOUR" | "HOURS" => IntervalUnit::Hour,
31981 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
31982 "SECOND" | "SECONDS" => IntervalUnit::Second,
31983 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
31984 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
31985 _ => IntervalUnit::Day,
31986 };
31987 // Return just the numeric part as value and parsed unit
31988 return (
31989 Expression::Literal(Box::new(
31990 crate::expressions::Literal::String(
31991 parts[0].trim().to_string(),
31992 ),
31993 )),
31994 parsed_unit,
31995 );
31996 }
31997 IntervalUnit::Day
31998 } else {
31999 IntervalUnit::Day
32000 }
32001 } else {
32002 IntervalUnit::Day
32003 }
32004 }
32005 _ => IntervalUnit::Day,
32006 };
32007 (val, unit)
32008 } else {
32009 // Not an interval - pass through
32010 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
32011 }
32012 }
32013
32014 fn rewrite_tsql_interval_arithmetic(expr: &Expression) -> Option<Expression> {
32015 match expr {
32016 Expression::Add(op) => {
32017 let Expression::Interval(_) = &op.right else {
32018 return None;
32019 };
32020 Some(Self::build_tsql_dateadd_from_interval(
32021 op.left.clone(),
32022 &op.right,
32023 false,
32024 ))
32025 }
32026 Expression::Sub(op) => {
32027 let Expression::Interval(_) = &op.right else {
32028 return None;
32029 };
32030 Some(Self::build_tsql_dateadd_from_interval(
32031 op.left.clone(),
32032 &op.right,
32033 true,
32034 ))
32035 }
32036 _ => None,
32037 }
32038 }
32039
32040 fn build_tsql_dateadd_from_interval(
32041 date: Expression,
32042 interval: &Expression,
32043 subtract: bool,
32044 ) -> Expression {
32045 let (value, unit) = Self::extract_interval_parts(interval);
32046 let unit = Self::interval_unit_to_string(&unit);
32047 let amount = Self::tsql_dateadd_amount(value, subtract);
32048
32049 Expression::Function(Box::new(Function::new(
32050 "DATEADD".to_string(),
32051 vec![Expression::Identifier(Identifier::new(unit)), amount, date],
32052 )))
32053 }
32054
32055 fn tsql_dateadd_amount(value: Expression, negate: bool) -> Expression {
32056 use crate::expressions::UnaryOp;
32057
32058 fn numeric_literal_value(value: &Expression) -> Option<&str> {
32059 match value {
32060 Expression::Literal(lit) => match lit.as_ref() {
32061 crate::expressions::Literal::Number(n)
32062 | crate::expressions::Literal::String(n) => Some(n.as_str()),
32063 _ => None,
32064 },
32065 _ => None,
32066 }
32067 }
32068
32069 if let Some(n) = numeric_literal_value(&value) {
32070 if let Ok(parsed) = n.parse::<f64>() {
32071 let normalized = if negate { -parsed } else { parsed };
32072 let rendered = if normalized.fract() == 0.0 {
32073 format!("{}", normalized as i64)
32074 } else {
32075 normalized.to_string()
32076 };
32077 return Expression::Literal(Box::new(crate::expressions::Literal::Number(
32078 rendered,
32079 )));
32080 }
32081 }
32082
32083 if !negate {
32084 return value;
32085 }
32086
32087 match value {
32088 Expression::Neg(op) => op.this,
32089 other => Expression::Neg(Box::new(UnaryOp {
32090 this: other,
32091 inferred_type: None,
32092 })),
32093 }
32094 }
32095
32096 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
32097 fn normalize_bigquery_function(
32098 e: Expression,
32099 source: DialectType,
32100 target: DialectType,
32101 ) -> Result<Expression> {
32102 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
32103
32104 let f = if let Expression::Function(f) = e {
32105 *f
32106 } else {
32107 return Ok(e);
32108 };
32109 let name = f.name.to_ascii_uppercase();
32110 let mut args = f.args;
32111
32112 /// Helper to extract unit string from an identifier, column, or literal expression
32113 fn get_unit_str(expr: &Expression) -> String {
32114 match expr {
32115 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
32116 Expression::Var(v) => v.this.to_ascii_uppercase(),
32117 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
32118 let Literal::String(s) = lit.as_ref() else {
32119 unreachable!()
32120 };
32121 s.to_ascii_uppercase()
32122 }
32123 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
32124 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
32125 Expression::Function(f) => {
32126 let base = f.name.to_ascii_uppercase();
32127 if !f.args.is_empty() {
32128 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
32129 let inner = get_unit_str(&f.args[0]);
32130 format!("{}({})", base, inner)
32131 } else {
32132 base
32133 }
32134 }
32135 _ => "DAY".to_string(),
32136 }
32137 }
32138
32139 /// Parse unit string to IntervalUnit
32140 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
32141 match s {
32142 "YEAR" => crate::expressions::IntervalUnit::Year,
32143 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
32144 "MONTH" => crate::expressions::IntervalUnit::Month,
32145 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
32146 "DAY" => crate::expressions::IntervalUnit::Day,
32147 "HOUR" => crate::expressions::IntervalUnit::Hour,
32148 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32149 "SECOND" => crate::expressions::IntervalUnit::Second,
32150 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
32151 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
32152 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
32153 _ => crate::expressions::IntervalUnit::Day,
32154 }
32155 }
32156
32157 match name.as_str() {
32158 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
32159 // (BigQuery: result = date1 - date2, Standard: result = end - start)
32160 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
32161 let date1 = args.remove(0);
32162 let date2 = args.remove(0);
32163 let unit_expr = args.remove(0);
32164 let unit_str = get_unit_str(&unit_expr);
32165
32166 if matches!(target, DialectType::BigQuery) {
32167 // BigQuery -> BigQuery: just uppercase the unit
32168 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
32169 return Ok(Expression::Function(Box::new(Function::new(
32170 f.name,
32171 vec![date1, date2, unit],
32172 ))));
32173 }
32174
32175 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
32176 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
32177 if matches!(target, DialectType::Snowflake) {
32178 return Ok(Expression::TimestampDiff(Box::new(
32179 crate::expressions::TimestampDiff {
32180 this: Box::new(date2),
32181 expression: Box::new(date1),
32182 unit: Some(unit_str),
32183 },
32184 )));
32185 }
32186
32187 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
32188 if matches!(target, DialectType::DuckDB) {
32189 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
32190 // CAST to TIME
32191 let cast_fn = |e: Expression| -> Expression {
32192 match e {
32193 Expression::Literal(lit)
32194 if matches!(lit.as_ref(), Literal::String(_)) =>
32195 {
32196 let Literal::String(s) = lit.as_ref() else {
32197 unreachable!()
32198 };
32199 Expression::Cast(Box::new(Cast {
32200 this: Expression::Literal(Box::new(Literal::String(
32201 s.clone(),
32202 ))),
32203 to: DataType::Custom {
32204 name: "TIME".to_string(),
32205 },
32206 trailing_comments: vec![],
32207 double_colon_syntax: false,
32208 format: None,
32209 default: None,
32210 inferred_type: None,
32211 }))
32212 }
32213 other => other,
32214 }
32215 };
32216 (cast_fn(date1), cast_fn(date2))
32217 } else if name == "DATETIME_DIFF" {
32218 // CAST to TIMESTAMP
32219 (
32220 Self::ensure_cast_timestamp(date1),
32221 Self::ensure_cast_timestamp(date2),
32222 )
32223 } else {
32224 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
32225 (
32226 Self::ensure_cast_timestamptz(date1),
32227 Self::ensure_cast_timestamptz(date2),
32228 )
32229 };
32230 return Ok(Expression::Function(Box::new(Function::new(
32231 "DATE_DIFF".to_string(),
32232 vec![
32233 Expression::Literal(Box::new(Literal::String(unit_str))),
32234 cast_d2,
32235 cast_d1,
32236 ],
32237 ))));
32238 }
32239
32240 // Convert to standard TIMESTAMPDIFF(unit, start, end)
32241 let unit = Expression::Identifier(Identifier::new(unit_str));
32242 Ok(Expression::Function(Box::new(Function::new(
32243 "TIMESTAMPDIFF".to_string(),
32244 vec![unit, date2, date1],
32245 ))))
32246 }
32247
32248 // DATEDIFF(unit, start, end) -> target-specific form
32249 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
32250 "DATEDIFF" if args.len() == 3 => {
32251 let arg0 = args.remove(0);
32252 let arg1 = args.remove(0);
32253 let arg2 = args.remove(0);
32254 let unit_str = get_unit_str(&arg0);
32255
32256 // Redshift DATEDIFF(unit, start, end) order: result = end - start
32257 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
32258 // TSQL DATEDIFF(unit, start, end) order: result = end - start
32259
32260 if matches!(target, DialectType::Snowflake) {
32261 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
32262 let unit = Expression::Identifier(Identifier::new(unit_str));
32263 return Ok(Expression::Function(Box::new(Function::new(
32264 "DATEDIFF".to_string(),
32265 vec![unit, arg1, arg2],
32266 ))));
32267 }
32268
32269 if matches!(target, DialectType::DuckDB) {
32270 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
32271 let cast_d1 = Self::ensure_cast_timestamp(arg1);
32272 let cast_d2 = Self::ensure_cast_timestamp(arg2);
32273 return Ok(Expression::Function(Box::new(Function::new(
32274 "DATE_DIFF".to_string(),
32275 vec![
32276 Expression::Literal(Box::new(Literal::String(unit_str))),
32277 cast_d1,
32278 cast_d2,
32279 ],
32280 ))));
32281 }
32282
32283 if matches!(target, DialectType::BigQuery) {
32284 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
32285 let cast_d1 = Self::ensure_cast_datetime(arg1);
32286 let cast_d2 = Self::ensure_cast_datetime(arg2);
32287 let unit = Expression::Identifier(Identifier::new(unit_str));
32288 return Ok(Expression::Function(Box::new(Function::new(
32289 "DATE_DIFF".to_string(),
32290 vec![cast_d2, cast_d1, unit],
32291 ))));
32292 }
32293
32294 if matches!(target, DialectType::Spark | DialectType::Databricks) {
32295 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
32296 let unit = Expression::Identifier(Identifier::new(unit_str));
32297 return Ok(Expression::Function(Box::new(Function::new(
32298 "DATEDIFF".to_string(),
32299 vec![unit, arg1, arg2],
32300 ))));
32301 }
32302
32303 if matches!(target, DialectType::Hive) {
32304 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
32305 match unit_str.as_str() {
32306 "MONTH" => {
32307 return Ok(Expression::Function(Box::new(Function::new(
32308 "CAST".to_string(),
32309 vec![Expression::Function(Box::new(Function::new(
32310 "MONTHS_BETWEEN".to_string(),
32311 vec![arg2, arg1],
32312 )))],
32313 ))));
32314 }
32315 "WEEK" => {
32316 return Ok(Expression::Cast(Box::new(Cast {
32317 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
32318 Expression::Function(Box::new(Function::new(
32319 "DATEDIFF".to_string(),
32320 vec![arg2, arg1],
32321 ))),
32322 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
32323 ))),
32324 to: DataType::Int {
32325 length: None,
32326 integer_spelling: false,
32327 },
32328 trailing_comments: vec![],
32329 double_colon_syntax: false,
32330 format: None,
32331 default: None,
32332 inferred_type: None,
32333 })));
32334 }
32335 _ => {
32336 // Default: DATEDIFF(end, start) for DAY
32337 return Ok(Expression::Function(Box::new(Function::new(
32338 "DATEDIFF".to_string(),
32339 vec![arg2, arg1],
32340 ))));
32341 }
32342 }
32343 }
32344
32345 if matches!(
32346 target,
32347 DialectType::Presto | DialectType::Trino | DialectType::Athena
32348 ) {
32349 // Presto/Trino: DATE_DIFF('UNIT', start, end)
32350 return Ok(Expression::Function(Box::new(Function::new(
32351 "DATE_DIFF".to_string(),
32352 vec![
32353 Expression::Literal(Box::new(Literal::String(unit_str))),
32354 arg1,
32355 arg2,
32356 ],
32357 ))));
32358 }
32359
32360 if matches!(target, DialectType::TSQL) {
32361 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
32362 let cast_d2 = Self::ensure_cast_datetime2(arg2);
32363 let unit = Expression::Identifier(Identifier::new(unit_str));
32364 return Ok(Expression::Function(Box::new(Function::new(
32365 "DATEDIFF".to_string(),
32366 vec![unit, arg1, cast_d2],
32367 ))));
32368 }
32369
32370 if matches!(target, DialectType::PostgreSQL) {
32371 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
32372 // For now, use DATEDIFF (passthrough) with uppercased unit
32373 let unit = Expression::Identifier(Identifier::new(unit_str));
32374 return Ok(Expression::Function(Box::new(Function::new(
32375 "DATEDIFF".to_string(),
32376 vec![unit, arg1, arg2],
32377 ))));
32378 }
32379
32380 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
32381 let unit = Expression::Identifier(Identifier::new(unit_str));
32382 Ok(Expression::Function(Box::new(Function::new(
32383 "DATEDIFF".to_string(),
32384 vec![unit, arg1, arg2],
32385 ))))
32386 }
32387
32388 // DATE_DIFF(date1, date2, unit) -> standard form
32389 "DATE_DIFF" if args.len() == 3 => {
32390 let date1 = args.remove(0);
32391 let date2 = args.remove(0);
32392 let unit_expr = args.remove(0);
32393 let unit_str = get_unit_str(&unit_expr);
32394
32395 if matches!(target, DialectType::BigQuery) {
32396 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
32397 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
32398 "WEEK".to_string()
32399 } else {
32400 unit_str
32401 };
32402 let norm_d1 = Self::date_literal_to_cast(date1);
32403 let norm_d2 = Self::date_literal_to_cast(date2);
32404 let unit = Expression::Identifier(Identifier::new(norm_unit));
32405 return Ok(Expression::Function(Box::new(Function::new(
32406 f.name,
32407 vec![norm_d1, norm_d2, unit],
32408 ))));
32409 }
32410
32411 if matches!(target, DialectType::MySQL) {
32412 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
32413 let norm_d1 = Self::date_literal_to_cast(date1);
32414 let norm_d2 = Self::date_literal_to_cast(date2);
32415 return Ok(Expression::Function(Box::new(Function::new(
32416 "DATEDIFF".to_string(),
32417 vec![norm_d1, norm_d2],
32418 ))));
32419 }
32420
32421 if matches!(target, DialectType::StarRocks) {
32422 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
32423 let norm_d1 = Self::date_literal_to_cast(date1);
32424 let norm_d2 = Self::date_literal_to_cast(date2);
32425 return Ok(Expression::Function(Box::new(Function::new(
32426 "DATE_DIFF".to_string(),
32427 vec![
32428 Expression::Literal(Box::new(Literal::String(unit_str))),
32429 norm_d1,
32430 norm_d2,
32431 ],
32432 ))));
32433 }
32434
32435 if matches!(target, DialectType::DuckDB) {
32436 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
32437 let norm_d1 = Self::ensure_cast_date(date1);
32438 let norm_d2 = Self::ensure_cast_date(date2);
32439
32440 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
32441 let is_week_variant = unit_str == "WEEK"
32442 || unit_str.starts_with("WEEK(")
32443 || unit_str == "ISOWEEK";
32444 if is_week_variant {
32445 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
32446 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
32447 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
32448 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
32449 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
32450 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
32451 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
32452 Some("1") // Shift Sunday to Monday alignment
32453 } else if unit_str == "WEEK(SATURDAY)" {
32454 Some("-5")
32455 } else if unit_str == "WEEK(TUESDAY)" {
32456 Some("-1")
32457 } else if unit_str == "WEEK(WEDNESDAY)" {
32458 Some("-2")
32459 } else if unit_str == "WEEK(THURSDAY)" {
32460 Some("-3")
32461 } else if unit_str == "WEEK(FRIDAY)" {
32462 Some("-4")
32463 } else {
32464 Some("1") // default to Sunday
32465 };
32466
32467 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
32468 let shifted = if let Some(off) = offset {
32469 let interval =
32470 Expression::Interval(Box::new(crate::expressions::Interval {
32471 this: Some(Expression::Literal(Box::new(Literal::String(
32472 off.to_string(),
32473 )))),
32474 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32475 unit: crate::expressions::IntervalUnit::Day,
32476 use_plural: false,
32477 }),
32478 }));
32479 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
32480 date, interval,
32481 )))
32482 } else {
32483 date
32484 };
32485 Expression::Function(Box::new(Function::new(
32486 "DATE_TRUNC".to_string(),
32487 vec![
32488 Expression::Literal(Box::new(Literal::String(
32489 "WEEK".to_string(),
32490 ))),
32491 shifted,
32492 ],
32493 )))
32494 };
32495
32496 let trunc_d2 = make_trunc(norm_d2, day_offset);
32497 let trunc_d1 = make_trunc(norm_d1, day_offset);
32498 return Ok(Expression::Function(Box::new(Function::new(
32499 "DATE_DIFF".to_string(),
32500 vec![
32501 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
32502 trunc_d2,
32503 trunc_d1,
32504 ],
32505 ))));
32506 }
32507
32508 return Ok(Expression::Function(Box::new(Function::new(
32509 "DATE_DIFF".to_string(),
32510 vec![
32511 Expression::Literal(Box::new(Literal::String(unit_str))),
32512 norm_d2,
32513 norm_d1,
32514 ],
32515 ))));
32516 }
32517
32518 // Default: DATEDIFF(unit, date2, date1)
32519 let unit = Expression::Identifier(Identifier::new(unit_str));
32520 Ok(Expression::Function(Box::new(Function::new(
32521 "DATEDIFF".to_string(),
32522 vec![unit, date2, date1],
32523 ))))
32524 }
32525
32526 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
32527 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
32528 let ts = args.remove(0);
32529 let interval_expr = args.remove(0);
32530 let (val, unit) = Self::extract_interval_parts(&interval_expr);
32531
32532 match target {
32533 DialectType::Snowflake => {
32534 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
32535 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
32536 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
32537 let unit_str = Self::interval_unit_to_string(&unit);
32538 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
32539 Ok(Expression::TimestampAdd(Box::new(
32540 crate::expressions::TimestampAdd {
32541 this: Box::new(val),
32542 expression: Box::new(cast_ts),
32543 unit: Some(unit_str.to_string()),
32544 },
32545 )))
32546 }
32547 DialectType::Spark | DialectType::Databricks => {
32548 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
32549 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
32550 let interval =
32551 Expression::Interval(Box::new(crate::expressions::Interval {
32552 this: Some(val),
32553 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32554 unit,
32555 use_plural: false,
32556 }),
32557 }));
32558 Ok(Expression::Add(Box::new(
32559 crate::expressions::BinaryOp::new(ts, interval),
32560 )))
32561 } else if name == "DATETIME_ADD"
32562 && matches!(target, DialectType::Databricks)
32563 {
32564 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
32565 let unit_str = Self::interval_unit_to_string(&unit);
32566 Ok(Expression::Function(Box::new(Function::new(
32567 "TIMESTAMPADD".to_string(),
32568 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
32569 ))))
32570 } else {
32571 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
32572 let unit_str = Self::interval_unit_to_string(&unit);
32573 let cast_ts =
32574 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
32575 Self::maybe_cast_ts(ts)
32576 } else {
32577 ts
32578 };
32579 Ok(Expression::Function(Box::new(Function::new(
32580 "DATE_ADD".to_string(),
32581 vec![
32582 Expression::Identifier(Identifier::new(unit_str)),
32583 val,
32584 cast_ts,
32585 ],
32586 ))))
32587 }
32588 }
32589 DialectType::MySQL => {
32590 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
32591 let mysql_ts = if name.starts_with("TIMESTAMP") {
32592 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
32593 match &ts {
32594 Expression::Function(ref inner_f)
32595 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
32596 {
32597 // Already wrapped, keep as-is
32598 ts
32599 }
32600 _ => {
32601 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
32602 let unwrapped = match ts {
32603 Expression::Literal(lit)
32604 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
32605 {
32606 let Literal::Timestamp(s) = lit.as_ref() else {
32607 unreachable!()
32608 };
32609 Expression::Literal(Box::new(Literal::String(
32610 s.clone(),
32611 )))
32612 }
32613 other => other,
32614 };
32615 Expression::Function(Box::new(Function::new(
32616 "TIMESTAMP".to_string(),
32617 vec![unwrapped],
32618 )))
32619 }
32620 }
32621 } else {
32622 ts
32623 };
32624 Ok(Expression::DateAdd(Box::new(
32625 crate::expressions::DateAddFunc {
32626 this: mysql_ts,
32627 interval: val,
32628 unit,
32629 },
32630 )))
32631 }
32632 _ => {
32633 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
32634 let cast_ts = if matches!(target, DialectType::DuckDB) {
32635 if name == "DATETIME_ADD" {
32636 Self::ensure_cast_timestamp(ts)
32637 } else if name.starts_with("TIMESTAMP") {
32638 Self::maybe_cast_ts_to_tz(ts, &name)
32639 } else {
32640 ts
32641 }
32642 } else {
32643 ts
32644 };
32645 Ok(Expression::DateAdd(Box::new(
32646 crate::expressions::DateAddFunc {
32647 this: cast_ts,
32648 interval: val,
32649 unit,
32650 },
32651 )))
32652 }
32653 }
32654 }
32655
32656 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
32657 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
32658 let ts = args.remove(0);
32659 let interval_expr = args.remove(0);
32660 let (val, unit) = Self::extract_interval_parts(&interval_expr);
32661
32662 match target {
32663 DialectType::Snowflake => {
32664 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
32665 let unit_str = Self::interval_unit_to_string(&unit);
32666 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
32667 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
32668 val,
32669 Expression::Neg(Box::new(crate::expressions::UnaryOp {
32670 this: Expression::number(1),
32671 inferred_type: None,
32672 })),
32673 )));
32674 Ok(Expression::TimestampAdd(Box::new(
32675 crate::expressions::TimestampAdd {
32676 this: Box::new(neg_val),
32677 expression: Box::new(cast_ts),
32678 unit: Some(unit_str.to_string()),
32679 },
32680 )))
32681 }
32682 DialectType::Spark | DialectType::Databricks => {
32683 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
32684 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
32685 {
32686 // Spark: ts - INTERVAL val UNIT
32687 let cast_ts = if name.starts_with("TIMESTAMP") {
32688 Self::maybe_cast_ts(ts)
32689 } else {
32690 ts
32691 };
32692 let interval =
32693 Expression::Interval(Box::new(crate::expressions::Interval {
32694 this: Some(val),
32695 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32696 unit,
32697 use_plural: false,
32698 }),
32699 }));
32700 Ok(Expression::Sub(Box::new(
32701 crate::expressions::BinaryOp::new(cast_ts, interval),
32702 )))
32703 } else {
32704 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
32705 let unit_str = Self::interval_unit_to_string(&unit);
32706 let neg_val =
32707 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
32708 val,
32709 Expression::Neg(Box::new(crate::expressions::UnaryOp {
32710 this: Expression::number(1),
32711 inferred_type: None,
32712 })),
32713 )));
32714 Ok(Expression::Function(Box::new(Function::new(
32715 "TIMESTAMPADD".to_string(),
32716 vec![
32717 Expression::Identifier(Identifier::new(unit_str)),
32718 neg_val,
32719 ts,
32720 ],
32721 ))))
32722 }
32723 }
32724 DialectType::MySQL => {
32725 let mysql_ts = if name.starts_with("TIMESTAMP") {
32726 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
32727 match &ts {
32728 Expression::Function(ref inner_f)
32729 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
32730 {
32731 // Already wrapped, keep as-is
32732 ts
32733 }
32734 _ => {
32735 let unwrapped = match ts {
32736 Expression::Literal(lit)
32737 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
32738 {
32739 let Literal::Timestamp(s) = lit.as_ref() else {
32740 unreachable!()
32741 };
32742 Expression::Literal(Box::new(Literal::String(
32743 s.clone(),
32744 )))
32745 }
32746 other => other,
32747 };
32748 Expression::Function(Box::new(Function::new(
32749 "TIMESTAMP".to_string(),
32750 vec![unwrapped],
32751 )))
32752 }
32753 }
32754 } else {
32755 ts
32756 };
32757 Ok(Expression::DateSub(Box::new(
32758 crate::expressions::DateAddFunc {
32759 this: mysql_ts,
32760 interval: val,
32761 unit,
32762 },
32763 )))
32764 }
32765 _ => {
32766 let cast_ts = if matches!(target, DialectType::DuckDB) {
32767 if name == "DATETIME_SUB" {
32768 Self::ensure_cast_timestamp(ts)
32769 } else if name.starts_with("TIMESTAMP") {
32770 Self::maybe_cast_ts_to_tz(ts, &name)
32771 } else {
32772 ts
32773 }
32774 } else {
32775 ts
32776 };
32777 Ok(Expression::DateSub(Box::new(
32778 crate::expressions::DateAddFunc {
32779 this: cast_ts,
32780 interval: val,
32781 unit,
32782 },
32783 )))
32784 }
32785 }
32786 }
32787
32788 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
32789 "DATE_SUB" if args.len() == 2 => {
32790 let date = args.remove(0);
32791 let interval_expr = args.remove(0);
32792 let (val, unit) = Self::extract_interval_parts(&interval_expr);
32793
32794 match target {
32795 DialectType::Databricks | DialectType::Spark => {
32796 // Databricks/Spark: DATE_ADD(date, -val)
32797 // Use DateAdd expression with negative val so it generates correctly
32798 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
32799 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
32800 // Instead, we directly output as a simple negated DateSub
32801 Ok(Expression::DateSub(Box::new(
32802 crate::expressions::DateAddFunc {
32803 this: date,
32804 interval: val,
32805 unit,
32806 },
32807 )))
32808 }
32809 DialectType::DuckDB => {
32810 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
32811 let cast_date = Self::ensure_cast_date(date);
32812 let interval =
32813 Expression::Interval(Box::new(crate::expressions::Interval {
32814 this: Some(val),
32815 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32816 unit,
32817 use_plural: false,
32818 }),
32819 }));
32820 Ok(Expression::Sub(Box::new(
32821 crate::expressions::BinaryOp::new(cast_date, interval),
32822 )))
32823 }
32824 DialectType::Snowflake => {
32825 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
32826 // Just ensure the date is cast properly
32827 let cast_date = Self::ensure_cast_date(date);
32828 Ok(Expression::DateSub(Box::new(
32829 crate::expressions::DateAddFunc {
32830 this: cast_date,
32831 interval: val,
32832 unit,
32833 },
32834 )))
32835 }
32836 DialectType::PostgreSQL => {
32837 // PostgreSQL: date - INTERVAL 'val UNIT'
32838 let unit_str = Self::interval_unit_to_string(&unit);
32839 let interval =
32840 Expression::Interval(Box::new(crate::expressions::Interval {
32841 this: Some(Expression::Literal(Box::new(Literal::String(
32842 format!("{} {}", Self::expr_to_string(&val), unit_str),
32843 )))),
32844 unit: None,
32845 }));
32846 Ok(Expression::Sub(Box::new(
32847 crate::expressions::BinaryOp::new(date, interval),
32848 )))
32849 }
32850 _ => Ok(Expression::DateSub(Box::new(
32851 crate::expressions::DateAddFunc {
32852 this: date,
32853 interval: val,
32854 unit,
32855 },
32856 ))),
32857 }
32858 }
32859
32860 // DATEADD(unit, val, date) -> target-specific form
32861 // Used by: Redshift, Snowflake, TSQL, ClickHouse
32862 "DATEADD" if args.len() == 3 => {
32863 let arg0 = args.remove(0);
32864 let arg1 = args.remove(0);
32865 let arg2 = args.remove(0);
32866 let unit_str = get_unit_str(&arg0);
32867
32868 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
32869 // Keep DATEADD(UNIT, val, date) with uppercased unit
32870 let unit = Expression::Identifier(Identifier::new(unit_str));
32871 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
32872 let date = if matches!(target, DialectType::TSQL)
32873 && !matches!(
32874 source,
32875 DialectType::Spark | DialectType::Databricks | DialectType::Hive
32876 ) {
32877 Self::ensure_cast_datetime2(arg2)
32878 } else {
32879 arg2
32880 };
32881 return Ok(Expression::Function(Box::new(Function::new(
32882 "DATEADD".to_string(),
32883 vec![unit, arg1, date],
32884 ))));
32885 }
32886
32887 if matches!(target, DialectType::DuckDB) {
32888 // DuckDB: date + INTERVAL 'val' UNIT
32889 let iu = parse_interval_unit(&unit_str);
32890 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
32891 this: Some(arg1),
32892 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32893 unit: iu,
32894 use_plural: false,
32895 }),
32896 }));
32897 let cast_date = Self::ensure_cast_timestamp(arg2);
32898 return Ok(Expression::Add(Box::new(
32899 crate::expressions::BinaryOp::new(cast_date, interval),
32900 )));
32901 }
32902
32903 if matches!(target, DialectType::BigQuery) {
32904 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
32905 let iu = parse_interval_unit(&unit_str);
32906 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
32907 this: Some(arg1),
32908 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32909 unit: iu,
32910 use_plural: false,
32911 }),
32912 }));
32913 return Ok(Expression::Function(Box::new(Function::new(
32914 "DATE_ADD".to_string(),
32915 vec![arg2, interval],
32916 ))));
32917 }
32918
32919 if matches!(target, DialectType::Databricks) {
32920 // Databricks: keep DATEADD(UNIT, val, date) format
32921 let unit = Expression::Identifier(Identifier::new(unit_str));
32922 return Ok(Expression::Function(Box::new(Function::new(
32923 "DATEADD".to_string(),
32924 vec![unit, arg1, arg2],
32925 ))));
32926 }
32927
32928 if matches!(target, DialectType::Spark) {
32929 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
32930 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
32931 if let Expression::Literal(lit) = &expr {
32932 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
32933 if let Ok(val) = n.parse::<i64>() {
32934 return Expression::Literal(Box::new(
32935 crate::expressions::Literal::Number(
32936 (val * factor).to_string(),
32937 ),
32938 ));
32939 }
32940 }
32941 }
32942 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
32943 expr,
32944 Expression::Literal(Box::new(crate::expressions::Literal::Number(
32945 factor.to_string(),
32946 ))),
32947 )))
32948 }
32949 match unit_str.as_str() {
32950 "YEAR" => {
32951 let months = multiply_expr_dateadd(arg1, 12);
32952 return Ok(Expression::Function(Box::new(Function::new(
32953 "ADD_MONTHS".to_string(),
32954 vec![arg2, months],
32955 ))));
32956 }
32957 "QUARTER" => {
32958 let months = multiply_expr_dateadd(arg1, 3);
32959 return Ok(Expression::Function(Box::new(Function::new(
32960 "ADD_MONTHS".to_string(),
32961 vec![arg2, months],
32962 ))));
32963 }
32964 "MONTH" => {
32965 return Ok(Expression::Function(Box::new(Function::new(
32966 "ADD_MONTHS".to_string(),
32967 vec![arg2, arg1],
32968 ))));
32969 }
32970 "WEEK" => {
32971 let days = multiply_expr_dateadd(arg1, 7);
32972 return Ok(Expression::Function(Box::new(Function::new(
32973 "DATE_ADD".to_string(),
32974 vec![arg2, days],
32975 ))));
32976 }
32977 "DAY" => {
32978 return Ok(Expression::Function(Box::new(Function::new(
32979 "DATE_ADD".to_string(),
32980 vec![arg2, arg1],
32981 ))));
32982 }
32983 _ => {
32984 let unit = Expression::Identifier(Identifier::new(unit_str));
32985 return Ok(Expression::Function(Box::new(Function::new(
32986 "DATE_ADD".to_string(),
32987 vec![unit, arg1, arg2],
32988 ))));
32989 }
32990 }
32991 }
32992
32993 if matches!(target, DialectType::Hive) {
32994 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
32995 match unit_str.as_str() {
32996 "DAY" => {
32997 return Ok(Expression::Function(Box::new(Function::new(
32998 "DATE_ADD".to_string(),
32999 vec![arg2, arg1],
33000 ))));
33001 }
33002 "MONTH" => {
33003 return Ok(Expression::Function(Box::new(Function::new(
33004 "ADD_MONTHS".to_string(),
33005 vec![arg2, arg1],
33006 ))));
33007 }
33008 _ => {
33009 let iu = parse_interval_unit(&unit_str);
33010 let interval =
33011 Expression::Interval(Box::new(crate::expressions::Interval {
33012 this: Some(arg1),
33013 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33014 unit: iu,
33015 use_plural: false,
33016 }),
33017 }));
33018 return Ok(Expression::Add(Box::new(
33019 crate::expressions::BinaryOp::new(arg2, interval),
33020 )));
33021 }
33022 }
33023 }
33024
33025 if matches!(target, DialectType::PostgreSQL) {
33026 // PostgreSQL: date + INTERVAL 'val UNIT'
33027 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33028 this: Some(Expression::Literal(Box::new(Literal::String(format!(
33029 "{} {}",
33030 Self::expr_to_string(&arg1),
33031 unit_str
33032 ))))),
33033 unit: None,
33034 }));
33035 return Ok(Expression::Add(Box::new(
33036 crate::expressions::BinaryOp::new(arg2, interval),
33037 )));
33038 }
33039
33040 if matches!(
33041 target,
33042 DialectType::Presto | DialectType::Trino | DialectType::Athena
33043 ) {
33044 // Presto/Trino: DATE_ADD('UNIT', val, date)
33045 return Ok(Expression::Function(Box::new(Function::new(
33046 "DATE_ADD".to_string(),
33047 vec![
33048 Expression::Literal(Box::new(Literal::String(unit_str))),
33049 arg1,
33050 arg2,
33051 ],
33052 ))));
33053 }
33054
33055 if matches!(target, DialectType::ClickHouse) {
33056 // ClickHouse: DATE_ADD(UNIT, val, date)
33057 let unit = Expression::Identifier(Identifier::new(unit_str));
33058 return Ok(Expression::Function(Box::new(Function::new(
33059 "DATE_ADD".to_string(),
33060 vec![unit, arg1, arg2],
33061 ))));
33062 }
33063
33064 // Default: keep DATEADD with uppercased unit
33065 let unit = Expression::Identifier(Identifier::new(unit_str));
33066 Ok(Expression::Function(Box::new(Function::new(
33067 "DATEADD".to_string(),
33068 vec![unit, arg1, arg2],
33069 ))))
33070 }
33071
33072 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
33073 "DATE_ADD" if args.len() == 3 => {
33074 let arg0 = args.remove(0);
33075 let arg1 = args.remove(0);
33076 let arg2 = args.remove(0);
33077 let unit_str = get_unit_str(&arg0);
33078
33079 if matches!(
33080 target,
33081 DialectType::Presto | DialectType::Trino | DialectType::Athena
33082 ) {
33083 // Presto/Trino: DATE_ADD('UNIT', val, date)
33084 return Ok(Expression::Function(Box::new(Function::new(
33085 "DATE_ADD".to_string(),
33086 vec![
33087 Expression::Literal(Box::new(Literal::String(unit_str))),
33088 arg1,
33089 arg2,
33090 ],
33091 ))));
33092 }
33093
33094 if matches!(
33095 target,
33096 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
33097 ) {
33098 // DATEADD(UNIT, val, date)
33099 let unit = Expression::Identifier(Identifier::new(unit_str));
33100 let date = if matches!(target, DialectType::TSQL) {
33101 Self::ensure_cast_datetime2(arg2)
33102 } else {
33103 arg2
33104 };
33105 return Ok(Expression::Function(Box::new(Function::new(
33106 "DATEADD".to_string(),
33107 vec![unit, arg1, date],
33108 ))));
33109 }
33110
33111 if matches!(target, DialectType::DuckDB) {
33112 // DuckDB: date + INTERVAL val UNIT
33113 let iu = parse_interval_unit(&unit_str);
33114 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33115 this: Some(arg1),
33116 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33117 unit: iu,
33118 use_plural: false,
33119 }),
33120 }));
33121 return Ok(Expression::Add(Box::new(
33122 crate::expressions::BinaryOp::new(arg2, interval),
33123 )));
33124 }
33125
33126 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33127 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
33128 let unit = Expression::Identifier(Identifier::new(unit_str));
33129 return Ok(Expression::Function(Box::new(Function::new(
33130 "DATE_ADD".to_string(),
33131 vec![unit, arg1, arg2],
33132 ))));
33133 }
33134
33135 // Default: DATE_ADD(UNIT, val, date)
33136 let unit = Expression::Identifier(Identifier::new(unit_str));
33137 Ok(Expression::Function(Box::new(Function::new(
33138 "DATE_ADD".to_string(),
33139 vec![unit, arg1, arg2],
33140 ))))
33141 }
33142
33143 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
33144 "DATE_ADD" if args.len() == 2 => {
33145 let date = args.remove(0);
33146 let interval_expr = args.remove(0);
33147 let (val, unit) = Self::extract_interval_parts(&interval_expr);
33148 let unit_str = Self::interval_unit_to_string(&unit);
33149
33150 match target {
33151 DialectType::DuckDB => {
33152 // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
33153 let cast_date = Self::ensure_cast_date(date);
33154 let quoted_val = Self::quote_interval_val(&val);
33155 let interval =
33156 Expression::Interval(Box::new(crate::expressions::Interval {
33157 this: Some(quoted_val),
33158 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33159 unit,
33160 use_plural: false,
33161 }),
33162 }));
33163 Ok(Expression::Add(Box::new(
33164 crate::expressions::BinaryOp::new(cast_date, interval),
33165 )))
33166 }
33167 DialectType::PostgreSQL => {
33168 // PostgreSQL: date + INTERVAL 'val UNIT'
33169 let interval =
33170 Expression::Interval(Box::new(crate::expressions::Interval {
33171 this: Some(Expression::Literal(Box::new(Literal::String(
33172 format!("{} {}", Self::expr_to_string(&val), unit_str),
33173 )))),
33174 unit: None,
33175 }));
33176 Ok(Expression::Add(Box::new(
33177 crate::expressions::BinaryOp::new(date, interval),
33178 )))
33179 }
33180 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
33181 // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
33182 let val_str = Self::expr_to_string(&val);
33183 Ok(Expression::Function(Box::new(Function::new(
33184 "DATE_ADD".to_string(),
33185 vec![
33186 Expression::Literal(Box::new(Literal::String(
33187 unit_str.to_string(),
33188 ))),
33189 Expression::Cast(Box::new(Cast {
33190 this: Expression::Literal(Box::new(Literal::String(val_str))),
33191 to: DataType::BigInt { length: None },
33192 trailing_comments: vec![],
33193 double_colon_syntax: false,
33194 format: None,
33195 default: None,
33196 inferred_type: None,
33197 })),
33198 date,
33199 ],
33200 ))))
33201 }
33202 DialectType::Spark | DialectType::Hive => {
33203 // Spark/Hive: DATE_ADD(date, val) for DAY
33204 match unit_str {
33205 "DAY" => Ok(Expression::Function(Box::new(Function::new(
33206 "DATE_ADD".to_string(),
33207 vec![date, val],
33208 )))),
33209 "MONTH" => Ok(Expression::Function(Box::new(Function::new(
33210 "ADD_MONTHS".to_string(),
33211 vec![date, val],
33212 )))),
33213 _ => {
33214 let iu = parse_interval_unit(&unit_str);
33215 let interval =
33216 Expression::Interval(Box::new(crate::expressions::Interval {
33217 this: Some(val),
33218 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33219 unit: iu,
33220 use_plural: false,
33221 }),
33222 }));
33223 Ok(Expression::Function(Box::new(Function::new(
33224 "DATE_ADD".to_string(),
33225 vec![date, interval],
33226 ))))
33227 }
33228 }
33229 }
33230 DialectType::Snowflake => {
33231 // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
33232 let cast_date = Self::ensure_cast_date(date);
33233 let val_str = Self::expr_to_string(&val);
33234 Ok(Expression::Function(Box::new(Function::new(
33235 "DATEADD".to_string(),
33236 vec![
33237 Expression::Identifier(Identifier::new(unit_str)),
33238 Expression::Literal(Box::new(Literal::String(val_str))),
33239 cast_date,
33240 ],
33241 ))))
33242 }
33243 DialectType::TSQL | DialectType::Fabric => {
33244 let cast_date = Self::ensure_cast_datetime2(date);
33245 Ok(Expression::Function(Box::new(Function::new(
33246 "DATEADD".to_string(),
33247 vec![
33248 Expression::Identifier(Identifier::new(unit_str)),
33249 val,
33250 cast_date,
33251 ],
33252 ))))
33253 }
33254 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
33255 "DATEADD".to_string(),
33256 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33257 )))),
33258 DialectType::MySQL => {
33259 // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
33260 let quoted_val = Self::quote_interval_val(&val);
33261 let iu = parse_interval_unit(&unit_str);
33262 let interval =
33263 Expression::Interval(Box::new(crate::expressions::Interval {
33264 this: Some(quoted_val),
33265 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33266 unit: iu,
33267 use_plural: false,
33268 }),
33269 }));
33270 Ok(Expression::Function(Box::new(Function::new(
33271 "DATE_ADD".to_string(),
33272 vec![date, interval],
33273 ))))
33274 }
33275 DialectType::BigQuery => {
33276 // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
33277 let quoted_val = Self::quote_interval_val(&val);
33278 let iu = parse_interval_unit(&unit_str);
33279 let interval =
33280 Expression::Interval(Box::new(crate::expressions::Interval {
33281 this: Some(quoted_val),
33282 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33283 unit: iu,
33284 use_plural: false,
33285 }),
33286 }));
33287 Ok(Expression::Function(Box::new(Function::new(
33288 "DATE_ADD".to_string(),
33289 vec![date, interval],
33290 ))))
33291 }
33292 DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
33293 "DATEADD".to_string(),
33294 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33295 )))),
33296 _ => {
33297 // Default: keep as DATE_ADD with decomposed interval
33298 Ok(Expression::DateAdd(Box::new(
33299 crate::expressions::DateAddFunc {
33300 this: date,
33301 interval: val,
33302 unit,
33303 },
33304 )))
33305 }
33306 }
33307 }
33308
33309 // ADD_MONTHS(date, val) -> target-specific form
33310 "ADD_MONTHS" if args.len() == 2 => {
33311 let date = args.remove(0);
33312 let val = args.remove(0);
33313
33314 if matches!(target, DialectType::TSQL) {
33315 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
33316 let cast_date = Self::ensure_cast_datetime2(date);
33317 return Ok(Expression::Function(Box::new(Function::new(
33318 "DATEADD".to_string(),
33319 vec![
33320 Expression::Identifier(Identifier::new("MONTH")),
33321 val,
33322 cast_date,
33323 ],
33324 ))));
33325 }
33326
33327 if matches!(target, DialectType::DuckDB) {
33328 // DuckDB: date + INTERVAL val MONTH
33329 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33330 this: Some(val),
33331 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33332 unit: crate::expressions::IntervalUnit::Month,
33333 use_plural: false,
33334 }),
33335 }));
33336 return Ok(Expression::Add(Box::new(
33337 crate::expressions::BinaryOp::new(date, interval),
33338 )));
33339 }
33340
33341 if matches!(target, DialectType::Snowflake) {
33342 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
33343 if matches!(source, DialectType::Snowflake) {
33344 return Ok(Expression::Function(Box::new(Function::new(
33345 "ADD_MONTHS".to_string(),
33346 vec![date, val],
33347 ))));
33348 }
33349 return Ok(Expression::Function(Box::new(Function::new(
33350 "DATEADD".to_string(),
33351 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
33352 ))));
33353 }
33354
33355 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33356 // Spark: ADD_MONTHS(date, val) - keep as is
33357 return Ok(Expression::Function(Box::new(Function::new(
33358 "ADD_MONTHS".to_string(),
33359 vec![date, val],
33360 ))));
33361 }
33362
33363 if matches!(target, DialectType::Hive) {
33364 return Ok(Expression::Function(Box::new(Function::new(
33365 "ADD_MONTHS".to_string(),
33366 vec![date, val],
33367 ))));
33368 }
33369
33370 if matches!(
33371 target,
33372 DialectType::Presto | DialectType::Trino | DialectType::Athena
33373 ) {
33374 // Presto: DATE_ADD('MONTH', val, date)
33375 return Ok(Expression::Function(Box::new(Function::new(
33376 "DATE_ADD".to_string(),
33377 vec![
33378 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
33379 val,
33380 date,
33381 ],
33382 ))));
33383 }
33384
33385 // Default: keep ADD_MONTHS
33386 Ok(Expression::Function(Box::new(Function::new(
33387 "ADD_MONTHS".to_string(),
33388 vec![date, val],
33389 ))))
33390 }
33391
33392 // SAFE_DIVIDE(x, y) -> target-specific form directly
33393 "SAFE_DIVIDE" if args.len() == 2 => {
33394 let x = args.remove(0);
33395 let y = args.remove(0);
33396 // Wrap x and y in parens if they're complex expressions
33397 let y_ref = match &y {
33398 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33399 y.clone()
33400 }
33401 _ => Expression::Paren(Box::new(Paren {
33402 this: y.clone(),
33403 trailing_comments: vec![],
33404 })),
33405 };
33406 let x_ref = match &x {
33407 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33408 x.clone()
33409 }
33410 _ => Expression::Paren(Box::new(Paren {
33411 this: x.clone(),
33412 trailing_comments: vec![],
33413 })),
33414 };
33415 let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
33416 y_ref.clone(),
33417 Expression::number(0),
33418 )));
33419 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33420 x_ref.clone(),
33421 y_ref.clone(),
33422 )));
33423
33424 match target {
33425 DialectType::Spark | DialectType::Databricks => Ok(Expression::Function(
33426 Box::new(Function::new("TRY_DIVIDE".to_string(), vec![x, y])),
33427 )),
33428 DialectType::DuckDB | DialectType::PostgreSQL => {
33429 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
33430 let result_div = if matches!(target, DialectType::PostgreSQL) {
33431 let cast_x = Expression::Cast(Box::new(Cast {
33432 this: x_ref,
33433 to: DataType::Custom {
33434 name: "DOUBLE PRECISION".to_string(),
33435 },
33436 trailing_comments: vec![],
33437 double_colon_syntax: false,
33438 format: None,
33439 default: None,
33440 inferred_type: None,
33441 }));
33442 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33443 cast_x, y_ref,
33444 )))
33445 } else {
33446 div_expr
33447 };
33448 Ok(Expression::Case(Box::new(crate::expressions::Case {
33449 operand: None,
33450 whens: vec![(condition, result_div)],
33451 else_: Some(Expression::Null(crate::expressions::Null)),
33452 comments: Vec::new(),
33453 inferred_type: None,
33454 })))
33455 }
33456 DialectType::Snowflake => {
33457 // IFF(y <> 0, x / y, NULL)
33458 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33459 condition,
33460 true_value: div_expr,
33461 false_value: Some(Expression::Null(crate::expressions::Null)),
33462 original_name: Some("IFF".to_string()),
33463 inferred_type: None,
33464 })))
33465 }
33466 DialectType::Presto | DialectType::Trino => {
33467 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
33468 let cast_x = Expression::Cast(Box::new(Cast {
33469 this: x_ref,
33470 to: DataType::Double {
33471 precision: None,
33472 scale: None,
33473 },
33474 trailing_comments: vec![],
33475 double_colon_syntax: false,
33476 format: None,
33477 default: None,
33478 inferred_type: None,
33479 }));
33480 let cast_div = Expression::Div(Box::new(
33481 crate::expressions::BinaryOp::new(cast_x, y_ref),
33482 ));
33483 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33484 condition,
33485 true_value: cast_div,
33486 false_value: Some(Expression::Null(crate::expressions::Null)),
33487 original_name: None,
33488 inferred_type: None,
33489 })))
33490 }
33491 _ => {
33492 // IF(y <> 0, x / y, NULL)
33493 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33494 condition,
33495 true_value: div_expr,
33496 false_value: Some(Expression::Null(crate::expressions::Null)),
33497 original_name: None,
33498 inferred_type: None,
33499 })))
33500 }
33501 }
33502 }
33503
33504 // GENERATE_UUID() -> UUID() with CAST to string
33505 "GENERATE_UUID" => {
33506 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
33507 this: None,
33508 name: None,
33509 is_string: None,
33510 }));
33511 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
33512 let cast_type = match target {
33513 DialectType::DuckDB => Some(DataType::Text),
33514 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
33515 length: None,
33516 parenthesized_length: false,
33517 }),
33518 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33519 Some(DataType::String { length: None })
33520 }
33521 _ => None,
33522 };
33523 if let Some(dt) = cast_type {
33524 Ok(Expression::Cast(Box::new(Cast {
33525 this: uuid_expr,
33526 to: dt,
33527 trailing_comments: vec![],
33528 double_colon_syntax: false,
33529 format: None,
33530 default: None,
33531 inferred_type: None,
33532 })))
33533 } else {
33534 Ok(uuid_expr)
33535 }
33536 }
33537
33538 // COUNTIF(x) -> CountIf expression
33539 "COUNTIF" if args.len() == 1 => {
33540 let arg = args.remove(0);
33541 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
33542 this: arg,
33543 distinct: false,
33544 filter: None,
33545 order_by: vec![],
33546 name: None,
33547 ignore_nulls: None,
33548 having_max: None,
33549 limit: None,
33550 inferred_type: None,
33551 })))
33552 }
33553
33554 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
33555 "EDIT_DISTANCE" => {
33556 // Strip named arguments (max_distance => N) and pass as positional
33557 let mut positional_args: Vec<Expression> = vec![];
33558 for arg in args {
33559 match arg {
33560 Expression::NamedArgument(na) => {
33561 positional_args.push(na.value);
33562 }
33563 other => positional_args.push(other),
33564 }
33565 }
33566 if positional_args.len() >= 2 {
33567 let col1 = positional_args.remove(0);
33568 let col2 = positional_args.remove(0);
33569 let levenshtein = crate::expressions::BinaryFunc {
33570 this: col1,
33571 expression: col2,
33572 original_name: None,
33573 inferred_type: None,
33574 };
33575 // Pass extra args through a function wrapper with all args
33576 if !positional_args.is_empty() {
33577 let max_dist = positional_args.remove(0);
33578 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
33579 if matches!(target, DialectType::DuckDB) {
33580 let lev = Expression::Function(Box::new(Function::new(
33581 "LEVENSHTEIN".to_string(),
33582 vec![levenshtein.this, levenshtein.expression],
33583 )));
33584 let lev_is_null =
33585 Expression::IsNull(Box::new(crate::expressions::IsNull {
33586 this: lev.clone(),
33587 not: false,
33588 postfix_form: false,
33589 }));
33590 let max_is_null =
33591 Expression::IsNull(Box::new(crate::expressions::IsNull {
33592 this: max_dist.clone(),
33593 not: false,
33594 postfix_form: false,
33595 }));
33596 let null_check =
33597 Expression::Or(Box::new(crate::expressions::BinaryOp {
33598 left: lev_is_null,
33599 right: max_is_null,
33600 left_comments: Vec::new(),
33601 operator_comments: Vec::new(),
33602 trailing_comments: Vec::new(),
33603 inferred_type: None,
33604 }));
33605 let least =
33606 Expression::Least(Box::new(crate::expressions::VarArgFunc {
33607 expressions: vec![lev, max_dist],
33608 original_name: None,
33609 inferred_type: None,
33610 }));
33611 return Ok(Expression::Case(Box::new(crate::expressions::Case {
33612 operand: None,
33613 whens: vec![(
33614 null_check,
33615 Expression::Null(crate::expressions::Null),
33616 )],
33617 else_: Some(least),
33618 comments: Vec::new(),
33619 inferred_type: None,
33620 })));
33621 }
33622 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
33623 all_args.extend(positional_args);
33624 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
33625 let func_name = if matches!(target, DialectType::PostgreSQL) {
33626 "LEVENSHTEIN_LESS_EQUAL"
33627 } else {
33628 "LEVENSHTEIN"
33629 };
33630 return Ok(Expression::Function(Box::new(Function::new(
33631 func_name.to_string(),
33632 all_args,
33633 ))));
33634 }
33635 Ok(Expression::Levenshtein(Box::new(levenshtein)))
33636 } else {
33637 Ok(Expression::Function(Box::new(Function::new(
33638 "EDIT_DISTANCE".to_string(),
33639 positional_args,
33640 ))))
33641 }
33642 }
33643
33644 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
33645 "TIMESTAMP_SECONDS" if args.len() == 1 => {
33646 let arg = args.remove(0);
33647 Ok(Expression::UnixToTime(Box::new(
33648 crate::expressions::UnixToTime {
33649 this: Box::new(arg),
33650 scale: Some(0),
33651 zone: None,
33652 hours: None,
33653 minutes: None,
33654 format: None,
33655 target_type: None,
33656 },
33657 )))
33658 }
33659
33660 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
33661 "TIMESTAMP_MILLIS" if args.len() == 1 => {
33662 let arg = args.remove(0);
33663 Ok(Expression::UnixToTime(Box::new(
33664 crate::expressions::UnixToTime {
33665 this: Box::new(arg),
33666 scale: Some(3),
33667 zone: None,
33668 hours: None,
33669 minutes: None,
33670 format: None,
33671 target_type: None,
33672 },
33673 )))
33674 }
33675
33676 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
33677 "TIMESTAMP_MICROS" if args.len() == 1 => {
33678 let arg = args.remove(0);
33679 Ok(Expression::UnixToTime(Box::new(
33680 crate::expressions::UnixToTime {
33681 this: Box::new(arg),
33682 scale: Some(6),
33683 zone: None,
33684 hours: None,
33685 minutes: None,
33686 format: None,
33687 target_type: None,
33688 },
33689 )))
33690 }
33691
33692 // DIV(x, y) -> IntDiv expression
33693 "DIV" if args.len() == 2 => {
33694 let x = args.remove(0);
33695 let y = args.remove(0);
33696 Ok(Expression::IntDiv(Box::new(
33697 crate::expressions::BinaryFunc {
33698 this: x,
33699 expression: y,
33700 original_name: None,
33701 inferred_type: None,
33702 },
33703 )))
33704 }
33705
33706 // TO_HEX(x) -> target-specific form
33707 "TO_HEX" if args.len() == 1 => {
33708 let arg = args.remove(0);
33709 // Check if inner function already returns hex string in certain targets
33710 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
33711 if matches!(target, DialectType::BigQuery) {
33712 // BQ->BQ: keep as TO_HEX
33713 Ok(Expression::Function(Box::new(Function::new(
33714 "TO_HEX".to_string(),
33715 vec![arg],
33716 ))))
33717 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
33718 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
33719 Ok(arg)
33720 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
33721 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
33722 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
33723 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
33724 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
33725 if let Expression::Function(ref inner_f) = arg {
33726 let inner_args = inner_f.args.clone();
33727 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
33728 "SHA1" => Expression::Function(Box::new(Function::new(
33729 "SHA1_BINARY".to_string(),
33730 inner_args,
33731 ))),
33732 "MD5" => Expression::Function(Box::new(Function::new(
33733 "MD5_BINARY".to_string(),
33734 inner_args,
33735 ))),
33736 "SHA256" => {
33737 let mut a = inner_args;
33738 a.push(Expression::number(256));
33739 Expression::Function(Box::new(Function::new(
33740 "SHA2_BINARY".to_string(),
33741 a,
33742 )))
33743 }
33744 "SHA512" => {
33745 let mut a = inner_args;
33746 a.push(Expression::number(512));
33747 Expression::Function(Box::new(Function::new(
33748 "SHA2_BINARY".to_string(),
33749 a,
33750 )))
33751 }
33752 _ => arg.clone(),
33753 };
33754 Ok(Expression::Function(Box::new(Function::new(
33755 "TO_CHAR".to_string(),
33756 vec![binary_func],
33757 ))))
33758 } else {
33759 let inner = Expression::Function(Box::new(Function::new(
33760 "HEX".to_string(),
33761 vec![arg],
33762 )));
33763 Ok(Expression::Lower(Box::new(
33764 crate::expressions::UnaryFunc::new(inner),
33765 )))
33766 }
33767 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
33768 let inner = Expression::Function(Box::new(Function::new(
33769 "TO_HEX".to_string(),
33770 vec![arg],
33771 )));
33772 Ok(Expression::Lower(Box::new(
33773 crate::expressions::UnaryFunc::new(inner),
33774 )))
33775 } else {
33776 let inner =
33777 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
33778 Ok(Expression::Lower(Box::new(
33779 crate::expressions::UnaryFunc::new(inner),
33780 )))
33781 }
33782 }
33783
33784 // LAST_DAY(date, unit) -> strip unit for most targets, or transform for PostgreSQL
33785 "LAST_DAY" if args.len() == 2 => {
33786 let date = args.remove(0);
33787 let _unit = args.remove(0); // Strip the unit (MONTH is default)
33788 Ok(Expression::Function(Box::new(Function::new(
33789 "LAST_DAY".to_string(),
33790 vec![date],
33791 ))))
33792 }
33793
33794 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
33795 "GENERATE_ARRAY" => {
33796 let start = args.get(0).cloned();
33797 let end = args.get(1).cloned();
33798 let step = args.get(2).cloned();
33799 Ok(Expression::GenerateSeries(Box::new(
33800 crate::expressions::GenerateSeries {
33801 start: start.map(Box::new),
33802 end: end.map(Box::new),
33803 step: step.map(Box::new),
33804 is_end_exclusive: None,
33805 },
33806 )))
33807 }
33808
33809 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
33810 "GENERATE_TIMESTAMP_ARRAY" => {
33811 let start = args.get(0).cloned();
33812 let end = args.get(1).cloned();
33813 let step = args.get(2).cloned();
33814
33815 if matches!(target, DialectType::DuckDB) {
33816 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
33817 // Only cast string literals - leave columns/expressions as-is
33818 let maybe_cast_ts = |expr: Expression| -> Expression {
33819 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
33820 {
33821 Expression::Cast(Box::new(Cast {
33822 this: expr,
33823 to: DataType::Timestamp {
33824 precision: None,
33825 timezone: false,
33826 },
33827 trailing_comments: vec![],
33828 double_colon_syntax: false,
33829 format: None,
33830 default: None,
33831 inferred_type: None,
33832 }))
33833 } else {
33834 expr
33835 }
33836 };
33837 let cast_start = start.map(maybe_cast_ts);
33838 let cast_end = end.map(maybe_cast_ts);
33839 Ok(Expression::GenerateSeries(Box::new(
33840 crate::expressions::GenerateSeries {
33841 start: cast_start.map(Box::new),
33842 end: cast_end.map(Box::new),
33843 step: step.map(Box::new),
33844 is_end_exclusive: None,
33845 },
33846 )))
33847 } else {
33848 Ok(Expression::GenerateSeries(Box::new(
33849 crate::expressions::GenerateSeries {
33850 start: start.map(Box::new),
33851 end: end.map(Box::new),
33852 step: step.map(Box::new),
33853 is_end_exclusive: None,
33854 },
33855 )))
33856 }
33857 }
33858
33859 // TO_JSON(x) -> target-specific (from Spark/Hive)
33860 "TO_JSON" => {
33861 match target {
33862 DialectType::Presto | DialectType::Trino => {
33863 // JSON_FORMAT(CAST(x AS JSON))
33864 let arg = args
33865 .into_iter()
33866 .next()
33867 .unwrap_or(Expression::Null(crate::expressions::Null));
33868 let cast_json = Expression::Cast(Box::new(Cast {
33869 this: arg,
33870 to: DataType::Custom {
33871 name: "JSON".to_string(),
33872 },
33873 trailing_comments: vec![],
33874 double_colon_syntax: false,
33875 format: None,
33876 default: None,
33877 inferred_type: None,
33878 }));
33879 Ok(Expression::Function(Box::new(Function::new(
33880 "JSON_FORMAT".to_string(),
33881 vec![cast_json],
33882 ))))
33883 }
33884 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
33885 "TO_JSON_STRING".to_string(),
33886 args,
33887 )))),
33888 DialectType::DuckDB => {
33889 // CAST(TO_JSON(x) AS TEXT)
33890 let arg = args
33891 .into_iter()
33892 .next()
33893 .unwrap_or(Expression::Null(crate::expressions::Null));
33894 let to_json = Expression::Function(Box::new(Function::new(
33895 "TO_JSON".to_string(),
33896 vec![arg],
33897 )));
33898 Ok(Expression::Cast(Box::new(Cast {
33899 this: to_json,
33900 to: DataType::Text,
33901 trailing_comments: vec![],
33902 double_colon_syntax: false,
33903 format: None,
33904 default: None,
33905 inferred_type: None,
33906 })))
33907 }
33908 _ => Ok(Expression::Function(Box::new(Function::new(
33909 "TO_JSON".to_string(),
33910 args,
33911 )))),
33912 }
33913 }
33914
33915 // TO_JSON_STRING(x) -> target-specific
33916 "TO_JSON_STRING" => {
33917 match target {
33918 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
33919 Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
33920 ),
33921 DialectType::Presto | DialectType::Trino => {
33922 // JSON_FORMAT(CAST(x AS JSON))
33923 let arg = args
33924 .into_iter()
33925 .next()
33926 .unwrap_or(Expression::Null(crate::expressions::Null));
33927 let cast_json = Expression::Cast(Box::new(Cast {
33928 this: arg,
33929 to: DataType::Custom {
33930 name: "JSON".to_string(),
33931 },
33932 trailing_comments: vec![],
33933 double_colon_syntax: false,
33934 format: None,
33935 default: None,
33936 inferred_type: None,
33937 }));
33938 Ok(Expression::Function(Box::new(Function::new(
33939 "JSON_FORMAT".to_string(),
33940 vec![cast_json],
33941 ))))
33942 }
33943 DialectType::DuckDB => {
33944 // CAST(TO_JSON(x) AS TEXT)
33945 let arg = args
33946 .into_iter()
33947 .next()
33948 .unwrap_or(Expression::Null(crate::expressions::Null));
33949 let to_json = Expression::Function(Box::new(Function::new(
33950 "TO_JSON".to_string(),
33951 vec![arg],
33952 )));
33953 Ok(Expression::Cast(Box::new(Cast {
33954 this: to_json,
33955 to: DataType::Text,
33956 trailing_comments: vec![],
33957 double_colon_syntax: false,
33958 format: None,
33959 default: None,
33960 inferred_type: None,
33961 })))
33962 }
33963 DialectType::Snowflake => {
33964 // TO_JSON(x)
33965 Ok(Expression::Function(Box::new(Function::new(
33966 "TO_JSON".to_string(),
33967 args,
33968 ))))
33969 }
33970 _ => Ok(Expression::Function(Box::new(Function::new(
33971 "TO_JSON_STRING".to_string(),
33972 args,
33973 )))),
33974 }
33975 }
33976
33977 // SAFE_ADD(x, y) -> SafeAdd expression
33978 "SAFE_ADD" if args.len() == 2 => {
33979 let x = args.remove(0);
33980 let y = args.remove(0);
33981 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
33982 this: Box::new(x),
33983 expression: Box::new(y),
33984 })))
33985 }
33986
33987 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
33988 "SAFE_SUBTRACT" if args.len() == 2 => {
33989 let x = args.remove(0);
33990 let y = args.remove(0);
33991 Ok(Expression::SafeSubtract(Box::new(
33992 crate::expressions::SafeSubtract {
33993 this: Box::new(x),
33994 expression: Box::new(y),
33995 },
33996 )))
33997 }
33998
33999 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
34000 "SAFE_MULTIPLY" if args.len() == 2 => {
34001 let x = args.remove(0);
34002 let y = args.remove(0);
34003 Ok(Expression::SafeMultiply(Box::new(
34004 crate::expressions::SafeMultiply {
34005 this: Box::new(x),
34006 expression: Box::new(y),
34007 },
34008 )))
34009 }
34010
34011 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
34012 "REGEXP_CONTAINS" if args.len() == 2 => {
34013 let str_expr = args.remove(0);
34014 let pattern = args.remove(0);
34015 Ok(Expression::RegexpLike(Box::new(
34016 crate::expressions::RegexpFunc {
34017 this: str_expr,
34018 pattern,
34019 flags: None,
34020 },
34021 )))
34022 }
34023
34024 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
34025 "CONTAINS_SUBSTR" if args.len() == 2 => {
34026 let a = args.remove(0);
34027 let b = args.remove(0);
34028 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
34029 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
34030 Ok(Expression::Function(Box::new(Function::new(
34031 "CONTAINS".to_string(),
34032 vec![lower_a, lower_b],
34033 ))))
34034 }
34035
34036 // INT64(x) -> CAST(x AS BIGINT)
34037 "INT64" if args.len() == 1 => {
34038 let arg = args.remove(0);
34039 Ok(Expression::Cast(Box::new(Cast {
34040 this: arg,
34041 to: DataType::BigInt { length: None },
34042 trailing_comments: vec![],
34043 double_colon_syntax: false,
34044 format: None,
34045 default: None,
34046 inferred_type: None,
34047 })))
34048 }
34049
34050 // INSTR(str, substr) -> target-specific
34051 "INSTR" if args.len() >= 2 => {
34052 let str_expr = args.remove(0);
34053 let substr = args.remove(0);
34054 if matches!(target, DialectType::Snowflake) {
34055 // CHARINDEX(substr, str)
34056 Ok(Expression::Function(Box::new(Function::new(
34057 "CHARINDEX".to_string(),
34058 vec![substr, str_expr],
34059 ))))
34060 } else if matches!(target, DialectType::BigQuery) {
34061 // Keep as INSTR
34062 Ok(Expression::Function(Box::new(Function::new(
34063 "INSTR".to_string(),
34064 vec![str_expr, substr],
34065 ))))
34066 } else {
34067 // Default: keep as INSTR
34068 Ok(Expression::Function(Box::new(Function::new(
34069 "INSTR".to_string(),
34070 vec![str_expr, substr],
34071 ))))
34072 }
34073 }
34074
34075 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
34076 "DATE_TRUNC" if args.len() == 2 => {
34077 let expr = args.remove(0);
34078 let unit_expr = args.remove(0);
34079 let unit_str = get_unit_str(&unit_expr);
34080
34081 match target {
34082 DialectType::DuckDB
34083 | DialectType::Snowflake
34084 | DialectType::PostgreSQL
34085 | DialectType::Presto
34086 | DialectType::Trino
34087 | DialectType::Databricks
34088 | DialectType::Spark
34089 | DialectType::Redshift
34090 | DialectType::ClickHouse
34091 | DialectType::TSQL => {
34092 // Standard: DATE_TRUNC('UNIT', expr)
34093 Ok(Expression::Function(Box::new(Function::new(
34094 "DATE_TRUNC".to_string(),
34095 vec![
34096 Expression::Literal(Box::new(Literal::String(unit_str))),
34097 expr,
34098 ],
34099 ))))
34100 }
34101 _ => {
34102 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
34103 Ok(Expression::Function(Box::new(Function::new(
34104 "DATE_TRUNC".to_string(),
34105 vec![expr, unit_expr],
34106 ))))
34107 }
34108 }
34109 }
34110
34111 // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
34112 "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
34113 // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
34114 let ts = args.remove(0);
34115 let unit_expr = args.remove(0);
34116 let tz = if !args.is_empty() {
34117 Some(args.remove(0))
34118 } else {
34119 None
34120 };
34121 let unit_str = get_unit_str(&unit_expr);
34122
34123 match target {
34124 DialectType::DuckDB => {
34125 // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34126 // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
34127 // Without timezone for MINUTE+ granularity: just DATE_TRUNC
34128 let is_coarse = matches!(
34129 unit_str.as_str(),
34130 "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
34131 );
34132 // For DATETIME_TRUNC, cast string args to TIMESTAMP
34133 let cast_ts = if name == "DATETIME_TRUNC" {
34134 match ts {
34135 Expression::Literal(ref lit)
34136 if matches!(lit.as_ref(), Literal::String(ref _s)) =>
34137 {
34138 Expression::Cast(Box::new(Cast {
34139 this: ts,
34140 to: DataType::Timestamp {
34141 precision: None,
34142 timezone: false,
34143 },
34144 trailing_comments: vec![],
34145 double_colon_syntax: false,
34146 format: None,
34147 default: None,
34148 inferred_type: None,
34149 }))
34150 }
34151 _ => Self::maybe_cast_ts_to_tz(ts, &name),
34152 }
34153 } else {
34154 Self::maybe_cast_ts_to_tz(ts, &name)
34155 };
34156
34157 if let Some(tz_arg) = tz {
34158 if is_coarse {
34159 // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
34160 let at_tz = Expression::AtTimeZone(Box::new(
34161 crate::expressions::AtTimeZone {
34162 this: cast_ts,
34163 zone: tz_arg.clone(),
34164 },
34165 ));
34166 let date_trunc = Expression::Function(Box::new(Function::new(
34167 "DATE_TRUNC".to_string(),
34168 vec![
34169 Expression::Literal(Box::new(Literal::String(unit_str))),
34170 at_tz,
34171 ],
34172 )));
34173 Ok(Expression::AtTimeZone(Box::new(
34174 crate::expressions::AtTimeZone {
34175 this: date_trunc,
34176 zone: tz_arg,
34177 },
34178 )))
34179 } else {
34180 // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
34181 Ok(Expression::Function(Box::new(Function::new(
34182 "DATE_TRUNC".to_string(),
34183 vec![
34184 Expression::Literal(Box::new(Literal::String(unit_str))),
34185 cast_ts,
34186 ],
34187 ))))
34188 }
34189 } else {
34190 // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34191 Ok(Expression::Function(Box::new(Function::new(
34192 "DATE_TRUNC".to_string(),
34193 vec![
34194 Expression::Literal(Box::new(Literal::String(unit_str))),
34195 cast_ts,
34196 ],
34197 ))))
34198 }
34199 }
34200 DialectType::Databricks | DialectType::Spark => {
34201 // Databricks/Spark: DATE_TRUNC('UNIT', ts)
34202 Ok(Expression::Function(Box::new(Function::new(
34203 "DATE_TRUNC".to_string(),
34204 vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
34205 ))))
34206 }
34207 _ => {
34208 // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
34209 let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
34210 let mut date_trunc_args = vec![unit, ts];
34211 if let Some(tz_arg) = tz {
34212 date_trunc_args.push(tz_arg);
34213 }
34214 Ok(Expression::Function(Box::new(Function::new(
34215 "TIMESTAMP_TRUNC".to_string(),
34216 date_trunc_args,
34217 ))))
34218 }
34219 }
34220 }
34221
34222 // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
34223 "TIME" => {
34224 if args.len() == 3 {
34225 // TIME(h, m, s) constructor
34226 match target {
34227 DialectType::TSQL => {
34228 // TIMEFROMPARTS(h, m, s, 0, 0)
34229 args.push(Expression::number(0));
34230 args.push(Expression::number(0));
34231 Ok(Expression::Function(Box::new(Function::new(
34232 "TIMEFROMPARTS".to_string(),
34233 args,
34234 ))))
34235 }
34236 DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
34237 "MAKETIME".to_string(),
34238 args,
34239 )))),
34240 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
34241 Function::new("MAKE_TIME".to_string(), args),
34242 ))),
34243 _ => Ok(Expression::Function(Box::new(Function::new(
34244 "TIME".to_string(),
34245 args,
34246 )))),
34247 }
34248 } else if args.len() == 1 {
34249 let arg = args.remove(0);
34250 if matches!(target, DialectType::Spark) {
34251 // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
34252 Ok(Expression::Cast(Box::new(Cast {
34253 this: arg,
34254 to: DataType::Timestamp {
34255 timezone: false,
34256 precision: None,
34257 },
34258 trailing_comments: vec![],
34259 double_colon_syntax: false,
34260 format: None,
34261 default: None,
34262 inferred_type: None,
34263 })))
34264 } else {
34265 // Most targets: CAST(x AS TIME)
34266 Ok(Expression::Cast(Box::new(Cast {
34267 this: arg,
34268 to: DataType::Time {
34269 precision: None,
34270 timezone: false,
34271 },
34272 trailing_comments: vec![],
34273 double_colon_syntax: false,
34274 format: None,
34275 default: None,
34276 inferred_type: None,
34277 })))
34278 }
34279 } else if args.len() == 2 {
34280 // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
34281 let expr = args.remove(0);
34282 let tz = args.remove(0);
34283 let cast_tstz = Expression::Cast(Box::new(Cast {
34284 this: expr,
34285 to: DataType::Timestamp {
34286 timezone: true,
34287 precision: None,
34288 },
34289 trailing_comments: vec![],
34290 double_colon_syntax: false,
34291 format: None,
34292 default: None,
34293 inferred_type: None,
34294 }));
34295 let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34296 this: cast_tstz,
34297 zone: tz,
34298 }));
34299 Ok(Expression::Cast(Box::new(Cast {
34300 this: at_tz,
34301 to: DataType::Time {
34302 precision: None,
34303 timezone: false,
34304 },
34305 trailing_comments: vec![],
34306 double_colon_syntax: false,
34307 format: None,
34308 default: None,
34309 inferred_type: None,
34310 })))
34311 } else {
34312 Ok(Expression::Function(Box::new(Function::new(
34313 "TIME".to_string(),
34314 args,
34315 ))))
34316 }
34317 }
34318
34319 // DATETIME('string') -> CAST('string' AS TIMESTAMP)
34320 // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34321 // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
34322 // DATETIME(y, m, d, h, min, s) -> target-specific
34323 "DATETIME" => {
34324 // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
34325 if matches!(target, DialectType::BigQuery) {
34326 if args.len() == 2 {
34327 let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34328 if has_time_literal {
34329 let first = args.remove(0);
34330 let second = args.remove(0);
34331 let time_as_cast = match second {
34332 Expression::Literal(lit)
34333 if matches!(lit.as_ref(), Literal::Time(_)) =>
34334 {
34335 let Literal::Time(s) = lit.as_ref() else {
34336 unreachable!()
34337 };
34338 Expression::Cast(Box::new(Cast {
34339 this: Expression::Literal(Box::new(Literal::String(
34340 s.clone(),
34341 ))),
34342 to: DataType::Time {
34343 precision: None,
34344 timezone: false,
34345 },
34346 trailing_comments: vec![],
34347 double_colon_syntax: false,
34348 format: None,
34349 default: None,
34350 inferred_type: None,
34351 }))
34352 }
34353 other => other,
34354 };
34355 return Ok(Expression::Function(Box::new(Function::new(
34356 "DATETIME".to_string(),
34357 vec![first, time_as_cast],
34358 ))));
34359 }
34360 }
34361 return Ok(Expression::Function(Box::new(Function::new(
34362 "DATETIME".to_string(),
34363 args,
34364 ))));
34365 }
34366
34367 if args.len() == 1 {
34368 let arg = args.remove(0);
34369 Ok(Expression::Cast(Box::new(Cast {
34370 this: arg,
34371 to: DataType::Timestamp {
34372 timezone: false,
34373 precision: None,
34374 },
34375 trailing_comments: vec![],
34376 double_colon_syntax: false,
34377 format: None,
34378 default: None,
34379 inferred_type: None,
34380 })))
34381 } else if args.len() == 2 {
34382 let first = args.remove(0);
34383 let second = args.remove(0);
34384 // Check if second arg is a TIME literal
34385 let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34386 if is_time_literal {
34387 // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34388 let cast_date = Expression::Cast(Box::new(Cast {
34389 this: first,
34390 to: DataType::Date,
34391 trailing_comments: vec![],
34392 double_colon_syntax: false,
34393 format: None,
34394 default: None,
34395 inferred_type: None,
34396 }));
34397 // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
34398 let time_as_string = match second {
34399 Expression::Literal(lit)
34400 if matches!(lit.as_ref(), Literal::Time(_)) =>
34401 {
34402 let Literal::Time(s) = lit.as_ref() else {
34403 unreachable!()
34404 };
34405 Expression::Literal(Box::new(Literal::String(s.clone())))
34406 }
34407 other => other,
34408 };
34409 let cast_time = Expression::Cast(Box::new(Cast {
34410 this: time_as_string,
34411 to: DataType::Time {
34412 precision: None,
34413 timezone: false,
34414 },
34415 trailing_comments: vec![],
34416 double_colon_syntax: false,
34417 format: None,
34418 default: None,
34419 inferred_type: None,
34420 }));
34421 let add_expr =
34422 Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
34423 Ok(Expression::Cast(Box::new(Cast {
34424 this: add_expr,
34425 to: DataType::Timestamp {
34426 timezone: false,
34427 precision: None,
34428 },
34429 trailing_comments: vec![],
34430 double_colon_syntax: false,
34431 format: None,
34432 default: None,
34433 inferred_type: None,
34434 })))
34435 } else {
34436 // DATETIME('string', 'timezone')
34437 let cast_tstz = Expression::Cast(Box::new(Cast {
34438 this: first,
34439 to: DataType::Timestamp {
34440 timezone: true,
34441 precision: None,
34442 },
34443 trailing_comments: vec![],
34444 double_colon_syntax: false,
34445 format: None,
34446 default: None,
34447 inferred_type: None,
34448 }));
34449 let at_tz =
34450 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34451 this: cast_tstz,
34452 zone: second,
34453 }));
34454 Ok(Expression::Cast(Box::new(Cast {
34455 this: at_tz,
34456 to: DataType::Timestamp {
34457 timezone: false,
34458 precision: None,
34459 },
34460 trailing_comments: vec![],
34461 double_colon_syntax: false,
34462 format: None,
34463 default: None,
34464 inferred_type: None,
34465 })))
34466 }
34467 } else if args.len() >= 3 {
34468 // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
34469 // For other targets, use MAKE_TIMESTAMP or similar
34470 if matches!(target, DialectType::Snowflake) {
34471 Ok(Expression::Function(Box::new(Function::new(
34472 "TIMESTAMP_FROM_PARTS".to_string(),
34473 args,
34474 ))))
34475 } else {
34476 Ok(Expression::Function(Box::new(Function::new(
34477 "DATETIME".to_string(),
34478 args,
34479 ))))
34480 }
34481 } else {
34482 Ok(Expression::Function(Box::new(Function::new(
34483 "DATETIME".to_string(),
34484 args,
34485 ))))
34486 }
34487 }
34488
34489 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
34490 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
34491 "TIMESTAMP" => {
34492 if args.len() == 1 {
34493 let arg = args.remove(0);
34494 Ok(Expression::Cast(Box::new(Cast {
34495 this: arg,
34496 to: DataType::Timestamp {
34497 timezone: true,
34498 precision: None,
34499 },
34500 trailing_comments: vec![],
34501 double_colon_syntax: false,
34502 format: None,
34503 default: None,
34504 inferred_type: None,
34505 })))
34506 } else if args.len() == 2 {
34507 let arg = args.remove(0);
34508 let tz = args.remove(0);
34509 let cast_ts = Expression::Cast(Box::new(Cast {
34510 this: arg,
34511 to: DataType::Timestamp {
34512 timezone: false,
34513 precision: None,
34514 },
34515 trailing_comments: vec![],
34516 double_colon_syntax: false,
34517 format: None,
34518 default: None,
34519 inferred_type: None,
34520 }));
34521 if matches!(target, DialectType::Snowflake) {
34522 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
34523 Ok(Expression::Function(Box::new(Function::new(
34524 "CONVERT_TIMEZONE".to_string(),
34525 vec![tz, cast_ts],
34526 ))))
34527 } else {
34528 Ok(Expression::AtTimeZone(Box::new(
34529 crate::expressions::AtTimeZone {
34530 this: cast_ts,
34531 zone: tz,
34532 },
34533 )))
34534 }
34535 } else {
34536 Ok(Expression::Function(Box::new(Function::new(
34537 "TIMESTAMP".to_string(),
34538 args,
34539 ))))
34540 }
34541 }
34542
34543 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
34544 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
34545 "STRING" => {
34546 if args.len() == 1 {
34547 let arg = args.remove(0);
34548 let cast_type = match target {
34549 DialectType::DuckDB => DataType::Text,
34550 _ => DataType::VarChar {
34551 length: None,
34552 parenthesized_length: false,
34553 },
34554 };
34555 Ok(Expression::Cast(Box::new(Cast {
34556 this: arg,
34557 to: cast_type,
34558 trailing_comments: vec![],
34559 double_colon_syntax: false,
34560 format: None,
34561 default: None,
34562 inferred_type: None,
34563 })))
34564 } else if args.len() == 2 {
34565 let arg = args.remove(0);
34566 let tz = args.remove(0);
34567 let cast_type = match target {
34568 DialectType::DuckDB => DataType::Text,
34569 _ => DataType::VarChar {
34570 length: None,
34571 parenthesized_length: false,
34572 },
34573 };
34574 if matches!(target, DialectType::Snowflake) {
34575 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
34576 let convert_tz = Expression::Function(Box::new(Function::new(
34577 "CONVERT_TIMEZONE".to_string(),
34578 vec![
34579 Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
34580 tz,
34581 arg,
34582 ],
34583 )));
34584 Ok(Expression::Cast(Box::new(Cast {
34585 this: convert_tz,
34586 to: cast_type,
34587 trailing_comments: vec![],
34588 double_colon_syntax: false,
34589 format: None,
34590 default: None,
34591 inferred_type: None,
34592 })))
34593 } else {
34594 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
34595 let cast_ts = Expression::Cast(Box::new(Cast {
34596 this: arg,
34597 to: DataType::Timestamp {
34598 timezone: false,
34599 precision: None,
34600 },
34601 trailing_comments: vec![],
34602 double_colon_syntax: false,
34603 format: None,
34604 default: None,
34605 inferred_type: None,
34606 }));
34607 let at_utc =
34608 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34609 this: cast_ts,
34610 zone: Expression::Literal(Box::new(Literal::String(
34611 "UTC".to_string(),
34612 ))),
34613 }));
34614 let at_tz =
34615 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34616 this: at_utc,
34617 zone: tz,
34618 }));
34619 Ok(Expression::Cast(Box::new(Cast {
34620 this: at_tz,
34621 to: cast_type,
34622 trailing_comments: vec![],
34623 double_colon_syntax: false,
34624 format: None,
34625 default: None,
34626 inferred_type: None,
34627 })))
34628 }
34629 } else {
34630 Ok(Expression::Function(Box::new(Function::new(
34631 "STRING".to_string(),
34632 args,
34633 ))))
34634 }
34635 }
34636
34637 // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
34638 "UNIX_SECONDS" if args.len() == 1 => {
34639 let ts = args.remove(0);
34640 match target {
34641 DialectType::DuckDB => {
34642 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
34643 let cast_ts = Self::ensure_cast_timestamptz(ts);
34644 let epoch = Expression::Function(Box::new(Function::new(
34645 "EPOCH".to_string(),
34646 vec![cast_ts],
34647 )));
34648 Ok(Expression::Cast(Box::new(Cast {
34649 this: epoch,
34650 to: DataType::BigInt { length: None },
34651 trailing_comments: vec![],
34652 double_colon_syntax: false,
34653 format: None,
34654 default: None,
34655 inferred_type: None,
34656 })))
34657 }
34658 DialectType::Snowflake => {
34659 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
34660 let epoch = Expression::Cast(Box::new(Cast {
34661 this: Expression::Literal(Box::new(Literal::String(
34662 "1970-01-01 00:00:00+00".to_string(),
34663 ))),
34664 to: DataType::Timestamp {
34665 timezone: true,
34666 precision: None,
34667 },
34668 trailing_comments: vec![],
34669 double_colon_syntax: false,
34670 format: None,
34671 default: None,
34672 inferred_type: None,
34673 }));
34674 Ok(Expression::TimestampDiff(Box::new(
34675 crate::expressions::TimestampDiff {
34676 this: Box::new(epoch),
34677 expression: Box::new(ts),
34678 unit: Some("SECONDS".to_string()),
34679 },
34680 )))
34681 }
34682 _ => Ok(Expression::Function(Box::new(Function::new(
34683 "UNIX_SECONDS".to_string(),
34684 vec![ts],
34685 )))),
34686 }
34687 }
34688
34689 "UNIX_MILLIS" if args.len() == 1 => {
34690 let ts = args.remove(0);
34691 match target {
34692 DialectType::DuckDB => {
34693 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
34694 let cast_ts = Self::ensure_cast_timestamptz(ts);
34695 Ok(Expression::Function(Box::new(Function::new(
34696 "EPOCH_MS".to_string(),
34697 vec![cast_ts],
34698 ))))
34699 }
34700 _ => Ok(Expression::Function(Box::new(Function::new(
34701 "UNIX_MILLIS".to_string(),
34702 vec![ts],
34703 )))),
34704 }
34705 }
34706
34707 "UNIX_MICROS" if args.len() == 1 => {
34708 let ts = args.remove(0);
34709 match target {
34710 DialectType::DuckDB => {
34711 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
34712 let cast_ts = Self::ensure_cast_timestamptz(ts);
34713 Ok(Expression::Function(Box::new(Function::new(
34714 "EPOCH_US".to_string(),
34715 vec![cast_ts],
34716 ))))
34717 }
34718 _ => Ok(Expression::Function(Box::new(Function::new(
34719 "UNIX_MICROS".to_string(),
34720 vec![ts],
34721 )))),
34722 }
34723 }
34724
34725 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
34726 "ARRAY_CONCAT" | "LIST_CONCAT" => {
34727 match target {
34728 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
34729 // CONCAT(arr1, arr2, ...)
34730 Ok(Expression::Function(Box::new(Function::new(
34731 "CONCAT".to_string(),
34732 args,
34733 ))))
34734 }
34735 DialectType::Presto | DialectType::Trino => {
34736 // CONCAT(arr1, arr2, ...)
34737 Ok(Expression::Function(Box::new(Function::new(
34738 "CONCAT".to_string(),
34739 args,
34740 ))))
34741 }
34742 DialectType::Snowflake => {
34743 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
34744 if args.len() == 1 {
34745 // ARRAY_CAT requires 2 args, add empty array as []
34746 let empty_arr = Expression::ArrayFunc(Box::new(
34747 crate::expressions::ArrayConstructor {
34748 expressions: vec![],
34749 bracket_notation: true,
34750 use_list_keyword: false,
34751 },
34752 ));
34753 let mut new_args = args;
34754 new_args.push(empty_arr);
34755 Ok(Expression::Function(Box::new(Function::new(
34756 "ARRAY_CAT".to_string(),
34757 new_args,
34758 ))))
34759 } else if args.is_empty() {
34760 Ok(Expression::Function(Box::new(Function::new(
34761 "ARRAY_CAT".to_string(),
34762 args,
34763 ))))
34764 } else {
34765 let mut it = args.into_iter().rev();
34766 let mut result = it.next().unwrap();
34767 for arr in it {
34768 result = Expression::Function(Box::new(Function::new(
34769 "ARRAY_CAT".to_string(),
34770 vec![arr, result],
34771 )));
34772 }
34773 Ok(result)
34774 }
34775 }
34776 DialectType::PostgreSQL => {
34777 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
34778 if args.len() <= 1 {
34779 Ok(Expression::Function(Box::new(Function::new(
34780 "ARRAY_CAT".to_string(),
34781 args,
34782 ))))
34783 } else {
34784 let mut it = args.into_iter().rev();
34785 let mut result = it.next().unwrap();
34786 for arr in it {
34787 result = Expression::Function(Box::new(Function::new(
34788 "ARRAY_CAT".to_string(),
34789 vec![arr, result],
34790 )));
34791 }
34792 Ok(result)
34793 }
34794 }
34795 DialectType::Redshift => {
34796 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
34797 if args.len() <= 2 {
34798 Ok(Expression::Function(Box::new(Function::new(
34799 "ARRAY_CONCAT".to_string(),
34800 args,
34801 ))))
34802 } else {
34803 let mut it = args.into_iter().rev();
34804 let mut result = it.next().unwrap();
34805 for arr in it {
34806 result = Expression::Function(Box::new(Function::new(
34807 "ARRAY_CONCAT".to_string(),
34808 vec![arr, result],
34809 )));
34810 }
34811 Ok(result)
34812 }
34813 }
34814 DialectType::DuckDB => {
34815 // LIST_CONCAT supports multiple args natively in DuckDB
34816 Ok(Expression::Function(Box::new(Function::new(
34817 "LIST_CONCAT".to_string(),
34818 args,
34819 ))))
34820 }
34821 _ => Ok(Expression::Function(Box::new(Function::new(
34822 "ARRAY_CONCAT".to_string(),
34823 args,
34824 )))),
34825 }
34826 }
34827
34828 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
34829 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
34830 let arg = args.remove(0);
34831 match target {
34832 DialectType::Snowflake => {
34833 let array_agg =
34834 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
34835 this: arg,
34836 distinct: false,
34837 filter: None,
34838 order_by: vec![],
34839 name: None,
34840 ignore_nulls: None,
34841 having_max: None,
34842 limit: None,
34843 inferred_type: None,
34844 }));
34845 Ok(Expression::Function(Box::new(Function::new(
34846 "ARRAY_FLATTEN".to_string(),
34847 vec![array_agg],
34848 ))))
34849 }
34850 _ => Ok(Expression::Function(Box::new(Function::new(
34851 "ARRAY_CONCAT_AGG".to_string(),
34852 vec![arg],
34853 )))),
34854 }
34855 }
34856
34857 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
34858 "MD5" if args.len() == 1 => {
34859 let arg = args.remove(0);
34860 match target {
34861 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
34862 // UNHEX(MD5(x))
34863 let md5 = Expression::Function(Box::new(Function::new(
34864 "MD5".to_string(),
34865 vec![arg],
34866 )));
34867 Ok(Expression::Function(Box::new(Function::new(
34868 "UNHEX".to_string(),
34869 vec![md5],
34870 ))))
34871 }
34872 DialectType::Snowflake => {
34873 // MD5_BINARY(x)
34874 Ok(Expression::Function(Box::new(Function::new(
34875 "MD5_BINARY".to_string(),
34876 vec![arg],
34877 ))))
34878 }
34879 _ => Ok(Expression::Function(Box::new(Function::new(
34880 "MD5".to_string(),
34881 vec![arg],
34882 )))),
34883 }
34884 }
34885
34886 "SHA1" if args.len() == 1 => {
34887 let arg = args.remove(0);
34888 match target {
34889 DialectType::DuckDB => {
34890 // UNHEX(SHA1(x))
34891 let sha1 = Expression::Function(Box::new(Function::new(
34892 "SHA1".to_string(),
34893 vec![arg],
34894 )));
34895 Ok(Expression::Function(Box::new(Function::new(
34896 "UNHEX".to_string(),
34897 vec![sha1],
34898 ))))
34899 }
34900 _ => Ok(Expression::Function(Box::new(Function::new(
34901 "SHA1".to_string(),
34902 vec![arg],
34903 )))),
34904 }
34905 }
34906
34907 "SHA256" if args.len() == 1 => {
34908 let arg = args.remove(0);
34909 match target {
34910 DialectType::DuckDB => {
34911 // UNHEX(SHA256(x))
34912 let sha = Expression::Function(Box::new(Function::new(
34913 "SHA256".to_string(),
34914 vec![arg],
34915 )));
34916 Ok(Expression::Function(Box::new(Function::new(
34917 "UNHEX".to_string(),
34918 vec![sha],
34919 ))))
34920 }
34921 DialectType::Snowflake => {
34922 // SHA2_BINARY(x, 256)
34923 Ok(Expression::Function(Box::new(Function::new(
34924 "SHA2_BINARY".to_string(),
34925 vec![arg, Expression::number(256)],
34926 ))))
34927 }
34928 DialectType::Redshift | DialectType::Spark => {
34929 // SHA2(x, 256)
34930 Ok(Expression::Function(Box::new(Function::new(
34931 "SHA2".to_string(),
34932 vec![arg, Expression::number(256)],
34933 ))))
34934 }
34935 _ => Ok(Expression::Function(Box::new(Function::new(
34936 "SHA256".to_string(),
34937 vec![arg],
34938 )))),
34939 }
34940 }
34941
34942 "SHA512" if args.len() == 1 => {
34943 let arg = args.remove(0);
34944 match target {
34945 DialectType::Snowflake => {
34946 // SHA2_BINARY(x, 512)
34947 Ok(Expression::Function(Box::new(Function::new(
34948 "SHA2_BINARY".to_string(),
34949 vec![arg, Expression::number(512)],
34950 ))))
34951 }
34952 DialectType::Redshift | DialectType::Spark => {
34953 // SHA2(x, 512)
34954 Ok(Expression::Function(Box::new(Function::new(
34955 "SHA2".to_string(),
34956 vec![arg, Expression::number(512)],
34957 ))))
34958 }
34959 _ => Ok(Expression::Function(Box::new(Function::new(
34960 "SHA512".to_string(),
34961 vec![arg],
34962 )))),
34963 }
34964 }
34965
34966 // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
34967 "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
34968 let str_expr = args.remove(0);
34969 let pattern = args.remove(0);
34970
34971 // Check if pattern contains capturing groups (parentheses)
34972 let has_groups = match &pattern {
34973 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34974 let Literal::String(s) = lit.as_ref() else {
34975 unreachable!()
34976 };
34977 s.contains('(') && s.contains(')')
34978 }
34979 _ => false,
34980 };
34981
34982 match target {
34983 DialectType::DuckDB => {
34984 let group = if has_groups {
34985 Expression::number(1)
34986 } else {
34987 Expression::number(0)
34988 };
34989 Ok(Expression::Function(Box::new(Function::new(
34990 "REGEXP_EXTRACT_ALL".to_string(),
34991 vec![str_expr, pattern, group],
34992 ))))
34993 }
34994 DialectType::Spark | DialectType::Databricks => {
34995 // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
34996 if has_groups {
34997 Ok(Expression::Function(Box::new(Function::new(
34998 "REGEXP_EXTRACT_ALL".to_string(),
34999 vec![str_expr, pattern],
35000 ))))
35001 } else {
35002 Ok(Expression::Function(Box::new(Function::new(
35003 "REGEXP_EXTRACT_ALL".to_string(),
35004 vec![str_expr, pattern, Expression::number(0)],
35005 ))))
35006 }
35007 }
35008 DialectType::Presto | DialectType::Trino => {
35009 if has_groups {
35010 Ok(Expression::Function(Box::new(Function::new(
35011 "REGEXP_EXTRACT_ALL".to_string(),
35012 vec![str_expr, pattern, Expression::number(1)],
35013 ))))
35014 } else {
35015 Ok(Expression::Function(Box::new(Function::new(
35016 "REGEXP_EXTRACT_ALL".to_string(),
35017 vec![str_expr, pattern],
35018 ))))
35019 }
35020 }
35021 DialectType::Snowflake => {
35022 if has_groups {
35023 // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
35024 Ok(Expression::Function(Box::new(Function::new(
35025 "REGEXP_EXTRACT_ALL".to_string(),
35026 vec![
35027 str_expr,
35028 pattern,
35029 Expression::number(1),
35030 Expression::number(1),
35031 Expression::Literal(Box::new(Literal::String("c".to_string()))),
35032 Expression::number(1),
35033 ],
35034 ))))
35035 } else {
35036 Ok(Expression::Function(Box::new(Function::new(
35037 "REGEXP_EXTRACT_ALL".to_string(),
35038 vec![str_expr, pattern],
35039 ))))
35040 }
35041 }
35042 _ => Ok(Expression::Function(Box::new(Function::new(
35043 "REGEXP_EXTRACT_ALL".to_string(),
35044 vec![str_expr, pattern],
35045 )))),
35046 }
35047 }
35048
35049 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
35050 "MOD" if args.len() == 2 => {
35051 match target {
35052 DialectType::PostgreSQL
35053 | DialectType::DuckDB
35054 | DialectType::Presto
35055 | DialectType::Trino
35056 | DialectType::Athena
35057 | DialectType::Snowflake => {
35058 let x = args.remove(0);
35059 let y = args.remove(0);
35060 // Wrap complex expressions in parens to preserve precedence
35061 let needs_paren = |e: &Expression| {
35062 matches!(
35063 e,
35064 Expression::Add(_)
35065 | Expression::Sub(_)
35066 | Expression::Mul(_)
35067 | Expression::Div(_)
35068 )
35069 };
35070 let x = if needs_paren(&x) {
35071 Expression::Paren(Box::new(crate::expressions::Paren {
35072 this: x,
35073 trailing_comments: vec![],
35074 }))
35075 } else {
35076 x
35077 };
35078 let y = if needs_paren(&y) {
35079 Expression::Paren(Box::new(crate::expressions::Paren {
35080 this: y,
35081 trailing_comments: vec![],
35082 }))
35083 } else {
35084 y
35085 };
35086 Ok(Expression::Mod(Box::new(
35087 crate::expressions::BinaryOp::new(x, y),
35088 )))
35089 }
35090 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
35091 // Hive/Spark: a % b
35092 let x = args.remove(0);
35093 let y = args.remove(0);
35094 let needs_paren = |e: &Expression| {
35095 matches!(
35096 e,
35097 Expression::Add(_)
35098 | Expression::Sub(_)
35099 | Expression::Mul(_)
35100 | Expression::Div(_)
35101 )
35102 };
35103 let x = if needs_paren(&x) {
35104 Expression::Paren(Box::new(crate::expressions::Paren {
35105 this: x,
35106 trailing_comments: vec![],
35107 }))
35108 } else {
35109 x
35110 };
35111 let y = if needs_paren(&y) {
35112 Expression::Paren(Box::new(crate::expressions::Paren {
35113 this: y,
35114 trailing_comments: vec![],
35115 }))
35116 } else {
35117 y
35118 };
35119 Ok(Expression::Mod(Box::new(
35120 crate::expressions::BinaryOp::new(x, y),
35121 )))
35122 }
35123 _ => Ok(Expression::Function(Box::new(Function::new(
35124 "MOD".to_string(),
35125 args,
35126 )))),
35127 }
35128 }
35129
35130 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
35131 "ARRAY_FILTER" if args.len() == 2 => {
35132 let name = match target {
35133 DialectType::DuckDB => "LIST_FILTER",
35134 DialectType::StarRocks => "ARRAY_FILTER",
35135 _ => "FILTER",
35136 };
35137 Ok(Expression::Function(Box::new(Function::new(
35138 name.to_string(),
35139 args,
35140 ))))
35141 }
35142 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
35143 "FILTER" if args.len() == 2 => {
35144 let name = match target {
35145 DialectType::DuckDB => "LIST_FILTER",
35146 DialectType::StarRocks => "ARRAY_FILTER",
35147 _ => "FILTER",
35148 };
35149 Ok(Expression::Function(Box::new(Function::new(
35150 name.to_string(),
35151 args,
35152 ))))
35153 }
35154 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
35155 "REDUCE" if args.len() >= 3 => {
35156 let name = match target {
35157 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
35158 _ => "REDUCE",
35159 };
35160 Ok(Expression::Function(Box::new(Function::new(
35161 name.to_string(),
35162 args,
35163 ))))
35164 }
35165 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
35166 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
35167 Function::new("ARRAY_REVERSE".to_string(), args),
35168 ))),
35169
35170 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
35171 "CONCAT" if args.len() > 2 => match target {
35172 DialectType::DuckDB => {
35173 let mut it = args.into_iter();
35174 let mut result = it.next().unwrap();
35175 for arg in it {
35176 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
35177 this: Box::new(result),
35178 expression: Box::new(arg),
35179 safe: None,
35180 }));
35181 }
35182 Ok(result)
35183 }
35184 _ => Ok(Expression::Function(Box::new(Function::new(
35185 "CONCAT".to_string(),
35186 args,
35187 )))),
35188 },
35189
35190 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
35191 "GENERATE_DATE_ARRAY" => {
35192 if matches!(target, DialectType::BigQuery) {
35193 // BQ->BQ: add default interval if not present
35194 if args.len() == 2 {
35195 let start = args.remove(0);
35196 let end = args.remove(0);
35197 let default_interval =
35198 Expression::Interval(Box::new(crate::expressions::Interval {
35199 this: Some(Expression::Literal(Box::new(Literal::String(
35200 "1".to_string(),
35201 )))),
35202 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35203 unit: crate::expressions::IntervalUnit::Day,
35204 use_plural: false,
35205 }),
35206 }));
35207 Ok(Expression::Function(Box::new(Function::new(
35208 "GENERATE_DATE_ARRAY".to_string(),
35209 vec![start, end, default_interval],
35210 ))))
35211 } else {
35212 Ok(Expression::Function(Box::new(Function::new(
35213 "GENERATE_DATE_ARRAY".to_string(),
35214 args,
35215 ))))
35216 }
35217 } else if matches!(target, DialectType::DuckDB) {
35218 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
35219 let start = args.get(0).cloned();
35220 let end = args.get(1).cloned();
35221 let step = args.get(2).cloned().or_else(|| {
35222 Some(Expression::Interval(Box::new(
35223 crate::expressions::Interval {
35224 this: Some(Expression::Literal(Box::new(Literal::String(
35225 "1".to_string(),
35226 )))),
35227 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35228 unit: crate::expressions::IntervalUnit::Day,
35229 use_plural: false,
35230 }),
35231 },
35232 )))
35233 });
35234
35235 // Wrap start/end in CAST(... AS DATE) only for string literals
35236 let maybe_cast_date = |expr: Expression| -> Expression {
35237 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
35238 {
35239 Expression::Cast(Box::new(Cast {
35240 this: expr,
35241 to: DataType::Date,
35242 trailing_comments: vec![],
35243 double_colon_syntax: false,
35244 format: None,
35245 default: None,
35246 inferred_type: None,
35247 }))
35248 } else {
35249 expr
35250 }
35251 };
35252 let cast_start = start.map(maybe_cast_date);
35253 let cast_end = end.map(maybe_cast_date);
35254
35255 let gen_series =
35256 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
35257 start: cast_start.map(Box::new),
35258 end: cast_end.map(Box::new),
35259 step: step.map(Box::new),
35260 is_end_exclusive: None,
35261 }));
35262
35263 // Wrap in CAST(... AS DATE[])
35264 Ok(Expression::Cast(Box::new(Cast {
35265 this: gen_series,
35266 to: DataType::Array {
35267 element_type: Box::new(DataType::Date),
35268 dimension: None,
35269 },
35270 trailing_comments: vec![],
35271 double_colon_syntax: false,
35272 format: None,
35273 default: None,
35274 inferred_type: None,
35275 })))
35276 } else if matches!(target, DialectType::Snowflake) {
35277 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
35278 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
35279 if args.len() == 2 {
35280 let start = args.remove(0);
35281 let end = args.remove(0);
35282 let default_interval =
35283 Expression::Interval(Box::new(crate::expressions::Interval {
35284 this: Some(Expression::Literal(Box::new(Literal::String(
35285 "1".to_string(),
35286 )))),
35287 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35288 unit: crate::expressions::IntervalUnit::Day,
35289 use_plural: false,
35290 }),
35291 }));
35292 Ok(Expression::Function(Box::new(Function::new(
35293 "GENERATE_DATE_ARRAY".to_string(),
35294 vec![start, end, default_interval],
35295 ))))
35296 } else {
35297 Ok(Expression::Function(Box::new(Function::new(
35298 "GENERATE_DATE_ARRAY".to_string(),
35299 args,
35300 ))))
35301 }
35302 } else {
35303 // Convert to GenerateSeries for other targets
35304 let start = args.get(0).cloned();
35305 let end = args.get(1).cloned();
35306 let step = args.get(2).cloned().or_else(|| {
35307 Some(Expression::Interval(Box::new(
35308 crate::expressions::Interval {
35309 this: Some(Expression::Literal(Box::new(Literal::String(
35310 "1".to_string(),
35311 )))),
35312 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35313 unit: crate::expressions::IntervalUnit::Day,
35314 use_plural: false,
35315 }),
35316 },
35317 )))
35318 });
35319 Ok(Expression::GenerateSeries(Box::new(
35320 crate::expressions::GenerateSeries {
35321 start: start.map(Box::new),
35322 end: end.map(Box::new),
35323 step: step.map(Box::new),
35324 is_end_exclusive: None,
35325 },
35326 )))
35327 }
35328 }
35329
35330 // PARSE_DATE(format, str) -> target-specific
35331 "PARSE_DATE" if args.len() == 2 => {
35332 let format = args.remove(0);
35333 let str_expr = args.remove(0);
35334 match target {
35335 DialectType::DuckDB => {
35336 // CAST(STRPTIME(str, duck_format) AS DATE)
35337 let duck_format = Self::bq_format_to_duckdb(&format);
35338 let strptime = Expression::Function(Box::new(Function::new(
35339 "STRPTIME".to_string(),
35340 vec![str_expr, duck_format],
35341 )));
35342 Ok(Expression::Cast(Box::new(Cast {
35343 this: strptime,
35344 to: DataType::Date,
35345 trailing_comments: vec![],
35346 double_colon_syntax: false,
35347 format: None,
35348 default: None,
35349 inferred_type: None,
35350 })))
35351 }
35352 DialectType::Snowflake => {
35353 // _POLYGLOT_DATE(str, snowflake_format)
35354 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
35355 let sf_format = Self::bq_format_to_snowflake(&format);
35356 Ok(Expression::Function(Box::new(Function::new(
35357 "_POLYGLOT_DATE".to_string(),
35358 vec![str_expr, sf_format],
35359 ))))
35360 }
35361 _ => Ok(Expression::Function(Box::new(Function::new(
35362 "PARSE_DATE".to_string(),
35363 vec![format, str_expr],
35364 )))),
35365 }
35366 }
35367
35368 // PARSE_TIMESTAMP(format, str) -> target-specific
35369 "PARSE_TIMESTAMP" if args.len() >= 2 => {
35370 let format = args.remove(0);
35371 let str_expr = args.remove(0);
35372 let tz = if !args.is_empty() {
35373 Some(args.remove(0))
35374 } else {
35375 None
35376 };
35377 match target {
35378 DialectType::DuckDB => {
35379 let duck_format = Self::bq_format_to_duckdb(&format);
35380 let strptime = Expression::Function(Box::new(Function::new(
35381 "STRPTIME".to_string(),
35382 vec![str_expr, duck_format],
35383 )));
35384 Ok(strptime)
35385 }
35386 _ => {
35387 let mut result_args = vec![format, str_expr];
35388 if let Some(tz_arg) = tz {
35389 result_args.push(tz_arg);
35390 }
35391 Ok(Expression::Function(Box::new(Function::new(
35392 "PARSE_TIMESTAMP".to_string(),
35393 result_args,
35394 ))))
35395 }
35396 }
35397 }
35398
35399 // FORMAT_DATE(format, date) -> target-specific
35400 "FORMAT_DATE" if args.len() == 2 => {
35401 let format = args.remove(0);
35402 let date_expr = args.remove(0);
35403 match target {
35404 DialectType::DuckDB => {
35405 // STRFTIME(CAST(date AS DATE), format)
35406 let cast_date = Expression::Cast(Box::new(Cast {
35407 this: date_expr,
35408 to: DataType::Date,
35409 trailing_comments: vec![],
35410 double_colon_syntax: false,
35411 format: None,
35412 default: None,
35413 inferred_type: None,
35414 }));
35415 Ok(Expression::Function(Box::new(Function::new(
35416 "STRFTIME".to_string(),
35417 vec![cast_date, format],
35418 ))))
35419 }
35420 _ => Ok(Expression::Function(Box::new(Function::new(
35421 "FORMAT_DATE".to_string(),
35422 vec![format, date_expr],
35423 )))),
35424 }
35425 }
35426
35427 // FORMAT_DATETIME(format, datetime) -> target-specific
35428 "FORMAT_DATETIME" if args.len() == 2 => {
35429 let format = args.remove(0);
35430 let dt_expr = args.remove(0);
35431
35432 if matches!(target, DialectType::BigQuery) {
35433 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
35434 let norm_format = Self::bq_format_normalize_bq(&format);
35435 // Also strip DATETIME keyword from typed literals
35436 let norm_dt = match dt_expr {
35437 Expression::Literal(lit)
35438 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
35439 {
35440 let Literal::Timestamp(s) = lit.as_ref() else {
35441 unreachable!()
35442 };
35443 Expression::Cast(Box::new(Cast {
35444 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35445 to: DataType::Custom {
35446 name: "DATETIME".to_string(),
35447 },
35448 trailing_comments: vec![],
35449 double_colon_syntax: false,
35450 format: None,
35451 default: None,
35452 inferred_type: None,
35453 }))
35454 }
35455 other => other,
35456 };
35457 return Ok(Expression::Function(Box::new(Function::new(
35458 "FORMAT_DATETIME".to_string(),
35459 vec![norm_format, norm_dt],
35460 ))));
35461 }
35462
35463 match target {
35464 DialectType::DuckDB => {
35465 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
35466 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
35467 let duck_format = Self::bq_format_to_duckdb(&format);
35468 Ok(Expression::Function(Box::new(Function::new(
35469 "STRFTIME".to_string(),
35470 vec![cast_dt, duck_format],
35471 ))))
35472 }
35473 _ => Ok(Expression::Function(Box::new(Function::new(
35474 "FORMAT_DATETIME".to_string(),
35475 vec![format, dt_expr],
35476 )))),
35477 }
35478 }
35479
35480 // FORMAT_TIMESTAMP(format, ts) -> target-specific
35481 "FORMAT_TIMESTAMP" if args.len() == 2 => {
35482 let format = args.remove(0);
35483 let ts_expr = args.remove(0);
35484 match target {
35485 DialectType::DuckDB => {
35486 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
35487 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35488 let cast_ts = Expression::Cast(Box::new(Cast {
35489 this: cast_tstz,
35490 to: DataType::Timestamp {
35491 timezone: false,
35492 precision: None,
35493 },
35494 trailing_comments: vec![],
35495 double_colon_syntax: false,
35496 format: None,
35497 default: None,
35498 inferred_type: None,
35499 }));
35500 Ok(Expression::Function(Box::new(Function::new(
35501 "STRFTIME".to_string(),
35502 vec![cast_ts, format],
35503 ))))
35504 }
35505 DialectType::Snowflake => {
35506 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
35507 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35508 let cast_ts = Expression::Cast(Box::new(Cast {
35509 this: cast_tstz,
35510 to: DataType::Timestamp {
35511 timezone: false,
35512 precision: None,
35513 },
35514 trailing_comments: vec![],
35515 double_colon_syntax: false,
35516 format: None,
35517 default: None,
35518 inferred_type: None,
35519 }));
35520 let sf_format = Self::bq_format_to_snowflake(&format);
35521 Ok(Expression::Function(Box::new(Function::new(
35522 "TO_CHAR".to_string(),
35523 vec![cast_ts, sf_format],
35524 ))))
35525 }
35526 _ => Ok(Expression::Function(Box::new(Function::new(
35527 "FORMAT_TIMESTAMP".to_string(),
35528 vec![format, ts_expr],
35529 )))),
35530 }
35531 }
35532
35533 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
35534 "UNIX_DATE" if args.len() == 1 => {
35535 let date = args.remove(0);
35536 match target {
35537 DialectType::DuckDB => {
35538 let epoch = Expression::Cast(Box::new(Cast {
35539 this: Expression::Literal(Box::new(Literal::String(
35540 "1970-01-01".to_string(),
35541 ))),
35542 to: DataType::Date,
35543 trailing_comments: vec![],
35544 double_colon_syntax: false,
35545 format: None,
35546 default: None,
35547 inferred_type: None,
35548 }));
35549 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
35550 // Need to convert DATE literal to CAST
35551 let norm_date = Self::date_literal_to_cast(date);
35552 Ok(Expression::Function(Box::new(Function::new(
35553 "DATE_DIFF".to_string(),
35554 vec![
35555 Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
35556 epoch,
35557 norm_date,
35558 ],
35559 ))))
35560 }
35561 _ => Ok(Expression::Function(Box::new(Function::new(
35562 "UNIX_DATE".to_string(),
35563 vec![date],
35564 )))),
35565 }
35566 }
35567
35568 // UNIX_SECONDS(ts) -> target-specific
35569 "UNIX_SECONDS" if args.len() == 1 => {
35570 let ts = args.remove(0);
35571 match target {
35572 DialectType::DuckDB => {
35573 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
35574 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35575 let epoch = Expression::Function(Box::new(Function::new(
35576 "EPOCH".to_string(),
35577 vec![norm_ts],
35578 )));
35579 Ok(Expression::Cast(Box::new(Cast {
35580 this: epoch,
35581 to: DataType::BigInt { length: None },
35582 trailing_comments: vec![],
35583 double_colon_syntax: false,
35584 format: None,
35585 default: None,
35586 inferred_type: None,
35587 })))
35588 }
35589 DialectType::Snowflake => {
35590 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
35591 let epoch = Expression::Cast(Box::new(Cast {
35592 this: Expression::Literal(Box::new(Literal::String(
35593 "1970-01-01 00:00:00+00".to_string(),
35594 ))),
35595 to: DataType::Timestamp {
35596 timezone: true,
35597 precision: None,
35598 },
35599 trailing_comments: vec![],
35600 double_colon_syntax: false,
35601 format: None,
35602 default: None,
35603 inferred_type: None,
35604 }));
35605 Ok(Expression::Function(Box::new(Function::new(
35606 "TIMESTAMPDIFF".to_string(),
35607 vec![
35608 Expression::Identifier(Identifier::new("SECONDS".to_string())),
35609 epoch,
35610 ts,
35611 ],
35612 ))))
35613 }
35614 _ => Ok(Expression::Function(Box::new(Function::new(
35615 "UNIX_SECONDS".to_string(),
35616 vec![ts],
35617 )))),
35618 }
35619 }
35620
35621 // UNIX_MILLIS(ts) -> target-specific
35622 "UNIX_MILLIS" if args.len() == 1 => {
35623 let ts = args.remove(0);
35624 match target {
35625 DialectType::DuckDB => {
35626 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35627 Ok(Expression::Function(Box::new(Function::new(
35628 "EPOCH_MS".to_string(),
35629 vec![norm_ts],
35630 ))))
35631 }
35632 _ => Ok(Expression::Function(Box::new(Function::new(
35633 "UNIX_MILLIS".to_string(),
35634 vec![ts],
35635 )))),
35636 }
35637 }
35638
35639 // UNIX_MICROS(ts) -> target-specific
35640 "UNIX_MICROS" if args.len() == 1 => {
35641 let ts = args.remove(0);
35642 match target {
35643 DialectType::DuckDB => {
35644 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35645 Ok(Expression::Function(Box::new(Function::new(
35646 "EPOCH_US".to_string(),
35647 vec![norm_ts],
35648 ))))
35649 }
35650 _ => Ok(Expression::Function(Box::new(Function::new(
35651 "UNIX_MICROS".to_string(),
35652 vec![ts],
35653 )))),
35654 }
35655 }
35656
35657 // INSTR(str, substr) -> target-specific
35658 "INSTR" => {
35659 if matches!(target, DialectType::BigQuery) {
35660 // BQ->BQ: keep as INSTR
35661 Ok(Expression::Function(Box::new(Function::new(
35662 "INSTR".to_string(),
35663 args,
35664 ))))
35665 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
35666 // Snowflake: CHARINDEX(substr, str) - swap args
35667 let str_expr = args.remove(0);
35668 let substr = args.remove(0);
35669 Ok(Expression::Function(Box::new(Function::new(
35670 "CHARINDEX".to_string(),
35671 vec![substr, str_expr],
35672 ))))
35673 } else {
35674 // Keep as INSTR for other targets
35675 Ok(Expression::Function(Box::new(Function::new(
35676 "INSTR".to_string(),
35677 args,
35678 ))))
35679 }
35680 }
35681
35682 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
35683 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
35684 if matches!(target, DialectType::BigQuery) {
35685 // BQ->BQ: always output with parens (function form), keep any timezone arg
35686 Ok(Expression::Function(Box::new(Function::new(name, args))))
35687 } else if name == "CURRENT_DATE" && args.len() == 1 {
35688 // CURRENT_DATE('UTC') - has timezone arg
35689 let tz_arg = args.remove(0);
35690 match target {
35691 DialectType::DuckDB => {
35692 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
35693 let ct = Expression::CurrentTimestamp(
35694 crate::expressions::CurrentTimestamp {
35695 precision: None,
35696 sysdate: false,
35697 },
35698 );
35699 let at_tz =
35700 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
35701 this: ct,
35702 zone: tz_arg,
35703 }));
35704 Ok(Expression::Cast(Box::new(Cast {
35705 this: at_tz,
35706 to: DataType::Date,
35707 trailing_comments: vec![],
35708 double_colon_syntax: false,
35709 format: None,
35710 default: None,
35711 inferred_type: None,
35712 })))
35713 }
35714 DialectType::Snowflake => {
35715 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
35716 let ct = Expression::Function(Box::new(Function::new(
35717 "CURRENT_TIMESTAMP".to_string(),
35718 vec![],
35719 )));
35720 let convert = Expression::Function(Box::new(Function::new(
35721 "CONVERT_TIMEZONE".to_string(),
35722 vec![tz_arg, ct],
35723 )));
35724 Ok(Expression::Cast(Box::new(Cast {
35725 this: convert,
35726 to: DataType::Date,
35727 trailing_comments: vec![],
35728 double_colon_syntax: false,
35729 format: None,
35730 default: None,
35731 inferred_type: None,
35732 })))
35733 }
35734 _ => {
35735 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
35736 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
35737 Ok(Expression::AtTimeZone(Box::new(
35738 crate::expressions::AtTimeZone {
35739 this: cd,
35740 zone: tz_arg,
35741 },
35742 )))
35743 }
35744 }
35745 } else if (name == "CURRENT_TIMESTAMP"
35746 || name == "CURRENT_TIME"
35747 || name == "CURRENT_DATE")
35748 && args.is_empty()
35749 && matches!(
35750 target,
35751 DialectType::PostgreSQL
35752 | DialectType::DuckDB
35753 | DialectType::Presto
35754 | DialectType::Trino
35755 )
35756 {
35757 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
35758 if name == "CURRENT_TIMESTAMP" {
35759 Ok(Expression::CurrentTimestamp(
35760 crate::expressions::CurrentTimestamp {
35761 precision: None,
35762 sysdate: false,
35763 },
35764 ))
35765 } else if name == "CURRENT_DATE" {
35766 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
35767 } else {
35768 // CURRENT_TIME
35769 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
35770 precision: None,
35771 }))
35772 }
35773 } else {
35774 // All other targets: keep as function (with parens)
35775 Ok(Expression::Function(Box::new(Function::new(name, args))))
35776 }
35777 }
35778
35779 // JSON_QUERY(json, path) -> target-specific
35780 "JSON_QUERY" if args.len() == 2 => {
35781 match target {
35782 DialectType::DuckDB | DialectType::SQLite => {
35783 // json -> path syntax
35784 let json_expr = args.remove(0);
35785 let path = args.remove(0);
35786 Ok(Expression::JsonExtract(Box::new(
35787 crate::expressions::JsonExtractFunc {
35788 this: json_expr,
35789 path,
35790 returning: None,
35791 arrow_syntax: true,
35792 hash_arrow_syntax: false,
35793 wrapper_option: None,
35794 quotes_option: None,
35795 on_scalar_string: false,
35796 on_error: None,
35797 },
35798 )))
35799 }
35800 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35801 Ok(Expression::Function(Box::new(Function::new(
35802 "GET_JSON_OBJECT".to_string(),
35803 args,
35804 ))))
35805 }
35806 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
35807 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
35808 )),
35809 _ => Ok(Expression::Function(Box::new(Function::new(
35810 "JSON_QUERY".to_string(),
35811 args,
35812 )))),
35813 }
35814 }
35815
35816 // JSON_VALUE_ARRAY(json, path) -> target-specific
35817 "JSON_VALUE_ARRAY" if args.len() == 2 => {
35818 match target {
35819 DialectType::DuckDB => {
35820 // CAST(json -> path AS TEXT[])
35821 let json_expr = args.remove(0);
35822 let path = args.remove(0);
35823 let arrow = Expression::JsonExtract(Box::new(
35824 crate::expressions::JsonExtractFunc {
35825 this: json_expr,
35826 path,
35827 returning: None,
35828 arrow_syntax: true,
35829 hash_arrow_syntax: false,
35830 wrapper_option: None,
35831 quotes_option: None,
35832 on_scalar_string: false,
35833 on_error: None,
35834 },
35835 ));
35836 Ok(Expression::Cast(Box::new(Cast {
35837 this: arrow,
35838 to: DataType::Array {
35839 element_type: Box::new(DataType::Text),
35840 dimension: None,
35841 },
35842 trailing_comments: vec![],
35843 double_colon_syntax: false,
35844 format: None,
35845 default: None,
35846 inferred_type: None,
35847 })))
35848 }
35849 DialectType::Snowflake => {
35850 let json_expr = args.remove(0);
35851 let path_expr = args.remove(0);
35852 // Convert JSON path from $.path to just path
35853 let sf_path = if let Expression::Literal(ref lit) = path_expr {
35854 if let Literal::String(ref s) = lit.as_ref() {
35855 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
35856 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
35857 } else {
35858 path_expr.clone()
35859 }
35860 } else {
35861 path_expr
35862 };
35863 let parse_json = Expression::Function(Box::new(Function::new(
35864 "PARSE_JSON".to_string(),
35865 vec![json_expr],
35866 )));
35867 let get_path = Expression::Function(Box::new(Function::new(
35868 "GET_PATH".to_string(),
35869 vec![parse_json, sf_path],
35870 )));
35871 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
35872 let cast_expr = Expression::Cast(Box::new(Cast {
35873 this: Expression::Identifier(Identifier::new("x")),
35874 to: DataType::VarChar {
35875 length: None,
35876 parenthesized_length: false,
35877 },
35878 trailing_comments: vec![],
35879 double_colon_syntax: false,
35880 format: None,
35881 default: None,
35882 inferred_type: None,
35883 }));
35884 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
35885 parameters: vec![Identifier::new("x")],
35886 body: cast_expr,
35887 colon: false,
35888 parameter_types: vec![],
35889 }));
35890 Ok(Expression::Function(Box::new(Function::new(
35891 "TRANSFORM".to_string(),
35892 vec![get_path, lambda],
35893 ))))
35894 }
35895 _ => Ok(Expression::Function(Box::new(Function::new(
35896 "JSON_VALUE_ARRAY".to_string(),
35897 args,
35898 )))),
35899 }
35900 }
35901
35902 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
35903 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
35904 // This is different from Hive/Spark where 3rd arg is "group_index"
35905 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
35906 match target {
35907 DialectType::DuckDB
35908 | DialectType::Presto
35909 | DialectType::Trino
35910 | DialectType::Athena => {
35911 if args.len() == 2 {
35912 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
35913 args.push(Expression::number(1));
35914 Ok(Expression::Function(Box::new(Function::new(
35915 "REGEXP_EXTRACT".to_string(),
35916 args,
35917 ))))
35918 } else if args.len() == 3 {
35919 let val = args.remove(0);
35920 let regex = args.remove(0);
35921 let position = args.remove(0);
35922 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
35923 if is_pos_1 {
35924 Ok(Expression::Function(Box::new(Function::new(
35925 "REGEXP_EXTRACT".to_string(),
35926 vec![val, regex, Expression::number(1)],
35927 ))))
35928 } else {
35929 let substring_expr = Expression::Function(Box::new(Function::new(
35930 "SUBSTRING".to_string(),
35931 vec![val, position],
35932 )));
35933 let nullif_expr = Expression::Function(Box::new(Function::new(
35934 "NULLIF".to_string(),
35935 vec![
35936 substring_expr,
35937 Expression::Literal(Box::new(Literal::String(
35938 String::new(),
35939 ))),
35940 ],
35941 )));
35942 Ok(Expression::Function(Box::new(Function::new(
35943 "REGEXP_EXTRACT".to_string(),
35944 vec![nullif_expr, regex, Expression::number(1)],
35945 ))))
35946 }
35947 } else if args.len() == 4 {
35948 let val = args.remove(0);
35949 let regex = args.remove(0);
35950 let position = args.remove(0);
35951 let occurrence = args.remove(0);
35952 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
35953 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
35954 if is_pos_1 && is_occ_1 {
35955 Ok(Expression::Function(Box::new(Function::new(
35956 "REGEXP_EXTRACT".to_string(),
35957 vec![val, regex, Expression::number(1)],
35958 ))))
35959 } else {
35960 let subject = if is_pos_1 {
35961 val
35962 } else {
35963 let substring_expr = Expression::Function(Box::new(
35964 Function::new("SUBSTRING".to_string(), vec![val, position]),
35965 ));
35966 Expression::Function(Box::new(Function::new(
35967 "NULLIF".to_string(),
35968 vec![
35969 substring_expr,
35970 Expression::Literal(Box::new(Literal::String(
35971 String::new(),
35972 ))),
35973 ],
35974 )))
35975 };
35976 let extract_all = Expression::Function(Box::new(Function::new(
35977 "REGEXP_EXTRACT_ALL".to_string(),
35978 vec![subject, regex, Expression::number(1)],
35979 )));
35980 Ok(Expression::Function(Box::new(Function::new(
35981 "ARRAY_EXTRACT".to_string(),
35982 vec![extract_all, occurrence],
35983 ))))
35984 }
35985 } else {
35986 Ok(Expression::Function(Box::new(Function {
35987 name: f.name,
35988 args,
35989 distinct: f.distinct,
35990 trailing_comments: f.trailing_comments,
35991 use_bracket_syntax: f.use_bracket_syntax,
35992 no_parens: f.no_parens,
35993 quoted: f.quoted,
35994 span: None,
35995 inferred_type: None,
35996 })))
35997 }
35998 }
35999 DialectType::Snowflake => {
36000 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
36001 Ok(Expression::Function(Box::new(Function::new(
36002 "REGEXP_SUBSTR".to_string(),
36003 args,
36004 ))))
36005 }
36006 _ => {
36007 // For other targets (Hive/Spark/BigQuery): pass through as-is
36008 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
36009 Ok(Expression::Function(Box::new(Function {
36010 name: f.name,
36011 args,
36012 distinct: f.distinct,
36013 trailing_comments: f.trailing_comments,
36014 use_bracket_syntax: f.use_bracket_syntax,
36015 no_parens: f.no_parens,
36016 quoted: f.quoted,
36017 span: None,
36018 inferred_type: None,
36019 })))
36020 }
36021 }
36022 }
36023
36024 // BigQuery STRUCT(args) -> target-specific struct expression
36025 "STRUCT" => {
36026 // Convert Function args to Struct fields
36027 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
36028 for (i, arg) in args.into_iter().enumerate() {
36029 match arg {
36030 Expression::Alias(a) => {
36031 // Named field: expr AS name
36032 fields.push((Some(a.alias.name.clone()), a.this));
36033 }
36034 other => {
36035 // Unnamed field: for Spark/Hive, keep as None
36036 // For Snowflake, auto-name as _N
36037 // For DuckDB, use column name for column refs, _N for others
36038 if matches!(target, DialectType::Snowflake) {
36039 fields.push((Some(format!("_{}", i)), other));
36040 } else if matches!(target, DialectType::DuckDB) {
36041 let auto_name = match &other {
36042 Expression::Column(col) => col.name.name.clone(),
36043 _ => format!("_{}", i),
36044 };
36045 fields.push((Some(auto_name), other));
36046 } else {
36047 fields.push((None, other));
36048 }
36049 }
36050 }
36051 }
36052
36053 match target {
36054 DialectType::Snowflake => {
36055 // OBJECT_CONSTRUCT('name', value, ...)
36056 let mut oc_args = Vec::new();
36057 for (name, val) in &fields {
36058 if let Some(n) = name {
36059 oc_args.push(Expression::Literal(Box::new(Literal::String(
36060 n.clone(),
36061 ))));
36062 oc_args.push(val.clone());
36063 } else {
36064 oc_args.push(val.clone());
36065 }
36066 }
36067 Ok(Expression::Function(Box::new(Function::new(
36068 "OBJECT_CONSTRUCT".to_string(),
36069 oc_args,
36070 ))))
36071 }
36072 DialectType::DuckDB => {
36073 // {'name': value, ...}
36074 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36075 fields,
36076 })))
36077 }
36078 DialectType::Hive => {
36079 // STRUCT(val1, val2, ...) - strip aliases
36080 let hive_fields: Vec<(Option<String>, Expression)> =
36081 fields.into_iter().map(|(_, v)| (None, v)).collect();
36082 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36083 fields: hive_fields,
36084 })))
36085 }
36086 DialectType::Spark | DialectType::Databricks => {
36087 // Use Expression::Struct to bypass Spark target transform auto-naming
36088 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36089 fields,
36090 })))
36091 }
36092 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
36093 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
36094 let all_named =
36095 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
36096 let all_types_inferable = all_named
36097 && fields
36098 .iter()
36099 .all(|(_, val)| Self::can_infer_presto_type(val));
36100 let row_args: Vec<Expression> =
36101 fields.iter().map(|(_, v)| v.clone()).collect();
36102 let row_expr = Expression::Function(Box::new(Function::new(
36103 "ROW".to_string(),
36104 row_args,
36105 )));
36106 if all_named && all_types_inferable {
36107 // Build ROW type with inferred types
36108 let mut row_type_fields = Vec::new();
36109 for (name, val) in &fields {
36110 if let Some(n) = name {
36111 let type_str = Self::infer_sql_type_for_presto(val);
36112 row_type_fields.push(crate::expressions::StructField::new(
36113 n.clone(),
36114 crate::expressions::DataType::Custom { name: type_str },
36115 ));
36116 }
36117 }
36118 let row_type = crate::expressions::DataType::Struct {
36119 fields: row_type_fields,
36120 nested: true,
36121 };
36122 Ok(Expression::Cast(Box::new(Cast {
36123 this: row_expr,
36124 to: row_type,
36125 trailing_comments: Vec::new(),
36126 double_colon_syntax: false,
36127 format: None,
36128 default: None,
36129 inferred_type: None,
36130 })))
36131 } else {
36132 Ok(row_expr)
36133 }
36134 }
36135 _ => {
36136 // Default: keep as STRUCT function with original args
36137 let mut new_args = Vec::new();
36138 for (name, val) in fields {
36139 if let Some(n) = name {
36140 new_args.push(Expression::Alias(Box::new(
36141 crate::expressions::Alias::new(val, Identifier::new(n)),
36142 )));
36143 } else {
36144 new_args.push(val);
36145 }
36146 }
36147 Ok(Expression::Function(Box::new(Function::new(
36148 "STRUCT".to_string(),
36149 new_args,
36150 ))))
36151 }
36152 }
36153 }
36154
36155 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
36156 "ROUND" if args.len() == 3 => {
36157 let x = args.remove(0);
36158 let n = args.remove(0);
36159 let mode = args.remove(0);
36160 // Check if mode is 'ROUND_HALF_EVEN'
36161 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
36162 if is_half_even && matches!(target, DialectType::DuckDB) {
36163 Ok(Expression::Function(Box::new(Function::new(
36164 "ROUND_EVEN".to_string(),
36165 vec![x, n],
36166 ))))
36167 } else {
36168 // Pass through with all args
36169 Ok(Expression::Function(Box::new(Function::new(
36170 "ROUND".to_string(),
36171 vec![x, n, mode],
36172 ))))
36173 }
36174 }
36175
36176 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
36177 "MAKE_INTERVAL" => {
36178 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
36179 // The positional args are: year, month
36180 // Named args are: day =>, minute =>, etc.
36181 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
36182 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
36183 // For BigQuery->BigQuery: reorder named args (day before minute)
36184 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
36185 let mut parts: Vec<(String, String)> = Vec::new();
36186 let mut pos_idx = 0;
36187 let pos_units = ["year", "month"];
36188 for arg in &args {
36189 if let Expression::NamedArgument(na) = arg {
36190 // Named arg like minute => 5
36191 let unit = na.name.name.clone();
36192 if let Expression::Literal(lit) = &na.value {
36193 if let Literal::Number(n) = lit.as_ref() {
36194 parts.push((unit, n.clone()));
36195 }
36196 }
36197 } else if pos_idx < pos_units.len() {
36198 if let Expression::Literal(lit) = arg {
36199 if let Literal::Number(n) = lit.as_ref() {
36200 parts.push((pos_units[pos_idx].to_string(), n.clone()));
36201 }
36202 }
36203 pos_idx += 1;
36204 }
36205 }
36206 // Don't sort - preserve original argument order
36207 let separator = if matches!(target, DialectType::Snowflake) {
36208 ", "
36209 } else {
36210 " "
36211 };
36212 let interval_str = parts
36213 .iter()
36214 .map(|(u, v)| format!("{} {}", v, u))
36215 .collect::<Vec<_>>()
36216 .join(separator);
36217 Ok(Expression::Interval(Box::new(
36218 crate::expressions::Interval {
36219 this: Some(Expression::Literal(Box::new(Literal::String(
36220 interval_str,
36221 )))),
36222 unit: None,
36223 },
36224 )))
36225 } else if matches!(target, DialectType::BigQuery) {
36226 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
36227 let mut positional = Vec::new();
36228 let mut named: Vec<(
36229 String,
36230 Expression,
36231 crate::expressions::NamedArgSeparator,
36232 )> = Vec::new();
36233 let _pos_units = ["year", "month"];
36234 let mut _pos_idx = 0;
36235 for arg in args {
36236 if let Expression::NamedArgument(na) = arg {
36237 named.push((na.name.name.clone(), na.value, na.separator));
36238 } else {
36239 positional.push(arg);
36240 _pos_idx += 1;
36241 }
36242 }
36243 // Sort named args by: day, hour, minute, second
36244 let unit_order = |u: &str| -> usize {
36245 match u.to_ascii_lowercase().as_str() {
36246 "day" => 0,
36247 "hour" => 1,
36248 "minute" => 2,
36249 "second" => 3,
36250 _ => 4,
36251 }
36252 };
36253 named.sort_by_key(|(u, _, _)| unit_order(u));
36254 let mut result_args = positional;
36255 for (name, value, sep) in named {
36256 result_args.push(Expression::NamedArgument(Box::new(
36257 crate::expressions::NamedArgument {
36258 name: Identifier::new(&name),
36259 value,
36260 separator: sep,
36261 },
36262 )));
36263 }
36264 Ok(Expression::Function(Box::new(Function::new(
36265 "MAKE_INTERVAL".to_string(),
36266 result_args,
36267 ))))
36268 } else {
36269 Ok(Expression::Function(Box::new(Function::new(
36270 "MAKE_INTERVAL".to_string(),
36271 args,
36272 ))))
36273 }
36274 }
36275
36276 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
36277 "ARRAY_TO_STRING" if args.len() == 3 => {
36278 let arr = args.remove(0);
36279 let sep = args.remove(0);
36280 let null_text = args.remove(0);
36281 match target {
36282 DialectType::DuckDB => {
36283 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
36284 let _lambda_param =
36285 Expression::Identifier(crate::expressions::Identifier::new("x"));
36286 let coalesce =
36287 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
36288 original_name: None,
36289 expressions: vec![
36290 Expression::Identifier(crate::expressions::Identifier::new(
36291 "x",
36292 )),
36293 null_text,
36294 ],
36295 inferred_type: None,
36296 }));
36297 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36298 parameters: vec![crate::expressions::Identifier::new("x")],
36299 body: coalesce,
36300 colon: false,
36301 parameter_types: vec![],
36302 }));
36303 let list_transform = Expression::Function(Box::new(Function::new(
36304 "LIST_TRANSFORM".to_string(),
36305 vec![arr, lambda],
36306 )));
36307 Ok(Expression::Function(Box::new(Function::new(
36308 "ARRAY_TO_STRING".to_string(),
36309 vec![list_transform, sep],
36310 ))))
36311 }
36312 _ => Ok(Expression::Function(Box::new(Function::new(
36313 "ARRAY_TO_STRING".to_string(),
36314 vec![arr, sep, null_text],
36315 )))),
36316 }
36317 }
36318
36319 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
36320 "LENGTH" if args.len() == 1 => {
36321 let arg = args.remove(0);
36322 match target {
36323 DialectType::DuckDB => {
36324 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
36325 let typeof_func = Expression::Function(Box::new(Function::new(
36326 "TYPEOF".to_string(),
36327 vec![arg.clone()],
36328 )));
36329 let blob_cast = Expression::Cast(Box::new(Cast {
36330 this: arg.clone(),
36331 to: DataType::VarBinary { length: None },
36332 trailing_comments: vec![],
36333 double_colon_syntax: false,
36334 format: None,
36335 default: None,
36336 inferred_type: None,
36337 }));
36338 let octet_length = Expression::Function(Box::new(Function::new(
36339 "OCTET_LENGTH".to_string(),
36340 vec![blob_cast],
36341 )));
36342 let text_cast = Expression::Cast(Box::new(Cast {
36343 this: arg,
36344 to: DataType::Text,
36345 trailing_comments: vec![],
36346 double_colon_syntax: false,
36347 format: None,
36348 default: None,
36349 inferred_type: None,
36350 }));
36351 let length_text = Expression::Function(Box::new(Function::new(
36352 "LENGTH".to_string(),
36353 vec![text_cast],
36354 )));
36355 Ok(Expression::Case(Box::new(crate::expressions::Case {
36356 operand: Some(typeof_func),
36357 whens: vec![(
36358 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
36359 octet_length,
36360 )],
36361 else_: Some(length_text),
36362 comments: Vec::new(),
36363 inferred_type: None,
36364 })))
36365 }
36366 _ => Ok(Expression::Function(Box::new(Function::new(
36367 "LENGTH".to_string(),
36368 vec![arg],
36369 )))),
36370 }
36371 }
36372
36373 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
36374 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
36375 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
36376 // The args should be [x, fraction] with the null handling stripped
36377 // For DuckDB: QUANTILE_CONT(x, fraction)
36378 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
36379 match target {
36380 DialectType::DuckDB => {
36381 // Strip down to just 2 args, rename to QUANTILE_CONT
36382 let x = args[0].clone();
36383 let frac = args[1].clone();
36384 Ok(Expression::Function(Box::new(Function::new(
36385 "QUANTILE_CONT".to_string(),
36386 vec![x, frac],
36387 ))))
36388 }
36389 _ => Ok(Expression::Function(Box::new(Function::new(
36390 "PERCENTILE_CONT".to_string(),
36391 args,
36392 )))),
36393 }
36394 }
36395
36396 // All others: pass through
36397 _ => Ok(Expression::Function(Box::new(Function {
36398 name: f.name,
36399 args,
36400 distinct: f.distinct,
36401 trailing_comments: f.trailing_comments,
36402 use_bracket_syntax: f.use_bracket_syntax,
36403 no_parens: f.no_parens,
36404 quoted: f.quoted,
36405 span: None,
36406 inferred_type: None,
36407 }))),
36408 }
36409 }
36410
36411 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
36412 /// Returns false for column references and other non-literal expressions where the type is unknown.
36413 fn can_infer_presto_type(expr: &Expression) -> bool {
36414 match expr {
36415 Expression::Literal(_) => true,
36416 Expression::Boolean(_) => true,
36417 Expression::Array(_) | Expression::ArrayFunc(_) => true,
36418 Expression::Struct(_) | Expression::StructFunc(_) => true,
36419 Expression::Function(f) => {
36420 f.name.eq_ignore_ascii_case("STRUCT")
36421 || f.name.eq_ignore_ascii_case("ROW")
36422 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
36423 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36424 || f.name.eq_ignore_ascii_case("NOW")
36425 }
36426 Expression::Cast(_) => true,
36427 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
36428 _ => false,
36429 }
36430 }
36431
36432 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
36433 fn infer_sql_type_for_presto(expr: &Expression) -> String {
36434 use crate::expressions::Literal;
36435 match expr {
36436 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36437 "VARCHAR".to_string()
36438 }
36439 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36440 let Literal::Number(n) = lit.as_ref() else {
36441 unreachable!()
36442 };
36443 if n.contains('.') {
36444 "DOUBLE".to_string()
36445 } else {
36446 "INTEGER".to_string()
36447 }
36448 }
36449 Expression::Boolean(_) => "BOOLEAN".to_string(),
36450 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36451 "DATE".to_string()
36452 }
36453 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36454 "TIMESTAMP".to_string()
36455 }
36456 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
36457 "TIMESTAMP".to_string()
36458 }
36459 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
36460 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
36461 Expression::Function(f) => {
36462 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
36463 "ROW".to_string()
36464 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
36465 "DATE".to_string()
36466 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36467 || f.name.eq_ignore_ascii_case("NOW")
36468 {
36469 "TIMESTAMP".to_string()
36470 } else {
36471 "VARCHAR".to_string()
36472 }
36473 }
36474 Expression::Cast(c) => {
36475 // If already cast, use the target type
36476 Self::data_type_to_presto_string(&c.to)
36477 }
36478 _ => "VARCHAR".to_string(),
36479 }
36480 }
36481
36482 /// Convert a DataType to its Presto/Trino string representation for ROW type
36483 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
36484 use crate::expressions::DataType;
36485 match dt {
36486 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
36487 "VARCHAR".to_string()
36488 }
36489 DataType::Int { .. }
36490 | DataType::BigInt { .. }
36491 | DataType::SmallInt { .. }
36492 | DataType::TinyInt { .. } => "INTEGER".to_string(),
36493 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
36494 DataType::Boolean => "BOOLEAN".to_string(),
36495 DataType::Date => "DATE".to_string(),
36496 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
36497 DataType::Struct { fields, .. } => {
36498 let field_strs: Vec<String> = fields
36499 .iter()
36500 .map(|f| {
36501 format!(
36502 "{} {}",
36503 f.name,
36504 Self::data_type_to_presto_string(&f.data_type)
36505 )
36506 })
36507 .collect();
36508 format!("ROW({})", field_strs.join(", "))
36509 }
36510 DataType::Array { element_type, .. } => {
36511 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
36512 }
36513 DataType::Custom { name } => {
36514 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
36515 name.clone()
36516 }
36517 _ => "VARCHAR".to_string(),
36518 }
36519 }
36520
36521 /// Convert IntervalUnit to string
36522 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
36523 match unit {
36524 crate::expressions::IntervalUnit::Year => "YEAR",
36525 crate::expressions::IntervalUnit::Quarter => "QUARTER",
36526 crate::expressions::IntervalUnit::Month => "MONTH",
36527 crate::expressions::IntervalUnit::Week => "WEEK",
36528 crate::expressions::IntervalUnit::Day => "DAY",
36529 crate::expressions::IntervalUnit::Hour => "HOUR",
36530 crate::expressions::IntervalUnit::Minute => "MINUTE",
36531 crate::expressions::IntervalUnit::Second => "SECOND",
36532 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
36533 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
36534 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
36535 }
36536 }
36537
36538 /// Extract unit string from an expression (uppercased)
36539 fn get_unit_str_static(expr: &Expression) -> String {
36540 use crate::expressions::Literal;
36541 match expr {
36542 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
36543 Expression::Var(v) => v.this.to_ascii_uppercase(),
36544 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36545 let Literal::String(s) = lit.as_ref() else {
36546 unreachable!()
36547 };
36548 s.to_ascii_uppercase()
36549 }
36550 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
36551 Expression::Function(f) => {
36552 let base = f.name.to_ascii_uppercase();
36553 if !f.args.is_empty() {
36554 let inner = Self::get_unit_str_static(&f.args[0]);
36555 format!("{}({})", base, inner)
36556 } else {
36557 base
36558 }
36559 }
36560 _ => "DAY".to_string(),
36561 }
36562 }
36563
36564 /// Parse unit string to IntervalUnit
36565 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
36566 match s {
36567 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
36568 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
36569 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
36570 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
36571 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
36572 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
36573 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
36574 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
36575 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
36576 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
36577 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
36578 _ => crate::expressions::IntervalUnit::Day,
36579 }
36580 }
36581
36582 /// Convert expression to simple string for interval building
36583 fn expr_to_string_static(expr: &Expression) -> String {
36584 use crate::expressions::Literal;
36585 match expr {
36586 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36587 let Literal::Number(s) = lit.as_ref() else {
36588 unreachable!()
36589 };
36590 s.clone()
36591 }
36592 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36593 let Literal::String(s) = lit.as_ref() else {
36594 unreachable!()
36595 };
36596 s.clone()
36597 }
36598 Expression::Identifier(id) => id.name.clone(),
36599 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
36600 _ => "1".to_string(),
36601 }
36602 }
36603
36604 /// Extract a simple string representation from a literal expression
36605 fn expr_to_string(expr: &Expression) -> String {
36606 use crate::expressions::Literal;
36607 match expr {
36608 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36609 let Literal::Number(s) = lit.as_ref() else {
36610 unreachable!()
36611 };
36612 s.clone()
36613 }
36614 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36615 let Literal::String(s) = lit.as_ref() else {
36616 unreachable!()
36617 };
36618 s.clone()
36619 }
36620 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
36621 Expression::Identifier(id) => id.name.clone(),
36622 _ => "1".to_string(),
36623 }
36624 }
36625
36626 /// Quote an interval value expression as a string literal if it's a number (or negated number)
36627 fn quote_interval_val(expr: &Expression) -> Expression {
36628 use crate::expressions::Literal;
36629 match expr {
36630 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36631 let Literal::Number(n) = lit.as_ref() else {
36632 unreachable!()
36633 };
36634 Expression::Literal(Box::new(Literal::String(n.clone())))
36635 }
36636 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
36637 Expression::Neg(inner) => {
36638 if let Expression::Literal(lit) = &inner.this {
36639 if let Literal::Number(n) = lit.as_ref() {
36640 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
36641 } else {
36642 inner.this.clone()
36643 }
36644 } else {
36645 expr.clone()
36646 }
36647 }
36648 _ => expr.clone(),
36649 }
36650 }
36651
36652 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
36653 fn timestamp_string_has_timezone(ts: &str) -> bool {
36654 let trimmed = ts.trim();
36655 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
36656 if let Some(last_space) = trimmed.rfind(' ') {
36657 let suffix = &trimmed[last_space + 1..];
36658 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
36659 let rest = &suffix[1..];
36660 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
36661 return true;
36662 }
36663 }
36664 }
36665 // Check for named timezone abbreviations
36666 let ts_lower = trimmed.to_ascii_lowercase();
36667 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
36668 for abbrev in &tz_abbrevs {
36669 if ts_lower.ends_with(abbrev) {
36670 return true;
36671 }
36672 }
36673 false
36674 }
36675
36676 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
36677 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
36678 use crate::expressions::{Cast, DataType, Literal};
36679 match expr {
36680 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36681 let Literal::Timestamp(s) = lit.as_ref() else {
36682 unreachable!()
36683 };
36684 let tz = func_name.starts_with("TIMESTAMP");
36685 Expression::Cast(Box::new(Cast {
36686 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36687 to: if tz {
36688 DataType::Timestamp {
36689 timezone: true,
36690 precision: None,
36691 }
36692 } else {
36693 DataType::Timestamp {
36694 timezone: false,
36695 precision: None,
36696 }
36697 },
36698 trailing_comments: vec![],
36699 double_colon_syntax: false,
36700 format: None,
36701 default: None,
36702 inferred_type: None,
36703 }))
36704 }
36705 other => other,
36706 }
36707 }
36708
36709 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
36710 fn maybe_cast_ts(expr: Expression) -> Expression {
36711 use crate::expressions::{Cast, DataType, Literal};
36712 match expr {
36713 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36714 let Literal::Timestamp(s) = lit.as_ref() else {
36715 unreachable!()
36716 };
36717 Expression::Cast(Box::new(Cast {
36718 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36719 to: DataType::Timestamp {
36720 timezone: false,
36721 precision: None,
36722 },
36723 trailing_comments: vec![],
36724 double_colon_syntax: false,
36725 format: None,
36726 default: None,
36727 inferred_type: None,
36728 }))
36729 }
36730 other => other,
36731 }
36732 }
36733
36734 /// Convert DATE 'x' literal to CAST('x' AS DATE)
36735 fn date_literal_to_cast(expr: Expression) -> Expression {
36736 use crate::expressions::{Cast, DataType, Literal};
36737 match expr {
36738 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36739 let Literal::Date(s) = lit.as_ref() else {
36740 unreachable!()
36741 };
36742 Expression::Cast(Box::new(Cast {
36743 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36744 to: DataType::Date,
36745 trailing_comments: vec![],
36746 double_colon_syntax: false,
36747 format: None,
36748 default: None,
36749 inferred_type: None,
36750 }))
36751 }
36752 other => other,
36753 }
36754 }
36755
36756 /// Ensure an expression that should be a date is CAST(... AS DATE).
36757 /// Handles both DATE literals and string literals that look like dates.
36758 fn ensure_cast_date(expr: Expression) -> Expression {
36759 use crate::expressions::{Cast, DataType, Literal};
36760 match expr {
36761 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36762 let Literal::Date(s) = lit.as_ref() else {
36763 unreachable!()
36764 };
36765 Expression::Cast(Box::new(Cast {
36766 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36767 to: DataType::Date,
36768 trailing_comments: vec![],
36769 double_colon_syntax: false,
36770 format: None,
36771 default: None,
36772 inferred_type: None,
36773 }))
36774 }
36775 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
36776 // String literal that should be a date -> CAST('s' AS DATE)
36777 Expression::Cast(Box::new(Cast {
36778 this: expr,
36779 to: DataType::Date,
36780 trailing_comments: vec![],
36781 double_colon_syntax: false,
36782 format: None,
36783 default: None,
36784 inferred_type: None,
36785 }))
36786 }
36787 // Already a CAST or other expression -> leave as-is
36788 other => other,
36789 }
36790 }
36791
36792 /// Force CAST(expr AS DATE) for any expression (not just literals)
36793 /// Skips if the expression is already a CAST to DATE
36794 fn force_cast_date(expr: Expression) -> Expression {
36795 use crate::expressions::{Cast, DataType};
36796 // If it's already a CAST to DATE, don't double-wrap
36797 if let Expression::Cast(ref c) = expr {
36798 if matches!(c.to, DataType::Date) {
36799 return expr;
36800 }
36801 }
36802 Expression::Cast(Box::new(Cast {
36803 this: expr,
36804 to: DataType::Date,
36805 trailing_comments: vec![],
36806 double_colon_syntax: false,
36807 format: None,
36808 default: None,
36809 inferred_type: None,
36810 }))
36811 }
36812
36813 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
36814 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
36815 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
36816 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
36817
36818 fn ensure_to_date_preserved(expr: Expression) -> Expression {
36819 use crate::expressions::{Function, Literal};
36820 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
36821 {
36822 Expression::Function(Box::new(Function::new(
36823 Self::PRESERVED_TO_DATE.to_string(),
36824 vec![expr],
36825 )))
36826 } else {
36827 expr
36828 }
36829 }
36830
36831 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
36832 fn try_cast_date(expr: Expression) -> Expression {
36833 use crate::expressions::{Cast, DataType};
36834 Expression::TryCast(Box::new(Cast {
36835 this: expr,
36836 to: DataType::Date,
36837 trailing_comments: vec![],
36838 double_colon_syntax: false,
36839 format: None,
36840 default: None,
36841 inferred_type: None,
36842 }))
36843 }
36844
36845 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
36846 fn double_cast_timestamp_date(expr: Expression) -> Expression {
36847 use crate::expressions::{Cast, DataType};
36848 let inner = Expression::Cast(Box::new(Cast {
36849 this: expr,
36850 to: DataType::Timestamp {
36851 timezone: false,
36852 precision: None,
36853 },
36854 trailing_comments: vec![],
36855 double_colon_syntax: false,
36856 format: None,
36857 default: None,
36858 inferred_type: None,
36859 }));
36860 Expression::Cast(Box::new(Cast {
36861 this: inner,
36862 to: DataType::Date,
36863 trailing_comments: vec![],
36864 double_colon_syntax: false,
36865 format: None,
36866 default: None,
36867 inferred_type: None,
36868 }))
36869 }
36870
36871 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
36872 fn double_cast_datetime_date(expr: Expression) -> Expression {
36873 use crate::expressions::{Cast, DataType};
36874 let inner = Expression::Cast(Box::new(Cast {
36875 this: expr,
36876 to: DataType::Custom {
36877 name: "DATETIME".to_string(),
36878 },
36879 trailing_comments: vec![],
36880 double_colon_syntax: false,
36881 format: None,
36882 default: None,
36883 inferred_type: None,
36884 }));
36885 Expression::Cast(Box::new(Cast {
36886 this: inner,
36887 to: DataType::Date,
36888 trailing_comments: vec![],
36889 double_colon_syntax: false,
36890 format: None,
36891 default: None,
36892 inferred_type: None,
36893 }))
36894 }
36895
36896 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
36897 fn double_cast_datetime2_date(expr: Expression) -> Expression {
36898 use crate::expressions::{Cast, DataType};
36899 let inner = Expression::Cast(Box::new(Cast {
36900 this: expr,
36901 to: DataType::Custom {
36902 name: "DATETIME2".to_string(),
36903 },
36904 trailing_comments: vec![],
36905 double_colon_syntax: false,
36906 format: None,
36907 default: None,
36908 inferred_type: None,
36909 }));
36910 Expression::Cast(Box::new(Cast {
36911 this: inner,
36912 to: DataType::Date,
36913 trailing_comments: vec![],
36914 double_colon_syntax: false,
36915 format: None,
36916 default: None,
36917 inferred_type: None,
36918 }))
36919 }
36920
36921 /// Convert Hive/Java-style date format strings to C-style (strftime) format
36922 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
36923 fn hive_format_to_c_format(fmt: &str) -> String {
36924 let mut result = String::new();
36925 let chars: Vec<char> = fmt.chars().collect();
36926 let mut i = 0;
36927 while i < chars.len() {
36928 match chars[i] {
36929 'y' => {
36930 let mut count = 0;
36931 while i < chars.len() && chars[i] == 'y' {
36932 count += 1;
36933 i += 1;
36934 }
36935 if count >= 4 {
36936 result.push_str("%Y");
36937 } else if count == 2 {
36938 result.push_str("%y");
36939 } else {
36940 result.push_str("%Y");
36941 }
36942 }
36943 'M' => {
36944 let mut count = 0;
36945 while i < chars.len() && chars[i] == 'M' {
36946 count += 1;
36947 i += 1;
36948 }
36949 if count >= 3 {
36950 result.push_str("%b");
36951 } else if count == 2 {
36952 result.push_str("%m");
36953 } else {
36954 result.push_str("%m");
36955 }
36956 }
36957 'd' => {
36958 let mut _count = 0;
36959 while i < chars.len() && chars[i] == 'd' {
36960 _count += 1;
36961 i += 1;
36962 }
36963 result.push_str("%d");
36964 }
36965 'H' => {
36966 let mut _count = 0;
36967 while i < chars.len() && chars[i] == 'H' {
36968 _count += 1;
36969 i += 1;
36970 }
36971 result.push_str("%H");
36972 }
36973 'h' => {
36974 let mut _count = 0;
36975 while i < chars.len() && chars[i] == 'h' {
36976 _count += 1;
36977 i += 1;
36978 }
36979 result.push_str("%I");
36980 }
36981 'm' => {
36982 let mut _count = 0;
36983 while i < chars.len() && chars[i] == 'm' {
36984 _count += 1;
36985 i += 1;
36986 }
36987 result.push_str("%M");
36988 }
36989 's' => {
36990 let mut _count = 0;
36991 while i < chars.len() && chars[i] == 's' {
36992 _count += 1;
36993 i += 1;
36994 }
36995 result.push_str("%S");
36996 }
36997 'S' => {
36998 // Fractional seconds - skip
36999 while i < chars.len() && chars[i] == 'S' {
37000 i += 1;
37001 }
37002 result.push_str("%f");
37003 }
37004 'a' => {
37005 // AM/PM
37006 while i < chars.len() && chars[i] == 'a' {
37007 i += 1;
37008 }
37009 result.push_str("%p");
37010 }
37011 'E' => {
37012 let mut count = 0;
37013 while i < chars.len() && chars[i] == 'E' {
37014 count += 1;
37015 i += 1;
37016 }
37017 if count >= 4 {
37018 result.push_str("%A");
37019 } else {
37020 result.push_str("%a");
37021 }
37022 }
37023 '\'' => {
37024 // Quoted literal text - pass through the quotes and content
37025 result.push('\'');
37026 i += 1;
37027 while i < chars.len() && chars[i] != '\'' {
37028 result.push(chars[i]);
37029 i += 1;
37030 }
37031 if i < chars.len() {
37032 result.push('\'');
37033 i += 1;
37034 }
37035 }
37036 c => {
37037 result.push(c);
37038 i += 1;
37039 }
37040 }
37041 }
37042 result
37043 }
37044
37045 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
37046 fn hive_format_to_presto_format(fmt: &str) -> String {
37047 let c_fmt = Self::hive_format_to_c_format(fmt);
37048 // Presto uses %T for HH:MM:SS
37049 c_fmt.replace("%H:%M:%S", "%T")
37050 }
37051
37052 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
37053 fn ensure_cast_timestamp(expr: Expression) -> Expression {
37054 use crate::expressions::{Cast, DataType, Literal};
37055 match expr {
37056 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37057 let Literal::Timestamp(s) = lit.as_ref() else {
37058 unreachable!()
37059 };
37060 Expression::Cast(Box::new(Cast {
37061 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37062 to: DataType::Timestamp {
37063 timezone: false,
37064 precision: None,
37065 },
37066 trailing_comments: vec![],
37067 double_colon_syntax: false,
37068 format: None,
37069 default: None,
37070 inferred_type: None,
37071 }))
37072 }
37073 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37074 Expression::Cast(Box::new(Cast {
37075 this: expr,
37076 to: DataType::Timestamp {
37077 timezone: false,
37078 precision: None,
37079 },
37080 trailing_comments: vec![],
37081 double_colon_syntax: false,
37082 format: None,
37083 default: None,
37084 inferred_type: None,
37085 }))
37086 }
37087 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37088 let Literal::Datetime(s) = lit.as_ref() else {
37089 unreachable!()
37090 };
37091 Expression::Cast(Box::new(Cast {
37092 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37093 to: DataType::Timestamp {
37094 timezone: false,
37095 precision: None,
37096 },
37097 trailing_comments: vec![],
37098 double_colon_syntax: false,
37099 format: None,
37100 default: None,
37101 inferred_type: None,
37102 }))
37103 }
37104 other => other,
37105 }
37106 }
37107
37108 /// Force CAST to TIMESTAMP for any expression (not just literals)
37109 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
37110 fn force_cast_timestamp(expr: Expression) -> Expression {
37111 use crate::expressions::{Cast, DataType};
37112 // Don't double-wrap if already a CAST to TIMESTAMP
37113 if let Expression::Cast(ref c) = expr {
37114 if matches!(c.to, DataType::Timestamp { .. }) {
37115 return expr;
37116 }
37117 }
37118 Expression::Cast(Box::new(Cast {
37119 this: expr,
37120 to: DataType::Timestamp {
37121 timezone: false,
37122 precision: None,
37123 },
37124 trailing_comments: vec![],
37125 double_colon_syntax: false,
37126 format: None,
37127 default: None,
37128 inferred_type: None,
37129 }))
37130 }
37131
37132 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
37133 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
37134 use crate::expressions::{Cast, DataType, Literal};
37135 match expr {
37136 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37137 let Literal::Timestamp(s) = lit.as_ref() else {
37138 unreachable!()
37139 };
37140 Expression::Cast(Box::new(Cast {
37141 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37142 to: DataType::Timestamp {
37143 timezone: true,
37144 precision: None,
37145 },
37146 trailing_comments: vec![],
37147 double_colon_syntax: false,
37148 format: None,
37149 default: None,
37150 inferred_type: None,
37151 }))
37152 }
37153 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37154 Expression::Cast(Box::new(Cast {
37155 this: expr,
37156 to: DataType::Timestamp {
37157 timezone: true,
37158 precision: None,
37159 },
37160 trailing_comments: vec![],
37161 double_colon_syntax: false,
37162 format: None,
37163 default: None,
37164 inferred_type: None,
37165 }))
37166 }
37167 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37168 let Literal::Datetime(s) = lit.as_ref() else {
37169 unreachable!()
37170 };
37171 Expression::Cast(Box::new(Cast {
37172 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37173 to: DataType::Timestamp {
37174 timezone: true,
37175 precision: None,
37176 },
37177 trailing_comments: vec![],
37178 double_colon_syntax: false,
37179 format: None,
37180 default: None,
37181 inferred_type: None,
37182 }))
37183 }
37184 other => other,
37185 }
37186 }
37187
37188 /// Ensure expression is CAST to DATETIME (for BigQuery)
37189 fn ensure_cast_datetime(expr: Expression) -> Expression {
37190 use crate::expressions::{Cast, DataType, Literal};
37191 match expr {
37192 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37193 Expression::Cast(Box::new(Cast {
37194 this: expr,
37195 to: DataType::Custom {
37196 name: "DATETIME".to_string(),
37197 },
37198 trailing_comments: vec![],
37199 double_colon_syntax: false,
37200 format: None,
37201 default: None,
37202 inferred_type: None,
37203 }))
37204 }
37205 other => other,
37206 }
37207 }
37208
37209 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
37210 fn force_cast_datetime(expr: Expression) -> Expression {
37211 use crate::expressions::{Cast, DataType};
37212 if let Expression::Cast(ref c) = expr {
37213 if let DataType::Custom { ref name } = c.to {
37214 if name.eq_ignore_ascii_case("DATETIME") {
37215 return expr;
37216 }
37217 }
37218 }
37219 Expression::Cast(Box::new(Cast {
37220 this: expr,
37221 to: DataType::Custom {
37222 name: "DATETIME".to_string(),
37223 },
37224 trailing_comments: vec![],
37225 double_colon_syntax: false,
37226 format: None,
37227 default: None,
37228 inferred_type: None,
37229 }))
37230 }
37231
37232 /// Ensure expression is CAST to DATETIME2 (for TSQL)
37233 fn ensure_cast_datetime2(expr: Expression) -> Expression {
37234 use crate::expressions::{Cast, DataType, Literal};
37235 match expr {
37236 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37237 Expression::Cast(Box::new(Cast {
37238 this: expr,
37239 to: DataType::Custom {
37240 name: "DATETIME2".to_string(),
37241 },
37242 trailing_comments: vec![],
37243 double_colon_syntax: false,
37244 format: None,
37245 default: None,
37246 inferred_type: None,
37247 }))
37248 }
37249 other => other,
37250 }
37251 }
37252
37253 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
37254 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
37255 use crate::expressions::{Cast, DataType, Literal};
37256 match expr {
37257 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37258 let Literal::Timestamp(s) = lit.as_ref() else {
37259 unreachable!()
37260 };
37261 Expression::Cast(Box::new(Cast {
37262 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37263 to: DataType::Timestamp {
37264 timezone: true,
37265 precision: None,
37266 },
37267 trailing_comments: vec![],
37268 double_colon_syntax: false,
37269 format: None,
37270 default: None,
37271 inferred_type: None,
37272 }))
37273 }
37274 other => other,
37275 }
37276 }
37277
37278 /// Convert BigQuery format string to Snowflake format string
37279 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
37280 use crate::expressions::Literal;
37281 if let Expression::Literal(lit) = format_expr {
37282 if let Literal::String(s) = lit.as_ref() {
37283 let sf = s
37284 .replace("%Y", "yyyy")
37285 .replace("%m", "mm")
37286 .replace("%d", "DD")
37287 .replace("%H", "HH24")
37288 .replace("%M", "MI")
37289 .replace("%S", "SS")
37290 .replace("%b", "mon")
37291 .replace("%B", "Month")
37292 .replace("%e", "FMDD");
37293 Expression::Literal(Box::new(Literal::String(sf)))
37294 } else {
37295 format_expr.clone()
37296 }
37297 } else {
37298 format_expr.clone()
37299 }
37300 }
37301
37302 /// Convert BigQuery format string to DuckDB format string
37303 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
37304 use crate::expressions::Literal;
37305 if let Expression::Literal(lit) = format_expr {
37306 if let Literal::String(s) = lit.as_ref() {
37307 let duck = s
37308 .replace("%T", "%H:%M:%S")
37309 .replace("%F", "%Y-%m-%d")
37310 .replace("%D", "%m/%d/%y")
37311 .replace("%x", "%m/%d/%y")
37312 .replace("%c", "%a %b %-d %H:%M:%S %Y")
37313 .replace("%e", "%-d")
37314 .replace("%E6S", "%S.%f");
37315 Expression::Literal(Box::new(Literal::String(duck)))
37316 } else {
37317 format_expr.clone()
37318 }
37319 } else {
37320 format_expr.clone()
37321 }
37322 }
37323
37324 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
37325 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
37326 use crate::expressions::Literal;
37327 if let Expression::Literal(lit) = format_expr {
37328 if let Literal::String(s) = lit.as_ref() {
37329 // Replace format elements from longest to shortest to avoid partial matches
37330 let result = s
37331 .replace("YYYYMMDD", "%Y%m%d")
37332 .replace("YYYY", "%Y")
37333 .replace("YY", "%y")
37334 .replace("MONTH", "%B")
37335 .replace("MON", "%b")
37336 .replace("MM", "%m")
37337 .replace("DD", "%d")
37338 .replace("HH24", "%H")
37339 .replace("HH12", "%I")
37340 .replace("HH", "%I")
37341 .replace("MI", "%M")
37342 .replace("SSTZH", "%S%z")
37343 .replace("SS", "%S")
37344 .replace("TZH", "%z");
37345 Expression::Literal(Box::new(Literal::String(result)))
37346 } else {
37347 format_expr.clone()
37348 }
37349 } else {
37350 format_expr.clone()
37351 }
37352 }
37353
37354 /// Normalize BigQuery format strings for BQ->BQ output
37355 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
37356 use crate::expressions::Literal;
37357 if let Expression::Literal(lit) = format_expr {
37358 if let Literal::String(s) = lit.as_ref() {
37359 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
37360 Expression::Literal(Box::new(Literal::String(norm)))
37361 } else {
37362 format_expr.clone()
37363 }
37364 } else {
37365 format_expr.clone()
37366 }
37367 }
37368}
37369
37370#[cfg(test)]
37371mod tests {
37372 use super::*;
37373
37374 #[test]
37375 fn test_dialect_type_from_str() {
37376 assert_eq!(
37377 "postgres".parse::<DialectType>().unwrap(),
37378 DialectType::PostgreSQL
37379 );
37380 assert_eq!(
37381 "postgresql".parse::<DialectType>().unwrap(),
37382 DialectType::PostgreSQL
37383 );
37384 assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
37385 assert_eq!(
37386 "bigquery".parse::<DialectType>().unwrap(),
37387 DialectType::BigQuery
37388 );
37389 }
37390
37391 #[test]
37392 fn test_basic_transpile() {
37393 let dialect = Dialect::get(DialectType::Generic);
37394 let result = dialect
37395 .transpile("SELECT 1", DialectType::PostgreSQL)
37396 .unwrap();
37397 assert_eq!(result.len(), 1);
37398 assert_eq!(result[0], "SELECT 1");
37399 }
37400
37401 #[test]
37402 fn test_function_transformation_mysql() {
37403 // NVL should be transformed to IFNULL in MySQL
37404 let dialect = Dialect::get(DialectType::Generic);
37405 let result = dialect
37406 .transpile("SELECT NVL(a, b)", DialectType::MySQL)
37407 .unwrap();
37408 assert_eq!(result[0], "SELECT IFNULL(a, b)");
37409 }
37410
37411 #[test]
37412 fn test_get_path_duckdb() {
37413 // Test: step by step
37414 let snowflake = Dialect::get(DialectType::Snowflake);
37415
37416 // Step 1: Parse and check what Snowflake produces as intermediate
37417 let result_sf_sf = snowflake
37418 .transpile(
37419 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
37420 DialectType::Snowflake,
37421 )
37422 .unwrap();
37423 eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);
37424
37425 // Step 2: DuckDB target
37426 let result_sf_dk = snowflake
37427 .transpile(
37428 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
37429 DialectType::DuckDB,
37430 )
37431 .unwrap();
37432 eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);
37433
37434 // Step 3: GET_PATH directly
37435 let result_gp = snowflake
37436 .transpile(
37437 "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
37438 DialectType::DuckDB,
37439 )
37440 .unwrap();
37441 eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
37442 }
37443
37444 #[test]
37445 fn test_function_transformation_postgres() {
37446 // IFNULL should be transformed to COALESCE in PostgreSQL
37447 let dialect = Dialect::get(DialectType::Generic);
37448 let result = dialect
37449 .transpile("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
37450 .unwrap();
37451 assert_eq!(result[0], "SELECT COALESCE(a, b)");
37452
37453 // NVL should also be transformed to COALESCE
37454 let result = dialect
37455 .transpile("SELECT NVL(a, b)", DialectType::PostgreSQL)
37456 .unwrap();
37457 assert_eq!(result[0], "SELECT COALESCE(a, b)");
37458 }
37459
37460 #[test]
37461 fn test_hive_cast_to_trycast() {
37462 // Hive CAST should become TRY_CAST for targets that support it
37463 let hive = Dialect::get(DialectType::Hive);
37464 let result = hive
37465 .transpile("CAST(1 AS INT)", DialectType::DuckDB)
37466 .unwrap();
37467 assert_eq!(result[0], "TRY_CAST(1 AS INT)");
37468
37469 let result = hive
37470 .transpile("CAST(1 AS INT)", DialectType::Presto)
37471 .unwrap();
37472 assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
37473 }
37474
37475 #[test]
37476 fn test_hive_array_identity() {
37477 // Hive ARRAY<DATE> should preserve angle bracket syntax
37478 let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
37479 let hive = Dialect::get(DialectType::Hive);
37480
37481 // Test via transpile (this works)
37482 let result = hive.transpile(sql, DialectType::Hive).unwrap();
37483 eprintln!("Hive ARRAY via transpile: {}", result[0]);
37484 assert!(
37485 result[0].contains("ARRAY<DATE>"),
37486 "transpile: Expected ARRAY<DATE>, got: {}",
37487 result[0]
37488 );
37489
37490 // Test via parse -> transform -> generate (identity test path)
37491 let ast = hive.parse(sql).unwrap();
37492 let transformed = hive.transform(ast[0].clone()).unwrap();
37493 let output = hive.generate(&transformed).unwrap();
37494 eprintln!("Hive ARRAY via identity path: {}", output);
37495 assert!(
37496 output.contains("ARRAY<DATE>"),
37497 "identity path: Expected ARRAY<DATE>, got: {}",
37498 output
37499 );
37500 }
37501
/// Generic -> StarRocks: `BETWEEN` / `NOT BETWEEN` inside `DELETE` must be
/// rewritten into explicit comparisons, while `BETWEEN` inside `SELECT` is kept.
#[test]
fn test_starrocks_delete_between_expansion() {
    // StarRocks doesn't support BETWEEN in DELETE statements
    let generic = Dialect::get(DialectType::Generic);
    let to_starrocks = |sql: &str| generic.transpile(sql, DialectType::StarRocks).unwrap();

    // DELETE + BETWEEN -> `>= AND <=`
    let delete_between = to_starrocks("DELETE FROM t WHERE a BETWEEN b AND c");
    assert_eq!(delete_between[0], "DELETE FROM t WHERE a >= b AND a <= c");

    // DELETE + NOT BETWEEN -> `< OR >`
    let delete_not_between = to_starrocks("DELETE FROM t WHERE a NOT BETWEEN b AND c");
    assert_eq!(delete_not_between[0], "DELETE FROM t WHERE a < b OR a > c");

    // SELECT keeps BETWEEN untouched (StarRocks supports it there)
    let select_between = to_starrocks("SELECT * FROM t WHERE a BETWEEN b AND c");
    assert!(
        select_between[0].contains("BETWEEN"),
        "BETWEEN should be preserved in SELECT"
    );
}
37537
/// Snowflake -> DuckDB: nested `LTRIM(RTRIM(col))` must transpile without error.
/// The outcome (result or error) is logged before the assertion runs.
#[test]
fn test_snowflake_ltrim_rtrim_parse() {
    let nested_trim = "SELECT LTRIM(RTRIM(col)) FROM t1";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let outcome = snowflake.transpile(nested_trim, DialectType::DuckDB);

    // Log whichever branch we got so failures are easy to diagnose.
    match outcome.as_ref() {
        Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
    }

    assert!(
        outcome.is_ok(),
        "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
        outcome.err()
    );
}
37553
/// DuckDB -> DuckDB: a bare `COUNT_IF(x)` expression must transpile without error.
/// The outcome (result or error) is logged before the assertion runs.
#[test]
fn test_duckdb_count_if_parse() {
    let count_if = "COUNT_IF(x)";
    let duckdb = Dialect::get(DialectType::DuckDB);
    let outcome = duckdb.transpile(count_if, DialectType::DuckDB);

    match outcome.as_ref() {
        Err(e) => eprintln!("COUNT_IF error: {}", e),
        Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
    }

    assert!(
        outcome.is_ok(),
        "Expected successful parse of COUNT_IF(x), got error: {:?}",
        outcome.err()
    );
}
37569
/// TSQL -> DuckDB: `CAST(X AS TINYINT)` must transpile without error.
/// The outcome (result or error) is logged before the assertion runs.
#[test]
fn test_tsql_cast_tinyint_parse() {
    let cast_expr = "CAST(X AS TINYINT)";
    let source = Dialect::get(DialectType::TSQL);
    let outcome = source.transpile(cast_expr, DialectType::DuckDB);

    match outcome.as_ref() {
        Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
    }

    assert!(
        outcome.is_ok(),
        "Expected successful transpile, got error: {:?}",
        outcome.err()
    );
}
37585
/// PostgreSQL -> PostgreSQL: the `#` operator expression `x # y` must
/// round-trip unchanged.
#[test]
fn test_pg_hash_bitwise_xor() {
    let expr = "x # y";
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let round_tripped = postgres.transpile(expr, DialectType::PostgreSQL).unwrap();
    assert_eq!(round_tripped[0], "x # y");
}
37592
/// PostgreSQL -> DuckDB: `ARRAY[...]` literals become bracket literals and the
/// `@>` operator is preserved.
#[test]
fn test_pg_array_to_duckdb() {
    let input = "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]";
    let expected = "SELECT [1, 2, 3] @> [1, 2]";

    let postgres = Dialect::get(DialectType::PostgreSQL);
    let statements = postgres.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37601
/// Generic -> BigQuery: `ARRAY_REMOVE` is expected to be rendered as an
/// `ARRAY(SELECT ... FROM UNNEST(...) WHERE ... <> ...)` subquery.
#[test]
fn test_array_remove_bigquery() {
    let input = "ARRAY_REMOVE(the_array, target)";
    let expected = "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)";

    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::BigQuery).unwrap();
    assert_eq!(statements[0], expected);
}
37613
/// Smoke test for Generic -> ClickHouse on a `CAST(MAP(...) AS MAP(TEXT, TEXT))`.
/// Both parsing and transpiling must succeed; the parsed AST and the generated
/// SQL are printed but not asserted.
#[test]
fn test_map_clickhouse_case() {
    let cast_map = "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))";
    let generic = Dialect::get(DialectType::Generic);

    let ast = generic.parse(cast_map).unwrap();
    eprintln!("MAP parsed: {:?}", ast);

    let rendered = generic.transpile(cast_map, DialectType::ClickHouse).unwrap();
    eprintln!("MAP result: {}", rendered[0]);
}
37629
/// Generic -> Presto: `UNNEST(GENERATE_DATE_ARRAY(...))` with a one-week step
/// is expected to become `UNNEST(SEQUENCE(...))` with a 7-day interval.
#[test]
fn test_generate_date_array_presto() {
    let input = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let expected = "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))";

    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::Presto).unwrap();
    eprintln!("GDA -> Presto: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
37640
/// Smoke test: Generic -> PostgreSQL transpilation of
/// `UNNEST(GENERATE_DATE_ARRAY(...))` must succeed. The output is printed,
/// not asserted.
#[test]
fn test_generate_date_array_postgres() {
    let input = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::PostgreSQL).unwrap();
    eprintln!("GDA -> PostgreSQL: {}", statements[0]);
}
37650
/// Smoke test: Generic -> Snowflake transpilation of
/// `UNNEST(GENERATE_DATE_ARRAY(...))` must succeed. The output is printed,
/// not asserted.
#[test]
fn test_generate_date_array_snowflake() {
    let input = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::Snowflake).unwrap();
    eprintln!("GDA -> Snowflake: {}", statements[0]);
}
37662
/// Smoke test: Generic -> Snowflake transpilation of
/// `ARRAY_LENGTH(GENERATE_DATE_ARRAY(...))` must succeed. The output is
/// printed, not asserted.
#[test]
fn test_array_length_generate_date_array_snowflake() {
    let input = "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", statements[0]);
}
37672
/// Smoke test: Generic -> MySQL transpilation of
/// `UNNEST(GENERATE_DATE_ARRAY(...))` must succeed. The output is printed,
/// not asserted.
#[test]
fn test_generate_date_array_mysql() {
    let input = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::MySQL).unwrap();
    eprintln!("GDA -> MySQL: {}", statements[0]);
}
37682
/// Smoke test: Generic -> Redshift transpilation of
/// `UNNEST(GENERATE_DATE_ARRAY(...))` must succeed. The output is printed,
/// not asserted.
#[test]
fn test_generate_date_array_redshift() {
    let input = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::Redshift).unwrap();
    eprintln!("GDA -> Redshift: {}", statements[0]);
}
37692
/// Smoke test: Generic -> TSQL transpilation of
/// `UNNEST(GENERATE_DATE_ARRAY(...))` must succeed. The output is printed,
/// not asserted.
#[test]
fn test_generate_date_array_tsql() {
    let input = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::TSQL).unwrap();
    eprintln!("GDA -> TSQL: {}", statements[0]);
}
37702
/// Exploratory Generic -> ClickHouse check of `STRUCT<...>` casts, once with
/// `name TYPE` fields and once with `name: TYPE` fields. Neither outcome is
/// asserted: results and errors are only printed, so this test does not fail
/// on a transpile error.
#[test]
fn test_struct_colon_syntax() {
    let generic = Dialect::get(DialectType::Generic);

    // Field list without colons
    let plain_fields = "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)";
    match generic.transpile(plain_fields, DialectType::ClickHouse) {
        Err(e) => eprintln!("STRUCT no colon error: {}", e),
        Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
    }

    // Field list with `name: TYPE` colons
    let colon_fields = "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)";
    match generic.transpile(colon_fields, DialectType::ClickHouse) {
        Err(e) => eprintln!("STRUCT colon error: {}", e),
        Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
    }
}
37725
/// Smoke test: Generic -> MySQL transpilation of `GENERATE_DATE_ARRAY` used
/// inside a CTE must succeed. The output is printed, not asserted.
#[test]
fn test_generate_date_array_cte_wrapped_mysql() {
    let input = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::MySQL).unwrap();
    eprintln!("GDA CTE -> MySQL: {}", statements[0]);
}
37735
/// Smoke test: Generic -> TSQL transpilation of `GENERATE_DATE_ARRAY` used
/// inside a CTE must succeed. The output is printed, not asserted.
#[test]
fn test_generate_date_array_cte_wrapped_tsql() {
    let input = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::TSQL).unwrap();
    eprintln!("GDA CTE -> TSQL: {}", statements[0]);
}
37745
/// Oracle -> DuckDB: a `DECODE` whose arguments are all literals is expected
/// to become a plain equality `CASE`, without any `IS NULL` handling.
#[test]
fn test_decode_literal_no_null_check() {
    let input = "SELECT decode(1,2,3,4)";
    let expected = "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END";

    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(
        statements[0], expected,
        "Literal DECODE should not have IS NULL checks"
    );
}
37758
/// Oracle -> DuckDB: a `DECODE` comparing a column against a literal is
/// expected to use simple equality (like sqlglot), without `IS NULL` handling.
#[test]
fn test_decode_column_vs_literal_no_null_check() {
    let input = "SELECT decode(col, 2, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t";

    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(
        statements[0], expected,
        "Column vs literal DECODE should not have IS NULL checks"
    );
}
37771
/// Oracle -> DuckDB: a `DECODE` comparing a column against another column
/// must keep a null-safe comparison, i.e. the output contains `IS NULL`.
#[test]
fn test_decode_column_vs_column_keeps_null_check() {
    let input = "SELECT decode(col, col2, 3, 4) FROM t";

    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(input, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    assert!(
        rendered.contains("IS NULL"),
        "Column vs column DECODE should have IS NULL checks, got: {}",
        rendered
    );
}
37785
/// Oracle -> DuckDB: a `DECODE` whose search value is `NULL` is expected to
/// be rendered with an `IS NULL` test.
#[test]
fn test_decode_null_search() {
    let input = "SELECT decode(col, NULL, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t";

    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37798
37799 // =========================================================================
37800 // REGEXP function transpilation tests
37801 // =========================================================================
37802
/// Snowflake -> DuckDB: two-argument `REGEXP_SUBSTR` is expected to become
/// `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_2arg() {
    let input = "SELECT REGEXP_SUBSTR(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37811
/// Snowflake -> DuckDB: `REGEXP_SUBSTR` with position `1` is expected to
/// collapse to a plain `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
    let input = "SELECT REGEXP_SUBSTR(s, 'pattern', 1)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37820
/// Snowflake -> DuckDB: `REGEXP_SUBSTR` with a position greater than 1 is
/// expected to extract from `NULLIF(SUBSTRING(s, pos), '')`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
    let input = "SELECT REGEXP_SUBSTR(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37832
/// Snowflake -> DuckDB: `REGEXP_SUBSTR` with an occurrence greater than 1 is
/// expected to index into `REGEXP_EXTRACT_ALL` via `ARRAY_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
    let input = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)";
    let expected = "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37847
/// Snowflake -> DuckDB: `REGEXP_SUBSTR(s, p, 1, 1, 'e')` is expected to
/// reduce to a plain `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
    let input = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37859
/// Snowflake -> DuckDB: `REGEXP_SUBSTR(s, p, 1, 1, 'e', 0)` (group 0) is
/// expected to reduce to a plain `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
    let input = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37871
/// Snowflake -> Snowflake: the trailing group argument `0` of
/// `REGEXP_SUBSTR` is expected to be dropped on the identity path.
#[test]
fn test_regexp_substr_snowflake_identity_strip_group0() {
    let input = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
37883
/// Snowflake -> DuckDB: two-argument `REGEXP_SUBSTR_ALL` is expected to
/// become `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
    let input = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37895
/// Snowflake -> DuckDB: `REGEXP_SUBSTR_ALL` with a position greater than 1 is
/// expected to run `REGEXP_EXTRACT_ALL` over `SUBSTRING(s, pos)`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
    let input = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37910
/// Snowflake -> DuckDB: `REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e')` is expected to
/// reduce to a plain `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
    let input = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37922
/// Snowflake -> DuckDB: `REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0)` (group 0) is
/// expected to reduce to a plain `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
    let input = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37934
/// Snowflake -> Snowflake: the trailing group argument `0` of
/// `REGEXP_SUBSTR_ALL` is expected to be dropped on the identity path.
#[test]
fn test_regexp_substr_all_snowflake_identity_strip_group0() {
    let input = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
37949
/// Snowflake -> DuckDB: two-argument `REGEXP_COUNT` is expected to become a
/// `CASE` that yields 0 for an empty pattern and otherwise the length of
/// `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_count_snowflake_to_duckdb_2arg() {
    let input = "SELECT REGEXP_COUNT(s, 'pattern')";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37961
/// Snowflake -> DuckDB: `REGEXP_COUNT` with a start position is expected to
/// count matches over `SUBSTRING(s, pos)`.
#[test]
fn test_regexp_count_snowflake_to_duckdb_3arg() {
    let input = "SELECT REGEXP_COUNT(s, 'pattern', 3)";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37973
/// Snowflake -> DuckDB: `REGEXP_COUNT` with the `'i'` flag is expected to
/// prepend `'(?i)'` to the pattern in the generated expression.
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
    let input = "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')";
    let expected = "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37988
/// Snowflake -> DuckDB: `REGEXP_COUNT` on a string literal with the `'im'`
/// flags is expected to prepend `'(?im)'` to the pattern.
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
    let input = "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')";
    let expected = "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38003
/// Snowflake -> DuckDB: `REGEXP_REPLACE` with position 1 and occurrence 1 is
/// expected to reduce to the three-argument form.
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
    let input = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)";
    let expected = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38015
/// Snowflake -> DuckDB: `REGEXP_REPLACE` with position 3 and occurrence 0 is
/// expected to keep the prefix via `SUBSTRING(s, 1, 2)` and replace in the
/// remainder with the `'g'` option.
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
    let input = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38030
/// Snowflake -> DuckDB: `REGEXP_REPLACE` with position 3 and occurrence 1 is
/// expected to keep the prefix via `SUBSTRING(s, 1, 2)` and replace in the
/// remainder without the `'g'` option.
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
    let input = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38045
/// Snowflake -> DuckDB: two-argument `RLIKE` is expected to become
/// `REGEXP_FULL_MATCH`.
#[test]
fn test_rlike_snowflake_to_duckdb_2arg() {
    let input = "SELECT RLIKE(a, b)";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b)";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38054
/// Snowflake -> DuckDB: `RLIKE` with a flags argument is expected to become
/// `REGEXP_FULL_MATCH` with the flags passed through.
#[test]
fn test_rlike_snowflake_to_duckdb_3arg_flags() {
    let input = "SELECT RLIKE(a, b, 'i')";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b, 'i')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38063
/// BigQuery -> Snowflake: `REGEXP_EXTRACT_ALL` with a pattern that has no
/// capture group is expected to become a two-argument `REGEXP_SUBSTR_ALL`.
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
    let input = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";

    let bigquery = Dialect::get(DialectType::BigQuery);
    let statements = bigquery.transpile(input, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
38075
/// BigQuery -> Snowflake: `REGEXP_EXTRACT_ALL` with a capture group in the
/// pattern is expected to become `REGEXP_SUBSTR_ALL(..., 1, 1, 'c', 1)`.
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
    let input = "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)";

    let bigquery = Dialect::get(DialectType::BigQuery);
    let statements = bigquery.transpile(input, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
38090
/// Snowflake -> DuckDB: the rendering of two-argument `REGEXP_INSTR` must
/// contain both a `CASE WHEN` and a `LIST_SUM` call.
#[test]
fn test_regexp_instr_snowflake_to_duckdb_2arg() {
    let input = "SELECT REGEXP_INSTR(s, 'pattern')";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];

    assert!(
        rendered.contains("CASE WHEN"),
        "Expected CASE WHEN in result: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_SUM"),
        "Expected LIST_SUM in result: {}",
        rendered
    );
}
38108
/// Generic -> DuckDB: the rendering of `ARRAY_EXCEPT` must contain each of
/// the fragments checked below (`CASE WHEN`, `LIST_FILTER`, `LIST_DISTINCT`,
/// `IS NOT DISTINCT FROM`, `= 0`).
#[test]
fn test_array_except_generic_to_duckdb() {
    let input = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";

    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", rendered);

    assert!(rendered.contains("CASE WHEN"), "Expected CASE WHEN: {}", rendered);
    assert!(
        rendered.contains("LIST_FILTER"),
        "Expected LIST_FILTER: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_DISTINCT"),
        "Expected LIST_DISTINCT: {}",
        rendered
    );
    assert!(
        rendered.contains("IS NOT DISTINCT FROM"),
        "Expected IS NOT DISTINCT FROM: {}",
        rendered
    );
    assert!(rendered.contains("= 0"), "Expected = 0 filter: {}", rendered);
}
38145
/// Generic -> Snowflake: `ARRAY_EXCEPT` is expected to be kept, with the
/// `ARRAY(...)` arguments rendered as bracket literals.
#[test]
fn test_array_except_generic_to_snowflake() {
    let input = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT([1, 2, 3], [2])";

    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
38158
/// Generic -> Presto: `ARRAY_EXCEPT` is expected to be kept, with the
/// `ARRAY(...)` arguments rendered as `ARRAY[...]` literals.
#[test]
fn test_array_except_generic_to_presto() {
    let input = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])";

    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(input, DialectType::Presto).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Presto: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
38171
/// Snowflake -> DuckDB: the rendering of `ARRAY_EXCEPT` must contain both
/// `CASE WHEN` and `LIST_TRANSFORM`.
#[test]
fn test_array_except_snowflake_to_duckdb() {
    let input = "SELECT ARRAY_EXCEPT([1, 2, 3], [2])";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", rendered);

    assert!(rendered.contains("CASE WHEN"), "Expected CASE WHEN: {}", rendered);
    assert!(
        rendered.contains("LIST_TRANSFORM"),
        "Expected LIST_TRANSFORM: {}",
        rendered
    );
}
38190
/// Snowflake -> Snowflake: `ARRAY_CONTAINS` with an array literal containing
/// `NULL` must round-trip unchanged.
#[test]
fn test_array_contains_snowflake_to_snowflake() {
    let input = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", statements[0]);
    assert_eq!(statements[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
}
38203
/// Snowflake -> DuckDB: the rendering of `ARRAY_CONTAINS` must contain
/// `CASE WHEN`, `NULLIF` and `ARRAY_CONTAINS`.
#[test]
fn test_array_contains_snowflake_to_duckdb() {
    let input = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", rendered);

    assert!(rendered.contains("CASE WHEN"), "Expected CASE WHEN: {}", rendered);
    assert!(rendered.contains("NULLIF"), "Expected NULLIF: {}", rendered);
    assert!(
        rendered.contains("ARRAY_CONTAINS"),
        "Expected ARRAY_CONTAINS: {}",
        rendered
    );
}
38230
/// Snowflake -> DuckDB: the rendering of `ARRAY_DISTINCT` must contain
/// `CASE WHEN`, `LIST_DISTINCT`, `LIST_APPEND` and `LIST_FILTER`.
#[test]
fn test_array_distinct_snowflake_to_duckdb() {
    let input = "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(input, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", rendered);

    assert!(rendered.contains("CASE WHEN"), "Expected CASE WHEN: {}", rendered);
    assert!(
        rendered.contains("LIST_DISTINCT"),
        "Expected LIST_DISTINCT: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_APPEND"),
        "Expected LIST_APPEND: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_FILTER"),
        "Expected LIST_FILTER: {}",
        rendered
    );
}
38261 }
38262}