polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, Function, FunctionBody, Identifier, Null};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, TokenType, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive (ASCII) when parsed from strings via
/// [`FromStr`](std::str::FromStr). Some dialects accept aliases (e.g., "mssql" and
/// "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)); the aliases are listed on
/// the individual variants below.
///
/// The canonical name of a dialect, as written by its `Display` implementation, is the
/// variant name in lowercase (e.g., `PostgreSQL` is rendered as "postgresql").
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default; an empty string also parses to this).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions
    /// (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// Every method except [`dialect_type`](DialectImpl::dialect_type) has a default
/// implementation: the configuration hooks return the `Default` configuration and the
/// transform hooks return their input unchanged. A minimal dialect therefore only needs
/// to provide `dialect_type` and override the methods that differ from standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only required method of the trait.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default returns
    /// `TokenizerConfig::default()`.
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default returns
    /// `GeneratorConfig::default()`.
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    ///
    /// The default implementation returns `Ok(expr)` and never fails.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through that returns
    /// `Ok(expr)` and never fails.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
480/// Convert DuckDB C-style format strings to Presto C-style format strings.
481/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
482#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Rewrites, tried in order at every `%` of the input:
    //   `%%`        literal percent sign, copied through unchanged
    //   `%H:%M:%S`  -> `%T`
    //   `%-m`       -> `%c`   (non-padded month)
    //   `%-d`       -> `%e`   (non-padded day)
    //   `%-I`       -> `%l`   (non-padded 12-hour clock)
    //   `%-H`       -> `%k`   (non-padded 24-hour clock)
    //   `%M`        -> `%i`
    //   `%S`        -> `%s`
    //
    // `%%` must come first: consuming the escape as a unit guarantees that the
    // character following it is never mistaken for a directive (`%%M` is a literal
    // "%M", not the minute directive). Longer patterns precede the shorter ones they
    // contain. Any other directive (`%Y`, `%m`, `%d`, ...) is copied through unchanged.
    const DIRECTIVES: [(&str, &str); 8] = [
        ("%%", "%%"),
        ("%H:%M:%S", "%T"),
        ("%-m", "%c"),
        ("%-d", "%e"),
        ("%-I", "%l"),
        ("%-H", "%k"),
        ("%M", "%i"),
        ("%S", "%s"),
    ];

    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;
    // Single left-to-right scan: copy literal text up to the next `%`, then translate
    // (or pass through) whatever starts there. All slice offsets fall on ASCII bytes,
    // so they are always valid char boundaries.
    while let Some(pos) = rest.find('%') {
        out.push_str(&rest[..pos]);
        rest = &rest[pos..];
        match DIRECTIVES.iter().find(|entry| rest.starts_with(entry.0)) {
            Some(&(source, target)) => {
                out.push_str(target);
                rest = &rest[source.len()..];
            }
            None => {
                // Not a directive we translate: keep the `%` and continue after it.
                out.push('%');
                rest = &rest[1..];
            }
        }
    }
    out.push_str(rest);
    out
}
505
506/// Convert DuckDB C-style format strings to BigQuery format strings.
507/// BigQuery uses a mix of strftime-like directives.
508#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Rewrites, tried in order at every `%` of the input:
    //   `%%`        literal percent sign, copied through unchanged
    //   `%-d`       -> `%e`
    //   `%Y-%m-%d`  -> `%F`
    //   `%H:%M:%S`  -> `%T`
    //
    // `%%` must come first so that the character following an escaped percent sign is
    // never mistaken for a directive (`%%-d` is a literal "%-d"). A combined
    // `%Y-%m-%d %H:%M:%S` needs no entry of its own: its two halves are rewritten
    // independently, giving `%F %T`. Every other directive is copied through unchanged.
    const DIRECTIVES: [(&str, &str); 4] = [
        ("%%", "%%"),
        ("%-d", "%e"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ];

    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;
    // Single left-to-right scan: copy literal text up to the next `%`, then translate
    // (or pass through) whatever starts there. All slice offsets fall on ASCII bytes,
    // so they are always valid char boundaries.
    while let Some(pos) = rest.find('%') {
        out.push_str(&rest[..pos]);
        rest = &rest[pos..];
        match DIRECTIVES.iter().find(|entry| rest.starts_with(entry.0)) {
            Some(&(source, target)) => {
                out.push_str(target);
                rest = &rest[source.len()..];
            }
            None => {
                // Not a directive we translate: keep the `%` and continue after it.
                out.push('%');
                rest = &rest[1..];
            }
        }
    }
    out.push_str(rest);
    out
}
518
/// A unit of work on the explicit task stack driven by `transform_recursive_inner`.
///
/// A node with children is handled by pushing a `Finish` task for the node followed by
/// `Visit` tasks for its children; because the stack is last-in-first-out, the children
/// are processed before the node's `Finish` task is popped.
#[derive(Debug)]
enum TransformTask {
    /// An expression that has not been processed yet.
    Visit(Expression),
    /// A deferred step for a node whose children were scheduled ahead of it.
    Finish(FinishTask),
}
524
/// The deferred half of a node's processing, carried by `TransformTask::Finish`.
///
/// Each variant holds the parent node (or enough of it to rebuild it) while its
/// children are being processed as separate tasks.
///
/// NOTE(review): the code that consumes these variants lies outside the part of the
/// file reviewed here; the per-variant notes describe only what the scheduling side
/// shows, and the remaining meanings should be confirmed against the consumer.
#[derive(Debug)]
enum FinishTask {
    /// A node whose single child (`this`) was moved out and replaced by a `Null`
    /// placeholder before being scheduled (used for `Alias`, `Paren`, `Not`, `Neg`,
    /// `IsNull`, `Subquery`, `Exists`, ...).
    Unary(Expression),
    /// A node whose `left` and `right` operands were both moved out and replaced by
    /// `Null` placeholders (used for `And`, `Or`, arithmetic, comparisons, ...).
    Binary(Expression),
    /// Presumably a cast-style node awaiting its transformed operand -- TODO confirm.
    CastLike(Expression),
    /// Presumably a node with a list of children; the `usize` looks like the number of
    /// child results to collect -- TODO confirm.
    List(Expression, usize),
    /// Presumably a `FROM` clause together with the number of its source expressions
    /// -- TODO confirm.
    From(crate::expressions::From, usize),
    /// A `SELECT` awaiting its clauses; see [`SelectFrame`].
    Select(SelectFrame),
    /// Presumably a set operation awaiting its operands -- TODO confirm.
    SetOp(Expression),
}
535
/// Bookkeeping carried by `FinishTask::Select` while the parts of a `SELECT` are
/// processed as separate tasks.
///
/// NOTE(review): the code that fills and consumes this frame lies outside the part of
/// the file reviewed here. The field notes below are inferred from the field names and
/// types only -- confirm against the consumer.
#[derive(Debug)]
struct SelectFrame {
    /// The `SELECT` node being processed.
    select: Box<crate::expressions::Select>,
    /// Presumably the number of projection expressions scheduled.
    expr_count: usize,
    /// Presumably whether a `FROM` clause was scheduled.
    from_present: bool,
    /// Presumably whether a `WHERE` clause was scheduled.
    where_present: bool,
    /// Presumably the number of `GROUP BY` expressions scheduled.
    group_by_count: usize,
    /// Presumably whether a `HAVING` clause was scheduled.
    having_present: bool,
    /// Presumably whether a `QUALIFY` clause was scheduled.
    qualify_present: bool,
}
546
547fn transform_pop_result(results: &mut Vec<Expression>) -> Result<Expression> {
548 results
549 .pop()
550 .ok_or_else(|| crate::error::Error::Internal("transform stack underflow".to_string()))
551}
552
553fn transform_pop_results(results: &mut Vec<Expression>, count: usize) -> Result<Vec<Expression>> {
554 if results.len() < count {
555 return Err(crate::error::Error::Internal(
556 "transform result stack underflow".to_string(),
557 ));
558 }
559 Ok(results.split_off(results.len() - count))
560}
561
562/// Applies a transform function bottom-up through an entire expression tree.
563///
564/// The public entrypoint uses an explicit task stack for the recursion-heavy shapes
565/// that dominate deeply nested SQL (nested SELECT/FROM/SUBQUERY chains, set-operation
566/// trees, and common binary/unary expression chains). Less common shapes currently
567/// reuse the reference recursive implementation so semantics stay identical while
568/// the hot path avoids stack growth.
569pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
570where
571 F: Fn(Expression) -> Result<Expression>,
572{
573 #[cfg(feature = "stacker")]
574 {
575 let red_zone = if cfg!(debug_assertions) {
576 4 * 1024 * 1024
577 } else {
578 1024 * 1024
579 };
580 stacker::maybe_grow(red_zone, 8 * 1024 * 1024, move || {
581 transform_recursive_inner(expr, transform_fn)
582 })
583 }
584 #[cfg(not(feature = "stacker"))]
585 {
586 transform_recursive_inner(expr, transform_fn)
587 }
588}
589
590fn transform_recursive_inner<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
591where
592 F: Fn(Expression) -> Result<Expression>,
593{
594 let mut tasks = vec![TransformTask::Visit(expr)];
595 let mut results = Vec::new();
596
597 while let Some(task) = tasks.pop() {
598 match task {
599 TransformTask::Visit(expr) => {
600 if matches!(
601 &expr,
602 Expression::Literal(_)
603 | Expression::Boolean(_)
604 | Expression::Null(_)
605 | Expression::Identifier(_)
606 | Expression::Star(_)
607 | Expression::Parameter(_)
608 | Expression::Placeholder(_)
609 | Expression::SessionParameter(_)
610 ) {
611 results.push(transform_fn(expr)?);
612 continue;
613 }
614
615 match expr {
616 Expression::Alias(mut alias) => {
617 let child = std::mem::replace(&mut alias.this, Expression::Null(Null));
618 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Alias(
619 alias,
620 ))));
621 tasks.push(TransformTask::Visit(child));
622 }
623 Expression::Paren(mut paren) => {
624 let child = std::mem::replace(&mut paren.this, Expression::Null(Null));
625 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Paren(
626 paren,
627 ))));
628 tasks.push(TransformTask::Visit(child));
629 }
630 Expression::Not(mut not) => {
631 let child = std::mem::replace(&mut not.this, Expression::Null(Null));
632 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Not(
633 not,
634 ))));
635 tasks.push(TransformTask::Visit(child));
636 }
637 Expression::Neg(mut neg) => {
638 let child = std::mem::replace(&mut neg.this, Expression::Null(Null));
639 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Neg(
640 neg,
641 ))));
642 tasks.push(TransformTask::Visit(child));
643 }
644 Expression::IsNull(mut expr) => {
645 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
646 tasks.push(TransformTask::Finish(FinishTask::Unary(
647 Expression::IsNull(expr),
648 )));
649 tasks.push(TransformTask::Visit(child));
650 }
651 Expression::IsTrue(mut expr) => {
652 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
653 tasks.push(TransformTask::Finish(FinishTask::Unary(
654 Expression::IsTrue(expr),
655 )));
656 tasks.push(TransformTask::Visit(child));
657 }
658 Expression::IsFalse(mut expr) => {
659 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
660 tasks.push(TransformTask::Finish(FinishTask::Unary(
661 Expression::IsFalse(expr),
662 )));
663 tasks.push(TransformTask::Visit(child));
664 }
665 Expression::Subquery(mut subquery) => {
666 let child = std::mem::replace(&mut subquery.this, Expression::Null(Null));
667 tasks.push(TransformTask::Finish(FinishTask::Unary(
668 Expression::Subquery(subquery),
669 )));
670 tasks.push(TransformTask::Visit(child));
671 }
672 Expression::Exists(mut exists) => {
673 let child = std::mem::replace(&mut exists.this, Expression::Null(Null));
674 tasks.push(TransformTask::Finish(FinishTask::Unary(
675 Expression::Exists(exists),
676 )));
677 tasks.push(TransformTask::Visit(child));
678 }
679 Expression::TableArgument(mut arg) => {
680 let child = std::mem::replace(&mut arg.this, Expression::Null(Null));
681 tasks.push(TransformTask::Finish(FinishTask::Unary(
682 Expression::TableArgument(arg),
683 )));
684 tasks.push(TransformTask::Visit(child));
685 }
686 Expression::And(mut op) => {
687 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
688 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
689 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::And(
690 op,
691 ))));
692 tasks.push(TransformTask::Visit(right));
693 tasks.push(TransformTask::Visit(left));
694 }
695 Expression::Or(mut op) => {
696 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
697 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
698 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Or(
699 op,
700 ))));
701 tasks.push(TransformTask::Visit(right));
702 tasks.push(TransformTask::Visit(left));
703 }
704 Expression::Add(mut op) => {
705 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
706 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
707 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Add(
708 op,
709 ))));
710 tasks.push(TransformTask::Visit(right));
711 tasks.push(TransformTask::Visit(left));
712 }
713 Expression::Sub(mut op) => {
714 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
715 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
716 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Sub(
717 op,
718 ))));
719 tasks.push(TransformTask::Visit(right));
720 tasks.push(TransformTask::Visit(left));
721 }
722 Expression::Mul(mut op) => {
723 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
724 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
725 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mul(
726 op,
727 ))));
728 tasks.push(TransformTask::Visit(right));
729 tasks.push(TransformTask::Visit(left));
730 }
731 Expression::Div(mut op) => {
732 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
733 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
734 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Div(
735 op,
736 ))));
737 tasks.push(TransformTask::Visit(right));
738 tasks.push(TransformTask::Visit(left));
739 }
740 Expression::Eq(mut op) => {
741 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
742 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
743 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Eq(
744 op,
745 ))));
746 tasks.push(TransformTask::Visit(right));
747 tasks.push(TransformTask::Visit(left));
748 }
749 Expression::Lt(mut op) => {
750 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
751 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
752 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lt(
753 op,
754 ))));
755 tasks.push(TransformTask::Visit(right));
756 tasks.push(TransformTask::Visit(left));
757 }
758 Expression::Gt(mut op) => {
759 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
760 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
761 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gt(
762 op,
763 ))));
764 tasks.push(TransformTask::Visit(right));
765 tasks.push(TransformTask::Visit(left));
766 }
767 Expression::Neq(mut op) => {
768 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
769 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
770 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Neq(
771 op,
772 ))));
773 tasks.push(TransformTask::Visit(right));
774 tasks.push(TransformTask::Visit(left));
775 }
776 Expression::Lte(mut op) => {
777 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
778 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
779 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lte(
780 op,
781 ))));
782 tasks.push(TransformTask::Visit(right));
783 tasks.push(TransformTask::Visit(left));
784 }
785 Expression::Gte(mut op) => {
786 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
787 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
788 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gte(
789 op,
790 ))));
791 tasks.push(TransformTask::Visit(right));
792 tasks.push(TransformTask::Visit(left));
793 }
794 Expression::Mod(mut op) => {
795 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
796 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
797 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mod(
798 op,
799 ))));
800 tasks.push(TransformTask::Visit(right));
801 tasks.push(TransformTask::Visit(left));
802 }
803 Expression::Concat(mut op) => {
804 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
805 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
806 tasks.push(TransformTask::Finish(FinishTask::Binary(
807 Expression::Concat(op),
808 )));
809 tasks.push(TransformTask::Visit(right));
810 tasks.push(TransformTask::Visit(left));
811 }
812 Expression::BitwiseAnd(mut op) => {
813 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
814 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
815 tasks.push(TransformTask::Finish(FinishTask::Binary(
816 Expression::BitwiseAnd(op),
817 )));
818 tasks.push(TransformTask::Visit(right));
819 tasks.push(TransformTask::Visit(left));
820 }
821 Expression::BitwiseOr(mut op) => {
822 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
823 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
824 tasks.push(TransformTask::Finish(FinishTask::Binary(
825 Expression::BitwiseOr(op),
826 )));
827 tasks.push(TransformTask::Visit(right));
828 tasks.push(TransformTask::Visit(left));
829 }
830 Expression::BitwiseXor(mut op) => {
831 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
832 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
833 tasks.push(TransformTask::Finish(FinishTask::Binary(
834 Expression::BitwiseXor(op),
835 )));
836 tasks.push(TransformTask::Visit(right));
837 tasks.push(TransformTask::Visit(left));
838 }
839 Expression::Is(mut op) => {
840 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
841 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
842 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Is(
843 op,
844 ))));
845 tasks.push(TransformTask::Visit(right));
846 tasks.push(TransformTask::Visit(left));
847 }
848 Expression::MemberOf(mut op) => {
849 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
850 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
851 tasks.push(TransformTask::Finish(FinishTask::Binary(
852 Expression::MemberOf(op),
853 )));
854 tasks.push(TransformTask::Visit(right));
855 tasks.push(TransformTask::Visit(left));
856 }
857 Expression::ArrayContainsAll(mut op) => {
858 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
859 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
860 tasks.push(TransformTask::Finish(FinishTask::Binary(
861 Expression::ArrayContainsAll(op),
862 )));
863 tasks.push(TransformTask::Visit(right));
864 tasks.push(TransformTask::Visit(left));
865 }
866 Expression::ArrayContainedBy(mut op) => {
867 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
868 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
869 tasks.push(TransformTask::Finish(FinishTask::Binary(
870 Expression::ArrayContainedBy(op),
871 )));
872 tasks.push(TransformTask::Visit(right));
873 tasks.push(TransformTask::Visit(left));
874 }
875 Expression::ArrayOverlaps(mut op) => {
876 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
877 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
878 tasks.push(TransformTask::Finish(FinishTask::Binary(
879 Expression::ArrayOverlaps(op),
880 )));
881 tasks.push(TransformTask::Visit(right));
882 tasks.push(TransformTask::Visit(left));
883 }
884 Expression::TsMatch(mut op) => {
885 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
886 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
887 tasks.push(TransformTask::Finish(FinishTask::Binary(
888 Expression::TsMatch(op),
889 )));
890 tasks.push(TransformTask::Visit(right));
891 tasks.push(TransformTask::Visit(left));
892 }
893 Expression::Adjacent(mut op) => {
894 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
895 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
896 tasks.push(TransformTask::Finish(FinishTask::Binary(
897 Expression::Adjacent(op),
898 )));
899 tasks.push(TransformTask::Visit(right));
900 tasks.push(TransformTask::Visit(left));
901 }
902 Expression::Like(mut like) => {
903 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
904 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
905 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Like(
906 like,
907 ))));
908 tasks.push(TransformTask::Visit(right));
909 tasks.push(TransformTask::Visit(left));
910 }
911 Expression::ILike(mut like) => {
912 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
913 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
914 tasks.push(TransformTask::Finish(FinishTask::Binary(
915 Expression::ILike(like),
916 )));
917 tasks.push(TransformTask::Visit(right));
918 tasks.push(TransformTask::Visit(left));
919 }
920 Expression::Cast(mut cast) => {
921 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
922 tasks.push(TransformTask::Finish(FinishTask::CastLike(
923 Expression::Cast(cast),
924 )));
925 tasks.push(TransformTask::Visit(child));
926 }
927 Expression::TryCast(mut cast) => {
928 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
929 tasks.push(TransformTask::Finish(FinishTask::CastLike(
930 Expression::TryCast(cast),
931 )));
932 tasks.push(TransformTask::Visit(child));
933 }
934 Expression::SafeCast(mut cast) => {
935 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
936 tasks.push(TransformTask::Finish(FinishTask::CastLike(
937 Expression::SafeCast(cast),
938 )));
939 tasks.push(TransformTask::Visit(child));
940 }
941 Expression::Function(mut function) => {
942 let args = std::mem::take(&mut function.args);
943 let count = args.len();
944 tasks.push(TransformTask::Finish(FinishTask::List(
945 Expression::Function(function),
946 count,
947 )));
948 for child in args.into_iter().rev() {
949 tasks.push(TransformTask::Visit(child));
950 }
951 }
952 Expression::Array(mut array) => {
953 let expressions = std::mem::take(&mut array.expressions);
954 let count = expressions.len();
955 tasks.push(TransformTask::Finish(FinishTask::List(
956 Expression::Array(array),
957 count,
958 )));
959 for child in expressions.into_iter().rev() {
960 tasks.push(TransformTask::Visit(child));
961 }
962 }
963 Expression::Tuple(mut tuple) => {
964 let expressions = std::mem::take(&mut tuple.expressions);
965 let count = expressions.len();
966 tasks.push(TransformTask::Finish(FinishTask::List(
967 Expression::Tuple(tuple),
968 count,
969 )));
970 for child in expressions.into_iter().rev() {
971 tasks.push(TransformTask::Visit(child));
972 }
973 }
974 Expression::ArrayFunc(mut array) => {
975 let expressions = std::mem::take(&mut array.expressions);
976 let count = expressions.len();
977 tasks.push(TransformTask::Finish(FinishTask::List(
978 Expression::ArrayFunc(array),
979 count,
980 )));
981 for child in expressions.into_iter().rev() {
982 tasks.push(TransformTask::Visit(child));
983 }
984 }
985 Expression::Coalesce(mut func) => {
986 let expressions = std::mem::take(&mut func.expressions);
987 let count = expressions.len();
988 tasks.push(TransformTask::Finish(FinishTask::List(
989 Expression::Coalesce(func),
990 count,
991 )));
992 for child in expressions.into_iter().rev() {
993 tasks.push(TransformTask::Visit(child));
994 }
995 }
996 Expression::Greatest(mut func) => {
997 let expressions = std::mem::take(&mut func.expressions);
998 let count = expressions.len();
999 tasks.push(TransformTask::Finish(FinishTask::List(
1000 Expression::Greatest(func),
1001 count,
1002 )));
1003 for child in expressions.into_iter().rev() {
1004 tasks.push(TransformTask::Visit(child));
1005 }
1006 }
1007 Expression::Least(mut func) => {
1008 let expressions = std::mem::take(&mut func.expressions);
1009 let count = expressions.len();
1010 tasks.push(TransformTask::Finish(FinishTask::List(
1011 Expression::Least(func),
1012 count,
1013 )));
1014 for child in expressions.into_iter().rev() {
1015 tasks.push(TransformTask::Visit(child));
1016 }
1017 }
1018 Expression::ArrayConcat(mut func) => {
1019 let expressions = std::mem::take(&mut func.expressions);
1020 let count = expressions.len();
1021 tasks.push(TransformTask::Finish(FinishTask::List(
1022 Expression::ArrayConcat(func),
1023 count,
1024 )));
1025 for child in expressions.into_iter().rev() {
1026 tasks.push(TransformTask::Visit(child));
1027 }
1028 }
1029 Expression::ArrayIntersect(mut func) => {
1030 let expressions = std::mem::take(&mut func.expressions);
1031 let count = expressions.len();
1032 tasks.push(TransformTask::Finish(FinishTask::List(
1033 Expression::ArrayIntersect(func),
1034 count,
1035 )));
1036 for child in expressions.into_iter().rev() {
1037 tasks.push(TransformTask::Visit(child));
1038 }
1039 }
1040 Expression::ArrayZip(mut func) => {
1041 let expressions = std::mem::take(&mut func.expressions);
1042 let count = expressions.len();
1043 tasks.push(TransformTask::Finish(FinishTask::List(
1044 Expression::ArrayZip(func),
1045 count,
1046 )));
1047 for child in expressions.into_iter().rev() {
1048 tasks.push(TransformTask::Visit(child));
1049 }
1050 }
1051 Expression::MapConcat(mut func) => {
1052 let expressions = std::mem::take(&mut func.expressions);
1053 let count = expressions.len();
1054 tasks.push(TransformTask::Finish(FinishTask::List(
1055 Expression::MapConcat(func),
1056 count,
1057 )));
1058 for child in expressions.into_iter().rev() {
1059 tasks.push(TransformTask::Visit(child));
1060 }
1061 }
1062 Expression::JsonArray(mut func) => {
1063 let expressions = std::mem::take(&mut func.expressions);
1064 let count = expressions.len();
1065 tasks.push(TransformTask::Finish(FinishTask::List(
1066 Expression::JsonArray(func),
1067 count,
1068 )));
1069 for child in expressions.into_iter().rev() {
1070 tasks.push(TransformTask::Visit(child));
1071 }
1072 }
1073 Expression::From(mut from) => {
1074 let expressions = std::mem::take(&mut from.expressions);
1075 let count = expressions.len();
1076 tasks.push(TransformTask::Finish(FinishTask::From(*from, count)));
1077 for child in expressions.into_iter().rev() {
1078 tasks.push(TransformTask::Visit(child));
1079 }
1080 }
1081 Expression::Select(mut select) => {
1082 let expressions = std::mem::take(&mut select.expressions);
1083 let expr_count = expressions.len();
1084
1085 let from_info = select.from.take().map(|mut from| {
1086 let children = std::mem::take(&mut from.expressions);
1087 (from, children)
1088 });
1089 let from_present = from_info.is_some();
1090
1091 let where_child = select.where_clause.as_mut().map(|where_clause| {
1092 std::mem::replace(&mut where_clause.this, Expression::Null(Null))
1093 });
1094 let where_present = where_child.is_some();
1095
1096 let group_expressions = select
1097 .group_by
1098 .as_mut()
1099 .map(|group_by| std::mem::take(&mut group_by.expressions))
1100 .unwrap_or_default();
1101 let group_by_count = group_expressions.len();
1102
1103 let having_child = select.having.as_mut().map(|having| {
1104 std::mem::replace(&mut having.this, Expression::Null(Null))
1105 });
1106 let having_present = having_child.is_some();
1107
1108 let qualify_child = select.qualify.as_mut().map(|qualify| {
1109 std::mem::replace(&mut qualify.this, Expression::Null(Null))
1110 });
1111 let qualify_present = qualify_child.is_some();
1112
1113 tasks.push(TransformTask::Finish(FinishTask::Select(SelectFrame {
1114 select,
1115 expr_count,
1116 from_present,
1117 where_present,
1118 group_by_count,
1119 having_present,
1120 qualify_present,
1121 })));
1122
1123 if let Some(child) = qualify_child {
1124 tasks.push(TransformTask::Visit(child));
1125 }
1126 if let Some(child) = having_child {
1127 tasks.push(TransformTask::Visit(child));
1128 }
1129 for child in group_expressions.into_iter().rev() {
1130 tasks.push(TransformTask::Visit(child));
1131 }
1132 if let Some(child) = where_child {
1133 tasks.push(TransformTask::Visit(child));
1134 }
1135 if let Some((from, children)) = from_info {
1136 tasks.push(TransformTask::Finish(FinishTask::From(
1137 from,
1138 children.len(),
1139 )));
1140 for child in children.into_iter().rev() {
1141 tasks.push(TransformTask::Visit(child));
1142 }
1143 }
1144 for child in expressions.into_iter().rev() {
1145 tasks.push(TransformTask::Visit(child));
1146 }
1147 }
1148 Expression::Union(mut union) => {
1149 let right = std::mem::replace(&mut union.right, Expression::Null(Null));
1150 let left = std::mem::replace(&mut union.left, Expression::Null(Null));
1151 tasks.push(TransformTask::Finish(FinishTask::SetOp(Expression::Union(
1152 union,
1153 ))));
1154 tasks.push(TransformTask::Visit(right));
1155 tasks.push(TransformTask::Visit(left));
1156 }
1157 Expression::Intersect(mut intersect) => {
1158 let right = std::mem::replace(&mut intersect.right, Expression::Null(Null));
1159 let left = std::mem::replace(&mut intersect.left, Expression::Null(Null));
1160 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1161 Expression::Intersect(intersect),
1162 )));
1163 tasks.push(TransformTask::Visit(right));
1164 tasks.push(TransformTask::Visit(left));
1165 }
1166 Expression::Except(mut except) => {
1167 let right = std::mem::replace(&mut except.right, Expression::Null(Null));
1168 let left = std::mem::replace(&mut except.left, Expression::Null(Null));
1169 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1170 Expression::Except(except),
1171 )));
1172 tasks.push(TransformTask::Visit(right));
1173 tasks.push(TransformTask::Visit(left));
1174 }
1175 other => {
1176 results.push(transform_recursive_reference(other, transform_fn)?);
1177 }
1178 }
1179 }
1180 TransformTask::Finish(finish) => match finish {
1181 FinishTask::Unary(expr) => {
1182 let child = transform_pop_result(&mut results)?;
1183 let rebuilt = match expr {
1184 Expression::Alias(mut alias) => {
1185 alias.this = child;
1186 Expression::Alias(alias)
1187 }
1188 Expression::Paren(mut paren) => {
1189 paren.this = child;
1190 Expression::Paren(paren)
1191 }
1192 Expression::Not(mut not) => {
1193 not.this = child;
1194 Expression::Not(not)
1195 }
1196 Expression::Neg(mut neg) => {
1197 neg.this = child;
1198 Expression::Neg(neg)
1199 }
1200 Expression::IsNull(mut expr) => {
1201 expr.this = child;
1202 Expression::IsNull(expr)
1203 }
1204 Expression::IsTrue(mut expr) => {
1205 expr.this = child;
1206 Expression::IsTrue(expr)
1207 }
1208 Expression::IsFalse(mut expr) => {
1209 expr.this = child;
1210 Expression::IsFalse(expr)
1211 }
1212 Expression::Subquery(mut subquery) => {
1213 subquery.this = child;
1214 Expression::Subquery(subquery)
1215 }
1216 Expression::Exists(mut exists) => {
1217 exists.this = child;
1218 Expression::Exists(exists)
1219 }
1220 Expression::TableArgument(mut arg) => {
1221 arg.this = child;
1222 Expression::TableArgument(arg)
1223 }
1224 _ => {
1225 return Err(crate::error::Error::Internal(
1226 "unexpected unary transform task".to_string(),
1227 ));
1228 }
1229 };
1230 results.push(transform_fn(rebuilt)?);
1231 }
1232 FinishTask::Binary(expr) => {
1233 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1234 let left = children.next().expect("left child");
1235 let right = children.next().expect("right child");
1236 let rebuilt = match expr {
1237 Expression::And(mut op) => {
1238 op.left = left;
1239 op.right = right;
1240 Expression::And(op)
1241 }
1242 Expression::Or(mut op) => {
1243 op.left = left;
1244 op.right = right;
1245 Expression::Or(op)
1246 }
1247 Expression::Add(mut op) => {
1248 op.left = left;
1249 op.right = right;
1250 Expression::Add(op)
1251 }
1252 Expression::Sub(mut op) => {
1253 op.left = left;
1254 op.right = right;
1255 Expression::Sub(op)
1256 }
1257 Expression::Mul(mut op) => {
1258 op.left = left;
1259 op.right = right;
1260 Expression::Mul(op)
1261 }
1262 Expression::Div(mut op) => {
1263 op.left = left;
1264 op.right = right;
1265 Expression::Div(op)
1266 }
1267 Expression::Eq(mut op) => {
1268 op.left = left;
1269 op.right = right;
1270 Expression::Eq(op)
1271 }
1272 Expression::Lt(mut op) => {
1273 op.left = left;
1274 op.right = right;
1275 Expression::Lt(op)
1276 }
1277 Expression::Gt(mut op) => {
1278 op.left = left;
1279 op.right = right;
1280 Expression::Gt(op)
1281 }
1282 Expression::Neq(mut op) => {
1283 op.left = left;
1284 op.right = right;
1285 Expression::Neq(op)
1286 }
1287 Expression::Lte(mut op) => {
1288 op.left = left;
1289 op.right = right;
1290 Expression::Lte(op)
1291 }
1292 Expression::Gte(mut op) => {
1293 op.left = left;
1294 op.right = right;
1295 Expression::Gte(op)
1296 }
1297 Expression::Mod(mut op) => {
1298 op.left = left;
1299 op.right = right;
1300 Expression::Mod(op)
1301 }
1302 Expression::Concat(mut op) => {
1303 op.left = left;
1304 op.right = right;
1305 Expression::Concat(op)
1306 }
1307 Expression::BitwiseAnd(mut op) => {
1308 op.left = left;
1309 op.right = right;
1310 Expression::BitwiseAnd(op)
1311 }
1312 Expression::BitwiseOr(mut op) => {
1313 op.left = left;
1314 op.right = right;
1315 Expression::BitwiseOr(op)
1316 }
1317 Expression::BitwiseXor(mut op) => {
1318 op.left = left;
1319 op.right = right;
1320 Expression::BitwiseXor(op)
1321 }
1322 Expression::Is(mut op) => {
1323 op.left = left;
1324 op.right = right;
1325 Expression::Is(op)
1326 }
1327 Expression::MemberOf(mut op) => {
1328 op.left = left;
1329 op.right = right;
1330 Expression::MemberOf(op)
1331 }
1332 Expression::ArrayContainsAll(mut op) => {
1333 op.left = left;
1334 op.right = right;
1335 Expression::ArrayContainsAll(op)
1336 }
1337 Expression::ArrayContainedBy(mut op) => {
1338 op.left = left;
1339 op.right = right;
1340 Expression::ArrayContainedBy(op)
1341 }
1342 Expression::ArrayOverlaps(mut op) => {
1343 op.left = left;
1344 op.right = right;
1345 Expression::ArrayOverlaps(op)
1346 }
1347 Expression::TsMatch(mut op) => {
1348 op.left = left;
1349 op.right = right;
1350 Expression::TsMatch(op)
1351 }
1352 Expression::Adjacent(mut op) => {
1353 op.left = left;
1354 op.right = right;
1355 Expression::Adjacent(op)
1356 }
1357 Expression::Like(mut like) => {
1358 like.left = left;
1359 like.right = right;
1360 Expression::Like(like)
1361 }
1362 Expression::ILike(mut like) => {
1363 like.left = left;
1364 like.right = right;
1365 Expression::ILike(like)
1366 }
1367 _ => {
1368 return Err(crate::error::Error::Internal(
1369 "unexpected binary transform task".to_string(),
1370 ));
1371 }
1372 };
1373 results.push(transform_fn(rebuilt)?);
1374 }
1375 FinishTask::CastLike(expr) => {
1376 let child = transform_pop_result(&mut results)?;
1377 let rebuilt = match expr {
1378 Expression::Cast(mut cast) => {
1379 cast.this = child;
1380 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1381 Expression::Cast(cast)
1382 }
1383 Expression::TryCast(mut cast) => {
1384 cast.this = child;
1385 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1386 Expression::TryCast(cast)
1387 }
1388 Expression::SafeCast(mut cast) => {
1389 cast.this = child;
1390 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1391 Expression::SafeCast(cast)
1392 }
1393 _ => {
1394 return Err(crate::error::Error::Internal(
1395 "unexpected cast transform task".to_string(),
1396 ));
1397 }
1398 };
1399 results.push(transform_fn(rebuilt)?);
1400 }
1401 FinishTask::List(expr, count) => {
1402 let children = transform_pop_results(&mut results, count)?;
1403 let rebuilt = match expr {
1404 Expression::Function(mut function) => {
1405 function.args = children;
1406 Expression::Function(function)
1407 }
1408 Expression::Array(mut array) => {
1409 array.expressions = children;
1410 Expression::Array(array)
1411 }
1412 Expression::Tuple(mut tuple) => {
1413 tuple.expressions = children;
1414 Expression::Tuple(tuple)
1415 }
1416 Expression::ArrayFunc(mut array) => {
1417 array.expressions = children;
1418 Expression::ArrayFunc(array)
1419 }
1420 Expression::Coalesce(mut func) => {
1421 func.expressions = children;
1422 Expression::Coalesce(func)
1423 }
1424 Expression::Greatest(mut func) => {
1425 func.expressions = children;
1426 Expression::Greatest(func)
1427 }
1428 Expression::Least(mut func) => {
1429 func.expressions = children;
1430 Expression::Least(func)
1431 }
1432 Expression::ArrayConcat(mut func) => {
1433 func.expressions = children;
1434 Expression::ArrayConcat(func)
1435 }
1436 Expression::ArrayIntersect(mut func) => {
1437 func.expressions = children;
1438 Expression::ArrayIntersect(func)
1439 }
1440 Expression::ArrayZip(mut func) => {
1441 func.expressions = children;
1442 Expression::ArrayZip(func)
1443 }
1444 Expression::MapConcat(mut func) => {
1445 func.expressions = children;
1446 Expression::MapConcat(func)
1447 }
1448 Expression::JsonArray(mut func) => {
1449 func.expressions = children;
1450 Expression::JsonArray(func)
1451 }
1452 _ => {
1453 return Err(crate::error::Error::Internal(
1454 "unexpected list transform task".to_string(),
1455 ));
1456 }
1457 };
1458 results.push(transform_fn(rebuilt)?);
1459 }
1460 FinishTask::From(mut from, count) => {
1461 from.expressions = transform_pop_results(&mut results, count)?;
1462 results.push(transform_fn(Expression::From(Box::new(from)))?);
1463 }
1464 FinishTask::Select(frame) => {
1465 let mut select = *frame.select;
1466
1467 if frame.qualify_present {
1468 if let Some(ref mut qualify) = select.qualify {
1469 qualify.this = transform_pop_result(&mut results)?;
1470 }
1471 }
1472 if frame.having_present {
1473 if let Some(ref mut having) = select.having {
1474 having.this = transform_pop_result(&mut results)?;
1475 }
1476 }
1477 if frame.group_by_count > 0 {
1478 if let Some(ref mut group_by) = select.group_by {
1479 group_by.expressions =
1480 transform_pop_results(&mut results, frame.group_by_count)?;
1481 }
1482 }
1483 if frame.where_present {
1484 if let Some(ref mut where_clause) = select.where_clause {
1485 where_clause.this = transform_pop_result(&mut results)?;
1486 }
1487 }
1488 if frame.from_present {
1489 match transform_pop_result(&mut results)? {
1490 Expression::From(from) => {
1491 select.from = Some(*from);
1492 }
1493 _ => {
1494 return Err(crate::error::Error::Internal(
1495 "expected FROM expression result".to_string(),
1496 ));
1497 }
1498 }
1499 }
1500 select.expressions = transform_pop_results(&mut results, frame.expr_count)?;
1501
1502 select.joins = select
1503 .joins
1504 .into_iter()
1505 .map(|mut join| {
1506 join.this = transform_recursive(join.this, transform_fn)?;
1507 if let Some(on) = join.on.take() {
1508 join.on = Some(transform_recursive(on, transform_fn)?);
1509 }
1510 match transform_fn(Expression::Join(Box::new(join)))? {
1511 Expression::Join(j) => Ok(*j),
1512 _ => Err(crate::error::Error::parse(
1513 "Join transformation returned non-join expression",
1514 0,
1515 0,
1516 0,
1517 0,
1518 )),
1519 }
1520 })
1521 .collect::<Result<Vec<_>>>()?;
1522
1523 select.lateral_views = select
1524 .lateral_views
1525 .into_iter()
1526 .map(|mut lv| {
1527 lv.this = transform_recursive(lv.this, transform_fn)?;
1528 Ok(lv)
1529 })
1530 .collect::<Result<Vec<_>>>()?;
1531
1532 if let Some(mut with) = select.with.take() {
1533 with.ctes = with
1534 .ctes
1535 .into_iter()
1536 .map(|mut cte| {
1537 let original = cte.this.clone();
1538 cte.this =
1539 transform_recursive(cte.this, transform_fn).unwrap_or(original);
1540 cte
1541 })
1542 .collect();
1543 select.with = Some(with);
1544 }
1545
1546 if let Some(mut order) = select.order_by.take() {
1547 order.expressions = order
1548 .expressions
1549 .into_iter()
1550 .map(|o| {
1551 let mut o = o;
1552 let original = o.this.clone();
1553 o.this =
1554 transform_recursive(o.this, transform_fn).unwrap_or(original);
1555 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1556 Ok(Expression::Ordered(transformed)) => *transformed,
1557 Ok(_) | Err(_) => o,
1558 }
1559 })
1560 .collect();
1561 select.order_by = Some(order);
1562 }
1563
1564 if let Some(ref mut windows) = select.windows {
1565 for nw in windows.iter_mut() {
1566 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1567 .into_iter()
1568 .map(|o| {
1569 let mut o = o;
1570 let original = o.this.clone();
1571 o.this = transform_recursive(o.this, transform_fn)
1572 .unwrap_or(original);
1573 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1574 Ok(Expression::Ordered(transformed)) => *transformed,
1575 Ok(_) | Err(_) => o,
1576 }
1577 })
1578 .collect();
1579 }
1580 }
1581
1582 results.push(transform_fn(Expression::Select(Box::new(select)))?);
1583 }
1584 FinishTask::SetOp(expr) => {
1585 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1586 let left = children.next().expect("left child");
1587 let right = children.next().expect("right child");
1588
1589 let rebuilt = match expr {
1590 Expression::Union(mut union) => {
1591 union.left = left;
1592 union.right = right;
1593 if let Some(mut with) = union.with.take() {
1594 with.ctes = with
1595 .ctes
1596 .into_iter()
1597 .map(|mut cte| {
1598 let original = cte.this.clone();
1599 cte.this = transform_recursive(cte.this, transform_fn)
1600 .unwrap_or(original);
1601 cte
1602 })
1603 .collect();
1604 union.with = Some(with);
1605 }
1606 Expression::Union(union)
1607 }
1608 Expression::Intersect(mut intersect) => {
1609 intersect.left = left;
1610 intersect.right = right;
1611 if let Some(mut with) = intersect.with.take() {
1612 with.ctes = with
1613 .ctes
1614 .into_iter()
1615 .map(|mut cte| {
1616 let original = cte.this.clone();
1617 cte.this = transform_recursive(cte.this, transform_fn)
1618 .unwrap_or(original);
1619 cte
1620 })
1621 .collect();
1622 intersect.with = Some(with);
1623 }
1624 Expression::Intersect(intersect)
1625 }
1626 Expression::Except(mut except) => {
1627 except.left = left;
1628 except.right = right;
1629 if let Some(mut with) = except.with.take() {
1630 with.ctes = with
1631 .ctes
1632 .into_iter()
1633 .map(|mut cte| {
1634 let original = cte.this.clone();
1635 cte.this = transform_recursive(cte.this, transform_fn)
1636 .unwrap_or(original);
1637 cte
1638 })
1639 .collect();
1640 except.with = Some(with);
1641 }
1642 Expression::Except(except)
1643 }
1644 _ => {
1645 return Err(crate::error::Error::Internal(
1646 "unexpected set-op transform task".to_string(),
1647 ));
1648 }
1649 };
1650 results.push(transform_fn(rebuilt)?);
1651 }
1652 },
1653 }
1654 }
1655
1656 match results.len() {
1657 1 => Ok(results.pop().expect("single transform result")),
1658 _ => Err(crate::error::Error::Internal(
1659 "unexpected transform result stack size".to_string(),
1660 )),
1661 }
1662}
1663
1664fn transform_recursive_reference<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
1665where
1666 F: Fn(Expression) -> Result<Expression>,
1667{
1668 use crate::expressions::BinaryOp;
1669
// Rebuilds an aggregate-function node after running `transform_recursive` over each of
// its child expressions, in this order: `this`, the optional `filter`, every `order_by`
// key, the expression stored in `having_max`, and the optional `limit`.
// Evaluates to `Expression::$variant` wrapping the updated node; the first child that
// fails to transform aborts the enclosing function via `?`.
macro_rules! recurse_agg {
    ($variant:ident, $f:expr) => {{
        let mut agg = $f;
        agg.this = transform_recursive(agg.this, transform_fn)?;
        agg.filter = agg
            .filter
            .take()
            .map(|cond| transform_recursive(cond, transform_fn))
            .transpose()?;
        for key in &mut agg.order_by {
            // Park a placeholder NULL in the slot so the key can be moved into the call.
            let taken =
                std::mem::replace(&mut key.this, Expression::Null(crate::expressions::Null));
            key.this = transform_recursive(taken, transform_fn)?;
        }
        if let Some((boxed, _)) = agg.having_max.as_mut() {
            // Same placeholder trick, this time for the boxed HAVING MAX expression.
            let taken =
                std::mem::replace(boxed.as_mut(), Expression::Null(crate::expressions::Null));
            *boxed = Box::new(transform_recursive(taken, transform_fn)?);
        }
        agg.limit = match agg.limit.take() {
            Some(limit) => Some(Box::new(transform_recursive(*limit, transform_fn)?)),
            None => None,
        };
        Expression::$variant(agg)
    }};
}
1696
// Rebuilds a binary-operator node: both operands go through `transform_recursive`
// (left first, then right; an error in either aborts the enclosing function via `?`),
// while the comment fields and `inferred_type` are carried over from `$op` unchanged.
// Evaluates to `Expression::$variant` holding a freshly boxed `BinaryOp`.
//
// `$op` is spelled out once per field, so pass a plain place expression such as `*op`.
macro_rules! transform_binary {
    ($variant:ident, $op:expr) => {{
        // Struct-literal fields are evaluated in the order written: left, then right.
        let rebuilt = BinaryOp {
            left: transform_recursive($op.left, transform_fn)?,
            right: transform_recursive($op.right, transform_fn)?,
            left_comments: $op.left_comments,
            operator_comments: $op.operator_comments,
            trailing_comments: $op.trailing_comments,
            inferred_type: $op.inferred_type,
        };
        Expression::$variant(Box::new(rebuilt))
    }};
}
1712
1713 // Fast path: leaf nodes never need child traversal, apply transform directly
1714 if matches!(
1715 &expr,
1716 Expression::Literal(_)
1717 | Expression::Boolean(_)
1718 | Expression::Null(_)
1719 | Expression::Identifier(_)
1720 | Expression::Star(_)
1721 | Expression::Parameter(_)
1722 | Expression::Placeholder(_)
1723 | Expression::SessionParameter(_)
1724 ) {
1725 return transform_fn(expr);
1726 }
1727
1728 // First recursively transform children, then apply the transform function
1729 let expr = match expr {
1730 Expression::Select(mut select) => {
1731 select.expressions = select
1732 .expressions
1733 .into_iter()
1734 .map(|e| transform_recursive(e, transform_fn))
1735 .collect::<Result<Vec<_>>>()?;
1736
1737 // Transform FROM clause
1738 if let Some(mut from) = select.from.take() {
1739 from.expressions = from
1740 .expressions
1741 .into_iter()
1742 .map(|e| transform_recursive(e, transform_fn))
1743 .collect::<Result<Vec<_>>>()?;
1744 select.from = Some(from);
1745 }
1746
1747 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
1748 select.joins = select
1749 .joins
1750 .into_iter()
1751 .map(|mut join| {
1752 join.this = transform_recursive(join.this, transform_fn)?;
1753 if let Some(on) = join.on.take() {
1754 join.on = Some(transform_recursive(on, transform_fn)?);
1755 }
1756 // Wrap join in Expression::Join to allow transform_fn to transform it
1757 match transform_fn(Expression::Join(Box::new(join)))? {
1758 Expression::Join(j) => Ok(*j),
1759 _ => Err(crate::error::Error::parse(
1760 "Join transformation returned non-join expression",
1761 0,
1762 0,
1763 0,
1764 0,
1765 )),
1766 }
1767 })
1768 .collect::<Result<Vec<_>>>()?;
1769
1770 // Transform LATERAL VIEW expressions (Hive/Spark)
1771 select.lateral_views = select
1772 .lateral_views
1773 .into_iter()
1774 .map(|mut lv| {
1775 lv.this = transform_recursive(lv.this, transform_fn)?;
1776 Ok(lv)
1777 })
1778 .collect::<Result<Vec<_>>>()?;
1779
1780 // Transform WHERE clause
1781 if let Some(mut where_clause) = select.where_clause.take() {
1782 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1783 select.where_clause = Some(where_clause);
1784 }
1785
1786 // Transform GROUP BY
1787 if let Some(mut group_by) = select.group_by.take() {
1788 group_by.expressions = group_by
1789 .expressions
1790 .into_iter()
1791 .map(|e| transform_recursive(e, transform_fn))
1792 .collect::<Result<Vec<_>>>()?;
1793 select.group_by = Some(group_by);
1794 }
1795
1796 // Transform HAVING
1797 if let Some(mut having) = select.having.take() {
1798 having.this = transform_recursive(having.this, transform_fn)?;
1799 select.having = Some(having);
1800 }
1801
1802 // Transform WITH (CTEs)
1803 if let Some(mut with) = select.with.take() {
1804 with.ctes = with
1805 .ctes
1806 .into_iter()
1807 .map(|mut cte| {
1808 let original = cte.this.clone();
1809 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1810 cte
1811 })
1812 .collect();
1813 select.with = Some(with);
1814 }
1815
1816 // Transform ORDER BY
1817 if let Some(mut order) = select.order_by.take() {
1818 order.expressions = order
1819 .expressions
1820 .into_iter()
1821 .map(|o| {
1822 let mut o = o;
1823 let original = o.this.clone();
1824 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1825 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
1826 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1827 Ok(Expression::Ordered(transformed)) => *transformed,
1828 Ok(_) | Err(_) => o,
1829 }
1830 })
1831 .collect();
1832 select.order_by = Some(order);
1833 }
1834
1835 // Transform WINDOW clause order_by
1836 if let Some(ref mut windows) = select.windows {
1837 for nw in windows.iter_mut() {
1838 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1839 .into_iter()
1840 .map(|o| {
1841 let mut o = o;
1842 let original = o.this.clone();
1843 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1844 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1845 Ok(Expression::Ordered(transformed)) => *transformed,
1846 Ok(_) | Err(_) => o,
1847 }
1848 })
1849 .collect();
1850 }
1851 }
1852
1853 // Transform QUALIFY
1854 if let Some(mut qual) = select.qualify.take() {
1855 qual.this = transform_recursive(qual.this, transform_fn)?;
1856 select.qualify = Some(qual);
1857 }
1858
1859 Expression::Select(select)
1860 }
1861 Expression::Function(mut f) => {
1862 f.args = f
1863 .args
1864 .into_iter()
1865 .map(|e| transform_recursive(e, transform_fn))
1866 .collect::<Result<Vec<_>>>()?;
1867 Expression::Function(f)
1868 }
1869 Expression::AggregateFunction(mut f) => {
1870 f.args = f
1871 .args
1872 .into_iter()
1873 .map(|e| transform_recursive(e, transform_fn))
1874 .collect::<Result<Vec<_>>>()?;
1875 if let Some(filter) = f.filter {
1876 f.filter = Some(transform_recursive(filter, transform_fn)?);
1877 }
1878 Expression::AggregateFunction(f)
1879 }
1880 Expression::WindowFunction(mut wf) => {
1881 wf.this = transform_recursive(wf.this, transform_fn)?;
1882 wf.over.partition_by = wf
1883 .over
1884 .partition_by
1885 .into_iter()
1886 .map(|e| transform_recursive(e, transform_fn))
1887 .collect::<Result<Vec<_>>>()?;
1888 // Transform order_by items through Expression::Ordered wrapper
1889 wf.over.order_by = wf
1890 .over
1891 .order_by
1892 .into_iter()
1893 .map(|o| {
1894 let mut o = o;
1895 o.this = transform_recursive(o.this, transform_fn)?;
1896 match transform_fn(Expression::Ordered(Box::new(o)))? {
1897 Expression::Ordered(transformed) => Ok(*transformed),
1898 _ => Err(crate::error::Error::parse(
1899 "Ordered transformation returned non-Ordered expression",
1900 0,
1901 0,
1902 0,
1903 0,
1904 )),
1905 }
1906 })
1907 .collect::<Result<Vec<_>>>()?;
1908 Expression::WindowFunction(wf)
1909 }
1910 Expression::Alias(mut a) => {
1911 a.this = transform_recursive(a.this, transform_fn)?;
1912 Expression::Alias(a)
1913 }
1914 Expression::Cast(mut c) => {
1915 c.this = transform_recursive(c.this, transform_fn)?;
1916 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
1917 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1918 Expression::Cast(c)
1919 }
1920 Expression::And(op) => transform_binary!(And, *op),
1921 Expression::Or(op) => transform_binary!(Or, *op),
1922 Expression::Add(op) => transform_binary!(Add, *op),
1923 Expression::Sub(op) => transform_binary!(Sub, *op),
1924 Expression::Mul(op) => transform_binary!(Mul, *op),
1925 Expression::Div(op) => transform_binary!(Div, *op),
1926 Expression::Eq(op) => transform_binary!(Eq, *op),
1927 Expression::Lt(op) => transform_binary!(Lt, *op),
1928 Expression::Gt(op) => transform_binary!(Gt, *op),
1929 Expression::Paren(mut p) => {
1930 p.this = transform_recursive(p.this, transform_fn)?;
1931 Expression::Paren(p)
1932 }
1933 Expression::Coalesce(mut f) => {
1934 f.expressions = f
1935 .expressions
1936 .into_iter()
1937 .map(|e| transform_recursive(e, transform_fn))
1938 .collect::<Result<Vec<_>>>()?;
1939 Expression::Coalesce(f)
1940 }
1941 Expression::IfNull(mut f) => {
1942 f.this = transform_recursive(f.this, transform_fn)?;
1943 f.expression = transform_recursive(f.expression, transform_fn)?;
1944 Expression::IfNull(f)
1945 }
1946 Expression::Nvl(mut f) => {
1947 f.this = transform_recursive(f.this, transform_fn)?;
1948 f.expression = transform_recursive(f.expression, transform_fn)?;
1949 Expression::Nvl(f)
1950 }
1951 Expression::In(mut i) => {
1952 i.this = transform_recursive(i.this, transform_fn)?;
1953 i.expressions = i
1954 .expressions
1955 .into_iter()
1956 .map(|e| transform_recursive(e, transform_fn))
1957 .collect::<Result<Vec<_>>>()?;
1958 if let Some(query) = i.query {
1959 i.query = Some(transform_recursive(query, transform_fn)?);
1960 }
1961 Expression::In(i)
1962 }
1963 Expression::Not(mut n) => {
1964 n.this = transform_recursive(n.this, transform_fn)?;
1965 Expression::Not(n)
1966 }
1967 Expression::ArraySlice(mut s) => {
1968 s.this = transform_recursive(s.this, transform_fn)?;
1969 if let Some(start) = s.start {
1970 s.start = Some(transform_recursive(start, transform_fn)?);
1971 }
1972 if let Some(end) = s.end {
1973 s.end = Some(transform_recursive(end, transform_fn)?);
1974 }
1975 Expression::ArraySlice(s)
1976 }
1977 Expression::Subscript(mut s) => {
1978 s.this = transform_recursive(s.this, transform_fn)?;
1979 s.index = transform_recursive(s.index, transform_fn)?;
1980 Expression::Subscript(s)
1981 }
1982 Expression::Array(mut a) => {
1983 a.expressions = a
1984 .expressions
1985 .into_iter()
1986 .map(|e| transform_recursive(e, transform_fn))
1987 .collect::<Result<Vec<_>>>()?;
1988 Expression::Array(a)
1989 }
1990 Expression::Struct(mut s) => {
1991 let mut new_fields = Vec::new();
1992 for (name, expr) in s.fields {
1993 let transformed = transform_recursive(expr, transform_fn)?;
1994 new_fields.push((name, transformed));
1995 }
1996 s.fields = new_fields;
1997 Expression::Struct(s)
1998 }
1999 Expression::NamedArgument(mut na) => {
2000 na.value = transform_recursive(na.value, transform_fn)?;
2001 Expression::NamedArgument(na)
2002 }
2003 Expression::MapFunc(mut m) => {
2004 m.keys = m
2005 .keys
2006 .into_iter()
2007 .map(|e| transform_recursive(e, transform_fn))
2008 .collect::<Result<Vec<_>>>()?;
2009 m.values = m
2010 .values
2011 .into_iter()
2012 .map(|e| transform_recursive(e, transform_fn))
2013 .collect::<Result<Vec<_>>>()?;
2014 Expression::MapFunc(m)
2015 }
2016 Expression::ArrayFunc(mut a) => {
2017 a.expressions = a
2018 .expressions
2019 .into_iter()
2020 .map(|e| transform_recursive(e, transform_fn))
2021 .collect::<Result<Vec<_>>>()?;
2022 Expression::ArrayFunc(a)
2023 }
2024 Expression::Lambda(mut l) => {
2025 l.body = transform_recursive(l.body, transform_fn)?;
2026 Expression::Lambda(l)
2027 }
2028 Expression::JsonExtract(mut f) => {
2029 f.this = transform_recursive(f.this, transform_fn)?;
2030 f.path = transform_recursive(f.path, transform_fn)?;
2031 Expression::JsonExtract(f)
2032 }
2033 Expression::JsonExtractScalar(mut f) => {
2034 f.this = transform_recursive(f.this, transform_fn)?;
2035 f.path = transform_recursive(f.path, transform_fn)?;
2036 Expression::JsonExtractScalar(f)
2037 }
2038
2039 // ===== UnaryFunc-based expressions =====
2040 // These all have a single `this: Expression` child
2041 Expression::Length(mut f) => {
2042 f.this = transform_recursive(f.this, transform_fn)?;
2043 Expression::Length(f)
2044 }
2045 Expression::Upper(mut f) => {
2046 f.this = transform_recursive(f.this, transform_fn)?;
2047 Expression::Upper(f)
2048 }
2049 Expression::Lower(mut f) => {
2050 f.this = transform_recursive(f.this, transform_fn)?;
2051 Expression::Lower(f)
2052 }
2053 Expression::LTrim(mut f) => {
2054 f.this = transform_recursive(f.this, transform_fn)?;
2055 Expression::LTrim(f)
2056 }
2057 Expression::RTrim(mut f) => {
2058 f.this = transform_recursive(f.this, transform_fn)?;
2059 Expression::RTrim(f)
2060 }
2061 Expression::Reverse(mut f) => {
2062 f.this = transform_recursive(f.this, transform_fn)?;
2063 Expression::Reverse(f)
2064 }
2065 Expression::Abs(mut f) => {
2066 f.this = transform_recursive(f.this, transform_fn)?;
2067 Expression::Abs(f)
2068 }
2069 Expression::Ceil(mut f) => {
2070 f.this = transform_recursive(f.this, transform_fn)?;
2071 Expression::Ceil(f)
2072 }
2073 Expression::Floor(mut f) => {
2074 f.this = transform_recursive(f.this, transform_fn)?;
2075 Expression::Floor(f)
2076 }
2077 Expression::Sign(mut f) => {
2078 f.this = transform_recursive(f.this, transform_fn)?;
2079 Expression::Sign(f)
2080 }
2081 Expression::Sqrt(mut f) => {
2082 f.this = transform_recursive(f.this, transform_fn)?;
2083 Expression::Sqrt(f)
2084 }
2085 Expression::Cbrt(mut f) => {
2086 f.this = transform_recursive(f.this, transform_fn)?;
2087 Expression::Cbrt(f)
2088 }
2089 Expression::Ln(mut f) => {
2090 f.this = transform_recursive(f.this, transform_fn)?;
2091 Expression::Ln(f)
2092 }
2093 Expression::Log(mut f) => {
2094 f.this = transform_recursive(f.this, transform_fn)?;
2095 if let Some(base) = f.base {
2096 f.base = Some(transform_recursive(base, transform_fn)?);
2097 }
2098 Expression::Log(f)
2099 }
2100 Expression::Exp(mut f) => {
2101 f.this = transform_recursive(f.this, transform_fn)?;
2102 Expression::Exp(f)
2103 }
2104 Expression::Date(mut f) => {
2105 f.this = transform_recursive(f.this, transform_fn)?;
2106 Expression::Date(f)
2107 }
2108 Expression::Stddev(f) => recurse_agg!(Stddev, f),
2109 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
2110 Expression::Variance(f) => recurse_agg!(Variance, f),
2111
2112 // ===== BinaryFunc-based expressions =====
2113 Expression::ModFunc(mut f) => {
2114 f.this = transform_recursive(f.this, transform_fn)?;
2115 f.expression = transform_recursive(f.expression, transform_fn)?;
2116 Expression::ModFunc(f)
2117 }
2118 Expression::Power(mut f) => {
2119 f.this = transform_recursive(f.this, transform_fn)?;
2120 f.expression = transform_recursive(f.expression, transform_fn)?;
2121 Expression::Power(f)
2122 }
2123 Expression::MapFromArrays(mut f) => {
2124 f.this = transform_recursive(f.this, transform_fn)?;
2125 f.expression = transform_recursive(f.expression, transform_fn)?;
2126 Expression::MapFromArrays(f)
2127 }
2128 Expression::ElementAt(mut f) => {
2129 f.this = transform_recursive(f.this, transform_fn)?;
2130 f.expression = transform_recursive(f.expression, transform_fn)?;
2131 Expression::ElementAt(f)
2132 }
2133 Expression::MapContainsKey(mut f) => {
2134 f.this = transform_recursive(f.this, transform_fn)?;
2135 f.expression = transform_recursive(f.expression, transform_fn)?;
2136 Expression::MapContainsKey(f)
2137 }
2138 Expression::Left(mut f) => {
2139 f.this = transform_recursive(f.this, transform_fn)?;
2140 f.length = transform_recursive(f.length, transform_fn)?;
2141 Expression::Left(f)
2142 }
2143 Expression::Right(mut f) => {
2144 f.this = transform_recursive(f.this, transform_fn)?;
2145 f.length = transform_recursive(f.length, transform_fn)?;
2146 Expression::Right(f)
2147 }
2148 Expression::Repeat(mut f) => {
2149 f.this = transform_recursive(f.this, transform_fn)?;
2150 f.times = transform_recursive(f.times, transform_fn)?;
2151 Expression::Repeat(f)
2152 }
2153
2154 // ===== Complex function expressions =====
2155 Expression::Substring(mut f) => {
2156 f.this = transform_recursive(f.this, transform_fn)?;
2157 f.start = transform_recursive(f.start, transform_fn)?;
2158 if let Some(len) = f.length {
2159 f.length = Some(transform_recursive(len, transform_fn)?);
2160 }
2161 Expression::Substring(f)
2162 }
2163 Expression::Replace(mut f) => {
2164 f.this = transform_recursive(f.this, transform_fn)?;
2165 f.old = transform_recursive(f.old, transform_fn)?;
2166 f.new = transform_recursive(f.new, transform_fn)?;
2167 Expression::Replace(f)
2168 }
2169 Expression::ConcatWs(mut f) => {
2170 f.separator = transform_recursive(f.separator, transform_fn)?;
2171 f.expressions = f
2172 .expressions
2173 .into_iter()
2174 .map(|e| transform_recursive(e, transform_fn))
2175 .collect::<Result<Vec<_>>>()?;
2176 Expression::ConcatWs(f)
2177 }
2178 Expression::Trim(mut f) => {
2179 f.this = transform_recursive(f.this, transform_fn)?;
2180 if let Some(chars) = f.characters {
2181 f.characters = Some(transform_recursive(chars, transform_fn)?);
2182 }
2183 Expression::Trim(f)
2184 }
2185 Expression::Split(mut f) => {
2186 f.this = transform_recursive(f.this, transform_fn)?;
2187 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
2188 Expression::Split(f)
2189 }
2190 Expression::Lpad(mut f) => {
2191 f.this = transform_recursive(f.this, transform_fn)?;
2192 f.length = transform_recursive(f.length, transform_fn)?;
2193 if let Some(fill) = f.fill {
2194 f.fill = Some(transform_recursive(fill, transform_fn)?);
2195 }
2196 Expression::Lpad(f)
2197 }
2198 Expression::Rpad(mut f) => {
2199 f.this = transform_recursive(f.this, transform_fn)?;
2200 f.length = transform_recursive(f.length, transform_fn)?;
2201 if let Some(fill) = f.fill {
2202 f.fill = Some(transform_recursive(fill, transform_fn)?);
2203 }
2204 Expression::Rpad(f)
2205 }
2206
2207 // ===== Conditional expressions =====
2208 Expression::Case(mut c) => {
2209 if let Some(operand) = c.operand {
2210 c.operand = Some(transform_recursive(operand, transform_fn)?);
2211 }
2212 c.whens = c
2213 .whens
2214 .into_iter()
2215 .map(|(cond, then)| {
2216 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
2217 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
2218 (new_cond, new_then)
2219 })
2220 .collect();
2221 if let Some(else_expr) = c.else_ {
2222 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
2223 }
2224 Expression::Case(c)
2225 }
2226 Expression::IfFunc(mut f) => {
2227 f.condition = transform_recursive(f.condition, transform_fn)?;
2228 f.true_value = transform_recursive(f.true_value, transform_fn)?;
2229 if let Some(false_val) = f.false_value {
2230 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
2231 }
2232 Expression::IfFunc(f)
2233 }
2234
2235 // ===== Date/Time expressions =====
2236 Expression::DateAdd(mut f) => {
2237 f.this = transform_recursive(f.this, transform_fn)?;
2238 f.interval = transform_recursive(f.interval, transform_fn)?;
2239 Expression::DateAdd(f)
2240 }
2241 Expression::DateSub(mut f) => {
2242 f.this = transform_recursive(f.this, transform_fn)?;
2243 f.interval = transform_recursive(f.interval, transform_fn)?;
2244 Expression::DateSub(f)
2245 }
2246 Expression::DateDiff(mut f) => {
2247 f.this = transform_recursive(f.this, transform_fn)?;
2248 f.expression = transform_recursive(f.expression, transform_fn)?;
2249 Expression::DateDiff(f)
2250 }
2251 Expression::DateTrunc(mut f) => {
2252 f.this = transform_recursive(f.this, transform_fn)?;
2253 Expression::DateTrunc(f)
2254 }
2255 Expression::Extract(mut f) => {
2256 f.this = transform_recursive(f.this, transform_fn)?;
2257 Expression::Extract(f)
2258 }
2259
2260 // ===== JSON expressions =====
2261 Expression::JsonObject(mut f) => {
2262 f.pairs = f
2263 .pairs
2264 .into_iter()
2265 .map(|(k, v)| {
2266 let new_k = transform_recursive(k, transform_fn)?;
2267 let new_v = transform_recursive(v, transform_fn)?;
2268 Ok((new_k, new_v))
2269 })
2270 .collect::<Result<Vec<_>>>()?;
2271 Expression::JsonObject(f)
2272 }
2273
2274 // ===== Subquery expressions =====
2275 Expression::Subquery(mut s) => {
2276 s.this = transform_recursive(s.this, transform_fn)?;
2277 Expression::Subquery(s)
2278 }
2279 Expression::Exists(mut e) => {
2280 e.this = transform_recursive(e.this, transform_fn)?;
2281 Expression::Exists(e)
2282 }
2283 Expression::Describe(mut d) => {
2284 d.target = transform_recursive(d.target, transform_fn)?;
2285 Expression::Describe(d)
2286 }
2287
2288 // ===== Set operations =====
2289 Expression::Union(mut u) => {
2290 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
2291 u.left = transform_recursive(left, transform_fn)?;
2292 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
2293 u.right = transform_recursive(right, transform_fn)?;
2294 if let Some(mut with) = u.with.take() {
2295 with.ctes = with
2296 .ctes
2297 .into_iter()
2298 .map(|mut cte| {
2299 let original = cte.this.clone();
2300 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2301 cte
2302 })
2303 .collect();
2304 u.with = Some(with);
2305 }
2306 Expression::Union(u)
2307 }
2308 Expression::Intersect(mut i) => {
2309 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
2310 i.left = transform_recursive(left, transform_fn)?;
2311 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
2312 i.right = transform_recursive(right, transform_fn)?;
2313 if let Some(mut with) = i.with.take() {
2314 with.ctes = with
2315 .ctes
2316 .into_iter()
2317 .map(|mut cte| {
2318 let original = cte.this.clone();
2319 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2320 cte
2321 })
2322 .collect();
2323 i.with = Some(with);
2324 }
2325 Expression::Intersect(i)
2326 }
2327 Expression::Except(mut e) => {
2328 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
2329 e.left = transform_recursive(left, transform_fn)?;
2330 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
2331 e.right = transform_recursive(right, transform_fn)?;
2332 if let Some(mut with) = e.with.take() {
2333 with.ctes = with
2334 .ctes
2335 .into_iter()
2336 .map(|mut cte| {
2337 let original = cte.this.clone();
2338 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2339 cte
2340 })
2341 .collect();
2342 e.with = Some(with);
2343 }
2344 Expression::Except(e)
2345 }
2346
2347 // ===== DML expressions =====
2348 Expression::Insert(mut ins) => {
2349 // Transform VALUES clause expressions
2350 let mut new_values = Vec::new();
2351 for row in ins.values {
2352 let mut new_row = Vec::new();
2353 for e in row {
2354 new_row.push(transform_recursive(e, transform_fn)?);
2355 }
2356 new_values.push(new_row);
2357 }
2358 ins.values = new_values;
2359
2360 // Transform query (for INSERT ... SELECT)
2361 if let Some(query) = ins.query {
2362 ins.query = Some(transform_recursive(query, transform_fn)?);
2363 }
2364
2365 // Transform RETURNING clause
2366 let mut new_returning = Vec::new();
2367 for e in ins.returning {
2368 new_returning.push(transform_recursive(e, transform_fn)?);
2369 }
2370 ins.returning = new_returning;
2371
2372 // Transform ON CONFLICT clause
2373 if let Some(on_conflict) = ins.on_conflict {
2374 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
2375 }
2376
2377 Expression::Insert(ins)
2378 }
2379 Expression::Update(mut upd) => {
2380 upd.set = upd
2381 .set
2382 .into_iter()
2383 .map(|(id, val)| {
2384 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
2385 (id, new_val)
2386 })
2387 .collect();
2388 if let Some(mut where_clause) = upd.where_clause.take() {
2389 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2390 upd.where_clause = Some(where_clause);
2391 }
2392 Expression::Update(upd)
2393 }
2394 Expression::Delete(mut del) => {
2395 if let Some(mut where_clause) = del.where_clause.take() {
2396 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2397 del.where_clause = Some(where_clause);
2398 }
2399 Expression::Delete(del)
2400 }
2401
2402 // ===== CTE expressions =====
2403 Expression::With(mut w) => {
2404 w.ctes = w
2405 .ctes
2406 .into_iter()
2407 .map(|mut cte| {
2408 let original = cte.this.clone();
2409 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2410 cte
2411 })
2412 .collect();
2413 Expression::With(w)
2414 }
2415 Expression::Cte(mut c) => {
2416 c.this = transform_recursive(c.this, transform_fn)?;
2417 Expression::Cte(c)
2418 }
2419
2420 // ===== Order expressions =====
2421 Expression::Ordered(mut o) => {
2422 o.this = transform_recursive(o.this, transform_fn)?;
2423 Expression::Ordered(o)
2424 }
2425
2426 // ===== Negation =====
2427 Expression::Neg(mut n) => {
2428 n.this = transform_recursive(n.this, transform_fn)?;
2429 Expression::Neg(n)
2430 }
2431
2432 // ===== Between =====
2433 Expression::Between(mut b) => {
2434 b.this = transform_recursive(b.this, transform_fn)?;
2435 b.low = transform_recursive(b.low, transform_fn)?;
2436 b.high = transform_recursive(b.high, transform_fn)?;
2437 Expression::Between(b)
2438 }
2439 Expression::IsNull(mut i) => {
2440 i.this = transform_recursive(i.this, transform_fn)?;
2441 Expression::IsNull(i)
2442 }
2443 Expression::IsTrue(mut i) => {
2444 i.this = transform_recursive(i.this, transform_fn)?;
2445 Expression::IsTrue(i)
2446 }
2447 Expression::IsFalse(mut i) => {
2448 i.this = transform_recursive(i.this, transform_fn)?;
2449 Expression::IsFalse(i)
2450 }
2451
2452 // ===== Like expressions =====
2453 Expression::Like(mut l) => {
2454 l.left = transform_recursive(l.left, transform_fn)?;
2455 l.right = transform_recursive(l.right, transform_fn)?;
2456 Expression::Like(l)
2457 }
2458 Expression::ILike(mut l) => {
2459 l.left = transform_recursive(l.left, transform_fn)?;
2460 l.right = transform_recursive(l.right, transform_fn)?;
2461 Expression::ILike(l)
2462 }
2463
2464 // ===== Additional binary ops not covered by macro =====
2465 Expression::Neq(op) => transform_binary!(Neq, *op),
2466 Expression::Lte(op) => transform_binary!(Lte, *op),
2467 Expression::Gte(op) => transform_binary!(Gte, *op),
2468 Expression::Mod(op) => transform_binary!(Mod, *op),
2469 Expression::Concat(op) => transform_binary!(Concat, *op),
2470 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
2471 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
2472 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
2473 Expression::Is(op) => transform_binary!(Is, *op),
2474
2475 // ===== TryCast / SafeCast =====
2476 Expression::TryCast(mut c) => {
2477 c.this = transform_recursive(c.this, transform_fn)?;
2478 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2479 Expression::TryCast(c)
2480 }
2481 Expression::SafeCast(mut c) => {
2482 c.this = transform_recursive(c.this, transform_fn)?;
2483 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2484 Expression::SafeCast(c)
2485 }
2486
2487 // ===== Misc =====
2488 Expression::Unnest(mut f) => {
2489 f.this = transform_recursive(f.this, transform_fn)?;
2490 f.expressions = f
2491 .expressions
2492 .into_iter()
2493 .map(|e| transform_recursive(e, transform_fn))
2494 .collect::<Result<Vec<_>>>()?;
2495 Expression::Unnest(f)
2496 }
2497 Expression::Explode(mut f) => {
2498 f.this = transform_recursive(f.this, transform_fn)?;
2499 Expression::Explode(f)
2500 }
2501 Expression::GroupConcat(mut f) => {
2502 f.this = transform_recursive(f.this, transform_fn)?;
2503 Expression::GroupConcat(f)
2504 }
2505 Expression::StringAgg(mut f) => {
2506 f.this = transform_recursive(f.this, transform_fn)?;
2507 Expression::StringAgg(f)
2508 }
2509 Expression::ListAgg(mut f) => {
2510 f.this = transform_recursive(f.this, transform_fn)?;
2511 Expression::ListAgg(f)
2512 }
2513 Expression::ArrayAgg(mut f) => {
2514 f.this = transform_recursive(f.this, transform_fn)?;
2515 Expression::ArrayAgg(f)
2516 }
2517 Expression::ParseJson(mut f) => {
2518 f.this = transform_recursive(f.this, transform_fn)?;
2519 Expression::ParseJson(f)
2520 }
2521 Expression::ToJson(mut f) => {
2522 f.this = transform_recursive(f.this, transform_fn)?;
2523 Expression::ToJson(f)
2524 }
2525 Expression::JSONExtract(mut e) => {
2526 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2527 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2528 Expression::JSONExtract(e)
2529 }
2530 Expression::JSONExtractScalar(mut e) => {
2531 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2532 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2533 Expression::JSONExtractScalar(e)
2534 }
2535
2536 // StrToTime: recurse into this
2537 Expression::StrToTime(mut e) => {
2538 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2539 Expression::StrToTime(e)
2540 }
2541
2542 // UnixToTime: recurse into this
2543 Expression::UnixToTime(mut e) => {
2544 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2545 Expression::UnixToTime(e)
2546 }
2547
2548 // CreateTable: recurse into column defaults, on_update expressions, and data types
2549 Expression::CreateTable(mut ct) => {
2550 for col in &mut ct.columns {
2551 if let Some(default_expr) = col.default.take() {
2552 col.default = Some(transform_recursive(default_expr, transform_fn)?);
2553 }
2554 if let Some(on_update_expr) = col.on_update.take() {
2555 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
2556 }
2557 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
2558 // are NOT applied here because per-dialect transforms are designed for CAST/expression
2559 // contexts and may not produce correct results for DDL column definitions.
2560 // The DDL type mappings would need dedicated handling per source/target pair.
2561 }
2562 if let Some(as_select) = ct.as_select.take() {
2563 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
2564 }
2565 Expression::CreateTable(ct)
2566 }
2567
2568 // CreateView: recurse into the view body query
2569 Expression::CreateView(mut cv) => {
2570 cv.query = transform_recursive(cv.query, transform_fn)?;
2571 Expression::CreateView(cv)
2572 }
2573
2574 // CreateTask: recurse into the task body
2575 Expression::CreateTask(mut ct) => {
2576 ct.body = transform_recursive(ct.body, transform_fn)?;
2577 Expression::CreateTask(ct)
2578 }
2579
2580 // CreateProcedure: recurse into body expressions
2581 Expression::CreateProcedure(mut cp) => {
2582 if let Some(body) = cp.body.take() {
2583 cp.body = Some(match body {
2584 FunctionBody::Expression(expr) => {
2585 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2586 }
2587 FunctionBody::Return(expr) => {
2588 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2589 }
2590 FunctionBody::Statements(stmts) => {
2591 let transformed_stmts = stmts
2592 .into_iter()
2593 .map(|s| transform_recursive(s, transform_fn))
2594 .collect::<Result<Vec<_>>>()?;
2595 FunctionBody::Statements(transformed_stmts)
2596 }
2597 other => other,
2598 });
2599 }
2600 Expression::CreateProcedure(cp)
2601 }
2602
2603 // CreateFunction: recurse into body expressions
2604 Expression::CreateFunction(mut cf) => {
2605 if let Some(body) = cf.body.take() {
2606 cf.body = Some(match body {
2607 FunctionBody::Expression(expr) => {
2608 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2609 }
2610 FunctionBody::Return(expr) => {
2611 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2612 }
2613 FunctionBody::Statements(stmts) => {
2614 let transformed_stmts = stmts
2615 .into_iter()
2616 .map(|s| transform_recursive(s, transform_fn))
2617 .collect::<Result<Vec<_>>>()?;
2618 FunctionBody::Statements(transformed_stmts)
2619 }
2620 other => other,
2621 });
2622 }
2623 Expression::CreateFunction(cf)
2624 }
2625
2626 // MemberOf: recurse into left and right operands
2627 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
2628 // ArrayContainsAll (@>): recurse into left and right operands
2629 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
2630 // ArrayContainedBy (<@): recurse into left and right operands
2631 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
2632 // ArrayOverlaps (&&): recurse into left and right operands
2633 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
2634 // TsMatch (@@): recurse into left and right operands
2635 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
2636 // Adjacent (-|-): recurse into left and right operands
2637 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
2638
2639 // Table: recurse into when (HistoricalData) and changes fields
2640 Expression::Table(mut t) => {
2641 if let Some(when) = t.when.take() {
2642 let transformed =
2643 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
2644 if let Expression::HistoricalData(hd) = transformed {
2645 t.when = Some(hd);
2646 }
2647 }
2648 if let Some(changes) = t.changes.take() {
2649 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
2650 if let Expression::Changes(c) = transformed {
2651 t.changes = Some(c);
2652 }
2653 }
2654 Expression::Table(t)
2655 }
2656
2657 // HistoricalData (Snowflake time travel): recurse into expression
2658 Expression::HistoricalData(mut hd) => {
2659 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
2660 Expression::HistoricalData(hd)
2661 }
2662
2663 // Changes (Snowflake CHANGES clause): recurse into at_before and end
2664 Expression::Changes(mut c) => {
2665 if let Some(at_before) = c.at_before.take() {
2666 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
2667 }
2668 if let Some(end) = c.end.take() {
2669 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
2670 }
2671 Expression::Changes(c)
2672 }
2673
2674 // TableArgument: TABLE(expr) or MODEL(expr)
2675 Expression::TableArgument(mut ta) => {
2676 ta.this = transform_recursive(ta.this, transform_fn)?;
2677 Expression::TableArgument(ta)
2678 }
2679
2680 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
2681 Expression::JoinedTable(mut jt) => {
2682 jt.left = transform_recursive(jt.left, transform_fn)?;
2683 for join in &mut jt.joins {
2684 join.this = transform_recursive(
2685 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
2686 transform_fn,
2687 )?;
2688 if let Some(on) = join.on.take() {
2689 join.on = Some(transform_recursive(on, transform_fn)?);
2690 }
2691 }
2692 jt.lateral_views = jt
2693 .lateral_views
2694 .into_iter()
2695 .map(|mut lv| {
2696 lv.this = transform_recursive(lv.this, transform_fn)?;
2697 Ok(lv)
2698 })
2699 .collect::<Result<Vec<_>>>()?;
2700 Expression::JoinedTable(jt)
2701 }
2702
2703 // Lateral: LATERAL func() - recurse into the function expression
2704 Expression::Lateral(mut lat) => {
2705 *lat.this = transform_recursive(*lat.this, transform_fn)?;
2706 Expression::Lateral(lat)
2707 }
2708
2709 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
2710 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
2711 // as a unit together with the WithinGroup wrapper
2712 Expression::WithinGroup(mut wg) => {
2713 wg.order_by = wg
2714 .order_by
2715 .into_iter()
2716 .map(|mut o| {
2717 let original = o.this.clone();
2718 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2719 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2720 Ok(Expression::Ordered(transformed)) => *transformed,
2721 Ok(_) | Err(_) => o,
2722 }
2723 })
2724 .collect();
2725 Expression::WithinGroup(wg)
2726 }
2727
2728 // Filter: recurse into both the aggregate and the filter condition
2729 Expression::Filter(mut f) => {
2730 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
2731 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
2732 Expression::Filter(f)
2733 }
2734
2735 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
2736 // filter, order_by, having_max, and limit.
2737 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
2738 Expression::Sum(f) => recurse_agg!(Sum, f),
2739 Expression::Avg(f) => recurse_agg!(Avg, f),
2740 Expression::Min(f) => recurse_agg!(Min, f),
2741 Expression::Max(f) => recurse_agg!(Max, f),
2742 Expression::CountIf(f) => recurse_agg!(CountIf, f),
2743 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
2744 Expression::VarPop(f) => recurse_agg!(VarPop, f),
2745 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
2746 Expression::Median(f) => recurse_agg!(Median, f),
2747 Expression::Mode(f) => recurse_agg!(Mode, f),
2748 Expression::First(f) => recurse_agg!(First, f),
2749 Expression::Last(f) => recurse_agg!(Last, f),
2750 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
2751 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
2752 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
2753 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
2754 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
2755 Expression::Skewness(f) => recurse_agg!(Skewness, f),
2756 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
2757 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
2758 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
2759 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
2760 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
2761 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
2762
2763 // Count has its own struct with an Option<Expression> `this` field
2764 Expression::Count(mut c) => {
2765 if let Some(this) = c.this.take() {
2766 c.this = Some(transform_recursive(this, transform_fn)?);
2767 }
2768 if let Some(filter) = c.filter.take() {
2769 c.filter = Some(transform_recursive(filter, transform_fn)?);
2770 }
2771 Expression::Count(c)
2772 }
2773
2774 Expression::PipeOperator(mut pipe) => {
2775 pipe.this = transform_recursive(pipe.this, transform_fn)?;
2776 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
2777 Expression::PipeOperator(pipe)
2778 }
2779
2780 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
2781 Expression::ArrayExcept(mut f) => {
2782 f.this = transform_recursive(f.this, transform_fn)?;
2783 f.expression = transform_recursive(f.expression, transform_fn)?;
2784 Expression::ArrayExcept(f)
2785 }
2786 Expression::ArrayContains(mut f) => {
2787 f.this = transform_recursive(f.this, transform_fn)?;
2788 f.expression = transform_recursive(f.expression, transform_fn)?;
2789 Expression::ArrayContains(f)
2790 }
2791 Expression::ArrayDistinct(mut f) => {
2792 f.this = transform_recursive(f.this, transform_fn)?;
2793 Expression::ArrayDistinct(f)
2794 }
2795 Expression::ArrayPosition(mut f) => {
2796 f.this = transform_recursive(f.this, transform_fn)?;
2797 f.expression = transform_recursive(f.expression, transform_fn)?;
2798 Expression::ArrayPosition(f)
2799 }
2800
2801 // Pass through leaf nodes unchanged
2802 other => other,
2803 };
2804
2805 // Then apply the transform function
2806 transform_fn(expr)
2807}
2808
// NOTE: the tokenizer config, generator config, and expression transform closure
// for a built-in dialect type are produced by `configs_for_dialect_type` (defined
// after the cached statics below). It is the shared implementation used by both
// `Dialect::get()` and custom dialect construction.
2812// ---------------------------------------------------------------------------
2813// Cached dialect configurations
2814// ---------------------------------------------------------------------------
2815
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
struct CachedDialectConfig {
    /// Tokenizer settings captured from the dialect's `tokenizer_config()`.
    tokenizer_config: TokenizerConfig,
    /// Generator settings captured from the dialect's `generator_config()`,
    /// held in an `Arc` so handing out a copy is a reference-count bump.
    generator_config: Arc<GeneratorConfig>,
}
2822
/// Generate a feature-gated `LazyLock<CachedDialectConfig>` static for one dialect.
///
/// * `$static_name` - identifier of the static to declare.
/// * `$dialect_struct` - expression yielding the dialect value whose configs are captured.
/// * `$feature` - Cargo feature name that gates the static.
macro_rules! cached_dialect {
    ($static_name:ident, $dialect_struct:expr, $feature:literal) => {
        #[cfg(feature = $feature)]
        static $static_name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect_struct;
            let tokenizer_config = dialect.tokenizer_config();
            let generator_config = Arc::new(dialect.generator_config());
            CachedDialectConfig {
                tokenizer_config,
                generator_config,
            }
        });
    };
}
2836
2837static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
2838 let d = GenericDialect;
2839 CachedDialectConfig {
2840 tokenizer_config: d.tokenizer_config(),
2841 generator_config: Arc::new(d.generator_config()),
2842 }
2843});
2844
// One feature-gated cached config static per built-in dialect. Each static is only
// compiled when its Cargo feature is enabled. `configs_for_dialect_type` refers to
// these statics by name, so the names here must stay in sync with its match arms.
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(
    CACHED_MATERIALIZE,
    MaterializeDialect,
    "dialect-materialize"
);
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(
    CACHED_SINGLESTORE,
    SingleStoreDialect,
    "dialect-singlestore"
);
cached_dialect!(
    CACHED_COCKROACHDB,
    CockroachDBDialect,
    "dialect-cockroachdb"
);
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
2890
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type.
///
/// This is the shared implementation used by both `Dialect::get()` and custom
/// dialect construction. The two configs are cloned out of the dialect's cached
/// static (the generator config clone is an `Arc` clone), while the transform
/// closure is boxed fresh on every call.
///
/// Any `DialectType` without a matching arm, including a dialect whose Cargo
/// feature is disabled, falls back to the generic dialect.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    Arc<GeneratorConfig>,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    /// Clone configs from a cached static and pair with a fresh transform closure.
    macro_rules! from_cache {
        ($cache:expr, $dialect_struct:expr) => {{
            // Dereferencing the `LazyLock` initializes the cached configs on first use.
            let c = &*$cache;
            (
                c.tokenizer_config.clone(),
                c.generator_config.clone(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    // Each arm is gated on the same feature as the static it reads.
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
        // Generic itself, plus anything not matched above.
        _ => from_cache!(CACHED_GENERIC, GenericDialect),
    }
}
2979
2980// ---------------------------------------------------------------------------
2981// Custom dialect registry
2982// ---------------------------------------------------------------------------
2983
/// Process-wide registry of custom dialects, keyed by dialect name.
///
/// Entries are added by `register_custom_dialect`, removed by
/// `unregister_custom_dialect`, and looked up by `get_custom_dialect_config`.
/// Values are `Arc`-wrapped so a lookup can clone the handle out of the map.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
2986
/// Fully resolved configuration of a registered custom dialect.
///
/// Built by `CustomDialectBuilder::register` and stored in the custom dialect registry.
struct CustomDialectConfig {
    /// Name the dialect is registered under (used as the registry key).
    name: String,
    /// Built-in dialect this custom dialect is based on.
    base_dialect: DialectType,
    /// Base dialect's tokenizer config with the builder's tokenizer modifier applied.
    tokenizer_config: TokenizerConfig,
    /// Base dialect's generator config with the builder's generator modifier applied.
    generator_config: GeneratorConfig,
    /// Optional per-node transform; when `None` the base dialect's transform is used.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess hook; when `None` the built-in preprocessing runs.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
2995
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name the dialect will be registered under.
    name: String,
    /// Built-in dialect whose configs are used as the starting point.
    base_dialect: DialectType,
    /// One-shot closure applied to the base tokenizer config during `register`.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot closure applied to the base generator config during `register`.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node transform that replaces the base dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess hook that replaces the built-in preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3031
3032impl CustomDialectBuilder {
3033 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
3034 pub fn new(name: impl Into<String>) -> Self {
3035 Self {
3036 name: name.into(),
3037 base_dialect: DialectType::Generic,
3038 tokenizer_modifier: None,
3039 generator_modifier: None,
3040 transform: None,
3041 preprocess: None,
3042 }
3043 }
3044
3045 /// Set the base built-in dialect to inherit configuration from.
3046 pub fn based_on(mut self, dialect: DialectType) -> Self {
3047 self.base_dialect = dialect;
3048 self
3049 }
3050
3051 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
3052 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
3053 where
3054 F: FnOnce(&mut TokenizerConfig) + 'static,
3055 {
3056 self.tokenizer_modifier = Some(Box::new(f));
3057 self
3058 }
3059
3060 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
3061 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
3062 where
3063 F: FnOnce(&mut GeneratorConfig) + 'static,
3064 {
3065 self.generator_modifier = Some(Box::new(f));
3066 self
3067 }
3068
3069 /// Set a custom per-node expression transform function.
3070 ///
3071 /// This replaces the base dialect's transform. It is called on every expression
3072 /// node during the recursive transform pass.
3073 pub fn transform_fn<F>(mut self, f: F) -> Self
3074 where
3075 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3076 {
3077 self.transform = Some(Arc::new(f));
3078 self
3079 }
3080
3081 /// Set a custom whole-tree preprocessing function.
3082 ///
3083 /// This replaces the base dialect's built-in preprocessing. It is called once
3084 /// on the entire expression tree before the recursive per-node transform.
3085 pub fn preprocess_fn<F>(mut self, f: F) -> Self
3086 where
3087 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3088 {
3089 self.preprocess = Some(Arc::new(f));
3090 self
3091 }
3092
3093 /// Build the custom dialect configuration and register it in the global registry.
3094 ///
3095 /// Returns an error if:
3096 /// - The name collides with a built-in dialect name
3097 /// - A custom dialect with the same name is already registered
3098 pub fn register(self) -> Result<()> {
3099 // Reject names that collide with built-in dialects
3100 if DialectType::from_str(&self.name).is_ok() {
3101 return Err(crate::error::Error::parse(
3102 format!(
3103 "Cannot register custom dialect '{}': name collides with built-in dialect",
3104 self.name
3105 ),
3106 0,
3107 0,
3108 0,
3109 0,
3110 ));
3111 }
3112
3113 // Get base configs
3114 let (mut tok_config, arc_gen_config, _base_transform) =
3115 configs_for_dialect_type(self.base_dialect);
3116 let mut gen_config = (*arc_gen_config).clone();
3117
3118 // Apply modifiers
3119 if let Some(tok_mod) = self.tokenizer_modifier {
3120 tok_mod(&mut tok_config);
3121 }
3122 if let Some(gen_mod) = self.generator_modifier {
3123 gen_mod(&mut gen_config);
3124 }
3125
3126 let config = CustomDialectConfig {
3127 name: self.name.clone(),
3128 base_dialect: self.base_dialect,
3129 tokenizer_config: tok_config,
3130 generator_config: gen_config,
3131 transform: self.transform,
3132 preprocess: self.preprocess,
3133 };
3134
3135 register_custom_dialect(config)
3136 }
3137}
3138
3139use std::str::FromStr;
3140
3141fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
3142 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
3143 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
3144 })?;
3145
3146 if registry.contains_key(&config.name) {
3147 return Err(crate::error::Error::parse(
3148 format!("Custom dialect '{}' is already registered", config.name),
3149 0,
3150 0,
3151 0,
3152 0,
3153 ));
3154 }
3155
3156 registry.insert(config.name.clone(), Arc::new(config));
3157 Ok(())
3158}
3159
3160/// Remove a custom dialect from the global registry.
3161///
3162/// Returns `true` if a dialect with that name was found and removed,
3163/// `false` if no such custom dialect existed.
3164pub fn unregister_custom_dialect(name: &str) -> bool {
3165 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
3166 registry.remove(name).is_some()
3167 } else {
3168 false
3169 }
3170}
3171
3172fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
3173 CUSTOM_DIALECT_REGISTRY
3174 .read()
3175 .ok()
3176 .and_then(|registry| registry.get(name).cloned())
3177}
3178
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Built-in dialect type. For a custom dialect this is the type of its base dialect.
    dialect_type: DialectType,
    /// Tokenizer built from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    /// Generator configuration, shared via `Arc` so generation can reuse it without a deep copy.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node transform applied to every expression during the recursive transform pass.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3211
/// Options for [`Dialect::transpile_with`].
///
/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
/// The struct is marked `#[non_exhaustive]` so new fields can be added without
/// breaking the API.
///
/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
/// Because of the container-level `#[serde(default)]`, fields missing from the
/// serialized input take the value they have in `TranspileOptions::default()`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", default)]
#[non_exhaustive]
pub struct TranspileOptions {
    /// Whether to pretty-print the output SQL.
    pub pretty: bool,
}
3227
impl TranspileOptions {
    /// Construct options with pretty-printing enabled.
    ///
    /// Convenience constructor: the returned value has `pretty` set to `true`.
    pub fn pretty() -> Self {
        Self { pretty: true }
    }
}
3234
/// A value that can be used as the target dialect in [`Dialect::transpile`] /
/// [`Dialect::transpile_with`].
///
/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
/// dialect handle, including custom ones). End users do not normally need to
/// implement this trait themselves.
pub trait TranspileTarget {
    /// Invoke `f` with a reference to the resolved target dialect.
    ///
    /// Consumes `self` and returns whatever `f` returns. The dialect reference is
    /// only guaranteed to be valid for the duration of the call to `f`.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
}
3245
3246impl TranspileTarget for DialectType {
3247 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
3248 f(&Dialect::get(self))
3249 }
3250}
3251
impl TranspileTarget for &Dialect {
    /// Passes the borrowed dialect straight through to `f`; nothing is constructed.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        f(self)
    }
}
3257
3258impl Dialect {
3259 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
3260 ///
3261 /// This is the primary constructor. It initializes the tokenizer, generator config,
3262 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
3263 /// For hybrid dialects like Athena, it also sets up expression-specific generator
3264 /// config routing.
3265 pub fn get(dialect_type: DialectType) -> Self {
3266 let (tokenizer_config, generator_config, transformer) =
3267 configs_for_dialect_type(dialect_type);
3268
3269 // Set up expression-specific generator config for hybrid dialects
3270 let generator_config_for_expr: Option<
3271 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
3272 > = match dialect_type {
3273 #[cfg(feature = "dialect-athena")]
3274 DialectType::Athena => Some(Box::new(|expr| {
3275 AthenaDialect.generator_config_for_expr(expr)
3276 })),
3277 _ => None,
3278 };
3279
3280 Self {
3281 dialect_type,
3282 tokenizer: Tokenizer::new(tokenizer_config),
3283 generator_config,
3284 transformer,
3285 generator_config_for_expr,
3286 custom_preprocess: None,
3287 }
3288 }
3289
3290 /// Look up a dialect by string name.
3291 ///
3292 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
3293 /// falls back to the custom dialect registry. Returns `None` if no dialect
3294 /// with the given name exists.
3295 pub fn get_by_name(name: &str) -> Option<Self> {
3296 // Try built-in first
3297 if let Ok(dt) = DialectType::from_str(name) {
3298 return Some(Self::get(dt));
3299 }
3300
3301 // Try custom registry
3302 let config = get_custom_dialect_config(name)?;
3303 Some(Self::from_custom_config(&config))
3304 }
3305
3306 /// Construct a `Dialect` from a custom dialect configuration.
3307 fn from_custom_config(config: &CustomDialectConfig) -> Self {
3308 // Build the transformer: use custom if provided, else use base dialect's
3309 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
3310 if let Some(ref custom_transform) = config.transform {
3311 let t = Arc::clone(custom_transform);
3312 Box::new(move |e| t(e))
3313 } else {
3314 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
3315 base_transform
3316 };
3317
3318 // Build the custom preprocess: use custom if provided
3319 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
3320 config.preprocess.as_ref().map(|p| {
3321 let p = Arc::clone(p);
3322 Box::new(move |e: Expression| p(e))
3323 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
3324 });
3325
3326 Self {
3327 dialect_type: config.base_dialect,
3328 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
3329 generator_config: Arc::new(config.generator_config.clone()),
3330 transformer,
3331 generator_config_for_expr: None,
3332 custom_preprocess,
3333 }
3334 }
3335
    /// Get the dialect type.
    ///
    /// For a dialect built from a custom configuration this is the type of the
    /// base dialect it was derived from.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
3340
    /// Get the generator configuration.
    ///
    /// Returns a borrow of the dialect's shared generator config; any
    /// per-expression config hook is not consulted here.
    pub fn generator_config(&self) -> &GeneratorConfig {
        // `&Arc<GeneratorConfig>` deref-coerces to `&GeneratorConfig`.
        &self.generator_config
    }
3345
3346 /// Parses a SQL string into a list of [`Expression`] AST nodes.
3347 ///
3348 /// The input may contain multiple semicolon-separated statements; each one
3349 /// produces a separate element in the returned vector. Tokenization uses
3350 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
3351 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
3352 let tokens = self.tokenizer.tokenize(sql)?;
3353 let config = crate::parser::ParserConfig {
3354 dialect: Some(self.dialect_type),
3355 ..Default::default()
3356 };
3357 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3358 parser.parse()
3359 }
3360
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Delegates directly to the dialect's tokenizer and returns its result,
    /// including any tokenization error.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
3365
3366 /// Get the generator config for a specific expression (supports hybrid dialects).
3367 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
3368 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
3369 if let Some(ref config_fn) = self.generator_config_for_expr {
3370 config_fn(expr)
3371 } else {
3372 (*self.generator_config).clone()
3373 }
3374 }
3375
3376 /// Generates a SQL string from an [`Expression`] AST node.
3377 ///
3378 /// The output uses this dialect's generator configuration for identifier quoting,
3379 /// keyword casing, function name normalization, and syntax style. The result is
3380 /// a single-line (non-pretty) SQL string.
3381 pub fn generate(&self, expr: &Expression) -> Result<String> {
3382 // Fast path: when no per-expression config override, share the Arc cheaply.
3383 if self.generator_config_for_expr.is_none() {
3384 let mut generator = Generator::with_arc_config(self.generator_config.clone());
3385 return generator.generate(expr);
3386 }
3387 let config = self.get_config_for_expr(expr);
3388 let mut generator = Generator::with_config(config);
3389 generator.generate(expr)
3390 }
3391
3392 /// Generate SQL from an expression with pretty printing enabled
3393 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
3394 let mut config = self.get_config_for_expr(expr);
3395 config.pretty = true;
3396 let mut generator = Generator::with_config(config);
3397 generator.generate(expr)
3398 }
3399
3400 /// Generate SQL from an expression with source dialect info (for transpilation)
3401 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
3402 let mut config = self.get_config_for_expr(expr);
3403 config.source_dialect = Some(source);
3404 let mut generator = Generator::with_config(config);
3405 generator.generate(expr)
3406 }
3407
3408 /// Generate SQL from an expression with pretty printing and source dialect info
3409 pub fn generate_pretty_with_source(
3410 &self,
3411 expr: &Expression,
3412 source: DialectType,
3413 ) -> Result<String> {
3414 let mut config = self.get_config_for_expr(expr);
3415 config.pretty = true;
3416 config.source_dialect = Some(source);
3417 let mut generator = Generator::with_config(config);
3418 generator.generate(expr)
3419 }
3420
3421 /// Generate SQL from an expression with forced identifier quoting (identify=True)
3422 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
3423 let mut config = self.get_config_for_expr(expr);
3424 config.always_quote_identifiers = true;
3425 let mut generator = Generator::with_config(config);
3426 generator.generate(expr)
3427 }
3428
3429 /// Generate SQL from an expression with pretty printing and forced identifier quoting
3430 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
3431 let mut config = (*self.generator_config).clone();
3432 config.pretty = true;
3433 config.always_quote_identifiers = true;
3434 let mut generator = Generator::with_config(config);
3435 generator.generate(expr)
3436 }
3437
3438 /// Generate SQL from an expression with caller-specified config overrides
3439 pub fn generate_with_overrides(
3440 &self,
3441 expr: &Expression,
3442 overrides: impl FnOnce(&mut GeneratorConfig),
3443 ) -> Result<String> {
3444 let mut config = self.get_config_for_expr(expr);
3445 overrides(&mut config);
3446 let mut generator = Generator::with_config(config);
3447 generator.generate(expr)
3448 }
3449
3450 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
3451 ///
3452 /// The transformation proceeds in two phases:
3453 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
3454 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
3455 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
3456 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
3457 ///
3458 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
3459 /// and for identity transforms (normalizing SQL within the same dialect).
3460 pub fn transform(&self, expr: Expression) -> Result<Expression> {
3461 // Apply preprocessing transforms based on dialect
3462 let preprocessed = self.preprocess(expr)?;
3463 // Then apply recursive transformation
3464 transform_recursive(preprocessed, &self.transformer)
3465 }
3466
3467 /// Apply dialect-specific preprocessing transforms
3468 fn preprocess(&self, expr: Expression) -> Result<Expression> {
3469 // If a custom preprocess function is set, use it instead of the built-in logic
3470 if let Some(ref custom_preprocess) = self.custom_preprocess {
3471 return custom_preprocess(expr);
3472 }
3473
3474 #[cfg(any(
3475 feature = "dialect-mysql",
3476 feature = "dialect-postgresql",
3477 feature = "dialect-bigquery",
3478 feature = "dialect-snowflake",
3479 feature = "dialect-tsql",
3480 feature = "dialect-spark",
3481 feature = "dialect-databricks",
3482 feature = "dialect-hive",
3483 feature = "dialect-sqlite",
3484 feature = "dialect-trino",
3485 feature = "dialect-presto",
3486 feature = "dialect-duckdb",
3487 feature = "dialect-redshift",
3488 feature = "dialect-starrocks",
3489 feature = "dialect-oracle",
3490 feature = "dialect-clickhouse",
3491 ))]
3492 use crate::transforms;
3493
3494 match self.dialect_type {
3495 // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
3496 // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
3497 #[cfg(feature = "dialect-mysql")]
3498 DialectType::MySQL => {
3499 let expr = transforms::eliminate_qualify(expr)?;
3500 let expr = transforms::eliminate_full_outer_join(expr)?;
3501 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3502 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3503 Ok(expr)
3504 }
3505 // PostgreSQL doesn't support QUALIFY
3506 // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
3507 // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
3508 #[cfg(feature = "dialect-postgresql")]
3509 DialectType::PostgreSQL => {
3510 let expr = transforms::eliminate_qualify(expr)?;
3511 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3512 let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
3513 // Normalize SET ... TO to SET ... = in CREATE FUNCTION
3514 // Only normalize when sqlglot would fully parse (no body) —
3515 // sqlglot falls back to Command for complex function bodies,
3516 // preserving the original text including TO.
3517 let expr = if let Expression::CreateFunction(mut cf) = expr {
3518 if cf.body.is_none() {
3519 for opt in &mut cf.set_options {
3520 if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
3521 &mut opt.value
3522 {
3523 *use_to = false;
3524 }
3525 }
3526 }
3527 Expression::CreateFunction(cf)
3528 } else {
3529 expr
3530 };
3531 Ok(expr)
3532 }
3533 // BigQuery doesn't support DISTINCT ON or CTE column aliases
3534 #[cfg(feature = "dialect-bigquery")]
3535 DialectType::BigQuery => {
3536 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3537 let expr = transforms::pushdown_cte_column_names(expr)?;
3538 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
3539 Ok(expr)
3540 }
3541 // Snowflake
3542 #[cfg(feature = "dialect-snowflake")]
3543 DialectType::Snowflake => {
3544 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3545 let expr = transforms::eliminate_window_clause(expr)?;
3546 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
3547 Ok(expr)
3548 }
3549 // TSQL doesn't support QUALIFY
3550 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
3551 // TSQL doesn't support CTEs in subqueries (hoist to top level)
3552 // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
3553 // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
3554 #[cfg(feature = "dialect-tsql")]
3555 DialectType::TSQL => {
3556 let expr = transforms::eliminate_qualify(expr)?;
3557 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3558 let expr = transforms::ensure_bools(expr)?;
3559 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3560 let expr = transforms::move_ctes_to_top_level(expr)?;
3561 let expr = transforms::qualify_derived_table_outputs(expr)?;
3562 Ok(expr)
3563 }
3564 // Spark doesn't support QUALIFY (but Databricks does)
3565 // Spark doesn't support CTEs in subqueries (hoist to top level)
3566 #[cfg(feature = "dialect-spark")]
3567 DialectType::Spark => {
3568 let expr = transforms::eliminate_qualify(expr)?;
3569 let expr = transforms::add_auto_table_alias(expr)?;
3570 let expr = transforms::simplify_nested_paren_values(expr)?;
3571 let expr = transforms::move_ctes_to_top_level(expr)?;
3572 Ok(expr)
3573 }
3574 // Databricks supports QUALIFY natively
3575 // Databricks doesn't support CTEs in subqueries (hoist to top level)
3576 #[cfg(feature = "dialect-databricks")]
3577 DialectType::Databricks => {
3578 let expr = transforms::add_auto_table_alias(expr)?;
3579 let expr = transforms::simplify_nested_paren_values(expr)?;
3580 let expr = transforms::move_ctes_to_top_level(expr)?;
3581 Ok(expr)
3582 }
3583 // Hive doesn't support QUALIFY or CTEs in subqueries
3584 #[cfg(feature = "dialect-hive")]
3585 DialectType::Hive => {
3586 let expr = transforms::eliminate_qualify(expr)?;
3587 let expr = transforms::move_ctes_to_top_level(expr)?;
3588 Ok(expr)
3589 }
3590 // SQLite doesn't support QUALIFY
3591 #[cfg(feature = "dialect-sqlite")]
3592 DialectType::SQLite => {
3593 let expr = transforms::eliminate_qualify(expr)?;
3594 Ok(expr)
3595 }
3596 // Trino doesn't support QUALIFY
3597 #[cfg(feature = "dialect-trino")]
3598 DialectType::Trino => {
3599 let expr = transforms::eliminate_qualify(expr)?;
3600 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
3601 Ok(expr)
3602 }
3603 // Presto doesn't support QUALIFY or WINDOW clause
3604 #[cfg(feature = "dialect-presto")]
3605 DialectType::Presto => {
3606 let expr = transforms::eliminate_qualify(expr)?;
3607 let expr = transforms::eliminate_window_clause(expr)?;
3608 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
3609 Ok(expr)
3610 }
3611 // DuckDB supports QUALIFY - no elimination needed
3612 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
3613 // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
3614 #[cfg(feature = "dialect-duckdb")]
3615 DialectType::DuckDB => {
3616 let expr = transforms::expand_posexplode_duckdb(expr)?;
3617 let expr = transforms::expand_like_any(expr)?;
3618 Ok(expr)
3619 }
3620 // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
3621 #[cfg(feature = "dialect-redshift")]
3622 DialectType::Redshift => {
3623 let expr = transforms::eliminate_qualify(expr)?;
3624 let expr = transforms::eliminate_window_clause(expr)?;
3625 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3626 Ok(expr)
3627 }
3628 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
3629 #[cfg(feature = "dialect-starrocks")]
3630 DialectType::StarRocks => {
3631 let expr = transforms::eliminate_qualify(expr)?;
3632 let expr = transforms::expand_between_in_delete(expr)?;
3633 let expr = transforms::eliminate_distinct_on_for_dialect(
3634 expr,
3635 Some(DialectType::StarRocks),
3636 Some(DialectType::StarRocks),
3637 )?;
3638 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3639 Ok(expr)
3640 }
3641 // DataFusion supports QUALIFY and semi/anti joins natively
3642 #[cfg(feature = "dialect-datafusion")]
3643 DialectType::DataFusion => Ok(expr),
3644 // Oracle doesn't support QUALIFY
3645 #[cfg(feature = "dialect-oracle")]
3646 DialectType::Oracle => {
3647 let expr = transforms::eliminate_qualify(expr)?;
3648 Ok(expr)
3649 }
3650 // Drill - no special preprocessing needed
3651 #[cfg(feature = "dialect-drill")]
3652 DialectType::Drill => Ok(expr),
3653 // Teradata - no special preprocessing needed
3654 #[cfg(feature = "dialect-teradata")]
3655 DialectType::Teradata => Ok(expr),
3656 // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
3657 #[cfg(feature = "dialect-clickhouse")]
3658 DialectType::ClickHouse => {
3659 let expr = transforms::no_limit_order_by_union(expr)?;
3660 Ok(expr)
3661 }
3662 // Other dialects - no preprocessing
3663 _ => Ok(expr),
3664 }
3665 }
3666
3667 /// Transpile SQL from this dialect to the given target dialect.
3668 ///
3669 /// The target may be specified as either a built-in [`DialectType`] enum variant
3670 /// or as a reference to a [`Dialect`] handle (built-in or custom). Both work:
3671 ///
3672 /// ```rust,ignore
3673 /// let pg = Dialect::get(DialectType::PostgreSQL);
3674 /// pg.transpile("SELECT NOW()", DialectType::BigQuery)?; // enum
3675 /// pg.transpile("SELECT NOW()", &custom_dialect)?; // handle
3676 /// ```
3677 ///
3678 /// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
3679 pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
3680 self.transpile_with(sql, target, TranspileOptions::default())
3681 }
3682
3683 /// Transpile SQL with configurable [`TranspileOptions`] (e.g. pretty-printing).
3684 pub fn transpile_with<T: TranspileTarget>(
3685 &self,
3686 sql: &str,
3687 target: T,
3688 opts: TranspileOptions,
3689 ) -> Result<Vec<String>> {
3690 target.with_dialect(|td| self.transpile_inner(sql, td, opts.pretty))
3691 }
3692
3693 #[cfg(not(feature = "transpile"))]
3694 fn transpile_inner(
3695 &self,
3696 sql: &str,
3697 target_dialect: &Dialect,
3698 pretty: bool,
3699 ) -> Result<Vec<String>> {
3700 let target = target_dialect.dialect_type;
3701 // Without the transpile feature, only same-dialect or to/from generic is supported
3702 if self.dialect_type != target
3703 && self.dialect_type != DialectType::Generic
3704 && target != DialectType::Generic
3705 {
3706 return Err(crate::error::Error::parse(
3707 "Cross-dialect transpilation not available in this build",
3708 0,
3709 0,
3710 0,
3711 0,
3712 ));
3713 }
3714
3715 let expressions = self.parse(sql)?;
3716 let generic_identity =
3717 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
3718
3719 if generic_identity {
3720 return expressions
3721 .into_iter()
3722 .map(|expr| {
3723 if pretty {
3724 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
3725 } else {
3726 target_dialect.generate_with_source(&expr, self.dialect_type)
3727 }
3728 })
3729 .collect();
3730 }
3731
3732 expressions
3733 .into_iter()
3734 .map(|expr| {
3735 let transformed = target_dialect.transform(expr)?;
3736 if pretty {
3737 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
3738 } else {
3739 target_dialect.generate_with_source(&transformed, self.dialect_type)
3740 }
3741 })
3742 .collect()
3743 }
3744
/// Full transpilation pipeline, compiled when the `transpile` feature is enabled.
///
/// Parses `sql` with this (source) dialect and renders every parsed statement for
/// `target_dialect`, pretty-printed when `pretty` is true. Returns one SQL string per
/// statement; the first error raised by any step is returned instead.
///
/// Per-statement stages, in the order they run:
/// 1. DuckDB source only: VARCHAR/CHAR data types become TEXT.
/// 2. Source-dialect `transform`, when source and target differ and the source is
///    not Generic.
/// 3. Source/target-specific rewrites (TSQL ISNULL unwrapping, Snowflake CURRENT_TIME,
///    REPEAT and RANDOM handling, BigQuery struct field names).
/// 4. `cross_dialect_normalize`, followed by further pair- and target-specific rewrites
///    (UNNEST aliasing, DISTINCT ON elimination, UNION wrapping, COUNT_BIG, ...).
/// 5. Target-dialect `transform`, the DuckDB `seq_rownum_to_range` fix-up, and SQL
///    generation.
///
/// Generic -> Generic input skips every stage and is generated directly after parsing.
3745 #[cfg(feature = "transpile")]
3746 fn transpile_inner(
3747 &self,
3748 sql: &str,
3749 target_dialect: &Dialect,
3750 pretty: bool,
3751 ) -> Result<Vec<String>> {
3752 let target = target_dialect.dialect_type;
// PostgreSQL -> SQLite: check the token stream for pgvector distance operators and
// fail here, before the SQL is parsed.
3753 if matches!(self.dialect_type, DialectType::PostgreSQL)
3754 && matches!(target, DialectType::SQLite)
3755 {
3756 self.reject_pgvector_distance_operators_for_sqlite(sql)?;
3757 }
3758 let expressions = self.parse(sql)?;
3759 let generic_identity =
3760 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
3761
// Generic -> Generic: generate straight from the parsed AST, no transforms at all.
3762 if generic_identity {
3763 return expressions
3764 .into_iter()
3765 .map(|expr| {
3766 if pretty {
3767 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
3768 } else {
3769 target_dialect.generate_with_source(&expr, self.dialect_type)
3770 }
3771 })
3772 .collect();
3773 }
3774
3775 expressions
3776 .into_iter()
3777 .map(|expr| {
3778 // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
3779 // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
3780 // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
3781 let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
3782 use crate::expressions::DataType as DT;
3783 transform_recursive(expr, &|e| match e {
3784 Expression::DataType(DT::VarChar { .. }) => {
3785 Ok(Expression::DataType(DT::Text))
3786 }
3787 Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
3788 _ => Ok(e),
3789 })?
3790 } else {
3791 expr
3792 };
3793
3794 // When source and target differ, first normalize the source dialect's
3795 // AST constructs to standard SQL, so that the target dialect can handle them.
3796 // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
3797 let normalized =
3798 if self.dialect_type != target && self.dialect_type != DialectType::Generic {
3799 self.transform(expr)?
3800 } else {
3801 expr
3802 };
3803
3804 // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
3805 // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
3806 // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
3807 // cross-dialect transpilation we need the unwrapped JSON_QUERY.
3808 let normalized =
3809 if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
3810 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
3811 {
3812 transform_recursive(normalized, &|e| {
3813 if let Expression::Function(ref f) = e {
3814 if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
3815 // Check if first arg is JSON_QUERY and second is JSON_VALUE
3816 if let (
3817 Expression::Function(ref jq),
3818 Expression::Function(ref jv),
3819 ) = (&f.args[0], &f.args[1])
3820 {
3821 if jq.name.eq_ignore_ascii_case("JSON_QUERY")
3822 && jv.name.eq_ignore_ascii_case("JSON_VALUE")
3823 {
3824 // Unwrap: return just JSON_QUERY(...)
3825 return Ok(f.args[0].clone());
3826 }
3827 }
3828 }
3829 }
3830 Ok(e)
3831 })?
3832 } else {
3833 normalized
3834 };
3835
3836 // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
3837 // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
3838 // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
3839 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
3840 && !matches!(target, DialectType::Snowflake)
3841 {
3842 transform_recursive(normalized, &|e| {
3843 if let Expression::Function(ref f) = e {
3844 if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
3845 return Ok(Expression::Localtime(Box::new(
3846 crate::expressions::Localtime { this: None },
3847 )));
3848 }
3849 }
3850 Ok(e)
3851 })?
3852 } else {
3853 normalized
3854 };
3855
3856 // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
3857 // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
3858 // transform. DuckDB requires the count argument to be BIGINT.
3859 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
3860 && matches!(target, DialectType::DuckDB)
3861 {
3862 transform_recursive(normalized, &|e| {
3863 if let Expression::Function(ref f) = e {
3864 if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
3865 // Check if first arg is space string literal
3866 if let Expression::Literal(ref lit) = f.args[0] {
3867 if let crate::expressions::Literal::String(ref s) = lit.as_ref()
3868 {
3869 if s == " " {
3870 // Wrap second arg in CAST(... AS BIGINT) if not already
3871 if !matches!(f.args[1], Expression::Cast(_)) {
3872 let mut new_args = f.args.clone();
3873 new_args[1] = Expression::Cast(Box::new(
3874 crate::expressions::Cast {
3875 this: new_args[1].clone(),
3876 to: crate::expressions::DataType::BigInt {
3877 length: None,
3878 },
3879 trailing_comments: Vec::new(),
3880 double_colon_syntax: false,
3881 format: None,
3882 default: None,
3883 inferred_type: None,
3884 },
3885 ));
// The rebuilt call copies the original's flags but resets `span` and
// `inferred_type` to None.
3886 return Ok(Expression::Function(Box::new(
3887 crate::expressions::Function {
3888 name: f.name.clone(),
3889 args: new_args,
3890 distinct: f.distinct,
3891 trailing_comments: f
3892 .trailing_comments
3893 .clone(),
3894 use_bracket_syntax: f.use_bracket_syntax,
3895 no_parens: f.no_parens,
3896 quoted: f.quoted,
3897 span: None,
3898 inferred_type: None,
3899 },
3900 )));
3901 }
3902 }
3903 }
3904 }
3905 }
3906 }
3907 Ok(e)
3908 })?
3909 } else {
3910 normalized
3911 };
3912
3913 // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
3914 // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
3915 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
3916 && !matches!(target, DialectType::BigQuery)
3917 {
3918 crate::transforms::propagate_struct_field_names(normalized)?
3919 } else {
3920 normalized
3921 };
3922
3923 // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
3924 // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
3925 // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
3926 // functions handle their generator args differently (as float seeds).
3927 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
3928 && matches!(target, DialectType::DuckDB)
3929 {
// Builds CAST(lower + RANDOM() * (upper - lower) AS BIGINT).
3930 fn make_scaled_random() -> Expression {
// NOTE: the un-expansion step further down recognises an expanded RANDOM by
// comparing against this exact literal text (upper-case `E`); keep both in sync.
3931 let lower =
3932 Expression::Literal(Box::new(crate::expressions::Literal::Number(
3933 "-9.223372036854776E+18".to_string(),
3934 )));
// NOTE(review): lower-case `e` here versus upper-case `E` in `lower` —
// presumably intentional to match expected output; confirm before normalizing.
3935 let upper =
3936 Expression::Literal(Box::new(crate::expressions::Literal::Number(
3937 "9.223372036854776e+18".to_string(),
3938 )));
3939 let random_call = Expression::Random(crate::expressions::Random);
3940 let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
3941 this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
3942 left: upper,
3943 right: lower.clone(),
3944 left_comments: vec![],
3945 operator_comments: vec![],
3946 trailing_comments: vec![],
3947 inferred_type: None,
3948 })),
3949 trailing_comments: vec![],
3950 }));
3951 let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
3952 left: random_call,
3953 right: range_size,
3954 left_comments: vec![],
3955 operator_comments: vec![],
3956 trailing_comments: vec![],
3957 inferred_type: None,
3958 }));
3959 let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
3960 left: lower,
3961 right: scaled,
3962 left_comments: vec![],
3963 operator_comments: vec![],
3964 trailing_comments: vec![],
3965 inferred_type: None,
3966 }));
3967 Expression::Cast(Box::new(crate::expressions::Cast {
3968 this: shifted,
3969 to: crate::expressions::DataType::BigInt { length: None },
3970 trailing_comments: vec![],
3971 double_colon_syntax: false,
3972 format: None,
3973 default: None,
3974 inferred_type: None,
3975 }))
3976 }
3977
3978 // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
3979 // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
3980 // This prevents transform_recursive (which is bottom-up) from expanding
3981 // seeded RANDOM into make_scaled_random() and losing the seed value.
3982 // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
3983 // and then un-expanded back to Expression::Random by the code below.
3984 let normalized = transform_recursive(normalized, &|e| {
3985 if let Expression::Function(ref f) = e {
3986 let n = f.name.to_ascii_uppercase();
3987 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
3988 if let Expression::Function(mut f) = e {
3989 for arg in f.args.iter_mut() {
3990 if let Expression::Rand(ref r) = arg {
3991 if r.lower.is_none() && r.upper.is_none() {
3992 if let Some(ref seed) = r.seed {
3993 // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
3994 // so it won't be expanded by the RANDOM expansion below
3995 *arg = Expression::Function(Box::new(
3996 crate::expressions::Function::new(
3997 "RANDOM".to_string(),
3998 vec![*seed.clone()],
3999 ),
4000 ));
4001 }
4002 }
4003 }
4004 }
4005 return Ok(Expression::Function(f));
4006 }
4007 }
4008 }
4009 Ok(e)
4010 })?;
4011
4012 // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
4013 // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
4014 // we see the parent. We detect this and undo the expansion by replacing
4015 // the expanded pattern back with Expression::Random.
4016 // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
4017 // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
4018 // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
4019 transform_recursive(normalized, &|e| {
4020 if let Expression::Function(ref f) = e {
4021 let n = f.name.to_ascii_uppercase();
4022 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
4023 if let Expression::Function(mut f) = e {
4024 for arg in f.args.iter_mut() {
4025 // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
// Only the BIGINT cast, the Add node and its left literal text are checked;
// the right-hand side of the Add is not inspected.
4026 if let Expression::Cast(ref cast) = arg {
4027 if matches!(
4028 cast.to,
4029 crate::expressions::DataType::BigInt { .. }
4030 ) {
4031 if let Expression::Add(ref add) = cast.this {
4032 if let Expression::Literal(ref lit) = add.left {
4033 if let crate::expressions::Literal::Number(
4034 ref num,
4035 ) = lit.as_ref()
4036 {
4037 if num == "-9.223372036854776E+18" {
4038 *arg = Expression::Random(
4039 crate::expressions::Random,
4040 );
4041 }
4042 }
4043 }
4044 }
4045 }
4046 }
4047 }
4048 return Ok(Expression::Function(f));
4049 }
4050 return Ok(e);
4051 }
4052 }
4053 match e {
4054 Expression::Random(_) => Ok(make_scaled_random()),
4055 // Rand(seed) with no bounds: drop seed and expand
4056 // (DuckDB RANDOM doesn't support seeds)
4057 Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
4058 Ok(make_scaled_random())
4059 }
4060 _ => Ok(e),
4061 }
4062 })?
4063 } else {
4064 normalized
4065 };
4066
4067 // Apply cross-dialect semantic normalizations
4068 let normalized =
4069 Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;
4070
// PostgreSQL -> SQLite: rewrite data types via normalize_postgres_to_sqlite_types.
4071 let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL)
4072 && matches!(target, DialectType::SQLite)
4073 {
4074 Self::normalize_postgres_to_sqlite_types(normalized)?
4075 } else {
4076 normalized
4077 };
4078
4079 // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
4080 // (SELECT UNNEST(..., max_depth => 2)) subquery
4081 // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
4082 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4083 && matches!(target, DialectType::DuckDB)
4084 {
4085 crate::transforms::wrap_duckdb_unnest_struct(normalized)?
4086 } else {
4087 normalized
4088 };
4089
4090 // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
4091 // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
4092 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4093 && matches!(
4094 target,
4095 DialectType::DuckDB
4096 | DialectType::Presto
4097 | DialectType::Trino
4098 | DialectType::Athena
4099 | DialectType::Spark
4100 | DialectType::Databricks
4101 ) {
4102 crate::transforms::unnest_alias_to_column_alias(normalized)?
4103 } else if matches!(self.dialect_type, DialectType::BigQuery)
4104 && matches!(target, DialectType::BigQuery | DialectType::Redshift)
4105 {
4106 // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
4107 // but don't convert alias format (no _t0 wrapper)
4108 let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
4109 // For Redshift: strip UNNEST when arg is a column reference path
4110 if matches!(target, DialectType::Redshift) {
4111 crate::transforms::strip_unnest_column_refs(result)?
4112 } else {
4113 result
4114 }
4115 } else {
4116 normalized
4117 };
4118
4119 // For Presto/Trino targets from PostgreSQL/Redshift source:
4120 // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
4121 let normalized = if matches!(
4122 self.dialect_type,
4123 DialectType::PostgreSQL | DialectType::Redshift
4124 ) && matches!(
4125 target,
4126 DialectType::Presto | DialectType::Trino | DialectType::Athena
4127 ) {
4128 crate::transforms::wrap_unnest_join_aliases(normalized)?
4129 } else {
4130 normalized
4131 };
4132
4133 // Eliminate DISTINCT ON with target-dialect awareness
4134 // This must happen after source transform (which may produce DISTINCT ON)
4135 // and before target transform, with knowledge of the target dialect's NULL ordering behavior
4136 let normalized = crate::transforms::eliminate_distinct_on_for_dialect(
4137 normalized,
4138 Some(target),
4139 Some(self.dialect_type),
4140 )?;
4141
4142 // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
4143 let normalized = if matches!(target, DialectType::Snowflake) {
4144 Self::transform_generate_date_array_snowflake(normalized)?
4145 } else {
4146 normalized
4147 };
4148
4149 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
4150 let normalized = if matches!(
4151 target,
4152 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4153 ) {
4154 crate::transforms::unnest_to_explode_select(normalized)?
4155 } else {
4156 normalized
4157 };
4158
4159 // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
4160 let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
4161 crate::transforms::no_limit_order_by_union(normalized)?
4162 } else {
4163 normalized
4164 };
4165
4166 // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
4167 // Python sqlglot does this in the TSQL generator, but we can't do it there
4168 // because it would break TSQL -> TSQL identity
4169 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
4170 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4171 {
4172 transform_recursive(normalized, &|e| {
4173 if let Expression::Count(ref c) = e {
4174 // Build COUNT_BIG(...) as an AggregateFunction
4175 let args = if c.star {
4176 vec![Expression::Star(crate::expressions::Star {
4177 table: None,
4178 except: None,
4179 replace: None,
4180 rename: None,
4181 trailing_comments: Vec::new(),
4182 span: None,
4183 })]
4184 } else if let Some(ref this) = c.this {
4185 vec![this.clone()]
4186 } else {
4187 vec![]
4188 };
// Only `distinct` and `filter` are carried over from the Count node;
// `order_by`, `limit` and `ignore_nulls` are set to empty/None.
4189 Ok(Expression::AggregateFunction(Box::new(
4190 crate::expressions::AggregateFunction {
4191 name: "COUNT_BIG".to_string(),
4192 args,
4193 distinct: c.distinct,
4194 filter: c.filter.clone(),
4195 order_by: Vec::new(),
4196 limit: None,
4197 ignore_nulls: None,
4198 inferred_type: None,
4199 },
4200 )))
4201 } else {
4202 Ok(e)
4203 }
4204 })?
4205 } else {
4206 normalized
4207 };
4208
// Target-dialect transform runs last, after all source- and pair-specific rewrites.
4209 let transformed = target_dialect.transform(normalized)?;
4210
4211 // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
4212 let transformed = if matches!(target, DialectType::DuckDB) {
4213 Self::seq_rownum_to_range(transformed)?
4214 } else {
4215 transformed
4216 };
4217
4218 let mut sql = if pretty {
4219 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
4220 } else {
4221 target_dialect.generate_with_source(&transformed, self.dialect_type)?
4222 };
4223
4224 // Align a known Snowflake pretty-print edge case with Python sqlglot output.
4225 if pretty && target == DialectType::Snowflake {
4226 sql = Self::normalize_snowflake_pretty(sql);
4227 }
4228
4229 Ok(sql)
4230 })
// Collecting into Result<Vec<String>> stops at the first statement that fails.
4231 .collect()
4232 }
4233}
4234
4235// Transpile-only methods: cross-dialect normalization and helpers
4236#[cfg(feature = "transpile")]
4237impl Dialect {
4238 fn reject_pgvector_distance_operators_for_sqlite(&self, sql: &str) -> Result<()> {
4239 let tokens = self.tokenize(sql)?;
4240 for (i, token) in tokens.iter().enumerate() {
4241 if token.token_type == TokenType::NullsafeEq {
4242 return Err(crate::error::Error::unsupported(
4243 "PostgreSQL pgvector cosine distance operator <=>",
4244 "SQLite",
4245 ));
4246 }
4247 if token.token_type == TokenType::Lt
4248 && tokens
4249 .get(i + 1)
4250 .is_some_and(|token| token.token_type == TokenType::Tilde)
4251 && tokens
4252 .get(i + 2)
4253 .is_some_and(|token| token.token_type == TokenType::Gt)
4254 {
4255 return Err(crate::error::Error::unsupported(
4256 "PostgreSQL pgvector Hamming distance operator <~>",
4257 "SQLite",
4258 ));
4259 }
4260 }
4261 Ok(())
4262 }
4263
4264 fn normalize_postgres_to_sqlite_types(expr: Expression) -> Result<Expression> {
4265 fn sqlite_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
4266 use crate::expressions::DataType;
4267
4268 match dt {
4269 DataType::Bit { .. } => DataType::Int {
4270 length: None,
4271 integer_spelling: true,
4272 },
4273 DataType::TextWithLength { .. } => DataType::Text,
4274 DataType::VarChar { .. } => DataType::Text,
4275 DataType::Char { .. } => DataType::Text,
4276 DataType::Timestamp { timezone: true, .. } => DataType::Text,
4277 DataType::Custom { name } => {
4278 let base = name
4279 .split_once('(')
4280 .map_or(name.as_str(), |(base, _)| base)
4281 .trim();
4282 if base.eq_ignore_ascii_case("TSVECTOR")
4283 || base.eq_ignore_ascii_case("TIMESTAMPTZ")
4284 || base.eq_ignore_ascii_case("TIMESTAMP WITH TIME ZONE")
4285 || base.eq_ignore_ascii_case("NVARCHAR")
4286 || base.eq_ignore_ascii_case("NCHAR")
4287 {
4288 DataType::Text
4289 } else {
4290 DataType::Custom { name }
4291 }
4292 }
4293 _ => dt,
4294 }
4295 }
4296
4297 transform_recursive(expr, &|e| match e {
4298 Expression::DataType(dt) => Ok(Expression::DataType(sqlite_type(dt))),
4299 Expression::CreateTable(mut ct) => {
4300 for column in &mut ct.columns {
4301 column.data_type = sqlite_type(column.data_type.clone());
4302 }
4303 Ok(Expression::CreateTable(ct))
4304 }
4305 _ => Ok(e),
4306 })
4307 }
4308
4309 /// For DuckDB target: when FROM clause contains RANGE(n), replace
4310 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
4311 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
4312 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
4313 if let Expression::Select(mut select) = expr {
4314 // Check if FROM contains a RANGE function
4315 let has_range_from = if let Some(ref from) = select.from {
4316 from.expressions.iter().any(|e| {
4317 // Check for direct RANGE(...) or aliased RANGE(...)
4318 match e {
4319 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
4320 Expression::Alias(a) => {
4321 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
4322 }
4323 _ => false,
4324 }
4325 })
4326 } else {
4327 false
4328 };
4329
4330 if has_range_from {
4331 // Replace the ROW_NUMBER pattern in select expressions
4332 select.expressions = select
4333 .expressions
4334 .into_iter()
4335 .map(|e| Self::replace_rownum_with_range(e))
4336 .collect();
4337 }
4338
4339 Ok(Expression::Select(select))
4340 } else {
4341 Ok(expr)
4342 }
4343 }
4344
4345 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
4346 fn replace_rownum_with_range(expr: Expression) -> Expression {
4347 match expr {
4348 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
4349 Expression::Mod(op) => {
4350 let new_left = Self::try_replace_rownum_paren(&op.left);
4351 Expression::Mod(Box::new(crate::expressions::BinaryOp {
4352 left: new_left,
4353 right: op.right,
4354 left_comments: op.left_comments,
4355 operator_comments: op.operator_comments,
4356 trailing_comments: op.trailing_comments,
4357 inferred_type: op.inferred_type,
4358 }))
4359 }
4360 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
4361 Expression::Paren(p) => {
4362 let inner = Self::replace_rownum_with_range(p.this);
4363 Expression::Paren(Box::new(crate::expressions::Paren {
4364 this: inner,
4365 trailing_comments: p.trailing_comments,
4366 }))
4367 }
4368 Expression::Case(mut c) => {
4369 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
4370 c.whens = c
4371 .whens
4372 .into_iter()
4373 .map(|(cond, then)| {
4374 (
4375 Self::replace_rownum_with_range(cond),
4376 Self::replace_rownum_with_range(then),
4377 )
4378 })
4379 .collect();
4380 if let Some(else_) = c.else_ {
4381 c.else_ = Some(Self::replace_rownum_with_range(else_));
4382 }
4383 Expression::Case(c)
4384 }
4385 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
4386 left: Self::replace_rownum_with_range(op.left),
4387 right: op.right,
4388 left_comments: op.left_comments,
4389 operator_comments: op.operator_comments,
4390 trailing_comments: op.trailing_comments,
4391 inferred_type: op.inferred_type,
4392 })),
4393 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
4394 left: Self::replace_rownum_with_range(op.left),
4395 right: op.right,
4396 left_comments: op.left_comments,
4397 operator_comments: op.operator_comments,
4398 trailing_comments: op.trailing_comments,
4399 inferred_type: op.inferred_type,
4400 })),
4401 Expression::Alias(mut a) => {
4402 a.this = Self::replace_rownum_with_range(a.this);
4403 Expression::Alias(a)
4404 }
4405 other => other,
4406 }
4407 }
4408
4409 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
4410 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
4411 if let Expression::Paren(ref p) = expr {
4412 if let Expression::Sub(ref sub) = p.this {
4413 if let Expression::WindowFunction(ref wf) = sub.left {
4414 if let Expression::Function(ref f) = wf.this {
4415 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
4416 if let Expression::Literal(ref lit) = sub.right {
4417 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
4418 if n == "1" {
4419 return Expression::column("range");
4420 }
4421 }
4422 }
4423 }
4424 }
4425 }
4426 }
4427 }
4428 expr.clone()
4429 }
4430
4431 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
4432 /// Converts:
4433 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
4434 /// To:
4435 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
4436 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
4437 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
4438 use crate::expressions::*;
4439 transform_recursive(expr, &|e| {
4440 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
4441 if let Expression::ArraySize(ref af) = e {
4442 if let Expression::Function(ref f) = af.this {
4443 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4444 let result = Self::convert_array_size_gda_snowflake(f)?;
4445 return Ok(result);
4446 }
4447 }
4448 }
4449
4450 let Expression::Select(mut sel) = e else {
4451 return Ok(e);
4452 };
4453
4454 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
4455 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
4456 let mut gda_join_idx: Option<usize> = None;
4457
4458 for (idx, join) in sel.joins.iter().enumerate() {
4459 // The join.this may be:
4460 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
4461 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
4462 let (unnest_ref, alias_name) = match &join.this {
4463 Expression::Unnest(ref unnest) => {
4464 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
4465 (Some(unnest.as_ref()), alias)
4466 }
4467 Expression::Alias(ref a) => {
4468 if let Expression::Unnest(ref unnest) = a.this {
4469 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
4470 } else {
4471 (None, None)
4472 }
4473 }
4474 _ => (None, None),
4475 };
4476
4477 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
4478 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
4479 if let Expression::Function(ref f) = unnest.this {
4480 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4481 let start_expr = f.args[0].clone();
4482 let end_expr = f.args[1].clone();
4483 let step = f.args.get(2).cloned();
4484
4485 // Extract unit from step interval
4486 let unit = if let Some(Expression::Interval(ref iv)) = step {
4487 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
4488 Some(format!("{:?}", unit).to_ascii_uppercase())
4489 } else if let Some(ref this) = iv.this {
4490 // The interval may be stored as a string like "1 MONTH"
4491 if let Expression::Literal(lit) = this {
4492 if let Literal::String(ref s) = lit.as_ref() {
4493 let parts: Vec<&str> = s.split_whitespace().collect();
4494 if parts.len() == 2 {
4495 Some(parts[1].to_ascii_uppercase())
4496 } else if parts.len() == 1 {
4497 // Single word like "MONTH" or just "1"
4498 let upper = parts[0].to_ascii_uppercase();
4499 if matches!(
4500 upper.as_str(),
4501 "YEAR"
4502 | "QUARTER"
4503 | "MONTH"
4504 | "WEEK"
4505 | "DAY"
4506 | "HOUR"
4507 | "MINUTE"
4508 | "SECOND"
4509 ) {
4510 Some(upper)
4511 } else {
4512 None
4513 }
4514 } else {
4515 None
4516 }
4517 } else {
4518 None
4519 }
4520 } else {
4521 None
4522 }
4523 } else {
4524 None
4525 }
4526 } else {
4527 None
4528 };
4529
4530 if let Some(unit_str) = unit {
4531 gda_info = Some((alias, start_expr, end_expr, unit_str));
4532 gda_join_idx = Some(idx);
4533 }
4534 }
4535 }
4536 }
4537 if gda_info.is_some() {
4538 break;
4539 }
4540 }
4541
4542 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
4543 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
4544 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
4545 let result = Self::try_transform_from_gda_snowflake(sel);
4546 return result;
4547 };
4548 let join_idx = gda_join_idx.unwrap();
4549
4550 // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
4551 // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
4552 // (inclusive date range), so the exclusive end is DATEDIFF + 1.
4553 let datediff = Expression::Function(Box::new(Function::new(
4554 "DATEDIFF".to_string(),
4555 vec![
4556 Expression::boxed_column(Column {
4557 name: Identifier::new(&unit_str),
4558 table: None,
4559 join_mark: false,
4560 trailing_comments: vec![],
4561 span: None,
4562 inferred_type: None,
4563 }),
4564 start_expr.clone(),
4565 end_expr.clone(),
4566 ],
4567 )));
4568 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
4569 left: datediff,
4570 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4571 left_comments: vec![],
4572 operator_comments: vec![],
4573 trailing_comments: vec![],
4574 inferred_type: None,
4575 }));
4576
4577 let array_gen_range = Expression::Function(Box::new(Function::new(
4578 "ARRAY_GENERATE_RANGE".to_string(),
4579 vec![
4580 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4581 datediff_plus_one,
4582 ],
4583 )));
4584
4585 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
4586 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
4587 name: Identifier::new("INPUT"),
4588 value: array_gen_range,
4589 separator: crate::expressions::NamedArgSeparator::DArrow,
4590 }));
4591 let flatten = Expression::Function(Box::new(Function::new(
4592 "FLATTEN".to_string(),
4593 vec![flatten_input],
4594 )));
4595
4596 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
4597 let alias_table = Alias {
4598 this: flatten,
4599 alias: Identifier::new("_t0"),
4600 column_aliases: vec![
4601 Identifier::new("seq"),
4602 Identifier::new("key"),
4603 Identifier::new("path"),
4604 Identifier::new("index"),
4605 Identifier::new(&alias_name),
4606 Identifier::new("this"),
4607 ],
4608 alias_explicit_as: false,
4609 alias_keyword: None,
4610 pre_alias_comments: vec![],
4611 trailing_comments: vec![],
4612 inferred_type: None,
4613 };
4614 let lateral_expr = Expression::Lateral(Box::new(Lateral {
4615 this: Box::new(Expression::Alias(Box::new(alias_table))),
4616 view: None,
4617 outer: None,
4618 alias: None,
4619 alias_quoted: false,
4620 cross_apply: None,
4621 ordinality: None,
4622 column_aliases: vec![],
4623 }));
4624
4625 // Remove the original join and add to FROM expressions
4626 sel.joins.remove(join_idx);
4627 if let Some(ref mut from) = sel.from {
4628 from.expressions.push(lateral_expr);
4629 }
4630
4631 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
4632 let dateadd_expr = Expression::Function(Box::new(Function::new(
4633 "DATEADD".to_string(),
4634 vec![
4635 Expression::boxed_column(Column {
4636 name: Identifier::new(&unit_str),
4637 table: None,
4638 join_mark: false,
4639 trailing_comments: vec![],
4640 span: None,
4641 inferred_type: None,
4642 }),
4643 Expression::Cast(Box::new(Cast {
4644 this: Expression::boxed_column(Column {
4645 name: Identifier::new(&alias_name),
4646 table: None,
4647 join_mark: false,
4648 trailing_comments: vec![],
4649 span: None,
4650 inferred_type: None,
4651 }),
4652 to: DataType::Int {
4653 length: None,
4654 integer_spelling: false,
4655 },
4656 trailing_comments: vec![],
4657 double_colon_syntax: false,
4658 format: None,
4659 default: None,
4660 inferred_type: None,
4661 })),
4662 Expression::Cast(Box::new(Cast {
4663 this: start_expr.clone(),
4664 to: DataType::Date,
4665 trailing_comments: vec![],
4666 double_colon_syntax: false,
4667 format: None,
4668 default: None,
4669 inferred_type: None,
4670 })),
4671 ],
4672 )));
4673
4674 // Replace references to the alias in the SELECT list
4675 let new_exprs: Vec<Expression> = sel
4676 .expressions
4677 .iter()
4678 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
4679 .collect();
4680 sel.expressions = new_exprs;
4681
4682 Ok(Expression::Select(sel))
4683 })
4684 }
4685
4686 /// Helper: replace column references to `alias_name` with dateadd expression
4687 fn replace_column_ref_with_dateadd(
4688 expr: &Expression,
4689 alias_name: &str,
4690 dateadd: &Expression,
4691 ) -> Expression {
4692 use crate::expressions::*;
4693 match expr {
4694 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4695 // Plain column reference -> DATEADD(...) AS alias_name
4696 Expression::Alias(Box::new(Alias {
4697 this: dateadd.clone(),
4698 alias: Identifier::new(alias_name),
4699 column_aliases: vec![],
4700 alias_explicit_as: false,
4701 alias_keyword: None,
4702 pre_alias_comments: vec![],
4703 trailing_comments: vec![],
4704 inferred_type: None,
4705 }))
4706 }
4707 Expression::Alias(a) => {
4708 // Check if the inner expression references the alias
4709 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
4710 Expression::Alias(Box::new(Alias {
4711 this: new_this,
4712 alias: a.alias.clone(),
4713 column_aliases: a.column_aliases.clone(),
4714 alias_explicit_as: false,
4715 alias_keyword: None,
4716 pre_alias_comments: a.pre_alias_comments.clone(),
4717 trailing_comments: a.trailing_comments.clone(),
4718 inferred_type: None,
4719 }))
4720 }
4721 _ => expr.clone(),
4722 }
4723 }
4724
4725 /// Helper: replace column references in inner expression (not top-level)
4726 fn replace_column_ref_inner(
4727 expr: &Expression,
4728 alias_name: &str,
4729 dateadd: &Expression,
4730 ) -> Expression {
4731 use crate::expressions::*;
4732 match expr {
4733 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4734 dateadd.clone()
4735 }
4736 Expression::Add(op) => {
4737 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4738 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4739 Expression::Add(Box::new(BinaryOp {
4740 left,
4741 right,
4742 left_comments: op.left_comments.clone(),
4743 operator_comments: op.operator_comments.clone(),
4744 trailing_comments: op.trailing_comments.clone(),
4745 inferred_type: None,
4746 }))
4747 }
4748 Expression::Sub(op) => {
4749 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4750 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4751 Expression::Sub(Box::new(BinaryOp {
4752 left,
4753 right,
4754 left_comments: op.left_comments.clone(),
4755 operator_comments: op.operator_comments.clone(),
4756 trailing_comments: op.trailing_comments.clone(),
4757 inferred_type: None,
4758 }))
4759 }
4760 Expression::Mul(op) => {
4761 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4762 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4763 Expression::Mul(Box::new(BinaryOp {
4764 left,
4765 right,
4766 left_comments: op.left_comments.clone(),
4767 operator_comments: op.operator_comments.clone(),
4768 trailing_comments: op.trailing_comments.clone(),
4769 inferred_type: None,
4770 }))
4771 }
4772 _ => expr.clone(),
4773 }
4774 }
4775
4776 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
4777 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
4778 fn try_transform_from_gda_snowflake(
4779 mut sel: Box<crate::expressions::Select>,
4780 ) -> Result<Expression> {
4781 use crate::expressions::*;
4782
4783 // Extract GDA info from FROM clause
4784 let mut gda_info: Option<(
4785 usize,
4786 String,
4787 Expression,
4788 Expression,
4789 String,
4790 Option<(String, Vec<Identifier>)>,
4791 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
4792
4793 if let Some(ref from) = sel.from {
4794 for (idx, table_expr) in from.expressions.iter().enumerate() {
4795 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
4796 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
4797 let (unnest_opt, outer_alias_info) = match table_expr {
4798 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
4799 Expression::Alias(ref a) => {
4800 if let Expression::Unnest(ref unnest) = a.this {
4801 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
4802 (Some(unnest.as_ref()), Some(alias_info))
4803 } else {
4804 (None, None)
4805 }
4806 }
4807 _ => (None, None),
4808 };
4809
4810 if let Some(unnest) = unnest_opt {
4811 // Check for GENERATE_DATE_ARRAY function
4812 let func_opt = match &unnest.this {
4813 Expression::Function(ref f)
4814 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
4815 && f.args.len() >= 2 =>
4816 {
4817 Some(f)
4818 }
4819 // Also check for GenerateSeries (from earlier normalization)
4820 _ => None,
4821 };
4822
4823 if let Some(f) = func_opt {
4824 let start_expr = f.args[0].clone();
4825 let end_expr = f.args[1].clone();
4826 let step = f.args.get(2).cloned();
4827
4828 // Extract unit and column name
4829 let unit = Self::extract_interval_unit_str(&step);
4830 let col_name = outer_alias_info
4831 .as_ref()
4832 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
4833 .unwrap_or_else(|| "value".to_string());
4834
4835 if let Some(unit_str) = unit {
4836 gda_info = Some((
4837 idx,
4838 col_name,
4839 start_expr,
4840 end_expr,
4841 unit_str,
4842 outer_alias_info,
4843 ));
4844 break;
4845 }
4846 }
4847 }
4848 }
4849 }
4850
4851 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
4852 else {
4853 return Ok(Expression::Select(sel));
4854 };
4855
4856 // Build the Snowflake subquery:
4857 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
4858 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
4859
4860 // DATEDIFF(unit, start, end)
4861 let datediff = Expression::Function(Box::new(Function::new(
4862 "DATEDIFF".to_string(),
4863 vec![
4864 Expression::boxed_column(Column {
4865 name: Identifier::new(&unit_str),
4866 table: None,
4867 join_mark: false,
4868 trailing_comments: vec![],
4869 span: None,
4870 inferred_type: None,
4871 }),
4872 start_expr.clone(),
4873 end_expr.clone(),
4874 ],
4875 )));
4876 // DATEDIFF(...) + 1
4877 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
4878 left: datediff,
4879 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4880 left_comments: vec![],
4881 operator_comments: vec![],
4882 trailing_comments: vec![],
4883 inferred_type: None,
4884 }));
4885
4886 let array_gen_range = Expression::Function(Box::new(Function::new(
4887 "ARRAY_GENERATE_RANGE".to_string(),
4888 vec![
4889 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4890 datediff_plus_one,
4891 ],
4892 )));
4893
4894 // TABLE(FLATTEN(INPUT => ...))
4895 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
4896 name: Identifier::new("INPUT"),
4897 value: array_gen_range,
4898 separator: crate::expressions::NamedArgSeparator::DArrow,
4899 }));
4900 let flatten = Expression::Function(Box::new(Function::new(
4901 "FLATTEN".to_string(),
4902 vec![flatten_input],
4903 )));
4904
4905 // Determine alias name for the table: use outer alias or _t0
4906 let table_alias_name = outer_alias_info
4907 .as_ref()
4908 .map(|(name, _)| name.clone())
4909 .unwrap_or_else(|| "_t0".to_string());
4910
4911 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
4912 let table_func =
4913 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
4914 let flatten_aliased = Expression::Alias(Box::new(Alias {
4915 this: table_func,
4916 alias: Identifier::new(&table_alias_name),
4917 column_aliases: vec![
4918 Identifier::new("seq"),
4919 Identifier::new("key"),
4920 Identifier::new("path"),
4921 Identifier::new("index"),
4922 Identifier::new(&col_name),
4923 Identifier::new("this"),
4924 ],
4925 alias_explicit_as: false,
4926 alias_keyword: None,
4927 pre_alias_comments: vec![],
4928 trailing_comments: vec![],
4929 inferred_type: None,
4930 }));
4931
4932 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
4933 let dateadd_expr = Expression::Function(Box::new(Function::new(
4934 "DATEADD".to_string(),
4935 vec![
4936 Expression::boxed_column(Column {
4937 name: Identifier::new(&unit_str),
4938 table: None,
4939 join_mark: false,
4940 trailing_comments: vec![],
4941 span: None,
4942 inferred_type: None,
4943 }),
4944 Expression::Cast(Box::new(Cast {
4945 this: Expression::boxed_column(Column {
4946 name: Identifier::new(&col_name),
4947 table: None,
4948 join_mark: false,
4949 trailing_comments: vec![],
4950 span: None,
4951 inferred_type: None,
4952 }),
4953 to: DataType::Int {
4954 length: None,
4955 integer_spelling: false,
4956 },
4957 trailing_comments: vec![],
4958 double_colon_syntax: false,
4959 format: None,
4960 default: None,
4961 inferred_type: None,
4962 })),
4963 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
4964 start_expr.clone(),
4965 ],
4966 )));
4967 let dateadd_aliased = Expression::Alias(Box::new(Alias {
4968 this: dateadd_expr,
4969 alias: Identifier::new(&col_name),
4970 column_aliases: vec![],
4971 alias_explicit_as: false,
4972 alias_keyword: None,
4973 pre_alias_comments: vec![],
4974 trailing_comments: vec![],
4975 inferred_type: None,
4976 }));
4977
4978 // Build inner SELECT
4979 let mut inner_select = Select::new();
4980 inner_select.expressions = vec![dateadd_aliased];
4981 inner_select.from = Some(From {
4982 expressions: vec![flatten_aliased],
4983 });
4984
4985 let inner_select_expr = Expression::Select(Box::new(inner_select));
4986 let subquery = Expression::Subquery(Box::new(Subquery {
4987 this: inner_select_expr,
4988 alias: None,
4989 column_aliases: vec![],
4990 alias_explicit_as: false,
4991 alias_keyword: None,
4992 order_by: None,
4993 limit: None,
4994 offset: None,
4995 distribute_by: None,
4996 sort_by: None,
4997 cluster_by: None,
4998 lateral: false,
4999 modifiers_inside: false,
5000 trailing_comments: vec![],
5001 inferred_type: None,
5002 }));
5003
5004 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
5005 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
5006 Expression::Alias(Box::new(Alias {
5007 this: subquery,
5008 alias: Identifier::new(&alias_name),
5009 column_aliases: col_aliases,
5010 alias_explicit_as: false,
5011 alias_keyword: None,
5012 pre_alias_comments: vec![],
5013 trailing_comments: vec![],
5014 inferred_type: None,
5015 }))
5016 } else {
5017 subquery
5018 };
5019
5020 // Replace the FROM expression
5021 if let Some(ref mut from) = sel.from {
5022 from.expressions[from_idx] = replacement;
5023 }
5024
5025 Ok(Expression::Select(sel))
5026 }
5027
5028 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
5029 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
5030 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
5031 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
5032 use crate::expressions::*;
5033
5034 let start_expr = f.args[0].clone();
5035 let end_expr = f.args[1].clone();
5036 let step = f.args.get(2).cloned();
5037 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
5038 let col_name = "value";
5039
5040 // Build the inner subquery: same as try_transform_from_gda_snowflake
5041 let datediff = Expression::Function(Box::new(Function::new(
5042 "DATEDIFF".to_string(),
5043 vec![
5044 Expression::boxed_column(Column {
5045 name: Identifier::new(&unit_str),
5046 table: None,
5047 join_mark: false,
5048 trailing_comments: vec![],
5049 span: None,
5050 inferred_type: None,
5051 }),
5052 start_expr.clone(),
5053 end_expr.clone(),
5054 ],
5055 )));
5056 // DATEDIFF(...) + 1
5057 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
5058 left: datediff,
5059 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5060 left_comments: vec![],
5061 operator_comments: vec![],
5062 trailing_comments: vec![],
5063 inferred_type: None,
5064 }));
5065
5066 let array_gen_range = Expression::Function(Box::new(Function::new(
5067 "ARRAY_GENERATE_RANGE".to_string(),
5068 vec![
5069 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
5070 datediff_plus_one,
5071 ],
5072 )));
5073
5074 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
5075 name: Identifier::new("INPUT"),
5076 value: array_gen_range,
5077 separator: crate::expressions::NamedArgSeparator::DArrow,
5078 }));
5079 let flatten = Expression::Function(Box::new(Function::new(
5080 "FLATTEN".to_string(),
5081 vec![flatten_input],
5082 )));
5083
5084 let table_func =
5085 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
5086 let flatten_aliased = Expression::Alias(Box::new(Alias {
5087 this: table_func,
5088 alias: Identifier::new("_t0"),
5089 column_aliases: vec![
5090 Identifier::new("seq"),
5091 Identifier::new("key"),
5092 Identifier::new("path"),
5093 Identifier::new("index"),
5094 Identifier::new(col_name),
5095 Identifier::new("this"),
5096 ],
5097 alias_explicit_as: false,
5098 alias_keyword: None,
5099 pre_alias_comments: vec![],
5100 trailing_comments: vec![],
5101 inferred_type: None,
5102 }));
5103
5104 let dateadd_expr = Expression::Function(Box::new(Function::new(
5105 "DATEADD".to_string(),
5106 vec![
5107 Expression::boxed_column(Column {
5108 name: Identifier::new(&unit_str),
5109 table: None,
5110 join_mark: false,
5111 trailing_comments: vec![],
5112 span: None,
5113 inferred_type: None,
5114 }),
5115 Expression::Cast(Box::new(Cast {
5116 this: Expression::boxed_column(Column {
5117 name: Identifier::new(col_name),
5118 table: None,
5119 join_mark: false,
5120 trailing_comments: vec![],
5121 span: None,
5122 inferred_type: None,
5123 }),
5124 to: DataType::Int {
5125 length: None,
5126 integer_spelling: false,
5127 },
5128 trailing_comments: vec![],
5129 double_colon_syntax: false,
5130 format: None,
5131 default: None,
5132 inferred_type: None,
5133 })),
5134 start_expr.clone(),
5135 ],
5136 )));
5137 let dateadd_aliased = Expression::Alias(Box::new(Alias {
5138 this: dateadd_expr,
5139 alias: Identifier::new(col_name),
5140 column_aliases: vec![],
5141 alias_explicit_as: false,
5142 alias_keyword: None,
5143 pre_alias_comments: vec![],
5144 trailing_comments: vec![],
5145 inferred_type: None,
5146 }));
5147
5148 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
5149 let mut inner_select = Select::new();
5150 inner_select.expressions = vec![dateadd_aliased];
5151 inner_select.from = Some(From {
5152 expressions: vec![flatten_aliased],
5153 });
5154
5155 // Wrap in subquery for the inner part
5156 let inner_subquery = Expression::Subquery(Box::new(Subquery {
5157 this: Expression::Select(Box::new(inner_select)),
5158 alias: None,
5159 column_aliases: vec![],
5160 alias_explicit_as: false,
5161 alias_keyword: None,
5162 order_by: None,
5163 limit: None,
5164 offset: None,
5165 distribute_by: None,
5166 sort_by: None,
5167 cluster_by: None,
5168 lateral: false,
5169 modifiers_inside: false,
5170 trailing_comments: vec![],
5171 inferred_type: None,
5172 }));
5173
5174 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
5175 let star = Expression::Star(Star {
5176 table: None,
5177 except: None,
5178 replace: None,
5179 rename: None,
5180 trailing_comments: vec![],
5181 span: None,
5182 });
5183 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
5184 this: star,
5185 distinct: false,
5186 filter: None,
5187 order_by: vec![],
5188 name: Some("ARRAY_AGG".to_string()),
5189 ignore_nulls: None,
5190 having_max: None,
5191 limit: None,
5192 inferred_type: None,
5193 }));
5194
5195 let mut outer_select = Select::new();
5196 outer_select.expressions = vec![array_agg];
5197 outer_select.from = Some(From {
5198 expressions: vec![inner_subquery],
5199 });
5200
5201 // Wrap in a subquery
5202 let outer_subquery = Expression::Subquery(Box::new(Subquery {
5203 this: Expression::Select(Box::new(outer_select)),
5204 alias: None,
5205 column_aliases: vec![],
5206 alias_explicit_as: false,
5207 alias_keyword: None,
5208 order_by: None,
5209 limit: None,
5210 offset: None,
5211 distribute_by: None,
5212 sort_by: None,
5213 cluster_by: None,
5214 lateral: false,
5215 modifiers_inside: false,
5216 trailing_comments: vec![],
5217 inferred_type: None,
5218 }));
5219
5220 // ARRAY_SIZE(subquery)
5221 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
5222 outer_subquery,
5223 ))))
5224 }
5225
5226 /// Extract interval unit string from an optional step expression.
5227 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
5228 use crate::expressions::*;
5229 if let Some(Expression::Interval(ref iv)) = step {
5230 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
5231 return Some(format!("{:?}", unit).to_ascii_uppercase());
5232 }
5233 if let Some(ref this) = iv.this {
5234 if let Expression::Literal(lit) = this {
5235 if let Literal::String(ref s) = lit.as_ref() {
5236 let parts: Vec<&str> = s.split_whitespace().collect();
5237 if parts.len() == 2 {
5238 return Some(parts[1].to_ascii_uppercase());
5239 } else if parts.len() == 1 {
5240 let upper = parts[0].to_ascii_uppercase();
5241 if matches!(
5242 upper.as_str(),
5243 "YEAR"
5244 | "QUARTER"
5245 | "MONTH"
5246 | "WEEK"
5247 | "DAY"
5248 | "HOUR"
5249 | "MINUTE"
5250 | "SECOND"
5251 ) {
5252 return Some(upper);
5253 }
5254 }
5255 }
5256 }
5257 }
5258 }
5259 // Default to DAY if no step or no interval
5260 if step.is_none() {
5261 return Some("DAY".to_string());
5262 }
5263 None
5264 }
5265
5266 fn normalize_snowflake_pretty(mut sql: String) -> String {
5267 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
5268 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
5269 {
5270 sql = sql.replace(
5271 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
5272 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
5273 );
5274
5275 sql = sql.replace(
5276 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
5277 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
5278 );
5279
5280 sql = sql.replace(
5281 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
5282 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
5283 );
5284 }
5285
5286 sql
5287 }
5288
5289 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
5290 /// This handles cases where the same syntax has different semantics across dialects.
5291 fn cross_dialect_normalize(
5292 expr: Expression,
5293 source: DialectType,
5294 target: DialectType,
5295 ) -> Result<Expression> {
5296 use crate::expressions::{
5297 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
5298 Function, Identifier, IsNull, Literal, Null, Paren,
5299 };
5300
5301 // Helper to tag which kind of transform to apply
5302 #[derive(Debug)]
5303 enum Action {
5304 None,
5305 GreatestLeastNull,
5306 ArrayGenerateRange,
5307 Div0TypedDivision,
5308 ArrayAggCollectList,
5309 ArrayAggWithinGroupFilter,
5310 ArrayAggFilter,
5311 CastTimestampToDatetime,
5312 DateTruncWrapCast,
5313 ToDateToCast,
5314 ConvertTimezoneToExpr,
5315 SetToVariable,
5316 RegexpReplaceSnowflakeToDuckDB,
5317 BigQueryFunctionNormalize,
5318 BigQuerySafeDivide,
5319 BigQueryCastType,
5320 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
5321 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
5322 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
5323 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
5324 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
5325 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
5326 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5327 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
5328 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
5329 EpochConvert, // Expression::Epoch -> target-specific epoch function
5330 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
5331 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
5332 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
5333 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
5334 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
5335 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
5336 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
5337 TempTableHash, // TSQL #table -> temp table normalization
5338 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
5339 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
5340 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
5341 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
5342 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
5343 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
5344 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
5345 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
5346 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
5347 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
5348 DuckDBCastJsonToVariant, // DuckDB CAST(x AS JSON) -> CAST(x AS VARIANT) for Snowflake
5349 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
5350 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
5351 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
5352 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
5353 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
5354 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
5355 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
5356 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
5357 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
5358 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
5359 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
5360 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
5361 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
5362 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
5363 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
5364 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
5365 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5366 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
5367 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
5368 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
5369 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
5370 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
5371 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
5372 DollarParamConvert, // $foo -> @foo for BigQuery
5373 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
5374 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
5375 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
5376 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
5377 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
5378 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
5379 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
5380 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
5381 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
5382 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
5383 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
5384 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
5385 RespectNullsConvert, // RESPECT NULLS window function handling
5386 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
5387 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
5388 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
5389 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
5390 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
5391 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
5392 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
5393 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
5394 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
5395 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
5396 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
5397 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
5398 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
5399 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
5400 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
5401 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
5402 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
5403 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
5404 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
5405 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
5406 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
5407 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
5408 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
5409 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
5410 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
5411 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
5412 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
5413 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
5414 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
5415 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
5416 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5417 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5418 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
5419 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
5420 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
5421 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
5422 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
5423 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
5424 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
5425 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
5426 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
5427 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
5428 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
5429 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
5430 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
5431 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
5432 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
5433 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
5434 DecodeSimplify, // DECODE with null-safe -> simple = comparison
5435 ArraySumConvert, // ARRAY_SUM -> target-specific
5436 ArraySizeConvert, // ARRAY_SIZE -> target-specific
5437 ArrayAnyConvert, // ARRAY_ANY -> target-specific
5438 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
5439 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
5440 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
5441 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
5442 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
5443 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
5444 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
5445 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
5446 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
5447 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
5448 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
5449 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
5450 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
5451 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
5452 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
5453 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
5454 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
5455 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
5456 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
5457 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
5458 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5459 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
5460 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
5461 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
5462 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
5463 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
5464 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
5465 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
5466 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
5467 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
5468 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
5469 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
5470 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
5471 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
5472 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
5473 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
5474 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
5475 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
5476 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
5477 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
5478 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
5479 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
5480 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
5481 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
5482 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
5483 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
5484 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
5485 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
5486 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
5487 }
5488
5489 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
5490 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
5491 Self::transform_select_into(expr, source, target)
5492 } else {
5493 expr
5494 };
5495
5496 // Strip OFFSET ROWS for non-TSQL/Oracle targets
5497 let expr = if !matches!(
5498 target,
5499 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
5500 ) {
5501 if let Expression::Select(mut select) = expr {
5502 if let Some(ref mut offset) = select.offset {
5503 offset.rows = None;
5504 }
5505 Expression::Select(select)
5506 } else {
5507 expr
5508 }
5509 } else {
5510 expr
5511 };
5512
5513 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
5514 let expr = if matches!(target, DialectType::Oracle) {
5515 if let Expression::Select(mut select) = expr {
5516 if let Some(limit) = select.limit.take() {
5517 // Convert LIMIT to FETCH FIRST n ROWS ONLY
5518 select.fetch = Some(crate::expressions::Fetch {
5519 direction: "FIRST".to_string(),
5520 count: Some(limit.this),
5521 percent: false,
5522 rows: true,
5523 with_ties: false,
5524 });
5525 }
5526 // Add ROWS to OFFSET if present
5527 if let Some(ref mut offset) = select.offset {
5528 offset.rows = Some(true);
5529 }
5530 Expression::Select(select)
5531 } else {
5532 expr
5533 }
5534 } else {
5535 expr
5536 };
5537
5538 // Handle CreateTable WITH properties transformation before recursive transforms
5539 let expr = if let Expression::CreateTable(mut ct) = expr {
5540 Self::transform_create_table_properties(&mut ct, source, target);
5541
5542 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
5543 // When the PARTITIONED BY clause contains column definitions, merge them into the
5544 // main column list and adjust the PARTITIONED BY clause for the target dialect.
5545 if matches!(
5546 source,
5547 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5548 ) {
5549 let mut partition_col_names: Vec<String> = Vec::new();
5550 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
5551 let mut has_col_def_partitions = false;
5552
5553 // Check if any PARTITIONED BY property contains ColumnDef expressions
5554 for prop in &ct.properties {
5555 if let Expression::PartitionedByProperty(ref pbp) = prop {
5556 if let Expression::Tuple(ref tuple) = *pbp.this {
5557 for expr in &tuple.expressions {
5558 if let Expression::ColumnDef(ref cd) = expr {
5559 has_col_def_partitions = true;
5560 partition_col_names.push(cd.name.name.clone());
5561 partition_col_defs.push(*cd.clone());
5562 }
5563 }
5564 }
5565 }
5566 }
5567
5568 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
5569 // Merge partition columns into main column list
5570 for cd in partition_col_defs {
5571 ct.columns.push(cd);
5572 }
5573
5574 // Replace PARTITIONED BY property with column-name-only version
5575 ct.properties
5576 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
5577
5578 if matches!(
5579 target,
5580 DialectType::Presto | DialectType::Trino | DialectType::Athena
5581 ) {
5582 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
5583 let array_elements: Vec<String> = partition_col_names
5584 .iter()
5585 .map(|n| format!("'{}'", n))
5586 .collect();
5587 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
5588 ct.with_properties
5589 .push(("PARTITIONED_BY".to_string(), array_value));
5590 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5591 // Spark: PARTITIONED BY (y, z) - just column names
5592 let name_exprs: Vec<Expression> = partition_col_names
5593 .iter()
5594 .map(|n| {
5595 Expression::Column(Box::new(crate::expressions::Column {
5596 name: crate::expressions::Identifier::new(n.clone()),
5597 table: None,
5598 join_mark: false,
5599 trailing_comments: Vec::new(),
5600 span: None,
5601 inferred_type: None,
5602 }))
5603 })
5604 .collect();
5605 ct.properties.insert(
5606 0,
5607 Expression::PartitionedByProperty(Box::new(
5608 crate::expressions::PartitionedByProperty {
5609 this: Box::new(Expression::Tuple(Box::new(
5610 crate::expressions::Tuple {
5611 expressions: name_exprs,
5612 },
5613 ))),
5614 },
5615 )),
5616 );
5617 }
5618 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
5619 }
5620
5621 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
5622 // are handled by transform_create_table_properties which runs first
5623 }
5624
// Strip the LOCATION property for Presto/Trino/Athena targets (not supported there):
// every `Expression::LocationProperty` entry is removed from the table properties.
if matches!(
    target,
    DialectType::Presto | DialectType::Trino | DialectType::Athena
) {
    ct.properties
        .retain(|p| !matches!(p, Expression::LocationProperty(_)));
}
5633
5634 // Strip table-level constraints for Spark/Hive/Databricks
5635 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
5636 if matches!(
5637 target,
5638 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5639 ) {
5640 ct.constraints.retain(|c| {
5641 matches!(
5642 c,
5643 crate::expressions::TableConstraint::PrimaryKey { .. }
5644 | crate::expressions::TableConstraint::Like { .. }
5645 )
5646 });
5647 for constraint in &mut ct.constraints {
5648 if let crate::expressions::TableConstraint::PrimaryKey {
5649 columns,
5650 modifiers,
5651 ..
5652 } = constraint
5653 {
5654 // Strip ASC/DESC from column names
5655 for col in columns.iter_mut() {
5656 if col.name.ends_with(" ASC") {
5657 col.name = col.name[..col.name.len() - 4].to_string();
5658 } else if col.name.ends_with(" DESC") {
5659 col.name = col.name[..col.name.len() - 5].to_string();
5660 }
5661 }
5662 // Strip TSQL-specific modifiers
5663 modifiers.clustered = None;
5664 modifiers.with_options.clear();
5665 modifiers.on_filegroup = None;
5666 }
5667 }
5668 }
5669
5670 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
5671 if matches!(target, DialectType::Databricks) {
5672 for col in &mut ct.columns {
5673 if col.auto_increment {
5674 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
5675 col.data_type = crate::expressions::DataType::BigInt { length: None };
5676 }
5677 }
5678 }
5679 }
5680
5681 // Spark/Databricks: INTEGER -> INT in column definitions
5682 // Python sqlglot always outputs INT for Spark/Databricks
5683 if matches!(target, DialectType::Spark | DialectType::Databricks) {
5684 for col in &mut ct.columns {
5685 if let crate::expressions::DataType::Int {
5686 integer_spelling, ..
5687 } = &mut col.data_type
5688 {
5689 *integer_spelling = false;
5690 }
5691 }
5692 }
5693
5694 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
5695 if matches!(target, DialectType::Hive | DialectType::Spark) {
5696 for col in &mut ct.columns {
5697 // If nullable is explicitly true (NULL), change to None (omit it)
5698 if col.nullable == Some(true) {
5699 col.nullable = None;
5700 }
5701 // Also remove from constraints if stored there
5702 col.constraints
5703 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
5704 }
5705 }
5706
5707 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
5708 if ct.on_property.is_some()
5709 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
5710 {
5711 ct.on_property = None;
5712 }
5713
5714 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
5715 // Snowflake doesn't support typed arrays in DDL
5716 if matches!(target, DialectType::Snowflake) {
5717 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
5718 if let crate::expressions::DataType::Array { .. } = dt {
5719 *dt = crate::expressions::DataType::Custom {
5720 name: "ARRAY".to_string(),
5721 };
5722 }
5723 }
5724 for col in &mut ct.columns {
5725 strip_array_type_params(&mut col.data_type);
5726 }
5727 }
5728
5729 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
5730 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
5731 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
5732 if matches!(target, DialectType::PostgreSQL) {
5733 for col in &mut ct.columns {
5734 if col.auto_increment && !col.constraint_order.is_empty() {
5735 use crate::expressions::ConstraintType;
5736 let has_explicit_not_null = col
5737 .constraint_order
5738 .iter()
5739 .any(|ct| *ct == ConstraintType::NotNull);
5740
5741 if has_explicit_not_null {
5742 // Source had explicit NOT NULL - preserve original order
5743 // Just ensure nullable is set
5744 if col.nullable != Some(false) {
5745 col.nullable = Some(false);
5746 }
5747 } else {
5748 // Source didn't have explicit NOT NULL - build order with
5749 // AutoIncrement + NotNull first, then remaining constraints
5750 let mut new_order = Vec::new();
5751 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
5752 new_order.push(ConstraintType::AutoIncrement);
5753 new_order.push(ConstraintType::NotNull);
5754 // Add remaining constraints in original order (except AutoIncrement)
5755 for ct_type in &col.constraint_order {
5756 if *ct_type != ConstraintType::AutoIncrement {
5757 new_order.push(ct_type.clone());
5758 }
5759 }
5760 col.constraint_order = new_order;
5761 col.nullable = Some(false);
5762 }
5763 }
5764 }
5765 }
5766
5767 Expression::CreateTable(ct)
5768 } else {
5769 expr
5770 };
5771
5772 // Handle CreateView column stripping for Presto/Trino target
5773 let expr = if let Expression::CreateView(mut cv) = expr {
5774 // Presto/Trino: drop column list when view has a SELECT body
5775 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
5776 {
5777 if !matches!(&cv.query, Expression::Null(_)) {
5778 cv.columns.clear();
5779 }
5780 }
5781 Expression::CreateView(cv)
5782 } else {
5783 expr
5784 };
5785
5786 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
5787 let expr = if !matches!(
5788 target,
5789 DialectType::Presto | DialectType::Trino | DialectType::Athena
5790 ) {
5791 if let Expression::Select(mut select) = expr {
5792 if let Some(ref mut with) = select.with {
5793 for cte in &mut with.ctes {
5794 if let Expression::Values(ref vals) = cte.this {
5795 // Build: SELECT * FROM (VALUES ...) AS _values
5796 let values_subquery =
5797 Expression::Subquery(Box::new(crate::expressions::Subquery {
5798 this: Expression::Values(vals.clone()),
5799 alias: Some(Identifier::new("_values".to_string())),
5800 column_aliases: Vec::new(),
5801 alias_explicit_as: false,
5802 alias_keyword: None,
5803 order_by: None,
5804 limit: None,
5805 offset: None,
5806 distribute_by: None,
5807 sort_by: None,
5808 cluster_by: None,
5809 lateral: false,
5810 modifiers_inside: false,
5811 trailing_comments: Vec::new(),
5812 inferred_type: None,
5813 }));
5814 let mut new_select = crate::expressions::Select::new();
5815 new_select.expressions =
5816 vec![Expression::Star(crate::expressions::Star {
5817 table: None,
5818 except: None,
5819 replace: None,
5820 rename: None,
5821 trailing_comments: Vec::new(),
5822 span: None,
5823 })];
5824 new_select.from = Some(crate::expressions::From {
5825 expressions: vec![values_subquery],
5826 });
5827 cte.this = Expression::Select(Box::new(new_select));
5828 }
5829 }
5830 }
5831 Expression::Select(select)
5832 } else {
5833 expr
5834 }
5835 } else {
5836 expr
5837 };
5838
5839 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
5840 let expr = if matches!(target, DialectType::PostgreSQL) {
5841 if let Expression::CreateIndex(mut ci) = expr {
5842 for col in &mut ci.columns {
5843 if col.nulls_first.is_none() {
5844 col.nulls_first = Some(true);
5845 }
5846 }
5847 Expression::CreateIndex(ci)
5848 } else {
5849 expr
5850 }
5851 } else {
5852 expr
5853 };
5854
5855 transform_recursive(expr, &|e| {
5856 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
5857 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
5858 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
5859 if let Expression::Cast(ref c) = e {
5860 // Check if this is a CAST of an array to a struct array type
5861 let is_struct_array_cast =
5862 matches!(&c.to, crate::expressions::DataType::Array { .. });
5863 if is_struct_array_cast {
5864 let has_auto_named_structs = match &c.this {
5865 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
5866 if let Expression::Struct(s) = elem {
5867 s.fields.iter().all(|(name, _)| {
5868 name.as_ref().map_or(true, |n| {
5869 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
5870 })
5871 })
5872 } else {
5873 false
5874 }
5875 }),
5876 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
5877 if let Expression::Struct(s) = elem {
5878 s.fields.iter().all(|(name, _)| {
5879 name.as_ref().map_or(true, |n| {
5880 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
5881 })
5882 })
5883 } else {
5884 false
5885 }
5886 }),
5887 _ => false,
5888 };
5889 if has_auto_named_structs {
5890 let convert_struct_to_row = |elem: Expression| -> Expression {
5891 if let Expression::Struct(s) = elem {
5892 let row_args: Vec<Expression> =
5893 s.fields.into_iter().map(|(_, v)| v).collect();
5894 Expression::Function(Box::new(Function::new(
5895 "ROW".to_string(),
5896 row_args,
5897 )))
5898 } else {
5899 elem
5900 }
5901 };
5902 let mut c_clone = c.as_ref().clone();
5903 match &mut c_clone.this {
5904 Expression::Array(arr) => {
5905 arr.expressions = arr
5906 .expressions
5907 .drain(..)
5908 .map(convert_struct_to_row)
5909 .collect();
5910 }
5911 Expression::ArrayFunc(arr) => {
5912 arr.expressions = arr
5913 .expressions
5914 .drain(..)
5915 .map(convert_struct_to_row)
5916 .collect();
5917 }
5918 _ => {}
5919 }
5920 return Ok(Expression::Cast(Box::new(c_clone)));
5921 }
5922 }
5923 }
5924 }
5925
5926 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
5927 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
5928 if let Expression::Select(ref sel) = e {
5929 if sel.kind.as_deref() == Some("STRUCT") {
5930 let mut fields = Vec::new();
5931 for expr in &sel.expressions {
5932 match expr {
5933 Expression::Alias(a) => {
5934 fields.push((Some(a.alias.name.clone()), a.this.clone()));
5935 }
5936 Expression::Column(c) => {
5937 fields.push((Some(c.name.name.clone()), expr.clone()));
5938 }
5939 _ => {
5940 fields.push((None, expr.clone()));
5941 }
5942 }
5943 }
5944 let struct_lit =
5945 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
5946 let mut new_select = sel.as_ref().clone();
5947 new_select.kind = None;
5948 new_select.expressions = vec![struct_lit];
5949 return Ok(Expression::Select(Box::new(new_select)));
5950 }
5951 }
5952 }
5953
5954 // Convert @variable -> ${variable} for Spark/Hive/Databricks
5955 if matches!(source, DialectType::TSQL | DialectType::Fabric)
5956 && matches!(
5957 target,
5958 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5959 )
5960 {
5961 if let Expression::Parameter(ref p) = e {
5962 if p.style == crate::expressions::ParameterStyle::At {
5963 if let Some(ref name) = p.name {
5964 return Ok(Expression::Parameter(Box::new(
5965 crate::expressions::Parameter {
5966 name: Some(name.clone()),
5967 index: p.index,
5968 style: crate::expressions::ParameterStyle::DollarBrace,
5969 quoted: p.quoted,
5970 string_quoted: p.string_quoted,
5971 expression: None,
5972 },
5973 )));
5974 }
5975 }
5976 }
5977 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
5978 if let Expression::Column(ref col) = e {
5979 if col.name.name.starts_with('@') && col.table.is_none() {
5980 let var_name = col.name.name.trim_start_matches('@').to_string();
5981 return Ok(Expression::Parameter(Box::new(
5982 crate::expressions::Parameter {
5983 name: Some(var_name),
5984 index: None,
5985 style: crate::expressions::ParameterStyle::DollarBrace,
5986 quoted: false,
5987 string_quoted: false,
5988 expression: None,
5989 },
5990 )));
5991 }
5992 }
5993 }
5994
5995 // Convert @variable -> variable in SET statements for Spark/Databricks
5996 if matches!(source, DialectType::TSQL | DialectType::Fabric)
5997 && matches!(target, DialectType::Spark | DialectType::Databricks)
5998 {
5999 if let Expression::SetStatement(ref s) = e {
6000 let mut new_items = s.items.clone();
6001 let mut changed = false;
6002 for item in &mut new_items {
6003 // Strip @ from the SET name (Parameter style)
6004 if let Expression::Parameter(ref p) = item.name {
6005 if p.style == crate::expressions::ParameterStyle::At {
6006 if let Some(ref name) = p.name {
6007 item.name = Expression::Identifier(Identifier::new(name));
6008 changed = true;
6009 }
6010 }
6011 }
6012 // Strip @ from the SET name (Identifier style - SET parser)
6013 if let Expression::Identifier(ref id) = item.name {
6014 if id.name.starts_with('@') {
6015 let var_name = id.name.trim_start_matches('@').to_string();
6016 item.name = Expression::Identifier(Identifier::new(&var_name));
6017 changed = true;
6018 }
6019 }
6020 // Strip @ from the SET name (Column style - alternative parsing)
6021 if let Expression::Column(ref col) = item.name {
6022 if col.name.name.starts_with('@') && col.table.is_none() {
6023 let var_name = col.name.name.trim_start_matches('@').to_string();
6024 item.name = Expression::Identifier(Identifier::new(&var_name));
6025 changed = true;
6026 }
6027 }
6028 }
6029 if changed {
6030 let mut new_set = (**s).clone();
6031 new_set.items = new_items;
6032 return Ok(Expression::SetStatement(Box::new(new_set)));
6033 }
6034 }
6035 }
6036
// Strip table hints (e.g. NOLOCK) when going from TSQL/Fabric to any other target.
// Note that the whole `hints` list of the table reference is cleared, not just NOLOCK;
// tables without hints fall through untouched.
if matches!(source, DialectType::TSQL | DialectType::Fabric)
    && !matches!(target, DialectType::TSQL | DialectType::Fabric)
{
    if let Expression::Table(ref tr) = e {
        if !tr.hints.is_empty() {
            // Work on a clone so the original node is only replaced when it had hints.
            let mut new_tr = tr.clone();
            new_tr.hints.clear();
            return Ok(Expression::Table(new_tr));
        }
    }
}
6049
6050 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
6051 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
6052 if matches!(target, DialectType::Snowflake) {
6053 if let Expression::IsTrue(ref itf) = e {
6054 if let Expression::Boolean(ref b) = itf.this {
6055 if !itf.not {
6056 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6057 value: b.value,
6058 }));
6059 } else {
6060 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6061 value: !b.value,
6062 }));
6063 }
6064 }
6065 }
6066 if let Expression::IsFalse(ref itf) = e {
6067 if let Expression::Boolean(ref b) = itf.this {
6068 if !itf.not {
6069 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6070 value: !b.value,
6071 }));
6072 } else {
6073 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6074 value: b.value,
6075 }));
6076 }
6077 }
6078 }
6079 }
6080
// BigQuery: split dotted backtick identifiers in table names
// e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery.
// Applies to CREATE TABLE only, covering both the created table's name and the
// CLONE source name. A name is split only when it has no schema part yet and its
// identifier text contains a '.'; two parts map to schema.name, three parts to
// catalog.schema.name, and any other part count leaves the name untouched.
if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
    if let Expression::CreateTable(ref ct) = e {
        // Tracks whether anything was rewritten, so an unchanged node falls through.
        let mut changed = false;
        let mut new_ct = ct.clone();
        // Split the table name
        if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
            let parts: Vec<&str> = ct.name.name.name.split('.').collect();
            // Use quoted identifiers when the original was quoted (backtick in BigQuery)
            let was_quoted = ct.name.name.quoted;
            let mk_id = |s: &str| {
                if was_quoted {
                    Identifier::quoted(s)
                } else {
                    Identifier::new(s)
                }
            };
            if parts.len() == 3 {
                new_ct.name.catalog = Some(mk_id(parts[0]));
                new_ct.name.schema = Some(mk_id(parts[1]));
                new_ct.name.name = mk_id(parts[2]);
                changed = true;
            } else if parts.len() == 2 {
                new_ct.name.schema = Some(mk_id(parts[0]));
                new_ct.name.name = mk_id(parts[1]);
                changed = true;
            }
        }
        // Split the clone source name
        if let Some(ref clone_src) = ct.clone_source {
            if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                // Each new part inherits the quoting of the original dotted identifier.
                let was_quoted = clone_src.name.quoted;
                let mk_id = |s: &str| {
                    if was_quoted {
                        Identifier::quoted(s)
                    } else {
                        Identifier::new(s)
                    }
                };
                let mut new_src = clone_src.clone();
                if parts.len() == 3 {
                    new_src.catalog = Some(mk_id(parts[0]));
                    new_src.schema = Some(mk_id(parts[1]));
                    new_src.name = mk_id(parts[2]);
                    new_ct.clone_source = Some(new_src);
                    changed = true;
                } else if parts.len() == 2 {
                    new_src.schema = Some(mk_id(parts[0]));
                    new_src.name = mk_id(parts[1]);
                    new_ct.clone_source = Some(new_src);
                    changed = true;
                }
            }
        }
        if changed {
            return Ok(Expression::CreateTable(new_ct));
        }
    }
}
6142
6143 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
6144 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
6145 if matches!(source, DialectType::BigQuery)
6146 && matches!(
6147 target,
6148 DialectType::DuckDB
6149 | DialectType::Presto
6150 | DialectType::Trino
6151 | DialectType::Athena
6152 )
6153 {
6154 if let Expression::Subscript(ref sub) = e {
6155 let (new_index, is_safe) = match &sub.index {
6156 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
6157 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
6158 let Literal::Number(n) = lit.as_ref() else {
6159 unreachable!()
6160 };
6161 if let Ok(val) = n.parse::<i64>() {
6162 (
6163 Some(Expression::Literal(Box::new(Literal::Number(
6164 (val + 1).to_string(),
6165 )))),
6166 false,
6167 )
6168 } else {
6169 (None, false)
6170 }
6171 }
6172 // OFFSET(n) -> n+1 (0-based)
6173 Expression::Function(ref f)
6174 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
6175 {
6176 if let Expression::Literal(lit) = &f.args[0] {
6177 if let Literal::Number(n) = lit.as_ref() {
6178 if let Ok(val) = n.parse::<i64>() {
6179 (
6180 Some(Expression::Literal(Box::new(Literal::Number(
6181 (val + 1).to_string(),
6182 )))),
6183 false,
6184 )
6185 } else {
6186 (
6187 Some(Expression::Add(Box::new(
6188 crate::expressions::BinaryOp::new(
6189 f.args[0].clone(),
6190 Expression::number(1),
6191 ),
6192 ))),
6193 false,
6194 )
6195 }
6196 } else {
6197 (None, false)
6198 }
6199 } else {
6200 (
6201 Some(Expression::Add(Box::new(
6202 crate::expressions::BinaryOp::new(
6203 f.args[0].clone(),
6204 Expression::number(1),
6205 ),
6206 ))),
6207 false,
6208 )
6209 }
6210 }
6211 // ORDINAL(n) -> n (already 1-based)
6212 Expression::Function(ref f)
6213 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
6214 {
6215 (Some(f.args[0].clone()), false)
6216 }
6217 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
6218 Expression::Function(ref f)
6219 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
6220 {
6221 if let Expression::Literal(lit) = &f.args[0] {
6222 if let Literal::Number(n) = lit.as_ref() {
6223 if let Ok(val) = n.parse::<i64>() {
6224 (
6225 Some(Expression::Literal(Box::new(Literal::Number(
6226 (val + 1).to_string(),
6227 )))),
6228 true,
6229 )
6230 } else {
6231 (
6232 Some(Expression::Add(Box::new(
6233 crate::expressions::BinaryOp::new(
6234 f.args[0].clone(),
6235 Expression::number(1),
6236 ),
6237 ))),
6238 true,
6239 )
6240 }
6241 } else {
6242 (None, false)
6243 }
6244 } else {
6245 (
6246 Some(Expression::Add(Box::new(
6247 crate::expressions::BinaryOp::new(
6248 f.args[0].clone(),
6249 Expression::number(1),
6250 ),
6251 ))),
6252 true,
6253 )
6254 }
6255 }
6256 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
6257 Expression::Function(ref f)
6258 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
6259 {
6260 (Some(f.args[0].clone()), true)
6261 }
6262 _ => (None, false),
6263 };
6264 if let Some(idx) = new_index {
6265 if is_safe
6266 && matches!(
6267 target,
6268 DialectType::Presto | DialectType::Trino | DialectType::Athena
6269 )
6270 {
6271 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
6272 return Ok(Expression::Function(Box::new(Function::new(
6273 "ELEMENT_AT".to_string(),
6274 vec![sub.this.clone(), idx],
6275 ))));
6276 } else {
6277 // DuckDB or non-safe: just use subscript with converted index
6278 return Ok(Expression::Subscript(Box::new(
6279 crate::expressions::Subscript {
6280 this: sub.this.clone(),
6281 index: idx,
6282 },
6283 )));
6284 }
6285 }
6286 }
6287 }
6288
6289 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
6290 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6291 if let Expression::Length(ref uf) = e {
6292 let arg = uf.this.clone();
6293 let typeof_func = Expression::Function(Box::new(Function::new(
6294 "TYPEOF".to_string(),
6295 vec![arg.clone()],
6296 )));
6297 let blob_cast = Expression::Cast(Box::new(Cast {
6298 this: arg.clone(),
6299 to: DataType::VarBinary { length: None },
6300 trailing_comments: vec![],
6301 double_colon_syntax: false,
6302 format: None,
6303 default: None,
6304 inferred_type: None,
6305 }));
6306 let octet_length = Expression::Function(Box::new(Function::new(
6307 "OCTET_LENGTH".to_string(),
6308 vec![blob_cast],
6309 )));
6310 let text_cast = Expression::Cast(Box::new(Cast {
6311 this: arg,
6312 to: DataType::Text,
6313 trailing_comments: vec![],
6314 double_colon_syntax: false,
6315 format: None,
6316 default: None,
6317 inferred_type: None,
6318 }));
6319 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
6320 this: text_cast,
6321 original_name: None,
6322 inferred_type: None,
6323 }));
6324 return Ok(Expression::Case(Box::new(Case {
6325 operand: Some(typeof_func),
6326 whens: vec![(
6327 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
6328 octet_length,
6329 )],
6330 else_: Some(length_text),
6331 comments: Vec::new(),
6332 inferred_type: None,
6333 })));
6334 }
6335 }
6336
6337 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
6338 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
6339 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
6340 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
6341 if let Expression::Alias(ref a) = e {
6342 if matches!(&a.this, Expression::Unnest(_)) {
6343 if a.column_aliases.is_empty() {
6344 // Drop the entire alias, return just the UNNEST expression
6345 return Ok(a.this.clone());
6346 } else {
6347 // Use first column alias as the main alias
6348 let mut new_alias = a.as_ref().clone();
6349 new_alias.alias = a.column_aliases[0].clone();
6350 new_alias.column_aliases.clear();
6351 return Ok(Expression::Alias(Box::new(new_alias)));
6352 }
6353 }
6354 }
6355 }
6356
6357 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
6358 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
6359 if let Expression::In(ref in_expr) = e {
6360 if let Some(ref unnest_inner) = in_expr.unnest {
6361 // Build the function call for the target dialect
6362 let func_expr = if matches!(
6363 target,
6364 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6365 ) {
6366 // Use EXPLODE for Hive/Spark
6367 Expression::Function(Box::new(Function::new(
6368 "EXPLODE".to_string(),
6369 vec![*unnest_inner.clone()],
6370 )))
6371 } else {
6372 // Use UNNEST for Presto/Trino/DuckDB/etc.
6373 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
6374 this: *unnest_inner.clone(),
6375 expressions: Vec::new(),
6376 with_ordinality: false,
6377 alias: None,
6378 offset_alias: None,
6379 }))
6380 };
6381
6382 // Wrap in SELECT
6383 let mut inner_select = crate::expressions::Select::new();
6384 inner_select.expressions = vec![func_expr];
6385
6386 let subquery_expr = Expression::Select(Box::new(inner_select));
6387
6388 return Ok(Expression::In(Box::new(crate::expressions::In {
6389 this: in_expr.this.clone(),
6390 expressions: Vec::new(),
6391 query: Some(subquery_expr),
6392 not: in_expr.not,
6393 global: in_expr.global,
6394 unnest: None,
6395 is_field: false,
6396 })));
6397 }
6398 }
6399 }
6400
6401 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
6402 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
6403 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
6404 if let Expression::Alias(ref a) = e {
6405 if let Expression::Function(ref f) = a.this {
6406 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
6407 && !a.column_aliases.is_empty()
6408 {
6409 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
6410 let col_alias = a.column_aliases[0].clone();
6411 let mut inner_select = crate::expressions::Select::new();
6412 inner_select.expressions =
6413 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
6414 Expression::Identifier(Identifier::new("value".to_string())),
6415 col_alias,
6416 )))];
6417 inner_select.from = Some(crate::expressions::From {
6418 expressions: vec![a.this.clone()],
6419 });
6420 let subquery =
6421 Expression::Subquery(Box::new(crate::expressions::Subquery {
6422 this: Expression::Select(Box::new(inner_select)),
6423 alias: Some(a.alias.clone()),
6424 column_aliases: Vec::new(),
6425 alias_explicit_as: false,
6426 alias_keyword: None,
6427 order_by: None,
6428 limit: None,
6429 offset: None,
6430 lateral: false,
6431 modifiers_inside: false,
6432 trailing_comments: Vec::new(),
6433 distribute_by: None,
6434 sort_by: None,
6435 cluster_by: None,
6436 inferred_type: None,
6437 }));
6438 return Ok(subquery);
6439 }
6440 }
6441 }
6442 }
6443
6444 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
6445 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
6446 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
6447 if matches!(source, DialectType::BigQuery) {
6448 if let Expression::Select(ref s) = e {
6449 if let Some(ref from) = s.from {
6450 if from.expressions.len() >= 2 {
6451 // Collect table names from first expression
6452 let first_tables: Vec<String> = from
6453 .expressions
6454 .iter()
6455 .take(1)
6456 .filter_map(|expr| {
6457 if let Expression::Table(t) = expr {
6458 Some(t.name.name.to_ascii_lowercase())
6459 } else {
6460 None
6461 }
6462 })
6463 .collect();
6464
6465 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
6466 // or have a dotted name matching a table
6467 let mut needs_rewrite = false;
6468 for expr in from.expressions.iter().skip(1) {
6469 if let Expression::Table(t) = expr {
6470 if let Some(ref schema) = t.schema {
6471 if first_tables.contains(&schema.name.to_ascii_lowercase())
6472 {
6473 needs_rewrite = true;
6474 break;
6475 }
6476 }
6477 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
6478 if t.schema.is_none() && t.name.name.contains('.') {
6479 let parts: Vec<&str> = t.name.name.split('.').collect();
6480 if parts.len() >= 2
6481 && first_tables.contains(&parts[0].to_ascii_lowercase())
6482 {
6483 needs_rewrite = true;
6484 break;
6485 }
6486 }
6487 }
6488 }
6489
6490 if needs_rewrite {
6491 let mut new_select = s.clone();
6492 let mut new_from_exprs = vec![from.expressions[0].clone()];
6493 let mut new_joins = s.joins.clone();
6494
6495 for expr in from.expressions.iter().skip(1) {
6496 if let Expression::Table(ref t) = expr {
6497 if let Some(ref schema) = t.schema {
6498 if first_tables
6499 .contains(&schema.name.to_ascii_lowercase())
6500 {
6501 // This is an array path reference, convert to CROSS JOIN UNNEST
6502 let col_expr = Expression::Column(Box::new(
6503 crate::expressions::Column {
6504 name: t.name.clone(),
6505 table: Some(schema.clone()),
6506 join_mark: false,
6507 trailing_comments: vec![],
6508 span: None,
6509 inferred_type: None,
6510 },
6511 ));
6512 let unnest_expr = Expression::Unnest(Box::new(
6513 crate::expressions::UnnestFunc {
6514 this: col_expr,
6515 expressions: Vec::new(),
6516 with_ordinality: false,
6517 alias: None,
6518 offset_alias: None,
6519 },
6520 ));
6521 let join_this = if let Some(ref alias) = t.alias {
6522 if matches!(
6523 target,
6524 DialectType::Presto
6525 | DialectType::Trino
6526 | DialectType::Athena
6527 ) {
6528 // Presto: UNNEST(x) AS _t0(results)
6529 Expression::Alias(Box::new(
6530 crate::expressions::Alias {
6531 this: unnest_expr,
6532 alias: Identifier::new("_t0"),
6533 column_aliases: vec![alias.clone()],
6534 alias_explicit_as: false,
6535 alias_keyword: None,
6536 pre_alias_comments: vec![],
6537 trailing_comments: vec![],
6538 inferred_type: None,
6539 },
6540 ))
6541 } else {
6542 // BigQuery: UNNEST(x) AS results
6543 Expression::Alias(Box::new(
6544 crate::expressions::Alias {
6545 this: unnest_expr,
6546 alias: alias.clone(),
6547 column_aliases: vec![],
6548 alias_explicit_as: false,
6549 alias_keyword: None,
6550 pre_alias_comments: vec![],
6551 trailing_comments: vec![],
6552 inferred_type: None,
6553 },
6554 ))
6555 }
6556 } else {
6557 unnest_expr
6558 };
6559 new_joins.push(crate::expressions::Join {
6560 kind: crate::expressions::JoinKind::Cross,
6561 this: join_this,
6562 on: None,
6563 using: Vec::new(),
6564 use_inner_keyword: false,
6565 use_outer_keyword: false,
6566 deferred_condition: false,
6567 join_hint: None,
6568 match_condition: None,
6569 pivots: Vec::new(),
6570 comments: Vec::new(),
6571 nesting_group: 0,
6572 directed: false,
6573 });
6574 } else {
6575 new_from_exprs.push(expr.clone());
6576 }
6577 } else if t.schema.is_none() && t.name.name.contains('.') {
6578 // Dotted name in quoted identifier: `Coordinates.position`
6579 let parts: Vec<&str> = t.name.name.split('.').collect();
6580 if parts.len() >= 2
6581 && first_tables
6582 .contains(&parts[0].to_ascii_lowercase())
6583 {
6584 let join_this =
6585 if matches!(target, DialectType::BigQuery) {
6586 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
6587 Expression::Table(t.clone())
6588 } else {
6589 // Other targets: split into "schema"."name"
6590 let mut new_t = t.clone();
6591 new_t.schema =
6592 Some(Identifier::quoted(parts[0]));
6593 new_t.name = Identifier::quoted(parts[1]);
6594 Expression::Table(new_t)
6595 };
6596 new_joins.push(crate::expressions::Join {
6597 kind: crate::expressions::JoinKind::Cross,
6598 this: join_this,
6599 on: None,
6600 using: Vec::new(),
6601 use_inner_keyword: false,
6602 use_outer_keyword: false,
6603 deferred_condition: false,
6604 join_hint: None,
6605 match_condition: None,
6606 pivots: Vec::new(),
6607 comments: Vec::new(),
6608 nesting_group: 0,
6609 directed: false,
6610 });
6611 } else {
6612 new_from_exprs.push(expr.clone());
6613 }
6614 } else {
6615 new_from_exprs.push(expr.clone());
6616 }
6617 } else {
6618 new_from_exprs.push(expr.clone());
6619 }
6620 }
6621
6622 new_select.from = Some(crate::expressions::From {
6623 expressions: new_from_exprs,
6624 ..from.clone()
6625 });
6626 new_select.joins = new_joins;
6627 return Ok(Expression::Select(new_select));
6628 }
6629 }
6630 }
6631 }
6632 }
6633
6634 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
6635 if matches!(
6636 target,
6637 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6638 ) {
6639 if let Expression::Select(ref s) = e {
6640 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
6641 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
6642 matches!(expr, Expression::Unnest(_))
6643 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
6644 };
6645 let has_unnest_join = s.joins.iter().any(|j| {
6646 j.kind == crate::expressions::JoinKind::Cross && (
6647 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
6648 || is_unnest_or_explode_expr(&j.this)
6649 )
6650 });
6651 if has_unnest_join {
6652 let mut select = s.clone();
6653 let mut new_joins = Vec::new();
6654 for join in select.joins.drain(..) {
6655 if join.kind == crate::expressions::JoinKind::Cross {
6656 // Extract the UNNEST/EXPLODE from the join
6657 let (func_expr, table_alias, col_aliases) = match &join.this {
6658 Expression::Alias(a) => {
6659 let ta = if a.alias.is_empty() {
6660 None
6661 } else {
6662 Some(a.alias.clone())
6663 };
6664 let cas = a.column_aliases.clone();
6665 match &a.this {
6666 Expression::Unnest(u) => {
6667 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
6668 if !u.expressions.is_empty() {
6669 let mut all_args = vec![u.this.clone()];
6670 all_args.extend(u.expressions.clone());
6671 let arrays_zip =
6672 Expression::Function(Box::new(
6673 crate::expressions::Function::new(
6674 "ARRAYS_ZIP".to_string(),
6675 all_args,
6676 ),
6677 ));
6678 let inline = Expression::Function(Box::new(
6679 crate::expressions::Function::new(
6680 "INLINE".to_string(),
6681 vec![arrays_zip],
6682 ),
6683 ));
6684 (Some(inline), ta, a.column_aliases.clone())
6685 } else {
6686 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
6687 let func_name = if u.with_ordinality {
6688 "POSEXPLODE"
6689 } else {
6690 "EXPLODE"
6691 };
6692 let explode = Expression::Function(Box::new(
6693 crate::expressions::Function::new(
6694 func_name.to_string(),
6695 vec![u.this.clone()],
6696 ),
6697 ));
6698 // For POSEXPLODE, add 'pos' to column aliases
6699 let cas = if u.with_ordinality {
6700 let mut pos_aliases =
6701 vec![Identifier::new(
6702 "pos".to_string(),
6703 )];
6704 pos_aliases
6705 .extend(a.column_aliases.clone());
6706 pos_aliases
6707 } else {
6708 a.column_aliases.clone()
6709 };
6710 (Some(explode), ta, cas)
6711 }
6712 }
6713 Expression::Function(f)
6714 if f.name.eq_ignore_ascii_case("EXPLODE") =>
6715 {
6716 (Some(Expression::Function(f.clone())), ta, cas)
6717 }
6718 _ => (None, None, Vec::new()),
6719 }
6720 }
6721 Expression::Unnest(u) => {
6722 let func_name = if u.with_ordinality {
6723 "POSEXPLODE"
6724 } else {
6725 "EXPLODE"
6726 };
6727 let explode = Expression::Function(Box::new(
6728 crate::expressions::Function::new(
6729 func_name.to_string(),
6730 vec![u.this.clone()],
6731 ),
6732 ));
6733 let ta = u.alias.clone();
6734 let col_aliases = if u.with_ordinality {
6735 vec![Identifier::new("pos".to_string())]
6736 } else {
6737 Vec::new()
6738 };
6739 (Some(explode), ta, col_aliases)
6740 }
6741 _ => (None, None, Vec::new()),
6742 };
6743 if let Some(func) = func_expr {
6744 select.lateral_views.push(crate::expressions::LateralView {
6745 this: func,
6746 table_alias,
6747 column_aliases: col_aliases,
6748 outer: false,
6749 });
6750 } else {
6751 new_joins.push(join);
6752 }
6753 } else {
6754 new_joins.push(join);
6755 }
6756 }
6757 select.joins = new_joins;
6758 return Ok(Expression::Select(select));
6759 }
6760 }
6761 }
6762
6763 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
6764 // for BigQuery, Presto/Trino, Snowflake
6765 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
6766 && matches!(
6767 target,
6768 DialectType::BigQuery
6769 | DialectType::Presto
6770 | DialectType::Trino
6771 | DialectType::Snowflake
6772 )
6773 {
6774 if let Expression::Select(ref s) = e {
6775 // Check if any SELECT expressions contain UNNEST
6776 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
6777 let has_unnest_in_select = s.expressions.iter().any(|expr| {
6778 fn contains_unnest(e: &Expression) -> bool {
6779 match e {
6780 Expression::Unnest(_) => true,
6781 Expression::Function(f)
6782 if f.name.eq_ignore_ascii_case("UNNEST") =>
6783 {
6784 true
6785 }
6786 Expression::Alias(a) => contains_unnest(&a.this),
6787 Expression::Add(op)
6788 | Expression::Sub(op)
6789 | Expression::Mul(op)
6790 | Expression::Div(op) => {
6791 contains_unnest(&op.left) || contains_unnest(&op.right)
6792 }
6793 _ => false,
6794 }
6795 }
6796 contains_unnest(expr)
6797 });
6798
6799 if has_unnest_in_select {
6800 let rewritten = Self::rewrite_unnest_expansion(s, target);
6801 if let Some(new_select) = rewritten {
6802 return Ok(Expression::Select(Box::new(new_select)));
6803 }
6804 }
6805 }
6806 }
6807
6808 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
6809 // BigQuery '\n' -> PostgreSQL literal newline in string
6810 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
6811 {
6812 if let Expression::Literal(ref lit) = e {
6813 if let Literal::String(ref s) = lit.as_ref() {
6814 if s.contains("\\n")
6815 || s.contains("\\t")
6816 || s.contains("\\r")
6817 || s.contains("\\\\")
6818 {
6819 let converted = s
6820 .replace("\\n", "\n")
6821 .replace("\\t", "\t")
6822 .replace("\\r", "\r")
6823 .replace("\\\\", "\\");
6824 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
6825 }
6826 }
6827 }
6828 }
6829
6830 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
6831 // when source != target (identity tests keep the Literal::Timestamp for native handling)
6832 if source != target {
6833 if let Expression::Literal(ref lit) = e {
6834 if let Literal::Timestamp(ref s) = lit.as_ref() {
6835 let s = s.clone();
6836 // MySQL: TIMESTAMP handling depends on source dialect
6837 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
6838 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
6839 if matches!(target, DialectType::MySQL) {
6840 if matches!(source, DialectType::BigQuery) {
6841 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
6842 return Ok(Expression::Function(Box::new(Function::new(
6843 "TIMESTAMP".to_string(),
6844 vec![Expression::Literal(Box::new(Literal::String(s)))],
6845 ))));
6846 } else {
6847 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
6848 return Ok(Expression::Cast(Box::new(Cast {
6849 this: Expression::Literal(Box::new(Literal::String(s))),
6850 to: DataType::Custom {
6851 name: "DATETIME".to_string(),
6852 },
6853 trailing_comments: Vec::new(),
6854 double_colon_syntax: false,
6855 format: None,
6856 default: None,
6857 inferred_type: None,
6858 })));
6859 }
6860 }
6861 let dt = match target {
6862 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
6863 name: "DATETIME".to_string(),
6864 },
6865 DialectType::Snowflake => {
6866 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
6867 if matches!(source, DialectType::BigQuery) {
6868 DataType::Custom {
6869 name: "TIMESTAMPTZ".to_string(),
6870 }
6871 } else if matches!(
6872 source,
6873 DialectType::PostgreSQL
6874 | DialectType::Redshift
6875 | DialectType::Snowflake
6876 ) {
6877 DataType::Timestamp {
6878 precision: None,
6879 timezone: false,
6880 }
6881 } else {
6882 DataType::Custom {
6883 name: "TIMESTAMPNTZ".to_string(),
6884 }
6885 }
6886 }
6887 DialectType::Spark | DialectType::Databricks => {
6888 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
6889 if matches!(source, DialectType::BigQuery) {
6890 DataType::Timestamp {
6891 precision: None,
6892 timezone: false,
6893 }
6894 } else {
6895 DataType::Custom {
6896 name: "TIMESTAMP_NTZ".to_string(),
6897 }
6898 }
6899 }
6900 DialectType::ClickHouse => DataType::Nullable {
6901 inner: Box::new(DataType::Custom {
6902 name: "DateTime".to_string(),
6903 }),
6904 },
6905 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
6906 name: "DATETIME2".to_string(),
6907 },
6908 DialectType::DuckDB => {
6909 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
6910 // or when the timestamp string explicitly has timezone info
6911 if matches!(source, DialectType::BigQuery)
6912 || Self::timestamp_string_has_timezone(&s)
6913 {
6914 DataType::Custom {
6915 name: "TIMESTAMPTZ".to_string(),
6916 }
6917 } else {
6918 DataType::Timestamp {
6919 precision: None,
6920 timezone: false,
6921 }
6922 }
6923 }
6924 _ => DataType::Timestamp {
6925 precision: None,
6926 timezone: false,
6927 },
6928 };
6929 return Ok(Expression::Cast(Box::new(Cast {
6930 this: Expression::Literal(Box::new(Literal::String(s))),
6931 to: dt,
6932 trailing_comments: vec![],
6933 double_colon_syntax: false,
6934 format: None,
6935 default: None,
6936 inferred_type: None,
6937 })));
6938 }
6939 }
6940 }
6941
6942 // PostgreSQL DELETE requires explicit AS for table aliases
6943 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
6944 if let Expression::Delete(ref del) = e {
6945 if del.alias.is_some() && !del.alias_explicit_as {
6946 let mut new_del = del.clone();
6947 new_del.alias_explicit_as = true;
6948 return Ok(Expression::Delete(new_del));
6949 }
6950 }
6951 }
6952
6953 // UNION/INTERSECT/EXCEPT DISTINCT handling:
6954 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
6955 // while others don't support it (Presto, Spark, DuckDB, etc.)
6956 {
6957 let needs_distinct =
6958 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
6959 let drop_distinct = matches!(
6960 target,
6961 DialectType::Presto
6962 | DialectType::Trino
6963 | DialectType::Athena
6964 | DialectType::Spark
6965 | DialectType::Databricks
6966 | DialectType::DuckDB
6967 | DialectType::Hive
6968 | DialectType::MySQL
6969 | DialectType::PostgreSQL
6970 | DialectType::SQLite
6971 | DialectType::TSQL
6972 | DialectType::Redshift
6973 | DialectType::Snowflake
6974 | DialectType::Oracle
6975 | DialectType::Teradata
6976 | DialectType::Drill
6977 | DialectType::Doris
6978 | DialectType::StarRocks
6979 );
6980 match &e {
6981 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
6982 let mut new_u = (**u).clone();
6983 new_u.distinct = true;
6984 return Ok(Expression::Union(Box::new(new_u)));
6985 }
6986 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
6987 let mut new_i = (**i).clone();
6988 new_i.distinct = true;
6989 return Ok(Expression::Intersect(Box::new(new_i)));
6990 }
6991 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
6992 let mut new_ex = (**ex).clone();
6993 new_ex.distinct = true;
6994 return Ok(Expression::Except(Box::new(new_ex)));
6995 }
6996 Expression::Union(u) if u.distinct && drop_distinct => {
6997 let mut new_u = (**u).clone();
6998 new_u.distinct = false;
6999 return Ok(Expression::Union(Box::new(new_u)));
7000 }
7001 Expression::Intersect(i) if i.distinct && drop_distinct => {
7002 let mut new_i = (**i).clone();
7003 new_i.distinct = false;
7004 return Ok(Expression::Intersect(Box::new(new_i)));
7005 }
7006 Expression::Except(ex) if ex.distinct && drop_distinct => {
7007 let mut new_ex = (**ex).clone();
7008 new_ex.distinct = false;
7009 return Ok(Expression::Except(Box::new(new_ex)));
7010 }
7011 _ => {}
7012 }
7013 }
7014
7015 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
7016 if matches!(target, DialectType::ClickHouse) {
7017 if let Expression::Function(ref f) = e {
7018 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
7019 let mut new_f = f.as_ref().clone();
7020 new_f.name = "map".to_string();
7021 return Ok(Expression::Function(Box::new(new_f)));
7022 }
7023 }
7024 }
7025
7026 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
7027 if matches!(target, DialectType::ClickHouse) {
7028 if let Expression::Intersect(ref i) = e {
7029 if i.all {
7030 let mut new_i = (**i).clone();
7031 new_i.all = false;
7032 return Ok(Expression::Intersect(Box::new(new_i)));
7033 }
7034 }
7035 }
7036
7037 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
7038 // Only from Generic source, to prevent double-wrapping
7039 if matches!(source, DialectType::Generic) {
7040 if let Expression::Div(ref op) = e {
7041 let cast_type = match target {
7042 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
7043 precision: None,
7044 scale: None,
7045 real_spelling: false,
7046 }),
7047 DialectType::Drill
7048 | DialectType::Trino
7049 | DialectType::Athena
7050 | DialectType::Presto => Some(DataType::Double {
7051 precision: None,
7052 scale: None,
7053 }),
7054 DialectType::PostgreSQL
7055 | DialectType::Redshift
7056 | DialectType::Materialize
7057 | DialectType::Teradata
7058 | DialectType::RisingWave => Some(DataType::Double {
7059 precision: None,
7060 scale: None,
7061 }),
7062 _ => None,
7063 };
7064 if let Some(dt) = cast_type {
7065 let cast_left = Expression::Cast(Box::new(Cast {
7066 this: op.left.clone(),
7067 to: dt,
7068 double_colon_syntax: false,
7069 trailing_comments: Vec::new(),
7070 format: None,
7071 default: None,
7072 inferred_type: None,
7073 }));
7074 let new_op = crate::expressions::BinaryOp {
7075 left: cast_left,
7076 right: op.right.clone(),
7077 left_comments: op.left_comments.clone(),
7078 operator_comments: op.operator_comments.clone(),
7079 trailing_comments: op.trailing_comments.clone(),
7080 inferred_type: None,
7081 };
7082 return Ok(Expression::Div(Box::new(new_op)));
7083 }
7084 }
7085 }
7086
7087 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
7088 if matches!(target, DialectType::DuckDB) {
7089 if let Expression::CreateDatabase(db) = e {
7090 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
7091 schema.if_not_exists = db.if_not_exists;
7092 return Ok(Expression::CreateSchema(Box::new(schema)));
7093 }
7094 if let Expression::DropDatabase(db) = e {
7095 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
7096 schema.if_exists = db.if_exists;
7097 return Ok(Expression::DropSchema(Box::new(schema)));
7098 }
7099 }
7100
7101 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
7102 if matches!(source, DialectType::ClickHouse)
7103 && !matches!(target, DialectType::ClickHouse)
7104 {
7105 if let Expression::Cast(ref c) = e {
7106 if let DataType::Custom { ref name } = c.to {
7107 if name.len() >= 9
7108 && name[..9].eq_ignore_ascii_case("NULLABLE(")
7109 && name.ends_with(")")
7110 {
7111 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
7112 let inner_upper = inner.to_ascii_uppercase();
7113 let new_dt = match inner_upper.as_str() {
7114 "DATETIME" | "DATETIME64" => DataType::Timestamp {
7115 precision: None,
7116 timezone: false,
7117 },
7118 "DATE" => DataType::Date,
7119 "INT64" | "BIGINT" => DataType::BigInt { length: None },
7120 "INT32" | "INT" | "INTEGER" => DataType::Int {
7121 length: None,
7122 integer_spelling: false,
7123 },
7124 "FLOAT64" | "DOUBLE" => DataType::Double {
7125 precision: None,
7126 scale: None,
7127 },
7128 "STRING" => DataType::Text,
7129 _ => DataType::Custom {
7130 name: inner.to_string(),
7131 },
7132 };
7133 let mut new_cast = c.clone();
7134 new_cast.to = new_dt;
7135 return Ok(Expression::Cast(new_cast));
7136 }
7137 }
7138 }
7139 }
7140
7141 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
7142 if matches!(target, DialectType::Snowflake) {
7143 if let Expression::ArrayConcatAgg(ref agg) = e {
7144 let mut agg_clone = agg.as_ref().clone();
7145 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
7146 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
7147 let flatten = Expression::Function(Box::new(Function::new(
7148 "ARRAY_FLATTEN".to_string(),
7149 vec![array_agg],
7150 )));
7151 return Ok(flatten);
7152 }
7153 }
7154
7155 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
7156 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
7157 if let Expression::ArrayConcatAgg(agg) = e {
7158 let arg = agg.this;
7159 return Ok(Expression::Function(Box::new(Function::new(
7160 "ARRAY_CONCAT_AGG".to_string(),
7161 vec![arg],
7162 ))));
7163 }
7164 }
7165
7166 // Determine what action to take by inspecting e immutably
7167 let action = {
7168 let source_propagates_nulls =
7169 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
7170 let target_ignores_nulls =
7171 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
7172
7173 match &e {
7174 Expression::Function(f) => {
7175 let name = f.name.to_ascii_uppercase();
7176 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
7177 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
7178 if name == "JSON"
7179 && f.args.len() == 1
7180 && matches!(source, DialectType::DuckDB)
7181 && matches!(
7182 target,
7183 DialectType::Presto | DialectType::Trino | DialectType::Athena
7184 )
7185 {
7186 Action::DuckDBJsonFuncToJsonParse
7187 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
7188 // SQL:2016 `x IS JSON` predicate which has matching semantics.
7189 } else if name == "JSON_VALID"
7190 && f.args.len() == 1
7191 && matches!(source, DialectType::DuckDB)
7192 && matches!(
7193 target,
7194 DialectType::Presto | DialectType::Trino | DialectType::Athena
7195 )
7196 {
7197 Action::DuckDBJsonValidToIsJson
7198 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
7199 } else if (name == "DATE_PART" || name == "DATEPART")
7200 && f.args.len() == 2
7201 && matches!(target, DialectType::Snowflake)
7202 && !matches!(source, DialectType::Snowflake)
7203 && matches!(
7204 &f.args[0],
7205 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7206 )
7207 {
7208 Action::DatePartUnquote
7209 } else if source_propagates_nulls
7210 && target_ignores_nulls
7211 && (name == "GREATEST" || name == "LEAST")
7212 && f.args.len() >= 2
7213 {
7214 Action::GreatestLeastNull
7215 } else if matches!(source, DialectType::Snowflake)
7216 && name == "ARRAY_GENERATE_RANGE"
7217 && f.args.len() >= 2
7218 {
7219 Action::ArrayGenerateRange
7220 } else if matches!(source, DialectType::Snowflake)
7221 && matches!(target, DialectType::DuckDB)
7222 && name == "DATE_TRUNC"
7223 && f.args.len() == 2
7224 {
7225 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
7226 // Logic based on Python sqlglot's input_type_preserved flag:
7227 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
7228 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
7229 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
7230 let unit_str = match &f.args[0] {
7231 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
7232 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
7233 Some(s.to_ascii_uppercase())
7234 }
7235 _ => None,
7236 };
7237 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
7238 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
7239 });
7240 match &f.args[1] {
7241 Expression::Cast(c) => match &c.to {
7242 DataType::Time { .. } => Action::DateTruncWrapCast,
7243 DataType::Custom { name }
7244 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
7245 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
7246 {
7247 Action::DateTruncWrapCast
7248 }
7249 DataType::Timestamp { timezone: true, .. } => {
7250 Action::DateTruncWrapCast
7251 }
7252 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
7253 DataType::Timestamp {
7254 timezone: false, ..
7255 } if is_date_unit => Action::DateTruncWrapCast,
7256 _ => Action::None,
7257 },
7258 _ => Action::None,
7259 }
7260 } else if matches!(source, DialectType::Snowflake)
7261 && matches!(target, DialectType::DuckDB)
7262 && name == "TO_DATE"
7263 && f.args.len() == 1
7264 && !matches!(
7265 &f.args[0],
7266 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7267 )
7268 {
7269 Action::ToDateToCast
7270 } else if !matches!(source, DialectType::Redshift)
7271 && matches!(target, DialectType::Redshift)
7272 && name == "CONVERT_TIMEZONE"
7273 && (f.args.len() == 2 || f.args.len() == 3)
7274 {
7275 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
7276 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
7277 // The Redshift parser adds 'UTC' as default source_tz, but when
7278 // transpiling from other dialects, we should preserve the original form.
7279 Action::ConvertTimezoneToExpr
7280 } else if matches!(source, DialectType::Snowflake)
7281 && matches!(target, DialectType::DuckDB)
7282 && name == "REGEXP_REPLACE"
7283 && f.args.len() == 4
7284 && !matches!(
7285 &f.args[3],
7286 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7287 )
7288 {
7289 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
7290 Action::RegexpReplaceSnowflakeToDuckDB
7291 } else if matches!(source, DialectType::Snowflake)
7292 && matches!(target, DialectType::DuckDB)
7293 && name == "REGEXP_REPLACE"
7294 && f.args.len() == 5
7295 {
7296 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
7297 Action::RegexpReplacePositionSnowflakeToDuckDB
7298 } else if matches!(source, DialectType::Snowflake)
7299 && matches!(target, DialectType::DuckDB)
7300 && name == "REGEXP_SUBSTR"
7301 {
7302 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
7303 Action::RegexpSubstrSnowflakeToDuckDB
7304 } else if matches!(source, DialectType::Snowflake)
7305 && matches!(target, DialectType::Snowflake)
7306 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
7307 && f.args.len() == 6
7308 {
7309 // Snowflake identity: strip trailing group=0
7310 Action::RegexpSubstrSnowflakeIdentity
7311 } else if matches!(source, DialectType::Snowflake)
7312 && matches!(target, DialectType::DuckDB)
7313 && name == "REGEXP_SUBSTR_ALL"
7314 {
7315 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
7316 Action::RegexpSubstrAllSnowflakeToDuckDB
7317 } else if matches!(source, DialectType::Snowflake)
7318 && matches!(target, DialectType::DuckDB)
7319 && name == "REGEXP_COUNT"
7320 {
7321 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
7322 Action::RegexpCountSnowflakeToDuckDB
7323 } else if matches!(source, DialectType::Snowflake)
7324 && matches!(target, DialectType::DuckDB)
7325 && name == "REGEXP_INSTR"
7326 {
7327 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
7328 Action::RegexpInstrSnowflakeToDuckDB
7329 } else if matches!(source, DialectType::BigQuery)
7330 && matches!(target, DialectType::Snowflake)
7331 && name == "REGEXP_EXTRACT_ALL"
7332 {
7333 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
7334 Action::RegexpExtractAllToSnowflake
7335 } else if name == "_BQ_TO_HEX" {
7336 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
7337 Action::BigQueryToHexBare
7338 } else if matches!(source, DialectType::BigQuery)
7339 && !matches!(target, DialectType::BigQuery)
7340 {
7341 // BigQuery-specific functions that need to be converted to standard forms
7342 match name.as_str() {
7343 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
7344 | "DATE_DIFF"
7345 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
7346 | "DATETIME_ADD" | "DATETIME_SUB"
7347 | "TIME_ADD" | "TIME_SUB"
7348 | "DATE_ADD" | "DATE_SUB"
7349 | "SAFE_DIVIDE"
7350 | "GENERATE_UUID"
7351 | "COUNTIF"
7352 | "EDIT_DISTANCE"
7353 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
7354 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
7355 | "TO_HEX"
7356 | "TO_JSON_STRING"
7357 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
7358 | "DIV"
7359 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
7360 | "LAST_DAY"
7361 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
7362 | "REGEXP_CONTAINS"
7363 | "CONTAINS_SUBSTR"
7364 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
7365 | "SAFE_CAST"
7366 | "GENERATE_DATE_ARRAY"
7367 | "PARSE_DATE" | "PARSE_TIMESTAMP"
7368 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
7369 | "ARRAY_CONCAT"
7370 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
7371 | "INSTR"
7372 | "MD5" | "SHA1" | "SHA256" | "SHA512"
7373 | "GENERATE_UUID()" // just in case
7374 | "REGEXP_EXTRACT_ALL"
7375 | "REGEXP_EXTRACT"
7376 | "INT64"
7377 | "ARRAY_CONCAT_AGG"
7378 | "DATE_DIFF(" // just in case
7379 | "TO_HEX_MD5" // internal
7380 | "MOD"
7381 | "CONCAT"
7382 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
7383 | "STRUCT"
7384 | "ROUND"
7385 | "MAKE_INTERVAL"
7386 | "ARRAY_TO_STRING"
7387 | "PERCENTILE_CONT"
7388 => Action::BigQueryFunctionNormalize,
7389 "ARRAY" if matches!(target, DialectType::Snowflake)
7390 && f.args.len() == 1
7391 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
7392 => Action::BigQueryArraySelectAsStructToSnowflake,
7393 _ => Action::None,
7394 }
7395 } else if matches!(source, DialectType::BigQuery)
7396 && matches!(target, DialectType::BigQuery)
7397 {
7398 // BigQuery -> BigQuery normalizations
7399 match name.as_str() {
7400 "TIMESTAMP_DIFF"
7401 | "DATETIME_DIFF"
7402 | "TIME_DIFF"
7403 | "DATE_DIFF"
7404 | "DATE_ADD"
7405 | "TO_HEX"
7406 | "CURRENT_TIMESTAMP"
7407 | "CURRENT_DATE"
7408 | "CURRENT_TIME"
7409 | "CURRENT_DATETIME"
7410 | "GENERATE_DATE_ARRAY"
7411 | "INSTR"
7412 | "FORMAT_DATETIME"
7413 | "DATETIME"
7414 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
7415 _ => Action::None,
7416 }
7417 } else {
7418 // Generic function normalization for non-BigQuery sources
7419 match name.as_str() {
7420 "ARBITRARY" | "AGGREGATE"
7421 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
7422 | "STRUCT_EXTRACT"
7423 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
7424 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
7425 | "SUBSTRINGINDEX"
7426 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
7427 | "UNICODE"
7428 | "XOR"
7429 | "ARRAY_REVERSE_SORT"
7430 | "ENCODE" | "DECODE"
7431 | "QUANTILE"
7432 | "EPOCH" | "EPOCH_MS"
7433 | "HASHBYTES"
7434 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
7435 | "APPROX_DISTINCT"
7436 | "DATE_PARSE" | "FORMAT_DATETIME"
7437 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
7438 | "RLIKE"
7439 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
7440 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
7441 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
7442 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
7443 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
7444 | "MAP" | "MAP_FROM_ENTRIES"
7445 | "COLLECT_LIST" | "COLLECT_SET"
7446 | "ISNAN" | "IS_NAN"
7447 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
7448 | "FORMAT_NUMBER"
7449 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
7450 | "ELEMENT_AT"
7451 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
7452 | "SPLIT_PART"
7453 // GENERATE_SERIES: handled separately below
7454 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
7455 | "JSON_QUERY" | "JSON_VALUE"
7456 | "JSON_SEARCH"
7457 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
7458 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
7459 | "CURDATE" | "CURTIME"
7460 | "ARRAY_TO_STRING"
7461 | "ARRAY_SORT" | "SORT_ARRAY"
7462 | "LEFT" | "RIGHT"
7463 | "MAP_FROM_ARRAYS"
7464 | "LIKE" | "ILIKE"
7465 | "ARRAY_CONCAT" | "LIST_CONCAT"
7466 | "QUANTILE_CONT" | "QUANTILE_DISC"
7467 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
7468 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
7469 | "LOCATE" | "STRPOS" | "INSTR"
7470 | "CHAR"
7471 // CONCAT: handled separately for COALESCE wrapping
7472 | "ARRAY_JOIN"
7473 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
7474 | "ISNULL"
7475 | "MONTHNAME"
7476 | "TO_TIMESTAMP"
7477 | "TO_DATE"
7478 | "TO_JSON"
7479 | "REGEXP_SPLIT"
7480 | "SPLIT"
7481 | "FORMATDATETIME"
7482 | "ARRAYJOIN"
7483 | "SPLITBYSTRING" | "SPLITBYREGEXP"
7484 | "NVL"
7485 | "TO_CHAR"
7486 | "DBMS_RANDOM.VALUE"
7487 | "REGEXP_LIKE"
7488 | "REPLICATE"
7489 | "LEN"
7490 | "COUNT_BIG"
7491 | "DATEFROMPARTS"
7492 | "DATETIMEFROMPARTS"
7493 | "CONVERT" | "TRY_CONVERT"
7494 | "STRFTIME" | "STRPTIME"
7495 | "DATE_FORMAT" | "FORMAT_DATE"
7496 | "PARSE_TIMESTAMP" | "PARSE_DATE"
7497 | "FROM_BASE64" | "TO_BASE64"
7498 | "GETDATE"
7499 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
7500 | "TO_UTF8" | "FROM_UTF8"
7501 | "STARTS_WITH" | "STARTSWITH"
7502 | "APPROX_COUNT_DISTINCT"
7503 | "JSON_FORMAT"
7504 | "SYSDATE"
7505 | "LOGICAL_OR" | "LOGICAL_AND"
7506 | "MONTHS_ADD"
7507 | "SCHEMA_NAME"
7508 | "STRTOL"
7509 | "EDITDIST3"
7510 | "FORMAT"
7511 | "LIST_CONTAINS" | "LIST_HAS"
7512 | "VARIANCE" | "STDDEV"
7513 | "ISINF"
7514 | "TO_UNIXTIME"
7515 | "FROM_UNIXTIME"
7516 | "DATEPART" | "DATE_PART"
7517 | "DATENAME"
7518 | "STRING_AGG"
7519 | "JSON_ARRAYAGG"
7520 | "APPROX_QUANTILE"
7521 | "MAKE_DATE"
7522 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
7523 | "RANGE"
7524 | "TRY_ELEMENT_AT"
7525 | "STR_TO_MAP"
7526 | "STRING"
7527 | "STR_TO_TIME"
7528 | "CURRENT_SCHEMA"
7529 | "LTRIM" | "RTRIM"
7530 | "UUID"
7531 | "FARM_FINGERPRINT"
7532 | "JSON_KEYS"
7533 | "WEEKOFYEAR"
7534 | "CONCAT_WS"
7535 | "TRY_DIVIDE"
7536 | "ARRAY_SLICE"
7537 | "ARRAY_PREPEND"
7538 | "ARRAY_REMOVE"
7539 | "GENERATE_DATE_ARRAY"
7540 | "PARSE_JSON"
7541 | "JSON_REMOVE"
7542 | "JSON_SET"
7543 | "LEVENSHTEIN"
7544 | "CURRENT_VERSION"
7545 | "ARRAY_MAX"
7546 | "ARRAY_MIN"
7547 | "JAROWINKLER_SIMILARITY"
7548 | "CURRENT_SCHEMAS"
7549 | "TO_VARIANT"
7550 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
7551 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
7552 => Action::GenericFunctionNormalize,
7553 // Canonical date functions -> dialect-specific
7554 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
7555 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
7556 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
7557 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
7558 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
7559 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
7560 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
7561 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
7562 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
7563 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
7564 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
7565 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
7566 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
7567 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
7568 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
7569 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
7570 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
7571 // STR_TO_DATE(x, fmt) -> dialect-specific
7572 "STR_TO_DATE" if f.args.len() == 2
7573 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
7574 "STR_TO_DATE" => Action::GenericFunctionNormalize,
7575 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
7576 "TS_OR_DS_ADD" if f.args.len() == 3
7577 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
7578 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
7579 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
7580 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
7581 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
7582 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
7583 // IFNULL(a, b) -> COALESCE(a, b); the guard checks only the argument count, not the source dialect
7584 // IS_ASCII(x) -> dialect-specific
7585 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
7586 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
7587 "STR_POSITION" => Action::StrPositionConvert,
7588 // ARRAY_SUM -> dialect-specific
7589 "ARRAY_SUM" => Action::ArraySumConvert,
7590 // ARRAY_SIZE -> dialect-specific (Drill only)
7591 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
7592 // ARRAY_ANY -> dialect-specific
7593 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
7594 // Functions needing specific cross-dialect transforms
7595 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
7596 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
7597 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
7598 "ARRAY" if matches!(source, DialectType::BigQuery)
7599 && matches!(target, DialectType::Snowflake)
7600 && f.args.len() == 1
7601 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
7602 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
7603 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
7604 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
7605 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/StarRocks/Spark/Databricks/MySQL
7606 "DATE_TRUNC" if f.args.len() == 2
7607 && matches!(source, DialectType::Generic)
7608 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
7609 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
7610 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
7611 "TIMESTAMP_TRUNC" if f.args.len() >= 2
7612 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
7613 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
7614 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
7615 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7616 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
7617 // GENERATE_SERIES with interval normalization for PG target
7618 "GENERATE_SERIES" if f.args.len() >= 3
7619 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7620 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
7621 "GENERATE_SERIES" => Action::None, // passthrough for other cases
7622 // CONCAT(a, b) -> COALESCE wrapping for Presto/Trino/ClickHouse from PostgreSQL/Redshift
7623 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7624 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
7625 "CONCAT" => Action::GenericFunctionNormalize,
7626 // DIV(a, b) -> target-specific integer division
7627 "DIV" if f.args.len() == 2
7628 && matches!(source, DialectType::PostgreSQL)
7629 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
7630 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7631 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
7632 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
7633 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
7634 "JSONB_EXISTS" if f.args.len() == 2
7635 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
7636 // DATE_BIN -> TIME_BUCKET for DuckDB
7637 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
7638 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST (skipped when the target is SQLite)
7639 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
7640 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
7641 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
7642 // ClickHouse any -> ANY_VALUE for other dialects
7643 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
7644 _ => Action::None,
7645 }
7646 }
7647 }
7648 Expression::AggregateFunction(af) => {
7649 let name = af.name.to_ascii_uppercase();
7650 match name.as_str() {
7651 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
7652 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
7653 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7654 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
7655 if matches!(target, DialectType::DuckDB) =>
7656 {
7657 Action::JsonObjectAggConvert
7658 }
7659 "ARRAY_AGG"
7660 if matches!(
7661 target,
7662 DialectType::Hive
7663 | DialectType::Spark
7664 | DialectType::Databricks
7665 ) =>
7666 {
7667 Action::ArrayAggToCollectList
7668 }
7669 "MAX_BY" | "MIN_BY"
7670 if matches!(
7671 target,
7672 DialectType::ClickHouse
7673 | DialectType::Spark
7674 | DialectType::Databricks
7675 | DialectType::DuckDB
7676 ) =>
7677 {
7678 Action::MaxByMinByConvert
7679 }
7680 "COLLECT_LIST"
7681 if matches!(
7682 target,
7683 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
7684 ) =>
7685 {
7686 Action::CollectListToArrayAgg
7687 }
7688 "COLLECT_SET"
7689 if matches!(
7690 target,
7691 DialectType::Presto
7692 | DialectType::Trino
7693 | DialectType::Snowflake
7694 | DialectType::DuckDB
7695 ) =>
7696 {
7697 Action::CollectSetConvert
7698 }
7699 "PERCENTILE"
7700 if matches!(
7701 target,
7702 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7703 ) =>
7704 {
7705 Action::PercentileConvert
7706 }
7707 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
7708 "CORR"
7709 if matches!(target, DialectType::DuckDB)
7710 && matches!(source, DialectType::Snowflake) =>
7711 {
7712 Action::CorrIsnanWrap
7713 }
7714 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
7715 "APPROX_QUANTILES"
7716 if matches!(source, DialectType::BigQuery)
7717 && matches!(target, DialectType::DuckDB) =>
7718 {
7719 Action::BigQueryApproxQuantiles
7720 }
7721 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
7722 "PERCENTILE_CONT"
7723 if matches!(source, DialectType::BigQuery)
7724 && matches!(target, DialectType::DuckDB)
7725 && af.args.len() >= 2 =>
7726 {
7727 Action::BigQueryPercentileContToDuckDB
7728 }
7729 _ => Action::None,
7730 }
7731 }
7732 Expression::JSONArrayAgg(_) => match target {
7733 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
7734 _ => Action::None,
7735 },
7736 Expression::ToNumber(tn) => {
7737 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
7738 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
7739 match target {
7740 DialectType::Oracle
7741 | DialectType::Snowflake
7742 | DialectType::Teradata => Action::None,
7743 _ => Action::GenericFunctionNormalize,
7744 }
7745 } else {
7746 Action::None
7747 }
7748 }
7749 Expression::Nvl2(_) => {
7750 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
7751 // Keep as NVL2 for dialects that support it natively
7752 match target {
7753 DialectType::Oracle
7754 | DialectType::Snowflake
7755 | DialectType::Teradata
7756 | DialectType::Spark
7757 | DialectType::Databricks
7758 | DialectType::Redshift => Action::None,
7759 _ => Action::Nvl2Expand,
7760 }
7761 }
7762 Expression::Decode(_) | Expression::DecodeCase(_) => {
7763 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
7764 // Keep as DECODE for Oracle/Snowflake
7765 match target {
7766 DialectType::Oracle | DialectType::Snowflake => Action::None,
7767 _ => Action::DecodeSimplify,
7768 }
7769 }
7770 Expression::Coalesce(ref cf) => {
7771 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
7772 // BigQuery keeps IFNULL natively when source is also BigQuery
7773 if cf.original_name.as_deref() == Some("IFNULL")
7774 && !(matches!(source, DialectType::BigQuery)
7775 && matches!(target, DialectType::BigQuery))
7776 {
7777 Action::IfnullToCoalesce
7778 } else {
7779 Action::None
7780 }
7781 }
7782 Expression::IfFunc(if_func) => {
7783 if matches!(source, DialectType::Snowflake)
7784 && matches!(
7785 target,
7786 DialectType::Presto | DialectType::Trino | DialectType::SQLite
7787 )
7788 && matches!(if_func.false_value, Some(Expression::Div(_)))
7789 {
7790 Action::Div0TypedDivision
7791 } else {
7792 Action::None
7793 }
7794 }
7795 Expression::ToJson(_) => match target {
7796 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
7797 DialectType::BigQuery => Action::ToJsonConvert,
7798 DialectType::DuckDB => Action::ToJsonConvert,
7799 _ => Action::None,
7800 },
7801 Expression::ArrayAgg(ref agg) => {
7802 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
7803 Action::ArrayAggToGroupConcat
7804 } else if matches!(
7805 target,
7806 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7807 ) {
7808 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
7809 Action::ArrayAggToCollectList
7810 } else if matches!(
7811 source,
7812 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7813 ) && matches!(target, DialectType::DuckDB)
7814 && agg.filter.is_some()
7815 {
7816 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
7817 // Need to add NOT x IS NULL to existing filter
7818 Action::ArrayAggNullFilter
7819 } else if matches!(target, DialectType::DuckDB)
7820 && agg.ignore_nulls == Some(true)
7821 && !agg.order_by.is_empty()
7822 {
7823 // ARRAY_AGG(x IGNORE NULLS ORDER BY ...), e.g. from BigQuery (the source dialect is not checked here) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
7824 Action::ArrayAggIgnoreNullsDuckDB
7825 } else if !matches!(source, DialectType::Snowflake) {
7826 Action::None
7827 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
7828 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
7829 || agg.name.is_none();
7830 if is_array_agg {
7831 Action::ArrayAggCollectList
7832 } else {
7833 Action::None
7834 }
7835 } else if matches!(
7836 target,
7837 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7838 ) && agg.filter.is_none()
7839 {
7840 Action::ArrayAggFilter
7841 } else {
7842 Action::None
7843 }
7844 }
7845 Expression::WithinGroup(wg) => {
7846 if matches!(source, DialectType::Snowflake)
7847 && matches!(
7848 target,
7849 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7850 )
7851 && matches!(wg.this, Expression::ArrayAgg(_))
7852 {
7853 Action::ArrayAggWithinGroupFilter
7854 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
7855 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
7856 || matches!(&wg.this, Expression::StringAgg(_))
7857 {
7858 Action::StringAggConvert
7859 } else if matches!(
7860 target,
7861 DialectType::Presto
7862 | DialectType::Trino
7863 | DialectType::Athena
7864 | DialectType::Spark
7865 | DialectType::Databricks
7866 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
7867 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
7868 || matches!(&wg.this, Expression::PercentileCont(_)))
7869 {
7870 Action::PercentileContConvert
7871 } else {
7872 Action::None
7873 }
7874 }
7875 // CAST rewrites, chosen by source/target dialect and the cast's target type.
7876 // One case, for a BigQuery target: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME), because
7877 // BigQuery's TIMESTAMP is really TIMESTAMPTZ, and DATETIME is the timezone-unaware type
7878 Expression::Cast(ref c) => {
7879 if c.format.is_some()
7880 && (matches!(source, DialectType::BigQuery)
7881 || matches!(source, DialectType::Teradata))
7882 {
7883 Action::BigQueryCastFormat
7884 } else if matches!(target, DialectType::BigQuery)
7885 && !matches!(source, DialectType::BigQuery)
7886 && matches!(
7887 c.to,
7888 DataType::Timestamp {
7889 timezone: false,
7890 ..
7891 }
7892 )
7893 {
7894 Action::CastTimestampToDatetime
7895 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
7896 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
7897 && matches!(
7898 c.to,
7899 DataType::Timestamp {
7900 timezone: false,
7901 ..
7902 }
7903 )
7904 {
7905 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
7906 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
7907 Action::CastTimestampToDatetime
7908 } else if matches!(
7909 source,
7910 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7911 ) && matches!(
7912 target,
7913 DialectType::Presto
7914 | DialectType::Trino
7915 | DialectType::Athena
7916 | DialectType::DuckDB
7917 | DialectType::Snowflake
7918 | DialectType::BigQuery
7919 | DialectType::Databricks
7920 | DialectType::TSQL
7921 ) {
7922 Action::HiveCastToTryCast
7923 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
7924 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
7925 {
7926 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
7927 Action::CastTimestamptzToFunc
7928 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
7929 && matches!(
7930 target,
7931 DialectType::Hive
7932 | DialectType::Spark
7933 | DialectType::Databricks
7934 | DialectType::BigQuery
7935 )
7936 {
7937 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/Databricks/BigQuery
7938 Action::CastTimestampStripTz
7939 } else if matches!(&c.to, DataType::Json)
7940 && matches!(source, DialectType::DuckDB)
7941 && matches!(target, DialectType::Snowflake)
7942 {
7943 Action::DuckDBCastJsonToVariant
7944 } else if matches!(&c.to, DataType::Json)
7945 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
7946 && matches!(
7947 target,
7948 DialectType::Presto
7949 | DialectType::Trino
7950 | DialectType::Athena
7951 | DialectType::Snowflake
7952 )
7953 {
7954 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
7955 // Only when the input is a string literal (JSON 'value' syntax)
7956 Action::JsonLiteralToJsonParse
7957 } else if matches!(&c.to, DataType::Json)
7958 && matches!(source, DialectType::DuckDB)
7959 && matches!(
7960 target,
7961 DialectType::Presto | DialectType::Trino | DialectType::Athena
7962 )
7963 {
7964 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
7965 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
7966 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
7967 // in the target to preserve DuckDB's parse semantics.
7968 Action::JsonLiteralToJsonParse
7969 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
7970 && matches!(target, DialectType::Spark | DialectType::Databricks)
7971 {
7972 // CAST(x AS JSON/JSONB) -> TO_JSON(x) for Spark/Databricks
7973 Action::CastToJsonForSpark
7974 } else if (matches!(
7975 &c.to,
7976 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
7977 )) && matches!(
7978 target,
7979 DialectType::Spark | DialectType::Databricks
7980 ) && (matches!(&c.this, Expression::ParseJson(_))
7981 || matches!(
7982 &c.this,
7983 Expression::Function(f)
7984 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
7985 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
7986 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
7987 ))
7988 {
7989 // CAST(JSON_PARSE(...) AS ARRAY/MAP/STRUCT) or CAST(JSON_EXTRACT/JSON_EXTRACT_SCALAR/GET_JSON_OBJECT(...) AS ARRAY/MAP/STRUCT)
7990 // -> FROM_JSON(..., type_string) for Spark/Databricks
7991 Action::CastJsonToFromJson
7992 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
7993 && matches!(
7994 c.to,
7995 DataType::Timestamp {
7996 timezone: false,
7997 ..
7998 }
7999 )
8000 && matches!(source, DialectType::DuckDB)
8001 {
8002 Action::StrftimeCastTimestamp
8003 } else if matches!(source, DialectType::DuckDB)
8004 && matches!(
8005 c.to,
8006 DataType::Decimal {
8007 precision: None,
8008 ..
8009 }
8010 )
8011 {
8012 Action::DecimalDefaultPrecision
8013 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
8014 && matches!(c.to, DataType::Char { length: None })
8015 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
8016 {
8017 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
8018 Action::MysqlCastCharToText
8019 } else if matches!(
8020 source,
8021 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8022 ) && matches!(
8023 target,
8024 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8025 ) && Self::has_varchar_char_type(&c.to)
8026 {
8027 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
8028 Action::SparkCastVarcharToString
8029 } else {
8030 Action::None
8031 }
8032 }
8033 Expression::SafeCast(ref c) => {
8034 if c.format.is_some()
8035 && matches!(source, DialectType::BigQuery)
8036 && !matches!(target, DialectType::BigQuery)
8037 {
8038 Action::BigQueryCastFormat
8039 } else {
8040 Action::None
8041 }
8042 }
8043 Expression::TryCast(ref c) => {
8044 if matches!(&c.to, DataType::Json)
8045 && matches!(source, DialectType::DuckDB)
8046 && matches!(
8047 target,
8048 DialectType::Presto | DialectType::Trino | DialectType::Athena
8049 )
8050 {
8051 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
8052 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
8053 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
8054 // to preserve DuckDB's parse-or-null semantics.
8055 Action::DuckDBTryCastJsonToTryJsonParse
8056 } else {
8057 Action::None
8058 }
8059 }
8060 Expression::JSONArray(ref ja)
8061 if matches!(target, DialectType::Snowflake)
8062 && ja.null_handling.is_none()
8063 && ja.return_type.is_none()
8064 && ja.strict.is_none() =>
8065 {
8066 Action::GenericFunctionNormalize
8067 }
8068 Expression::JsonArray(_) if matches!(target, DialectType::Snowflake) => {
8069 Action::GenericFunctionNormalize
8070 }
8071 // For DuckDB: DATE_TRUNC should preserve the input type
8072 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
8073 if matches!(source, DialectType::Snowflake)
8074 && matches!(target, DialectType::DuckDB)
8075 {
8076 Action::DateTruncWrapCast
8077 } else {
8078 Action::None
8079 }
8080 }
8081 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
8082 Expression::SetStatement(s) => {
8083 if matches!(target, DialectType::DuckDB)
8084 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
8085 && s.items.iter().any(|item| item.kind.is_none())
8086 {
8087 Action::SetToVariable
8088 } else {
8089 Action::None
8090 }
8091 }
8092 // Cross-dialect NULL ordering normalization.
8093 // When nulls_first is not specified, fill in the source dialect's implied
8094 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
8095 Expression::Ordered(o) => {
8096 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
8097 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
8098 Action::MysqlNullsOrdering
8099 } else {
8100 // Skip targets that don't support NULLS FIRST/LAST syntax
8101 let target_supports_nulls = !matches!(
8102 target,
8103 DialectType::MySQL
8104 | DialectType::TSQL
8105 | DialectType::StarRocks
8106 | DialectType::Doris
8107 );
8108 if o.nulls_first.is_none() && source != target && target_supports_nulls
8109 {
8110 Action::NullsOrdering
8111 } else {
8112 Action::None
8113 }
8114 }
8115 }
8116 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
8117 Expression::DataType(dt) => {
8118 if matches!(source, DialectType::BigQuery)
8119 && !matches!(target, DialectType::BigQuery)
8120 {
8121 match dt {
8122 DataType::Custom { ref name }
8123 if name.eq_ignore_ascii_case("INT64")
8124 || name.eq_ignore_ascii_case("FLOAT64")
8125 || name.eq_ignore_ascii_case("BOOL")
8126 || name.eq_ignore_ascii_case("BYTES")
8127 || name.eq_ignore_ascii_case("NUMERIC")
8128 || name.eq_ignore_ascii_case("STRING")
8129 || name.eq_ignore_ascii_case("DATETIME") =>
8130 {
8131 Action::BigQueryCastType
8132 }
8133 _ => Action::None,
8134 }
8135 } else if matches!(source, DialectType::TSQL) {
8136 // For TSQL source -> any target (including TSQL itself for REAL)
8137 match dt {
8138 // REAL -> FLOAT even for TSQL->TSQL
8139 DataType::Custom { ref name }
8140 if name.eq_ignore_ascii_case("REAL") =>
8141 {
8142 Action::TSQLTypeNormalize
8143 }
8144 DataType::Float {
8145 real_spelling: true,
8146 ..
8147 } => Action::TSQLTypeNormalize,
8148 // Other TSQL type normalizations only for non-TSQL targets
8149 DataType::Custom { ref name }
8150 if !matches!(target, DialectType::TSQL)
8151 && (name.eq_ignore_ascii_case("MONEY")
8152 || name.eq_ignore_ascii_case("SMALLMONEY")
8153 || name.eq_ignore_ascii_case("DATETIME2")
8154 || name.eq_ignore_ascii_case("IMAGE")
8155 || name.eq_ignore_ascii_case("BIT")
8156 || name.eq_ignore_ascii_case("ROWVERSION")
8157 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
8158 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
8159 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
8160 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
8161 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
8162 {
8163 Action::TSQLTypeNormalize
8164 }
8165 DataType::Float {
8166 precision: Some(_), ..
8167 } if !matches!(target, DialectType::TSQL) => {
8168 Action::TSQLTypeNormalize
8169 }
8170 DataType::TinyInt { .. }
8171 if !matches!(target, DialectType::TSQL) =>
8172 {
8173 Action::TSQLTypeNormalize
8174 }
8175 // INTEGER -> INT for Databricks/Spark targets
8176 DataType::Int {
8177 integer_spelling: true,
8178 ..
8179 } if matches!(
8180 target,
8181 DialectType::Databricks | DialectType::Spark
8182 ) =>
8183 {
8184 Action::TSQLTypeNormalize
8185 }
8186 _ => Action::None,
8187 }
8188 } else if (matches!(source, DialectType::Oracle)
8189 || matches!(source, DialectType::Generic))
8190 && !matches!(target, DialectType::Oracle)
8191 {
8192 match dt {
8193 DataType::Custom { ref name }
8194 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
8195 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
8196 || name.eq_ignore_ascii_case("VARCHAR2")
8197 || name.eq_ignore_ascii_case("NVARCHAR2") =>
8198 {
8199 Action::OracleVarchar2ToVarchar
8200 }
8201 _ => Action::None,
8202 }
8203 } else if matches!(target, DialectType::Snowflake)
8204 && !matches!(source, DialectType::Snowflake)
8205 {
8206 // When target is Snowflake but source is NOT Snowflake,
8207 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
8208 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
8209 // should keep their FLOAT spelling.
8210 match dt {
8211 DataType::Float { .. } => Action::SnowflakeFloatProtect,
8212 _ => Action::None,
8213 }
8214 } else {
8215 Action::None
8216 }
8217 }
8218 // LOWER patterns from BigQuery TO_HEX conversions:
8219 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
8220 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
8221 Expression::Lower(uf) => {
8222 if matches!(source, DialectType::BigQuery) {
8223 match &uf.this {
8224 Expression::Lower(_) => Action::BigQueryToHexLower,
8225 Expression::Function(f)
8226 if f.name == "TO_HEX"
8227 && matches!(target, DialectType::BigQuery) =>
8228 {
8229 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
8230 Action::BigQueryToHexLower
8231 }
8232 _ => Action::None,
8233 }
8234 } else {
8235 Action::None
8236 }
8237 }
8238 // UPPER patterns from BigQuery TO_HEX conversions:
8239 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
8240 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
8241 Expression::Upper(uf) => {
8242 if matches!(source, DialectType::BigQuery) {
8243 match &uf.this {
8244 Expression::Lower(_) => Action::BigQueryToHexUpper,
8245 _ => Action::None,
8246 }
8247 } else {
8248 Action::None
8249 }
8250 }
8251 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
8252 // Snowflake supports LAST_DAY with unit, so keep it there
8253 Expression::LastDay(ld) => {
8254 if matches!(source, DialectType::BigQuery)
8255 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
8256 && ld.unit.is_some()
8257 {
8258 Action::BigQueryLastDayStripUnit
8259 } else {
8260 Action::None
8261 }
8262 }
8263 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
8264 Expression::SafeDivide(_) => {
8265 if matches!(source, DialectType::BigQuery)
8266 && !matches!(target, DialectType::BigQuery)
8267 {
8268 Action::BigQuerySafeDivide
8269 } else {
8270 Action::None
8271 }
8272 }
8273 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
8274 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
8275 Expression::AnyValue(ref agg) => {
8276 if matches!(source, DialectType::BigQuery)
8277 && matches!(target, DialectType::DuckDB)
8278 && agg.having_max.is_some()
8279 {
8280 Action::BigQueryAnyValueHaving
8281 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
8282 && !matches!(source, DialectType::Spark | DialectType::Databricks)
8283 && agg.ignore_nulls.is_none()
8284 {
8285 Action::AnyValueIgnoreNulls
8286 } else {
8287 Action::None
8288 }
8289 }
8290 Expression::Any(ref q) => {
8291 if matches!(source, DialectType::PostgreSQL)
8292 && matches!(
8293 target,
8294 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8295 )
8296 && q.op.is_some()
8297 && !matches!(
8298 q.subquery,
8299 Expression::Select(_) | Expression::Subquery(_)
8300 )
8301 {
8302 Action::AnyToExists
8303 } else {
8304 Action::None
8305 }
8306 }
8307 // NOTE(review): stale note, no arm follows for it here: BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
8308 // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
8309 Expression::RegexpLike(_)
8310 if matches!(source, DialectType::Snowflake)
8311 && matches!(target, DialectType::DuckDB) =>
8312 {
8313 Action::RlikeSnowflakeToDuckDB
8314 }
8315 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
8316 Expression::RegexpLike(_)
8317 if !matches!(source, DialectType::DuckDB)
8318 && matches!(target, DialectType::DuckDB) =>
8319 {
8320 Action::RegexpLikeToDuckDB
8321 }
8322 // RegexpLike -> Exasol: anchor pattern with .*...*
8323 Expression::RegexpLike(_)
8324 if matches!(target, DialectType::Exasol) =>
8325 {
8326 Action::RegexpLikeExasolAnchor
8327 }
8328 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
8329 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
8330 Expression::Div(ref op)
8331 if matches!(
8332 source,
8333 DialectType::MySQL
8334 | DialectType::DuckDB
8335 | DialectType::SingleStore
8336 | DialectType::TiDB
8337 | DialectType::ClickHouse
8338 | DialectType::Doris
8339 ) && matches!(
8340 target,
8341 DialectType::PostgreSQL
8342 | DialectType::Redshift
8343 | DialectType::Drill
8344 | DialectType::Trino
8345 | DialectType::Presto
8346 | DialectType::Athena
8347 | DialectType::TSQL
8348 | DialectType::Teradata
8349 | DialectType::SQLite
8350 | DialectType::BigQuery
8351 | DialectType::Snowflake
8352 | DialectType::Databricks
8353 | DialectType::Oracle
8354 | DialectType::Materialize
8355 | DialectType::RisingWave
8356 ) =>
8357 {
8358 // Only wrap if RHS is not already NULLIF
8359 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
8360 {
8361 Action::MySQLSafeDivide
8362 } else {
8363 Action::None
8364 }
8365 }
8366 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
8367 // For TSQL/Fabric, convert to sp_rename instead
8368 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
8369 if let Some(crate::expressions::AlterTableAction::RenameTable(
8370 ref new_tbl,
8371 )) = at.actions.first()
8372 {
8373 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
8374 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
8375 Action::AlterTableToSpRename
8376 } else if new_tbl.schema.is_some()
8377 && matches!(
8378 target,
8379 DialectType::BigQuery
8380 | DialectType::Doris
8381 | DialectType::StarRocks
8382 | DialectType::DuckDB
8383 | DialectType::PostgreSQL
8384 | DialectType::Redshift
8385 )
8386 {
8387 Action::AlterTableRenameStripSchema
8388 } else {
8389 Action::None
8390 }
8391 } else {
8392 Action::None
8393 }
8394 }
8395 // EPOCH(x) expression -> target-specific epoch conversion
8396 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
8397 Action::EpochConvert
8398 }
8399 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
8400 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
8401 Action::EpochMsConvert
8402 }
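8403 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite; the same StringAggConvert action is also selected for SingleStore, Doris, StarRocks, Spark and Databricks targets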
8404 Expression::StringAgg(_) => {
8405 if matches!(
8406 target,
8407 DialectType::MySQL
8408 | DialectType::SingleStore
8409 | DialectType::Doris
8410 | DialectType::StarRocks
8411 | DialectType::SQLite
8412 ) {
8413 Action::StringAggConvert
8414 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
8415 Action::StringAggConvert
8416 } else {
8417 Action::None
8418 }
8419 }
8420 Expression::CombinedParameterizedAgg(_) => Action::GenericFunctionNormalize,
8421 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
8422 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
8423 Expression::GroupConcat(_) => Action::GroupConcatConvert,
8424 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
8425 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
8426 Expression::Cardinality(_)
8427 if matches!(source, DialectType::DuckDB)
8428 && matches!(target, DialectType::DuckDB) =>
8429 {
8430 Action::None
8431 }
8432 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
8433 Action::ArrayLengthConvert
8434 }
8435 Expression::ArraySize(_) => {
8436 if matches!(target, DialectType::Drill) {
8437 Action::ArraySizeDrill
8438 } else {
8439 Action::ArrayLengthConvert
8440 }
8441 }
8442 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
8443 Expression::ArrayRemove(_) => match target {
8444 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
8445 Action::ArrayRemoveConvert
8446 }
8447 _ => Action::None,
8448 },
8449 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
8450 Expression::ArrayReverse(_) => match target {
8451 DialectType::ClickHouse => Action::ArrayReverseConvert,
8452 _ => Action::None,
8453 },
8454 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
8455 Expression::JsonKeys(_) => match target {
8456 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
8457 Action::JsonKeysConvert
8458 }
8459 _ => Action::None,
8460 },
8461 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
8462 Expression::ParseJson(_) => match target {
8463 DialectType::SQLite
8464 | DialectType::Doris
8465 | DialectType::MySQL
8466 | DialectType::StarRocks => Action::ParseJsonStrip,
8467 _ => Action::None,
8468 },
8469 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
8470 Expression::WeekOfYear(_)
8471 if matches!(target, DialectType::Snowflake)
8472 && !matches!(source, DialectType::Snowflake) =>
8473 {
8474 Action::WeekOfYearToWeekIso
8475 }
8476 // NVL: clear original_name so generator uses dialect-specific function names
8477 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
8478 // XOR: expand for dialects that don't support the XOR keyword
8479 Expression::Xor(_) => {
8480 let target_supports_xor = matches!(
8481 target,
8482 DialectType::MySQL
8483 | DialectType::SingleStore
8484 | DialectType::Doris
8485 | DialectType::StarRocks
8486 );
8487 if !target_supports_xor {
8488 Action::XorExpand
8489 } else {
8490 Action::None
8491 }
8492 }
8493 // TSQL #table -> temp table normalization (CREATE TABLE)
8494 Expression::CreateTable(ct)
8495 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8496 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8497 && ct.name.name.name.starts_with('#') =>
8498 {
8499 Action::TempTableHash
8500 }
8501 // TSQL #table -> strip # from table references in SELECT/etc.
8502 Expression::Table(tr)
8503 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8504 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8505 && tr.name.name.starts_with('#') =>
8506 {
8507 Action::TempTableHash
8508 }
8509 // TSQL #table -> strip # from DROP TABLE names
8510 Expression::DropTable(ref dt)
8511 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8512 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8513 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
8514 {
8515 Action::TempTableHash
8516 }
8517 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8518 Expression::JsonExtract(_)
8519 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8520 {
8521 Action::JsonExtractToTsql
8522 }
8523 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8524 Expression::JsonExtractScalar(_)
8525 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8526 {
8527 Action::JsonExtractToTsql
8528 }
8529 // JSON_EXTRACT -> JSONExtractString for ClickHouse
8530 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
8531 Action::JsonExtractToClickHouse
8532 }
8533 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
8534 Expression::JsonExtractScalar(_)
8535 if matches!(target, DialectType::ClickHouse) =>
8536 {
8537 Action::JsonExtractToClickHouse
8538 }
8539 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
8540 Expression::JsonExtract(ref f)
8541 if !f.arrow_syntax
8542 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
8543 {
8544 Action::JsonExtractToArrow
8545 }
8546 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
8547 Expression::JsonExtract(ref f)
8548 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
8549 && !matches!(
8550 source,
8551 DialectType::PostgreSQL
8552 | DialectType::Redshift
8553 | DialectType::Materialize
8554 )
8555 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
8556 {
8557 Action::JsonExtractToGetJsonObject
8558 }
8559 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
8560 Expression::JsonExtract(_)
8561 if matches!(
8562 target,
8563 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8564 ) =>
8565 {
8566 Action::JsonExtractToGetJsonObject
8567 }
8568 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Redshift, Snowflake, SQLite, DuckDB
8569 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
8570 Expression::JsonExtractScalar(ref f)
8571 if !f.arrow_syntax
8572 && !f.hash_arrow_syntax
8573 && matches!(
8574 target,
8575 DialectType::PostgreSQL
8576 | DialectType::Redshift
8577 | DialectType::Snowflake
8578 | DialectType::SQLite
8579 | DialectType::DuckDB
8580 ) =>
8581 {
8582 Action::JsonExtractScalarConvert
8583 }
8584 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
8585 Expression::JsonExtractScalar(_)
8586 if matches!(
8587 target,
8588 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8589 ) =>
8590 {
8591 Action::JsonExtractScalarToGetJsonObject
8592 }
8593 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
8594 Expression::JsonExtract(ref f)
8595 if !f.arrow_syntax
8596 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
8597 {
8598 Action::JsonPathNormalize
8599 }
8600 // JsonQuery (parsed JSON_QUERY) -> target-specific
8601 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
8602 // JsonValue (parsed JSON_VALUE) -> target-specific
8603 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
8604 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
8605 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
8606 Expression::AtTimeZone(_)
8607 if matches!(
8608 target,
8609 DialectType::Presto
8610 | DialectType::Trino
8611 | DialectType::Athena
8612 | DialectType::Spark
8613 | DialectType::Databricks
8614 | DialectType::BigQuery
8615 | DialectType::Snowflake
8616 ) =>
8617 {
8618 Action::AtTimeZoneConvert
8619 }
8620 // DAY_OF_WEEK -> dialect-specific
8621 Expression::DayOfWeek(_)
8622 if matches!(
8623 target,
8624 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
8625 ) =>
8626 {
8627 Action::DayOfWeekConvert
8628 }
8629 // CURRENT_USER -> CURRENT_USER() for Snowflake
8630 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
8631 Action::CurrentUserParens
8632 }
8633 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
8634 Expression::ElementAt(_)
8635 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
8636 {
8637 Action::ElementAtConvert
8638 }
8639 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
8640 Expression::ArrayFunc(ref arr)
8641 if !arr.bracket_notation
8642 && matches!(
8643 target,
8644 DialectType::Spark
8645 | DialectType::Databricks
8646 | DialectType::Hive
8647 | DialectType::BigQuery
8648 | DialectType::DuckDB
8649 | DialectType::Snowflake
8650 | DialectType::Presto
8651 | DialectType::Trino
8652 | DialectType::Athena
8653 | DialectType::ClickHouse
8654 | DialectType::StarRocks
8655 ) =>
8656 {
8657 Action::ArraySyntaxConvert
8658 }
8659 // VARIANCE expression -> varSamp for ClickHouse
8660 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
8661 Action::VarianceToClickHouse
8662 }
8663 // STDDEV expression -> stddevSamp for ClickHouse
8664 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
8665 Action::StddevToClickHouse
8666 }
8667 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
8668 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
8669 Action::ApproxQuantileConvert
8670 }
8671 // MonthsBetween -> target-specific
8672 Expression::MonthsBetween(_)
8673 if !matches!(
8674 target,
8675 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8676 ) =>
8677 {
8678 Action::MonthsBetweenConvert
8679 }
8680 // AddMonths -> target-specific DATEADD/DATE_ADD
8681 Expression::AddMonths(_) => Action::AddMonthsConvert,
8682 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
8683 Expression::MapFromArrays(_)
8684 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
8685 {
8686 Action::MapFromArraysConvert
8687 }
8688 // CURRENT_USER -> CURRENT_USER() for Spark
8689 Expression::CurrentUser(_)
8690 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
8691 {
8692 Action::CurrentUserSparkParens
8693 }
8694 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
8695 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
8696 if matches!(
8697 source,
8698 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8699 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
8700 && matches!(
8701 target,
8702 DialectType::DuckDB
8703 | DialectType::Presto
8704 | DialectType::Trino
8705 | DialectType::Athena
8706 | DialectType::PostgreSQL
8707 | DialectType::Redshift
8708 ) =>
8709 {
8710 Action::SparkDateFuncCast
8711 }
8712 // $parameter -> @parameter for BigQuery
8713 Expression::Parameter(ref p)
8714 if matches!(target, DialectType::BigQuery)
8715 && matches!(source, DialectType::DuckDB)
8716 && (p.style == crate::expressions::ParameterStyle::Dollar
8717 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
8718 {
8719 Action::DollarParamConvert
8720 }
8721 // EscapeString literal: normalize literal newlines to \n
8722 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
8723 =>
8724 {
8725 Action::EscapeStringNormalize
8726 }
8727 // straight_join: keep lowercase for DuckDB, quote for MySQL
8728 Expression::Column(ref col)
8729 if col.name.name == "STRAIGHT_JOIN"
8730 && col.table.is_none()
8731 && matches!(source, DialectType::DuckDB)
8732 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
8733 {
8734 Action::StraightJoinCase
8735 }
8736 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
8737 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
8738 Expression::Interval(ref iv)
8739 if matches!(
8740 target,
8741 DialectType::Snowflake
8742 | DialectType::PostgreSQL
8743 | DialectType::Redshift
8744 ) && iv.unit.is_some()
8745 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
8746 {
8747 Action::SnowflakeIntervalFormat
8748 }
8749 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
8750 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
8751 if let Some(ref sample) = ts.sample {
8752 if !sample.explicit_method {
8753 Action::TablesampleReservoir
8754 } else {
8755 Action::None
8756 }
8757 } else {
8758 Action::None
8759 }
8760 }
8761 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
8762 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
8763 Expression::TableSample(ref ts)
8764 if matches!(target, DialectType::Snowflake)
8765 && !matches!(source, DialectType::Snowflake)
8766 && ts.sample.is_some() =>
8767 {
8768 if let Some(ref sample) = ts.sample {
8769 if !sample.explicit_method {
8770 Action::TablesampleSnowflakeStrip
8771 } else {
8772 Action::None
8773 }
8774 } else {
8775 Action::None
8776 }
8777 }
8778 Expression::Table(ref t)
8779 if matches!(target, DialectType::Snowflake)
8780 && !matches!(source, DialectType::Snowflake)
8781 && t.table_sample.is_some() =>
8782 {
8783 if let Some(ref sample) = t.table_sample {
8784 if !sample.explicit_method {
8785 Action::TablesampleSnowflakeStrip
8786 } else {
8787 Action::None
8788 }
8789 } else {
8790 Action::None
8791 }
8792 }
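8793 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL/Fabric. NOTE(review): likely unreachable, since the earlier AlterTable arm matches any non-empty action list and already returns AlterTableToSpRename for this case - confirm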
8794 Expression::AlterTable(ref at)
8795 if matches!(target, DialectType::TSQL | DialectType::Fabric)
8796 && !at.actions.is_empty()
8797 && matches!(
8798 at.actions.first(),
8799 Some(crate::expressions::AlterTableAction::RenameTable(_))
8800 ) =>
8801 {
8802 Action::AlterTableToSpRename
8803 }
8804 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
8805 Expression::Subscript(ref sub)
8806 if matches!(
8807 target,
8808 DialectType::BigQuery
8809 | DialectType::Hive
8810 | DialectType::Spark
8811 | DialectType::Databricks
8812 ) && matches!(
8813 source,
8814 DialectType::DuckDB
8815 | DialectType::PostgreSQL
8816 | DialectType::Presto
8817 | DialectType::Trino
8818 | DialectType::Redshift
8819 | DialectType::ClickHouse
8820 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
8821 {
8822 Action::ArrayIndexConvert
8823 }
8824 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
8825 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
8826 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
8827 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
8828 Expression::WindowFunction(ref wf) => {
8829 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
8830 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
8831 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
8832 if matches!(target, DialectType::BigQuery)
8833 && !is_row_number
8834 && !wf.over.order_by.is_empty()
8835 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
8836 {
8837 Action::BigQueryNullsOrdering
8838 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
8839 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
8840 } else {
8841 let source_nulls_last = matches!(source, DialectType::DuckDB);
8842 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
8843 matches!(
8844 f.kind,
8845 crate::expressions::WindowFrameKind::Range
8846 | crate::expressions::WindowFrameKind::Groups
8847 )
8848 });
8849 if source_nulls_last
8850 && matches!(target, DialectType::MySQL)
8851 && !wf.over.order_by.is_empty()
8852 && wf.over.order_by.iter().any(|o| !o.desc)
8853 && !has_range_frame
8854 {
8855 Action::MysqlNullsLastRewrite
8856 } else {
8857 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
8858 let is_ranking_window_func = matches!(
8859 &wf.this,
8860 Expression::FirstValue(_)
8861 | Expression::LastValue(_)
8862 | Expression::NthValue(_)
8863 );
8864 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
8865 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
8866 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
8867 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
8868 && f.exclude.is_none()
8869 });
8870 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
8871 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
8872 // Strip the default frame for Snowflake target
8873 Action::SnowflakeWindowFrameStrip
8874 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
8875 // Add default frame for non-Snowflake target
8876 Action::SnowflakeWindowFrameAdd
8877 } else {
8878 match &wf.this {
8879 Expression::FirstValue(ref vf)
8880 | Expression::LastValue(ref vf)
8881 if vf.ignore_nulls == Some(false) =>
8882 {
8883 match target {
8884 DialectType::SQLite => Action::RespectNullsConvert,
8885 _ => Action::None,
8886 }
8887 }
8888 _ => Action::None,
8889 }
8890 }
8891 } else {
8892 match &wf.this {
8893 Expression::FirstValue(ref vf)
8894 | Expression::LastValue(ref vf)
8895 if vf.ignore_nulls == Some(false) =>
8896 {
8897 // RESPECT NULLS
8898 match target {
8899 DialectType::SQLite | DialectType::PostgreSQL => {
8900 Action::RespectNullsConvert
8901 }
8902 _ => Action::None,
8903 }
8904 }
8905 _ => Action::None,
8906 }
8907 }
8908 }
8909 }
8910 }
8911 // CREATE TABLE a LIKE b -> dialect-specific transformations
8912 Expression::CreateTable(ref ct)
8913 if ct.columns.is_empty()
8914 && ct.constraints.iter().any(|c| {
8915 matches!(c, crate::expressions::TableConstraint::Like { .. })
8916 })
8917 && matches!(
8918 target,
8919 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
8920 ) =>
8921 {
8922 Action::CreateTableLikeToCtas
8923 }
8924 Expression::CreateTable(ref ct)
8925 if ct.columns.is_empty()
8926 && ct.constraints.iter().any(|c| {
8927 matches!(c, crate::expressions::TableConstraint::Like { .. })
8928 })
8929 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8930 {
8931 Action::CreateTableLikeToSelectInto
8932 }
8933 Expression::CreateTable(ref ct)
8934 if ct.columns.is_empty()
8935 && ct.constraints.iter().any(|c| {
8936 matches!(c, crate::expressions::TableConstraint::Like { .. })
8937 })
8938 && matches!(target, DialectType::ClickHouse) =>
8939 {
8940 Action::CreateTableLikeToAs
8941 }
8942 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
8943 Expression::CreateTable(ref ct)
8944 if matches!(target, DialectType::DuckDB)
8945 && matches!(
8946 source,
8947 DialectType::DuckDB
8948 | DialectType::Spark
8949 | DialectType::Databricks
8950 | DialectType::Hive
8951 ) =>
8952 {
8953 let has_comment = ct.columns.iter().any(|c| {
8954 c.comment.is_some()
8955 || c.constraints.iter().any(|con| {
8956 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
8957 })
8958 });
8959 let has_props = !ct.properties.is_empty();
8960 if has_comment || has_props {
8961 Action::CreateTableStripComment
8962 } else {
8963 Action::None
8964 }
8965 }
8966 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
8967 Expression::Array(_)
8968 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
8969 {
8970 Action::ArrayConcatBracketConvert
8971 }
8972 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
8973 Expression::ArrayFunc(ref arr)
8974 if arr.bracket_notation
8975 && matches!(source, DialectType::BigQuery)
8976 && matches!(target, DialectType::Redshift) =>
8977 {
8978 Action::ArrayConcatBracketConvert
8979 }
8980 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
8981 Expression::BitwiseOrAgg(ref f)
8982 | Expression::BitwiseAndAgg(ref f)
8983 | Expression::BitwiseXorAgg(ref f) => {
8984 if matches!(target, DialectType::DuckDB) {
8985 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
8986 if let Expression::Cast(ref c) = f.this {
8987 match &c.to {
8988 DataType::Float { .. }
8989 | DataType::Double { .. }
8990 | DataType::Decimal { .. } => Action::BitAggFloatCast,
8991 DataType::Custom { ref name }
8992 if name.eq_ignore_ascii_case("REAL") =>
8993 {
8994 Action::BitAggFloatCast
8995 }
8996 _ => Action::None,
8997 }
8998 } else {
8999 Action::None
9000 }
9001 } else if matches!(target, DialectType::Snowflake) {
9002 Action::BitAggSnowflakeRename
9003 } else {
9004 Action::None
9005 }
9006 }
9007 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
9008 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
9009 Action::FilterToIff
9010 }
9011 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
9012 Expression::Avg(ref f)
9013 | Expression::Sum(ref f)
9014 | Expression::Min(ref f)
9015 | Expression::Max(ref f)
9016 | Expression::CountIf(ref f)
9017 | Expression::Stddev(ref f)
9018 | Expression::StddevPop(ref f)
9019 | Expression::StddevSamp(ref f)
9020 | Expression::Variance(ref f)
9021 | Expression::VarPop(ref f)
9022 | Expression::VarSamp(ref f)
9023 | Expression::Median(ref f)
9024 | Expression::Mode(ref f)
9025 | Expression::First(ref f)
9026 | Expression::Last(ref f)
9027 | Expression::ApproxDistinct(ref f)
9028 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
9029 {
9030 Action::AggFilterToIff
9031 }
9032 Expression::Count(ref c)
9033 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
9034 {
9035 Action::AggFilterToIff
9036 }
9037 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
9038 Expression::Count(ref c)
9039 if c.distinct
9040 && matches!(&c.this, Some(Expression::Tuple(_)))
9041 && matches!(
9042 target,
9043 DialectType::Presto
9044 | DialectType::Trino
9045 | DialectType::DuckDB
9046 | DialectType::PostgreSQL
9047 ) =>
9048 {
9049 Action::CountDistinctMultiArg
9050 }
9051 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
9052 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
9053 Action::JsonToGetPath
9054 }
9055 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
9056 Expression::Struct(_)
9057 if matches!(
9058 target,
9059 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9060 ) && matches!(source, DialectType::DuckDB) =>
9061 {
9062 Action::StructToRow
9063 }
9064 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
9065 Expression::MapFunc(ref m)
9066 if m.curly_brace_syntax
9067 && matches!(
9068 target,
9069 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9070 )
9071 && matches!(source, DialectType::DuckDB) =>
9072 {
9073 Action::StructToRow
9074 }
9075 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
9076 Expression::ApproxCountDistinct(_)
9077 if matches!(
9078 target,
9079 DialectType::Presto | DialectType::Trino | DialectType::Athena
9080 ) =>
9081 {
9082 Action::ApproxCountDistinctToApproxDistinct
9083 }
9084 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
9085 Expression::ArrayContains(_)
9086 if matches!(
9087 target,
9088 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
9089 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
9090 {
9091 Action::ArrayContainsConvert
9092 }
9093 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
9094 Expression::ArrayContains(_)
9095 if matches!(target, DialectType::DuckDB)
9096 && matches!(source, DialectType::Snowflake) =>
9097 {
9098 Action::ArrayContainsDuckDBConvert
9099 }
9100 // ARRAY_EXCEPT -> target-specific conversion
9101 Expression::ArrayExcept(_)
9102 if matches!(
9103 target,
9104 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
9105 ) =>
9106 {
9107 Action::ArrayExceptConvert
9108 }
9109 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
9110 Expression::ArrayPosition(_)
9111 if matches!(target, DialectType::Snowflake)
9112 && !matches!(source, DialectType::Snowflake) =>
9113 {
9114 Action::ArrayPositionSnowflakeSwap
9115 }
9116 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
9117 Expression::ArrayPosition(_)
9118 if matches!(target, DialectType::DuckDB)
9119 && matches!(source, DialectType::Snowflake) =>
9120 {
9121 Action::SnowflakeArrayPositionToDuckDB
9122 }
9123 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
9124 Expression::ArrayDistinct(_)
9125 if matches!(target, DialectType::ClickHouse) =>
9126 {
9127 Action::ArrayDistinctClickHouse
9128 }
9129 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
9130 Expression::ArrayDistinct(_)
9131 if matches!(target, DialectType::DuckDB)
9132 && matches!(source, DialectType::Snowflake) =>
9133 {
9134 Action::ArrayDistinctConvert
9135 }
9136 // StrPosition with position -> complex expansion for Presto/DuckDB
9137 // STRPOS doesn't support a position arg in these dialects
9138 Expression::StrPosition(ref sp)
9139 if sp.position.is_some()
9140 && matches!(
9141 target,
9142 DialectType::Presto
9143 | DialectType::Trino
9144 | DialectType::Athena
9145 | DialectType::DuckDB
9146 ) =>
9147 {
9148 Action::StrPositionExpand
9149 }
9150 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
9151 Expression::First(ref f)
9152 if f.ignore_nulls == Some(true)
9153 && matches!(target, DialectType::DuckDB) =>
9154 {
9155 Action::FirstToAnyValue
9156 }
9157 // BEGIN -> START TRANSACTION for Presto/Trino
9158 Expression::Command(ref cmd)
9159 if cmd.this.eq_ignore_ascii_case("BEGIN")
9160 && matches!(
9161 target,
9162 DialectType::Presto | DialectType::Trino | DialectType::Athena
9163 ) =>
9164 {
9165 // Handled inline below
9166 Action::None // We'll handle it directly
9167 }
9168 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
9169 // PostgreSQL # is parsed as BitwiseXor (which is correct).
9170 // a || b (Concat operator) -> CONCAT function for Presto/Trino
9171 Expression::Concat(ref _op)
9172 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9173 && matches!(target, DialectType::Presto | DialectType::Trino) =>
9174 {
9175 Action::PipeConcatToConcat
9176 }
9177 _ => Action::None,
9178 }
9179 };
9180
9181 match action {
9182 Action::None => {
9183 // Handle inline transforms that don't need a dedicated action
9184 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
9185 if let Some(rewritten) = Self::rewrite_tsql_interval_arithmetic(&e) {
9186 return Ok(rewritten);
9187 }
9188 }
9189
9190 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
9191 if let Expression::Between(ref b) = e {
9192 if let Some(sym) = b.symmetric {
9193 let keeps_symmetric =
9194 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
9195 if !keeps_symmetric {
9196 if sym {
9197 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
9198 let b = if let Expression::Between(b) = e {
9199 *b
9200 } else {
9201 unreachable!()
9202 };
9203 let between1 = Expression::Between(Box::new(
9204 crate::expressions::Between {
9205 this: b.this.clone(),
9206 low: b.low.clone(),
9207 high: b.high.clone(),
9208 not: b.not,
9209 symmetric: None,
9210 },
9211 ));
9212 let between2 = Expression::Between(Box::new(
9213 crate::expressions::Between {
9214 this: b.this,
9215 low: b.high,
9216 high: b.low,
9217 not: b.not,
9218 symmetric: None,
9219 },
9220 ));
9221 return Ok(Expression::Paren(Box::new(
9222 crate::expressions::Paren {
9223 this: Expression::Or(Box::new(
9224 crate::expressions::BinaryOp::new(
9225 between1, between2,
9226 ),
9227 )),
9228 trailing_comments: vec![],
9229 },
9230 )));
9231 } else {
9232 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
9233 let b = if let Expression::Between(b) = e {
9234 *b
9235 } else {
9236 unreachable!()
9237 };
9238 return Ok(Expression::Between(Box::new(
9239 crate::expressions::Between {
9240 this: b.this,
9241 low: b.low,
9242 high: b.high,
9243 not: b.not,
9244 symmetric: None,
9245 },
9246 )));
9247 }
9248 }
9249 }
9250 }
9251
9252 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
9253 if let Expression::ILike(ref _like) = e {
9254 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
9255 let like = if let Expression::ILike(l) = e {
9256 *l
9257 } else {
9258 unreachable!()
9259 };
9260 let lower_left = Expression::Function(Box::new(Function::new(
9261 "LOWER".to_string(),
9262 vec![like.left],
9263 )));
9264 let lower_right = Expression::Function(Box::new(Function::new(
9265 "LOWER".to_string(),
9266 vec![like.right],
9267 )));
9268 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
9269 left: lower_left,
9270 right: lower_right,
9271 escape: like.escape,
9272 quantifier: like.quantifier,
9273 inferred_type: None,
9274 })));
9275 }
9276 }
9277
9278 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
9279 if let Expression::MethodCall(ref mc) = e {
9280 if matches!(source, DialectType::Oracle)
9281 && mc.method.name.eq_ignore_ascii_case("VALUE")
9282 && mc.args.is_empty()
9283 {
9284 let is_dbms_random = match &mc.this {
9285 Expression::Identifier(id) => {
9286 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
9287 }
9288 Expression::Column(col) => {
9289 col.table.is_none()
9290 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
9291 }
9292 _ => false,
9293 };
9294 if is_dbms_random {
9295 let func_name = match target {
9296 DialectType::PostgreSQL
9297 | DialectType::Redshift
9298 | DialectType::DuckDB
9299 | DialectType::SQLite => "RANDOM",
9300 DialectType::Oracle => "DBMS_RANDOM.VALUE",
9301 _ => "RAND",
9302 };
9303 return Ok(Expression::Function(Box::new(Function::new(
9304 func_name.to_string(),
9305 vec![],
9306 ))));
9307 }
9308 }
9309 }
9310 // TRIM without explicit position -> add BOTH for ClickHouse
9311 if let Expression::Trim(ref trim) = e {
9312 if matches!(target, DialectType::ClickHouse)
9313 && trim.sql_standard_syntax
9314 && trim.characters.is_some()
9315 && !trim.position_explicit
9316 {
9317 let mut new_trim = (**trim).clone();
9318 new_trim.position_explicit = true;
9319 return Ok(Expression::Trim(Box::new(new_trim)));
9320 }
9321 }
9322 // BEGIN -> START TRANSACTION for Presto/Trino
9323 if let Expression::Transaction(ref txn) = e {
9324 if matches!(
9325 target,
9326 DialectType::Presto | DialectType::Trino | DialectType::Athena
9327 ) {
9328 // Convert BEGIN to START TRANSACTION by setting mark to "START"
9329 let mut txn = txn.clone();
9330 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
9331 "START".to_string(),
9332 ))));
9333 return Ok(Expression::Transaction(Box::new(*txn)));
9334 }
9335 }
9336 // IS TRUE/FALSE -> simplified forms for Presto/Trino
9337 if matches!(
9338 target,
9339 DialectType::Presto | DialectType::Trino | DialectType::Athena
9340 ) {
9341 match &e {
9342 Expression::IsTrue(itf) if !itf.not => {
9343 // x IS TRUE -> x
9344 return Ok(itf.this.clone());
9345 }
9346 Expression::IsTrue(itf) if itf.not => {
9347 // x IS NOT TRUE -> NOT x
9348 return Ok(Expression::Not(Box::new(
9349 crate::expressions::UnaryOp {
9350 this: itf.this.clone(),
9351 inferred_type: None,
9352 },
9353 )));
9354 }
9355 Expression::IsFalse(itf) if !itf.not => {
9356 // x IS FALSE -> NOT x
9357 return Ok(Expression::Not(Box::new(
9358 crate::expressions::UnaryOp {
9359 this: itf.this.clone(),
9360 inferred_type: None,
9361 },
9362 )));
9363 }
9364 Expression::IsFalse(itf) if itf.not => {
9365 // x IS NOT FALSE -> NOT NOT x
9366 let not_x =
9367 Expression::Not(Box::new(crate::expressions::UnaryOp {
9368 this: itf.this.clone(),
9369 inferred_type: None,
9370 }));
9371 return Ok(Expression::Not(Box::new(
9372 crate::expressions::UnaryOp {
9373 this: not_x,
9374 inferred_type: None,
9375 },
9376 )));
9377 }
9378 _ => {}
9379 }
9380 }
9381 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
9382 if matches!(target, DialectType::Redshift) {
9383 if let Expression::IsFalse(ref itf) = e {
9384 if itf.not {
9385 return Ok(Expression::Not(Box::new(
9386 crate::expressions::UnaryOp {
9387 this: Expression::IsFalse(Box::new(
9388 crate::expressions::IsTrueFalse {
9389 this: itf.this.clone(),
9390 not: false,
9391 },
9392 )),
9393 inferred_type: None,
9394 },
9395 )));
9396 }
9397 }
9398 }
9399 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
9400 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
9401 if let Expression::Function(ref f) = e {
9402 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
9403 && matches!(source, DialectType::Snowflake)
9404 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
9405 {
9406 if f.args.len() == 3 {
9407 let mut args = f.args.clone();
9408 args.push(Expression::string("g"));
9409 return Ok(Expression::Function(Box::new(Function::new(
9410 "REGEXP_REPLACE".to_string(),
9411 args,
9412 ))));
9413 } else if f.args.len() == 4 {
9414 // 4th arg might be position, add 'g' as 5th
9415 let mut args = f.args.clone();
9416 args.push(Expression::string("g"));
9417 return Ok(Expression::Function(Box::new(Function::new(
9418 "REGEXP_REPLACE".to_string(),
9419 args,
9420 ))));
9421 }
9422 }
9423 }
9424 Ok(e)
9425 }
9426
9427 Action::GreatestLeastNull => {
9428 let f = if let Expression::Function(f) = e {
9429 *f
9430 } else {
9431 unreachable!("action only triggered for Function expressions")
9432 };
9433 let mut null_checks: Vec<Expression> = f
9434 .args
9435 .iter()
9436 .map(|a| {
9437 Expression::IsNull(Box::new(IsNull {
9438 this: a.clone(),
9439 not: false,
9440 postfix_form: false,
9441 }))
9442 })
9443 .collect();
9444 let condition = if null_checks.len() == 1 {
9445 null_checks.remove(0)
9446 } else {
9447 let first = null_checks.remove(0);
9448 null_checks.into_iter().fold(first, |acc, check| {
9449 Expression::Or(Box::new(BinaryOp::new(acc, check)))
9450 })
9451 };
9452 Ok(Expression::Case(Box::new(Case {
9453 operand: None,
9454 whens: vec![(condition, Expression::Null(Null))],
9455 else_: Some(Expression::Function(Box::new(Function::new(
9456 f.name, f.args,
9457 )))),
9458 comments: Vec::new(),
9459 inferred_type: None,
9460 })))
9461 }
9462
9463 Action::ArrayGenerateRange => {
9464 let f = if let Expression::Function(f) = e {
9465 *f
9466 } else {
9467 unreachable!("action only triggered for Function expressions")
9468 };
9469 let start = f.args[0].clone();
9470 let end = f.args[1].clone();
9471 let step = f.args.get(2).cloned();
9472
9473 // Helper: compute end - 1 for converting exclusive→inclusive end.
9474 // When end is a literal number, simplify to a computed literal.
9475 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
9476 // Try to simplify literal numbers
9477 match end {
9478 Expression::Literal(lit)
9479 if matches!(lit.as_ref(), Literal::Number(_)) =>
9480 {
9481 let Literal::Number(n) = lit.as_ref() else {
9482 unreachable!()
9483 };
9484 if let Ok(val) = n.parse::<i64>() {
9485 return Expression::number(val - 1);
9486 }
9487 }
9488 Expression::Neg(u) => {
9489 if let Expression::Literal(lit) = &u.this {
9490 if let Literal::Number(n) = lit.as_ref() {
9491 if let Ok(val) = n.parse::<i64>() {
9492 return Expression::number(-val - 1);
9493 }
9494 }
9495 }
9496 }
9497 _ => {}
9498 }
9499 // Non-literal: produce end - 1 expression
9500 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
9501 }
9502
9503 match target {
9504 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
9505 // so no adjustment needed — just rename the function.
9506 DialectType::Snowflake => {
9507 let mut args = vec![start, end];
9508 if let Some(s) = step {
9509 args.push(s);
9510 }
9511 Ok(Expression::Function(Box::new(Function::new(
9512 "ARRAY_GENERATE_RANGE".to_string(),
9513 args,
9514 ))))
9515 }
9516 DialectType::DuckDB => {
9517 let mut args = vec![start, end];
9518 if let Some(s) = step {
9519 args.push(s);
9520 }
9521 Ok(Expression::Function(Box::new(Function::new(
9522 "RANGE".to_string(),
9523 args,
9524 ))))
9525 }
9526 // These dialects use inclusive end, so convert exclusive→inclusive.
9527 // Presto/Trino: simplify literal numbers (3 → 2).
9528 DialectType::Presto | DialectType::Trino => {
9529 let end_inclusive = exclusive_to_inclusive_end(&end);
9530 let mut args = vec![start, end_inclusive];
9531 if let Some(s) = step {
9532 args.push(s);
9533 }
9534 Ok(Expression::Function(Box::new(Function::new(
9535 "SEQUENCE".to_string(),
9536 args,
9537 ))))
9538 }
9539 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
9540 DialectType::PostgreSQL | DialectType::Redshift => {
9541 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9542 end.clone(),
9543 Expression::number(1),
9544 )));
9545 let mut args = vec![start, end_minus_1];
9546 if let Some(s) = step {
9547 args.push(s);
9548 }
9549 Ok(Expression::Function(Box::new(Function::new(
9550 "GENERATE_SERIES".to_string(),
9551 args,
9552 ))))
9553 }
9554 DialectType::BigQuery => {
9555 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9556 end.clone(),
9557 Expression::number(1),
9558 )));
9559 let mut args = vec![start, end_minus_1];
9560 if let Some(s) = step {
9561 args.push(s);
9562 }
9563 Ok(Expression::Function(Box::new(Function::new(
9564 "GENERATE_ARRAY".to_string(),
9565 args,
9566 ))))
9567 }
9568 _ => Ok(Expression::Function(Box::new(Function::new(
9569 f.name, f.args,
9570 )))),
9571 }
9572 }
9573
9574 Action::Div0TypedDivision => {
9575 let if_func = if let Expression::IfFunc(f) = e {
9576 *f
9577 } else {
9578 unreachable!("action only triggered for IfFunc expressions")
9579 };
9580 if let Some(Expression::Div(div)) = if_func.false_value {
9581 let cast_type = if matches!(target, DialectType::SQLite) {
9582 DataType::Float {
9583 precision: None,
9584 scale: None,
9585 real_spelling: true,
9586 }
9587 } else {
9588 DataType::Double {
9589 precision: None,
9590 scale: None,
9591 }
9592 };
9593 let casted_left = Expression::Cast(Box::new(Cast {
9594 this: div.left,
9595 to: cast_type,
9596 trailing_comments: vec![],
9597 double_colon_syntax: false,
9598 format: None,
9599 default: None,
9600 inferred_type: None,
9601 }));
9602 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9603 condition: if_func.condition,
9604 true_value: if_func.true_value,
9605 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
9606 casted_left,
9607 div.right,
9608 )))),
9609 original_name: if_func.original_name,
9610 inferred_type: None,
9611 })))
9612 } else {
9613 // Not actually a Div, reconstruct
9614 Ok(Expression::IfFunc(Box::new(if_func)))
9615 }
9616 }
9617
9618 Action::ArrayAggCollectList => {
9619 let agg = if let Expression::ArrayAgg(a) = e {
9620 *a
9621 } else {
9622 unreachable!("action only triggered for ArrayAgg expressions")
9623 };
9624 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9625 name: Some("COLLECT_LIST".to_string()),
9626 ..agg
9627 })))
9628 }
9629
9630 Action::ArrayAggToGroupConcat => {
9631 let agg = if let Expression::ArrayAgg(a) = e {
9632 *a
9633 } else {
9634 unreachable!("action only triggered for ArrayAgg expressions")
9635 };
9636 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9637 name: Some("GROUP_CONCAT".to_string()),
9638 ..agg
9639 })))
9640 }
9641
9642 Action::ArrayAggWithinGroupFilter => {
9643 let wg = if let Expression::WithinGroup(w) = e {
9644 *w
9645 } else {
9646 unreachable!("action only triggered for WithinGroup expressions")
9647 };
9648 if let Expression::ArrayAgg(inner_agg) = wg.this {
9649 let col = inner_agg.this.clone();
9650 let filter = Expression::IsNull(Box::new(IsNull {
9651 this: col,
9652 not: true,
9653 postfix_form: false,
9654 }));
9655 // For DuckDB, add explicit NULLS FIRST for DESC ordering
9656 let order_by = if matches!(target, DialectType::DuckDB) {
9657 wg.order_by
9658 .into_iter()
9659 .map(|mut o| {
9660 if o.desc && o.nulls_first.is_none() {
9661 o.nulls_first = Some(true);
9662 }
9663 o
9664 })
9665 .collect()
9666 } else {
9667 wg.order_by
9668 };
9669 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9670 this: inner_agg.this,
9671 distinct: inner_agg.distinct,
9672 filter: Some(filter),
9673 order_by,
9674 name: inner_agg.name,
9675 ignore_nulls: inner_agg.ignore_nulls,
9676 having_max: inner_agg.having_max,
9677 limit: inner_agg.limit,
9678 inferred_type: None,
9679 })))
9680 } else {
9681 Ok(Expression::WithinGroup(Box::new(wg)))
9682 }
9683 }
9684
9685 Action::ArrayAggFilter => {
9686 let agg = if let Expression::ArrayAgg(a) = e {
9687 *a
9688 } else {
9689 unreachable!("action only triggered for ArrayAgg expressions")
9690 };
9691 let col = agg.this.clone();
9692 let filter = Expression::IsNull(Box::new(IsNull {
9693 this: col,
9694 not: true,
9695 postfix_form: false,
9696 }));
9697 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9698 filter: Some(filter),
9699 ..agg
9700 })))
9701 }
9702
9703 Action::ArrayAggNullFilter => {
9704 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
9705 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
9706 let agg = if let Expression::ArrayAgg(a) = e {
9707 *a
9708 } else {
9709 unreachable!("action only triggered for ArrayAgg expressions")
9710 };
9711 let col = agg.this.clone();
9712 let not_null = Expression::IsNull(Box::new(IsNull {
9713 this: col,
9714 not: true,
9715 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
9716 }));
9717 let new_filter = if let Some(existing_filter) = agg.filter {
9718 // AND the NOT IS NULL with existing filter
9719 Expression::And(Box::new(crate::expressions::BinaryOp::new(
9720 existing_filter,
9721 not_null,
9722 )))
9723 } else {
9724 not_null
9725 };
9726 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9727 filter: Some(new_filter),
9728 ..agg
9729 })))
9730 }
9731
9732 Action::BigQueryArraySelectAsStructToSnowflake => {
9733 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
9734 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
9735 if let Expression::Function(mut f) = e {
9736 let is_match = f.args.len() == 1
9737 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
9738 if is_match {
9739 let inner_select = match f.args.remove(0) {
9740 Expression::Select(s) => *s,
9741 _ => unreachable!(
9742 "argument already verified to be a Select expression"
9743 ),
9744 };
9745 // Build OBJECT_CONSTRUCT args from SELECT expressions
9746 let mut oc_args = Vec::new();
9747 for expr in &inner_select.expressions {
9748 match expr {
9749 Expression::Alias(a) => {
9750 let key = Expression::Literal(Box::new(Literal::String(
9751 a.alias.name.clone(),
9752 )));
9753 let value = a.this.clone();
9754 oc_args.push(key);
9755 oc_args.push(value);
9756 }
9757 Expression::Column(c) => {
9758 let key = Expression::Literal(Box::new(Literal::String(
9759 c.name.name.clone(),
9760 )));
9761 oc_args.push(key);
9762 oc_args.push(expr.clone());
9763 }
9764 _ => {
9765 oc_args.push(expr.clone());
9766 }
9767 }
9768 }
9769 let object_construct = Expression::Function(Box::new(Function::new(
9770 "OBJECT_CONSTRUCT".to_string(),
9771 oc_args,
9772 )));
9773 let array_agg = Expression::Function(Box::new(Function::new(
9774 "ARRAY_AGG".to_string(),
9775 vec![object_construct],
9776 )));
9777 let mut new_select = crate::expressions::Select::new();
9778 new_select.expressions = vec![array_agg];
9779 new_select.from = inner_select.from.clone();
9780 new_select.where_clause = inner_select.where_clause.clone();
9781 new_select.group_by = inner_select.group_by.clone();
9782 new_select.having = inner_select.having.clone();
9783 new_select.joins = inner_select.joins.clone();
9784 Ok(Expression::Subquery(Box::new(
9785 crate::expressions::Subquery {
9786 this: Expression::Select(Box::new(new_select)),
9787 alias: None,
9788 column_aliases: Vec::new(),
9789 alias_explicit_as: false,
9790 alias_keyword: None,
9791 order_by: None,
9792 limit: None,
9793 offset: None,
9794 distribute_by: None,
9795 sort_by: None,
9796 cluster_by: None,
9797 lateral: false,
9798 modifiers_inside: false,
9799 trailing_comments: Vec::new(),
9800 inferred_type: None,
9801 },
9802 )))
9803 } else {
9804 Ok(Expression::Function(f))
9805 }
9806 } else {
9807 Ok(e)
9808 }
9809 }
9810
9811 Action::BigQueryPercentileContToDuckDB => {
9812 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
9813 if let Expression::AggregateFunction(mut af) = e {
9814 af.name = "QUANTILE_CONT".to_string();
9815 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
9816 // Keep only first 2 args
9817 if af.args.len() > 2 {
9818 af.args.truncate(2);
9819 }
9820 Ok(Expression::AggregateFunction(af))
9821 } else {
9822 Ok(e)
9823 }
9824 }
9825
9826 Action::ArrayAggIgnoreNullsDuckDB => {
9827 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
9828 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
9829 let mut agg = if let Expression::ArrayAgg(a) = e {
9830 *a
9831 } else {
9832 unreachable!("action only triggered for ArrayAgg expressions")
9833 };
9834 agg.ignore_nulls = None; // Strip IGNORE NULLS
9835 if !agg.order_by.is_empty() {
9836 agg.order_by[0].nulls_first = Some(true);
9837 }
9838 Ok(Expression::ArrayAgg(Box::new(agg)))
9839 }
9840
9841 Action::CountDistinctMultiArg => {
9842 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
9843 if let Expression::Count(c) = e {
9844 if let Some(Expression::Tuple(t)) = c.this {
9845 let args = t.expressions;
9846 // Build CASE expression:
9847 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
9848 let mut whens = Vec::new();
9849 for arg in &args {
9850 whens.push((
9851 Expression::IsNull(Box::new(IsNull {
9852 this: arg.clone(),
9853 not: false,
9854 postfix_form: false,
9855 })),
9856 Expression::Null(crate::expressions::Null),
9857 ));
9858 }
9859 // Build the tuple for ELSE
9860 let tuple_expr =
9861 Expression::Tuple(Box::new(crate::expressions::Tuple {
9862 expressions: args,
9863 }));
9864 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
9865 operand: None,
9866 whens,
9867 else_: Some(tuple_expr),
9868 comments: Vec::new(),
9869 inferred_type: None,
9870 }));
9871 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
9872 this: Some(case_expr),
9873 star: false,
9874 distinct: true,
9875 filter: c.filter,
9876 ignore_nulls: c.ignore_nulls,
9877 original_name: c.original_name,
9878 inferred_type: None,
9879 })))
9880 } else {
9881 Ok(Expression::Count(c))
9882 }
9883 } else {
9884 Ok(e)
9885 }
9886 }
9887
9888 Action::CastTimestampToDatetime => {
9889 let c = if let Expression::Cast(c) = e {
9890 *c
9891 } else {
9892 unreachable!("action only triggered for Cast expressions")
9893 };
9894 Ok(Expression::Cast(Box::new(Cast {
9895 to: DataType::Custom {
9896 name: "DATETIME".to_string(),
9897 },
9898 ..c
9899 })))
9900 }
9901
9902 Action::CastTimestampStripTz => {
9903 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
9904 let c = if let Expression::Cast(c) = e {
9905 *c
9906 } else {
9907 unreachable!("action only triggered for Cast expressions")
9908 };
9909 Ok(Expression::Cast(Box::new(Cast {
9910 to: DataType::Timestamp {
9911 precision: None,
9912 timezone: false,
9913 },
9914 ..c
9915 })))
9916 }
9917
9918 Action::CastTimestamptzToFunc => {
9919 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
9920 let c = if let Expression::Cast(c) = e {
9921 *c
9922 } else {
9923 unreachable!("action only triggered for Cast expressions")
9924 };
9925 Ok(Expression::Function(Box::new(Function::new(
9926 "TIMESTAMP".to_string(),
9927 vec![c.this],
9928 ))))
9929 }
9930
9931 Action::ToDateToCast => {
9932 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
9933 if let Expression::Function(f) = e {
9934 let arg = f.args.into_iter().next().unwrap();
9935 Ok(Expression::Cast(Box::new(Cast {
9936 this: arg,
9937 to: DataType::Date,
9938 double_colon_syntax: false,
9939 trailing_comments: vec![],
9940 format: None,
9941 default: None,
9942 inferred_type: None,
9943 })))
9944 } else {
9945 Ok(e)
9946 }
9947 }
9948 Action::DateTruncWrapCast => {
9949 // Handle both Expression::DateTrunc/TimestampTrunc and
9950 // Expression::Function("DATE_TRUNC", [unit, expr])
9951 match e {
9952 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
9953 let input_type = match &d.this {
9954 Expression::Cast(c) => Some(c.to.clone()),
9955 _ => None,
9956 };
9957 if let Some(cast_type) = input_type {
9958 let is_time = matches!(cast_type, DataType::Time { .. });
9959 if is_time {
9960 let date_expr = Expression::Cast(Box::new(Cast {
9961 this: Expression::Literal(Box::new(
9962 crate::expressions::Literal::String(
9963 "1970-01-01".to_string(),
9964 ),
9965 )),
9966 to: DataType::Date,
9967 double_colon_syntax: false,
9968 trailing_comments: vec![],
9969 format: None,
9970 default: None,
9971 inferred_type: None,
9972 }));
9973 let add_expr =
9974 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
9975 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
9976 this: add_expr,
9977 unit: d.unit,
9978 }));
9979 Ok(Expression::Cast(Box::new(Cast {
9980 this: inner,
9981 to: cast_type,
9982 double_colon_syntax: false,
9983 trailing_comments: vec![],
9984 format: None,
9985 default: None,
9986 inferred_type: None,
9987 })))
9988 } else {
9989 let inner = Expression::DateTrunc(Box::new(*d));
9990 Ok(Expression::Cast(Box::new(Cast {
9991 this: inner,
9992 to: cast_type,
9993 double_colon_syntax: false,
9994 trailing_comments: vec![],
9995 format: None,
9996 default: None,
9997 inferred_type: None,
9998 })))
9999 }
10000 } else {
10001 Ok(Expression::DateTrunc(d))
10002 }
10003 }
10004 Expression::Function(f) if f.args.len() == 2 => {
10005 // Function-based DATE_TRUNC(unit, expr)
10006 let input_type = match &f.args[1] {
10007 Expression::Cast(c) => Some(c.to.clone()),
10008 _ => None,
10009 };
10010 if let Some(cast_type) = input_type {
10011 let is_time = matches!(cast_type, DataType::Time { .. });
10012 if is_time {
10013 let date_expr = Expression::Cast(Box::new(Cast {
10014 this: Expression::Literal(Box::new(
10015 crate::expressions::Literal::String(
10016 "1970-01-01".to_string(),
10017 ),
10018 )),
10019 to: DataType::Date,
10020 double_colon_syntax: false,
10021 trailing_comments: vec![],
10022 format: None,
10023 default: None,
10024 inferred_type: None,
10025 }));
10026 let mut args = f.args;
10027 let unit_arg = args.remove(0);
10028 let time_expr = args.remove(0);
10029 let add_expr = Expression::Add(Box::new(BinaryOp::new(
10030 date_expr, time_expr,
10031 )));
10032 let inner = Expression::Function(Box::new(Function::new(
10033 "DATE_TRUNC".to_string(),
10034 vec![unit_arg, add_expr],
10035 )));
10036 Ok(Expression::Cast(Box::new(Cast {
10037 this: inner,
10038 to: cast_type,
10039 double_colon_syntax: false,
10040 trailing_comments: vec![],
10041 format: None,
10042 default: None,
10043 inferred_type: None,
10044 })))
10045 } else {
10046 // Wrap the function in CAST
10047 Ok(Expression::Cast(Box::new(Cast {
10048 this: Expression::Function(f),
10049 to: cast_type,
10050 double_colon_syntax: false,
10051 trailing_comments: vec![],
10052 format: None,
10053 default: None,
10054 inferred_type: None,
10055 })))
10056 }
10057 } else {
10058 Ok(Expression::Function(f))
10059 }
10060 }
10061 other => Ok(other),
10062 }
10063 }
10064
10065 Action::RegexpReplaceSnowflakeToDuckDB => {
10066 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
10067 if let Expression::Function(f) = e {
10068 let mut args = f.args;
10069 let subject = args.remove(0);
10070 let pattern = args.remove(0);
10071 let replacement = args.remove(0);
10072 Ok(Expression::Function(Box::new(Function::new(
10073 "REGEXP_REPLACE".to_string(),
10074 vec![
10075 subject,
10076 pattern,
10077 replacement,
10078 Expression::Literal(Box::new(crate::expressions::Literal::String(
10079 "g".to_string(),
10080 ))),
10081 ],
10082 ))))
10083 } else {
10084 Ok(e)
10085 }
10086 }
10087
10088 Action::RegexpReplacePositionSnowflakeToDuckDB => {
10089 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
10090 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
10091 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
10092 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
10093 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
10094 if let Expression::Function(f) = e {
10095 let mut args = f.args;
10096 let subject = args.remove(0);
10097 let pattern = args.remove(0);
10098 let replacement = args.remove(0);
10099 let position = args.remove(0);
10100 let occurrence = args.remove(0);
10101
10102 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10103 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10104 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10105
10106 if is_pos_1 && is_occ_1 {
10107 // REGEXP_REPLACE(s, p, r) - single replace, no flags
10108 Ok(Expression::Function(Box::new(Function::new(
10109 "REGEXP_REPLACE".to_string(),
10110 vec![subject, pattern, replacement],
10111 ))))
10112 } else if is_pos_1 && is_occ_0 {
10113 // REGEXP_REPLACE(s, p, r, 'g') - global replace
10114 Ok(Expression::Function(Box::new(Function::new(
10115 "REGEXP_REPLACE".to_string(),
10116 vec![
10117 subject,
10118 pattern,
10119 replacement,
10120 Expression::Literal(Box::new(Literal::String("g".to_string()))),
10121 ],
10122 ))))
10123 } else {
10124 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
10125 // Pre-compute pos-1 when position is a numeric literal
10126 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
10127 if let Literal::Number(ref n) = lit.as_ref() {
10128 if let Ok(val) = n.parse::<i64>() {
10129 Expression::number(val - 1)
10130 } else {
10131 Expression::Sub(Box::new(BinaryOp::new(
10132 position.clone(),
10133 Expression::number(1),
10134 )))
10135 }
10136 } else {
10137 position.clone()
10138 }
10139 } else {
10140 Expression::Sub(Box::new(BinaryOp::new(
10141 position.clone(),
10142 Expression::number(1),
10143 )))
10144 };
10145 let prefix = Expression::Function(Box::new(Function::new(
10146 "SUBSTRING".to_string(),
10147 vec![subject.clone(), Expression::number(1), pos_minus_1],
10148 )));
10149 let suffix_subject = Expression::Function(Box::new(Function::new(
10150 "SUBSTRING".to_string(),
10151 vec![subject, position],
10152 )));
10153 let mut replace_args = vec![suffix_subject, pattern, replacement];
10154 if is_occ_0 {
10155 replace_args.push(Expression::Literal(Box::new(Literal::String(
10156 "g".to_string(),
10157 ))));
10158 }
10159 let replace_expr = Expression::Function(Box::new(Function::new(
10160 "REGEXP_REPLACE".to_string(),
10161 replace_args,
10162 )));
10163 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10164 this: Box::new(prefix),
10165 expression: Box::new(replace_expr),
10166 safe: None,
10167 })))
10168 }
10169 } else {
10170 Ok(e)
10171 }
10172 }
10173
10174 Action::RegexpSubstrSnowflakeToDuckDB => {
10175 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
10176 if let Expression::Function(f) = e {
10177 let mut args = f.args;
10178 let arg_count = args.len();
10179 match arg_count {
10180 // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
10181 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10182 "REGEXP_EXTRACT".to_string(),
10183 args,
10184 )))),
10185 // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
10186 3 => {
10187 let subject = args.remove(0);
10188 let pattern = args.remove(0);
10189 let position = args.remove(0);
10190 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10191 if is_pos_1 {
10192 Ok(Expression::Function(Box::new(Function::new(
10193 "REGEXP_EXTRACT".to_string(),
10194 vec![subject, pattern],
10195 ))))
10196 } else {
10197 let substring_expr =
10198 Expression::Function(Box::new(Function::new(
10199 "SUBSTRING".to_string(),
10200 vec![subject, position],
10201 )));
10202 let nullif_expr =
10203 Expression::Function(Box::new(Function::new(
10204 "NULLIF".to_string(),
10205 vec![
10206 substring_expr,
10207 Expression::Literal(Box::new(Literal::String(
10208 String::new(),
10209 ))),
10210 ],
10211 )));
10212 Ok(Expression::Function(Box::new(Function::new(
10213 "REGEXP_EXTRACT".to_string(),
10214 vec![nullif_expr, pattern],
10215 ))))
10216 }
10217 }
10218 // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
10219 4 => {
10220 let subject = args.remove(0);
10221 let pattern = args.remove(0);
10222 let position = args.remove(0);
10223 let occurrence = args.remove(0);
10224 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10225 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10226
10227 let effective_subject = if is_pos_1 {
10228 subject
10229 } else {
10230 let substring_expr =
10231 Expression::Function(Box::new(Function::new(
10232 "SUBSTRING".to_string(),
10233 vec![subject, position],
10234 )));
10235 Expression::Function(Box::new(Function::new(
10236 "NULLIF".to_string(),
10237 vec![
10238 substring_expr,
10239 Expression::Literal(Box::new(Literal::String(
10240 String::new(),
10241 ))),
10242 ],
10243 )))
10244 };
10245
10246 if is_occ_1 {
10247 Ok(Expression::Function(Box::new(Function::new(
10248 "REGEXP_EXTRACT".to_string(),
10249 vec![effective_subject, pattern],
10250 ))))
10251 } else {
10252 // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
10253 let extract_all =
10254 Expression::Function(Box::new(Function::new(
10255 "REGEXP_EXTRACT_ALL".to_string(),
10256 vec![effective_subject, pattern],
10257 )));
10258 Ok(Expression::Function(Box::new(Function::new(
10259 "ARRAY_EXTRACT".to_string(),
10260 vec![extract_all, occurrence],
10261 ))))
10262 }
10263 }
10264 // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
10265 5 => {
10266 let subject = args.remove(0);
10267 let pattern = args.remove(0);
10268 let _position = args.remove(0);
10269 let _occurrence = args.remove(0);
10270 let _flags = args.remove(0);
10271 // Strip 'e' flag, convert to REGEXP_EXTRACT
10272 Ok(Expression::Function(Box::new(Function::new(
10273 "REGEXP_EXTRACT".to_string(),
10274 vec![subject, pattern],
10275 ))))
10276 }
10277 // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
10278 _ => {
10279 let subject = args.remove(0);
10280 let pattern = args.remove(0);
10281 let _position = args.remove(0);
10282 let _occurrence = args.remove(0);
10283 let _flags = args.remove(0);
10284 let group = args.remove(0);
10285 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10286 if is_group_0 {
10287 // Strip group=0 (default)
10288 Ok(Expression::Function(Box::new(Function::new(
10289 "REGEXP_EXTRACT".to_string(),
10290 vec![subject, pattern],
10291 ))))
10292 } else {
10293 Ok(Expression::Function(Box::new(Function::new(
10294 "REGEXP_EXTRACT".to_string(),
10295 vec![subject, pattern, group],
10296 ))))
10297 }
10298 }
10299 }
10300 } else {
10301 Ok(e)
10302 }
10303 }
10304
10305 Action::RegexpSubstrSnowflakeIdentity => {
10306 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
10307 // Strip trailing group=0
10308 if let Expression::Function(f) = e {
10309 let func_name = f.name.clone();
10310 let mut args = f.args;
10311 if args.len() == 6 {
10312 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10313 if is_group_0 {
10314 args.truncate(5);
10315 }
10316 }
10317 Ok(Expression::Function(Box::new(Function::new(
10318 func_name, args,
10319 ))))
10320 } else {
10321 Ok(e)
10322 }
10323 }
10324
10325 Action::RegexpSubstrAllSnowflakeToDuckDB => {
10326 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
10327 if let Expression::Function(f) = e {
10328 let mut args = f.args;
10329 let arg_count = args.len();
10330 match arg_count {
10331 // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
10332 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10333 "REGEXP_EXTRACT_ALL".to_string(),
10334 args,
10335 )))),
10336 // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
10337 3 => {
10338 let subject = args.remove(0);
10339 let pattern = args.remove(0);
10340 let position = args.remove(0);
10341 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10342 if is_pos_1 {
10343 Ok(Expression::Function(Box::new(Function::new(
10344 "REGEXP_EXTRACT_ALL".to_string(),
10345 vec![subject, pattern],
10346 ))))
10347 } else {
10348 let substring_expr =
10349 Expression::Function(Box::new(Function::new(
10350 "SUBSTRING".to_string(),
10351 vec![subject, position],
10352 )));
10353 Ok(Expression::Function(Box::new(Function::new(
10354 "REGEXP_EXTRACT_ALL".to_string(),
10355 vec![substring_expr, pattern],
10356 ))))
10357 }
10358 }
10359 // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
10360 4 => {
10361 let subject = args.remove(0);
10362 let pattern = args.remove(0);
10363 let position = args.remove(0);
10364 let occurrence = args.remove(0);
10365 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10366 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10367
10368 let effective_subject = if is_pos_1 {
10369 subject
10370 } else {
10371 Expression::Function(Box::new(Function::new(
10372 "SUBSTRING".to_string(),
10373 vec![subject, position],
10374 )))
10375 };
10376
10377 if is_occ_1 {
10378 Ok(Expression::Function(Box::new(Function::new(
10379 "REGEXP_EXTRACT_ALL".to_string(),
10380 vec![effective_subject, pattern],
10381 ))))
10382 } else {
10383 // REGEXP_EXTRACT_ALL(s, p)[occ:]
10384 let extract_all =
10385 Expression::Function(Box::new(Function::new(
10386 "REGEXP_EXTRACT_ALL".to_string(),
10387 vec![effective_subject, pattern],
10388 )));
10389 Ok(Expression::ArraySlice(Box::new(
10390 crate::expressions::ArraySlice {
10391 this: extract_all,
10392 start: Some(occurrence),
10393 end: None,
10394 },
10395 )))
10396 }
10397 }
10398 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
10399 5 => {
10400 let subject = args.remove(0);
10401 let pattern = args.remove(0);
10402 let _position = args.remove(0);
10403 let _occurrence = args.remove(0);
10404 let _flags = args.remove(0);
10405 Ok(Expression::Function(Box::new(Function::new(
10406 "REGEXP_EXTRACT_ALL".to_string(),
10407 vec![subject, pattern],
10408 ))))
10409 }
10410 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
10411 _ => {
10412 let subject = args.remove(0);
10413 let pattern = args.remove(0);
10414 let _position = args.remove(0);
10415 let _occurrence = args.remove(0);
10416 let _flags = args.remove(0);
10417 let group = args.remove(0);
10418 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10419 if is_group_0 {
10420 Ok(Expression::Function(Box::new(Function::new(
10421 "REGEXP_EXTRACT_ALL".to_string(),
10422 vec![subject, pattern],
10423 ))))
10424 } else {
10425 Ok(Expression::Function(Box::new(Function::new(
10426 "REGEXP_EXTRACT_ALL".to_string(),
10427 vec![subject, pattern, group],
10428 ))))
10429 }
10430 }
10431 }
10432 } else {
10433 Ok(e)
10434 }
10435 }
10436
10437 Action::RegexpCountSnowflakeToDuckDB => {
10438 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
10439 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10440 if let Expression::Function(f) = e {
10441 let mut args = f.args;
10442 let arg_count = args.len();
10443 let subject = args.remove(0);
10444 let pattern = args.remove(0);
10445
10446 // Handle position arg
10447 let effective_subject = if arg_count >= 3 {
10448 let position = args.remove(0);
10449 Expression::Function(Box::new(Function::new(
10450 "SUBSTRING".to_string(),
10451 vec![subject, position],
10452 )))
10453 } else {
10454 subject
10455 };
10456
10457 // Handle flags arg -> embed as (?flags) prefix in pattern
10458 let effective_pattern = if arg_count >= 4 {
10459 let flags = args.remove(0);
10460 match &flags {
10461 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
10462 {
10463 let Literal::String(f_str) = lit.as_ref() else {
10464 unreachable!()
10465 };
10466 // Always use concatenation: '(?flags)' || pattern
10467 let prefix = Expression::Literal(Box::new(Literal::String(
10468 format!("(?{})", f_str),
10469 )));
10470 Expression::DPipe(Box::new(crate::expressions::DPipe {
10471 this: Box::new(prefix),
10472 expression: Box::new(pattern.clone()),
10473 safe: None,
10474 }))
10475 }
10476 _ => pattern.clone(),
10477 }
10478 } else {
10479 pattern.clone()
10480 };
10481
10482 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10483 let extract_all = Expression::Function(Box::new(Function::new(
10484 "REGEXP_EXTRACT_ALL".to_string(),
10485 vec![effective_subject, effective_pattern.clone()],
10486 )));
10487 let length_expr =
10488 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10489 this: extract_all,
10490 original_name: None,
10491 inferred_type: None,
10492 }));
10493 let condition = Expression::Eq(Box::new(BinaryOp::new(
10494 effective_pattern,
10495 Expression::Literal(Box::new(Literal::String(String::new()))),
10496 )));
10497 Ok(Expression::Case(Box::new(Case {
10498 operand: None,
10499 whens: vec![(condition, Expression::number(0))],
10500 else_: Some(length_expr),
10501 comments: vec![],
10502 inferred_type: None,
10503 })))
10504 } else {
10505 Ok(e)
10506 }
10507 }
10508
10509 Action::RegexpInstrSnowflakeToDuckDB => {
10510 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
10511 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
10512 // WHEN p = '' THEN 0
10513 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10514 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
10515 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
10516 // + pos_offset
10517 // END
10518 if let Expression::Function(f) = e {
10519 let mut args = f.args;
10520 let subject = args.remove(0);
10521 let pattern = if !args.is_empty() {
10522 args.remove(0)
10523 } else {
10524 Expression::Literal(Box::new(Literal::String(String::new())))
10525 };
10526
10527 // Collect all original args for NULL checks
10528 let position = if !args.is_empty() {
10529 Some(args.remove(0))
10530 } else {
10531 None
10532 };
10533 let occurrence = if !args.is_empty() {
10534 Some(args.remove(0))
10535 } else {
10536 None
10537 };
10538 let option = if !args.is_empty() {
10539 Some(args.remove(0))
10540 } else {
10541 None
10542 };
10543 let flags = if !args.is_empty() {
10544 Some(args.remove(0))
10545 } else {
10546 None
10547 };
10548 let _group = if !args.is_empty() {
10549 Some(args.remove(0))
10550 } else {
10551 None
10552 };
10553
10554 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
10555 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
10556
10557 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
10558 let mut null_checks: Vec<Expression> = vec![
10559 Expression::Is(Box::new(BinaryOp::new(
10560 subject.clone(),
10561 Expression::Null(Null),
10562 ))),
10563 Expression::Is(Box::new(BinaryOp::new(
10564 pattern.clone(),
10565 Expression::Null(Null),
10566 ))),
10567 ];
10568 // Add NULL checks for all provided optional args
10569 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
10570 if let Some(arg) = opt_arg {
10571 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
10572 (*arg).clone(),
10573 Expression::Null(Null),
10574 ))));
10575 }
10576 }
10577 // Chain with OR
10578 let null_condition = null_checks
10579 .into_iter()
10580 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
10581 .unwrap();
10582
10583 // Effective subject (apply position offset)
10584 let effective_subject = if is_pos_1 {
10585 subject.clone()
10586 } else {
10587 let pos = position.clone().unwrap_or(Expression::number(1));
10588 Expression::Function(Box::new(Function::new(
10589 "SUBSTRING".to_string(),
10590 vec![subject.clone(), pos],
10591 )))
10592 };
10593
10594 // Effective pattern (apply flags if present)
10595 let effective_pattern = if let Some(ref fl) = flags {
10596 if let Expression::Literal(lit) = fl {
10597 if let Literal::String(f_str) = lit.as_ref() {
10598 if !f_str.is_empty() {
10599 let prefix = Expression::Literal(Box::new(
10600 Literal::String(format!("(?{})", f_str)),
10601 ));
10602 Expression::DPipe(Box::new(crate::expressions::DPipe {
10603 this: Box::new(prefix),
10604 expression: Box::new(pattern.clone()),
10605 safe: None,
10606 }))
10607 } else {
10608 pattern.clone()
10609 }
10610 } else {
10611 fl.clone()
10612 }
10613 } else {
10614 pattern.clone()
10615 }
10616 } else {
10617 pattern.clone()
10618 };
10619
10620 // WHEN pattern = '' THEN 0
10621 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
10622 effective_pattern.clone(),
10623 Expression::Literal(Box::new(Literal::String(String::new()))),
10624 )));
10625
10626 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10627 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
10628 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10629 this: Expression::Function(Box::new(Function::new(
10630 "REGEXP_EXTRACT_ALL".to_string(),
10631 vec![effective_subject.clone(), effective_pattern.clone()],
10632 ))),
10633 original_name: None,
10634 inferred_type: None,
10635 })),
10636 occurrence_expr.clone(),
10637 )));
10638
10639 // Helper: build LENGTH lambda for LIST_TRANSFORM
10640 let make_len_lambda = || {
10641 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
10642 parameters: vec![crate::expressions::Identifier::new("x")],
10643 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
10644 this: Expression::Identifier(
10645 crate::expressions::Identifier::new("x"),
10646 ),
10647 original_name: None,
10648 inferred_type: None,
10649 })),
10650 colon: false,
10651 parameter_types: vec![],
10652 }))
10653 };
10654
10655 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
10656 let split_sliced =
10657 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10658 this: Expression::Function(Box::new(Function::new(
10659 "STRING_SPLIT_REGEX".to_string(),
10660 vec![effective_subject.clone(), effective_pattern.clone()],
10661 ))),
10662 start: Some(Expression::number(1)),
10663 end: Some(occurrence_expr.clone()),
10664 }));
10665 let split_sum = Expression::Function(Box::new(Function::new(
10666 "COALESCE".to_string(),
10667 vec![
10668 Expression::Function(Box::new(Function::new(
10669 "LIST_SUM".to_string(),
10670 vec![Expression::Function(Box::new(Function::new(
10671 "LIST_TRANSFORM".to_string(),
10672 vec![split_sliced, make_len_lambda()],
10673 )))],
10674 ))),
10675 Expression::number(0),
10676 ],
10677 )));
10678
10679 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
10680 let extract_sliced =
10681 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10682 this: Expression::Function(Box::new(Function::new(
10683 "REGEXP_EXTRACT_ALL".to_string(),
10684 vec![effective_subject.clone(), effective_pattern.clone()],
10685 ))),
10686 start: Some(Expression::number(1)),
10687 end: Some(Expression::Sub(Box::new(BinaryOp::new(
10688 occurrence_expr.clone(),
10689 Expression::number(1),
10690 )))),
10691 }));
10692 let extract_sum = Expression::Function(Box::new(Function::new(
10693 "COALESCE".to_string(),
10694 vec![
10695 Expression::Function(Box::new(Function::new(
10696 "LIST_SUM".to_string(),
10697 vec![Expression::Function(Box::new(Function::new(
10698 "LIST_TRANSFORM".to_string(),
10699 vec![extract_sliced, make_len_lambda()],
10700 )))],
10701 ))),
10702 Expression::number(0),
10703 ],
10704 )));
10705
10706 // Position offset: pos - 1 when pos > 1, else 0
10707 let pos_offset: Expression = if !is_pos_1 {
10708 let pos = position.clone().unwrap_or(Expression::number(1));
10709 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
10710 } else {
10711 Expression::number(0)
10712 };
10713
10714 // ELSE: 1 + split_sum + extract_sum + pos_offset
10715 let else_expr = Expression::Add(Box::new(BinaryOp::new(
10716 Expression::Add(Box::new(BinaryOp::new(
10717 Expression::Add(Box::new(BinaryOp::new(
10718 Expression::number(1),
10719 split_sum,
10720 ))),
10721 extract_sum,
10722 ))),
10723 pos_offset,
10724 )));
10725
10726 Ok(Expression::Case(Box::new(Case {
10727 operand: None,
10728 whens: vec![
10729 (null_condition, Expression::Null(Null)),
10730 (empty_pattern_check, Expression::number(0)),
10731 (match_count_check, Expression::number(0)),
10732 ],
10733 else_: Some(else_expr),
10734 comments: vec![],
10735 inferred_type: None,
10736 })))
10737 } else {
10738 Ok(e)
10739 }
10740 }
10741
10742 Action::RlikeSnowflakeToDuckDB => {
10743 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
10744 // Both do full-string matching, so no anchoring needed
10745 let (subject, pattern, flags) = match e {
10746 Expression::RegexpLike(ref rl) => {
10747 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
10748 }
10749 Expression::Function(ref f) if f.args.len() >= 2 => {
10750 let s = f.args[0].clone();
10751 let p = f.args[1].clone();
10752 let fl = f.args.get(2).cloned();
10753 (s, p, fl)
10754 }
10755 _ => return Ok(e),
10756 };
10757
10758 let mut result_args = vec![subject, pattern];
10759 if let Some(fl) = flags {
10760 result_args.push(fl);
10761 }
10762 Ok(Expression::Function(Box::new(Function::new(
10763 "REGEXP_FULL_MATCH".to_string(),
10764 result_args,
10765 ))))
10766 }
10767
10768 Action::RegexpExtractAllToSnowflake => {
10769 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
10770 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
10771 if let Expression::Function(f) = e {
10772 let mut args = f.args;
10773 if args.len() >= 2 {
10774 let str_expr = args.remove(0);
10775 let pattern = args.remove(0);
10776
10777 let has_groups = match &pattern {
10778 Expression::Literal(lit)
10779 if matches!(lit.as_ref(), Literal::String(_)) =>
10780 {
10781 let Literal::String(s) = lit.as_ref() else {
10782 unreachable!()
10783 };
10784 s.contains('(') && s.contains(')')
10785 }
10786 _ => false,
10787 };
10788
10789 if has_groups {
10790 Ok(Expression::Function(Box::new(Function::new(
10791 "REGEXP_SUBSTR_ALL".to_string(),
10792 vec![
10793 str_expr,
10794 pattern,
10795 Expression::number(1),
10796 Expression::number(1),
10797 Expression::Literal(Box::new(Literal::String(
10798 "c".to_string(),
10799 ))),
10800 Expression::number(1),
10801 ],
10802 ))))
10803 } else {
10804 Ok(Expression::Function(Box::new(Function::new(
10805 "REGEXP_SUBSTR_ALL".to_string(),
10806 vec![str_expr, pattern],
10807 ))))
10808 }
10809 } else {
10810 Ok(Expression::Function(Box::new(Function::new(
10811 "REGEXP_SUBSTR_ALL".to_string(),
10812 args,
10813 ))))
10814 }
10815 } else {
10816 Ok(e)
10817 }
10818 }
10819
10820 Action::SetToVariable => {
10821 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
10822 if let Expression::SetStatement(mut s) = e {
10823 for item in &mut s.items {
10824 if item.kind.is_none() {
10825 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
10826 let already_variable = match &item.name {
10827 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
10828 _ => false,
10829 };
10830 if already_variable {
10831 // Extract the actual name and set kind
10832 if let Expression::Identifier(ref mut id) = item.name {
10833 let actual_name = id.name["VARIABLE ".len()..].to_string();
10834 id.name = actual_name;
10835 }
10836 }
10837 item.kind = Some("VARIABLE".to_string());
10838 }
10839 }
10840 Ok(Expression::SetStatement(s))
10841 } else {
10842 Ok(e)
10843 }
10844 }
10845
10846 Action::ConvertTimezoneToExpr => {
10847 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
10848 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
10849 if let Expression::Function(f) = e {
10850 if f.args.len() == 2 {
10851 let mut args = f.args;
10852 let target_tz = args.remove(0);
10853 let timestamp = args.remove(0);
10854 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
10855 source_tz: None,
10856 target_tz: Some(Box::new(target_tz)),
10857 timestamp: Some(Box::new(timestamp)),
10858 options: vec![],
10859 })))
10860 } else if f.args.len() == 3 {
10861 let mut args = f.args;
10862 let source_tz = args.remove(0);
10863 let target_tz = args.remove(0);
10864 let timestamp = args.remove(0);
10865 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
10866 source_tz: Some(Box::new(source_tz)),
10867 target_tz: Some(Box::new(target_tz)),
10868 timestamp: Some(Box::new(timestamp)),
10869 options: vec![],
10870 })))
10871 } else {
10872 Ok(Expression::Function(f))
10873 }
10874 } else {
10875 Ok(e)
10876 }
10877 }
10878
10879 Action::BigQueryCastType => {
10880 // Convert BigQuery types to standard SQL types
10881 if let Expression::DataType(dt) = e {
10882 match dt {
10883 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
10884 Ok(Expression::DataType(DataType::BigInt { length: None }))
10885 }
10886 DataType::Custom { ref name }
10887 if name.eq_ignore_ascii_case("FLOAT64") =>
10888 {
10889 Ok(Expression::DataType(DataType::Double {
10890 precision: None,
10891 scale: None,
10892 }))
10893 }
10894 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
10895 Ok(Expression::DataType(DataType::Boolean))
10896 }
10897 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
10898 Ok(Expression::DataType(DataType::VarBinary { length: None }))
10899 }
10900 DataType::Custom { ref name }
10901 if name.eq_ignore_ascii_case("NUMERIC") =>
10902 {
10903 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
10904 // default precision (18, 3) being added to bare DECIMAL
10905 if matches!(target, DialectType::DuckDB) {
10906 Ok(Expression::DataType(DataType::Custom {
10907 name: "DECIMAL".to_string(),
10908 }))
10909 } else {
10910 Ok(Expression::DataType(DataType::Decimal {
10911 precision: None,
10912 scale: None,
10913 }))
10914 }
10915 }
10916 DataType::Custom { ref name }
10917 if name.eq_ignore_ascii_case("STRING") =>
10918 {
10919 Ok(Expression::DataType(DataType::String { length: None }))
10920 }
10921 DataType::Custom { ref name }
10922 if name.eq_ignore_ascii_case("DATETIME") =>
10923 {
10924 Ok(Expression::DataType(DataType::Timestamp {
10925 precision: None,
10926 timezone: false,
10927 }))
10928 }
10929 _ => Ok(Expression::DataType(dt)),
10930 }
10931 } else {
10932 Ok(e)
10933 }
10934 }
10935
10936 Action::BigQuerySafeDivide => {
10937 // Convert SafeDivide expression to IF/CASE form for most targets
10938 if let Expression::SafeDivide(sd) = e {
10939 let x = *sd.this;
10940 let y = *sd.expression;
10941 // Wrap x and y in parens if they're complex expressions
10942 let y_ref = match &y {
10943 Expression::Column(_)
10944 | Expression::Literal(_)
10945 | Expression::Identifier(_) => y.clone(),
10946 _ => Expression::Paren(Box::new(Paren {
10947 this: y.clone(),
10948 trailing_comments: vec![],
10949 })),
10950 };
10951 let x_ref = match &x {
10952 Expression::Column(_)
10953 | Expression::Literal(_)
10954 | Expression::Identifier(_) => x.clone(),
10955 _ => Expression::Paren(Box::new(Paren {
10956 this: x.clone(),
10957 trailing_comments: vec![],
10958 })),
10959 };
10960 let condition = Expression::Neq(Box::new(BinaryOp::new(
10961 y_ref.clone(),
10962 Expression::number(0),
10963 )));
10964 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
10965
10966 if matches!(target, DialectType::Spark | DialectType::Databricks) {
10967 Ok(Expression::Function(Box::new(Function::new(
10968 "TRY_DIVIDE".to_string(),
10969 vec![x, y],
10970 ))))
10971 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
10972 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
10973 let cast_x = Expression::Cast(Box::new(Cast {
10974 this: match &x {
10975 Expression::Column(_)
10976 | Expression::Literal(_)
10977 | Expression::Identifier(_) => x,
10978 _ => Expression::Paren(Box::new(Paren {
10979 this: x,
10980 trailing_comments: vec![],
10981 })),
10982 },
10983 to: DataType::Double {
10984 precision: None,
10985 scale: None,
10986 },
10987 trailing_comments: vec![],
10988 double_colon_syntax: false,
10989 format: None,
10990 default: None,
10991 inferred_type: None,
10992 }));
10993 let cast_div = Expression::Div(Box::new(BinaryOp::new(
10994 cast_x,
10995 match &y {
10996 Expression::Column(_)
10997 | Expression::Literal(_)
10998 | Expression::Identifier(_) => y,
10999 _ => Expression::Paren(Box::new(Paren {
11000 this: y,
11001 trailing_comments: vec![],
11002 })),
11003 },
11004 )));
11005 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11006 condition,
11007 true_value: cast_div,
11008 false_value: Some(Expression::Null(Null)),
11009 original_name: None,
11010 inferred_type: None,
11011 })))
11012 } else if matches!(target, DialectType::PostgreSQL) {
11013 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
11014 let cast_x = Expression::Cast(Box::new(Cast {
11015 this: match &x {
11016 Expression::Column(_)
11017 | Expression::Literal(_)
11018 | Expression::Identifier(_) => x,
11019 _ => Expression::Paren(Box::new(Paren {
11020 this: x,
11021 trailing_comments: vec![],
11022 })),
11023 },
11024 to: DataType::Custom {
11025 name: "DOUBLE PRECISION".to_string(),
11026 },
11027 trailing_comments: vec![],
11028 double_colon_syntax: false,
11029 format: None,
11030 default: None,
11031 inferred_type: None,
11032 }));
11033 let y_paren = match &y {
11034 Expression::Column(_)
11035 | Expression::Literal(_)
11036 | Expression::Identifier(_) => y,
11037 _ => Expression::Paren(Box::new(Paren {
11038 this: y,
11039 trailing_comments: vec![],
11040 })),
11041 };
11042 let cast_div =
11043 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
11044 Ok(Expression::Case(Box::new(Case {
11045 operand: None,
11046 whens: vec![(condition, cast_div)],
11047 else_: Some(Expression::Null(Null)),
11048 comments: Vec::new(),
11049 inferred_type: None,
11050 })))
11051 } else if matches!(target, DialectType::DuckDB) {
11052 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
11053 Ok(Expression::Case(Box::new(Case {
11054 operand: None,
11055 whens: vec![(condition, div_expr)],
11056 else_: Some(Expression::Null(Null)),
11057 comments: Vec::new(),
11058 inferred_type: None,
11059 })))
11060 } else if matches!(target, DialectType::Snowflake) {
11061 // Snowflake: IFF(y <> 0, x / y, NULL)
11062 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11063 condition,
11064 true_value: div_expr,
11065 false_value: Some(Expression::Null(Null)),
11066 original_name: Some("IFF".to_string()),
11067 inferred_type: None,
11068 })))
11069 } else {
11070 // All others: IF(y <> 0, x / y, NULL)
11071 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11072 condition,
11073 true_value: div_expr,
11074 false_value: Some(Expression::Null(Null)),
11075 original_name: None,
11076 inferred_type: None,
11077 })))
11078 }
11079 } else {
11080 Ok(e)
11081 }
11082 }
11083
11084 Action::BigQueryLastDayStripUnit => {
11085 if let Expression::LastDay(mut ld) = e {
11086 ld.unit = None; // Strip the unit (MONTH is default)
11087 match target {
11088 DialectType::PostgreSQL => {
11089 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11090 let date_trunc = Expression::Function(Box::new(Function::new(
11091 "DATE_TRUNC".to_string(),
11092 vec![
11093 Expression::Literal(Box::new(
11094 crate::expressions::Literal::String(
11095 "MONTH".to_string(),
11096 ),
11097 )),
11098 ld.this.clone(),
11099 ],
11100 )));
11101 let plus_month =
11102 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11103 date_trunc,
11104 Expression::Interval(Box::new(
11105 crate::expressions::Interval {
11106 this: Some(Expression::Literal(Box::new(
11107 crate::expressions::Literal::String(
11108 "1 MONTH".to_string(),
11109 ),
11110 ))),
11111 unit: None,
11112 },
11113 )),
11114 )));
11115 let minus_day =
11116 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
11117 plus_month,
11118 Expression::Interval(Box::new(
11119 crate::expressions::Interval {
11120 this: Some(Expression::Literal(Box::new(
11121 crate::expressions::Literal::String(
11122 "1 DAY".to_string(),
11123 ),
11124 ))),
11125 unit: None,
11126 },
11127 )),
11128 )));
11129 Ok(Expression::Cast(Box::new(Cast {
11130 this: minus_day,
11131 to: DataType::Date,
11132 trailing_comments: vec![],
11133 double_colon_syntax: false,
11134 format: None,
11135 default: None,
11136 inferred_type: None,
11137 })))
11138 }
11139 DialectType::Presto => {
11140 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
11141 Ok(Expression::Function(Box::new(Function::new(
11142 "LAST_DAY_OF_MONTH".to_string(),
11143 vec![ld.this],
11144 ))))
11145 }
11146 DialectType::ClickHouse => {
11147 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
11148 // Need to wrap the DATE type in Nullable
11149 let nullable_date = match ld.this {
11150 Expression::Cast(mut c) => {
11151 c.to = DataType::Nullable {
11152 inner: Box::new(DataType::Date),
11153 };
11154 Expression::Cast(c)
11155 }
11156 other => other,
11157 };
11158 ld.this = nullable_date;
11159 Ok(Expression::LastDay(ld))
11160 }
11161 _ => Ok(Expression::LastDay(ld)),
11162 }
11163 } else {
11164 Ok(e)
11165 }
11166 }
11167
11168 Action::BigQueryCastFormat => {
11169 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
11170 // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
11171 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
11172 let (this, to, format_expr, is_safe) = match e {
11173 Expression::Cast(ref c) if c.format.is_some() => (
11174 c.this.clone(),
11175 c.to.clone(),
11176 c.format.as_ref().unwrap().as_ref().clone(),
11177 false,
11178 ),
11179 Expression::SafeCast(ref c) if c.format.is_some() => (
11180 c.this.clone(),
11181 c.to.clone(),
11182 c.format.as_ref().unwrap().as_ref().clone(),
11183 true,
11184 ),
11185 _ => return Ok(e),
11186 };
11187 // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
11188 if matches!(target, DialectType::BigQuery) {
11189 match &to {
11190 DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
11191 // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
11192 return Ok(e);
11193 }
11194 _ => {}
11195 }
11196 }
11197 // Extract timezone from format if AT TIME ZONE is present
11198 let (actual_format_expr, timezone) = match &format_expr {
11199 Expression::AtTimeZone(ref atz) => {
11200 (atz.this.clone(), Some(atz.zone.clone()))
11201 }
11202 _ => (format_expr.clone(), None),
11203 };
11204 let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
11205 match target {
11206 DialectType::BigQuery => {
11207 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
11208 // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
11209 let func_name = match &to {
11210 DataType::Date => "PARSE_DATE",
11211 DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
11212 DataType::Time { .. } => "PARSE_TIMESTAMP",
11213 _ => "PARSE_TIMESTAMP",
11214 };
11215 let mut func_args = vec![strftime_fmt, this];
11216 if let Some(tz) = timezone {
11217 func_args.push(tz);
11218 }
11219 Ok(Expression::Function(Box::new(Function::new(
11220 func_name.to_string(),
11221 func_args,
11222 ))))
11223 }
11224 DialectType::DuckDB => {
11225 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
11226 // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
11227 let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
11228 let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
11229 let parse_call = Expression::Function(Box::new(Function::new(
11230 parse_fn_name.to_string(),
11231 vec![this, duck_fmt],
11232 )));
11233 Ok(Expression::Cast(Box::new(Cast {
11234 this: parse_call,
11235 to,
11236 trailing_comments: vec![],
11237 double_colon_syntax: false,
11238 format: None,
11239 default: None,
11240 inferred_type: None,
11241 })))
11242 }
11243 _ => Ok(e),
11244 }
11245 }
11246
11247 Action::BigQueryFunctionNormalize => {
11248 Self::normalize_bigquery_function(e, source, target)
11249 }
11250
11251 Action::BigQueryToHexBare => {
11252 // Not used anymore - handled directly in normalize_bigquery_function
11253 Ok(e)
11254 }
11255
11256 Action::BigQueryToHexLower => {
11257 if let Expression::Lower(uf) = e {
11258 match uf.this {
11259 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
11260 Expression::Function(f)
11261 if matches!(target, DialectType::BigQuery)
11262 && f.name == "TO_HEX" =>
11263 {
11264 Ok(Expression::Function(f))
11265 }
11266 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
11267 Expression::Lower(inner_uf) => {
11268 if matches!(target, DialectType::BigQuery) {
11269 // BQ->BQ: extract TO_HEX
11270 if let Expression::Function(f) = inner_uf.this {
11271 Ok(Expression::Function(Box::new(Function::new(
11272 "TO_HEX".to_string(),
11273 f.args,
11274 ))))
11275 } else {
11276 Ok(Expression::Lower(inner_uf))
11277 }
11278 } else {
11279 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
11280 Ok(Expression::Lower(inner_uf))
11281 }
11282 }
11283 other => {
11284 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
11285 this: other,
11286 original_name: None,
11287 inferred_type: None,
11288 })))
11289 }
11290 }
11291 } else {
11292 Ok(e)
11293 }
11294 }
11295
11296 Action::BigQueryToHexUpper => {
11297 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
11298 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
11299 if let Expression::Upper(uf) = e {
11300 if let Expression::Lower(inner_uf) = uf.this {
11301 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
11302 if matches!(target, DialectType::BigQuery) {
11303 // Restore TO_HEX name in inner function
11304 if let Expression::Function(f) = inner_uf.this {
11305 let restored = Expression::Function(Box::new(Function::new(
11306 "TO_HEX".to_string(),
11307 f.args,
11308 )));
11309 Ok(Expression::Upper(Box::new(
11310 crate::expressions::UnaryFunc::new(restored),
11311 )))
11312 } else {
11313 Ok(Expression::Upper(inner_uf))
11314 }
11315 } else {
11316 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
11317 Ok(inner_uf.this)
11318 }
11319 } else {
11320 Ok(Expression::Upper(uf))
11321 }
11322 } else {
11323 Ok(e)
11324 }
11325 }
11326
11327 Action::BigQueryAnyValueHaving => {
11328 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
11329 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
11330 if let Expression::AnyValue(agg) = e {
11331 if let Some((having_expr, is_max)) = agg.having_max {
11332 let func_name = if is_max {
11333 "ARG_MAX_NULL"
11334 } else {
11335 "ARG_MIN_NULL"
11336 };
11337 Ok(Expression::Function(Box::new(Function::new(
11338 func_name.to_string(),
11339 vec![agg.this, *having_expr],
11340 ))))
11341 } else {
11342 Ok(Expression::AnyValue(agg))
11343 }
11344 } else {
11345 Ok(e)
11346 }
11347 }
11348
11349 Action::BigQueryApproxQuantiles => {
11350 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
11351 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
11352 if let Expression::AggregateFunction(agg) = e {
11353 if agg.args.len() >= 2 {
11354 let x_expr = agg.args[0].clone();
11355 let n_expr = &agg.args[1];
11356
11357 // Extract the numeric value from n_expr
11358 let n = match n_expr {
11359 Expression::Literal(lit)
11360 if matches!(
11361 lit.as_ref(),
11362 crate::expressions::Literal::Number(_)
11363 ) =>
11364 {
11365 let crate::expressions::Literal::Number(s) = lit.as_ref()
11366 else {
11367 unreachable!()
11368 };
11369 s.parse::<usize>().unwrap_or(2)
11370 }
11371 _ => 2,
11372 };
11373
11374 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
11375 let mut quantiles = Vec::new();
11376 for i in 0..=n {
11377 let q = i as f64 / n as f64;
11378 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
11379 if q == 0.0 {
11380 quantiles.push(Expression::number(0));
11381 } else if q == 1.0 {
11382 quantiles.push(Expression::number(1));
11383 } else {
11384 quantiles.push(Expression::Literal(Box::new(
11385 crate::expressions::Literal::Number(format!("{}", q)),
11386 )));
11387 }
11388 }
11389
11390 let array_expr =
11391 Expression::Array(Box::new(crate::expressions::Array {
11392 expressions: quantiles,
11393 }));
11394
11395 // Preserve DISTINCT modifier
11396 let mut new_func = Function::new(
11397 "APPROX_QUANTILE".to_string(),
11398 vec![x_expr, array_expr],
11399 );
11400 new_func.distinct = agg.distinct;
11401 Ok(Expression::Function(Box::new(new_func)))
11402 } else {
11403 Ok(Expression::AggregateFunction(agg))
11404 }
11405 } else {
11406 Ok(e)
11407 }
11408 }
11409
11410 Action::GenericFunctionNormalize => {
11411 // Helper closure to convert ARBITRARY to target-specific function
11412 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
11413 let name = match target {
11414 DialectType::ClickHouse => "any",
11415 DialectType::TSQL | DialectType::SQLite => "MAX",
11416 DialectType::Hive => "FIRST",
11417 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11418 "ARBITRARY"
11419 }
11420 _ => "ANY_VALUE",
11421 };
11422 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
11423 }
11424
11425 if let Expression::Function(f) = e {
11426 let name = f.name.to_ascii_uppercase();
11427 match name.as_str() {
11428 "ARBITRARY" if f.args.len() == 1 => {
11429 let arg = f.args.into_iter().next().unwrap();
11430 Ok(convert_arbitrary(arg, target))
11431 }
11432 "TO_NUMBER" if f.args.len() == 1 => {
11433 let arg = f.args.into_iter().next().unwrap();
11434 match target {
11435 DialectType::Oracle | DialectType::Snowflake => {
11436 Ok(Expression::Function(Box::new(Function::new(
11437 "TO_NUMBER".to_string(),
11438 vec![arg],
11439 ))))
11440 }
11441 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11442 this: arg,
11443 to: crate::expressions::DataType::Double {
11444 precision: None,
11445 scale: None,
11446 },
11447 double_colon_syntax: false,
11448 trailing_comments: Vec::new(),
11449 format: None,
11450 default: None,
11451 inferred_type: None,
11452 }))),
11453 }
11454 }
11455 "AGGREGATE" if f.args.len() >= 3 => match target {
11456 DialectType::DuckDB
11457 | DialectType::Hive
11458 | DialectType::Presto
11459 | DialectType::Trino => Ok(Expression::Function(Box::new(
11460 Function::new("REDUCE".to_string(), f.args),
11461 ))),
11462 _ => Ok(Expression::Function(f)),
11463 },
11464 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
11465 "REGEXP_MATCHES" if f.args.len() >= 2 => {
11466 if matches!(target, DialectType::DuckDB) {
11467 Ok(Expression::Function(f))
11468 } else {
11469 let mut args = f.args;
11470 let this = args.remove(0);
11471 let pattern = args.remove(0);
11472 let flags = if args.is_empty() {
11473 None
11474 } else {
11475 Some(args.remove(0))
11476 };
11477 Ok(Expression::RegexpLike(Box::new(
11478 crate::expressions::RegexpFunc {
11479 this,
11480 pattern,
11481 flags,
11482 },
11483 )))
11484 }
11485 }
11486 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
11487 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
11488 if matches!(target, DialectType::DuckDB) {
11489 Ok(Expression::Function(f))
11490 } else {
11491 let mut args = f.args;
11492 let this = args.remove(0);
11493 let pattern = args.remove(0);
11494 let flags = if args.is_empty() {
11495 None
11496 } else {
11497 Some(args.remove(0))
11498 };
11499 Ok(Expression::RegexpLike(Box::new(
11500 crate::expressions::RegexpFunc {
11501 this,
11502 pattern,
11503 flags,
11504 },
11505 )))
11506 }
11507 }
11508 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
11509 "STRUCT_EXTRACT" if f.args.len() == 2 => {
11510 let mut args = f.args;
11511 let this = args.remove(0);
11512 let field_expr = args.remove(0);
11513 // Extract string literal to get field name
11514 let field_name = match &field_expr {
11515 Expression::Literal(lit)
11516 if matches!(
11517 lit.as_ref(),
11518 crate::expressions::Literal::String(_)
11519 ) =>
11520 {
11521 let crate::expressions::Literal::String(s) = lit.as_ref()
11522 else {
11523 unreachable!()
11524 };
11525 s.clone()
11526 }
11527 Expression::Identifier(id) => id.name.clone(),
11528 _ => {
11529 return Ok(Expression::Function(Box::new(Function::new(
11530 "STRUCT_EXTRACT".to_string(),
11531 vec![this, field_expr],
11532 ))))
11533 }
11534 };
11535 Ok(Expression::StructExtract(Box::new(
11536 crate::expressions::StructExtractFunc {
11537 this,
11538 field: crate::expressions::Identifier::new(field_name),
11539 },
11540 )))
11541 }
11542 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
11543 "LIST_FILTER" if f.args.len() == 2 => {
11544 let name = match target {
11545 DialectType::DuckDB => "LIST_FILTER",
11546 _ => "FILTER",
11547 };
11548 Ok(Expression::Function(Box::new(Function::new(
11549 name.to_string(),
11550 f.args,
11551 ))))
11552 }
11553 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
11554 "LIST_TRANSFORM" if f.args.len() == 2 => {
11555 let name = match target {
11556 DialectType::DuckDB => "LIST_TRANSFORM",
11557 _ => "TRANSFORM",
11558 };
11559 Ok(Expression::Function(Box::new(Function::new(
11560 name.to_string(),
11561 f.args,
11562 ))))
11563 }
11564 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
11565 "LIST_SORT" if f.args.len() >= 1 => {
11566 let name = match target {
11567 DialectType::DuckDB => "LIST_SORT",
11568 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
11569 _ => "SORT_ARRAY",
11570 };
11571 Ok(Expression::Function(Box::new(Function::new(
11572 name.to_string(),
11573 f.args,
11574 ))))
11575 }
11576 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
11577 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
11578 match target {
11579 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11580 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
11581 ))),
11582 DialectType::Spark
11583 | DialectType::Databricks
11584 | DialectType::Hive => {
11585 let mut args = f.args;
11586 args.push(Expression::Identifier(
11587 crate::expressions::Identifier::new("FALSE"),
11588 ));
11589 Ok(Expression::Function(Box::new(Function::new(
11590 "SORT_ARRAY".to_string(),
11591 args,
11592 ))))
11593 }
11594 DialectType::Presto
11595 | DialectType::Trino
11596 | DialectType::Athena => {
11597 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
11598 let arr = f.args.into_iter().next().unwrap();
11599 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
11600 parameters: vec![
11601 crate::expressions::Identifier::new("a"),
11602 crate::expressions::Identifier::new("b"),
11603 ],
11604 body: Expression::Case(Box::new(Case {
11605 operand: None,
11606 whens: vec![
11607 (
11608 Expression::Lt(Box::new(BinaryOp::new(
11609 Expression::Identifier(crate::expressions::Identifier::new("a")),
11610 Expression::Identifier(crate::expressions::Identifier::new("b")),
11611 ))),
11612 Expression::number(1),
11613 ),
11614 (
11615 Expression::Gt(Box::new(BinaryOp::new(
11616 Expression::Identifier(crate::expressions::Identifier::new("a")),
11617 Expression::Identifier(crate::expressions::Identifier::new("b")),
11618 ))),
11619 Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
11620 ),
11621 ],
11622 else_: Some(Expression::number(0)),
11623 comments: Vec::new(),
11624 inferred_type: None,
11625 })),
11626 colon: false,
11627 parameter_types: Vec::new(),
11628 }));
11629 Ok(Expression::Function(Box::new(Function::new(
11630 "ARRAY_SORT".to_string(),
11631 vec![arr, lambda],
11632 ))))
11633 }
11634 _ => Ok(Expression::Function(Box::new(Function::new(
11635 "LIST_REVERSE_SORT".to_string(),
11636 f.args,
11637 )))),
11638 }
11639 }
11640 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
11641 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
11642 let mut args = f.args;
11643 args.push(Expression::string(","));
11644 let name = match target {
11645 DialectType::DuckDB => "STR_SPLIT",
11646 DialectType::Presto | DialectType::Trino => "SPLIT",
11647 DialectType::Spark
11648 | DialectType::Databricks
11649 | DialectType::Hive => "SPLIT",
11650 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11651 DialectType::Redshift => "SPLIT_TO_ARRAY",
11652 _ => "SPLIT",
11653 };
11654 Ok(Expression::Function(Box::new(Function::new(
11655 name.to_string(),
11656 args,
11657 ))))
11658 }
11659 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
11660 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
11661 let name = match target {
11662 DialectType::DuckDB => "STR_SPLIT",
11663 DialectType::Presto | DialectType::Trino => "SPLIT",
11664 DialectType::Spark
11665 | DialectType::Databricks
11666 | DialectType::Hive => "SPLIT",
11667 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11668 DialectType::Redshift => "SPLIT_TO_ARRAY",
11669 _ => "SPLIT",
11670 };
11671 Ok(Expression::Function(Box::new(Function::new(
11672 name.to_string(),
11673 f.args,
11674 ))))
11675 }
11676 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
11677 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
11678 let name = match target {
11679 DialectType::DuckDB => "STR_SPLIT",
11680 DialectType::Presto | DialectType::Trino => "SPLIT",
11681 DialectType::Spark
11682 | DialectType::Databricks
11683 | DialectType::Hive => "SPLIT",
11684 DialectType::Doris | DialectType::StarRocks => {
11685 "SPLIT_BY_STRING"
11686 }
11687 DialectType::PostgreSQL | DialectType::Redshift => {
11688 "STRING_TO_ARRAY"
11689 }
11690 _ => "SPLIT",
11691 };
11692 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
11693 if matches!(
11694 target,
11695 DialectType::Spark
11696 | DialectType::Databricks
11697 | DialectType::Hive
11698 ) {
11699 let mut args = f.args;
11700 let x = args.remove(0);
11701 let sep = args.remove(0);
11702 // Wrap separator in CONCAT('\\Q', sep, '\\E')
11703 let escaped_sep =
11704 Expression::Function(Box::new(Function::new(
11705 "CONCAT".to_string(),
11706 vec![
11707 Expression::string("\\Q"),
11708 sep,
11709 Expression::string("\\E"),
11710 ],
11711 )));
11712 Ok(Expression::Function(Box::new(Function::new(
11713 name.to_string(),
11714 vec![x, escaped_sep],
11715 ))))
11716 } else {
11717 Ok(Expression::Function(Box::new(Function::new(
11718 name.to_string(),
11719 f.args,
11720 ))))
11721 }
11722 }
11723 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
11724 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
11725 let name = match target {
11726 DialectType::DuckDB => "STR_SPLIT_REGEX",
11727 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
11728 DialectType::Spark
11729 | DialectType::Databricks
11730 | DialectType::Hive => "SPLIT",
11731 _ => "REGEXP_SPLIT",
11732 };
11733 Ok(Expression::Function(Box::new(Function::new(
11734 name.to_string(),
11735 f.args,
11736 ))))
11737 }
11738 // SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
11739 "SPLIT"
11740 if f.args.len() == 2
11741 && matches!(source, DialectType::Snowflake)
11742 && matches!(target, DialectType::DuckDB) =>
11743 {
11744 let mut args = f.args;
11745 let str_arg = args.remove(0);
11746 let delim_arg = args.remove(0);
11747
11748 // STR_SPLIT(str, delim) as the base
11749 let base_func = Expression::Function(Box::new(Function::new(
11750 "STR_SPLIT".to_string(),
11751 vec![str_arg.clone(), delim_arg.clone()],
11752 )));
11753
11754 // [str] - array with single element
11755 let array_with_input =
11756 Expression::Array(Box::new(crate::expressions::Array {
11757 expressions: vec![str_arg],
11758 }));
11759
11760 // CASE
11761 // WHEN delim IS NULL THEN NULL
11762 // WHEN delim = '' THEN [str]
11763 // ELSE STR_SPLIT(str, delim)
11764 // END
11765 Ok(Expression::Case(Box::new(Case {
11766 operand: None,
11767 whens: vec![
11768 (
11769 Expression::Is(Box::new(BinaryOp {
11770 left: delim_arg.clone(),
11771 right: Expression::Null(Null),
11772 left_comments: vec![],
11773 operator_comments: vec![],
11774 trailing_comments: vec![],
11775 inferred_type: None,
11776 })),
11777 Expression::Null(Null),
11778 ),
11779 (
11780 Expression::Eq(Box::new(BinaryOp {
11781 left: delim_arg,
11782 right: Expression::string(""),
11783 left_comments: vec![],
11784 operator_comments: vec![],
11785 trailing_comments: vec![],
11786 inferred_type: None,
11787 })),
11788 array_with_input,
11789 ),
11790 ],
11791 else_: Some(base_func),
11792 comments: vec![],
11793 inferred_type: None,
11794 })))
11795 }
11796 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
11797 "SPLIT"
11798 if f.args.len() == 2
11799 && matches!(
11800 source,
11801 DialectType::Presto
11802 | DialectType::Trino
11803 | DialectType::Athena
11804 | DialectType::StarRocks
11805 | DialectType::Doris
11806 )
11807 && matches!(
11808 target,
11809 DialectType::Spark
11810 | DialectType::Databricks
11811 | DialectType::Hive
11812 ) =>
11813 {
11814 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
11815 let mut args = f.args;
11816 let x = args.remove(0);
11817 let sep = args.remove(0);
11818 let escaped_sep = Expression::Function(Box::new(Function::new(
11819 "CONCAT".to_string(),
11820 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
11821 )));
11822 Ok(Expression::Function(Box::new(Function::new(
11823 "SPLIT".to_string(),
11824 vec![x, escaped_sep],
11825 ))))
11826 }
11827 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
11828 // For ClickHouse target, preserve original name to maintain camelCase
11829 "SUBSTRINGINDEX" => {
11830 let name = if matches!(target, DialectType::ClickHouse) {
11831 f.name.clone()
11832 } else {
11833 "SUBSTRING_INDEX".to_string()
11834 };
11835 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
11836 }
11837 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
11838 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
11839 // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
11840 if name == "CARDINALITY"
11841 && matches!(source, DialectType::DuckDB)
11842 && matches!(target, DialectType::DuckDB)
11843 {
11844 return Ok(Expression::Function(f));
11845 }
11846 // Get the array argument (first arg, drop dimension args)
11847 let mut args = f.args;
11848 let arr = if args.is_empty() {
11849 return Ok(Expression::Function(Box::new(Function::new(
11850 name.to_string(),
11851 args,
11852 ))));
11853 } else {
11854 args.remove(0)
11855 };
11856 let name =
11857 match target {
11858 DialectType::Spark
11859 | DialectType::Databricks
11860 | DialectType::Hive => "SIZE",
11861 DialectType::Presto | DialectType::Trino => "CARDINALITY",
11862 DialectType::BigQuery => "ARRAY_LENGTH",
11863 DialectType::DuckDB => {
11864 // DuckDB: use ARRAY_LENGTH with all args
11865 let mut all_args = vec![arr];
11866 all_args.extend(args);
11867 return Ok(Expression::Function(Box::new(
11868 Function::new("ARRAY_LENGTH".to_string(), all_args),
11869 )));
11870 }
11871 DialectType::PostgreSQL | DialectType::Redshift => {
11872 // Keep ARRAY_LENGTH with dimension arg
11873 let mut all_args = vec![arr];
11874 all_args.extend(args);
11875 return Ok(Expression::Function(Box::new(
11876 Function::new("ARRAY_LENGTH".to_string(), all_args),
11877 )));
11878 }
11879 DialectType::ClickHouse => "LENGTH",
11880 _ => "ARRAY_LENGTH",
11881 };
11882 Ok(Expression::Function(Box::new(Function::new(
11883 name.to_string(),
11884 vec![arr],
11885 ))))
11886 }
11887 // TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
11888 "TO_VARIANT" if f.args.len() == 1 => match target {
11889 DialectType::DuckDB => {
11890 let arg = f.args.into_iter().next().unwrap();
11891 Ok(Expression::Cast(Box::new(Cast {
11892 this: arg,
11893 to: DataType::Custom {
11894 name: "VARIANT".to_string(),
11895 },
11896 double_colon_syntax: false,
11897 trailing_comments: Vec::new(),
11898 format: None,
11899 default: None,
11900 inferred_type: None,
11901 })))
11902 }
11903 _ => Ok(Expression::Function(f)),
11904 },
11905 // JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
11906 "JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
11907 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11908 Function::new("JSON_AGG".to_string(), f.args),
11909 ))),
11910 _ => Ok(Expression::Function(f)),
11911 },
11912 // JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
11913 "JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
11914 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11915 Function::new("JSON_OBJECT_AGG".to_string(), f.args),
11916 ))),
11917 _ => Ok(Expression::Function(f)),
11918 },
11919 // UNICODE(x) -> target-specific codepoint function
11920 "UNICODE" if f.args.len() == 1 => {
11921 match target {
11922 DialectType::SQLite | DialectType::DuckDB => {
11923 Ok(Expression::Function(Box::new(Function::new(
11924 "UNICODE".to_string(),
11925 f.args,
11926 ))))
11927 }
11928 DialectType::Oracle => {
11929 // ASCII(UNISTR(x))
11930 let inner = Expression::Function(Box::new(Function::new(
11931 "UNISTR".to_string(),
11932 f.args,
11933 )));
11934 Ok(Expression::Function(Box::new(Function::new(
11935 "ASCII".to_string(),
11936 vec![inner],
11937 ))))
11938 }
11939 DialectType::MySQL => {
11940 // ORD(CONVERT(x USING utf32))
11941 let arg = f.args.into_iter().next().unwrap();
11942 let convert_expr = Expression::ConvertToCharset(Box::new(
11943 crate::expressions::ConvertToCharset {
11944 this: Box::new(arg),
11945 dest: Some(Box::new(Expression::Identifier(
11946 crate::expressions::Identifier::new("utf32"),
11947 ))),
11948 source: None,
11949 },
11950 ));
11951 Ok(Expression::Function(Box::new(Function::new(
11952 "ORD".to_string(),
11953 vec![convert_expr],
11954 ))))
11955 }
11956 _ => Ok(Expression::Function(Box::new(Function::new(
11957 "ASCII".to_string(),
11958 f.args,
11959 )))),
11960 }
11961 }
11962 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
11963 "XOR" if f.args.len() >= 2 => {
11964 match target {
11965 DialectType::ClickHouse => {
11966 // ClickHouse: keep as xor() function with lowercase name
11967 Ok(Expression::Function(Box::new(Function::new(
11968 "xor".to_string(),
11969 f.args,
11970 ))))
11971 }
11972 DialectType::Presto | DialectType::Trino => {
11973 if f.args.len() == 2 {
11974 Ok(Expression::Function(Box::new(Function::new(
11975 "BITWISE_XOR".to_string(),
11976 f.args,
11977 ))))
11978 } else {
11979 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
11980 let mut args = f.args;
11981 let first = args.remove(0);
11982 let second = args.remove(0);
11983 let mut result =
11984 Expression::Function(Box::new(Function::new(
11985 "BITWISE_XOR".to_string(),
11986 vec![first, second],
11987 )));
11988 for arg in args {
11989 result =
11990 Expression::Function(Box::new(Function::new(
11991 "BITWISE_XOR".to_string(),
11992 vec![result, arg],
11993 )));
11994 }
11995 Ok(result)
11996 }
11997 }
11998 DialectType::MySQL
11999 | DialectType::SingleStore
12000 | DialectType::Doris
12001 | DialectType::StarRocks => {
12002 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
12003 let args = f.args;
12004 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
12005 this: None,
12006 expression: None,
12007 expressions: args,
12008 })))
12009 }
12010 DialectType::PostgreSQL | DialectType::Redshift => {
12011 // PostgreSQL: a # b (hash operator for XOR)
12012 let mut args = f.args;
12013 let first = args.remove(0);
12014 let second = args.remove(0);
12015 let mut result = Expression::BitwiseXor(Box::new(
12016 BinaryOp::new(first, second),
12017 ));
12018 for arg in args {
12019 result = Expression::BitwiseXor(Box::new(
12020 BinaryOp::new(result, arg),
12021 ));
12022 }
12023 Ok(result)
12024 }
12025 DialectType::DuckDB => {
12026 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
12027 Ok(Expression::Function(Box::new(Function::new(
12028 "XOR".to_string(),
12029 f.args,
12030 ))))
12031 }
12032 DialectType::BigQuery => {
12033 // BigQuery: a ^ b (caret operator for XOR)
12034 let mut args = f.args;
12035 let first = args.remove(0);
12036 let second = args.remove(0);
12037 let mut result = Expression::BitwiseXor(Box::new(
12038 BinaryOp::new(first, second),
12039 ));
12040 for arg in args {
12041 result = Expression::BitwiseXor(Box::new(
12042 BinaryOp::new(result, arg),
12043 ));
12044 }
12045 Ok(result)
12046 }
12047 _ => Ok(Expression::Function(Box::new(Function::new(
12048 "XOR".to_string(),
12049 f.args,
12050 )))),
12051 }
12052 }
12053 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
12054 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
12055 match target {
12056 DialectType::Spark
12057 | DialectType::Databricks
12058 | DialectType::Hive => {
12059 let mut args = f.args;
12060 args.push(Expression::Identifier(
12061 crate::expressions::Identifier::new("FALSE"),
12062 ));
12063 Ok(Expression::Function(Box::new(Function::new(
12064 "SORT_ARRAY".to_string(),
12065 args,
12066 ))))
12067 }
12068 DialectType::Presto
12069 | DialectType::Trino
12070 | DialectType::Athena => {
12071 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
12072 let arr = f.args.into_iter().next().unwrap();
12073 let lambda = Expression::Lambda(Box::new(
12074 crate::expressions::LambdaExpr {
12075 parameters: vec![
12076 Identifier::new("a"),
12077 Identifier::new("b"),
12078 ],
12079 colon: false,
12080 parameter_types: Vec::new(),
12081 body: Expression::Case(Box::new(Case {
12082 operand: None,
12083 whens: vec![
12084 (
12085 Expression::Lt(Box::new(
12086 BinaryOp::new(
12087 Expression::Identifier(
12088 Identifier::new("a"),
12089 ),
12090 Expression::Identifier(
12091 Identifier::new("b"),
12092 ),
12093 ),
12094 )),
12095 Expression::number(1),
12096 ),
12097 (
12098 Expression::Gt(Box::new(
12099 BinaryOp::new(
12100 Expression::Identifier(
12101 Identifier::new("a"),
12102 ),
12103 Expression::Identifier(
12104 Identifier::new("b"),
12105 ),
12106 ),
12107 )),
12108 Expression::Neg(Box::new(
12109 crate::expressions::UnaryOp {
12110 this: Expression::number(1),
12111 inferred_type: None,
12112 },
12113 )),
12114 ),
12115 ],
12116 else_: Some(Expression::number(0)),
12117 comments: Vec::new(),
12118 inferred_type: None,
12119 })),
12120 },
12121 ));
12122 Ok(Expression::Function(Box::new(Function::new(
12123 "ARRAY_SORT".to_string(),
12124 vec![arr, lambda],
12125 ))))
12126 }
12127 _ => Ok(Expression::Function(Box::new(Function::new(
12128 "ARRAY_REVERSE_SORT".to_string(),
12129 f.args,
12130 )))),
12131 }
12132 }
12133 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
12134 "ENCODE" if f.args.len() == 1 => match target {
12135 DialectType::Spark
12136 | DialectType::Databricks
12137 | DialectType::Hive => {
12138 let mut args = f.args;
12139 args.push(Expression::string("utf-8"));
12140 Ok(Expression::Function(Box::new(Function::new(
12141 "ENCODE".to_string(),
12142 args,
12143 ))))
12144 }
12145 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12146 Ok(Expression::Function(Box::new(Function::new(
12147 "TO_UTF8".to_string(),
12148 f.args,
12149 ))))
12150 }
12151 _ => Ok(Expression::Function(Box::new(Function::new(
12152 "ENCODE".to_string(),
12153 f.args,
12154 )))),
12155 },
12156 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
12157 "DECODE" if f.args.len() == 1 => match target {
12158 DialectType::Spark
12159 | DialectType::Databricks
12160 | DialectType::Hive => {
12161 let mut args = f.args;
12162 args.push(Expression::string("utf-8"));
12163 Ok(Expression::Function(Box::new(Function::new(
12164 "DECODE".to_string(),
12165 args,
12166 ))))
12167 }
12168 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12169 Ok(Expression::Function(Box::new(Function::new(
12170 "FROM_UTF8".to_string(),
12171 f.args,
12172 ))))
12173 }
12174 _ => Ok(Expression::Function(Box::new(Function::new(
12175 "DECODE".to_string(),
12176 f.args,
12177 )))),
12178 },
12179 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
12180 "QUANTILE" if f.args.len() == 2 => {
12181 let name = match target {
12182 DialectType::Spark
12183 | DialectType::Databricks
12184 | DialectType::Hive => "PERCENTILE",
12185 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
12186 DialectType::BigQuery => "PERCENTILE_CONT",
12187 _ => "QUANTILE",
12188 };
12189 Ok(Expression::Function(Box::new(Function::new(
12190 name.to_string(),
12191 f.args,
12192 ))))
12193 }
12194 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
12195 "QUANTILE_CONT" if f.args.len() == 2 => {
12196 let mut args = f.args;
12197 let column = args.remove(0);
12198 let quantile = args.remove(0);
12199 match target {
12200 DialectType::DuckDB => {
12201 Ok(Expression::Function(Box::new(Function::new(
12202 "QUANTILE_CONT".to_string(),
12203 vec![column, quantile],
12204 ))))
12205 }
12206 DialectType::PostgreSQL
12207 | DialectType::Redshift
12208 | DialectType::Snowflake => {
12209 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
12210 let inner = Expression::PercentileCont(Box::new(
12211 crate::expressions::PercentileFunc {
12212 this: column.clone(),
12213 percentile: quantile,
12214 order_by: None,
12215 filter: None,
12216 },
12217 ));
12218 Ok(Expression::WithinGroup(Box::new(
12219 crate::expressions::WithinGroup {
12220 this: inner,
12221 order_by: vec![crate::expressions::Ordered {
12222 this: column,
12223 desc: false,
12224 nulls_first: None,
12225 explicit_asc: false,
12226 with_fill: None,
12227 }],
12228 },
12229 )))
12230 }
12231 _ => Ok(Expression::Function(Box::new(Function::new(
12232 "QUANTILE_CONT".to_string(),
12233 vec![column, quantile],
12234 )))),
12235 }
12236 }
12237 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
12238 "QUANTILE_DISC" if f.args.len() == 2 => {
12239 let mut args = f.args;
12240 let column = args.remove(0);
12241 let quantile = args.remove(0);
12242 match target {
12243 DialectType::DuckDB => {
12244 Ok(Expression::Function(Box::new(Function::new(
12245 "QUANTILE_DISC".to_string(),
12246 vec![column, quantile],
12247 ))))
12248 }
12249 DialectType::PostgreSQL
12250 | DialectType::Redshift
12251 | DialectType::Snowflake => {
12252 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
12253 let inner = Expression::PercentileDisc(Box::new(
12254 crate::expressions::PercentileFunc {
12255 this: column.clone(),
12256 percentile: quantile,
12257 order_by: None,
12258 filter: None,
12259 },
12260 ));
12261 Ok(Expression::WithinGroup(Box::new(
12262 crate::expressions::WithinGroup {
12263 this: inner,
12264 order_by: vec![crate::expressions::Ordered {
12265 this: column,
12266 desc: false,
12267 nulls_first: None,
12268 explicit_asc: false,
12269 with_fill: None,
12270 }],
12271 },
12272 )))
12273 }
12274 _ => Ok(Expression::Function(Box::new(Function::new(
12275 "QUANTILE_DISC".to_string(),
12276 vec![column, quantile],
12277 )))),
12278 }
12279 }
12280 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
12281 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
12282 let name = match target {
12283 DialectType::Presto
12284 | DialectType::Trino
12285 | DialectType::Athena => "APPROX_PERCENTILE",
12286 DialectType::Spark
12287 | DialectType::Databricks
12288 | DialectType::Hive => "PERCENTILE_APPROX",
12289 DialectType::DuckDB => "APPROX_QUANTILE",
12290 DialectType::PostgreSQL | DialectType::Redshift => {
12291 "PERCENTILE_CONT"
12292 }
12293 _ => &f.name,
12294 };
12295 Ok(Expression::Function(Box::new(Function::new(
12296 name.to_string(),
12297 f.args,
12298 ))))
12299 }
12300 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
12301 "EPOCH" if f.args.len() == 1 => {
12302 let name = match target {
12303 DialectType::Spark
12304 | DialectType::Databricks
12305 | DialectType::Hive => "UNIX_TIMESTAMP",
12306 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
12307 _ => "EPOCH",
12308 };
12309 Ok(Expression::Function(Box::new(Function::new(
12310 name.to_string(),
12311 f.args,
12312 ))))
12313 }
12314 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
12315 "EPOCH_MS" if f.args.len() == 1 => {
12316 match target {
12317 DialectType::Spark | DialectType::Databricks => {
12318 Ok(Expression::Function(Box::new(Function::new(
12319 "TIMESTAMP_MILLIS".to_string(),
12320 f.args,
12321 ))))
12322 }
12323 DialectType::Hive => {
12324 // Hive: FROM_UNIXTIME(x / 1000)
12325 let arg = f.args.into_iter().next().unwrap();
12326 let div_expr = Expression::Div(Box::new(
12327 crate::expressions::BinaryOp::new(
12328 arg,
12329 Expression::number(1000),
12330 ),
12331 ));
12332 Ok(Expression::Function(Box::new(Function::new(
12333 "FROM_UNIXTIME".to_string(),
12334 vec![div_expr],
12335 ))))
12336 }
12337 DialectType::Presto | DialectType::Trino => {
12338 Ok(Expression::Function(Box::new(Function::new(
12339 "FROM_UNIXTIME".to_string(),
12340 vec![Expression::Div(Box::new(
12341 crate::expressions::BinaryOp::new(
12342 f.args.into_iter().next().unwrap(),
12343 Expression::number(1000),
12344 ),
12345 ))],
12346 ))))
12347 }
12348 _ => Ok(Expression::Function(Box::new(Function::new(
12349 "EPOCH_MS".to_string(),
12350 f.args,
12351 )))),
12352 }
12353 }
12354 // HASHBYTES('algorithm', x) -> target-specific hash function
12355 "HASHBYTES" if f.args.len() == 2 => {
12356 // Keep HASHBYTES as-is for TSQL target
12357 if matches!(target, DialectType::TSQL) {
12358 return Ok(Expression::Function(f));
12359 }
12360 let algo_expr = &f.args[0];
12361 let algo = match algo_expr {
12362 Expression::Literal(lit)
12363 if matches!(
12364 lit.as_ref(),
12365 crate::expressions::Literal::String(_)
12366 ) =>
12367 {
12368 let crate::expressions::Literal::String(s) = lit.as_ref()
12369 else {
12370 unreachable!()
12371 };
12372 s.to_ascii_uppercase()
12373 }
12374 _ => return Ok(Expression::Function(f)),
12375 };
12376 let data_arg = f.args.into_iter().nth(1).unwrap();
12377 match algo.as_str() {
12378 "SHA1" => {
12379 let name = match target {
12380 DialectType::Spark | DialectType::Databricks => "SHA",
12381 DialectType::Hive => "SHA1",
12382 _ => "SHA1",
12383 };
12384 Ok(Expression::Function(Box::new(Function::new(
12385 name.to_string(),
12386 vec![data_arg],
12387 ))))
12388 }
12389 "SHA2_256" => {
12390 Ok(Expression::Function(Box::new(Function::new(
12391 "SHA2".to_string(),
12392 vec![data_arg, Expression::number(256)],
12393 ))))
12394 }
12395 "SHA2_512" => {
12396 Ok(Expression::Function(Box::new(Function::new(
12397 "SHA2".to_string(),
12398 vec![data_arg, Expression::number(512)],
12399 ))))
12400 }
12401 "MD5" => Ok(Expression::Function(Box::new(Function::new(
12402 "MD5".to_string(),
12403 vec![data_arg],
12404 )))),
12405 _ => Ok(Expression::Function(Box::new(Function::new(
12406 "HASHBYTES".to_string(),
12407 vec![Expression::string(&algo), data_arg],
12408 )))),
12409 }
12410 }
12411 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
12412 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
12413 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
12414 let mut args = f.args;
12415 let json_expr = args.remove(0);
12416 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
12417 let mut json_path = "$".to_string();
12418 for a in &args {
12419 match a {
12420 Expression::Literal(lit)
12421 if matches!(
12422 lit.as_ref(),
12423 crate::expressions::Literal::String(_)
12424 ) =>
12425 {
12426 let crate::expressions::Literal::String(s) =
12427 lit.as_ref()
12428 else {
12429 unreachable!()
12430 };
12431 // Numeric string keys become array indices: [0]
12432 if s.chars().all(|c| c.is_ascii_digit()) {
12433 json_path.push('[');
12434 json_path.push_str(s);
12435 json_path.push(']');
12436 } else {
12437 json_path.push('.');
12438 json_path.push_str(s);
12439 }
12440 }
12441 _ => {
12442 json_path.push_str(".?");
12443 }
12444 }
12445 }
12446 match target {
12447 DialectType::Spark
12448 | DialectType::Databricks
12449 | DialectType::Hive => {
12450 Ok(Expression::Function(Box::new(Function::new(
12451 "GET_JSON_OBJECT".to_string(),
12452 vec![json_expr, Expression::string(&json_path)],
12453 ))))
12454 }
12455 DialectType::Presto | DialectType::Trino => {
12456 let func_name = if is_text {
12457 "JSON_EXTRACT_SCALAR"
12458 } else {
12459 "JSON_EXTRACT"
12460 };
12461 Ok(Expression::Function(Box::new(Function::new(
12462 func_name.to_string(),
12463 vec![json_expr, Expression::string(&json_path)],
12464 ))))
12465 }
12466 DialectType::BigQuery | DialectType::MySQL => {
12467 let func_name = if is_text {
12468 "JSON_EXTRACT_SCALAR"
12469 } else {
12470 "JSON_EXTRACT"
12471 };
12472 Ok(Expression::Function(Box::new(Function::new(
12473 func_name.to_string(),
12474 vec![json_expr, Expression::string(&json_path)],
12475 ))))
12476 }
12477 DialectType::PostgreSQL | DialectType::Materialize => {
12478 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
12479 let func_name = if is_text {
12480 "JSON_EXTRACT_PATH_TEXT"
12481 } else {
12482 "JSON_EXTRACT_PATH"
12483 };
12484 let mut new_args = vec![json_expr];
12485 new_args.extend(args);
12486 Ok(Expression::Function(Box::new(Function::new(
12487 func_name.to_string(),
12488 new_args,
12489 ))))
12490 }
12491 DialectType::DuckDB | DialectType::SQLite => {
12492 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
12493 if is_text {
12494 Ok(Expression::JsonExtractScalar(Box::new(
12495 crate::expressions::JsonExtractFunc {
12496 this: json_expr,
12497 path: Expression::string(&json_path),
12498 returning: None,
12499 arrow_syntax: true,
12500 hash_arrow_syntax: false,
12501 wrapper_option: None,
12502 quotes_option: None,
12503 on_scalar_string: false,
12504 on_error: None,
12505 },
12506 )))
12507 } else {
12508 Ok(Expression::JsonExtract(Box::new(
12509 crate::expressions::JsonExtractFunc {
12510 this: json_expr,
12511 path: Expression::string(&json_path),
12512 returning: None,
12513 arrow_syntax: true,
12514 hash_arrow_syntax: false,
12515 wrapper_option: None,
12516 quotes_option: None,
12517 on_scalar_string: false,
12518 on_error: None,
12519 },
12520 )))
12521 }
12522 }
12523 DialectType::Redshift => {
12524 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
12525 let mut new_args = vec![json_expr];
12526 new_args.extend(args);
12527 Ok(Expression::Function(Box::new(Function::new(
12528 "JSON_EXTRACT_PATH_TEXT".to_string(),
12529 new_args,
12530 ))))
12531 }
12532 DialectType::TSQL => {
12533 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
12534 let jq = Expression::Function(Box::new(Function::new(
12535 "JSON_QUERY".to_string(),
12536 vec![json_expr.clone(), Expression::string(&json_path)],
12537 )));
12538 let jv = Expression::Function(Box::new(Function::new(
12539 "JSON_VALUE".to_string(),
12540 vec![json_expr, Expression::string(&json_path)],
12541 )));
12542 Ok(Expression::Function(Box::new(Function::new(
12543 "ISNULL".to_string(),
12544 vec![jq, jv],
12545 ))))
12546 }
12547 DialectType::ClickHouse => {
12548 let func_name = if is_text {
12549 "JSONExtractString"
12550 } else {
12551 "JSONExtractRaw"
12552 };
12553 let mut new_args = vec![json_expr];
12554 new_args.extend(args);
12555 Ok(Expression::Function(Box::new(Function::new(
12556 func_name.to_string(),
12557 new_args,
12558 ))))
12559 }
12560 _ => {
12561 let func_name = if is_text {
12562 "JSON_EXTRACT_SCALAR"
12563 } else {
12564 "JSON_EXTRACT"
12565 };
12566 Ok(Expression::Function(Box::new(Function::new(
12567 func_name.to_string(),
12568 vec![json_expr, Expression::string(&json_path)],
12569 ))))
12570 }
12571 }
12572 }
12573 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
12574 "APPROX_DISTINCT" if f.args.len() >= 1 => {
12575 let name = match target {
12576 DialectType::Spark
12577 | DialectType::Databricks
12578 | DialectType::Hive
12579 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
12580 _ => "APPROX_DISTINCT",
12581 };
12582 let mut args = f.args;
12583 // Hive doesn't support the accuracy parameter
12584 if name == "APPROX_COUNT_DISTINCT"
12585 && matches!(target, DialectType::Hive)
12586 {
12587 args.truncate(1);
12588 }
12589 Ok(Expression::Function(Box::new(Function::new(
12590 name.to_string(),
12591 args,
12592 ))))
12593 }
12594 // REGEXP_EXTRACT(x, pattern) - normalize default group index
12595 "REGEXP_EXTRACT" if f.args.len() == 2 => {
12596 // Determine source default group index
12597 let source_default = match source {
12598 DialectType::Presto
12599 | DialectType::Trino
12600 | DialectType::DuckDB => 0,
12601 _ => 1, // Hive/Spark/Databricks default = 1
12602 };
12603 // Determine target default group index
12604 let target_default = match target {
12605 DialectType::Presto
12606 | DialectType::Trino
12607 | DialectType::DuckDB
12608 | DialectType::BigQuery => 0,
12609 DialectType::Snowflake => {
12610 // Snowflake uses REGEXP_SUBSTR
12611 return Ok(Expression::Function(Box::new(Function::new(
12612 "REGEXP_SUBSTR".to_string(),
12613 f.args,
12614 ))));
12615 }
12616 _ => 1, // Hive/Spark/Databricks default = 1
12617 };
12618 if source_default != target_default {
12619 let mut args = f.args;
12620 args.push(Expression::number(source_default));
12621 Ok(Expression::Function(Box::new(Function::new(
12622 "REGEXP_EXTRACT".to_string(),
12623 args,
12624 ))))
12625 } else {
12626 Ok(Expression::Function(Box::new(Function::new(
12627 "REGEXP_EXTRACT".to_string(),
12628 f.args,
12629 ))))
12630 }
12631 }
12632 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
12633 "RLIKE" if f.args.len() == 2 => {
12634 let mut args = f.args;
12635 let str_expr = args.remove(0);
12636 let pattern = args.remove(0);
12637 match target {
12638 DialectType::DuckDB => {
12639 // REGEXP_MATCHES(str, pattern)
12640 Ok(Expression::Function(Box::new(Function::new(
12641 "REGEXP_MATCHES".to_string(),
12642 vec![str_expr, pattern],
12643 ))))
12644 }
12645 _ => {
12646 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
12647 Ok(Expression::RegexpLike(Box::new(
12648 crate::expressions::RegexpFunc {
12649 this: str_expr,
12650 pattern,
12651 flags: None,
12652 },
12653 )))
12654 }
12655 }
12656 }
12657 // EOMONTH(date[, month_offset]) -> target-specific
12658 "EOMONTH" if f.args.len() >= 1 => {
12659 let mut args = f.args;
12660 let date_arg = args.remove(0);
12661 let month_offset = if !args.is_empty() {
12662 Some(args.remove(0))
12663 } else {
12664 None
12665 };
12666
12667 // Helper: wrap date in CAST to DATE
12668 let cast_to_date = |e: Expression| -> Expression {
12669 Expression::Cast(Box::new(Cast {
12670 this: e,
12671 to: DataType::Date,
12672 trailing_comments: vec![],
12673 double_colon_syntax: false,
12674 format: None,
12675 default: None,
12676 inferred_type: None,
12677 }))
12678 };
12679
12680 match target {
12681 DialectType::TSQL | DialectType::Fabric => {
12682 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
12683 let date = cast_to_date(date_arg);
12684 let date = if let Some(offset) = month_offset {
12685 Expression::Function(Box::new(Function::new(
12686 "DATEADD".to_string(),
12687 vec![
12688 Expression::Identifier(Identifier::new(
12689 "MONTH",
12690 )),
12691 offset,
12692 date,
12693 ],
12694 )))
12695 } else {
12696 date
12697 };
12698 Ok(Expression::Function(Box::new(Function::new(
12699 "EOMONTH".to_string(),
12700 vec![date],
12701 ))))
12702 }
12703 DialectType::Presto
12704 | DialectType::Trino
12705 | DialectType::Athena => {
12706 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
12707 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
12708 let cast_ts = Expression::Cast(Box::new(Cast {
12709 this: date_arg,
12710 to: DataType::Timestamp {
12711 timezone: false,
12712 precision: None,
12713 },
12714 trailing_comments: vec![],
12715 double_colon_syntax: false,
12716 format: None,
12717 default: None,
12718 inferred_type: None,
12719 }));
12720 let date = cast_to_date(cast_ts);
12721 let date = if let Some(offset) = month_offset {
12722 Expression::Function(Box::new(Function::new(
12723 "DATE_ADD".to_string(),
12724 vec![Expression::string("MONTH"), offset, date],
12725 )))
12726 } else {
12727 date
12728 };
12729 Ok(Expression::Function(Box::new(Function::new(
12730 "LAST_DAY_OF_MONTH".to_string(),
12731 vec![date],
12732 ))))
12733 }
12734 DialectType::PostgreSQL => {
12735 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
12736 let date = cast_to_date(date_arg);
12737 let date = if let Some(offset) = month_offset {
12738 let interval_str = format!(
12739 "{} MONTH",
12740 Self::expr_to_string_static(&offset)
12741 );
12742 Expression::Add(Box::new(
12743 crate::expressions::BinaryOp::new(
12744 date,
12745 Expression::Interval(Box::new(
12746 crate::expressions::Interval {
12747 this: Some(Expression::string(
12748 &interval_str,
12749 )),
12750 unit: None,
12751 },
12752 )),
12753 ),
12754 ))
12755 } else {
12756 date
12757 };
12758 let truncated =
12759 Expression::Function(Box::new(Function::new(
12760 "DATE_TRUNC".to_string(),
12761 vec![Expression::string("MONTH"), date],
12762 )));
12763 let plus_month = Expression::Add(Box::new(
12764 crate::expressions::BinaryOp::new(
12765 truncated,
12766 Expression::Interval(Box::new(
12767 crate::expressions::Interval {
12768 this: Some(Expression::string("1 MONTH")),
12769 unit: None,
12770 },
12771 )),
12772 ),
12773 ));
12774 let minus_day = Expression::Sub(Box::new(
12775 crate::expressions::BinaryOp::new(
12776 plus_month,
12777 Expression::Interval(Box::new(
12778 crate::expressions::Interval {
12779 this: Some(Expression::string("1 DAY")),
12780 unit: None,
12781 },
12782 )),
12783 ),
12784 ));
12785 Ok(Expression::Cast(Box::new(Cast {
12786 this: minus_day,
12787 to: DataType::Date,
12788 trailing_comments: vec![],
12789 double_colon_syntax: false,
12790 format: None,
12791 default: None,
12792 inferred_type: None,
12793 })))
12794 }
12795 DialectType::DuckDB => {
12796 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
12797 let date = cast_to_date(date_arg);
12798 let date = if let Some(offset) = month_offset {
12799 // Wrap negative numbers in parentheses for DuckDB INTERVAL
12800 let interval_val =
12801 if matches!(&offset, Expression::Neg(_)) {
12802 Expression::Paren(Box::new(
12803 crate::expressions::Paren {
12804 this: offset,
12805 trailing_comments: Vec::new(),
12806 },
12807 ))
12808 } else {
12809 offset
12810 };
12811 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
12812 date,
12813 Expression::Interval(Box::new(crate::expressions::Interval {
12814 this: Some(interval_val),
12815 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
12816 unit: crate::expressions::IntervalUnit::Month,
12817 use_plural: false,
12818 }),
12819 })),
12820 )))
12821 } else {
12822 date
12823 };
12824 Ok(Expression::Function(Box::new(Function::new(
12825 "LAST_DAY".to_string(),
12826 vec![date],
12827 ))))
12828 }
12829 DialectType::Snowflake | DialectType::Redshift => {
12830 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
12831 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
12832 let date = if matches!(target, DialectType::Snowflake) {
12833 Expression::Function(Box::new(Function::new(
12834 "TO_DATE".to_string(),
12835 vec![date_arg],
12836 )))
12837 } else {
12838 cast_to_date(date_arg)
12839 };
12840 let date = if let Some(offset) = month_offset {
12841 Expression::Function(Box::new(Function::new(
12842 "DATEADD".to_string(),
12843 vec![
12844 Expression::Identifier(Identifier::new(
12845 "MONTH",
12846 )),
12847 offset,
12848 date,
12849 ],
12850 )))
12851 } else {
12852 date
12853 };
12854 Ok(Expression::Function(Box::new(Function::new(
12855 "LAST_DAY".to_string(),
12856 vec![date],
12857 ))))
12858 }
12859 DialectType::Spark | DialectType::Databricks => {
12860 // Spark: LAST_DAY(TO_DATE(date))
12861 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
12862 let date = Expression::Function(Box::new(Function::new(
12863 "TO_DATE".to_string(),
12864 vec![date_arg],
12865 )));
12866 let date = if let Some(offset) = month_offset {
12867 Expression::Function(Box::new(Function::new(
12868 "ADD_MONTHS".to_string(),
12869 vec![date, offset],
12870 )))
12871 } else {
12872 date
12873 };
12874 Ok(Expression::Function(Box::new(Function::new(
12875 "LAST_DAY".to_string(),
12876 vec![date],
12877 ))))
12878 }
12879 DialectType::MySQL => {
12880 // MySQL: LAST_DAY(DATE(date)) - no offset
12881 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
12882 let date = if let Some(offset) = month_offset {
12883 let iu = crate::expressions::IntervalUnit::Month;
12884 Expression::DateAdd(Box::new(
12885 crate::expressions::DateAddFunc {
12886 this: date_arg,
12887 interval: offset,
12888 unit: iu,
12889 },
12890 ))
12891 } else {
12892 Expression::Function(Box::new(Function::new(
12893 "DATE".to_string(),
12894 vec![date_arg],
12895 )))
12896 };
12897 Ok(Expression::Function(Box::new(Function::new(
12898 "LAST_DAY".to_string(),
12899 vec![date],
12900 ))))
12901 }
12902 DialectType::BigQuery => {
12903 // BigQuery: LAST_DAY(CAST(date AS DATE))
12904 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
12905 let date = cast_to_date(date_arg);
12906 let date = if let Some(offset) = month_offset {
12907 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
12908 this: Some(offset),
12909 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
12910 unit: crate::expressions::IntervalUnit::Month,
12911 use_plural: false,
12912 }),
12913 }));
12914 Expression::Function(Box::new(Function::new(
12915 "DATE_ADD".to_string(),
12916 vec![date, interval],
12917 )))
12918 } else {
12919 date
12920 };
12921 Ok(Expression::Function(Box::new(Function::new(
12922 "LAST_DAY".to_string(),
12923 vec![date],
12924 ))))
12925 }
12926 DialectType::ClickHouse => {
12927 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
12928 let date = Expression::Cast(Box::new(Cast {
12929 this: date_arg,
12930 to: DataType::Nullable {
12931 inner: Box::new(DataType::Date),
12932 },
12933 trailing_comments: vec![],
12934 double_colon_syntax: false,
12935 format: None,
12936 default: None,
12937 inferred_type: None,
12938 }));
12939 let date = if let Some(offset) = month_offset {
12940 Expression::Function(Box::new(Function::new(
12941 "DATE_ADD".to_string(),
12942 vec![
12943 Expression::Identifier(Identifier::new(
12944 "MONTH",
12945 )),
12946 offset,
12947 date,
12948 ],
12949 )))
12950 } else {
12951 date
12952 };
12953 Ok(Expression::Function(Box::new(Function::new(
12954 "LAST_DAY".to_string(),
12955 vec![date],
12956 ))))
12957 }
12958 DialectType::Hive => {
12959 // Hive: LAST_DAY(date)
12960 let date = if let Some(offset) = month_offset {
12961 Expression::Function(Box::new(Function::new(
12962 "ADD_MONTHS".to_string(),
12963 vec![date_arg, offset],
12964 )))
12965 } else {
12966 date_arg
12967 };
12968 Ok(Expression::Function(Box::new(Function::new(
12969 "LAST_DAY".to_string(),
12970 vec![date],
12971 ))))
12972 }
12973 _ => {
12974 // Default: LAST_DAY(date)
12975 let date = if let Some(offset) = month_offset {
12976 let unit =
12977 Expression::Identifier(Identifier::new("MONTH"));
12978 Expression::Function(Box::new(Function::new(
12979 "DATEADD".to_string(),
12980 vec![unit, offset, date_arg],
12981 )))
12982 } else {
12983 date_arg
12984 };
12985 Ok(Expression::Function(Box::new(Function::new(
12986 "LAST_DAY".to_string(),
12987 vec![date],
12988 ))))
12989 }
12990 }
12991 }
12992 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
12993 "LAST_DAY" | "LAST_DAY_OF_MONTH"
12994 if !matches!(source, DialectType::BigQuery)
12995 && f.args.len() >= 1 =>
12996 {
12997 let first_arg = f.args.into_iter().next().unwrap();
12998 match target {
12999 DialectType::TSQL | DialectType::Fabric => {
13000 Ok(Expression::Function(Box::new(Function::new(
13001 "EOMONTH".to_string(),
13002 vec![first_arg],
13003 ))))
13004 }
13005 DialectType::Presto
13006 | DialectType::Trino
13007 | DialectType::Athena => {
13008 Ok(Expression::Function(Box::new(Function::new(
13009 "LAST_DAY_OF_MONTH".to_string(),
13010 vec![first_arg],
13011 ))))
13012 }
13013 _ => Ok(Expression::Function(Box::new(Function::new(
13014 "LAST_DAY".to_string(),
13015 vec![first_arg],
13016 )))),
13017 }
13018 }
13019 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
13020 "MAP"
13021 if f.args.len() == 2
13022 && matches!(
13023 source,
13024 DialectType::Presto
13025 | DialectType::Trino
13026 | DialectType::Athena
13027 ) =>
13028 {
13029 let keys_arg = f.args[0].clone();
13030 let vals_arg = f.args[1].clone();
13031
13032 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
13033 fn extract_array_elements(
13034 expr: &Expression,
13035 ) -> Option<&Vec<Expression>> {
13036 match expr {
13037 Expression::Array(arr) => Some(&arr.expressions),
13038 Expression::ArrayFunc(arr) => Some(&arr.expressions),
13039 Expression::Function(f)
13040 if f.name.eq_ignore_ascii_case("ARRAY") =>
13041 {
13042 Some(&f.args)
13043 }
13044 _ => None,
13045 }
13046 }
13047
13048 match target {
13049 DialectType::Spark | DialectType::Databricks => {
13050 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
13051 Ok(Expression::Function(Box::new(Function::new(
13052 "MAP_FROM_ARRAYS".to_string(),
13053 f.args,
13054 ))))
13055 }
13056 DialectType::Hive => {
13057 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
13058 if let (Some(keys), Some(vals)) = (
13059 extract_array_elements(&keys_arg),
13060 extract_array_elements(&vals_arg),
13061 ) {
13062 if keys.len() == vals.len() {
13063 let mut interleaved = Vec::new();
13064 for (k, v) in keys.iter().zip(vals.iter()) {
13065 interleaved.push(k.clone());
13066 interleaved.push(v.clone());
13067 }
13068 Ok(Expression::Function(Box::new(Function::new(
13069 "MAP".to_string(),
13070 interleaved,
13071 ))))
13072 } else {
13073 Ok(Expression::Function(Box::new(Function::new(
13074 "MAP".to_string(),
13075 f.args,
13076 ))))
13077 }
13078 } else {
13079 Ok(Expression::Function(Box::new(Function::new(
13080 "MAP".to_string(),
13081 f.args,
13082 ))))
13083 }
13084 }
13085 DialectType::Snowflake => {
13086 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
13087 if let (Some(keys), Some(vals)) = (
13088 extract_array_elements(&keys_arg),
13089 extract_array_elements(&vals_arg),
13090 ) {
13091 if keys.len() == vals.len() {
13092 let mut interleaved = Vec::new();
13093 for (k, v) in keys.iter().zip(vals.iter()) {
13094 interleaved.push(k.clone());
13095 interleaved.push(v.clone());
13096 }
13097 Ok(Expression::Function(Box::new(Function::new(
13098 "OBJECT_CONSTRUCT".to_string(),
13099 interleaved,
13100 ))))
13101 } else {
13102 Ok(Expression::Function(Box::new(Function::new(
13103 "MAP".to_string(),
13104 f.args,
13105 ))))
13106 }
13107 } else {
13108 Ok(Expression::Function(Box::new(Function::new(
13109 "MAP".to_string(),
13110 f.args,
13111 ))))
13112 }
13113 }
13114 _ => Ok(Expression::Function(f)),
13115 }
13116 }
13117 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
13118 "MAP"
13119 if f.args.is_empty()
13120 && matches!(
13121 source,
13122 DialectType::Hive
13123 | DialectType::Spark
13124 | DialectType::Databricks
13125 )
13126 && matches!(
13127 target,
13128 DialectType::Presto
13129 | DialectType::Trino
13130 | DialectType::Athena
13131 ) =>
13132 {
13133 let empty_keys =
13134 Expression::Array(Box::new(crate::expressions::Array {
13135 expressions: vec![],
13136 }));
13137 let empty_vals =
13138 Expression::Array(Box::new(crate::expressions::Array {
13139 expressions: vec![],
13140 }));
13141 Ok(Expression::Function(Box::new(Function::new(
13142 "MAP".to_string(),
13143 vec![empty_keys, empty_vals],
13144 ))))
13145 }
13146 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
13147 "MAP"
13148 if f.args.len() >= 2
13149 && f.args.len() % 2 == 0
13150 && matches!(
13151 source,
13152 DialectType::Hive
13153 | DialectType::Spark
13154 | DialectType::Databricks
13155 | DialectType::ClickHouse
13156 ) =>
13157 {
13158 let args = f.args;
13159 match target {
13160 DialectType::DuckDB => {
13161 // MAP([k1, k2], [v1, v2])
13162 let mut keys = Vec::new();
13163 let mut vals = Vec::new();
13164 for (i, arg) in args.into_iter().enumerate() {
13165 if i % 2 == 0 {
13166 keys.push(arg);
13167 } else {
13168 vals.push(arg);
13169 }
13170 }
13171 let keys_arr = Expression::Array(Box::new(
13172 crate::expressions::Array { expressions: keys },
13173 ));
13174 let vals_arr = Expression::Array(Box::new(
13175 crate::expressions::Array { expressions: vals },
13176 ));
13177 Ok(Expression::Function(Box::new(Function::new(
13178 "MAP".to_string(),
13179 vec![keys_arr, vals_arr],
13180 ))))
13181 }
13182 DialectType::Presto | DialectType::Trino => {
13183 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
13184 let mut keys = Vec::new();
13185 let mut vals = Vec::new();
13186 for (i, arg) in args.into_iter().enumerate() {
13187 if i % 2 == 0 {
13188 keys.push(arg);
13189 } else {
13190 vals.push(arg);
13191 }
13192 }
13193 let keys_arr = Expression::Array(Box::new(
13194 crate::expressions::Array { expressions: keys },
13195 ));
13196 let vals_arr = Expression::Array(Box::new(
13197 crate::expressions::Array { expressions: vals },
13198 ));
13199 Ok(Expression::Function(Box::new(Function::new(
13200 "MAP".to_string(),
13201 vec![keys_arr, vals_arr],
13202 ))))
13203 }
13204 DialectType::Snowflake => Ok(Expression::Function(Box::new(
13205 Function::new("OBJECT_CONSTRUCT".to_string(), args),
13206 ))),
13207 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
13208 Function::new("map".to_string(), args),
13209 ))),
13210 _ => Ok(Expression::Function(Box::new(Function::new(
13211 "MAP".to_string(),
13212 args,
13213 )))),
13214 }
13215 }
13216 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
13217 "COLLECT_LIST" if f.args.len() >= 1 => {
13218 let name = match target {
13219 DialectType::Spark
13220 | DialectType::Databricks
13221 | DialectType::Hive => "COLLECT_LIST",
13222 DialectType::DuckDB
13223 | DialectType::PostgreSQL
13224 | DialectType::Redshift
13225 | DialectType::Snowflake
13226 | DialectType::BigQuery => "ARRAY_AGG",
13227 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
13228 _ => "ARRAY_AGG",
13229 };
13230 Ok(Expression::Function(Box::new(Function::new(
13231 name.to_string(),
13232 f.args,
13233 ))))
13234 }
13235 // COLLECT_SET(x) -> target-specific distinct array aggregation
13236 "COLLECT_SET" if f.args.len() >= 1 => {
13237 let name = match target {
13238 DialectType::Spark
13239 | DialectType::Databricks
13240 | DialectType::Hive => "COLLECT_SET",
13241 DialectType::Presto
13242 | DialectType::Trino
13243 | DialectType::Athena => "SET_AGG",
13244 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
13245 _ => "ARRAY_AGG",
13246 };
13247 Ok(Expression::Function(Box::new(Function::new(
13248 name.to_string(),
13249 f.args,
13250 ))))
13251 }
13252 // ISNAN(x) / IS_NAN(x) - normalize
13253 "ISNAN" | "IS_NAN" => {
13254 let name = match target {
13255 DialectType::Spark
13256 | DialectType::Databricks
13257 | DialectType::Hive => "ISNAN",
13258 DialectType::Presto
13259 | DialectType::Trino
13260 | DialectType::Athena => "IS_NAN",
13261 DialectType::BigQuery
13262 | DialectType::PostgreSQL
13263 | DialectType::Redshift => "IS_NAN",
13264 DialectType::ClickHouse => "IS_NAN",
13265 _ => "ISNAN",
13266 };
13267 Ok(Expression::Function(Box::new(Function::new(
13268 name.to_string(),
13269 f.args,
13270 ))))
13271 }
13272 // SPLIT_PART(str, delim, index) -> target-specific
13273 "SPLIT_PART" if f.args.len() == 3 => {
13274 match target {
13275 DialectType::Spark | DialectType::Databricks => {
13276 // Keep as SPLIT_PART (Spark 3.4+)
13277 Ok(Expression::Function(Box::new(Function::new(
13278 "SPLIT_PART".to_string(),
13279 f.args,
13280 ))))
13281 }
13282 DialectType::DuckDB
13283 if matches!(source, DialectType::Snowflake) =>
13284 {
13285 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
13286 // - part_index 0 treated as 1
13287 // - empty delimiter: return whole string if index 1 or -1, else ''
13288 let mut args = f.args;
13289 let str_arg = args.remove(0);
13290 let delim_arg = args.remove(0);
13291 let idx_arg = args.remove(0);
13292
13293 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
13294 let adjusted_idx = Expression::Paren(Box::new(Paren {
13295 this: Expression::Case(Box::new(Case {
13296 operand: None,
13297 whens: vec![(
13298 Expression::Eq(Box::new(BinaryOp {
13299 left: idx_arg.clone(),
13300 right: Expression::number(0),
13301 left_comments: vec![],
13302 operator_comments: vec![],
13303 trailing_comments: vec![],
13304 inferred_type: None,
13305 })),
13306 Expression::number(1),
13307 )],
13308 else_: Some(idx_arg.clone()),
13309 comments: vec![],
13310 inferred_type: None,
13311 })),
13312 trailing_comments: vec![],
13313 }));
13314
13315 // SPLIT_PART(str, delim, adjusted_idx)
13316 let base_func =
13317 Expression::Function(Box::new(Function::new(
13318 "SPLIT_PART".to_string(),
13319 vec![
13320 str_arg.clone(),
13321 delim_arg.clone(),
13322 adjusted_idx.clone(),
13323 ],
13324 )));
13325
13326 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
13327 let empty_delim_case = Expression::Paren(Box::new(Paren {
13328 this: Expression::Case(Box::new(Case {
13329 operand: None,
13330 whens: vec![(
13331 Expression::Or(Box::new(BinaryOp {
13332 left: Expression::Eq(Box::new(BinaryOp {
13333 left: adjusted_idx.clone(),
13334 right: Expression::number(1),
13335 left_comments: vec![],
13336 operator_comments: vec![],
13337 trailing_comments: vec![],
13338 inferred_type: None,
13339 })),
13340 right: Expression::Eq(Box::new(BinaryOp {
13341 left: adjusted_idx,
13342 right: Expression::number(-1),
13343 left_comments: vec![],
13344 operator_comments: vec![],
13345 trailing_comments: vec![],
13346 inferred_type: None,
13347 })),
13348 left_comments: vec![],
13349 operator_comments: vec![],
13350 trailing_comments: vec![],
13351 inferred_type: None,
13352 })),
13353 str_arg,
13354 )],
13355 else_: Some(Expression::string("")),
13356 comments: vec![],
13357 inferred_type: None,
13358 })),
13359 trailing_comments: vec![],
13360 }));
13361
13362 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
13363 Ok(Expression::Case(Box::new(Case {
13364 operand: None,
13365 whens: vec![(
13366 Expression::Eq(Box::new(BinaryOp {
13367 left: delim_arg,
13368 right: Expression::string(""),
13369 left_comments: vec![],
13370 operator_comments: vec![],
13371 trailing_comments: vec![],
13372 inferred_type: None,
13373 })),
13374 empty_delim_case,
13375 )],
13376 else_: Some(base_func),
13377 comments: vec![],
13378 inferred_type: None,
13379 })))
13380 }
13381 DialectType::DuckDB
13382 | DialectType::PostgreSQL
13383 | DialectType::Snowflake
13384 | DialectType::Redshift
13385 | DialectType::Trino
13386 | DialectType::Presto => Ok(Expression::Function(Box::new(
13387 Function::new("SPLIT_PART".to_string(), f.args),
13388 ))),
13389 DialectType::Hive => {
13390 // SPLIT(str, delim)[index]
13391 // Complex conversion, just keep as-is for now
13392 Ok(Expression::Function(Box::new(Function::new(
13393 "SPLIT_PART".to_string(),
13394 f.args,
13395 ))))
13396 }
13397 _ => Ok(Expression::Function(Box::new(Function::new(
13398 "SPLIT_PART".to_string(),
13399 f.args,
13400 )))),
13401 }
13402 }
13403 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
13404 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
13405 let is_scalar = name == "JSON_EXTRACT_SCALAR";
13406 match target {
13407 DialectType::Spark
13408 | DialectType::Databricks
13409 | DialectType::Hive => {
13410 let mut args = f.args;
13411 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
13412 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
13413 if let Some(Expression::Function(inner)) = args.first() {
13414 if inner.name.eq_ignore_ascii_case("TRY")
13415 && inner.args.len() == 1
13416 {
13417 let mut inner_args = inner.args.clone();
13418 args[0] = inner_args.remove(0);
13419 }
13420 }
13421 Ok(Expression::Function(Box::new(Function::new(
13422 "GET_JSON_OBJECT".to_string(),
13423 args,
13424 ))))
13425 }
13426 DialectType::DuckDB | DialectType::SQLite => {
13427 // json -> path syntax
13428 let mut args = f.args;
13429 let json_expr = args.remove(0);
13430 let path = args.remove(0);
13431 Ok(Expression::JsonExtract(Box::new(
13432 crate::expressions::JsonExtractFunc {
13433 this: json_expr,
13434 path,
13435 returning: None,
13436 arrow_syntax: true,
13437 hash_arrow_syntax: false,
13438 wrapper_option: None,
13439 quotes_option: None,
13440 on_scalar_string: false,
13441 on_error: None,
13442 },
13443 )))
13444 }
13445 DialectType::TSQL => {
13446 let func_name = if is_scalar {
13447 "JSON_VALUE"
13448 } else {
13449 "JSON_QUERY"
13450 };
13451 Ok(Expression::Function(Box::new(Function::new(
13452 func_name.to_string(),
13453 f.args,
13454 ))))
13455 }
13456 DialectType::PostgreSQL | DialectType::Redshift => {
13457 let func_name = if is_scalar {
13458 "JSON_EXTRACT_PATH_TEXT"
13459 } else {
13460 "JSON_EXTRACT_PATH"
13461 };
13462 Ok(Expression::Function(Box::new(Function::new(
13463 func_name.to_string(),
13464 f.args,
13465 ))))
13466 }
13467 _ => Ok(Expression::Function(Box::new(Function::new(
13468 name.to_string(),
13469 f.args,
13470 )))),
13471 }
13472 }
13473 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
13474 "JSON_SEARCH"
13475 if matches!(target, DialectType::DuckDB)
13476 && (3..=5).contains(&f.args.len()) =>
13477 {
13478 let args = &f.args;
13479
13480 // Only rewrite deterministic modes and NULL/no escape-char variant.
13481 let mode = match &args[1] {
13482 Expression::Literal(lit)
13483 if matches!(
13484 lit.as_ref(),
13485 crate::expressions::Literal::String(_)
13486 ) =>
13487 {
13488 let crate::expressions::Literal::String(s) = lit.as_ref()
13489 else {
13490 unreachable!()
13491 };
13492 s.to_ascii_lowercase()
13493 }
13494 _ => return Ok(Expression::Function(f)),
13495 };
13496 if mode != "one" && mode != "all" {
13497 return Ok(Expression::Function(f));
13498 }
13499 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
13500 return Ok(Expression::Function(f));
13501 }
13502
13503 let json_doc_sql = match Generator::sql(&args[0]) {
13504 Ok(sql) => sql,
13505 Err(_) => return Ok(Expression::Function(f)),
13506 };
13507 let search_sql = match Generator::sql(&args[2]) {
13508 Ok(sql) => sql,
13509 Err(_) => return Ok(Expression::Function(f)),
13510 };
13511 let path_sql = if args.len() == 5 {
13512 match Generator::sql(&args[4]) {
13513 Ok(sql) => sql,
13514 Err(_) => return Ok(Expression::Function(f)),
13515 }
13516 } else {
13517 "'$'".to_string()
13518 };
13519
13520 let rewrite_sql = if mode == "all" {
13521 format!(
13522 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
13523 json_doc_sql, path_sql, search_sql
13524 )
13525 } else {
13526 format!(
13527 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
13528 json_doc_sql, path_sql, search_sql
13529 )
13530 };
13531
13532 Ok(Expression::Raw(crate::expressions::Raw {
13533 sql: rewrite_sql,
13534 }))
13535 }
13536 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
13537 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
13538 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
13539 if f.args.len() >= 2
13540 && matches!(source, DialectType::SingleStore) =>
13541 {
13542 let is_bson = name == "BSON_EXTRACT_BSON";
13543 let mut args = f.args;
13544 let json_expr = args.remove(0);
13545
13546 // Build JSONPath from remaining arguments
13547 let mut path = String::from("$");
13548 for arg in &args {
13549 if let Expression::Literal(lit) = arg {
13550 if let crate::expressions::Literal::String(s) = lit.as_ref()
13551 {
13552 // Check if it's a numeric string (array index)
13553 if s.parse::<i64>().is_ok() {
13554 path.push('[');
13555 path.push_str(s);
13556 path.push(']');
13557 } else {
13558 path.push('.');
13559 path.push_str(s);
13560 }
13561 }
13562 }
13563 }
13564
13565 let target_func = if is_bson {
13566 "JSONB_EXTRACT"
13567 } else {
13568 "JSON_EXTRACT"
13569 };
13570 Ok(Expression::Function(Box::new(Function::new(
13571 target_func.to_string(),
13572 vec![json_expr, Expression::string(&path)],
13573 ))))
13574 }
13575 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
13576 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
13577 Ok(Expression::Function(Box::new(Function {
13578 name: "arraySum".to_string(),
13579 args: f.args,
13580 distinct: f.distinct,
13581 trailing_comments: f.trailing_comments,
13582 use_bracket_syntax: f.use_bracket_syntax,
13583 no_parens: f.no_parens,
13584 quoted: f.quoted,
13585 span: None,
13586 inferred_type: None,
13587 })))
13588 }
13589 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
13590 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
13591 // and is handled by JsonQueryValueConvert action. This handles the case where
13592 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
13593 "JSON_QUERY" | "JSON_VALUE"
13594 if f.args.len() == 2
13595 && matches!(
13596 source,
13597 DialectType::TSQL | DialectType::Fabric
13598 ) =>
13599 {
13600 match target {
13601 DialectType::Spark
13602 | DialectType::Databricks
13603 | DialectType::Hive => Ok(Expression::Function(Box::new(
13604 Function::new("GET_JSON_OBJECT".to_string(), f.args),
13605 ))),
13606 _ => Ok(Expression::Function(Box::new(Function::new(
13607 name.to_string(),
13608 f.args,
13609 )))),
13610 }
13611 }
13612 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
13613 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
13614 let arg = f.args.into_iter().next().unwrap();
13615 let is_hive_source = matches!(
13616 source,
13617 DialectType::Hive
13618 | DialectType::Spark
13619 | DialectType::Databricks
13620 );
13621 match target {
13622 DialectType::DuckDB if is_hive_source => {
13623 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
13624 let strptime =
13625 Expression::Function(Box::new(Function::new(
13626 "STRPTIME".to_string(),
13627 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
13628 )));
13629 Ok(Expression::Function(Box::new(Function::new(
13630 "EPOCH".to_string(),
13631 vec![strptime],
13632 ))))
13633 }
13634 DialectType::Presto | DialectType::Trino if is_hive_source => {
13635 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
13636 let cast_varchar =
13637 Expression::Cast(Box::new(crate::expressions::Cast {
13638 this: arg.clone(),
13639 to: DataType::VarChar {
13640 length: None,
13641 parenthesized_length: false,
13642 },
13643 trailing_comments: vec![],
13644 double_colon_syntax: false,
13645 format: None,
13646 default: None,
13647 inferred_type: None,
13648 }));
13649 let date_parse =
13650 Expression::Function(Box::new(Function::new(
13651 "DATE_PARSE".to_string(),
13652 vec![
13653 cast_varchar,
13654 Expression::string("%Y-%m-%d %T"),
13655 ],
13656 )));
13657 let try_expr = Expression::Function(Box::new(
13658 Function::new("TRY".to_string(), vec![date_parse]),
13659 ));
13660 let date_format =
13661 Expression::Function(Box::new(Function::new(
13662 "DATE_FORMAT".to_string(),
13663 vec![arg, Expression::string("%Y-%m-%d %T")],
13664 )));
13665 let parse_datetime =
13666 Expression::Function(Box::new(Function::new(
13667 "PARSE_DATETIME".to_string(),
13668 vec![
13669 date_format,
13670 Expression::string("yyyy-MM-dd HH:mm:ss"),
13671 ],
13672 )));
13673 let coalesce =
13674 Expression::Function(Box::new(Function::new(
13675 "COALESCE".to_string(),
13676 vec![try_expr, parse_datetime],
13677 )));
13678 Ok(Expression::Function(Box::new(Function::new(
13679 "TO_UNIXTIME".to_string(),
13680 vec![coalesce],
13681 ))))
13682 }
13683 DialectType::Presto | DialectType::Trino => {
13684 Ok(Expression::Function(Box::new(Function::new(
13685 "TO_UNIXTIME".to_string(),
13686 vec![arg],
13687 ))))
13688 }
13689 _ => Ok(Expression::Function(Box::new(Function::new(
13690 "UNIX_TIMESTAMP".to_string(),
13691 vec![arg],
13692 )))),
13693 }
13694 }
13695 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
13696 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
13697 DialectType::Spark
13698 | DialectType::Databricks
13699 | DialectType::Hive => Ok(Expression::Function(Box::new(
13700 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
13701 ))),
13702 _ => Ok(Expression::Function(Box::new(Function::new(
13703 "TO_UNIX_TIMESTAMP".to_string(),
13704 f.args,
13705 )))),
13706 },
13707 // CURDATE() -> CURRENT_DATE
13708 "CURDATE" => {
13709 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
13710 }
13711 // CURTIME() -> CURRENT_TIME
13712 "CURTIME" => {
13713 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
13714 precision: None,
13715 }))
13716 }
13717 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
13718 "ARRAY_SORT" if f.args.len() >= 1 => {
13719 match target {
13720 DialectType::Hive => {
13721 let mut args = f.args;
13722 args.truncate(1); // Drop lambda comparator
13723 Ok(Expression::Function(Box::new(Function::new(
13724 "SORT_ARRAY".to_string(),
13725 args,
13726 ))))
13727 }
13728 DialectType::DuckDB
13729 if matches!(source, DialectType::Snowflake) =>
13730 {
13731 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
13732 let mut args_iter = f.args.into_iter();
13733 let arr = args_iter.next().unwrap();
13734 let asc_arg = args_iter.next();
13735 let nulls_first_arg = args_iter.next();
13736
13737 let is_asc_bool = asc_arg
13738 .as_ref()
13739 .map(|a| matches!(a, Expression::Boolean(_)))
13740 .unwrap_or(false);
13741 let is_nf_bool = nulls_first_arg
13742 .as_ref()
13743 .map(|a| matches!(a, Expression::Boolean(_)))
13744 .unwrap_or(false);
13745
13746 // No boolean args: pass through as-is
13747 if !is_asc_bool && !is_nf_bool {
13748 let mut result_args = vec![arr];
13749 if let Some(asc) = asc_arg {
13750 result_args.push(asc);
13751 if let Some(nf) = nulls_first_arg {
13752 result_args.push(nf);
13753 }
13754 }
13755 Ok(Expression::Function(Box::new(Function::new(
13756 "LIST_SORT".to_string(),
13757 result_args,
13758 ))))
13759 } else {
13760 // Has boolean args: convert to DuckDB LIST_SORT format
13761 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
13762
13763 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
13764 let nulls_are_first = match &nulls_first_arg {
13765 Some(Expression::Boolean(b)) => b.value,
13766 None if is_asc_bool => descending, // Snowflake default
13767 _ => false,
13768 };
13769 let nulls_first_sql = if nulls_are_first {
13770 Some(Expression::string("NULLS FIRST"))
13771 } else {
13772 None
13773 };
13774
13775 if !is_asc_bool {
13776 // asc is non-boolean expression, nulls_first is boolean
13777 let mut result_args = vec![arr];
13778 if let Some(asc) = asc_arg {
13779 result_args.push(asc);
13780 }
13781 if let Some(nf) = nulls_first_sql {
13782 result_args.push(nf);
13783 }
13784 Ok(Expression::Function(Box::new(Function::new(
13785 "LIST_SORT".to_string(),
13786 result_args,
13787 ))))
13788 } else {
13789 if !descending && !nulls_are_first {
13790 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
13791 Ok(Expression::Function(Box::new(
13792 Function::new(
13793 "LIST_SORT".to_string(),
13794 vec![arr],
13795 ),
13796 )))
13797 } else if descending && !nulls_are_first {
13798 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
13799 Ok(Expression::Function(Box::new(
13800 Function::new(
13801 "ARRAY_REVERSE_SORT".to_string(),
13802 vec![arr],
13803 ),
13804 )))
13805 } else {
13806 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
13807 let order_str =
13808 if descending { "DESC" } else { "ASC" };
13809 Ok(Expression::Function(Box::new(
13810 Function::new(
13811 "LIST_SORT".to_string(),
13812 vec![
13813 arr,
13814 Expression::string(order_str),
13815 Expression::string("NULLS FIRST"),
13816 ],
13817 ),
13818 )))
13819 }
13820 }
13821 }
13822 }
13823 DialectType::DuckDB => {
13824 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
13825 let mut args = f.args;
13826 args.truncate(1); // Drop lambda comparator for DuckDB
13827 Ok(Expression::Function(Box::new(Function::new(
13828 "ARRAY_SORT".to_string(),
13829 args,
13830 ))))
13831 }
13832 _ => Ok(Expression::Function(f)),
13833 }
13834 }
13835 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
13836 "SORT_ARRAY" if f.args.len() == 1 => match target {
13837 DialectType::Hive
13838 | DialectType::Spark
13839 | DialectType::Databricks => Ok(Expression::Function(f)),
13840 DialectType::DuckDB => Ok(Expression::Function(Box::new(
13841 Function::new("LIST_SORT".to_string(), f.args),
13842 ))),
13843 _ => Ok(Expression::Function(Box::new(Function::new(
13844 "ARRAY_SORT".to_string(),
13845 f.args,
13846 )))),
13847 },
13848 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
13849 "SORT_ARRAY" if f.args.len() == 2 => {
13850 let is_desc =
13851 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
13852 if is_desc {
13853 match target {
13854 DialectType::DuckDB => {
13855 Ok(Expression::Function(Box::new(Function::new(
13856 "ARRAY_REVERSE_SORT".to_string(),
13857 vec![f.args.into_iter().next().unwrap()],
13858 ))))
13859 }
13860 DialectType::Presto | DialectType::Trino => {
13861 let arr_arg = f.args.into_iter().next().unwrap();
13862 let a = Expression::Column(Box::new(
13863 crate::expressions::Column {
13864 name: crate::expressions::Identifier::new("a"),
13865 table: None,
13866 join_mark: false,
13867 trailing_comments: Vec::new(),
13868 span: None,
13869 inferred_type: None,
13870 },
13871 ));
13872 let b = Expression::Column(Box::new(
13873 crate::expressions::Column {
13874 name: crate::expressions::Identifier::new("b"),
13875 table: None,
13876 join_mark: false,
13877 trailing_comments: Vec::new(),
13878 span: None,
13879 inferred_type: None,
13880 },
13881 ));
13882 let case_expr = Expression::Case(Box::new(
13883 crate::expressions::Case {
13884 operand: None,
13885 whens: vec![
13886 (
13887 Expression::Lt(Box::new(
13888 BinaryOp::new(a.clone(), b.clone()),
13889 )),
13890 Expression::Literal(Box::new(
13891 Literal::Number("1".to_string()),
13892 )),
13893 ),
13894 (
13895 Expression::Gt(Box::new(
13896 BinaryOp::new(a.clone(), b.clone()),
13897 )),
13898 Expression::Literal(Box::new(
13899 Literal::Number("-1".to_string()),
13900 )),
13901 ),
13902 ],
13903 else_: Some(Expression::Literal(Box::new(
13904 Literal::Number("0".to_string()),
13905 ))),
13906 comments: Vec::new(),
13907 inferred_type: None,
13908 },
13909 ));
13910 let lambda = Expression::Lambda(Box::new(
13911 crate::expressions::LambdaExpr {
13912 parameters: vec![
13913 crate::expressions::Identifier::new("a"),
13914 crate::expressions::Identifier::new("b"),
13915 ],
13916 body: case_expr,
13917 colon: false,
13918 parameter_types: Vec::new(),
13919 },
13920 ));
13921 Ok(Expression::Function(Box::new(Function::new(
13922 "ARRAY_SORT".to_string(),
13923 vec![arr_arg, lambda],
13924 ))))
13925 }
13926 _ => Ok(Expression::Function(f)),
13927 }
13928 } else {
13929 // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
13930 match target {
13931 DialectType::Hive => Ok(Expression::Function(f)),
13932 DialectType::DuckDB => {
13933 Ok(Expression::Function(Box::new(Function::new(
13934 "LIST_SORT".to_string(),
13935 vec![f.args.into_iter().next().unwrap()],
13936 ))))
13937 }
13938 _ => Ok(Expression::Function(Box::new(Function::new(
13939 "ARRAY_SORT".to_string(),
13940 vec![f.args.into_iter().next().unwrap()],
13941 )))),
13942 }
13943 }
13944 }
13945 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
13946 "LEFT" if f.args.len() == 2 => {
13947 match target {
13948 DialectType::Hive
13949 | DialectType::Presto
13950 | DialectType::Trino
13951 | DialectType::Athena => {
13952 let x = f.args[0].clone();
13953 let n = f.args[1].clone();
13954 Ok(Expression::Function(Box::new(Function::new(
13955 "SUBSTRING".to_string(),
13956 vec![x, Expression::number(1), n],
13957 ))))
13958 }
13959 DialectType::Spark | DialectType::Databricks
13960 if matches!(
13961 source,
13962 DialectType::TSQL | DialectType::Fabric
13963 ) =>
13964 {
13965 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
13966 let x = f.args[0].clone();
13967 let n = f.args[1].clone();
13968 let cast_x = Expression::Cast(Box::new(Cast {
13969 this: x,
13970 to: DataType::VarChar {
13971 length: None,
13972 parenthesized_length: false,
13973 },
13974 double_colon_syntax: false,
13975 trailing_comments: Vec::new(),
13976 format: None,
13977 default: None,
13978 inferred_type: None,
13979 }));
13980 Ok(Expression::Function(Box::new(Function::new(
13981 "LEFT".to_string(),
13982 vec![cast_x, n],
13983 ))))
13984 }
13985 _ => Ok(Expression::Function(f)),
13986 }
13987 }
13988 "RIGHT" if f.args.len() == 2 => {
13989 match target {
13990 DialectType::Hive
13991 | DialectType::Presto
13992 | DialectType::Trino
13993 | DialectType::Athena => {
13994 let x = f.args[0].clone();
13995 let n = f.args[1].clone();
13996 // SUBSTRING(x, LENGTH(x) - (n - 1))
13997 let len_x = Expression::Function(Box::new(Function::new(
13998 "LENGTH".to_string(),
13999 vec![x.clone()],
14000 )));
14001 let n_minus_1 = Expression::Sub(Box::new(
14002 crate::expressions::BinaryOp::new(
14003 n,
14004 Expression::number(1),
14005 ),
14006 ));
14007 let n_minus_1_paren = Expression::Paren(Box::new(
14008 crate::expressions::Paren {
14009 this: n_minus_1,
14010 trailing_comments: Vec::new(),
14011 },
14012 ));
14013 let offset = Expression::Sub(Box::new(
14014 crate::expressions::BinaryOp::new(
14015 len_x,
14016 n_minus_1_paren,
14017 ),
14018 ));
14019 Ok(Expression::Function(Box::new(Function::new(
14020 "SUBSTRING".to_string(),
14021 vec![x, offset],
14022 ))))
14023 }
14024 DialectType::Spark | DialectType::Databricks
14025 if matches!(
14026 source,
14027 DialectType::TSQL | DialectType::Fabric
14028 ) =>
14029 {
14030 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
14031 let x = f.args[0].clone();
14032 let n = f.args[1].clone();
14033 let cast_x = Expression::Cast(Box::new(Cast {
14034 this: x,
14035 to: DataType::VarChar {
14036 length: None,
14037 parenthesized_length: false,
14038 },
14039 double_colon_syntax: false,
14040 trailing_comments: Vec::new(),
14041 format: None,
14042 default: None,
14043 inferred_type: None,
14044 }));
14045 Ok(Expression::Function(Box::new(Function::new(
14046 "RIGHT".to_string(),
14047 vec![cast_x, n],
14048 ))))
14049 }
14050 _ => Ok(Expression::Function(f)),
14051 }
14052 }
14053 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
14054 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
14055 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14056 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
14057 ))),
14058 DialectType::Spark | DialectType::Databricks => {
14059 Ok(Expression::Function(Box::new(Function::new(
14060 "MAP_FROM_ARRAYS".to_string(),
14061 f.args,
14062 ))))
14063 }
14064 _ => Ok(Expression::Function(Box::new(Function::new(
14065 "MAP".to_string(),
14066 f.args,
14067 )))),
14068 },
14069 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
14070 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
14071 "LIKE" if f.args.len() >= 2 => {
14072 let (this, pattern) = if matches!(source, DialectType::SQLite) {
14073 // SQLite: LIKE(pattern, string) -> string LIKE pattern
14074 (f.args[1].clone(), f.args[0].clone())
14075 } else {
14076 // Standard: LIKE(string, pattern) -> string LIKE pattern
14077 (f.args[0].clone(), f.args[1].clone())
14078 };
14079 let escape = if f.args.len() >= 3 {
14080 Some(f.args[2].clone())
14081 } else {
14082 None
14083 };
14084 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
14085 left: this,
14086 right: pattern,
14087 escape,
14088 quantifier: None,
14089 inferred_type: None,
14090 })))
14091 }
14092 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
14093 "ILIKE" if f.args.len() >= 2 => {
14094 let this = f.args[0].clone();
14095 let pattern = f.args[1].clone();
14096 let escape = if f.args.len() >= 3 {
14097 Some(f.args[2].clone())
14098 } else {
14099 None
14100 };
14101 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
14102 left: this,
14103 right: pattern,
14104 escape,
14105 quantifier: None,
14106 inferred_type: None,
14107 })))
14108 }
14109 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
14110 "CHAR" if f.args.len() == 1 => match target {
14111 DialectType::MySQL
14112 | DialectType::SingleStore
14113 | DialectType::TSQL => Ok(Expression::Function(f)),
14114 _ => Ok(Expression::Function(Box::new(Function::new(
14115 "CHR".to_string(),
14116 f.args,
14117 )))),
14118 },
14119 // CONCAT(a, b) -> a || b for PostgreSQL
14120 "CONCAT"
14121 if f.args.len() == 2
14122 && matches!(target, DialectType::PostgreSQL)
14123 && matches!(
14124 source,
14125 DialectType::ClickHouse | DialectType::MySQL
14126 ) =>
14127 {
14128 let mut args = f.args;
14129 let right = args.pop().unwrap();
14130 let left = args.pop().unwrap();
14131 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
14132 this: Box::new(left),
14133 expression: Box::new(right),
14134 safe: None,
14135 })))
14136 }
14137 // ARRAY_TO_STRING(arr, delim) -> target-specific
14138 "ARRAY_TO_STRING"
14139 if f.args.len() == 2
14140 && matches!(target, DialectType::DuckDB)
14141 && matches!(source, DialectType::Snowflake) =>
14142 {
14143 let mut args = f.args;
14144 let arr = args.remove(0);
14145 let sep = args.remove(0);
14146 // sep IS NULL
14147 let sep_is_null = Expression::IsNull(Box::new(IsNull {
14148 this: sep.clone(),
14149 not: false,
14150 postfix_form: false,
14151 }));
14152 // COALESCE(CAST(x AS TEXT), '')
14153 let cast_x = Expression::Cast(Box::new(Cast {
14154 this: Expression::Identifier(Identifier::new("x")),
14155 to: DataType::Text,
14156 trailing_comments: Vec::new(),
14157 double_colon_syntax: false,
14158 format: None,
14159 default: None,
14160 inferred_type: None,
14161 }));
14162 let coalesce = Expression::Coalesce(Box::new(
14163 crate::expressions::VarArgFunc {
14164 original_name: None,
14165 expressions: vec![
14166 cast_x,
14167 Expression::Literal(Box::new(Literal::String(
14168 String::new(),
14169 ))),
14170 ],
14171 inferred_type: None,
14172 },
14173 ));
14174 let lambda =
14175 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
14176 parameters: vec![Identifier::new("x")],
14177 body: coalesce,
14178 colon: false,
14179 parameter_types: Vec::new(),
14180 }));
14181 let list_transform = Expression::Function(Box::new(Function::new(
14182 "LIST_TRANSFORM".to_string(),
14183 vec![arr, lambda],
14184 )));
14185 let array_to_string =
14186 Expression::Function(Box::new(Function::new(
14187 "ARRAY_TO_STRING".to_string(),
14188 vec![list_transform, sep],
14189 )));
14190 Ok(Expression::Case(Box::new(Case {
14191 operand: None,
14192 whens: vec![(sep_is_null, Expression::Null(Null))],
14193 else_: Some(array_to_string),
14194 comments: Vec::new(),
14195 inferred_type: None,
14196 })))
14197 }
14198 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
14199 DialectType::Presto | DialectType::Trino => {
14200 Ok(Expression::Function(Box::new(Function::new(
14201 "ARRAY_JOIN".to_string(),
14202 f.args,
14203 ))))
14204 }
14205 DialectType::TSQL => Ok(Expression::Function(Box::new(
14206 Function::new("STRING_AGG".to_string(), f.args),
14207 ))),
14208 _ => Ok(Expression::Function(f)),
14209 },
14210 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
14211 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
14212 DialectType::Spark
14213 | DialectType::Databricks
14214 | DialectType::Hive => Ok(Expression::Function(Box::new(
14215 Function::new("CONCAT".to_string(), f.args),
14216 ))),
14217 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14218 Function::new("ARRAY_CAT".to_string(), f.args),
14219 ))),
14220 DialectType::Redshift => Ok(Expression::Function(Box::new(
14221 Function::new("ARRAY_CONCAT".to_string(), f.args),
14222 ))),
14223 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14224 Function::new("ARRAY_CAT".to_string(), f.args),
14225 ))),
14226 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14227 Function::new("LIST_CONCAT".to_string(), f.args),
14228 ))),
14229 DialectType::Presto | DialectType::Trino => {
14230 Ok(Expression::Function(Box::new(Function::new(
14231 "CONCAT".to_string(),
14232 f.args,
14233 ))))
14234 }
14235 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14236 Function::new("ARRAY_CONCAT".to_string(), f.args),
14237 ))),
14238 _ => Ok(Expression::Function(f)),
14239 },
14240 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
14241 "HAS" if f.args.len() == 2 => match target {
14242 DialectType::Spark
14243 | DialectType::Databricks
14244 | DialectType::Hive => Ok(Expression::Function(Box::new(
14245 Function::new("ARRAY_CONTAINS".to_string(), f.args),
14246 ))),
14247 DialectType::Presto | DialectType::Trino => {
14248 Ok(Expression::Function(Box::new(Function::new(
14249 "CONTAINS".to_string(),
14250 f.args,
14251 ))))
14252 }
14253 _ => Ok(Expression::Function(f)),
14254 },
14255 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
14256 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
14257 Function::new("COALESCE".to_string(), f.args),
14258 ))),
14259 // ISNULL(x) in MySQL -> (x IS NULL)
14260 "ISNULL"
14261 if f.args.len() == 1
14262 && matches!(source, DialectType::MySQL)
14263 && matches!(target, DialectType::MySQL) =>
14264 {
14265 let arg = f.args.into_iter().next().unwrap();
14266 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
14267 this: Expression::IsNull(Box::new(
14268 crate::expressions::IsNull {
14269 this: arg,
14270 not: false,
14271 postfix_form: false,
14272 },
14273 )),
14274 trailing_comments: Vec::new(),
14275 })))
14276 }
14277 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
14278 "MONTHNAME"
14279 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
14280 {
14281 let arg = f.args.into_iter().next().unwrap();
14282 Ok(Expression::Function(Box::new(Function::new(
14283 "DATE_FORMAT".to_string(),
14284 vec![arg, Expression::string("%M")],
14285 ))))
14286 }
14287 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
14288 "SPLITBYSTRING" if f.args.len() == 2 => {
14289 let sep = f.args[0].clone();
14290 let str_arg = f.args[1].clone();
14291 match target {
14292 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14293 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
14294 ))),
14295 DialectType::Doris => {
14296 Ok(Expression::Function(Box::new(Function::new(
14297 "SPLIT_BY_STRING".to_string(),
14298 vec![str_arg, sep],
14299 ))))
14300 }
14301 DialectType::Hive
14302 | DialectType::Spark
14303 | DialectType::Databricks => {
14304 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
14305 let escaped =
14306 Expression::Function(Box::new(Function::new(
14307 "CONCAT".to_string(),
14308 vec![
14309 Expression::string("\\Q"),
14310 sep,
14311 Expression::string("\\E"),
14312 ],
14313 )));
14314 Ok(Expression::Function(Box::new(Function::new(
14315 "SPLIT".to_string(),
14316 vec![str_arg, escaped],
14317 ))))
14318 }
14319 _ => Ok(Expression::Function(f)),
14320 }
14321 }
14322 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
14323 "SPLITBYREGEXP" if f.args.len() == 2 => {
14324 let sep = f.args[0].clone();
14325 let str_arg = f.args[1].clone();
14326 match target {
14327 DialectType::DuckDB => {
14328 Ok(Expression::Function(Box::new(Function::new(
14329 "STR_SPLIT_REGEX".to_string(),
14330 vec![str_arg, sep],
14331 ))))
14332 }
14333 DialectType::Hive
14334 | DialectType::Spark
14335 | DialectType::Databricks => {
14336 Ok(Expression::Function(Box::new(Function::new(
14337 "SPLIT".to_string(),
14338 vec![str_arg, sep],
14339 ))))
14340 }
14341 _ => Ok(Expression::Function(f)),
14342 }
14343 }
14344 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
14345 "TOMONDAY" => {
14346 if f.args.len() == 1 {
14347 let arg = f.args.into_iter().next().unwrap();
14348 match target {
14349 DialectType::Doris => {
14350 Ok(Expression::Function(Box::new(Function::new(
14351 "DATE_TRUNC".to_string(),
14352 vec![arg, Expression::string("WEEK")],
14353 ))))
14354 }
14355 _ => Ok(Expression::Function(Box::new(Function::new(
14356 "DATE_TRUNC".to_string(),
14357 vec![Expression::string("WEEK"), arg],
14358 )))),
14359 }
14360 } else {
14361 Ok(Expression::Function(f))
14362 }
14363 }
14364 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
14365 "COLLECT_LIST" if f.args.len() == 1 => match target {
14366 DialectType::Spark
14367 | DialectType::Databricks
14368 | DialectType::Hive => Ok(Expression::Function(f)),
14369 _ => Ok(Expression::Function(Box::new(Function::new(
14370 "ARRAY_AGG".to_string(),
14371 f.args,
14372 )))),
14373 },
14374 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
14375 "TO_CHAR"
14376 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
14377 {
14378 let arg = f.args.into_iter().next().unwrap();
14379 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14380 this: arg,
14381 to: DataType::Custom {
14382 name: "STRING".to_string(),
14383 },
14384 double_colon_syntax: false,
14385 trailing_comments: Vec::new(),
14386 format: None,
14387 default: None,
14388 inferred_type: None,
14389 })))
14390 }
14391 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
14392 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
14393 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14394 Function::new("RANDOM".to_string(), vec![]),
14395 ))),
14396 _ => Ok(Expression::Function(f)),
14397 },
14398 // ClickHouse formatDateTime -> target-specific
14399 "FORMATDATETIME" if f.args.len() >= 2 => match target {
14400 DialectType::MySQL => Ok(Expression::Function(Box::new(
14401 Function::new("DATE_FORMAT".to_string(), f.args),
14402 ))),
14403 _ => Ok(Expression::Function(f)),
14404 },
14405 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
14406 "REPLICATE" if f.args.len() == 2 => match target {
14407 DialectType::TSQL => Ok(Expression::Function(f)),
14408 _ => Ok(Expression::Function(Box::new(Function::new(
14409 "REPEAT".to_string(),
14410 f.args,
14411 )))),
14412 },
14413 // LEN(x) -> LENGTH(x) for non-TSQL targets
14414 // No CAST needed when arg is already a string literal
14415 "LEN" if f.args.len() == 1 => {
14416 match target {
14417 DialectType::TSQL => Ok(Expression::Function(f)),
14418 DialectType::Spark | DialectType::Databricks => {
14419 let arg = f.args.into_iter().next().unwrap();
14420 // Don't wrap string literals with CAST - they're already strings
14421 let is_string = matches!(
14422 &arg,
14423 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
14424 );
14425 let final_arg = if is_string {
14426 arg
14427 } else {
14428 Expression::Cast(Box::new(Cast {
14429 this: arg,
14430 to: DataType::VarChar {
14431 length: None,
14432 parenthesized_length: false,
14433 },
14434 double_colon_syntax: false,
14435 trailing_comments: Vec::new(),
14436 format: None,
14437 default: None,
14438 inferred_type: None,
14439 }))
14440 };
14441 Ok(Expression::Function(Box::new(Function::new(
14442 "LENGTH".to_string(),
14443 vec![final_arg],
14444 ))))
14445 }
14446 _ => {
14447 let arg = f.args.into_iter().next().unwrap();
14448 Ok(Expression::Function(Box::new(Function::new(
14449 "LENGTH".to_string(),
14450 vec![arg],
14451 ))))
14452 }
14453 }
14454 }
14455 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
14456 "COUNT_BIG" if f.args.len() == 1 => match target {
14457 DialectType::TSQL => Ok(Expression::Function(f)),
14458 _ => Ok(Expression::Function(Box::new(Function::new(
14459 "COUNT".to_string(),
14460 f.args,
14461 )))),
14462 },
14463 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
14464 "DATEFROMPARTS" if f.args.len() == 3 => match target {
14465 DialectType::TSQL => Ok(Expression::Function(f)),
14466 _ => Ok(Expression::Function(Box::new(Function::new(
14467 "MAKE_DATE".to_string(),
14468 f.args,
14469 )))),
14470 },
14471 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
14472 "REGEXP_LIKE" if f.args.len() >= 2 => {
14473 let str_expr = f.args[0].clone();
14474 let pattern = f.args[1].clone();
14475 let flags = if f.args.len() >= 3 {
14476 Some(f.args[2].clone())
14477 } else {
14478 None
14479 };
14480 match target {
14481 DialectType::DuckDB => {
14482 let mut new_args = vec![str_expr, pattern];
14483 if let Some(fl) = flags {
14484 new_args.push(fl);
14485 }
14486 Ok(Expression::Function(Box::new(Function::new(
14487 "REGEXP_MATCHES".to_string(),
14488 new_args,
14489 ))))
14490 }
14491 _ => Ok(Expression::RegexpLike(Box::new(
14492 crate::expressions::RegexpFunc {
14493 this: str_expr,
14494 pattern,
14495 flags,
14496 },
14497 ))),
14498 }
14499 }
14500 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
14501 "ARRAYJOIN" if f.args.len() == 1 => match target {
14502 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14503 Function::new("UNNEST".to_string(), f.args),
14504 ))),
14505 _ => Ok(Expression::Function(f)),
14506 },
14507 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
14508 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
14509 match target {
14510 DialectType::TSQL => Ok(Expression::Function(f)),
14511 DialectType::DuckDB => {
14512 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
14513 let mut args = f.args;
14514 let ms = args.pop().unwrap();
14515 let s = args.pop().unwrap();
14516 // s + (ms / 1000.0)
14517 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
14518 ms,
14519 Expression::Literal(Box::new(
14520 crate::expressions::Literal::Number(
14521 "1000.0".to_string(),
14522 ),
14523 )),
14524 )));
14525 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
14526 s,
14527 Expression::Paren(Box::new(Paren {
14528 this: ms_frac,
14529 trailing_comments: vec![],
14530 })),
14531 )));
14532 args.push(s_with_ms);
14533 Ok(Expression::Function(Box::new(Function::new(
14534 "MAKE_TIMESTAMP".to_string(),
14535 args,
14536 ))))
14537 }
14538 DialectType::Snowflake => {
14539 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
14540 let mut args = f.args;
14541 let ms = args.pop().unwrap();
14542 // ms * 1000000
14543 let ns = Expression::Mul(Box::new(BinaryOp::new(
14544 ms,
14545 Expression::number(1000000),
14546 )));
14547 args.push(ns);
14548 Ok(Expression::Function(Box::new(Function::new(
14549 "TIMESTAMP_FROM_PARTS".to_string(),
14550 args,
14551 ))))
14552 }
14553 _ => {
14554 // Default: keep function name for other targets
14555 Ok(Expression::Function(Box::new(Function::new(
14556 "DATETIMEFROMPARTS".to_string(),
14557 f.args,
14558 ))))
14559 }
14560 }
14561 }
14562 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
14563 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
14564 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
14565 let is_try = name == "TRY_CONVERT";
14566 let type_expr = f.args[0].clone();
14567 let value_expr = f.args[1].clone();
14568 let style = if f.args.len() >= 3 {
14569 Some(&f.args[2])
14570 } else {
14571 None
14572 };
14573
14574 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
14575 if matches!(target, DialectType::TSQL) {
14576 let normalized_type = match &type_expr {
14577 Expression::DataType(dt) => {
14578 let new_dt = match dt {
14579 DataType::Int { .. } => DataType::Custom {
14580 name: "INTEGER".to_string(),
14581 },
14582 _ => dt.clone(),
14583 };
14584 Expression::DataType(new_dt)
14585 }
14586 Expression::Identifier(id) => {
14587 if id.name.eq_ignore_ascii_case("INT") {
14588 Expression::Identifier(
14589 crate::expressions::Identifier::new("INTEGER"),
14590 )
14591 } else {
14592 let upper = id.name.to_ascii_uppercase();
14593 Expression::Identifier(
14594 crate::expressions::Identifier::new(upper),
14595 )
14596 }
14597 }
14598 Expression::Column(col) => {
14599 if col.name.name.eq_ignore_ascii_case("INT") {
14600 Expression::Identifier(
14601 crate::expressions::Identifier::new("INTEGER"),
14602 )
14603 } else {
14604 let upper = col.name.name.to_ascii_uppercase();
14605 Expression::Identifier(
14606 crate::expressions::Identifier::new(upper),
14607 )
14608 }
14609 }
14610 _ => type_expr.clone(),
14611 };
14612 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
14613 let mut new_args = vec![normalized_type, value_expr];
14614 if let Some(s) = style {
14615 new_args.push(s.clone());
14616 }
14617 return Ok(Expression::Function(Box::new(Function::new(
14618 func_name.to_string(),
14619 new_args,
14620 ))));
14621 }
14622
14623 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
14624 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
14625 match e {
14626 Expression::DataType(dt) => {
14627 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
14628 match dt {
14629 DataType::Custom { name }
14630 if name.starts_with("NVARCHAR(")
14631 || name.starts_with("NCHAR(") =>
14632 {
14633 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
14634 let inner = &name[name.find('(').unwrap() + 1
14635 ..name.len() - 1];
14636 if inner.eq_ignore_ascii_case("MAX") {
14637 Some(DataType::Text)
14638 } else if let Ok(len) = inner.parse::<u32>() {
14639 if name.starts_with("NCHAR") {
14640 Some(DataType::Char {
14641 length: Some(len),
14642 })
14643 } else {
14644 Some(DataType::VarChar {
14645 length: Some(len),
14646 parenthesized_length: false,
14647 })
14648 }
14649 } else {
14650 Some(dt.clone())
14651 }
14652 }
14653 DataType::Custom { name } if name == "NVARCHAR" => {
14654 Some(DataType::VarChar {
14655 length: None,
14656 parenthesized_length: false,
14657 })
14658 }
14659 DataType::Custom { name } if name == "NCHAR" => {
14660 Some(DataType::Char { length: None })
14661 }
14662 DataType::Custom { name }
14663 if name == "NVARCHAR(MAX)"
14664 || name == "VARCHAR(MAX)" =>
14665 {
14666 Some(DataType::Text)
14667 }
14668 _ => Some(dt.clone()),
14669 }
14670 }
14671 Expression::Identifier(id) => {
14672 let name = id.name.to_ascii_uppercase();
14673 match name.as_str() {
14674 "INT" | "INTEGER" => Some(DataType::Int {
14675 length: None,
14676 integer_spelling: false,
14677 }),
14678 "BIGINT" => Some(DataType::BigInt { length: None }),
14679 "SMALLINT" => {
14680 Some(DataType::SmallInt { length: None })
14681 }
14682 "TINYINT" => {
14683 Some(DataType::TinyInt { length: None })
14684 }
14685 "FLOAT" => Some(DataType::Float {
14686 precision: None,
14687 scale: None,
14688 real_spelling: false,
14689 }),
14690 "REAL" => Some(DataType::Float {
14691 precision: None,
14692 scale: None,
14693 real_spelling: true,
14694 }),
14695 "DATETIME" | "DATETIME2" => {
14696 Some(DataType::Timestamp {
14697 timezone: false,
14698 precision: None,
14699 })
14700 }
14701 "DATE" => Some(DataType::Date),
14702 "BIT" => Some(DataType::Boolean),
14703 "TEXT" => Some(DataType::Text),
14704 "NUMERIC" => Some(DataType::Decimal {
14705 precision: None,
14706 scale: None,
14707 }),
14708 "MONEY" => Some(DataType::Decimal {
14709 precision: Some(15),
14710 scale: Some(4),
14711 }),
14712 "SMALLMONEY" => Some(DataType::Decimal {
14713 precision: Some(6),
14714 scale: Some(4),
14715 }),
14716 "VARCHAR" => Some(DataType::VarChar {
14717 length: None,
14718 parenthesized_length: false,
14719 }),
14720 "NVARCHAR" => Some(DataType::VarChar {
14721 length: None,
14722 parenthesized_length: false,
14723 }),
14724 "CHAR" => Some(DataType::Char { length: None }),
14725 "NCHAR" => Some(DataType::Char { length: None }),
14726 _ => Some(DataType::Custom { name }),
14727 }
14728 }
14729 Expression::Column(col) => {
14730 let name = col.name.name.to_ascii_uppercase();
14731 match name.as_str() {
14732 "INT" | "INTEGER" => Some(DataType::Int {
14733 length: None,
14734 integer_spelling: false,
14735 }),
14736 "BIGINT" => Some(DataType::BigInt { length: None }),
14737 "FLOAT" => Some(DataType::Float {
14738 precision: None,
14739 scale: None,
14740 real_spelling: false,
14741 }),
14742 "DATETIME" | "DATETIME2" => {
14743 Some(DataType::Timestamp {
14744 timezone: false,
14745 precision: None,
14746 })
14747 }
14748 "DATE" => Some(DataType::Date),
14749 "NUMERIC" => Some(DataType::Decimal {
14750 precision: None,
14751 scale: None,
14752 }),
14753 "VARCHAR" => Some(DataType::VarChar {
14754 length: None,
14755 parenthesized_length: false,
14756 }),
14757 "NVARCHAR" => Some(DataType::VarChar {
14758 length: None,
14759 parenthesized_length: false,
14760 }),
14761 "CHAR" => Some(DataType::Char { length: None }),
14762 "NCHAR" => Some(DataType::Char { length: None }),
14763 _ => Some(DataType::Custom { name }),
14764 }
14765 }
14766 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
14767 Expression::Function(f) => {
14768 let fname = f.name.to_ascii_uppercase();
14769 match fname.as_str() {
14770 "VARCHAR" | "NVARCHAR" => {
14771 let len = f.args.first().and_then(|a| {
14772 if let Expression::Literal(lit) = a
14773 {
14774 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14775 n.parse::<u32>().ok()
14776 } else { None }
14777 } else if let Expression::Identifier(id) = a
14778 {
14779 if id.name.eq_ignore_ascii_case("MAX") {
14780 None
14781 } else {
14782 None
14783 }
14784 } else {
14785 None
14786 }
14787 });
14788 // Check for VARCHAR(MAX) -> TEXT
14789 let is_max = f.args.first().map_or(false, |a| {
14790 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
14791 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
14792 });
14793 if is_max {
14794 Some(DataType::Text)
14795 } else {
14796 Some(DataType::VarChar {
14797 length: len,
14798 parenthesized_length: false,
14799 })
14800 }
14801 }
14802 "NCHAR" | "CHAR" => {
14803 let len = f.args.first().and_then(|a| {
14804 if let Expression::Literal(lit) = a
14805 {
14806 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14807 n.parse::<u32>().ok()
14808 } else { None }
14809 } else {
14810 None
14811 }
14812 });
14813 Some(DataType::Char { length: len })
14814 }
14815 "NUMERIC" | "DECIMAL" => {
14816 let precision = f.args.first().and_then(|a| {
14817 if let Expression::Literal(lit) = a
14818 {
14819 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14820 n.parse::<u32>().ok()
14821 } else { None }
14822 } else {
14823 None
14824 }
14825 });
14826 let scale = f.args.get(1).and_then(|a| {
14827 if let Expression::Literal(lit) = a
14828 {
14829 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14830 n.parse::<u32>().ok()
14831 } else { None }
14832 } else {
14833 None
14834 }
14835 });
14836 Some(DataType::Decimal { precision, scale })
14837 }
14838 _ => None,
14839 }
14840 }
14841 _ => None,
14842 }
14843 }
14844
14845 if let Some(mut dt) = expr_to_datatype(&type_expr) {
14846 // For TSQL source: VARCHAR/CHAR without length defaults to 30
14847 let is_tsql_source =
14848 matches!(source, DialectType::TSQL | DialectType::Fabric);
14849 if is_tsql_source {
14850 match &dt {
14851 DataType::VarChar { length: None, .. } => {
14852 dt = DataType::VarChar {
14853 length: Some(30),
14854 parenthesized_length: false,
14855 };
14856 }
14857 DataType::Char { length: None } => {
14858 dt = DataType::Char { length: Some(30) };
14859 }
14860 _ => {}
14861 }
14862 }
14863
14864 // Determine if this is a string type
14865 let is_string_type = matches!(
14866 dt,
14867 DataType::VarChar { .. }
14868 | DataType::Char { .. }
14869 | DataType::Text
14870 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
14871 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
14872 || name.starts_with("VARCHAR(") || name == "VARCHAR"
14873 || name == "STRING");
14874
14875 // Determine if this is a date/time type
14876 let is_datetime_type = matches!(
14877 dt,
14878 DataType::Timestamp { .. } | DataType::Date
14879 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
14880 || name == "DATETIME2" || name == "SMALLDATETIME");
14881
14882 // Check for date conversion with style
14883 if style.is_some() {
14884 let style_num = style.and_then(|s| {
14885 if let Expression::Literal(lit) = s {
14886 if let crate::expressions::Literal::Number(n) =
14887 lit.as_ref()
14888 {
14889 n.parse::<u32>().ok()
14890 } else {
14891 None
14892 }
14893 } else {
14894 None
14895 }
14896 });
14897
14898 // TSQL CONVERT date styles (Java format)
14899 let format_str = style_num.and_then(|n| match n {
14900 101 => Some("MM/dd/yyyy"),
14901 102 => Some("yyyy.MM.dd"),
14902 103 => Some("dd/MM/yyyy"),
14903 104 => Some("dd.MM.yyyy"),
14904 105 => Some("dd-MM-yyyy"),
14905 108 => Some("HH:mm:ss"),
14906 110 => Some("MM-dd-yyyy"),
14907 112 => Some("yyyyMMdd"),
14908 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
14909 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
14910 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
14911 _ => None,
14912 });
14913
14914 // Non-string, non-datetime types with style: just CAST, ignore the style
14915 if !is_string_type && !is_datetime_type {
14916 let cast_expr = if is_try {
14917 Expression::TryCast(Box::new(
14918 crate::expressions::Cast {
14919 this: value_expr,
14920 to: dt,
14921 trailing_comments: Vec::new(),
14922 double_colon_syntax: false,
14923 format: None,
14924 default: None,
14925 inferred_type: None,
14926 },
14927 ))
14928 } else {
14929 Expression::Cast(Box::new(
14930 crate::expressions::Cast {
14931 this: value_expr,
14932 to: dt,
14933 trailing_comments: Vec::new(),
14934 double_colon_syntax: false,
14935 format: None,
14936 default: None,
14937 inferred_type: None,
14938 },
14939 ))
14940 };
14941 return Ok(cast_expr);
14942 }
14943
14944 if let Some(java_fmt) = format_str {
14945 let c_fmt = java_fmt
14946 .replace("yyyy", "%Y")
14947 .replace("MM", "%m")
14948 .replace("dd", "%d")
14949 .replace("HH", "%H")
14950 .replace("mm", "%M")
14951 .replace("ss", "%S")
14952 .replace("SSSSSS", "%f")
14953 .replace("SSS", "%f")
14954 .replace("'T'", "T");
14955
14956 // For datetime target types: style is the INPUT format for parsing strings -> dates
14957 if is_datetime_type {
14958 match target {
14959 DialectType::DuckDB => {
14960 return Ok(Expression::Function(Box::new(
14961 Function::new(
14962 "STRPTIME".to_string(),
14963 vec![
14964 value_expr,
14965 Expression::string(&c_fmt),
14966 ],
14967 ),
14968 )));
14969 }
14970 DialectType::Spark
14971 | DialectType::Databricks => {
14972 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
14973 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
14974 let func_name =
14975 if matches!(dt, DataType::Date) {
14976 "TO_DATE"
14977 } else {
14978 "TO_TIMESTAMP"
14979 };
14980 return Ok(Expression::Function(Box::new(
14981 Function::new(
14982 func_name.to_string(),
14983 vec![
14984 value_expr,
14985 Expression::string(java_fmt),
14986 ],
14987 ),
14988 )));
14989 }
14990 DialectType::Hive => {
14991 return Ok(Expression::Function(Box::new(
14992 Function::new(
14993 "TO_TIMESTAMP".to_string(),
14994 vec![
14995 value_expr,
14996 Expression::string(java_fmt),
14997 ],
14998 ),
14999 )));
15000 }
15001 _ => {
15002 return Ok(Expression::Cast(Box::new(
15003 crate::expressions::Cast {
15004 this: value_expr,
15005 to: dt,
15006 trailing_comments: Vec::new(),
15007 double_colon_syntax: false,
15008 format: None,
15009 default: None,
15010 inferred_type: None,
15011 },
15012 )));
15013 }
15014 }
15015 }
15016
15017 // For string target types: style is the OUTPUT format for dates -> strings
15018 match target {
15019 DialectType::DuckDB => Ok(Expression::Function(
15020 Box::new(Function::new(
15021 "STRPTIME".to_string(),
15022 vec![
15023 value_expr,
15024 Expression::string(&c_fmt),
15025 ],
15026 )),
15027 )),
15028 DialectType::Spark | DialectType::Databricks => {
15029 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
15030 // Determine the target string type
15031 let string_dt = match &dt {
15032 DataType::VarChar {
15033 length: Some(l),
15034 ..
15035 } => DataType::VarChar {
15036 length: Some(*l),
15037 parenthesized_length: false,
15038 },
15039 DataType::Text => DataType::Custom {
15040 name: "STRING".to_string(),
15041 },
15042 _ => DataType::Custom {
15043 name: "STRING".to_string(),
15044 },
15045 };
15046 let date_format_expr = Expression::Function(
15047 Box::new(Function::new(
15048 "DATE_FORMAT".to_string(),
15049 vec![
15050 value_expr,
15051 Expression::string(java_fmt),
15052 ],
15053 )),
15054 );
15055 let cast_expr = if is_try {
15056 Expression::TryCast(Box::new(
15057 crate::expressions::Cast {
15058 this: date_format_expr,
15059 to: string_dt,
15060 trailing_comments: Vec::new(),
15061 double_colon_syntax: false,
15062 format: None,
15063 default: None,
15064 inferred_type: None,
15065 },
15066 ))
15067 } else {
15068 Expression::Cast(Box::new(
15069 crate::expressions::Cast {
15070 this: date_format_expr,
15071 to: string_dt,
15072 trailing_comments: Vec::new(),
15073 double_colon_syntax: false,
15074 format: None,
15075 default: None,
15076 inferred_type: None,
15077 },
15078 ))
15079 };
15080 Ok(cast_expr)
15081 }
15082 DialectType::MySQL | DialectType::SingleStore => {
15083 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
15084 let mysql_fmt = java_fmt
15085 .replace("yyyy", "%Y")
15086 .replace("MM", "%m")
15087 .replace("dd", "%d")
15088 .replace("HH:mm:ss.SSSSSS", "%T")
15089 .replace("HH:mm:ss", "%T")
15090 .replace("HH", "%H")
15091 .replace("mm", "%i")
15092 .replace("ss", "%S");
15093 let date_format_expr = Expression::Function(
15094 Box::new(Function::new(
15095 "DATE_FORMAT".to_string(),
15096 vec![
15097 value_expr,
15098 Expression::string(&mysql_fmt),
15099 ],
15100 )),
15101 );
15102 // MySQL uses CHAR for string casts
15103 let mysql_dt = match &dt {
15104 DataType::VarChar { length, .. } => {
15105 DataType::Char { length: *length }
15106 }
15107 _ => dt,
15108 };
15109 Ok(Expression::Cast(Box::new(
15110 crate::expressions::Cast {
15111 this: date_format_expr,
15112 to: mysql_dt,
15113 trailing_comments: Vec::new(),
15114 double_colon_syntax: false,
15115 format: None,
15116 default: None,
15117 inferred_type: None,
15118 },
15119 )))
15120 }
15121 DialectType::Hive => {
15122 let func_name = "TO_TIMESTAMP";
15123 Ok(Expression::Function(Box::new(
15124 Function::new(
15125 func_name.to_string(),
15126 vec![
15127 value_expr,
15128 Expression::string(java_fmt),
15129 ],
15130 ),
15131 )))
15132 }
15133 _ => Ok(Expression::Cast(Box::new(
15134 crate::expressions::Cast {
15135 this: value_expr,
15136 to: dt,
15137 trailing_comments: Vec::new(),
15138 double_colon_syntax: false,
15139 format: None,
15140 default: None,
15141 inferred_type: None,
15142 },
15143 ))),
15144 }
15145 } else {
15146 // Unknown style, just CAST
15147 let cast_expr = if is_try {
15148 Expression::TryCast(Box::new(
15149 crate::expressions::Cast {
15150 this: value_expr,
15151 to: dt,
15152 trailing_comments: Vec::new(),
15153 double_colon_syntax: false,
15154 format: None,
15155 default: None,
15156 inferred_type: None,
15157 },
15158 ))
15159 } else {
15160 Expression::Cast(Box::new(
15161 crate::expressions::Cast {
15162 this: value_expr,
15163 to: dt,
15164 trailing_comments: Vec::new(),
15165 double_colon_syntax: false,
15166 format: None,
15167 default: None,
15168 inferred_type: None,
15169 },
15170 ))
15171 };
15172 Ok(cast_expr)
15173 }
15174 } else {
15175 // No style - simple CAST
15176 let final_dt = if matches!(
15177 target,
15178 DialectType::MySQL | DialectType::SingleStore
15179 ) {
15180 match &dt {
15181 DataType::Int { .. }
15182 | DataType::BigInt { .. }
15183 | DataType::SmallInt { .. }
15184 | DataType::TinyInt { .. } => DataType::Custom {
15185 name: "SIGNED".to_string(),
15186 },
15187 DataType::VarChar { length, .. } => {
15188 DataType::Char { length: *length }
15189 }
15190 _ => dt,
15191 }
15192 } else {
15193 dt
15194 };
15195 let cast_expr = if is_try {
15196 Expression::TryCast(Box::new(
15197 crate::expressions::Cast {
15198 this: value_expr,
15199 to: final_dt,
15200 trailing_comments: Vec::new(),
15201 double_colon_syntax: false,
15202 format: None,
15203 default: None,
15204 inferred_type: None,
15205 },
15206 ))
15207 } else {
15208 Expression::Cast(Box::new(crate::expressions::Cast {
15209 this: value_expr,
15210 to: final_dt,
15211 trailing_comments: Vec::new(),
15212 double_colon_syntax: false,
15213 format: None,
15214 default: None,
15215 inferred_type: None,
15216 }))
15217 };
15218 Ok(cast_expr)
15219 }
15220 } else {
15221 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
15222 Ok(Expression::Function(f))
15223 }
15224 }
15225 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
15226 "STRFTIME" if f.args.len() == 2 => {
15227 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
15228 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
15229 // SQLite: args[0] = format, args[1] = value
15230 (f.args[1].clone(), &f.args[0])
15231 } else {
15232 // DuckDB and others: args[0] = value, args[1] = format
15233 (f.args[0].clone(), &f.args[1])
15234 };
15235
15236 // Helper to convert C-style format to Java-style
// Rewrites strftime-style (`%Y`, `%m`, ...) directives in `fmt` into the
// corresponding Java-style pattern letters (`yyyy`, `MM`, ...).
//
// The substitutions are applied one after another, in table order, over the
// whole string; text that matches no directive is left untouched.
fn c_to_java_format(fmt: &str) -> String {
    // (strftime directive, Java pattern), in application order.
    const DIRECTIVES: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    DIRECTIVES
        .iter()
        .fold(fmt.to_string(), |acc, &(directive, java)| {
            acc.replace(directive, java)
        })
}
15258
15259 // Helper: recursively convert format strings within expressions (handles CONCAT)
15260 fn convert_fmt_expr(
15261 expr: &Expression,
15262 converter: &dyn Fn(&str) -> String,
15263 ) -> Expression {
15264 match expr {
15265 Expression::Literal(lit)
15266 if matches!(
15267 lit.as_ref(),
15268 crate::expressions::Literal::String(_)
15269 ) =>
15270 {
15271 let crate::expressions::Literal::String(s) =
15272 lit.as_ref()
15273 else {
15274 unreachable!()
15275 };
15276 Expression::string(&converter(s))
15277 }
15278 Expression::Function(func)
15279 if func.name.eq_ignore_ascii_case("CONCAT") =>
15280 {
15281 let new_args: Vec<Expression> = func
15282 .args
15283 .iter()
15284 .map(|a| convert_fmt_expr(a, converter))
15285 .collect();
15286 Expression::Function(Box::new(Function::new(
15287 "CONCAT".to_string(),
15288 new_args,
15289 )))
15290 }
15291 other => other.clone(),
15292 }
15293 }
15294
15295 match target {
15296 DialectType::DuckDB => {
15297 if matches!(source, DialectType::SQLite) {
15298 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
15299 let cast_val = Expression::Cast(Box::new(Cast {
15300 this: val,
15301 to: crate::expressions::DataType::Timestamp {
15302 precision: None,
15303 timezone: false,
15304 },
15305 trailing_comments: Vec::new(),
15306 double_colon_syntax: false,
15307 format: None,
15308 default: None,
15309 inferred_type: None,
15310 }));
15311 Ok(Expression::Function(Box::new(Function::new(
15312 "STRFTIME".to_string(),
15313 vec![cast_val, fmt_expr.clone()],
15314 ))))
15315 } else {
15316 Ok(Expression::Function(f))
15317 }
15318 }
15319 DialectType::Spark
15320 | DialectType::Databricks
15321 | DialectType::Hive => {
15322 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
15323 let converted_fmt =
15324 convert_fmt_expr(fmt_expr, &c_to_java_format);
15325 Ok(Expression::Function(Box::new(Function::new(
15326 "DATE_FORMAT".to_string(),
15327 vec![val, converted_fmt],
15328 ))))
15329 }
15330 DialectType::TSQL | DialectType::Fabric => {
15331 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
15332 let converted_fmt =
15333 convert_fmt_expr(fmt_expr, &c_to_java_format);
15334 Ok(Expression::Function(Box::new(Function::new(
15335 "FORMAT".to_string(),
15336 vec![val, converted_fmt],
15337 ))))
15338 }
15339 DialectType::Presto
15340 | DialectType::Trino
15341 | DialectType::Athena => {
15342 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
15343 if let Expression::Literal(lit) = fmt_expr {
15344 if let crate::expressions::Literal::String(s) =
15345 lit.as_ref()
15346 {
15347 let presto_fmt = duckdb_to_presto_format(s);
15348 Ok(Expression::Function(Box::new(Function::new(
15349 "DATE_FORMAT".to_string(),
15350 vec![val, Expression::string(&presto_fmt)],
15351 ))))
15352 } else {
15353 Ok(Expression::Function(Box::new(Function::new(
15354 "DATE_FORMAT".to_string(),
15355 vec![val, fmt_expr.clone()],
15356 ))))
15357 }
15358 } else {
15359 Ok(Expression::Function(Box::new(Function::new(
15360 "DATE_FORMAT".to_string(),
15361 vec![val, fmt_expr.clone()],
15362 ))))
15363 }
15364 }
15365 DialectType::BigQuery => {
15366 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
15367 if let Expression::Literal(lit) = fmt_expr {
15368 if let crate::expressions::Literal::String(s) =
15369 lit.as_ref()
15370 {
15371 let bq_fmt = duckdb_to_bigquery_format(s);
15372 Ok(Expression::Function(Box::new(Function::new(
15373 "FORMAT_DATE".to_string(),
15374 vec![Expression::string(&bq_fmt), val],
15375 ))))
15376 } else {
15377 Ok(Expression::Function(Box::new(Function::new(
15378 "FORMAT_DATE".to_string(),
15379 vec![fmt_expr.clone(), val],
15380 ))))
15381 }
15382 } else {
15383 Ok(Expression::Function(Box::new(Function::new(
15384 "FORMAT_DATE".to_string(),
15385 vec![fmt_expr.clone(), val],
15386 ))))
15387 }
15388 }
15389 DialectType::PostgreSQL | DialectType::Redshift => {
15390 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
15391 if let Expression::Literal(lit) = fmt_expr {
15392 if let crate::expressions::Literal::String(s) =
15393 lit.as_ref()
15394 {
15395 let pg_fmt = s
15396 .replace("%Y", "YYYY")
15397 .replace("%m", "MM")
15398 .replace("%d", "DD")
15399 .replace("%H", "HH24")
15400 .replace("%M", "MI")
15401 .replace("%S", "SS")
15402 .replace("%y", "YY")
15403 .replace("%-m", "FMMM")
15404 .replace("%-d", "FMDD")
15405 .replace("%-H", "FMHH24")
15406 .replace("%-I", "FMHH12")
15407 .replace("%p", "AM")
15408 .replace("%F", "YYYY-MM-DD")
15409 .replace("%T", "HH24:MI:SS");
15410 Ok(Expression::Function(Box::new(Function::new(
15411 "TO_CHAR".to_string(),
15412 vec![val, Expression::string(&pg_fmt)],
15413 ))))
15414 } else {
15415 Ok(Expression::Function(Box::new(Function::new(
15416 "TO_CHAR".to_string(),
15417 vec![val, fmt_expr.clone()],
15418 ))))
15419 }
15420 } else {
15421 Ok(Expression::Function(Box::new(Function::new(
15422 "TO_CHAR".to_string(),
15423 vec![val, fmt_expr.clone()],
15424 ))))
15425 }
15426 }
15427 _ => Ok(Expression::Function(f)),
15428 }
15429 }
15430 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
15431 "STRPTIME" if f.args.len() == 2 => {
15432 let val = f.args[0].clone();
15433 let fmt_expr = &f.args[1];
15434
/// Rewrites a C/strftime-style format string (the form `STRPTIME` receives)
/// as a Java-style date pattern.
///
/// The conversion is plain sequential text substitution: every entry of the
/// table below is applied to the whole string, in table order. Specifiers
/// that are not listed, and all literal text, pass through unchanged.
fn c_to_java_format_parse(fmt: &str) -> String {
    // (strftime specifier, Java pattern), applied top to bottom.
    const SPECIFIERS: [(&str, &str); 16] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    SPECIFIERS
        .iter()
        .fold(fmt.to_string(), |acc, &(from, to)| acc.replace(from, to))
}
15453
15454 match target {
15455 DialectType::DuckDB => Ok(Expression::Function(f)),
15456 DialectType::Spark | DialectType::Databricks => {
15457 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
15458 if let Expression::Literal(lit) = fmt_expr {
15459 if let crate::expressions::Literal::String(s) =
15460 lit.as_ref()
15461 {
15462 let java_fmt = c_to_java_format_parse(s);
15463 Ok(Expression::Function(Box::new(Function::new(
15464 "TO_TIMESTAMP".to_string(),
15465 vec![val, Expression::string(&java_fmt)],
15466 ))))
15467 } else {
15468 Ok(Expression::Function(Box::new(Function::new(
15469 "TO_TIMESTAMP".to_string(),
15470 vec![val, fmt_expr.clone()],
15471 ))))
15472 }
15473 } else {
15474 Ok(Expression::Function(Box::new(Function::new(
15475 "TO_TIMESTAMP".to_string(),
15476 vec![val, fmt_expr.clone()],
15477 ))))
15478 }
15479 }
15480 DialectType::Hive => {
15481 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
15482 if let Expression::Literal(lit) = fmt_expr {
15483 if let crate::expressions::Literal::String(s) =
15484 lit.as_ref()
15485 {
15486 let java_fmt = c_to_java_format_parse(s);
15487 let unix_ts =
15488 Expression::Function(Box::new(Function::new(
15489 "UNIX_TIMESTAMP".to_string(),
15490 vec![val, Expression::string(&java_fmt)],
15491 )));
15492 let from_unix =
15493 Expression::Function(Box::new(Function::new(
15494 "FROM_UNIXTIME".to_string(),
15495 vec![unix_ts],
15496 )));
15497 Ok(Expression::Cast(Box::new(
15498 crate::expressions::Cast {
15499 this: from_unix,
15500 to: DataType::Timestamp {
15501 timezone: false,
15502 precision: None,
15503 },
15504 trailing_comments: Vec::new(),
15505 double_colon_syntax: false,
15506 format: None,
15507 default: None,
15508 inferred_type: None,
15509 },
15510 )))
15511 } else {
15512 Ok(Expression::Function(f))
15513 }
15514 } else {
15515 Ok(Expression::Function(f))
15516 }
15517 }
15518 DialectType::Presto
15519 | DialectType::Trino
15520 | DialectType::Athena => {
15521 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
15522 if let Expression::Literal(lit) = fmt_expr {
15523 if let crate::expressions::Literal::String(s) =
15524 lit.as_ref()
15525 {
15526 let presto_fmt = duckdb_to_presto_format(s);
15527 Ok(Expression::Function(Box::new(Function::new(
15528 "DATE_PARSE".to_string(),
15529 vec![val, Expression::string(&presto_fmt)],
15530 ))))
15531 } else {
15532 Ok(Expression::Function(Box::new(Function::new(
15533 "DATE_PARSE".to_string(),
15534 vec![val, fmt_expr.clone()],
15535 ))))
15536 }
15537 } else {
15538 Ok(Expression::Function(Box::new(Function::new(
15539 "DATE_PARSE".to_string(),
15540 vec![val, fmt_expr.clone()],
15541 ))))
15542 }
15543 }
15544 DialectType::BigQuery => {
15545 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
15546 if let Expression::Literal(lit) = fmt_expr {
15547 if let crate::expressions::Literal::String(s) =
15548 lit.as_ref()
15549 {
15550 let bq_fmt = duckdb_to_bigquery_format(s);
15551 Ok(Expression::Function(Box::new(Function::new(
15552 "PARSE_TIMESTAMP".to_string(),
15553 vec![Expression::string(&bq_fmt), val],
15554 ))))
15555 } else {
15556 Ok(Expression::Function(Box::new(Function::new(
15557 "PARSE_TIMESTAMP".to_string(),
15558 vec![fmt_expr.clone(), val],
15559 ))))
15560 }
15561 } else {
15562 Ok(Expression::Function(Box::new(Function::new(
15563 "PARSE_TIMESTAMP".to_string(),
15564 vec![fmt_expr.clone(), val],
15565 ))))
15566 }
15567 }
15568 _ => Ok(Expression::Function(f)),
15569 }
15570 }
15571 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
15572 "DATE_FORMAT"
15573 if f.args.len() >= 2
15574 && matches!(
15575 source,
15576 DialectType::Presto
15577 | DialectType::Trino
15578 | DialectType::Athena
15579 ) =>
15580 {
15581 let val = f.args[0].clone();
15582 let fmt_expr = &f.args[1];
15583
15584 match target {
15585 DialectType::Presto
15586 | DialectType::Trino
15587 | DialectType::Athena => {
15588 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
15589 if let Expression::Literal(lit) = fmt_expr {
15590 if let crate::expressions::Literal::String(s) =
15591 lit.as_ref()
15592 {
15593 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15594 Ok(Expression::Function(Box::new(Function::new(
15595 "DATE_FORMAT".to_string(),
15596 vec![val, Expression::string(&normalized)],
15597 ))))
15598 } else {
15599 Ok(Expression::Function(f))
15600 }
15601 } else {
15602 Ok(Expression::Function(f))
15603 }
15604 }
15605 DialectType::Hive
15606 | DialectType::Spark
15607 | DialectType::Databricks => {
15608 // Convert Presto C-style to Java-style format
15609 if let Expression::Literal(lit) = fmt_expr {
15610 if let crate::expressions::Literal::String(s) =
15611 lit.as_ref()
15612 {
15613 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15614 Ok(Expression::Function(Box::new(Function::new(
15615 "DATE_FORMAT".to_string(),
15616 vec![val, Expression::string(&java_fmt)],
15617 ))))
15618 } else {
15619 Ok(Expression::Function(f))
15620 }
15621 } else {
15622 Ok(Expression::Function(f))
15623 }
15624 }
15625 DialectType::DuckDB => {
15626 // Convert to STRFTIME(val, duckdb_fmt)
15627 if let Expression::Literal(lit) = fmt_expr {
15628 if let crate::expressions::Literal::String(s) =
15629 lit.as_ref()
15630 {
15631 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15632 Ok(Expression::Function(Box::new(Function::new(
15633 "STRFTIME".to_string(),
15634 vec![val, Expression::string(&duckdb_fmt)],
15635 ))))
15636 } else {
15637 Ok(Expression::Function(Box::new(Function::new(
15638 "STRFTIME".to_string(),
15639 vec![val, fmt_expr.clone()],
15640 ))))
15641 }
15642 } else {
15643 Ok(Expression::Function(Box::new(Function::new(
15644 "STRFTIME".to_string(),
15645 vec![val, fmt_expr.clone()],
15646 ))))
15647 }
15648 }
15649 DialectType::BigQuery => {
15650 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
15651 if let Expression::Literal(lit) = fmt_expr {
15652 if let crate::expressions::Literal::String(s) =
15653 lit.as_ref()
15654 {
15655 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
15656 Ok(Expression::Function(Box::new(Function::new(
15657 "FORMAT_DATE".to_string(),
15658 vec![Expression::string(&bq_fmt), val],
15659 ))))
15660 } else {
15661 Ok(Expression::Function(Box::new(Function::new(
15662 "FORMAT_DATE".to_string(),
15663 vec![fmt_expr.clone(), val],
15664 ))))
15665 }
15666 } else {
15667 Ok(Expression::Function(Box::new(Function::new(
15668 "FORMAT_DATE".to_string(),
15669 vec![fmt_expr.clone(), val],
15670 ))))
15671 }
15672 }
15673 _ => Ok(Expression::Function(f)),
15674 }
15675 }
15676 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
15677 "DATE_PARSE"
15678 if f.args.len() >= 2
15679 && matches!(
15680 source,
15681 DialectType::Presto
15682 | DialectType::Trino
15683 | DialectType::Athena
15684 ) =>
15685 {
15686 let val = f.args[0].clone();
15687 let fmt_expr = &f.args[1];
15688
15689 match target {
15690 DialectType::Presto
15691 | DialectType::Trino
15692 | DialectType::Athena => {
15693 // Presto -> Presto: normalize format
15694 if let Expression::Literal(lit) = fmt_expr {
15695 if let crate::expressions::Literal::String(s) =
15696 lit.as_ref()
15697 {
15698 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15699 Ok(Expression::Function(Box::new(Function::new(
15700 "DATE_PARSE".to_string(),
15701 vec![val, Expression::string(&normalized)],
15702 ))))
15703 } else {
15704 Ok(Expression::Function(f))
15705 }
15706 } else {
15707 Ok(Expression::Function(f))
15708 }
15709 }
15710 DialectType::Hive => {
15711 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
15712 if let Expression::Literal(lit) = fmt_expr {
15713 if let crate::expressions::Literal::String(s) =
15714 lit.as_ref()
15715 {
15716 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
15717 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
15718 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15719 this: val,
15720 to: DataType::Timestamp { timezone: false, precision: None },
15721 trailing_comments: Vec::new(),
15722 double_colon_syntax: false,
15723 format: None,
15724 default: None,
15725 inferred_type: None,
15726 })))
15727 } else {
15728 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15729 Ok(Expression::Function(Box::new(Function::new(
15730 "TO_TIMESTAMP".to_string(),
15731 vec![val, Expression::string(&java_fmt)],
15732 ))))
15733 }
15734 } else {
15735 Ok(Expression::Function(f))
15736 }
15737 } else {
15738 Ok(Expression::Function(f))
15739 }
15740 }
15741 DialectType::Spark | DialectType::Databricks => {
15742 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
15743 if let Expression::Literal(lit) = fmt_expr {
15744 if let crate::expressions::Literal::String(s) =
15745 lit.as_ref()
15746 {
15747 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15748 Ok(Expression::Function(Box::new(Function::new(
15749 "TO_TIMESTAMP".to_string(),
15750 vec![val, Expression::string(&java_fmt)],
15751 ))))
15752 } else {
15753 Ok(Expression::Function(f))
15754 }
15755 } else {
15756 Ok(Expression::Function(f))
15757 }
15758 }
15759 DialectType::DuckDB => {
15760 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
15761 if let Expression::Literal(lit) = fmt_expr {
15762 if let crate::expressions::Literal::String(s) =
15763 lit.as_ref()
15764 {
15765 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15766 Ok(Expression::Function(Box::new(Function::new(
15767 "STRPTIME".to_string(),
15768 vec![val, Expression::string(&duckdb_fmt)],
15769 ))))
15770 } else {
15771 Ok(Expression::Function(Box::new(Function::new(
15772 "STRPTIME".to_string(),
15773 vec![val, fmt_expr.clone()],
15774 ))))
15775 }
15776 } else {
15777 Ok(Expression::Function(Box::new(Function::new(
15778 "STRPTIME".to_string(),
15779 vec![val, fmt_expr.clone()],
15780 ))))
15781 }
15782 }
15783 _ => Ok(Expression::Function(f)),
15784 }
15785 }
15786 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
15787 "FROM_BASE64"
15788 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15789 {
15790 Ok(Expression::Function(Box::new(Function::new(
15791 "UNBASE64".to_string(),
15792 f.args,
15793 ))))
15794 }
15795 "TO_BASE64"
15796 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15797 {
15798 Ok(Expression::Function(Box::new(Function::new(
15799 "BASE64".to_string(),
15800 f.args,
15801 ))))
15802 }
15803 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
15804 "FROM_UNIXTIME"
15805 if f.args.len() == 1
15806 && matches!(
15807 source,
15808 DialectType::Presto
15809 | DialectType::Trino
15810 | DialectType::Athena
15811 )
15812 && matches!(
15813 target,
15814 DialectType::Spark | DialectType::Databricks
15815 ) =>
15816 {
15817 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
15818 let from_unix = Expression::Function(Box::new(Function::new(
15819 "FROM_UNIXTIME".to_string(),
15820 f.args,
15821 )));
15822 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15823 this: from_unix,
15824 to: DataType::Timestamp {
15825 timezone: false,
15826 precision: None,
15827 },
15828 trailing_comments: Vec::new(),
15829 double_colon_syntax: false,
15830 format: None,
15831 default: None,
15832 inferred_type: None,
15833 })))
15834 }
15835 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
15836 "DATE_FORMAT"
15837 if f.args.len() >= 2
15838 && !matches!(
15839 target,
15840 DialectType::Hive
15841 | DialectType::Spark
15842 | DialectType::Databricks
15843 | DialectType::MySQL
15844 | DialectType::SingleStore
15845 ) =>
15846 {
15847 let val = f.args[0].clone();
15848 let fmt_expr = &f.args[1];
15849 let is_hive_source = matches!(
15850 source,
15851 DialectType::Hive
15852 | DialectType::Spark
15853 | DialectType::Databricks
15854 );
15855
/// Shared Java-pattern -> strftime conversion used by the three wrappers below.
///
/// `full_weekday` is the specifier emitted for Java `EEEE` (full weekday
/// name). It is a parameter because targets disagree: C strftime, DuckDB and
/// BigQuery use `%A` (there `%W` is a week number), while Presto uses `%W`.
///
/// Works in two passes:
/// 1. multi-character Java patterns are replaced, longer ones first so that
///    e.g. `yyyy` is consumed before `yy`;
/// 2. the single-character timezone patterns are rewritten (`z` -> `%Z`,
///    `Z` -> `%z`) while skipping over specifiers produced by pass 1.
fn java_to_strftime_with_weekday(fmt: &str, full_weekday: &str) -> String {
    let replaced = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", full_weekday)
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");

    let mut out = String::with_capacity(replaced.len());
    let mut chars = replaced.chars();
    while let Some(c) = chars.next() {
        match c {
            '%' => {
                // Already a format specifier: copy `%` and the character
                // after it verbatim so that e.g. `%Z` is never rewritten again.
                // A trailing lone `%` is copied as-is.
                out.push('%');
                if let Some(spec) = chars.next() {
                    out.push(spec);
                }
            }
            'z' => out.push_str("%Z"), // timezone name
            'Z' => out.push_str("%z"), // timezone offset
            other => out.push(other),
        }
    }
    out
}

/// Converts a Java-style date pattern into a C strftime format string.
///
/// Java `EEEE` (full weekday name) becomes `%A`. It previously became `%W`,
/// which in C strftime is the week number of the year, not a weekday name.
fn java_to_c_format(fmt: &str) -> String {
    java_to_strftime_with_weekday(fmt, "%A")
}

/// Converts a Java-style date pattern into a Presto format string.
///
/// Presto spells the full weekday name `%W`, and `%H:%M:%S` is collapsed to
/// the shorthand `%T`.
fn java_to_presto_format(fmt: &str) -> String {
    java_to_strftime_with_weekday(fmt, "%W").replace("%H:%M:%S", "%T")
}

/// Converts a Java-style date pattern into a BigQuery format string.
///
/// BigQuery uses `%F` for `yyyy-MM-dd` and `%T` for `HH:mm:ss`; the full
/// weekday name is `%A`, as in C strftime.
fn java_to_bq_format(fmt: &str) -> String {
    java_to_c_format(fmt)
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
15907
15908 // For Hive source, CAST string literals to appropriate type
15909 let cast_val = if is_hive_source {
15910 match &val {
15911 Expression::Literal(lit)
15912 if matches!(
15913 lit.as_ref(),
15914 crate::expressions::Literal::String(_)
15915 ) =>
15916 {
15917 match target {
15918 DialectType::DuckDB
15919 | DialectType::Presto
15920 | DialectType::Trino
15921 | DialectType::Athena => {
15922 Self::ensure_cast_timestamp(val.clone())
15923 }
15924 DialectType::BigQuery => {
15925 // BigQuery: CAST(val AS DATETIME)
15926 Expression::Cast(Box::new(
15927 crate::expressions::Cast {
15928 this: val.clone(),
15929 to: DataType::Custom {
15930 name: "DATETIME".to_string(),
15931 },
15932 trailing_comments: vec![],
15933 double_colon_syntax: false,
15934 format: None,
15935 default: None,
15936 inferred_type: None,
15937 },
15938 ))
15939 }
15940 _ => val.clone(),
15941 }
15942 }
15943 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
15944 Expression::Cast(c)
15945 if matches!(c.to, DataType::Date)
15946 && matches!(
15947 target,
15948 DialectType::Presto
15949 | DialectType::Trino
15950 | DialectType::Athena
15951 ) =>
15952 {
15953 Expression::Cast(Box::new(crate::expressions::Cast {
15954 this: val.clone(),
15955 to: DataType::Timestamp {
15956 timezone: false,
15957 precision: None,
15958 },
15959 trailing_comments: vec![],
15960 double_colon_syntax: false,
15961 format: None,
15962 default: None,
15963 inferred_type: None,
15964 }))
15965 }
15966 Expression::Literal(lit)
15967 if matches!(
15968 lit.as_ref(),
15969 crate::expressions::Literal::Date(_)
15970 ) && matches!(
15971 target,
15972 DialectType::Presto
15973 | DialectType::Trino
15974 | DialectType::Athena
15975 ) =>
15976 {
15977 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
15978 let cast_date = Self::date_literal_to_cast(val.clone());
15979 Expression::Cast(Box::new(crate::expressions::Cast {
15980 this: cast_date,
15981 to: DataType::Timestamp {
15982 timezone: false,
15983 precision: None,
15984 },
15985 trailing_comments: vec![],
15986 double_colon_syntax: false,
15987 format: None,
15988 default: None,
15989 inferred_type: None,
15990 }))
15991 }
15992 _ => val.clone(),
15993 }
15994 } else {
15995 val.clone()
15996 };
15997
15998 match target {
15999 DialectType::DuckDB => {
16000 if let Expression::Literal(lit) = fmt_expr {
16001 if let crate::expressions::Literal::String(s) =
16002 lit.as_ref()
16003 {
16004 let c_fmt = if is_hive_source {
16005 java_to_c_format(s)
16006 } else {
16007 s.clone()
16008 };
16009 Ok(Expression::Function(Box::new(Function::new(
16010 "STRFTIME".to_string(),
16011 vec![cast_val, Expression::string(&c_fmt)],
16012 ))))
16013 } else {
16014 Ok(Expression::Function(Box::new(Function::new(
16015 "STRFTIME".to_string(),
16016 vec![cast_val, fmt_expr.clone()],
16017 ))))
16018 }
16019 } else {
16020 Ok(Expression::Function(Box::new(Function::new(
16021 "STRFTIME".to_string(),
16022 vec![cast_val, fmt_expr.clone()],
16023 ))))
16024 }
16025 }
16026 DialectType::Presto
16027 | DialectType::Trino
16028 | DialectType::Athena => {
16029 if is_hive_source {
16030 if let Expression::Literal(lit) = fmt_expr {
16031 if let crate::expressions::Literal::String(s) =
16032 lit.as_ref()
16033 {
16034 let p_fmt = java_to_presto_format(s);
16035 Ok(Expression::Function(Box::new(
16036 Function::new(
16037 "DATE_FORMAT".to_string(),
16038 vec![
16039 cast_val,
16040 Expression::string(&p_fmt),
16041 ],
16042 ),
16043 )))
16044 } else {
16045 Ok(Expression::Function(Box::new(
16046 Function::new(
16047 "DATE_FORMAT".to_string(),
16048 vec![cast_val, fmt_expr.clone()],
16049 ),
16050 )))
16051 }
16052 } else {
16053 Ok(Expression::Function(Box::new(Function::new(
16054 "DATE_FORMAT".to_string(),
16055 vec![cast_val, fmt_expr.clone()],
16056 ))))
16057 }
16058 } else {
16059 Ok(Expression::Function(Box::new(Function::new(
16060 "DATE_FORMAT".to_string(),
16061 f.args,
16062 ))))
16063 }
16064 }
16065 DialectType::BigQuery => {
16066 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
16067 if let Expression::Literal(lit) = fmt_expr {
16068 if let crate::expressions::Literal::String(s) =
16069 lit.as_ref()
16070 {
16071 let bq_fmt = if is_hive_source {
16072 java_to_bq_format(s)
16073 } else {
16074 java_to_c_format(s)
16075 };
16076 Ok(Expression::Function(Box::new(Function::new(
16077 "FORMAT_DATE".to_string(),
16078 vec![Expression::string(&bq_fmt), cast_val],
16079 ))))
16080 } else {
16081 Ok(Expression::Function(Box::new(Function::new(
16082 "FORMAT_DATE".to_string(),
16083 vec![fmt_expr.clone(), cast_val],
16084 ))))
16085 }
16086 } else {
16087 Ok(Expression::Function(Box::new(Function::new(
16088 "FORMAT_DATE".to_string(),
16089 vec![fmt_expr.clone(), cast_val],
16090 ))))
16091 }
16092 }
16093 DialectType::PostgreSQL | DialectType::Redshift => {
16094 if let Expression::Literal(lit) = fmt_expr {
16095 if let crate::expressions::Literal::String(s) =
16096 lit.as_ref()
16097 {
16098 let pg_fmt = s
16099 .replace("yyyy", "YYYY")
16100 .replace("MM", "MM")
16101 .replace("dd", "DD")
16102 .replace("HH", "HH24")
16103 .replace("mm", "MI")
16104 .replace("ss", "SS")
16105 .replace("yy", "YY");
16106 Ok(Expression::Function(Box::new(Function::new(
16107 "TO_CHAR".to_string(),
16108 vec![val, Expression::string(&pg_fmt)],
16109 ))))
16110 } else {
16111 Ok(Expression::Function(Box::new(Function::new(
16112 "TO_CHAR".to_string(),
16113 vec![val, fmt_expr.clone()],
16114 ))))
16115 }
16116 } else {
16117 Ok(Expression::Function(Box::new(Function::new(
16118 "TO_CHAR".to_string(),
16119 vec![val, fmt_expr.clone()],
16120 ))))
16121 }
16122 }
16123 _ => Ok(Expression::Function(f)),
16124 }
16125 }
16126 // DATEDIFF(unit, start, end) - 3-arg form
16127 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
16128 "DATEDIFF" if f.args.len() == 3 => {
16129 let mut args = f.args;
16130 // SQLite source: args = (date1, date2, unit_string)
16131 // Standard source: args = (unit, start, end)
16132 let (_arg0, arg1, arg2, unit_str) =
16133 if matches!(source, DialectType::SQLite) {
16134 let date1 = args.remove(0);
16135 let date2 = args.remove(0);
16136 let unit_expr = args.remove(0);
16137 let unit_s = Self::get_unit_str_static(&unit_expr);
16138
16139 // For SQLite target, generate JULIANDAY arithmetic directly
16140 if matches!(target, DialectType::SQLite) {
16141 let jd_first = Expression::Function(Box::new(
16142 Function::new("JULIANDAY".to_string(), vec![date1]),
16143 ));
16144 let jd_second = Expression::Function(Box::new(
16145 Function::new("JULIANDAY".to_string(), vec![date2]),
16146 ));
16147 let diff = Expression::Sub(Box::new(
16148 crate::expressions::BinaryOp::new(
16149 jd_first, jd_second,
16150 ),
16151 ));
16152 let paren_diff = Expression::Paren(Box::new(
16153 crate::expressions::Paren {
16154 this: diff,
16155 trailing_comments: Vec::new(),
16156 },
16157 ));
16158 let adjusted = match unit_s.as_str() {
16159 "HOUR" => Expression::Mul(Box::new(
16160 crate::expressions::BinaryOp::new(
16161 paren_diff,
16162 Expression::Literal(Box::new(
16163 Literal::Number("24.0".to_string()),
16164 )),
16165 ),
16166 )),
16167 "MINUTE" => Expression::Mul(Box::new(
16168 crate::expressions::BinaryOp::new(
16169 paren_diff,
16170 Expression::Literal(Box::new(
16171 Literal::Number("1440.0".to_string()),
16172 )),
16173 ),
16174 )),
16175 "SECOND" => Expression::Mul(Box::new(
16176 crate::expressions::BinaryOp::new(
16177 paren_diff,
16178 Expression::Literal(Box::new(
16179 Literal::Number("86400.0".to_string()),
16180 )),
16181 ),
16182 )),
16183 "MONTH" => Expression::Div(Box::new(
16184 crate::expressions::BinaryOp::new(
16185 paren_diff,
16186 Expression::Literal(Box::new(
16187 Literal::Number("30.0".to_string()),
16188 )),
16189 ),
16190 )),
16191 "YEAR" => Expression::Div(Box::new(
16192 crate::expressions::BinaryOp::new(
16193 paren_diff,
16194 Expression::Literal(Box::new(
16195 Literal::Number("365.0".to_string()),
16196 )),
16197 ),
16198 )),
16199 _ => paren_diff,
16200 };
16201 return Ok(Expression::Cast(Box::new(Cast {
16202 this: adjusted,
16203 to: DataType::Int {
16204 length: None,
16205 integer_spelling: true,
16206 },
16207 trailing_comments: vec![],
16208 double_colon_syntax: false,
16209 format: None,
16210 default: None,
16211 inferred_type: None,
16212 })));
16213 }
16214
16215 // For other targets, remap to standard (unit, start, end) form
16216 let unit_ident =
16217 Expression::Identifier(Identifier::new(&unit_s));
16218 (unit_ident, date1, date2, unit_s)
16219 } else {
16220 let arg0 = args.remove(0);
16221 let arg1 = args.remove(0);
16222 let arg2 = args.remove(0);
16223 let unit_s = Self::get_unit_str_static(&arg0);
16224 (arg0, arg1, arg2, unit_s)
16225 };
16226
16227 // For Hive/Spark source, string literal dates need to be cast
16228 // Note: Databricks is excluded - it handles string args like standard SQL
16229 let is_hive_spark =
16230 matches!(source, DialectType::Hive | DialectType::Spark);
16231
16232 match target {
16233 DialectType::Snowflake => {
16234 let unit =
16235 Expression::Identifier(Identifier::new(&unit_str));
16236 // Use ensure_to_date_preserved to add TO_DATE with a marker
16237 // that prevents the Snowflake TO_DATE handler from converting it to CAST
16238 let d1 = if is_hive_spark {
16239 Self::ensure_to_date_preserved(arg1)
16240 } else {
16241 arg1
16242 };
16243 let d2 = if is_hive_spark {
16244 Self::ensure_to_date_preserved(arg2)
16245 } else {
16246 arg2
16247 };
16248 Ok(Expression::Function(Box::new(Function::new(
16249 "DATEDIFF".to_string(),
16250 vec![unit, d1, d2],
16251 ))))
16252 }
16253 DialectType::Redshift => {
16254 let unit =
16255 Expression::Identifier(Identifier::new(&unit_str));
16256 let d1 = if is_hive_spark {
16257 Self::ensure_cast_date(arg1)
16258 } else {
16259 arg1
16260 };
16261 let d2 = if is_hive_spark {
16262 Self::ensure_cast_date(arg2)
16263 } else {
16264 arg2
16265 };
16266 Ok(Expression::Function(Box::new(Function::new(
16267 "DATEDIFF".to_string(),
16268 vec![unit, d1, d2],
16269 ))))
16270 }
16271 DialectType::TSQL => {
16272 let unit =
16273 Expression::Identifier(Identifier::new(&unit_str));
16274 Ok(Expression::Function(Box::new(Function::new(
16275 "DATEDIFF".to_string(),
16276 vec![unit, arg1, arg2],
16277 ))))
16278 }
16279 DialectType::DuckDB => {
16280 let is_redshift_tsql = matches!(
16281 source,
16282 DialectType::Redshift | DialectType::TSQL
16283 );
16284 if is_hive_spark {
16285 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
16286 let d1 = Self::ensure_cast_date(arg1);
16287 let d2 = Self::ensure_cast_date(arg2);
16288 Ok(Expression::Function(Box::new(Function::new(
16289 "DATE_DIFF".to_string(),
16290 vec![Expression::string(&unit_str), d1, d2],
16291 ))))
16292 } else if matches!(source, DialectType::Snowflake) {
16293 // For Snowflake source: special handling per unit
16294 match unit_str.as_str() {
16295 "NANOSECOND" => {
16296 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
16297 fn cast_to_timestamp_ns(
16298 expr: Expression,
16299 ) -> Expression
16300 {
16301 Expression::Cast(Box::new(Cast {
16302 this: expr,
16303 to: DataType::Custom {
16304 name: "TIMESTAMP_NS".to_string(),
16305 },
16306 trailing_comments: vec![],
16307 double_colon_syntax: false,
16308 format: None,
16309 default: None,
16310 inferred_type: None,
16311 }))
16312 }
16313 let epoch_end = Expression::Function(Box::new(
16314 Function::new(
16315 "EPOCH_NS".to_string(),
16316 vec![cast_to_timestamp_ns(arg2)],
16317 ),
16318 ));
16319 let epoch_start = Expression::Function(
16320 Box::new(Function::new(
16321 "EPOCH_NS".to_string(),
16322 vec![cast_to_timestamp_ns(arg1)],
16323 )),
16324 );
16325 Ok(Expression::Sub(Box::new(BinaryOp::new(
16326 epoch_end,
16327 epoch_start,
16328 ))))
16329 }
16330 "WEEK" => {
16331 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
16332 let d1 = Self::force_cast_date(arg1);
16333 let d2 = Self::force_cast_date(arg2);
16334 let dt1 = Expression::Function(Box::new(
16335 Function::new(
16336 "DATE_TRUNC".to_string(),
16337 vec![Expression::string("WEEK"), d1],
16338 ),
16339 ));
16340 let dt2 = Expression::Function(Box::new(
16341 Function::new(
16342 "DATE_TRUNC".to_string(),
16343 vec![Expression::string("WEEK"), d2],
16344 ),
16345 ));
16346 Ok(Expression::Function(Box::new(
16347 Function::new(
16348 "DATE_DIFF".to_string(),
16349 vec![
16350 Expression::string(&unit_str),
16351 dt1,
16352 dt2,
16353 ],
16354 ),
16355 )))
16356 }
16357 _ => {
16358 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
16359 let d1 = Self::force_cast_date(arg1);
16360 let d2 = Self::force_cast_date(arg2);
16361 Ok(Expression::Function(Box::new(
16362 Function::new(
16363 "DATE_DIFF".to_string(),
16364 vec![
16365 Expression::string(&unit_str),
16366 d1,
16367 d2,
16368 ],
16369 ),
16370 )))
16371 }
16372 }
16373 } else if is_redshift_tsql {
16374 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
16375 let d1 = Self::force_cast_timestamp(arg1);
16376 let d2 = Self::force_cast_timestamp(arg2);
16377 Ok(Expression::Function(Box::new(Function::new(
16378 "DATE_DIFF".to_string(),
16379 vec![Expression::string(&unit_str), d1, d2],
16380 ))))
16381 } else {
16382 // Keep as DATEDIFF so DuckDB's transform_datediff handles
16383 // DATE_TRUNC for WEEK, CAST for string literals, etc.
16384 let unit =
16385 Expression::Identifier(Identifier::new(&unit_str));
16386 Ok(Expression::Function(Box::new(Function::new(
16387 "DATEDIFF".to_string(),
16388 vec![unit, arg1, arg2],
16389 ))))
16390 }
16391 }
16392 DialectType::BigQuery => {
16393 let is_redshift_tsql = matches!(
16394 source,
16395 DialectType::Redshift
16396 | DialectType::TSQL
16397 | DialectType::Snowflake
16398 );
16399 let cast_d1 = if is_hive_spark {
16400 Self::ensure_cast_date(arg1)
16401 } else if is_redshift_tsql {
16402 Self::force_cast_datetime(arg1)
16403 } else {
16404 Self::ensure_cast_datetime(arg1)
16405 };
16406 let cast_d2 = if is_hive_spark {
16407 Self::ensure_cast_date(arg2)
16408 } else if is_redshift_tsql {
16409 Self::force_cast_datetime(arg2)
16410 } else {
16411 Self::ensure_cast_datetime(arg2)
16412 };
16413 let unit =
16414 Expression::Identifier(Identifier::new(&unit_str));
16415 Ok(Expression::Function(Box::new(Function::new(
16416 "DATE_DIFF".to_string(),
16417 vec![cast_d2, cast_d1, unit],
16418 ))))
16419 }
16420 DialectType::Presto
16421 | DialectType::Trino
16422 | DialectType::Athena => {
16423 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
16424 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
16425 let is_redshift_tsql = matches!(
16426 source,
16427 DialectType::Redshift
16428 | DialectType::TSQL
16429 | DialectType::Snowflake
16430 );
16431 let d1 = if is_hive_spark {
16432 Self::double_cast_timestamp_date(arg1)
16433 } else if is_redshift_tsql {
16434 Self::force_cast_timestamp(arg1)
16435 } else {
16436 arg1
16437 };
16438 let d2 = if is_hive_spark {
16439 Self::double_cast_timestamp_date(arg2)
16440 } else if is_redshift_tsql {
16441 Self::force_cast_timestamp(arg2)
16442 } else {
16443 arg2
16444 };
16445 Ok(Expression::Function(Box::new(Function::new(
16446 "DATE_DIFF".to_string(),
16447 vec![Expression::string(&unit_str), d1, d2],
16448 ))))
16449 }
16450 DialectType::Hive => match unit_str.as_str() {
16451 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
16452 this: Expression::Function(Box::new(Function::new(
16453 "MONTHS_BETWEEN".to_string(),
16454 vec![arg2, arg1],
16455 ))),
16456 to: DataType::Int {
16457 length: None,
16458 integer_spelling: false,
16459 },
16460 trailing_comments: vec![],
16461 double_colon_syntax: false,
16462 format: None,
16463 default: None,
16464 inferred_type: None,
16465 }))),
16466 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
16467 this: Expression::Div(Box::new(
16468 crate::expressions::BinaryOp::new(
16469 Expression::Function(Box::new(Function::new(
16470 "DATEDIFF".to_string(),
16471 vec![arg2, arg1],
16472 ))),
16473 Expression::number(7),
16474 ),
16475 )),
16476 to: DataType::Int {
16477 length: None,
16478 integer_spelling: false,
16479 },
16480 trailing_comments: vec![],
16481 double_colon_syntax: false,
16482 format: None,
16483 default: None,
16484 inferred_type: None,
16485 }))),
16486 _ => Ok(Expression::Function(Box::new(Function::new(
16487 "DATEDIFF".to_string(),
16488 vec![arg2, arg1],
16489 )))),
16490 },
16491 DialectType::Spark | DialectType::Databricks => {
16492 let unit =
16493 Expression::Identifier(Identifier::new(&unit_str));
16494 Ok(Expression::Function(Box::new(Function::new(
16495 "DATEDIFF".to_string(),
16496 vec![unit, arg1, arg2],
16497 ))))
16498 }
16499 _ => {
16500 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
16501 let d1 = if is_hive_spark {
16502 Self::ensure_cast_date(arg1)
16503 } else {
16504 arg1
16505 };
16506 let d2 = if is_hive_spark {
16507 Self::ensure_cast_date(arg2)
16508 } else {
16509 arg2
16510 };
16511 let unit =
16512 Expression::Identifier(Identifier::new(&unit_str));
16513 Ok(Expression::Function(Box::new(Function::new(
16514 "DATEDIFF".to_string(),
16515 vec![unit, d1, d2],
16516 ))))
16517 }
16518 }
16519 }
16520 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
16521 "DATEDIFF" if f.args.len() == 2 => {
16522 let mut args = f.args;
16523 let arg0 = args.remove(0);
16524 let arg1 = args.remove(0);
16525
16526 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
16527 // Also recognizes TryCast/Cast to DATE that may have been produced by
16528 // cross-dialect TO_DATE -> TRY_CAST conversion
16529 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
16530 if let Expression::Function(ref f) = e {
16531 if f.name.eq_ignore_ascii_case("TO_DATE")
16532 && f.args.len() == 1
16533 {
16534 return (f.args[0].clone(), true);
16535 }
16536 }
16537 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
16538 if let Expression::TryCast(ref c) = e {
16539 if matches!(c.to, DataType::Date) {
16540 return (e, true); // Already properly cast, return as-is
16541 }
16542 }
16543 (e, false)
16544 };
16545
16546 match target {
16547 DialectType::DuckDB => {
16548 // For Hive source, always CAST to DATE
16549 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
16550 let cast_d0 = if matches!(
16551 source,
16552 DialectType::Hive
16553 | DialectType::Spark
16554 | DialectType::Databricks
16555 ) {
16556 let (inner, was_to_date) = unwrap_to_date(arg1);
16557 if was_to_date {
16558 // Already a date expression, use directly
16559 if matches!(&inner, Expression::TryCast(_)) {
16560 inner // Already TRY_CAST(x AS DATE)
16561 } else {
16562 Self::try_cast_date(inner)
16563 }
16564 } else {
16565 Self::force_cast_date(inner)
16566 }
16567 } else {
16568 Self::ensure_cast_date(arg1)
16569 };
16570 let cast_d1 = if matches!(
16571 source,
16572 DialectType::Hive
16573 | DialectType::Spark
16574 | DialectType::Databricks
16575 ) {
16576 let (inner, was_to_date) = unwrap_to_date(arg0);
16577 if was_to_date {
16578 if matches!(&inner, Expression::TryCast(_)) {
16579 inner
16580 } else {
16581 Self::try_cast_date(inner)
16582 }
16583 } else {
16584 Self::force_cast_date(inner)
16585 }
16586 } else {
16587 Self::ensure_cast_date(arg0)
16588 };
16589 Ok(Expression::Function(Box::new(Function::new(
16590 "DATE_DIFF".to_string(),
16591 vec![Expression::string("DAY"), cast_d0, cast_d1],
16592 ))))
16593 }
16594 DialectType::Presto
16595 | DialectType::Trino
16596 | DialectType::Athena => {
16597 // For Hive/Spark source, apply double_cast_timestamp_date
16598 // For other sources (MySQL etc.), just swap args without casting
16599 if matches!(
16600 source,
16601 DialectType::Hive
16602 | DialectType::Spark
16603 | DialectType::Databricks
16604 ) {
16605 let cast_fn = |e: Expression| -> Expression {
16606 let (inner, was_to_date) = unwrap_to_date(e);
16607 if was_to_date {
16608 let first_cast =
16609 Self::double_cast_timestamp_date(inner);
16610 Self::double_cast_timestamp_date(first_cast)
16611 } else {
16612 Self::double_cast_timestamp_date(inner)
16613 }
16614 };
16615 Ok(Expression::Function(Box::new(Function::new(
16616 "DATE_DIFF".to_string(),
16617 vec![
16618 Expression::string("DAY"),
16619 cast_fn(arg1),
16620 cast_fn(arg0),
16621 ],
16622 ))))
16623 } else {
16624 Ok(Expression::Function(Box::new(Function::new(
16625 "DATE_DIFF".to_string(),
16626 vec![Expression::string("DAY"), arg1, arg0],
16627 ))))
16628 }
16629 }
16630 DialectType::Redshift => {
16631 let unit = Expression::Identifier(Identifier::new("DAY"));
16632 Ok(Expression::Function(Box::new(Function::new(
16633 "DATEDIFF".to_string(),
16634 vec![unit, arg1, arg0],
16635 ))))
16636 }
16637 _ => Ok(Expression::Function(Box::new(Function::new(
16638 "DATEDIFF".to_string(),
16639 vec![arg0, arg1],
16640 )))),
16641 }
16642 }
16643 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
16644 "DATE_DIFF" if f.args.len() == 3 => {
16645 let mut args = f.args;
16646 let arg0 = args.remove(0);
16647 let arg1 = args.remove(0);
16648 let arg2 = args.remove(0);
16649 let unit_str = Self::get_unit_str_static(&arg0);
16650
16651 match target {
16652 DialectType::DuckDB => {
16653 // DuckDB: DATE_DIFF('UNIT', start, end)
16654 Ok(Expression::Function(Box::new(Function::new(
16655 "DATE_DIFF".to_string(),
16656 vec![Expression::string(&unit_str), arg1, arg2],
16657 ))))
16658 }
16659 DialectType::Presto
16660 | DialectType::Trino
16661 | DialectType::Athena => {
16662 Ok(Expression::Function(Box::new(Function::new(
16663 "DATE_DIFF".to_string(),
16664 vec![Expression::string(&unit_str), arg1, arg2],
16665 ))))
16666 }
16667 DialectType::ClickHouse => {
16668 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
16669 let unit =
16670 Expression::Identifier(Identifier::new(&unit_str));
16671 Ok(Expression::Function(Box::new(Function::new(
16672 "DATE_DIFF".to_string(),
16673 vec![unit, arg1, arg2],
16674 ))))
16675 }
16676 DialectType::Snowflake | DialectType::Redshift => {
16677 let unit =
16678 Expression::Identifier(Identifier::new(&unit_str));
16679 Ok(Expression::Function(Box::new(Function::new(
16680 "DATEDIFF".to_string(),
16681 vec![unit, arg1, arg2],
16682 ))))
16683 }
16684 _ => {
16685 let unit =
16686 Expression::Identifier(Identifier::new(&unit_str));
16687 Ok(Expression::Function(Box::new(Function::new(
16688 "DATEDIFF".to_string(),
16689 vec![unit, arg1, arg2],
16690 ))))
16691 }
16692 }
16693 }
16694 // DATEADD(unit, val, date) - 3-arg form
16695 "DATEADD" if f.args.len() == 3 => {
16696 let mut args = f.args;
16697 let arg0 = args.remove(0);
16698 let arg1 = args.remove(0);
16699 let arg2 = args.remove(0);
16700 let unit_str = Self::get_unit_str_static(&arg0);
16701
16702 // Normalize TSQL unit abbreviations to standard names
16703 let unit_str = match unit_str.as_str() {
16704 "YY" | "YYYY" => "YEAR".to_string(),
16705 "QQ" | "Q" => "QUARTER".to_string(),
16706 "MM" | "M" => "MONTH".to_string(),
16707 "WK" | "WW" => "WEEK".to_string(),
16708 "DD" | "D" | "DY" => "DAY".to_string(),
16709 "HH" => "HOUR".to_string(),
16710 "MI" | "N" => "MINUTE".to_string(),
16711 "SS" | "S" => "SECOND".to_string(),
16712 "MS" => "MILLISECOND".to_string(),
16713 "MCS" | "US" => "MICROSECOND".to_string(),
16714 _ => unit_str,
16715 };
16716 match target {
16717 DialectType::Snowflake => {
16718 let unit =
16719 Expression::Identifier(Identifier::new(&unit_str));
16720 // Cast string literal to TIMESTAMP, but not for Snowflake source
16721 // (Snowflake natively accepts string literals in DATEADD)
16722 let arg2 = if matches!(
16723 &arg2,
16724 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16725 ) && !matches!(source, DialectType::Snowflake)
16726 {
16727 Expression::Cast(Box::new(Cast {
16728 this: arg2,
16729 to: DataType::Timestamp {
16730 precision: None,
16731 timezone: false,
16732 },
16733 trailing_comments: Vec::new(),
16734 double_colon_syntax: false,
16735 format: None,
16736 default: None,
16737 inferred_type: None,
16738 }))
16739 } else {
16740 arg2
16741 };
16742 Ok(Expression::Function(Box::new(Function::new(
16743 "DATEADD".to_string(),
16744 vec![unit, arg1, arg2],
16745 ))))
16746 }
16747 DialectType::TSQL => {
16748 let unit =
16749 Expression::Identifier(Identifier::new(&unit_str));
16750 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
16751 let arg2 = if matches!(
16752 &arg2,
16753 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16754 ) && !matches!(
16755 source,
16756 DialectType::Spark
16757 | DialectType::Databricks
16758 | DialectType::Hive
16759 ) {
16760 Expression::Cast(Box::new(Cast {
16761 this: arg2,
16762 to: DataType::Custom {
16763 name: "DATETIME2".to_string(),
16764 },
16765 trailing_comments: Vec::new(),
16766 double_colon_syntax: false,
16767 format: None,
16768 default: None,
16769 inferred_type: None,
16770 }))
16771 } else {
16772 arg2
16773 };
16774 Ok(Expression::Function(Box::new(Function::new(
16775 "DATEADD".to_string(),
16776 vec![unit, arg1, arg2],
16777 ))))
16778 }
16779 DialectType::Redshift => {
16780 let unit =
16781 Expression::Identifier(Identifier::new(&unit_str));
16782 Ok(Expression::Function(Box::new(Function::new(
16783 "DATEADD".to_string(),
16784 vec![unit, arg1, arg2],
16785 ))))
16786 }
16787 DialectType::Databricks => {
16788 let unit =
16789 Expression::Identifier(Identifier::new(&unit_str));
16790 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
16791 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
16792 let func_name = if matches!(
16793 source,
16794 DialectType::TSQL
16795 | DialectType::Fabric
16796 | DialectType::Databricks
16797 | DialectType::Snowflake
16798 ) {
16799 "DATEADD"
16800 } else {
16801 "DATE_ADD"
16802 };
16803 Ok(Expression::Function(Box::new(Function::new(
16804 func_name.to_string(),
16805 vec![unit, arg1, arg2],
16806 ))))
16807 }
16808 DialectType::DuckDB => {
16809 // Special handling for NANOSECOND from Snowflake
16810 if unit_str == "NANOSECOND"
16811 && matches!(source, DialectType::Snowflake)
16812 {
16813 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
16814 let cast_ts = Expression::Cast(Box::new(Cast {
16815 this: arg2,
16816 to: DataType::Custom {
16817 name: "TIMESTAMP_NS".to_string(),
16818 },
16819 trailing_comments: vec![],
16820 double_colon_syntax: false,
16821 format: None,
16822 default: None,
16823 inferred_type: None,
16824 }));
16825 let epoch_ns =
16826 Expression::Function(Box::new(Function::new(
16827 "EPOCH_NS".to_string(),
16828 vec![cast_ts],
16829 )));
16830 let sum = Expression::Add(Box::new(BinaryOp::new(
16831 epoch_ns, arg1,
16832 )));
16833 Ok(Expression::Function(Box::new(Function::new(
16834 "MAKE_TIMESTAMP_NS".to_string(),
16835 vec![sum],
16836 ))))
16837 } else {
16838 // DuckDB: convert to date + INTERVAL syntax with CAST
16839 let iu = Self::parse_interval_unit_static(&unit_str);
16840 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16841 this: Some(arg1),
16842 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
16843 }));
16844 // Cast string literal to TIMESTAMP
16845 let arg2 = if matches!(
16846 &arg2,
16847 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16848 ) {
16849 Expression::Cast(Box::new(Cast {
16850 this: arg2,
16851 to: DataType::Timestamp {
16852 precision: None,
16853 timezone: false,
16854 },
16855 trailing_comments: Vec::new(),
16856 double_colon_syntax: false,
16857 format: None,
16858 default: None,
16859 inferred_type: None,
16860 }))
16861 } else {
16862 arg2
16863 };
16864 Ok(Expression::Add(Box::new(
16865 crate::expressions::BinaryOp::new(arg2, interval),
16866 )))
16867 }
16868 }
16869 DialectType::Spark => {
16870 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
16871 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
16872 if matches!(source, DialectType::TSQL | DialectType::Fabric)
16873 {
16874 fn multiply_expr_spark(
16875 expr: Expression,
16876 factor: i64,
16877 ) -> Expression
16878 {
16879 if let Expression::Literal(lit) = &expr {
16880 if let crate::expressions::Literal::Number(n) =
16881 lit.as_ref()
16882 {
16883 if let Ok(val) = n.parse::<i64>() {
16884 return Expression::Literal(Box::new(
16885 crate::expressions::Literal::Number(
16886 (val * factor).to_string(),
16887 ),
16888 ));
16889 }
16890 }
16891 }
16892 Expression::Mul(Box::new(
16893 crate::expressions::BinaryOp::new(
16894 expr,
16895 Expression::Literal(Box::new(
16896 crate::expressions::Literal::Number(
16897 factor.to_string(),
16898 ),
16899 )),
16900 ),
16901 ))
16902 }
16903 let normalized_unit = match unit_str.as_str() {
16904 "YEAR" | "YY" | "YYYY" => "YEAR",
16905 "QUARTER" | "QQ" | "Q" => "QUARTER",
16906 "MONTH" | "MM" | "M" => "MONTH",
16907 "WEEK" | "WK" | "WW" => "WEEK",
16908 "DAY" | "DD" | "D" | "DY" => "DAY",
16909 _ => &unit_str,
16910 };
16911 match normalized_unit {
16912 "YEAR" => {
16913 let months = multiply_expr_spark(arg1, 12);
16914 Ok(Expression::Function(Box::new(
16915 Function::new(
16916 "ADD_MONTHS".to_string(),
16917 vec![arg2, months],
16918 ),
16919 )))
16920 }
16921 "QUARTER" => {
16922 let months = multiply_expr_spark(arg1, 3);
16923 Ok(Expression::Function(Box::new(
16924 Function::new(
16925 "ADD_MONTHS".to_string(),
16926 vec![arg2, months],
16927 ),
16928 )))
16929 }
16930 "MONTH" => Ok(Expression::Function(Box::new(
16931 Function::new(
16932 "ADD_MONTHS".to_string(),
16933 vec![arg2, arg1],
16934 ),
16935 ))),
16936 "WEEK" => {
16937 let days = multiply_expr_spark(arg1, 7);
16938 Ok(Expression::Function(Box::new(
16939 Function::new(
16940 "DATE_ADD".to_string(),
16941 vec![arg2, days],
16942 ),
16943 )))
16944 }
16945 "DAY" => Ok(Expression::Function(Box::new(
16946 Function::new(
16947 "DATE_ADD".to_string(),
16948 vec![arg2, arg1],
16949 ),
16950 ))),
16951 _ => {
16952 let unit = Expression::Identifier(
16953 Identifier::new(&unit_str),
16954 );
16955 Ok(Expression::Function(Box::new(
16956 Function::new(
16957 "DATE_ADD".to_string(),
16958 vec![unit, arg1, arg2],
16959 ),
16960 )))
16961 }
16962 }
16963 } else {
16964 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
16965 let unit =
16966 Expression::Identifier(Identifier::new(&unit_str));
16967 Ok(Expression::Function(Box::new(Function::new(
16968 "DATE_ADD".to_string(),
16969 vec![unit, arg1, arg2],
16970 ))))
16971 }
16972 }
16973 DialectType::Hive => match unit_str.as_str() {
16974 "MONTH" => {
16975 Ok(Expression::Function(Box::new(Function::new(
16976 "ADD_MONTHS".to_string(),
16977 vec![arg2, arg1],
16978 ))))
16979 }
16980 _ => Ok(Expression::Function(Box::new(Function::new(
16981 "DATE_ADD".to_string(),
16982 vec![arg2, arg1],
16983 )))),
16984 },
16985 DialectType::Presto
16986 | DialectType::Trino
16987 | DialectType::Athena => {
16988 // Cast string literal date to TIMESTAMP
16989 let arg2 = if matches!(
16990 &arg2,
16991 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16992 ) {
16993 Expression::Cast(Box::new(Cast {
16994 this: arg2,
16995 to: DataType::Timestamp {
16996 precision: None,
16997 timezone: false,
16998 },
16999 trailing_comments: Vec::new(),
17000 double_colon_syntax: false,
17001 format: None,
17002 default: None,
17003 inferred_type: None,
17004 }))
17005 } else {
17006 arg2
17007 };
17008 Ok(Expression::Function(Box::new(Function::new(
17009 "DATE_ADD".to_string(),
17010 vec![Expression::string(&unit_str), arg1, arg2],
17011 ))))
17012 }
17013 DialectType::MySQL => {
17014 let iu = Self::parse_interval_unit_static(&unit_str);
17015 Ok(Expression::DateAdd(Box::new(
17016 crate::expressions::DateAddFunc {
17017 this: arg2,
17018 interval: arg1,
17019 unit: iu,
17020 },
17021 )))
17022 }
17023 DialectType::PostgreSQL => {
17024 // Cast string literal date to TIMESTAMP
17025 let arg2 = if matches!(
17026 &arg2,
17027 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17028 ) {
17029 Expression::Cast(Box::new(Cast {
17030 this: arg2,
17031 to: DataType::Timestamp {
17032 precision: None,
17033 timezone: false,
17034 },
17035 trailing_comments: Vec::new(),
17036 double_colon_syntax: false,
17037 format: None,
17038 default: None,
17039 inferred_type: None,
17040 }))
17041 } else {
17042 arg2
17043 };
17044 let interval = Expression::Interval(Box::new(
17045 crate::expressions::Interval {
17046 this: Some(Expression::string(&format!(
17047 "{} {}",
17048 Self::expr_to_string_static(&arg1),
17049 unit_str
17050 ))),
17051 unit: None,
17052 },
17053 ));
17054 Ok(Expression::Add(Box::new(
17055 crate::expressions::BinaryOp::new(arg2, interval),
17056 )))
17057 }
17058 DialectType::BigQuery => {
17059 let iu = Self::parse_interval_unit_static(&unit_str);
17060 let interval = Expression::Interval(Box::new(
17061 crate::expressions::Interval {
17062 this: Some(arg1),
17063 unit: Some(
17064 crate::expressions::IntervalUnitSpec::Simple {
17065 unit: iu,
17066 use_plural: false,
17067 },
17068 ),
17069 },
17070 ));
17071 // Non-TSQL sources: CAST string literal to DATETIME
17072 let arg2 = if !matches!(
17073 source,
17074 DialectType::TSQL | DialectType::Fabric
17075 ) && matches!(
17076 &arg2,
17077 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17078 ) {
17079 Expression::Cast(Box::new(Cast {
17080 this: arg2,
17081 to: DataType::Custom {
17082 name: "DATETIME".to_string(),
17083 },
17084 trailing_comments: Vec::new(),
17085 double_colon_syntax: false,
17086 format: None,
17087 default: None,
17088 inferred_type: None,
17089 }))
17090 } else {
17091 arg2
17092 };
17093 Ok(Expression::Function(Box::new(Function::new(
17094 "DATE_ADD".to_string(),
17095 vec![arg2, interval],
17096 ))))
17097 }
17098 _ => {
17099 let unit =
17100 Expression::Identifier(Identifier::new(&unit_str));
17101 Ok(Expression::Function(Box::new(Function::new(
17102 "DATEADD".to_string(),
17103 vec![unit, arg1, arg2],
17104 ))))
17105 }
17106 }
17107 }
17108 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
17109 // or (date, val, 'UNIT') from Generic canonical form
17110 "DATE_ADD" if f.args.len() == 3 => {
17111 let mut args = f.args;
17112 let arg0 = args.remove(0);
17113 let arg1 = args.remove(0);
17114 let arg2 = args.remove(0);
17115 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
17116 // where arg2 is a string literal matching a unit name
17117 let arg2_unit = match &arg2 {
17118 Expression::Literal(lit)
17119 if matches!(lit.as_ref(), Literal::String(_)) =>
17120 {
17121 let Literal::String(s) = lit.as_ref() else {
17122 unreachable!()
17123 };
17124 let u = s.to_ascii_uppercase();
17125 if matches!(
17126 u.as_str(),
17127 "DAY"
17128 | "MONTH"
17129 | "YEAR"
17130 | "HOUR"
17131 | "MINUTE"
17132 | "SECOND"
17133 | "WEEK"
17134 | "QUARTER"
17135 | "MILLISECOND"
17136 | "MICROSECOND"
17137 ) {
17138 Some(u)
17139 } else {
17140 None
17141 }
17142 }
17143 _ => None,
17144 };
17145 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
17146 let (unit_str, val, date) = if let Some(u) = arg2_unit {
17147 (u, arg1, arg0)
17148 } else {
17149 (Self::get_unit_str_static(&arg0), arg1, arg2)
17150 };
17151 // Alias for backward compat with the rest of the match
17152 let arg1 = val;
17153 let arg2 = date;
17154
17155 match target {
17156 DialectType::Presto
17157 | DialectType::Trino
17158 | DialectType::Athena => {
17159 Ok(Expression::Function(Box::new(Function::new(
17160 "DATE_ADD".to_string(),
17161 vec![Expression::string(&unit_str), arg1, arg2],
17162 ))))
17163 }
17164 DialectType::DuckDB => {
17165 let iu = Self::parse_interval_unit_static(&unit_str);
17166 let interval = Expression::Interval(Box::new(
17167 crate::expressions::Interval {
17168 this: Some(arg1),
17169 unit: Some(
17170 crate::expressions::IntervalUnitSpec::Simple {
17171 unit: iu,
17172 use_plural: false,
17173 },
17174 ),
17175 },
17176 ));
17177 Ok(Expression::Add(Box::new(
17178 crate::expressions::BinaryOp::new(arg2, interval),
17179 )))
17180 }
17181 DialectType::PostgreSQL
17182 | DialectType::Materialize
17183 | DialectType::RisingWave => {
17184 // PostgreSQL: x + INTERVAL '1 DAY'
17185 let amount_str = Self::expr_to_string_static(&arg1);
17186 let interval = Expression::Interval(Box::new(
17187 crate::expressions::Interval {
17188 this: Some(Expression::string(&format!(
17189 "{} {}",
17190 amount_str, unit_str
17191 ))),
17192 unit: None,
17193 },
17194 ));
17195 Ok(Expression::Add(Box::new(
17196 crate::expressions::BinaryOp::new(arg2, interval),
17197 )))
17198 }
17199 DialectType::Snowflake
17200 | DialectType::TSQL
17201 | DialectType::Redshift => {
17202 let unit =
17203 Expression::Identifier(Identifier::new(&unit_str));
17204 Ok(Expression::Function(Box::new(Function::new(
17205 "DATEADD".to_string(),
17206 vec![unit, arg1, arg2],
17207 ))))
17208 }
17209 DialectType::BigQuery
17210 | DialectType::MySQL
17211 | DialectType::Doris
17212 | DialectType::StarRocks
17213 | DialectType::Drill => {
17214 // DATE_ADD(date, INTERVAL amount UNIT)
17215 let iu = Self::parse_interval_unit_static(&unit_str);
17216 let interval = Expression::Interval(Box::new(
17217 crate::expressions::Interval {
17218 this: Some(arg1),
17219 unit: Some(
17220 crate::expressions::IntervalUnitSpec::Simple {
17221 unit: iu,
17222 use_plural: false,
17223 },
17224 ),
17225 },
17226 ));
17227 Ok(Expression::Function(Box::new(Function::new(
17228 "DATE_ADD".to_string(),
17229 vec![arg2, interval],
17230 ))))
17231 }
17232 DialectType::SQLite => {
17233 // SQLite: DATE(x, '1 DAY')
17234 // Build the string '1 DAY' from amount and unit
17235 let amount_str = match &arg1 {
17236 Expression::Literal(lit)
17237 if matches!(lit.as_ref(), Literal::Number(_)) =>
17238 {
17239 let Literal::Number(n) = lit.as_ref() else {
17240 unreachable!()
17241 };
17242 n.clone()
17243 }
17244 _ => "1".to_string(),
17245 };
17246 Ok(Expression::Function(Box::new(Function::new(
17247 "DATE".to_string(),
17248 vec![
17249 arg2,
17250 Expression::string(format!(
17251 "{} {}",
17252 amount_str, unit_str
17253 )),
17254 ],
17255 ))))
17256 }
17257 DialectType::Dremio => {
17258 // Dremio: DATE_ADD(date, amount) - drops unit
17259 Ok(Expression::Function(Box::new(Function::new(
17260 "DATE_ADD".to_string(),
17261 vec![arg2, arg1],
17262 ))))
17263 }
17264 DialectType::Spark => {
17265 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
17266 if unit_str == "DAY" {
17267 Ok(Expression::Function(Box::new(Function::new(
17268 "DATE_ADD".to_string(),
17269 vec![arg2, arg1],
17270 ))))
17271 } else {
17272 let unit =
17273 Expression::Identifier(Identifier::new(&unit_str));
17274 Ok(Expression::Function(Box::new(Function::new(
17275 "DATE_ADD".to_string(),
17276 vec![unit, arg1, arg2],
17277 ))))
17278 }
17279 }
17280 DialectType::Databricks => {
17281 let unit =
17282 Expression::Identifier(Identifier::new(&unit_str));
17283 Ok(Expression::Function(Box::new(Function::new(
17284 "DATE_ADD".to_string(),
17285 vec![unit, arg1, arg2],
17286 ))))
17287 }
17288 DialectType::Hive => {
17289 // Hive: DATE_ADD(date, val) for DAY
17290 Ok(Expression::Function(Box::new(Function::new(
17291 "DATE_ADD".to_string(),
17292 vec![arg2, arg1],
17293 ))))
17294 }
17295 _ => {
17296 let unit =
17297 Expression::Identifier(Identifier::new(&unit_str));
17298 Ok(Expression::Function(Box::new(Function::new(
17299 "DATE_ADD".to_string(),
17300 vec![unit, arg1, arg2],
17301 ))))
17302 }
17303 }
17304 }
17305 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
17306 "DATE_ADD"
17307 if f.args.len() == 2
17308 && matches!(
17309 source,
17310 DialectType::Hive
17311 | DialectType::Spark
17312 | DialectType::Databricks
17313 | DialectType::Generic
17314 ) =>
17315 {
17316 let mut args = f.args;
17317 let date = args.remove(0);
17318 let days = args.remove(0);
17319 match target {
17320 DialectType::Hive | DialectType::Spark => {
17321 // Keep as DATE_ADD(date, days) for Hive/Spark
17322 Ok(Expression::Function(Box::new(Function::new(
17323 "DATE_ADD".to_string(),
17324 vec![date, days],
17325 ))))
17326 }
17327 DialectType::Databricks => {
17328 // Databricks: DATEADD(DAY, days, date)
17329 Ok(Expression::Function(Box::new(Function::new(
17330 "DATEADD".to_string(),
17331 vec![
17332 Expression::Identifier(Identifier::new("DAY")),
17333 days,
17334 date,
17335 ],
17336 ))))
17337 }
17338 DialectType::DuckDB => {
17339 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
17340 let cast_date = Self::ensure_cast_date(date);
17341 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
17342 let interval_val = if matches!(
17343 days,
17344 Expression::Mul(_)
17345 | Expression::Sub(_)
17346 | Expression::Add(_)
17347 ) {
17348 Expression::Paren(Box::new(crate::expressions::Paren {
17349 this: days,
17350 trailing_comments: vec![],
17351 }))
17352 } else {
17353 days
17354 };
17355 let interval = Expression::Interval(Box::new(
17356 crate::expressions::Interval {
17357 this: Some(interval_val),
17358 unit: Some(
17359 crate::expressions::IntervalUnitSpec::Simple {
17360 unit: crate::expressions::IntervalUnit::Day,
17361 use_plural: false,
17362 },
17363 ),
17364 },
17365 ));
17366 Ok(Expression::Add(Box::new(
17367 crate::expressions::BinaryOp::new(cast_date, interval),
17368 )))
17369 }
17370 DialectType::Snowflake => {
17371 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17372 let cast_date = if matches!(
17373 source,
17374 DialectType::Hive
17375 | DialectType::Spark
17376 | DialectType::Databricks
17377 ) {
17378 if matches!(
17379 date,
17380 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17381 ) {
17382 Self::double_cast_timestamp_date(date)
17383 } else {
17384 date
17385 }
17386 } else {
17387 date
17388 };
17389 Ok(Expression::Function(Box::new(Function::new(
17390 "DATEADD".to_string(),
17391 vec![
17392 Expression::Identifier(Identifier::new("DAY")),
17393 days,
17394 cast_date,
17395 ],
17396 ))))
17397 }
17398 DialectType::Redshift => {
17399 Ok(Expression::Function(Box::new(Function::new(
17400 "DATEADD".to_string(),
17401 vec![
17402 Expression::Identifier(Identifier::new("DAY")),
17403 days,
17404 date,
17405 ],
17406 ))))
17407 }
17408 DialectType::TSQL | DialectType::Fabric => {
17409 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
17410 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
17411 let cast_date = if matches!(
17412 source,
17413 DialectType::Hive | DialectType::Spark
17414 ) {
17415 if matches!(
17416 date,
17417 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17418 ) {
17419 Self::double_cast_datetime2_date(date)
17420 } else {
17421 date
17422 }
17423 } else {
17424 date
17425 };
17426 Ok(Expression::Function(Box::new(Function::new(
17427 "DATEADD".to_string(),
17428 vec![
17429 Expression::Identifier(Identifier::new("DAY")),
17430 days,
17431 cast_date,
17432 ],
17433 ))))
17434 }
17435 DialectType::Presto
17436 | DialectType::Trino
17437 | DialectType::Athena => {
17438 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17439 let cast_date = if matches!(
17440 source,
17441 DialectType::Hive
17442 | DialectType::Spark
17443 | DialectType::Databricks
17444 ) {
17445 if matches!(
17446 date,
17447 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17448 ) {
17449 Self::double_cast_timestamp_date(date)
17450 } else {
17451 date
17452 }
17453 } else {
17454 date
17455 };
17456 Ok(Expression::Function(Box::new(Function::new(
17457 "DATE_ADD".to_string(),
17458 vec![Expression::string("DAY"), days, cast_date],
17459 ))))
17460 }
17461 DialectType::BigQuery => {
17462 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
17463 let cast_date = if matches!(
17464 source,
17465 DialectType::Hive
17466 | DialectType::Spark
17467 | DialectType::Databricks
17468 ) {
17469 Self::double_cast_datetime_date(date)
17470 } else {
17471 date
17472 };
17473 // Wrap complex expressions in Paren for interval
17474 let interval_val = if matches!(
17475 days,
17476 Expression::Mul(_)
17477 | Expression::Sub(_)
17478 | Expression::Add(_)
17479 ) {
17480 Expression::Paren(Box::new(crate::expressions::Paren {
17481 this: days,
17482 trailing_comments: vec![],
17483 }))
17484 } else {
17485 days
17486 };
17487 let interval = Expression::Interval(Box::new(
17488 crate::expressions::Interval {
17489 this: Some(interval_val),
17490 unit: Some(
17491 crate::expressions::IntervalUnitSpec::Simple {
17492 unit: crate::expressions::IntervalUnit::Day,
17493 use_plural: false,
17494 },
17495 ),
17496 },
17497 ));
17498 Ok(Expression::Function(Box::new(Function::new(
17499 "DATE_ADD".to_string(),
17500 vec![cast_date, interval],
17501 ))))
17502 }
17503 DialectType::MySQL => {
17504 let iu = crate::expressions::IntervalUnit::Day;
17505 Ok(Expression::DateAdd(Box::new(
17506 crate::expressions::DateAddFunc {
17507 this: date,
17508 interval: days,
17509 unit: iu,
17510 },
17511 )))
17512 }
17513 DialectType::PostgreSQL => {
17514 let interval = Expression::Interval(Box::new(
17515 crate::expressions::Interval {
17516 this: Some(Expression::string(&format!(
17517 "{} DAY",
17518 Self::expr_to_string_static(&days)
17519 ))),
17520 unit: None,
17521 },
17522 ));
17523 Ok(Expression::Add(Box::new(
17524 crate::expressions::BinaryOp::new(date, interval),
17525 )))
17526 }
17527 DialectType::Doris
17528 | DialectType::StarRocks
17529 | DialectType::Drill => {
17530 // DATE_ADD(date, INTERVAL days DAY)
17531 let interval = Expression::Interval(Box::new(
17532 crate::expressions::Interval {
17533 this: Some(days),
17534 unit: Some(
17535 crate::expressions::IntervalUnitSpec::Simple {
17536 unit: crate::expressions::IntervalUnit::Day,
17537 use_plural: false,
17538 },
17539 ),
17540 },
17541 ));
17542 Ok(Expression::Function(Box::new(Function::new(
17543 "DATE_ADD".to_string(),
17544 vec![date, interval],
17545 ))))
17546 }
17547 _ => Ok(Expression::Function(Box::new(Function::new(
17548 "DATE_ADD".to_string(),
17549 vec![date, days],
17550 )))),
17551 }
17552 }
17553 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
17554 "DATE_ADD"
17555 if f.args.len() == 2
17556 && matches!(
17557 source,
17558 DialectType::MySQL | DialectType::SingleStore
17559 )
17560 && matches!(&f.args[1], Expression::Interval(_)) =>
17561 {
17562 let mut args = f.args;
17563 let date = args.remove(0);
17564 let interval_expr = args.remove(0);
17565 let (val, unit) = Self::extract_interval_parts(&interval_expr)
17566 .unwrap_or_else(|| {
17567 (
17568 interval_expr.clone(),
17569 crate::expressions::IntervalUnit::Day,
17570 )
17571 });
17572 let unit_str = Self::interval_unit_to_string(&unit);
17573 let is_literal = matches!(&val,
17574 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
17575 );
17576
17577 match target {
17578 DialectType::MySQL | DialectType::SingleStore => {
17579 // Keep as DATE_ADD(date, INTERVAL val UNIT)
17580 Ok(Expression::Function(Box::new(Function::new(
17581 "DATE_ADD".to_string(),
17582 vec![date, interval_expr],
17583 ))))
17584 }
17585 DialectType::PostgreSQL => {
17586 if is_literal {
17587 // Literal: date + INTERVAL 'val UNIT'
17588 let interval = Expression::Interval(Box::new(
17589 crate::expressions::Interval {
17590 this: Some(Expression::Literal(Box::new(
17591 Literal::String(format!(
17592 "{} {}",
17593 Self::expr_to_string(&val),
17594 unit_str
17595 )),
17596 ))),
17597 unit: None,
17598 },
17599 ));
17600 Ok(Expression::Add(Box::new(
17601 crate::expressions::BinaryOp::new(date, interval),
17602 )))
17603 } else {
17604 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
17605 let interval_one = Expression::Interval(Box::new(
17606 crate::expressions::Interval {
17607 this: Some(Expression::Literal(Box::new(
17608 Literal::String(format!("1 {}", unit_str)),
17609 ))),
17610 unit: None,
17611 },
17612 ));
17613 let mul = Expression::Mul(Box::new(
17614 crate::expressions::BinaryOp::new(
17615 interval_one,
17616 val,
17617 ),
17618 ));
17619 Ok(Expression::Add(Box::new(
17620 crate::expressions::BinaryOp::new(date, mul),
17621 )))
17622 }
17623 }
17624 _ => {
17625 // Default: keep as DATE_ADD(date, interval)
17626 Ok(Expression::Function(Box::new(Function::new(
17627 "DATE_ADD".to_string(),
17628 vec![date, interval_expr],
17629 ))))
17630 }
17631 }
17632 }
17633 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
17634 "DATE_SUB"
17635 if f.args.len() == 2
17636 && matches!(
17637 source,
17638 DialectType::Hive
17639 | DialectType::Spark
17640 | DialectType::Databricks
17641 ) =>
17642 {
17643 let mut args = f.args;
17644 let date = args.remove(0);
17645 let days = args.remove(0);
17646 // Helper to create days * -1
17647 let make_neg_days = |d: Expression| -> Expression {
17648 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
17649 d,
17650 Expression::Literal(Box::new(Literal::Number(
17651 "-1".to_string(),
17652 ))),
17653 )))
17654 };
17655 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
17656 match target {
17657 DialectType::Hive
17658 | DialectType::Spark
17659 | DialectType::Databricks => {
17660 // Keep as DATE_SUB(date, days) for Hive/Spark
17661 Ok(Expression::Function(Box::new(Function::new(
17662 "DATE_SUB".to_string(),
17663 vec![date, days],
17664 ))))
17665 }
17666 DialectType::DuckDB => {
17667 let cast_date = Self::ensure_cast_date(date);
17668 let neg = make_neg_days(days);
17669 let interval = Expression::Interval(Box::new(
17670 crate::expressions::Interval {
17671 this: Some(Expression::Paren(Box::new(
17672 crate::expressions::Paren {
17673 this: neg,
17674 trailing_comments: vec![],
17675 },
17676 ))),
17677 unit: Some(
17678 crate::expressions::IntervalUnitSpec::Simple {
17679 unit: crate::expressions::IntervalUnit::Day,
17680 use_plural: false,
17681 },
17682 ),
17683 },
17684 ));
17685 Ok(Expression::Add(Box::new(
17686 crate::expressions::BinaryOp::new(cast_date, interval),
17687 )))
17688 }
17689 DialectType::Snowflake => {
17690 let cast_date = if is_string_literal {
17691 Self::double_cast_timestamp_date(date)
17692 } else {
17693 date
17694 };
17695 let neg = make_neg_days(days);
17696 Ok(Expression::Function(Box::new(Function::new(
17697 "DATEADD".to_string(),
17698 vec![
17699 Expression::Identifier(Identifier::new("DAY")),
17700 neg,
17701 cast_date,
17702 ],
17703 ))))
17704 }
17705 DialectType::Redshift => {
17706 let neg = make_neg_days(days);
17707 Ok(Expression::Function(Box::new(Function::new(
17708 "DATEADD".to_string(),
17709 vec![
17710 Expression::Identifier(Identifier::new("DAY")),
17711 neg,
17712 date,
17713 ],
17714 ))))
17715 }
17716 DialectType::TSQL | DialectType::Fabric => {
17717 let cast_date = if is_string_literal {
17718 Self::double_cast_datetime2_date(date)
17719 } else {
17720 date
17721 };
17722 let neg = make_neg_days(days);
17723 Ok(Expression::Function(Box::new(Function::new(
17724 "DATEADD".to_string(),
17725 vec![
17726 Expression::Identifier(Identifier::new("DAY")),
17727 neg,
17728 cast_date,
17729 ],
17730 ))))
17731 }
17732 DialectType::Presto
17733 | DialectType::Trino
17734 | DialectType::Athena => {
17735 let cast_date = if is_string_literal {
17736 Self::double_cast_timestamp_date(date)
17737 } else {
17738 date
17739 };
17740 let neg = make_neg_days(days);
17741 Ok(Expression::Function(Box::new(Function::new(
17742 "DATE_ADD".to_string(),
17743 vec![Expression::string("DAY"), neg, cast_date],
17744 ))))
17745 }
17746 DialectType::BigQuery => {
17747 let cast_date = if is_string_literal {
17748 Self::double_cast_datetime_date(date)
17749 } else {
17750 date
17751 };
17752 let neg = make_neg_days(days);
17753 let interval = Expression::Interval(Box::new(
17754 crate::expressions::Interval {
17755 this: Some(Expression::Paren(Box::new(
17756 crate::expressions::Paren {
17757 this: neg,
17758 trailing_comments: vec![],
17759 },
17760 ))),
17761 unit: Some(
17762 crate::expressions::IntervalUnitSpec::Simple {
17763 unit: crate::expressions::IntervalUnit::Day,
17764 use_plural: false,
17765 },
17766 ),
17767 },
17768 ));
17769 Ok(Expression::Function(Box::new(Function::new(
17770 "DATE_ADD".to_string(),
17771 vec![cast_date, interval],
17772 ))))
17773 }
17774 _ => Ok(Expression::Function(Box::new(Function::new(
17775 "DATE_SUB".to_string(),
17776 vec![date, days],
17777 )))),
17778 }
17779 }
17780 // ADD_MONTHS(date, val) -> target-specific
17781 "ADD_MONTHS" if f.args.len() == 2 => {
17782 let mut args = f.args;
17783 let date = args.remove(0);
17784 let val = args.remove(0);
17785 match target {
17786 DialectType::TSQL => {
17787 let cast_date = Self::ensure_cast_datetime2(date);
17788 Ok(Expression::Function(Box::new(Function::new(
17789 "DATEADD".to_string(),
17790 vec![
17791 Expression::Identifier(Identifier::new("MONTH")),
17792 val,
17793 cast_date,
17794 ],
17795 ))))
17796 }
17797 DialectType::DuckDB => {
17798 let interval = Expression::Interval(Box::new(
17799 crate::expressions::Interval {
17800 this: Some(val),
17801 unit: Some(
17802 crate::expressions::IntervalUnitSpec::Simple {
17803 unit:
17804 crate::expressions::IntervalUnit::Month,
17805 use_plural: false,
17806 },
17807 ),
17808 },
17809 ));
17810 Ok(Expression::Add(Box::new(
17811 crate::expressions::BinaryOp::new(date, interval),
17812 )))
17813 }
17814 DialectType::Snowflake => {
17815 // Keep ADD_MONTHS when source is Snowflake
17816 if matches!(source, DialectType::Snowflake) {
17817 Ok(Expression::Function(Box::new(Function::new(
17818 "ADD_MONTHS".to_string(),
17819 vec![date, val],
17820 ))))
17821 } else {
17822 Ok(Expression::Function(Box::new(Function::new(
17823 "DATEADD".to_string(),
17824 vec![
17825 Expression::Identifier(Identifier::new(
17826 "MONTH",
17827 )),
17828 val,
17829 date,
17830 ],
17831 ))))
17832 }
17833 }
17834 DialectType::Redshift => {
17835 Ok(Expression::Function(Box::new(Function::new(
17836 "DATEADD".to_string(),
17837 vec![
17838 Expression::Identifier(Identifier::new("MONTH")),
17839 val,
17840 date,
17841 ],
17842 ))))
17843 }
17844 DialectType::Presto
17845 | DialectType::Trino
17846 | DialectType::Athena => {
17847 Ok(Expression::Function(Box::new(Function::new(
17848 "DATE_ADD".to_string(),
17849 vec![Expression::string("MONTH"), val, date],
17850 ))))
17851 }
17852 DialectType::BigQuery => {
17853 let interval = Expression::Interval(Box::new(
17854 crate::expressions::Interval {
17855 this: Some(val),
17856 unit: Some(
17857 crate::expressions::IntervalUnitSpec::Simple {
17858 unit:
17859 crate::expressions::IntervalUnit::Month,
17860 use_plural: false,
17861 },
17862 ),
17863 },
17864 ));
17865 Ok(Expression::Function(Box::new(Function::new(
17866 "DATE_ADD".to_string(),
17867 vec![date, interval],
17868 ))))
17869 }
17870 _ => Ok(Expression::Function(Box::new(Function::new(
17871 "ADD_MONTHS".to_string(),
17872 vec![date, val],
17873 )))),
17874 }
17875 }
17876 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
17877 "DATETRUNC" if f.args.len() == 2 => {
17878 let mut args = f.args;
17879 let arg0 = args.remove(0);
17880 let arg1 = args.remove(0);
17881 let unit_str = Self::get_unit_str_static(&arg0);
17882 match target {
17883 DialectType::TSQL | DialectType::Fabric => {
17884 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
17885 Ok(Expression::Function(Box::new(Function::new(
17886 "DATETRUNC".to_string(),
17887 vec![
17888 Expression::Identifier(Identifier::new(&unit_str)),
17889 arg1,
17890 ],
17891 ))))
17892 }
17893 DialectType::DuckDB => {
17894 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
17895 let date = Self::ensure_cast_timestamp(arg1);
17896 Ok(Expression::Function(Box::new(Function::new(
17897 "DATE_TRUNC".to_string(),
17898 vec![Expression::string(&unit_str), date],
17899 ))))
17900 }
17901 DialectType::ClickHouse => {
17902 // ClickHouse: dateTrunc('UNIT', expr)
17903 Ok(Expression::Function(Box::new(Function::new(
17904 "dateTrunc".to_string(),
17905 vec![Expression::string(&unit_str), arg1],
17906 ))))
17907 }
17908 _ => {
17909 // Standard: DATE_TRUNC('UNIT', expr)
17910 let unit = Expression::string(&unit_str);
17911 Ok(Expression::Function(Box::new(Function::new(
17912 "DATE_TRUNC".to_string(),
17913 vec![unit, arg1],
17914 ))))
17915 }
17916 }
17917 }
17918 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
17919 "GETDATE" if f.args.is_empty() => match target {
17920 DialectType::TSQL => Ok(Expression::Function(f)),
17921 DialectType::Redshift => Ok(Expression::Function(Box::new(
17922 Function::new("GETDATE".to_string(), vec![]),
17923 ))),
17924 _ => Ok(Expression::CurrentTimestamp(
17925 crate::expressions::CurrentTimestamp {
17926 precision: None,
17927 sysdate: false,
17928 },
17929 )),
17930 },
17931 // TO_HEX(x) / HEX(x) -> target-specific hex function
17932 "TO_HEX" | "HEX" if f.args.len() == 1 => {
17933 let name = match target {
17934 DialectType::Presto | DialectType::Trino => "TO_HEX",
17935 DialectType::Spark
17936 | DialectType::Databricks
17937 | DialectType::Hive => "HEX",
17938 DialectType::DuckDB
17939 | DialectType::PostgreSQL
17940 | DialectType::Redshift => "TO_HEX",
17941 _ => &f.name,
17942 };
17943 Ok(Expression::Function(Box::new(Function::new(
17944 name.to_string(),
17945 f.args,
17946 ))))
17947 }
17948 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
17949 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
17950 match target {
17951 DialectType::BigQuery => {
17952 // BigQuery: UNHEX(x) -> FROM_HEX(x)
17953 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
17954 // because BigQuery MD5 returns BYTES, not hex string
17955 let arg = &f.args[0];
17956 let wrapped_arg = match arg {
17957 Expression::Function(inner_f)
17958 if inner_f.name.eq_ignore_ascii_case("MD5")
17959 || inner_f
17960 .name
17961 .eq_ignore_ascii_case("SHA1")
17962 || inner_f
17963 .name
17964 .eq_ignore_ascii_case("SHA256")
17965 || inner_f
17966 .name
17967 .eq_ignore_ascii_case("SHA512") =>
17968 {
17969 // Wrap hash function in TO_HEX for BigQuery
17970 Expression::Function(Box::new(Function::new(
17971 "TO_HEX".to_string(),
17972 vec![arg.clone()],
17973 )))
17974 }
17975 _ => f.args.into_iter().next().unwrap(),
17976 };
17977 Ok(Expression::Function(Box::new(Function::new(
17978 "FROM_HEX".to_string(),
17979 vec![wrapped_arg],
17980 ))))
17981 }
17982 _ => {
17983 let name = match target {
17984 DialectType::Presto | DialectType::Trino => "FROM_HEX",
17985 DialectType::Spark
17986 | DialectType::Databricks
17987 | DialectType::Hive => "UNHEX",
17988 _ => &f.name,
17989 };
17990 Ok(Expression::Function(Box::new(Function::new(
17991 name.to_string(),
17992 f.args,
17993 ))))
17994 }
17995 }
17996 }
17997 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
17998 "TO_UTF8" if f.args.len() == 1 => match target {
17999 DialectType::Spark | DialectType::Databricks => {
18000 let mut args = f.args;
18001 args.push(Expression::string("utf-8"));
18002 Ok(Expression::Function(Box::new(Function::new(
18003 "ENCODE".to_string(),
18004 args,
18005 ))))
18006 }
18007 _ => Ok(Expression::Function(f)),
18008 },
18009 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
18010 "FROM_UTF8" if f.args.len() == 1 => match target {
18011 DialectType::Spark | DialectType::Databricks => {
18012 let mut args = f.args;
18013 args.push(Expression::string("utf-8"));
18014 Ok(Expression::Function(Box::new(Function::new(
18015 "DECODE".to_string(),
18016 args,
18017 ))))
18018 }
18019 _ => Ok(Expression::Function(f)),
18020 },
18021 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
18022 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
18023 let name = match target {
18024 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
18025 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
18026 DialectType::PostgreSQL | DialectType::Redshift => {
18027 "STARTS_WITH"
18028 }
18029 _ => &f.name,
18030 };
18031 Ok(Expression::Function(Box::new(Function::new(
18032 name.to_string(),
18033 f.args,
18034 ))))
18035 }
18036 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
18037 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
18038 let name = match target {
18039 DialectType::Presto
18040 | DialectType::Trino
18041 | DialectType::Athena => "APPROX_DISTINCT",
18042 _ => "APPROX_COUNT_DISTINCT",
18043 };
18044 Ok(Expression::Function(Box::new(Function::new(
18045 name.to_string(),
18046 f.args,
18047 ))))
18048 }
18049 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
18050 "JSON_EXTRACT"
18051 if f.args.len() == 2
18052 && !matches!(source, DialectType::BigQuery)
18053 && matches!(
18054 target,
18055 DialectType::Spark
18056 | DialectType::Databricks
18057 | DialectType::Hive
18058 ) =>
18059 {
18060 Ok(Expression::Function(Box::new(Function::new(
18061 "GET_JSON_OBJECT".to_string(),
18062 f.args,
18063 ))))
18064 }
18065 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
18066 "JSON_EXTRACT"
18067 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
18068 {
18069 let mut args = f.args;
18070 let path = args.remove(1);
18071 let this = args.remove(0);
18072 Ok(Expression::JsonExtract(Box::new(
18073 crate::expressions::JsonExtractFunc {
18074 this,
18075 path,
18076 returning: None,
18077 arrow_syntax: true,
18078 hash_arrow_syntax: false,
18079 wrapper_option: None,
18080 quotes_option: None,
18081 on_scalar_string: false,
18082 on_error: None,
18083 },
18084 )))
18085 }
18086 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
18087 "JSON_FORMAT" if f.args.len() == 1 => {
18088 match target {
18089 DialectType::Spark | DialectType::Databricks => {
18090 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
18091 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
18092 if matches!(
18093 source,
18094 DialectType::Presto
18095 | DialectType::Trino
18096 | DialectType::Athena
18097 ) {
18098 if let Some(Expression::ParseJson(pj)) = f.args.first()
18099 {
18100 if let Expression::Literal(lit) = &pj.this {
18101 if let Literal::String(s) = lit.as_ref() {
18102 let wrapped =
18103 Expression::Literal(Box::new(
18104 Literal::String(format!("[{}]", s)),
18105 ));
18106 let schema_of_json = Expression::Function(
18107 Box::new(Function::new(
18108 "SCHEMA_OF_JSON".to_string(),
18109 vec![wrapped.clone()],
18110 )),
18111 );
18112 let from_json = Expression::Function(
18113 Box::new(Function::new(
18114 "FROM_JSON".to_string(),
18115 vec![wrapped, schema_of_json],
18116 )),
18117 );
18118 let to_json = Expression::Function(
18119 Box::new(Function::new(
18120 "TO_JSON".to_string(),
18121 vec![from_json],
18122 )),
18123 );
18124 return Ok(Expression::Function(Box::new(
18125 Function::new(
18126 "REGEXP_EXTRACT".to_string(),
18127 vec![
18128 to_json,
18129 Expression::Literal(Box::new(
18130 Literal::String(
18131 "^.(.*).$".to_string(),
18132 ),
18133 )),
18134 Expression::Literal(Box::new(
18135 Literal::Number(
18136 "1".to_string(),
18137 ),
18138 )),
18139 ],
18140 ),
18141 )));
18142 }
18143 }
18144 }
18145 }
18146
18147 // Strip inner CAST(... AS JSON) or TO_JSON() if present
18148 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
18149 let mut args = f.args;
18150 if let Some(Expression::Cast(ref c)) = args.first() {
18151 if matches!(&c.to, DataType::Json | DataType::JsonB) {
18152 args = vec![c.this.clone()];
18153 }
18154 } else if let Some(Expression::Function(ref inner_f)) =
18155 args.first()
18156 {
18157 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
18158 && inner_f.args.len() == 1
18159 {
18160 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
18161 args = inner_f.args.clone();
18162 }
18163 }
18164 Ok(Expression::Function(Box::new(Function::new(
18165 "TO_JSON".to_string(),
18166 args,
18167 ))))
18168 }
18169 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18170 Function::new("TO_JSON_STRING".to_string(), f.args),
18171 ))),
18172 DialectType::DuckDB => {
18173 // CAST(TO_JSON(x) AS TEXT)
18174 let to_json = Expression::Function(Box::new(
18175 Function::new("TO_JSON".to_string(), f.args),
18176 ));
18177 Ok(Expression::Cast(Box::new(Cast {
18178 this: to_json,
18179 to: DataType::Text,
18180 trailing_comments: Vec::new(),
18181 double_colon_syntax: false,
18182 format: None,
18183 default: None,
18184 inferred_type: None,
18185 })))
18186 }
18187 _ => Ok(Expression::Function(f)),
18188 }
18189 }
18190 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
18191 "SYSDATE" if f.args.is_empty() => {
18192 match target {
18193 DialectType::Oracle | DialectType::Redshift => {
18194 Ok(Expression::Function(f))
18195 }
18196 DialectType::Snowflake => {
18197 // Snowflake uses SYSDATE() with parens
18198 let mut f = *f;
18199 f.no_parens = false;
18200 Ok(Expression::Function(Box::new(f)))
18201 }
18202 DialectType::DuckDB => {
18203 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
18204 Ok(Expression::AtTimeZone(Box::new(
18205 crate::expressions::AtTimeZone {
18206 this: Expression::CurrentTimestamp(
18207 crate::expressions::CurrentTimestamp {
18208 precision: None,
18209 sysdate: false,
18210 },
18211 ),
18212 zone: Expression::Literal(Box::new(
18213 Literal::String("UTC".to_string()),
18214 )),
18215 },
18216 )))
18217 }
18218 _ => Ok(Expression::CurrentTimestamp(
18219 crate::expressions::CurrentTimestamp {
18220 precision: None,
18221 sysdate: true,
18222 },
18223 )),
18224 }
18225 }
18226 // LOGICAL_OR(x) -> BOOL_OR(x)
18227 "LOGICAL_OR" if f.args.len() == 1 => {
18228 let name = match target {
18229 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
18230 _ => &f.name,
18231 };
18232 Ok(Expression::Function(Box::new(Function::new(
18233 name.to_string(),
18234 f.args,
18235 ))))
18236 }
18237 // LOGICAL_AND(x) -> BOOL_AND(x)
18238 "LOGICAL_AND" if f.args.len() == 1 => {
18239 let name = match target {
18240 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
18241 _ => &f.name,
18242 };
18243 Ok(Expression::Function(Box::new(Function::new(
18244 name.to_string(),
18245 f.args,
18246 ))))
18247 }
18248 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
18249 "MONTHS_ADD" if f.args.len() == 2 => match target {
18250 DialectType::Oracle => Ok(Expression::Function(Box::new(
18251 Function::new("ADD_MONTHS".to_string(), f.args),
18252 ))),
18253 _ => Ok(Expression::Function(f)),
18254 },
18255 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
18256 "ARRAY_JOIN" if f.args.len() >= 2 => {
18257 match target {
18258 DialectType::Spark | DialectType::Databricks => {
18259 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
18260 Ok(Expression::Function(f))
18261 }
18262 DialectType::Hive => {
18263 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
18264 let mut args = f.args;
18265 let arr = args.remove(0);
18266 let sep = args.remove(0);
18267 // Drop any remaining args (null_replacement)
18268 Ok(Expression::Function(Box::new(Function::new(
18269 "CONCAT_WS".to_string(),
18270 vec![sep, arr],
18271 ))))
18272 }
18273 DialectType::Presto | DialectType::Trino => {
18274 Ok(Expression::Function(f))
18275 }
18276 _ => Ok(Expression::Function(f)),
18277 }
18278 }
18279 // LOCATE(substr, str, pos) 3-arg -> target-specific
18280 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
18281 "LOCATE"
18282 if f.args.len() == 3
18283 && matches!(
18284 target,
18285 DialectType::Presto
18286 | DialectType::Trino
18287 | DialectType::Athena
18288 | DialectType::DuckDB
18289 ) =>
18290 {
18291 let mut args = f.args;
18292 let substr = args.remove(0);
18293 let string = args.remove(0);
18294 let pos = args.remove(0);
18295 // STRPOS(SUBSTRING(string, pos), substr)
18296 let substring_call = Expression::Function(Box::new(Function::new(
18297 "SUBSTRING".to_string(),
18298 vec![string.clone(), pos.clone()],
18299 )));
18300 let strpos_call = Expression::Function(Box::new(Function::new(
18301 "STRPOS".to_string(),
18302 vec![substring_call, substr.clone()],
18303 )));
18304 // STRPOS(...) + pos - 1
18305 let pos_adjusted =
18306 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
18307 Expression::Add(Box::new(
18308 crate::expressions::BinaryOp::new(
18309 strpos_call.clone(),
18310 pos.clone(),
18311 ),
18312 )),
18313 Expression::number(1),
18314 )));
18315 // STRPOS(...) = 0
18316 let is_zero =
18317 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
18318 strpos_call.clone(),
18319 Expression::number(0),
18320 )));
18321
18322 match target {
18323 DialectType::Presto
18324 | DialectType::Trino
18325 | DialectType::Athena => {
18326 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
18327 Ok(Expression::Function(Box::new(Function::new(
18328 "IF".to_string(),
18329 vec![is_zero, Expression::number(0), pos_adjusted],
18330 ))))
18331 }
18332 DialectType::DuckDB => {
18333 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
18334 Ok(Expression::Case(Box::new(crate::expressions::Case {
18335 operand: None,
18336 whens: vec![(is_zero, Expression::number(0))],
18337 else_: Some(pos_adjusted),
18338 comments: Vec::new(),
18339 inferred_type: None,
18340 })))
18341 }
18342 _ => Ok(Expression::Function(Box::new(Function::new(
18343 "LOCATE".to_string(),
18344 vec![substr, string, pos],
18345 )))),
18346 }
18347 }
18348 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
18349 "STRPOS"
18350 if f.args.len() == 3
18351 && matches!(
18352 target,
18353 DialectType::BigQuery
18354 | DialectType::Oracle
18355 | DialectType::Teradata
18356 ) =>
18357 {
18358 let mut args = f.args;
18359 let haystack = args.remove(0);
18360 let needle = args.remove(0);
18361 let occurrence = args.remove(0);
18362 Ok(Expression::Function(Box::new(Function::new(
18363 "INSTR".to_string(),
18364 vec![haystack, needle, Expression::number(1), occurrence],
18365 ))))
18366 }
18367 // SCHEMA_NAME(id) -> target-specific
18368 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
18369 DialectType::MySQL | DialectType::SingleStore => {
18370 Ok(Expression::Function(Box::new(Function::new(
18371 "SCHEMA".to_string(),
18372 vec![],
18373 ))))
18374 }
18375 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
18376 crate::expressions::CurrentSchema { this: None },
18377 ))),
18378 DialectType::SQLite => Ok(Expression::string("main")),
18379 _ => Ok(Expression::Function(f)),
18380 },
18381 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
18382 "STRTOL" if f.args.len() == 2 => match target {
18383 DialectType::Presto | DialectType::Trino => {
18384 Ok(Expression::Function(Box::new(Function::new(
18385 "FROM_BASE".to_string(),
18386 f.args,
18387 ))))
18388 }
18389 _ => Ok(Expression::Function(f)),
18390 },
18391 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
18392 "EDITDIST3" if f.args.len() == 2 => match target {
18393 DialectType::Spark | DialectType::Databricks => {
18394 Ok(Expression::Function(Box::new(Function::new(
18395 "LEVENSHTEIN".to_string(),
18396 f.args,
18397 ))))
18398 }
18399 _ => Ok(Expression::Function(f)),
18400 },
18401 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
18402 "FORMAT"
18403 if f.args.len() == 2
18404 && matches!(
18405 source,
18406 DialectType::MySQL | DialectType::SingleStore
18407 )
18408 && matches!(target, DialectType::DuckDB) =>
18409 {
18410 let mut args = f.args;
18411 let num_expr = args.remove(0);
18412 let decimals_expr = args.remove(0);
18413 // Extract decimal count
18414 let dec_count = match &decimals_expr {
18415 Expression::Literal(lit)
18416 if matches!(lit.as_ref(), Literal::Number(_)) =>
18417 {
18418 let Literal::Number(n) = lit.as_ref() else {
18419 unreachable!()
18420 };
18421 n.clone()
18422 }
18423 _ => "0".to_string(),
18424 };
18425 let fmt_str = format!("{{:,.{}f}}", dec_count);
18426 Ok(Expression::Function(Box::new(Function::new(
18427 "FORMAT".to_string(),
18428 vec![Expression::string(&fmt_str), num_expr],
18429 ))))
18430 }
18431 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
18432 "FORMAT"
18433 if f.args.len() == 2
18434 && matches!(
18435 source,
18436 DialectType::TSQL | DialectType::Fabric
18437 ) =>
18438 {
18439 let val_expr = f.args[0].clone();
18440 let fmt_expr = f.args[1].clone();
18441 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
18442 // Only expand shortcodes that are NOT also valid numeric format specifiers.
18443 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
18444 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
18445 let (expanded_fmt, is_shortcode) = match &fmt_expr {
18446 Expression::Literal(lit)
18447 if matches!(
18448 lit.as_ref(),
18449 crate::expressions::Literal::String(_)
18450 ) =>
18451 {
18452 let crate::expressions::Literal::String(s) = lit.as_ref()
18453 else {
18454 unreachable!()
18455 };
18456 match s.as_str() {
18457 "m" | "M" => (Expression::string("MMMM d"), true),
18458 "t" => (Expression::string("h:mm tt"), true),
18459 "T" => (Expression::string("h:mm:ss tt"), true),
18460 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
18461 _ => (fmt_expr.clone(), false),
18462 }
18463 }
18464 _ => (fmt_expr.clone(), false),
18465 };
18466 // Check if the format looks like a date format
18467 let is_date_format = is_shortcode
18468 || match &expanded_fmt {
18469 Expression::Literal(lit)
18470 if matches!(
18471 lit.as_ref(),
18472 crate::expressions::Literal::String(_)
18473 ) =>
18474 {
18475 let crate::expressions::Literal::String(s) =
18476 lit.as_ref()
18477 else {
18478 unreachable!()
18479 };
18480 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
18481 s.contains("yyyy")
18482 || s.contains("YYYY")
18483 || s.contains("MM")
18484 || s.contains("dd")
18485 || s.contains("MMMM")
18486 || s.contains("HH")
18487 || s.contains("hh")
18488 || s.contains("ss")
18489 }
18490 _ => false,
18491 };
18492 match target {
18493 DialectType::Spark | DialectType::Databricks => {
18494 let func_name = if is_date_format {
18495 "DATE_FORMAT"
18496 } else {
18497 "FORMAT_NUMBER"
18498 };
18499 Ok(Expression::Function(Box::new(Function::new(
18500 func_name.to_string(),
18501 vec![val_expr, expanded_fmt],
18502 ))))
18503 }
18504 _ => {
18505 // For TSQL and other targets, expand shortcodes but keep FORMAT
18506 if is_shortcode {
18507 Ok(Expression::Function(Box::new(Function::new(
18508 "FORMAT".to_string(),
18509 vec![val_expr, expanded_fmt],
18510 ))))
18511 } else {
18512 Ok(Expression::Function(f))
18513 }
18514 }
18515 }
18516 }
18517 // FORMAT('%s', x) from Trino/Presto -> target-specific
18518 "FORMAT"
18519 if f.args.len() >= 2
18520 && matches!(
18521 source,
18522 DialectType::Trino
18523 | DialectType::Presto
18524 | DialectType::Athena
18525 ) =>
18526 {
18527 let fmt_expr = f.args[0].clone();
18528 let value_args: Vec<Expression> = f.args[1..].to_vec();
18529 match target {
18530 // DuckDB: replace %s with {} in format string
18531 DialectType::DuckDB => {
18532 let new_fmt = match &fmt_expr {
18533 Expression::Literal(lit)
18534 if matches!(lit.as_ref(), Literal::String(_)) =>
18535 {
18536 let Literal::String(s) = lit.as_ref() else {
18537 unreachable!()
18538 };
18539 Expression::Literal(Box::new(Literal::String(
18540 s.replace("%s", "{}"),
18541 )))
18542 }
18543 _ => fmt_expr,
18544 };
18545 let mut args = vec![new_fmt];
18546 args.extend(value_args);
18547 Ok(Expression::Function(Box::new(Function::new(
18548 "FORMAT".to_string(),
18549 args,
18550 ))))
18551 }
18552 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
18553 DialectType::Snowflake => match &fmt_expr {
18554 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
18555 {
18556 let Literal::String(_) = lit.as_ref() else {
18557 unreachable!()
18558 };
18559 Ok(Expression::Function(Box::new(Function::new(
18560 "TO_CHAR".to_string(),
18561 value_args,
18562 ))))
18563 }
18564 _ => Ok(Expression::Function(f)),
18565 },
18566 // Default: keep FORMAT as-is
18567 _ => Ok(Expression::Function(f)),
18568 }
18569 }
18570 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
18571 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
18572 if f.args.len() == 2 =>
18573 {
18574 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
18575 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
18576 if matches!(target, DialectType::DuckDB)
18577 && matches!(source, DialectType::Snowflake)
18578 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
18579 {
18580 let value = f.args[0].clone();
18581 let array = f.args[1].clone();
18582
18583 // value IS NULL
18584 let value_is_null =
18585 Expression::IsNull(Box::new(crate::expressions::IsNull {
18586 this: value.clone(),
18587 not: false,
18588 postfix_form: false,
18589 }));
18590
18591 // ARRAY_LENGTH(array)
18592 let array_length =
18593 Expression::Function(Box::new(Function::new(
18594 "ARRAY_LENGTH".to_string(),
18595 vec![array.clone()],
18596 )));
18597 // LIST_COUNT(array)
18598 let list_count = Expression::Function(Box::new(Function::new(
18599 "LIST_COUNT".to_string(),
18600 vec![array.clone()],
18601 )));
18602 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
18603 let neq =
18604 Expression::Neq(Box::new(crate::expressions::BinaryOp {
18605 left: array_length,
18606 right: list_count,
18607 left_comments: vec![],
18608 operator_comments: vec![],
18609 trailing_comments: vec![],
18610 inferred_type: None,
18611 }));
18612 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
18613 let nullif =
18614 Expression::Nullif(Box::new(crate::expressions::Nullif {
18615 this: Box::new(neq),
18616 expression: Box::new(Expression::Boolean(
18617 crate::expressions::BooleanLiteral { value: false },
18618 )),
18619 }));
18620
18621 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
18622 let array_contains =
18623 Expression::Function(Box::new(Function::new(
18624 "ARRAY_CONTAINS".to_string(),
18625 vec![array, value],
18626 )));
18627
18628 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
18629 return Ok(Expression::Case(Box::new(Case {
18630 operand: None,
18631 whens: vec![(value_is_null, nullif)],
18632 else_: Some(array_contains),
18633 comments: Vec::new(),
18634 inferred_type: None,
18635 })));
18636 }
18637 match target {
18638 DialectType::PostgreSQL | DialectType::Redshift => {
18639 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
18640 let arr = f.args[0].clone();
18641 let needle = f.args[1].clone();
18642 // Convert [] to ARRAY[] for PostgreSQL
18643 let pg_arr = match arr {
18644 Expression::Array(a) => Expression::ArrayFunc(
18645 Box::new(crate::expressions::ArrayConstructor {
18646 expressions: a.expressions,
18647 bracket_notation: false,
18648 use_list_keyword: false,
18649 }),
18650 ),
18651 _ => arr,
18652 };
18653 // needle = ANY(arr) using the Any quantified expression
18654 let any_expr = Expression::Any(Box::new(
18655 crate::expressions::QuantifiedExpr {
18656 this: needle.clone(),
18657 subquery: pg_arr,
18658 op: Some(crate::expressions::QuantifiedOp::Eq),
18659 },
18660 ));
18661 let coalesce = Expression::Coalesce(Box::new(
18662 crate::expressions::VarArgFunc {
18663 expressions: vec![
18664 any_expr,
18665 Expression::Boolean(
18666 crate::expressions::BooleanLiteral {
18667 value: false,
18668 },
18669 ),
18670 ],
18671 original_name: None,
18672 inferred_type: None,
18673 },
18674 ));
18675 let is_null_check = Expression::IsNull(Box::new(
18676 crate::expressions::IsNull {
18677 this: needle,
18678 not: false,
18679 postfix_form: false,
18680 },
18681 ));
18682 Ok(Expression::Case(Box::new(Case {
18683 operand: None,
18684 whens: vec![(
18685 is_null_check,
18686 Expression::Null(crate::expressions::Null),
18687 )],
18688 else_: Some(coalesce),
18689 comments: Vec::new(),
18690 inferred_type: None,
18691 })))
18692 }
18693 _ => Ok(Expression::Function(Box::new(Function::new(
18694 "ARRAY_CONTAINS".to_string(),
18695 f.args,
18696 )))),
18697 }
18698 }
18699 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
18700 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
18701 match target {
18702 DialectType::PostgreSQL | DialectType::Redshift => {
18703 // arr1 && arr2 with ARRAY[] syntax
18704 let mut args = f.args;
18705 let arr1 = args.remove(0);
18706 let arr2 = args.remove(0);
18707 let pg_arr1 = match arr1 {
18708 Expression::Array(a) => Expression::ArrayFunc(
18709 Box::new(crate::expressions::ArrayConstructor {
18710 expressions: a.expressions,
18711 bracket_notation: false,
18712 use_list_keyword: false,
18713 }),
18714 ),
18715 _ => arr1,
18716 };
18717 let pg_arr2 = match arr2 {
18718 Expression::Array(a) => Expression::ArrayFunc(
18719 Box::new(crate::expressions::ArrayConstructor {
18720 expressions: a.expressions,
18721 bracket_notation: false,
18722 use_list_keyword: false,
18723 }),
18724 ),
18725 _ => arr2,
18726 };
18727 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18728 pg_arr1, pg_arr2,
18729 ))))
18730 }
18731 DialectType::DuckDB => {
18732 // DuckDB: arr1 && arr2 (native support)
18733 let mut args = f.args;
18734 let arr1 = args.remove(0);
18735 let arr2 = args.remove(0);
18736 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18737 arr1, arr2,
18738 ))))
18739 }
18740 _ => Ok(Expression::Function(Box::new(Function::new(
18741 "LIST_HAS_ANY".to_string(),
18742 f.args,
18743 )))),
18744 }
18745 }
18746 // APPROX_QUANTILE(x, q) -> target-specific
18747 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
18748 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18749 Function::new("APPROX_PERCENTILE".to_string(), f.args),
18750 ))),
18751 DialectType::DuckDB => Ok(Expression::Function(f)),
18752 _ => Ok(Expression::Function(f)),
18753 },
18754 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
18755 "MAKE_DATE" if f.args.len() == 3 => match target {
18756 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18757 Function::new("DATE".to_string(), f.args),
18758 ))),
18759 _ => Ok(Expression::Function(f)),
18760 },
18761 // RANGE(start, end[, step]) -> target-specific
18762 "RANGE"
18763 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
18764 {
18765 let start = f.args[0].clone();
18766 let end = f.args[1].clone();
18767 let step = f.args.get(2).cloned();
18768 match target {
18769 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
18770 // so just rename without adjusting the end argument.
18771 DialectType::Snowflake => {
18772 let mut args = vec![start, end];
18773 if let Some(s) = step {
18774 args.push(s);
18775 }
18776 Ok(Expression::Function(Box::new(Function::new(
18777 "ARRAY_GENERATE_RANGE".to_string(),
18778 args,
18779 ))))
18780 }
18781 DialectType::Spark | DialectType::Databricks => {
18782 // RANGE(start, end) -> SEQUENCE(start, end-1)
18783 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
18784 // RANGE(start, start) -> ARRAY() (empty)
18785 // RANGE(start, end, 0) -> ARRAY() (empty)
// When end is variable: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
18787
18788 // Check for constant args
18789 fn extract_i64(e: &Expression) -> Option<i64> {
18790 match e {
18791 Expression::Literal(lit)
18792 if matches!(
18793 lit.as_ref(),
18794 Literal::Number(_)
18795 ) =>
18796 {
18797 let Literal::Number(n) = lit.as_ref() else {
18798 unreachable!()
18799 };
18800 n.parse::<i64>().ok()
18801 }
18802 Expression::Neg(u) => {
18803 if let Expression::Literal(lit) = &u.this {
18804 if let Literal::Number(n) = lit.as_ref() {
18805 n.parse::<i64>().ok().map(|v| -v)
18806 } else {
18807 None
18808 }
18809 } else {
18810 None
18811 }
18812 }
18813 _ => None,
18814 }
18815 }
18816 let start_val = extract_i64(&start);
18817 let end_val = extract_i64(&end);
18818 let step_val = step.as_ref().and_then(|s| extract_i64(s));
18819
18820 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
18821 if step_val == Some(0) {
18822 return Ok(Expression::Function(Box::new(
18823 Function::new("ARRAY".to_string(), vec![]),
18824 )));
18825 }
18826 if let (Some(s), Some(e_val)) = (start_val, end_val) {
18827 if s == e_val {
18828 return Ok(Expression::Function(Box::new(
18829 Function::new("ARRAY".to_string(), vec![]),
18830 )));
18831 }
18832 }
18833
18834 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
18835 // All constants - compute new end = end - step (if step provided) or end - 1
18836 match step_val {
18837 Some(st) if st < 0 => {
18838 // Negative step: SEQUENCE(start, end - step, step)
18839 let new_end = e_val - st; // end - step (= end + |step|)
18840 let mut args =
18841 vec![start, Expression::number(new_end)];
18842 if let Some(s) = step {
18843 args.push(s);
18844 }
18845 Ok(Expression::Function(Box::new(
18846 Function::new("SEQUENCE".to_string(), args),
18847 )))
18848 }
18849 Some(st) => {
18850 let new_end = e_val - st;
18851 let mut args =
18852 vec![start, Expression::number(new_end)];
18853 if let Some(s) = step {
18854 args.push(s);
18855 }
18856 Ok(Expression::Function(Box::new(
18857 Function::new("SEQUENCE".to_string(), args),
18858 )))
18859 }
18860 None => {
18861 // No step: SEQUENCE(start, end - 1)
18862 let new_end = e_val - 1;
18863 Ok(Expression::Function(Box::new(
18864 Function::new(
18865 "SEQUENCE".to_string(),
18866 vec![
18867 start,
18868 Expression::number(new_end),
18869 ],
18870 ),
18871 )))
18872 }
18873 }
18874 } else {
18875 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
18876 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
18877 end.clone(),
18878 Expression::number(1),
18879 )));
18880 let cond = Expression::Lt(Box::new(BinaryOp::new(
18881 Expression::Paren(Box::new(Paren {
18882 this: end_m1.clone(),
18883 trailing_comments: Vec::new(),
18884 })),
18885 start.clone(),
18886 )));
18887 let empty = Expression::Function(Box::new(
18888 Function::new("ARRAY".to_string(), vec![]),
18889 ));
18890 let mut seq_args = vec![
18891 start,
18892 Expression::Paren(Box::new(Paren {
18893 this: end_m1,
18894 trailing_comments: Vec::new(),
18895 })),
18896 ];
18897 if let Some(s) = step {
18898 seq_args.push(s);
18899 }
18900 let seq = Expression::Function(Box::new(
18901 Function::new("SEQUENCE".to_string(), seq_args),
18902 ));
18903 Ok(Expression::IfFunc(Box::new(
18904 crate::expressions::IfFunc {
18905 condition: cond,
18906 true_value: empty,
18907 false_value: Some(seq),
18908 original_name: None,
18909 inferred_type: None,
18910 },
18911 )))
18912 }
18913 }
18914 DialectType::SQLite => {
18915 // RANGE(start, end) -> GENERATE_SERIES(start, end)
18916 // The subquery wrapping is handled at the Alias level
18917 let mut args = vec![start, end];
18918 if let Some(s) = step {
18919 args.push(s);
18920 }
18921 Ok(Expression::Function(Box::new(Function::new(
18922 "GENERATE_SERIES".to_string(),
18923 args,
18924 ))))
18925 }
18926 _ => Ok(Expression::Function(f)),
18927 }
18928 }
18929 // ARRAY_REVERSE_SORT -> target-specific
18930 // (handled above as well, but also need DuckDB self-normalization)
18931 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
18932 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
18933 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18934 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
18935 ))),
18936 DialectType::Spark | DialectType::Databricks => {
18937 Ok(Expression::Function(Box::new(Function::new(
18938 "MAP_FROM_ARRAYS".to_string(),
18939 f.args,
18940 ))))
18941 }
18942 _ => Ok(Expression::Function(Box::new(Function::new(
18943 "MAP".to_string(),
18944 f.args,
18945 )))),
18946 },
18947 // VARIANCE(x) -> varSamp(x) for ClickHouse
18948 "VARIANCE" if f.args.len() == 1 => match target {
18949 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
18950 Function::new("varSamp".to_string(), f.args),
18951 ))),
18952 _ => Ok(Expression::Function(f)),
18953 },
18954 // STDDEV(x) -> stddevSamp(x) for ClickHouse
18955 "STDDEV" if f.args.len() == 1 => match target {
18956 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
18957 Function::new("stddevSamp".to_string(), f.args),
18958 ))),
18959 _ => Ok(Expression::Function(f)),
18960 },
18961 // ISINF(x) -> IS_INF(x) for BigQuery
18962 "ISINF" if f.args.len() == 1 => match target {
18963 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18964 Function::new("IS_INF".to_string(), f.args),
18965 ))),
18966 _ => Ok(Expression::Function(f)),
18967 },
18968 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
18969 "CONTAINS" if f.args.len() == 2 => match target {
18970 DialectType::Spark
18971 | DialectType::Databricks
18972 | DialectType::Hive => Ok(Expression::Function(Box::new(
18973 Function::new("ARRAY_CONTAINS".to_string(), f.args),
18974 ))),
18975 _ => Ok(Expression::Function(f)),
18976 },
18977 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
18978 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
18979 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18980 Ok(Expression::Function(Box::new(Function::new(
18981 "CONTAINS".to_string(),
18982 f.args,
18983 ))))
18984 }
18985 DialectType::DuckDB => Ok(Expression::Function(Box::new(
18986 Function::new("ARRAY_CONTAINS".to_string(), f.args),
18987 ))),
18988 _ => Ok(Expression::Function(f)),
18989 },
18990 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
18991 "TO_UNIXTIME" if f.args.len() == 1 => match target {
18992 DialectType::Hive
18993 | DialectType::Spark
18994 | DialectType::Databricks => Ok(Expression::Function(Box::new(
18995 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
18996 ))),
18997 _ => Ok(Expression::Function(f)),
18998 },
18999 // FROM_UNIXTIME(x) -> target-specific
19000 "FROM_UNIXTIME" if f.args.len() == 1 => {
19001 match target {
19002 DialectType::Hive
19003 | DialectType::Spark
19004 | DialectType::Databricks
19005 | DialectType::Presto
19006 | DialectType::Trino => Ok(Expression::Function(f)),
19007 DialectType::DuckDB => {
19008 // DuckDB: TO_TIMESTAMP(x)
19009 let arg = f.args.into_iter().next().unwrap();
19010 Ok(Expression::Function(Box::new(Function::new(
19011 "TO_TIMESTAMP".to_string(),
19012 vec![arg],
19013 ))))
19014 }
19015 DialectType::PostgreSQL => {
19016 // PG: TO_TIMESTAMP(col)
19017 let arg = f.args.into_iter().next().unwrap();
19018 Ok(Expression::Function(Box::new(Function::new(
19019 "TO_TIMESTAMP".to_string(),
19020 vec![arg],
19021 ))))
19022 }
19023 DialectType::Redshift => {
19024 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
19025 let arg = f.args.into_iter().next().unwrap();
19026 let epoch_ts = Expression::Literal(Box::new(
19027 Literal::Timestamp("epoch".to_string()),
19028 ));
19029 let interval = Expression::Interval(Box::new(
19030 crate::expressions::Interval {
19031 this: Some(Expression::string("1 SECOND")),
19032 unit: None,
19033 },
19034 ));
19035 let mul =
19036 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
19037 let add =
19038 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
19039 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
19040 this: add,
19041 trailing_comments: Vec::new(),
19042 })))
19043 }
19044 _ => Ok(Expression::Function(f)),
19045 }
19046 }
19047 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
19048 "FROM_UNIXTIME"
19049 if f.args.len() == 2
19050 && matches!(
19051 source,
19052 DialectType::Hive
19053 | DialectType::Spark
19054 | DialectType::Databricks
19055 ) =>
19056 {
19057 let mut args = f.args;
19058 let unix_ts = args.remove(0);
19059 let fmt_expr = args.remove(0);
19060 match target {
19061 DialectType::DuckDB => {
19062 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
19063 let to_ts = Expression::Function(Box::new(Function::new(
19064 "TO_TIMESTAMP".to_string(),
19065 vec![unix_ts],
19066 )));
19067 if let Expression::Literal(lit) = &fmt_expr {
19068 if let crate::expressions::Literal::String(s) =
19069 lit.as_ref()
19070 {
19071 let c_fmt = Self::hive_format_to_c_format(s);
19072 Ok(Expression::Function(Box::new(Function::new(
19073 "STRFTIME".to_string(),
19074 vec![to_ts, Expression::string(&c_fmt)],
19075 ))))
19076 } else {
19077 Ok(Expression::Function(Box::new(Function::new(
19078 "STRFTIME".to_string(),
19079 vec![to_ts, fmt_expr],
19080 ))))
19081 }
19082 } else {
19083 Ok(Expression::Function(Box::new(Function::new(
19084 "STRFTIME".to_string(),
19085 vec![to_ts, fmt_expr],
19086 ))))
19087 }
19088 }
19089 DialectType::Presto
19090 | DialectType::Trino
19091 | DialectType::Athena => {
19092 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
19093 let from_unix =
19094 Expression::Function(Box::new(Function::new(
19095 "FROM_UNIXTIME".to_string(),
19096 vec![unix_ts],
19097 )));
19098 if let Expression::Literal(lit) = &fmt_expr {
19099 if let crate::expressions::Literal::String(s) =
19100 lit.as_ref()
19101 {
19102 let p_fmt = Self::hive_format_to_presto_format(s);
19103 Ok(Expression::Function(Box::new(Function::new(
19104 "DATE_FORMAT".to_string(),
19105 vec![from_unix, Expression::string(&p_fmt)],
19106 ))))
19107 } else {
19108 Ok(Expression::Function(Box::new(Function::new(
19109 "DATE_FORMAT".to_string(),
19110 vec![from_unix, fmt_expr],
19111 ))))
19112 }
19113 } else {
19114 Ok(Expression::Function(Box::new(Function::new(
19115 "DATE_FORMAT".to_string(),
19116 vec![from_unix, fmt_expr],
19117 ))))
19118 }
19119 }
19120 _ => {
19121 // Keep as FROM_UNIXTIME(x, fmt) for other targets
19122 Ok(Expression::Function(Box::new(Function::new(
19123 "FROM_UNIXTIME".to_string(),
19124 vec![unix_ts, fmt_expr],
19125 ))))
19126 }
19127 }
19128 }
19129 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
19130 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
19131 let unit_str = Self::get_unit_str_static(&f.args[0]);
19132 // Get the raw unit text preserving original case
19133 let raw_unit = match &f.args[0] {
19134 Expression::Identifier(id) => id.name.clone(),
19135 Expression::Var(v) => v.this.clone(),
19136 Expression::Literal(lit)
19137 if matches!(
19138 lit.as_ref(),
19139 crate::expressions::Literal::String(_)
19140 ) =>
19141 {
19142 let crate::expressions::Literal::String(s) = lit.as_ref()
19143 else {
19144 unreachable!()
19145 };
19146 s.clone()
19147 }
19148 Expression::Column(col) => col.name.name.clone(),
19149 _ => unit_str.clone(),
19150 };
19151 match target {
19152 DialectType::TSQL | DialectType::Fabric => {
19153 // Preserve original case of unit for TSQL
19154 let unit_name = match unit_str.as_str() {
19155 "YY" | "YYYY" => "YEAR".to_string(),
19156 "QQ" | "Q" => "QUARTER".to_string(),
19157 "MM" | "M" => "MONTH".to_string(),
19158 "WK" | "WW" => "WEEK".to_string(),
19159 "DD" | "D" | "DY" => "DAY".to_string(),
19160 "HH" => "HOUR".to_string(),
19161 "MI" | "N" => "MINUTE".to_string(),
19162 "SS" | "S" => "SECOND".to_string(),
19163 _ => raw_unit.clone(), // preserve original case
19164 };
19165 let mut args = f.args;
19166 args[0] =
19167 Expression::Identifier(Identifier::new(&unit_name));
19168 Ok(Expression::Function(Box::new(Function::new(
19169 "DATEPART".to_string(),
19170 args,
19171 ))))
19172 }
19173 DialectType::Spark | DialectType::Databricks => {
19174 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
19175 // Preserve original case for non-abbreviation units
19176 let unit = match unit_str.as_str() {
19177 "YY" | "YYYY" => "YEAR".to_string(),
19178 "QQ" | "Q" => "QUARTER".to_string(),
19179 "MM" | "M" => "MONTH".to_string(),
19180 "WK" | "WW" => "WEEK".to_string(),
19181 "DD" | "D" | "DY" => "DAY".to_string(),
19182 "HH" => "HOUR".to_string(),
19183 "MI" | "N" => "MINUTE".to_string(),
19184 "SS" | "S" => "SECOND".to_string(),
19185 _ => raw_unit, // preserve original case
19186 };
19187 Ok(Expression::Extract(Box::new(
19188 crate::expressions::ExtractFunc {
19189 this: f.args[1].clone(),
19190 field: crate::expressions::DateTimeField::Custom(
19191 unit,
19192 ),
19193 },
19194 )))
19195 }
19196 _ => Ok(Expression::Function(Box::new(Function::new(
19197 "DATE_PART".to_string(),
19198 f.args,
19199 )))),
19200 }
19201 }
19202 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
19203 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
19204 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
19205 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
19206 "DATENAME" if f.args.len() == 2 => {
19207 let unit_str = Self::get_unit_str_static(&f.args[0]);
19208 let date_expr = f.args[1].clone();
19209 match unit_str.as_str() {
19210 "MM" | "M" | "MONTH" => match target {
19211 DialectType::TSQL => {
19212 let cast_date = Expression::Cast(Box::new(
19213 crate::expressions::Cast {
19214 this: date_expr,
19215 to: DataType::Custom {
19216 name: "DATETIME2".to_string(),
19217 },
19218 trailing_comments: Vec::new(),
19219 double_colon_syntax: false,
19220 format: None,
19221 default: None,
19222 inferred_type: None,
19223 },
19224 ));
19225 Ok(Expression::Function(Box::new(Function::new(
19226 "FORMAT".to_string(),
19227 vec![cast_date, Expression::string("MMMM")],
19228 ))))
19229 }
19230 DialectType::Spark | DialectType::Databricks => {
19231 let cast_date = Expression::Cast(Box::new(
19232 crate::expressions::Cast {
19233 this: date_expr,
19234 to: DataType::Timestamp {
19235 timezone: false,
19236 precision: None,
19237 },
19238 trailing_comments: Vec::new(),
19239 double_colon_syntax: false,
19240 format: None,
19241 default: None,
19242 inferred_type: None,
19243 },
19244 ));
19245 Ok(Expression::Function(Box::new(Function::new(
19246 "DATE_FORMAT".to_string(),
19247 vec![cast_date, Expression::string("MMMM")],
19248 ))))
19249 }
19250 _ => Ok(Expression::Function(f)),
19251 },
19252 "DW" | "WEEKDAY" => match target {
19253 DialectType::TSQL => {
19254 let cast_date = Expression::Cast(Box::new(
19255 crate::expressions::Cast {
19256 this: date_expr,
19257 to: DataType::Custom {
19258 name: "DATETIME2".to_string(),
19259 },
19260 trailing_comments: Vec::new(),
19261 double_colon_syntax: false,
19262 format: None,
19263 default: None,
19264 inferred_type: None,
19265 },
19266 ));
19267 Ok(Expression::Function(Box::new(Function::new(
19268 "FORMAT".to_string(),
19269 vec![cast_date, Expression::string("dddd")],
19270 ))))
19271 }
19272 DialectType::Spark | DialectType::Databricks => {
19273 let cast_date = Expression::Cast(Box::new(
19274 crate::expressions::Cast {
19275 this: date_expr,
19276 to: DataType::Timestamp {
19277 timezone: false,
19278 precision: None,
19279 },
19280 trailing_comments: Vec::new(),
19281 double_colon_syntax: false,
19282 format: None,
19283 default: None,
19284 inferred_type: None,
19285 },
19286 ));
19287 Ok(Expression::Function(Box::new(Function::new(
19288 "DATE_FORMAT".to_string(),
19289 vec![cast_date, Expression::string("EEEE")],
19290 ))))
19291 }
19292 _ => Ok(Expression::Function(f)),
19293 },
19294 _ => Ok(Expression::Function(f)),
19295 }
19296 }
19297 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
19298 "STRING_AGG" if f.args.len() >= 2 => {
19299 let x = f.args[0].clone();
19300 let sep = f.args[1].clone();
19301 match target {
19302 DialectType::MySQL
19303 | DialectType::SingleStore
19304 | DialectType::Doris
19305 | DialectType::StarRocks => Ok(Expression::GroupConcat(
19306 Box::new(crate::expressions::GroupConcatFunc {
19307 this: x,
19308 separator: Some(sep),
19309 order_by: None,
19310 distinct: false,
19311 filter: None,
19312 limit: None,
19313 inferred_type: None,
19314 }),
19315 )),
19316 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
19317 crate::expressions::GroupConcatFunc {
19318 this: x,
19319 separator: Some(sep),
19320 order_by: None,
19321 distinct: false,
19322 filter: None,
19323 limit: None,
19324 inferred_type: None,
19325 },
19326 ))),
19327 DialectType::PostgreSQL | DialectType::Redshift => {
19328 Ok(Expression::StringAgg(Box::new(
19329 crate::expressions::StringAggFunc {
19330 this: x,
19331 separator: Some(sep),
19332 order_by: None,
19333 distinct: false,
19334 filter: None,
19335 limit: None,
19336 inferred_type: None,
19337 },
19338 )))
19339 }
19340 _ => Ok(Expression::Function(f)),
19341 }
19342 }
19343 "TRY_DIVIDE" if f.args.len() == 2 => {
19344 let mut args = f.args;
19345 let x = args.remove(0);
19346 let y = args.remove(0);
19347 match target {
19348 DialectType::Spark | DialectType::Databricks => {
19349 Ok(Expression::Function(Box::new(Function::new(
19350 "TRY_DIVIDE".to_string(),
19351 vec![x, y],
19352 ))))
19353 }
19354 DialectType::Snowflake => {
19355 let y_ref = match &y {
19356 Expression::Column(_)
19357 | Expression::Literal(_)
19358 | Expression::Identifier(_) => y.clone(),
19359 _ => Expression::Paren(Box::new(Paren {
19360 this: y.clone(),
19361 trailing_comments: vec![],
19362 })),
19363 };
19364 let x_ref = match &x {
19365 Expression::Column(_)
19366 | Expression::Literal(_)
19367 | Expression::Identifier(_) => x.clone(),
19368 _ => Expression::Paren(Box::new(Paren {
19369 this: x.clone(),
19370 trailing_comments: vec![],
19371 })),
19372 };
19373 let condition = Expression::Neq(Box::new(
19374 crate::expressions::BinaryOp::new(
19375 y_ref.clone(),
19376 Expression::number(0),
19377 ),
19378 ));
19379 let div_expr = Expression::Div(Box::new(
19380 crate::expressions::BinaryOp::new(x_ref, y_ref),
19381 ));
19382 Ok(Expression::IfFunc(Box::new(
19383 crate::expressions::IfFunc {
19384 condition,
19385 true_value: div_expr,
19386 false_value: Some(Expression::Null(Null)),
19387 original_name: Some("IFF".to_string()),
19388 inferred_type: None,
19389 },
19390 )))
19391 }
19392 DialectType::DuckDB => {
19393 let y_ref = match &y {
19394 Expression::Column(_)
19395 | Expression::Literal(_)
19396 | Expression::Identifier(_) => y.clone(),
19397 _ => Expression::Paren(Box::new(Paren {
19398 this: y.clone(),
19399 trailing_comments: vec![],
19400 })),
19401 };
19402 let x_ref = match &x {
19403 Expression::Column(_)
19404 | Expression::Literal(_)
19405 | Expression::Identifier(_) => x.clone(),
19406 _ => Expression::Paren(Box::new(Paren {
19407 this: x.clone(),
19408 trailing_comments: vec![],
19409 })),
19410 };
19411 let condition = Expression::Neq(Box::new(
19412 crate::expressions::BinaryOp::new(
19413 y_ref.clone(),
19414 Expression::number(0),
19415 ),
19416 ));
19417 let div_expr = Expression::Div(Box::new(
19418 crate::expressions::BinaryOp::new(x_ref, y_ref),
19419 ));
19420 Ok(Expression::Case(Box::new(Case {
19421 operand: None,
19422 whens: vec![(condition, div_expr)],
19423 else_: Some(Expression::Null(Null)),
19424 comments: Vec::new(),
19425 inferred_type: None,
19426 })))
19427 }
19428 _ => Ok(Expression::Function(Box::new(Function::new(
19429 "TRY_DIVIDE".to_string(),
19430 vec![x, y],
19431 )))),
19432 }
19433 }
19434 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
19435 "JSON_ARRAYAGG" => match target {
19436 DialectType::PostgreSQL => {
19437 Ok(Expression::Function(Box::new(Function {
19438 name: "JSON_AGG".to_string(),
19439 ..(*f)
19440 })))
19441 }
19442 _ => Ok(Expression::Function(f)),
19443 },
19444 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
19445 "SCHEMA_NAME" => match target {
19446 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
19447 crate::expressions::CurrentSchema { this: None },
19448 ))),
19449 DialectType::SQLite => Ok(Expression::string("main")),
19450 _ => Ok(Expression::Function(f)),
19451 },
19452 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
19453 "TO_TIMESTAMP"
19454 if f.args.len() == 2
19455 && matches!(
19456 source,
19457 DialectType::Spark
19458 | DialectType::Databricks
19459 | DialectType::Hive
19460 )
19461 && matches!(target, DialectType::DuckDB) =>
19462 {
19463 let mut args = f.args;
19464 let val = args.remove(0);
19465 let fmt_expr = args.remove(0);
19466 if let Expression::Literal(ref lit) = fmt_expr {
19467 if let Literal::String(ref s) = lit.as_ref() {
19468 // Convert Java/Spark format to C strptime format
/// Converts a Java/Spark datetime pattern into a C-style `strptime` format
/// string for the `STRPTIME` call built by the caller.
///
/// The conversion runs in two passes:
/// 1. Multi-letter pattern tokens are substituted, in a fixed order, with
///    their `%` directives.
/// 2. The substituted text is scanned once: every `%x` pair is copied through
///    untouched, while any remaining `z` becomes `%Z` and `Z` becomes `%z`.
///
/// Characters without a mapping are copied verbatim.
fn java_to_c_fmt(fmt: &str) -> String {
    // `yyyy` is listed before `yy` so a four-digit year is not consumed as
    // two two-digit years.
    const TOKEN_MAP: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        // Full weekday name. `%W` would mean week-of-year number in
        // strptime-style formats, so the matching directive is `%A`.
        ("EEEE", "%A"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];

    let substituted = TOKEN_MAP
        .iter()
        .fold(fmt.to_string(), |acc, &(java_token, c_directive)| {
            acc.replace(java_token, c_directive)
        });

    let mut converted = String::with_capacity(substituted.len());
    let mut remaining = substituted.chars();
    while let Some(ch) = remaining.next() {
        match ch {
            '%' => {
                // Copy an existing directive as a unit so its letter is not
                // re-interpreted below; a trailing lone `%` is kept as-is.
                converted.push('%');
                if let Some(directive) = remaining.next() {
                    converted.push(directive);
                }
            }
            // Time-zone letters swap case between the two notations.
            'z' => converted.push_str("%Z"),
            'Z' => converted.push_str("%z"),
            other => converted.push(other),
        }
    }
    converted
}
19501 let c_fmt = java_to_c_fmt(s);
19502 Ok(Expression::Function(Box::new(Function::new(
19503 "STRPTIME".to_string(),
19504 vec![val, Expression::string(&c_fmt)],
19505 ))))
19506 } else {
19507 Ok(Expression::Function(Box::new(Function::new(
19508 "STRPTIME".to_string(),
19509 vec![val, fmt_expr],
19510 ))))
19511 }
19512 } else {
19513 Ok(Expression::Function(Box::new(Function::new(
19514 "STRPTIME".to_string(),
19515 vec![val, fmt_expr],
19516 ))))
19517 }
19518 }
19519 // TO_DATE(x) 1-arg from Doris: date conversion
19520 "TO_DATE"
19521 if f.args.len() == 1
19522 && matches!(
19523 source,
19524 DialectType::Doris | DialectType::StarRocks
19525 ) =>
19526 {
19527 let arg = f.args.into_iter().next().unwrap();
19528 match target {
19529 DialectType::Oracle
19530 | DialectType::DuckDB
19531 | DialectType::TSQL => {
19532 // CAST(x AS DATE)
19533 Ok(Expression::Cast(Box::new(Cast {
19534 this: arg,
19535 to: DataType::Date,
19536 double_colon_syntax: false,
19537 trailing_comments: vec![],
19538 format: None,
19539 default: None,
19540 inferred_type: None,
19541 })))
19542 }
19543 DialectType::MySQL | DialectType::SingleStore => {
19544 // DATE(x)
19545 Ok(Expression::Function(Box::new(Function::new(
19546 "DATE".to_string(),
19547 vec![arg],
19548 ))))
19549 }
19550 _ => {
19551 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
19552 Ok(Expression::Function(Box::new(Function::new(
19553 "TO_DATE".to_string(),
19554 vec![arg],
19555 ))))
19556 }
19557 }
19558 }
19559 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
19560 "TO_DATE"
19561 if f.args.len() == 1
19562 && matches!(
19563 source,
19564 DialectType::Spark
19565 | DialectType::Databricks
19566 | DialectType::Hive
19567 ) =>
19568 {
19569 let arg = f.args.into_iter().next().unwrap();
19570 match target {
19571 DialectType::DuckDB => {
19572 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
19573 Ok(Expression::TryCast(Box::new(Cast {
19574 this: arg,
19575 to: DataType::Date,
19576 double_colon_syntax: false,
19577 trailing_comments: vec![],
19578 format: None,
19579 default: None,
19580 inferred_type: None,
19581 })))
19582 }
19583 DialectType::Presto
19584 | DialectType::Trino
19585 | DialectType::Athena => {
19586 // CAST(CAST(x AS TIMESTAMP) AS DATE)
19587 Ok(Self::double_cast_timestamp_date(arg))
19588 }
19589 DialectType::Snowflake => {
19590 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
19591 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
19592 Ok(Expression::Function(Box::new(Function::new(
19593 "TRY_TO_DATE".to_string(),
19594 vec![arg, Expression::string("yyyy-mm-DD")],
19595 ))))
19596 }
19597 _ => {
19598 // Default: keep as TO_DATE(x)
19599 Ok(Expression::Function(Box::new(Function::new(
19600 "TO_DATE".to_string(),
19601 vec![arg],
19602 ))))
19603 }
19604 }
19605 }
19606 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
19607 "TO_DATE"
19608 if f.args.len() == 2
19609 && matches!(
19610 source,
19611 DialectType::Spark
19612 | DialectType::Databricks
19613 | DialectType::Hive
19614 ) =>
19615 {
19616 let mut args = f.args;
19617 let val = args.remove(0);
19618 let fmt_expr = args.remove(0);
19619 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
19620
19621 if is_default_format {
19622 // Default format: same as 1-arg form
19623 match target {
19624 DialectType::DuckDB => {
19625 Ok(Expression::TryCast(Box::new(Cast {
19626 this: val,
19627 to: DataType::Date,
19628 double_colon_syntax: false,
19629 trailing_comments: vec![],
19630 format: None,
19631 default: None,
19632 inferred_type: None,
19633 })))
19634 }
19635 DialectType::Presto
19636 | DialectType::Trino
19637 | DialectType::Athena => {
19638 Ok(Self::double_cast_timestamp_date(val))
19639 }
19640 DialectType::Snowflake => {
19641 // TRY_TO_DATE(x, format) with Snowflake format mapping
19642 let sf_fmt = "yyyy-MM-dd"
19643 .replace("yyyy", "yyyy")
19644 .replace("MM", "mm")
19645 .replace("dd", "DD");
19646 Ok(Expression::Function(Box::new(Function::new(
19647 "TRY_TO_DATE".to_string(),
19648 vec![val, Expression::string(&sf_fmt)],
19649 ))))
19650 }
19651 _ => Ok(Expression::Function(Box::new(Function::new(
19652 "TO_DATE".to_string(),
19653 vec![val],
19654 )))),
19655 }
19656 } else {
19657 // Non-default format: use format-based parsing
19658 if let Expression::Literal(ref lit) = fmt_expr {
19659 if let Literal::String(ref s) = lit.as_ref() {
19660 match target {
19661 DialectType::DuckDB => {
19662 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Converts a Java/Spark datetime pattern into a C-style `strptime` format
/// string for the `TRY_STRPTIME` call built by the caller.
///
/// Multi-letter pattern tokens are first substituted with `%` directives in
/// a fixed order. The result is then scanned once: `%x` pairs are copied
/// through unchanged, a remaining `z` becomes `%Z`, and a remaining `Z`
/// becomes `%z`. Everything else is copied verbatim.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // `yyyy` must be handled before `yy`; otherwise a four-digit year would
    // be rewritten as two two-digit years.
    const TOKEN_MAP: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        // Full weekday name is `%A`; `%W` is the week-of-year number in
        // strptime-style formats and would parse the wrong field.
        ("EEEE", "%A"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];

    let mut substituted = fmt.to_string();
    for &(java_token, c_directive) in TOKEN_MAP.iter() {
        substituted = substituted.replace(java_token, c_directive);
    }

    let mut converted = String::with_capacity(substituted.len());
    let mut pending = substituted.chars().peekable();
    while let Some(ch) = pending.next() {
        if ch == '%' && pending.peek().is_some() {
            // Existing directive: emit both characters together so the
            // directive letter is never treated as a time-zone marker.
            converted.push(ch);
            converted.extend(pending.next());
        } else if ch == 'z' {
            converted.push_str("%Z");
        } else if ch == 'Z' {
            converted.push_str("%z");
        } else {
            // Includes a lone trailing `%`, which is kept as-is.
            converted.push(ch);
        }
    }
    converted
}
19698 let c_fmt = java_to_c_fmt_todate(s);
19699 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
19700 let try_strptime = Expression::Function(
19701 Box::new(Function::new(
19702 "TRY_STRPTIME".to_string(),
19703 vec![val, Expression::string(&c_fmt)],
19704 )),
19705 );
19706 let cast_ts =
19707 Expression::Cast(Box::new(Cast {
19708 this: try_strptime,
19709 to: DataType::Timestamp {
19710 precision: None,
19711 timezone: false,
19712 },
19713 double_colon_syntax: false,
19714 trailing_comments: vec![],
19715 format: None,
19716 default: None,
19717 inferred_type: None,
19718 }));
19719 Ok(Expression::Cast(Box::new(Cast {
19720 this: cast_ts,
19721 to: DataType::Date,
19722 double_colon_syntax: false,
19723 trailing_comments: vec![],
19724 format: None,
19725 default: None,
19726 inferred_type: None,
19727 })))
19728 }
19729 DialectType::Presto
19730 | DialectType::Trino
19731 | DialectType::Athena => {
19732 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
19733 let p_fmt = s
19734 .replace("yyyy", "%Y")
19735 .replace("SSSSSS", "%f")
19736 .replace("MM", "%m")
19737 .replace("dd", "%d")
19738 .replace("HH", "%H")
19739 .replace("mm", "%M")
19740 .replace("ss", "%S")
19741 .replace("yy", "%y");
19742 let date_parse = Expression::Function(
19743 Box::new(Function::new(
19744 "DATE_PARSE".to_string(),
19745 vec![val, Expression::string(&p_fmt)],
19746 )),
19747 );
19748 Ok(Expression::Cast(Box::new(Cast {
19749 this: date_parse,
19750 to: DataType::Date,
19751 double_colon_syntax: false,
19752 trailing_comments: vec![],
19753 format: None,
19754 default: None,
19755 inferred_type: None,
19756 })))
19757 }
19758 DialectType::Snowflake => {
19759 // TRY_TO_DATE(x, snowflake_fmt)
19760 Ok(Expression::Function(Box::new(
19761 Function::new(
19762 "TRY_TO_DATE".to_string(),
19763 vec![val, Expression::string(s)],
19764 ),
19765 )))
19766 }
19767 _ => Ok(Expression::Function(Box::new(
19768 Function::new(
19769 "TO_DATE".to_string(),
19770 vec![val, fmt_expr],
19771 ),
19772 ))),
19773 }
19774 } else {
19775 Ok(Expression::Function(Box::new(Function::new(
19776 "TO_DATE".to_string(),
19777 vec![val, fmt_expr],
19778 ))))
19779 }
19780 } else {
19781 Ok(Expression::Function(Box::new(Function::new(
19782 "TO_DATE".to_string(),
19783 vec![val, fmt_expr],
19784 ))))
19785 }
19786 }
19787 }
19788 // TO_TIMESTAMP(x) 1-arg: epoch conversion
19789 "TO_TIMESTAMP"
19790 if f.args.len() == 1
19791 && matches!(source, DialectType::DuckDB)
19792 && matches!(
19793 target,
19794 DialectType::BigQuery
19795 | DialectType::Presto
19796 | DialectType::Trino
19797 | DialectType::Hive
19798 | DialectType::Spark
19799 | DialectType::Databricks
19800 | DialectType::Athena
19801 ) =>
19802 {
19803 let arg = f.args.into_iter().next().unwrap();
19804 let func_name = match target {
19805 DialectType::BigQuery => "TIMESTAMP_SECONDS",
19806 DialectType::Presto
19807 | DialectType::Trino
19808 | DialectType::Athena
19809 | DialectType::Hive
19810 | DialectType::Spark
19811 | DialectType::Databricks => "FROM_UNIXTIME",
19812 _ => "TO_TIMESTAMP",
19813 };
19814 Ok(Expression::Function(Box::new(Function::new(
19815 func_name.to_string(),
19816 vec![arg],
19817 ))))
19818 }
19819 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
19820 "CONCAT" if f.args.len() == 1 => {
19821 let arg = f.args.into_iter().next().unwrap();
19822 match target {
19823 DialectType::Presto
19824 | DialectType::Trino
19825 | DialectType::Athena => {
19826 // CONCAT(a) -> CAST(a AS VARCHAR)
19827 Ok(Expression::Cast(Box::new(Cast {
19828 this: arg,
19829 to: DataType::VarChar {
19830 length: None,
19831 parenthesized_length: false,
19832 },
19833 trailing_comments: vec![],
19834 double_colon_syntax: false,
19835 format: None,
19836 default: None,
19837 inferred_type: None,
19838 })))
19839 }
19840 DialectType::TSQL => {
19841 // CONCAT(a) -> a
19842 Ok(arg)
19843 }
19844 DialectType::DuckDB => {
19845 // Keep CONCAT(a) for DuckDB (native support)
19846 Ok(Expression::Function(Box::new(Function::new(
19847 "CONCAT".to_string(),
19848 vec![arg],
19849 ))))
19850 }
19851 DialectType::Spark | DialectType::Databricks => {
19852 let coalesced = Expression::Coalesce(Box::new(
19853 crate::expressions::VarArgFunc {
19854 expressions: vec![arg, Expression::string("")],
19855 original_name: None,
19856 inferred_type: None,
19857 },
19858 ));
19859 Ok(Expression::Function(Box::new(Function::new(
19860 "CONCAT".to_string(),
19861 vec![coalesced],
19862 ))))
19863 }
19864 _ => Ok(Expression::Function(Box::new(Function::new(
19865 "CONCAT".to_string(),
19866 vec![arg],
19867 )))),
19868 }
19869 }
19870 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
19871 "REGEXP_EXTRACT"
19872 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
19873 {
19874 // If group_index is 0, drop it
19875 let drop_group = match &f.args[2] {
19876 Expression::Literal(lit)
19877 if matches!(lit.as_ref(), Literal::Number(_)) =>
19878 {
19879 let Literal::Number(n) = lit.as_ref() else {
19880 unreachable!()
19881 };
19882 n == "0"
19883 }
19884 _ => false,
19885 };
19886 if drop_group {
19887 let mut args = f.args;
19888 args.truncate(2);
19889 Ok(Expression::Function(Box::new(Function::new(
19890 "REGEXP_EXTRACT".to_string(),
19891 args,
19892 ))))
19893 } else {
19894 Ok(Expression::Function(f))
19895 }
19896 }
19897 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
19898 "REGEXP_EXTRACT"
19899 if f.args.len() == 4
19900 && matches!(target, DialectType::Snowflake) =>
19901 {
19902 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
19903 let mut args = f.args;
19904 let this = args.remove(0);
19905 let pattern = args.remove(0);
19906 let group = args.remove(0);
19907 let flags = args.remove(0);
19908 Ok(Expression::Function(Box::new(Function::new(
19909 "REGEXP_SUBSTR".to_string(),
19910 vec![
19911 this,
19912 pattern,
19913 Expression::number(1),
19914 Expression::number(1),
19915 flags,
19916 group,
19917 ],
19918 ))))
19919 }
19920 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
19921 "REGEXP_SUBSTR"
19922 if f.args.len() == 3
19923 && matches!(
19924 target,
19925 DialectType::DuckDB
19926 | DialectType::Presto
19927 | DialectType::Trino
19928 | DialectType::Spark
19929 | DialectType::Databricks
19930 ) =>
19931 {
19932 let mut args = f.args;
19933 let this = args.remove(0);
19934 let pattern = args.remove(0);
19935 let position = args.remove(0);
19936 // Wrap subject in SUBSTRING(this, position) to apply the offset
19937 let substring_expr = Expression::Function(Box::new(Function::new(
19938 "SUBSTRING".to_string(),
19939 vec![this, position],
19940 )));
19941 let target_name = match target {
19942 DialectType::DuckDB => "REGEXP_EXTRACT",
19943 _ => "REGEXP_EXTRACT",
19944 };
19945 Ok(Expression::Function(Box::new(Function::new(
19946 target_name.to_string(),
19947 vec![substring_expr, pattern],
19948 ))))
19949 }
19950 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
19951 "TO_DAYS" if f.args.len() == 1 => {
19952 let x = f.args.into_iter().next().unwrap();
19953 let epoch = Expression::string("0000-01-01");
19954 // Build the final target-specific expression directly
19955 let datediff_expr = match target {
19956 DialectType::MySQL | DialectType::SingleStore => {
19957 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
19958 Expression::Function(Box::new(Function::new(
19959 "DATEDIFF".to_string(),
19960 vec![x, epoch],
19961 )))
19962 }
19963 DialectType::DuckDB => {
19964 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
19965 let cast_epoch = Expression::Cast(Box::new(Cast {
19966 this: epoch,
19967 to: DataType::Date,
19968 trailing_comments: Vec::new(),
19969 double_colon_syntax: false,
19970 format: None,
19971 default: None,
19972 inferred_type: None,
19973 }));
19974 let cast_x = Expression::Cast(Box::new(Cast {
19975 this: x,
19976 to: DataType::Date,
19977 trailing_comments: Vec::new(),
19978 double_colon_syntax: false,
19979 format: None,
19980 default: None,
19981 inferred_type: None,
19982 }));
19983 Expression::Function(Box::new(Function::new(
19984 "DATE_DIFF".to_string(),
19985 vec![Expression::string("DAY"), cast_epoch, cast_x],
19986 )))
19987 }
19988 DialectType::Presto
19989 | DialectType::Trino
19990 | DialectType::Athena => {
19991 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
19992 let cast_epoch = Self::double_cast_timestamp_date(epoch);
19993 let cast_x = Self::double_cast_timestamp_date(x);
19994 Expression::Function(Box::new(Function::new(
19995 "DATE_DIFF".to_string(),
19996 vec![Expression::string("DAY"), cast_epoch, cast_x],
19997 )))
19998 }
19999 _ => {
20000 // Default: (DATEDIFF(x, '0000-01-01') + 1)
20001 Expression::Function(Box::new(Function::new(
20002 "DATEDIFF".to_string(),
20003 vec![x, epoch],
20004 )))
20005 }
20006 };
20007 let add_one = Expression::Add(Box::new(BinaryOp::new(
20008 datediff_expr,
20009 Expression::number(1),
20010 )));
20011 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
20012 this: add_one,
20013 trailing_comments: Vec::new(),
20014 })))
20015 }
20016 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
20017 "STR_TO_DATE"
20018 if f.args.len() == 2
20019 && matches!(
20020 target,
20021 DialectType::Presto | DialectType::Trino
20022 ) =>
20023 {
20024 let mut args = f.args;
20025 let x = args.remove(0);
20026 let format_expr = args.remove(0);
20027 // Check if the format contains time components
20028 let has_time = if let Expression::Literal(ref lit) = format_expr {
20029 if let Literal::String(ref fmt) = lit.as_ref() {
20030 fmt.contains("%H")
20031 || fmt.contains("%T")
20032 || fmt.contains("%M")
20033 || fmt.contains("%S")
20034 || fmt.contains("%I")
20035 || fmt.contains("%p")
20036 } else {
20037 false
20038 }
20039 } else {
20040 false
20041 };
20042 let date_parse = Expression::Function(Box::new(Function::new(
20043 "DATE_PARSE".to_string(),
20044 vec![x, format_expr],
20045 )));
20046 if has_time {
20047 // Has time components: just DATE_PARSE
20048 Ok(date_parse)
20049 } else {
20050 // Date-only: CAST(DATE_PARSE(...) AS DATE)
20051 Ok(Expression::Cast(Box::new(Cast {
20052 this: date_parse,
20053 to: DataType::Date,
20054 trailing_comments: Vec::new(),
20055 double_colon_syntax: false,
20056 format: None,
20057 default: None,
20058 inferred_type: None,
20059 })))
20060 }
20061 }
20062 "STR_TO_DATE"
20063 if f.args.len() == 2
20064 && matches!(
20065 target,
20066 DialectType::PostgreSQL | DialectType::Redshift
20067 ) =>
20068 {
20069 let mut args = f.args;
20070 let x = args.remove(0);
20071 let fmt = args.remove(0);
20072 let pg_fmt = match fmt {
20073 Expression::Literal(lit)
20074 if matches!(lit.as_ref(), Literal::String(_)) =>
20075 {
20076 let Literal::String(s) = lit.as_ref() else {
20077 unreachable!()
20078 };
20079 Expression::string(
20080 &s.replace("%Y", "YYYY")
20081 .replace("%m", "MM")
20082 .replace("%d", "DD")
20083 .replace("%H", "HH24")
20084 .replace("%M", "MI")
20085 .replace("%S", "SS"),
20086 )
20087 }
20088 other => other,
20089 };
20090 let to_date = Expression::Function(Box::new(Function::new(
20091 "TO_DATE".to_string(),
20092 vec![x, pg_fmt],
20093 )));
20094 Ok(Expression::Cast(Box::new(Cast {
20095 this: to_date,
20096 to: DataType::Timestamp {
20097 timezone: false,
20098 precision: None,
20099 },
20100 trailing_comments: Vec::new(),
20101 double_colon_syntax: false,
20102 format: None,
20103 default: None,
20104 inferred_type: None,
20105 })))
20106 }
20107 // RANGE(start, end) -> GENERATE_SERIES for SQLite
20108 "RANGE"
20109 if (f.args.len() == 1 || f.args.len() == 2)
20110 && matches!(target, DialectType::SQLite) =>
20111 {
20112 if f.args.len() == 2 {
20113 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
20114 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
20115 let mut args = f.args;
20116 let start = args.remove(0);
20117 let end = args.remove(0);
20118 Ok(Expression::Function(Box::new(Function::new(
20119 "GENERATE_SERIES".to_string(),
20120 vec![start, end],
20121 ))))
20122 } else {
20123 Ok(Expression::Function(f))
20124 }
20125 }
20126 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
20127 // When source is Snowflake, keep as-is (args already in correct form)
20128 "UNIFORM"
20129 if matches!(target, DialectType::Snowflake)
20130 && (f.args.len() == 2 || f.args.len() == 3) =>
20131 {
20132 if matches!(source, DialectType::Snowflake) {
20133 // Snowflake -> Snowflake: keep as-is
20134 Ok(Expression::Function(f))
20135 } else {
20136 let mut args = f.args;
20137 let low = args.remove(0);
20138 let high = args.remove(0);
20139 let random = if !args.is_empty() {
20140 let seed = args.remove(0);
20141 Expression::Function(Box::new(Function::new(
20142 "RANDOM".to_string(),
20143 vec![seed],
20144 )))
20145 } else {
20146 Expression::Function(Box::new(Function::new(
20147 "RANDOM".to_string(),
20148 vec![],
20149 )))
20150 };
20151 Ok(Expression::Function(Box::new(Function::new(
20152 "UNIFORM".to_string(),
20153 vec![low, high, random],
20154 ))))
20155 }
20156 }
20157 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20158 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
20159 let mut args = f.args;
20160 let ts_arg = args.remove(0);
20161 let tz_arg = args.remove(0);
20162 // Cast string literal to TIMESTAMP for all targets
20163 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20164 {
20165 Expression::Cast(Box::new(Cast {
20166 this: ts_arg,
20167 to: DataType::Timestamp {
20168 timezone: false,
20169 precision: None,
20170 },
20171 trailing_comments: vec![],
20172 double_colon_syntax: false,
20173 format: None,
20174 default: None,
20175 inferred_type: None,
20176 }))
20177 } else {
20178 ts_arg
20179 };
20180 match target {
20181 DialectType::Spark | DialectType::Databricks => {
20182 Ok(Expression::Function(Box::new(Function::new(
20183 "TO_UTC_TIMESTAMP".to_string(),
20184 vec![ts_cast, tz_arg],
20185 ))))
20186 }
20187 DialectType::Snowflake => {
20188 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
20189 Ok(Expression::Function(Box::new(Function::new(
20190 "CONVERT_TIMEZONE".to_string(),
20191 vec![tz_arg, Expression::string("UTC"), ts_cast],
20192 ))))
20193 }
20194 DialectType::Presto
20195 | DialectType::Trino
20196 | DialectType::Athena => {
20197 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
20198 let wtz = Expression::Function(Box::new(Function::new(
20199 "WITH_TIMEZONE".to_string(),
20200 vec![ts_cast, tz_arg],
20201 )));
20202 Ok(Expression::AtTimeZone(Box::new(
20203 crate::expressions::AtTimeZone {
20204 this: wtz,
20205 zone: Expression::string("UTC"),
20206 },
20207 )))
20208 }
20209 DialectType::BigQuery => {
20210 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
20211 let cast_dt = Expression::Cast(Box::new(Cast {
20212 this: if let Expression::Cast(c) = ts_cast {
20213 c.this
20214 } else {
20215 ts_cast.clone()
20216 },
20217 to: DataType::Custom {
20218 name: "DATETIME".to_string(),
20219 },
20220 trailing_comments: vec![],
20221 double_colon_syntax: false,
20222 format: None,
20223 default: None,
20224 inferred_type: None,
20225 }));
20226 let ts_func =
20227 Expression::Function(Box::new(Function::new(
20228 "TIMESTAMP".to_string(),
20229 vec![cast_dt, tz_arg],
20230 )));
20231 Ok(Expression::Function(Box::new(Function::new(
20232 "DATETIME".to_string(),
20233 vec![ts_func, Expression::string("UTC")],
20234 ))))
20235 }
20236 _ => {
20237 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
20238 let atz1 = Expression::AtTimeZone(Box::new(
20239 crate::expressions::AtTimeZone {
20240 this: ts_cast,
20241 zone: tz_arg,
20242 },
20243 ));
20244 Ok(Expression::AtTimeZone(Box::new(
20245 crate::expressions::AtTimeZone {
20246 this: atz1,
20247 zone: Expression::string("UTC"),
20248 },
20249 )))
20250 }
20251 }
20252 }
20253 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20254 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
20255 let mut args = f.args;
20256 let ts_arg = args.remove(0);
20257 let tz_arg = args.remove(0);
20258 // Cast string literal to TIMESTAMP
20259 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20260 {
20261 Expression::Cast(Box::new(Cast {
20262 this: ts_arg,
20263 to: DataType::Timestamp {
20264 timezone: false,
20265 precision: None,
20266 },
20267 trailing_comments: vec![],
20268 double_colon_syntax: false,
20269 format: None,
20270 default: None,
20271 inferred_type: None,
20272 }))
20273 } else {
20274 ts_arg
20275 };
20276 match target {
20277 DialectType::Spark | DialectType::Databricks => {
20278 Ok(Expression::Function(Box::new(Function::new(
20279 "FROM_UTC_TIMESTAMP".to_string(),
20280 vec![ts_cast, tz_arg],
20281 ))))
20282 }
20283 DialectType::Presto
20284 | DialectType::Trino
20285 | DialectType::Athena => {
20286 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
20287 Ok(Expression::Function(Box::new(Function::new(
20288 "AT_TIMEZONE".to_string(),
20289 vec![ts_cast, tz_arg],
20290 ))))
20291 }
20292 DialectType::Snowflake => {
20293 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
20294 Ok(Expression::Function(Box::new(Function::new(
20295 "CONVERT_TIMEZONE".to_string(),
20296 vec![Expression::string("UTC"), tz_arg, ts_cast],
20297 ))))
20298 }
20299 _ => {
20300 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
20301 Ok(Expression::AtTimeZone(Box::new(
20302 crate::expressions::AtTimeZone {
20303 this: ts_cast,
20304 zone: tz_arg,
20305 },
20306 )))
20307 }
20308 }
20309 }
20310 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
20311 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
20312 let name = match target {
20313 DialectType::Snowflake => "OBJECT_CONSTRUCT",
20314 _ => "MAP",
20315 };
20316 Ok(Expression::Function(Box::new(Function::new(
20317 name.to_string(),
20318 f.args,
20319 ))))
20320 }
20321 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
20322 "STR_TO_MAP" if f.args.len() >= 1 => match target {
20323 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20324 Ok(Expression::Function(Box::new(Function::new(
20325 "SPLIT_TO_MAP".to_string(),
20326 f.args,
20327 ))))
20328 }
20329 _ => Ok(Expression::Function(f)),
20330 },
20331 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
20332 "TIME_TO_STR" if f.args.len() == 2 => {
20333 let mut args = f.args;
20334 let this = args.remove(0);
20335 let fmt_expr = args.remove(0);
20336 let format = if let Expression::Literal(lit) = fmt_expr {
20337 if let Literal::String(s) = lit.as_ref() {
20338 s.clone()
20339 } else {
20340 String::new()
20341 }
20342 } else {
20343 "%Y-%m-%d %H:%M:%S".to_string()
20344 };
20345 Ok(Expression::TimeToStr(Box::new(
20346 crate::expressions::TimeToStr {
20347 this: Box::new(this),
20348 format,
20349 culture: None,
20350 zone: None,
20351 },
20352 )))
20353 }
20354 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
20355 "STR_TO_TIME" if f.args.len() == 2 => {
20356 let mut args = f.args;
20357 let this = args.remove(0);
20358 let fmt_expr = args.remove(0);
20359 let format = if let Expression::Literal(lit) = fmt_expr {
20360 if let Literal::String(s) = lit.as_ref() {
20361 s.clone()
20362 } else {
20363 String::new()
20364 }
20365 } else {
20366 "%Y-%m-%d %H:%M:%S".to_string()
20367 };
20368 Ok(Expression::StrToTime(Box::new(
20369 crate::expressions::StrToTime {
20370 this: Box::new(this),
20371 format,
20372 zone: None,
20373 safe: None,
20374 target_type: None,
20375 },
20376 )))
20377 }
20378 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
20379 "STR_TO_UNIX" if f.args.len() >= 1 => {
20380 let mut args = f.args;
20381 let this = args.remove(0);
20382 let format = if !args.is_empty() {
20383 if let Expression::Literal(lit) = args.remove(0) {
20384 if let Literal::String(s) = lit.as_ref() {
20385 Some(s.clone())
20386 } else {
20387 None
20388 }
20389 } else {
20390 None
20391 }
20392 } else {
20393 None
20394 };
20395 Ok(Expression::StrToUnix(Box::new(
20396 crate::expressions::StrToUnix {
20397 this: Some(Box::new(this)),
20398 format,
20399 },
20400 )))
20401 }
20402 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
20403 "TIME_TO_UNIX" if f.args.len() == 1 => {
20404 let mut args = f.args;
20405 let this = args.remove(0);
20406 Ok(Expression::TimeToUnix(Box::new(
20407 crate::expressions::UnaryFunc {
20408 this,
20409 original_name: None,
20410 inferred_type: None,
20411 },
20412 )))
20413 }
20414 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
20415 "UNIX_TO_STR" if f.args.len() >= 1 => {
20416 let mut args = f.args;
20417 let this = args.remove(0);
20418 let format = if !args.is_empty() {
20419 if let Expression::Literal(lit) = args.remove(0) {
20420 if let Literal::String(s) = lit.as_ref() {
20421 Some(s.clone())
20422 } else {
20423 None
20424 }
20425 } else {
20426 None
20427 }
20428 } else {
20429 None
20430 };
20431 Ok(Expression::UnixToStr(Box::new(
20432 crate::expressions::UnixToStr {
20433 this: Box::new(this),
20434 format,
20435 },
20436 )))
20437 }
20438 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
20439 "UNIX_TO_TIME" if f.args.len() == 1 => {
20440 let mut args = f.args;
20441 let this = args.remove(0);
20442 Ok(Expression::UnixToTime(Box::new(
20443 crate::expressions::UnixToTime {
20444 this: Box::new(this),
20445 scale: None,
20446 zone: None,
20447 hours: None,
20448 minutes: None,
20449 format: None,
20450 target_type: None,
20451 },
20452 )))
20453 }
20454 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
20455 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
20456 let mut args = f.args;
20457 let this = args.remove(0);
20458 Ok(Expression::TimeStrToDate(Box::new(
20459 crate::expressions::UnaryFunc {
20460 this,
20461 original_name: None,
20462 inferred_type: None,
20463 },
20464 )))
20465 }
20466 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
20467 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
20468 let mut args = f.args;
20469 let this = args.remove(0);
20470 Ok(Expression::TimeStrToTime(Box::new(
20471 crate::expressions::TimeStrToTime {
20472 this: Box::new(this),
20473 zone: None,
20474 },
20475 )))
20476 }
20477 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
20478 "MONTHS_BETWEEN" if f.args.len() == 2 => {
20479 match target {
20480 DialectType::DuckDB => {
20481 let mut args = f.args;
20482 let end_date = args.remove(0);
20483 let start_date = args.remove(0);
20484 let cast_end = Self::ensure_cast_date(end_date);
20485 let cast_start = Self::ensure_cast_date(start_date);
20486 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
20487 let dd = Expression::Function(Box::new(Function::new(
20488 "DATE_DIFF".to_string(),
20489 vec![
20490 Expression::string("MONTH"),
20491 cast_start.clone(),
20492 cast_end.clone(),
20493 ],
20494 )));
20495 let day_end =
20496 Expression::Function(Box::new(Function::new(
20497 "DAY".to_string(),
20498 vec![cast_end.clone()],
20499 )));
20500 let day_start =
20501 Expression::Function(Box::new(Function::new(
20502 "DAY".to_string(),
20503 vec![cast_start.clone()],
20504 )));
20505 let last_day_end =
20506 Expression::Function(Box::new(Function::new(
20507 "LAST_DAY".to_string(),
20508 vec![cast_end.clone()],
20509 )));
20510 let last_day_start =
20511 Expression::Function(Box::new(Function::new(
20512 "LAST_DAY".to_string(),
20513 vec![cast_start.clone()],
20514 )));
20515 let day_last_end = Expression::Function(Box::new(
20516 Function::new("DAY".to_string(), vec![last_day_end]),
20517 ));
20518 let day_last_start = Expression::Function(Box::new(
20519 Function::new("DAY".to_string(), vec![last_day_start]),
20520 ));
20521 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
20522 day_end.clone(),
20523 day_last_end,
20524 )));
20525 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
20526 day_start.clone(),
20527 day_last_start,
20528 )));
20529 let both_cond =
20530 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
20531 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
20532 day_end, day_start,
20533 )));
20534 let day_diff_paren = Expression::Paren(Box::new(
20535 crate::expressions::Paren {
20536 this: day_diff,
20537 trailing_comments: Vec::new(),
20538 },
20539 ));
20540 let frac = Expression::Div(Box::new(BinaryOp::new(
20541 day_diff_paren,
20542 Expression::Literal(Box::new(Literal::Number(
20543 "31.0".to_string(),
20544 ))),
20545 )));
20546 let case_expr = Expression::Case(Box::new(Case {
20547 operand: None,
20548 whens: vec![(both_cond, Expression::number(0))],
20549 else_: Some(frac),
20550 comments: Vec::new(),
20551 inferred_type: None,
20552 }));
20553 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
20554 }
20555 DialectType::Snowflake | DialectType::Redshift => {
20556 let mut args = f.args;
20557 let end_date = args.remove(0);
20558 let start_date = args.remove(0);
20559 let unit = Expression::Identifier(Identifier::new("MONTH"));
20560 Ok(Expression::Function(Box::new(Function::new(
20561 "DATEDIFF".to_string(),
20562 vec![unit, start_date, end_date],
20563 ))))
20564 }
20565 DialectType::Presto
20566 | DialectType::Trino
20567 | DialectType::Athena => {
20568 let mut args = f.args;
20569 let end_date = args.remove(0);
20570 let start_date = args.remove(0);
20571 Ok(Expression::Function(Box::new(Function::new(
20572 "DATE_DIFF".to_string(),
20573 vec![Expression::string("MONTH"), start_date, end_date],
20574 ))))
20575 }
20576 _ => Ok(Expression::Function(f)),
20577 }
20578 }
20579 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
20580 // Drop the roundOff arg for non-Spark targets, keep it for Spark
20581 "MONTHS_BETWEEN" if f.args.len() == 3 => {
20582 match target {
20583 DialectType::Spark | DialectType::Databricks => {
20584 Ok(Expression::Function(f))
20585 }
20586 _ => {
20587 // Drop the 3rd arg and delegate to the 2-arg logic
20588 let mut args = f.args;
20589 let end_date = args.remove(0);
20590 let start_date = args.remove(0);
20591 // Re-create as 2-arg and process
20592 let f2 = Function::new(
20593 "MONTHS_BETWEEN".to_string(),
20594 vec![end_date, start_date],
20595 );
20596 let e2 = Expression::Function(Box::new(f2));
20597 Self::cross_dialect_normalize(e2, source, target)
20598 }
20599 }
20600 }
20601 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
20602 "TO_TIMESTAMP"
20603 if f.args.len() == 1
20604 && matches!(
20605 source,
20606 DialectType::Spark
20607 | DialectType::Databricks
20608 | DialectType::Hive
20609 ) =>
20610 {
20611 let arg = f.args.into_iter().next().unwrap();
20612 Ok(Expression::Cast(Box::new(Cast {
20613 this: arg,
20614 to: DataType::Timestamp {
20615 timezone: false,
20616 precision: None,
20617 },
20618 trailing_comments: vec![],
20619 double_colon_syntax: false,
20620 format: None,
20621 default: None,
20622 inferred_type: None,
20623 })))
20624 }
20625 // STRING(x) -> CAST(x AS STRING) for Spark target
20626 "STRING"
20627 if f.args.len() == 1
20628 && matches!(
20629 source,
20630 DialectType::Spark | DialectType::Databricks
20631 ) =>
20632 {
20633 let arg = f.args.into_iter().next().unwrap();
20634 let dt = match target {
20635 DialectType::Spark
20636 | DialectType::Databricks
20637 | DialectType::Hive => DataType::Custom {
20638 name: "STRING".to_string(),
20639 },
20640 _ => DataType::Text,
20641 };
20642 Ok(Expression::Cast(Box::new(Cast {
20643 this: arg,
20644 to: dt,
20645 trailing_comments: vec![],
20646 double_colon_syntax: false,
20647 format: None,
20648 default: None,
20649 inferred_type: None,
20650 })))
20651 }
20652 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
20653 "LOGICAL_OR" if f.args.len() == 1 => {
20654 let name = match target {
20655 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
20656 _ => "LOGICAL_OR",
20657 };
20658 Ok(Expression::Function(Box::new(Function::new(
20659 name.to_string(),
20660 f.args,
20661 ))))
20662 }
20663 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
20664 "SPLIT"
20665 if f.args.len() == 2
20666 && matches!(
20667 source,
20668 DialectType::Spark
20669 | DialectType::Databricks
20670 | DialectType::Hive
20671 ) =>
20672 {
20673 let name = match target {
20674 DialectType::DuckDB => "STR_SPLIT_REGEX",
20675 DialectType::Presto
20676 | DialectType::Trino
20677 | DialectType::Athena => "REGEXP_SPLIT",
20678 DialectType::Spark
20679 | DialectType::Databricks
20680 | DialectType::Hive => "SPLIT",
20681 _ => "SPLIT",
20682 };
20683 Ok(Expression::Function(Box::new(Function::new(
20684 name.to_string(),
20685 f.args,
20686 ))))
20687 }
20688 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
20689 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
20690 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20691 Ok(Expression::Function(Box::new(Function::new(
20692 "ELEMENT_AT".to_string(),
20693 f.args,
20694 ))))
20695 }
20696 DialectType::DuckDB => {
20697 let mut args = f.args;
20698 let arr = args.remove(0);
20699 let idx = args.remove(0);
20700 Ok(Expression::Subscript(Box::new(
20701 crate::expressions::Subscript {
20702 this: arr,
20703 index: idx,
20704 },
20705 )))
20706 }
20707 _ => Ok(Expression::Function(f)),
20708 },
20709 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
20710 "ARRAY_FILTER" if f.args.len() == 2 => {
20711 let name = match target {
20712 DialectType::DuckDB => "LIST_FILTER",
20713 DialectType::StarRocks => "ARRAY_FILTER",
20714 _ => "FILTER",
20715 };
20716 Ok(Expression::Function(Box::new(Function::new(
20717 name.to_string(),
20718 f.args,
20719 ))))
20720 }
20721 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
20722 "FILTER" if f.args.len() == 2 => {
20723 let name = match target {
20724 DialectType::DuckDB => "LIST_FILTER",
20725 DialectType::StarRocks => "ARRAY_FILTER",
20726 _ => "FILTER",
20727 };
20728 Ok(Expression::Function(Box::new(Function::new(
20729 name.to_string(),
20730 f.args,
20731 ))))
20732 }
20733 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
20734 "REDUCE" if f.args.len() >= 3 => {
20735 let name = match target {
20736 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
20737 _ => "REDUCE",
20738 };
20739 Ok(Expression::Function(Box::new(Function::new(
20740 name.to_string(),
20741 f.args,
20742 ))))
20743 }
20744 // CURRENT_SCHEMA() -> dialect-specific
20745 "CURRENT_SCHEMA" => {
20746 match target {
20747 DialectType::PostgreSQL => {
20748 // PostgreSQL: CURRENT_SCHEMA (no parens)
20749 Ok(Expression::Function(Box::new(Function {
20750 name: "CURRENT_SCHEMA".to_string(),
20751 args: vec![],
20752 distinct: false,
20753 trailing_comments: vec![],
20754 use_bracket_syntax: false,
20755 no_parens: true,
20756 quoted: false,
20757 span: None,
20758 inferred_type: None,
20759 })))
20760 }
20761 DialectType::MySQL
20762 | DialectType::Doris
20763 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
20764 Function::new("SCHEMA".to_string(), vec![]),
20765 ))),
20766 DialectType::TSQL => Ok(Expression::Function(Box::new(
20767 Function::new("SCHEMA_NAME".to_string(), vec![]),
20768 ))),
20769 DialectType::SQLite => Ok(Expression::Literal(Box::new(
20770 Literal::String("main".to_string()),
20771 ))),
20772 _ => Ok(Expression::Function(f)),
20773 }
20774 }
20775 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20776 "LTRIM" if f.args.len() == 2 => match target {
20777 DialectType::Spark
20778 | DialectType::Hive
20779 | DialectType::Databricks
20780 | DialectType::ClickHouse => {
20781 let mut args = f.args;
20782 let str_expr = args.remove(0);
20783 let chars = args.remove(0);
20784 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20785 this: str_expr,
20786 characters: Some(chars),
20787 position: crate::expressions::TrimPosition::Leading,
20788 sql_standard_syntax: true,
20789 position_explicit: true,
20790 })))
20791 }
20792 _ => Ok(Expression::Function(f)),
20793 },
20794 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20795 "RTRIM" if f.args.len() == 2 => match target {
20796 DialectType::Spark
20797 | DialectType::Hive
20798 | DialectType::Databricks
20799 | DialectType::ClickHouse => {
20800 let mut args = f.args;
20801 let str_expr = args.remove(0);
20802 let chars = args.remove(0);
20803 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20804 this: str_expr,
20805 characters: Some(chars),
20806 position: crate::expressions::TrimPosition::Trailing,
20807 sql_standard_syntax: true,
20808 position_explicit: true,
20809 })))
20810 }
20811 _ => Ok(Expression::Function(f)),
20812 },
20813 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
20814 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
20815 DialectType::ClickHouse => {
20816 let mut new_f = *f;
20817 new_f.name = "arrayReverse".to_string();
20818 Ok(Expression::Function(Box::new(new_f)))
20819 }
20820 _ => Ok(Expression::Function(f)),
20821 },
20822 // UUID() -> NEWID() for TSQL/Fabric
20823 "UUID" if f.args.is_empty() => match target {
20824 DialectType::TSQL | DialectType::Fabric => {
20825 Ok(Expression::Function(Box::new(Function::new(
20826 "NEWID".to_string(),
20827 vec![],
20828 ))))
20829 }
20830 _ => Ok(Expression::Function(f)),
20831 },
20832 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
20833 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
20834 DialectType::ClickHouse => {
20835 let mut new_f = *f;
20836 new_f.name = "farmFingerprint64".to_string();
20837 Ok(Expression::Function(Box::new(new_f)))
20838 }
20839 DialectType::Redshift => {
20840 let mut new_f = *f;
20841 new_f.name = "FARMFINGERPRINT64".to_string();
20842 Ok(Expression::Function(Box::new(new_f)))
20843 }
20844 _ => Ok(Expression::Function(f)),
20845 },
20846 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
20847 "JSON_KEYS" => match target {
20848 DialectType::Databricks | DialectType::Spark => {
20849 let mut new_f = *f;
20850 new_f.name = "JSON_OBJECT_KEYS".to_string();
20851 Ok(Expression::Function(Box::new(new_f)))
20852 }
20853 DialectType::Snowflake => {
20854 let mut new_f = *f;
20855 new_f.name = "OBJECT_KEYS".to_string();
20856 Ok(Expression::Function(Box::new(new_f)))
20857 }
20858 _ => Ok(Expression::Function(f)),
20859 },
20860 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
20861 "WEEKOFYEAR" => match target {
20862 DialectType::Snowflake => {
20863 let mut new_f = *f;
20864 new_f.name = "WEEKISO".to_string();
20865 Ok(Expression::Function(Box::new(new_f)))
20866 }
20867 _ => Ok(Expression::Function(f)),
20868 },
20869 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks/Spark (only when the source dialect is Generic)
20870 "FORMAT"
20871 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
20872 {
20873 match target {
20874 DialectType::Databricks | DialectType::Spark => {
20875 let mut new_f = *f;
20876 new_f.name = "FORMAT_STRING".to_string();
20877 Ok(Expression::Function(Box::new(new_f)))
20878 }
20879 _ => Ok(Expression::Function(f)),
20880 }
20881 }
20882 // CONCAT_WS from Generic is null-propagating in SQLGlot fixtures.
20883 // Trino also requires non-separator arguments cast to VARCHAR.
20884 "CONCAT_WS" if f.args.len() >= 2 => {
20885 fn concat_ws_null_case(
20886 args: Vec<Expression>,
20887 else_expr: Expression,
20888 ) -> Expression {
20889 let mut null_checks = args.iter().cloned().map(|arg| {
20890 Expression::IsNull(Box::new(crate::expressions::IsNull {
20891 this: arg,
20892 not: false,
20893 postfix_form: false,
20894 }))
20895 });
20896 let first_null_check = null_checks
20897 .next()
20898 .expect("CONCAT_WS with >= 2 args must yield a null check");
20899 let null_check =
20900 null_checks.fold(first_null_check, |left, right| {
20901 Expression::Or(Box::new(BinaryOp {
20902 left,
20903 right,
20904 left_comments: Vec::new(),
20905 operator_comments: Vec::new(),
20906 trailing_comments: Vec::new(),
20907 inferred_type: None,
20908 }))
20909 });
20910 Expression::Case(Box::new(Case {
20911 operand: None,
20912 whens: vec![(null_check, Expression::Null(Null))],
20913 else_: Some(else_expr),
20914 comments: vec![],
20915 inferred_type: None,
20916 }))
20917 }
20918
20919 match target {
20920 DialectType::Trino
20921 if matches!(source, DialectType::Generic) =>
20922 {
20923 let original_args = f.args.clone();
20924 let mut args = f.args;
20925 let sep = args.remove(0);
20926 let cast_args: Vec<Expression> = args
20927 .into_iter()
20928 .map(|a| {
20929 Expression::Cast(Box::new(Cast {
20930 this: a,
20931 to: DataType::VarChar {
20932 length: None,
20933 parenthesized_length: false,
20934 },
20935 double_colon_syntax: false,
20936 trailing_comments: Vec::new(),
20937 format: None,
20938 default: None,
20939 inferred_type: None,
20940 }))
20941 })
20942 .collect();
20943 let mut new_args = vec![sep];
20944 new_args.extend(cast_args);
20945 let else_expr = Expression::Function(Box::new(
20946 Function::new("CONCAT_WS".to_string(), new_args),
20947 ));
20948 Ok(concat_ws_null_case(original_args, else_expr))
20949 }
20950 DialectType::Presto
20951 | DialectType::Trino
20952 | DialectType::Athena => {
20953 let mut args = f.args;
20954 let sep = args.remove(0);
20955 let cast_args: Vec<Expression> = args
20956 .into_iter()
20957 .map(|a| {
20958 Expression::Cast(Box::new(Cast {
20959 this: a,
20960 to: DataType::VarChar {
20961 length: None,
20962 parenthesized_length: false,
20963 },
20964 double_colon_syntax: false,
20965 trailing_comments: Vec::new(),
20966 format: None,
20967 default: None,
20968 inferred_type: None,
20969 }))
20970 })
20971 .collect();
20972 let mut new_args = vec![sep];
20973 new_args.extend(cast_args);
20974 Ok(Expression::Function(Box::new(Function::new(
20975 "CONCAT_WS".to_string(),
20976 new_args,
20977 ))))
20978 }
20979 DialectType::Spark
20980 | DialectType::Hive
20981 | DialectType::DuckDB
20982 if matches!(source, DialectType::Generic) =>
20983 {
20984 let args = f.args;
20985 let else_expr = Expression::Function(Box::new(
20986 Function::new("CONCAT_WS".to_string(), args.clone()),
20987 ));
20988 Ok(concat_ws_null_case(args, else_expr))
20989 }
20990 _ => Ok(Expression::Function(f)),
20991 }
20992 }
20993 // ARRAY_SLICE(x, start[, end]) -> SLICE(...) for Presto/Trino/Athena/Databricks/Spark, arraySlice(...) for ClickHouse; Snowflake -> DuckDB keeps ARRAY_SLICE but adjusts the start/end indices
20994 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
20995 DialectType::DuckDB
20996 if f.args.len() == 3
20997 && matches!(source, DialectType::Snowflake) =>
20998 {
20999 // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
21000 // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
21001 let mut args = f.args;
21002 let arr = args.remove(0);
21003 let start = args.remove(0);
21004 let end = args.remove(0);
21005
21006 // CASE WHEN start >= 0 THEN start + 1 ELSE start END
21007 let adjusted_start = Expression::Case(Box::new(Case {
21008 operand: None,
21009 whens: vec![(
21010 Expression::Gte(Box::new(BinaryOp {
21011 left: start.clone(),
21012 right: Expression::number(0),
21013 left_comments: vec![],
21014 operator_comments: vec![],
21015 trailing_comments: vec![],
21016 inferred_type: None,
21017 })),
21018 Expression::Add(Box::new(BinaryOp {
21019 left: start.clone(),
21020 right: Expression::number(1),
21021 left_comments: vec![],
21022 operator_comments: vec![],
21023 trailing_comments: vec![],
21024 inferred_type: None,
21025 })),
21026 )],
21027 else_: Some(start),
21028 comments: vec![],
21029 inferred_type: None,
21030 }));
21031
21032 // CASE WHEN end < 0 THEN end - 1 ELSE end END
21033 let adjusted_end = Expression::Case(Box::new(Case {
21034 operand: None,
21035 whens: vec![(
21036 Expression::Lt(Box::new(BinaryOp {
21037 left: end.clone(),
21038 right: Expression::number(0),
21039 left_comments: vec![],
21040 operator_comments: vec![],
21041 trailing_comments: vec![],
21042 inferred_type: None,
21043 })),
21044 Expression::Sub(Box::new(BinaryOp {
21045 left: end.clone(),
21046 right: Expression::number(1),
21047 left_comments: vec![],
21048 operator_comments: vec![],
21049 trailing_comments: vec![],
21050 inferred_type: None,
21051 })),
21052 )],
21053 else_: Some(end),
21054 comments: vec![],
21055 inferred_type: None,
21056 }));
21057
21058 Ok(Expression::Function(Box::new(Function::new(
21059 "ARRAY_SLICE".to_string(),
21060 vec![arr, adjusted_start, adjusted_end],
21061 ))))
21062 }
21063 DialectType::Presto
21064 | DialectType::Trino
21065 | DialectType::Athena
21066 | DialectType::Databricks
21067 | DialectType::Spark => {
21068 let mut new_f = *f;
21069 new_f.name = "SLICE".to_string();
21070 Ok(Expression::Function(Box::new(new_f)))
21071 }
21072 DialectType::ClickHouse => {
21073 let mut new_f = *f;
21074 new_f.name = "arraySlice".to_string();
21075 Ok(Expression::Function(Box::new(new_f)))
21076 }
21077 _ => Ok(Expression::Function(f)),
21078 },
21079 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
21080 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
21081 DialectType::DuckDB => {
21082 let mut args = f.args;
21083 let arr = args.remove(0);
21084 let val = args.remove(0);
21085 Ok(Expression::Function(Box::new(Function::new(
21086 "LIST_PREPEND".to_string(),
21087 vec![val, arr],
21088 ))))
21089 }
21090 _ => Ok(Expression::Function(f)),
21091 },
21092 // ARRAY_REMOVE(arr, target) -> dialect-specific
21093 "ARRAY_REMOVE" if f.args.len() == 2 => {
21094 match target {
21095 DialectType::DuckDB => {
21096 let mut args = f.args;
21097 let arr = args.remove(0);
21098 let target_val = args.remove(0);
21099 let u_id = crate::expressions::Identifier::new("_u");
21100 // LIST_FILTER(arr, _u -> _u <> target)
21101 let lambda = Expression::Lambda(Box::new(
21102 crate::expressions::LambdaExpr {
21103 parameters: vec![u_id.clone()],
21104 body: Expression::Neq(Box::new(BinaryOp {
21105 left: Expression::Identifier(u_id),
21106 right: target_val,
21107 left_comments: Vec::new(),
21108 operator_comments: Vec::new(),
21109 trailing_comments: Vec::new(),
21110 inferred_type: None,
21111 })),
21112 colon: false,
21113 parameter_types: Vec::new(),
21114 },
21115 ));
21116 Ok(Expression::Function(Box::new(Function::new(
21117 "LIST_FILTER".to_string(),
21118 vec![arr, lambda],
21119 ))))
21120 }
21121 DialectType::ClickHouse => {
21122 let mut args = f.args;
21123 let arr = args.remove(0);
21124 let target_val = args.remove(0);
21125 let u_id = crate::expressions::Identifier::new("_u");
21126 // arrayFilter(_u -> _u <> target, arr)
21127 let lambda = Expression::Lambda(Box::new(
21128 crate::expressions::LambdaExpr {
21129 parameters: vec![u_id.clone()],
21130 body: Expression::Neq(Box::new(BinaryOp {
21131 left: Expression::Identifier(u_id),
21132 right: target_val,
21133 left_comments: Vec::new(),
21134 operator_comments: Vec::new(),
21135 trailing_comments: Vec::new(),
21136 inferred_type: None,
21137 })),
21138 colon: false,
21139 parameter_types: Vec::new(),
21140 },
21141 ));
21142 Ok(Expression::Function(Box::new(Function::new(
21143 "arrayFilter".to_string(),
21144 vec![lambda, arr],
21145 ))))
21146 }
21147 DialectType::BigQuery => {
21148 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
21149 let mut args = f.args;
21150 let arr = args.remove(0);
21151 let target_val = args.remove(0);
21152 let u_id = crate::expressions::Identifier::new("_u");
21153 let u_col = Expression::Column(Box::new(
21154 crate::expressions::Column {
21155 name: u_id.clone(),
21156 table: None,
21157 join_mark: false,
21158 trailing_comments: Vec::new(),
21159 span: None,
21160 inferred_type: None,
21161 },
21162 ));
21163 // UNNEST(the_array) AS _u
21164 let unnest_expr = Expression::Unnest(Box::new(
21165 crate::expressions::UnnestFunc {
21166 this: arr,
21167 expressions: Vec::new(),
21168 with_ordinality: false,
21169 alias: None,
21170 offset_alias: None,
21171 },
21172 ));
21173 let aliased_unnest = Expression::Alias(Box::new(
21174 crate::expressions::Alias {
21175 this: unnest_expr,
21176 alias: u_id.clone(),
21177 column_aliases: Vec::new(),
21178 alias_explicit_as: false,
21179 alias_keyword: None,
21180 pre_alias_comments: Vec::new(),
21181 trailing_comments: Vec::new(),
21182 inferred_type: None,
21183 },
21184 ));
21185 // _u <> target
21186 let where_cond = Expression::Neq(Box::new(BinaryOp {
21187 left: u_col.clone(),
21188 right: target_val,
21189 left_comments: Vec::new(),
21190 operator_comments: Vec::new(),
21191 trailing_comments: Vec::new(),
21192 inferred_type: None,
21193 }));
21194 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
21195 let subquery = Expression::Select(Box::new(
21196 crate::expressions::Select::new()
21197 .column(u_col)
21198 .from(aliased_unnest)
21199 .where_(where_cond),
21200 ));
21201 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
21202 Ok(Expression::ArrayFunc(Box::new(
21203 crate::expressions::ArrayConstructor {
21204 expressions: vec![subquery],
21205 bracket_notation: false,
21206 use_list_keyword: false,
21207 },
21208 )))
21209 }
21210 _ => Ok(Expression::Function(f)),
21211 }
21212 }
21213 // PARSE_JSON(x) -> x for SQLite/Doris/MySQL/StarRocks (drop the wrapper and keep the inner argument)
21214 "PARSE_JSON" if f.args.len() == 1 => {
21215 match target {
21216 DialectType::SQLite
21217 | DialectType::Doris
21218 | DialectType::MySQL
21219 | DialectType::StarRocks => {
21220 // Strip PARSE_JSON, return the inner argument
21221 Ok(f.args.into_iter().next().unwrap())
21222 }
21223 _ => Ok(Expression::Function(f)),
21224 }
21225 }
21226 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
21227 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
21228 "JSON_REMOVE" => Ok(Expression::Function(f)),
21229 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
21230 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
21231 "JSON_SET" => Ok(Expression::Function(f)),
21232 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
21233 // Behavior per search value type:
21234 // NULL literal -> CASE WHEN x IS NULL THEN result
21235 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
21236 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
21237 "DECODE" if f.args.len() >= 3 => {
21238 // Keep as DECODE for targets that support it natively
21239 let keep_as_decode = matches!(
21240 target,
21241 DialectType::Oracle
21242 | DialectType::Snowflake
21243 | DialectType::Redshift
21244 | DialectType::Teradata
21245 | DialectType::Spark
21246 | DialectType::Databricks
21247 );
21248 if keep_as_decode {
21249 return Ok(Expression::Function(f));
21250 }
21251
21252 let mut args = f.args;
21253 let this_expr = args.remove(0);
21254 let mut pairs = Vec::new();
21255 let mut default = None;
21256 let mut i = 0;
21257 while i + 1 < args.len() {
21258 pairs.push((args[i].clone(), args[i + 1].clone()));
21259 i += 2;
21260 }
21261 if i < args.len() {
21262 default = Some(args[i].clone());
21263 }
21264 // Helper: check if expression is a literal value
21265 fn is_literal(e: &Expression) -> bool {
21266 matches!(
21267 e,
21268 Expression::Literal(_)
21269 | Expression::Boolean(_)
21270 | Expression::Neg(_)
21271 )
21272 }
21273 let whens: Vec<(Expression, Expression)> = pairs
21274 .into_iter()
21275 .map(|(search, result)| {
21276 if matches!(&search, Expression::Null(_)) {
21277 // NULL search -> IS NULL
21278 let condition = Expression::Is(Box::new(BinaryOp {
21279 left: this_expr.clone(),
21280 right: Expression::Null(crate::expressions::Null),
21281 left_comments: Vec::new(),
21282 operator_comments: Vec::new(),
21283 trailing_comments: Vec::new(),
21284 inferred_type: None,
21285 }));
21286 (condition, result)
21287 } else if is_literal(&search) {
21288 // Literal search -> simple equality
21289 let eq = Expression::Eq(Box::new(BinaryOp {
21290 left: this_expr.clone(),
21291 right: search,
21292 left_comments: Vec::new(),
21293 operator_comments: Vec::new(),
21294 trailing_comments: Vec::new(),
21295 inferred_type: None,
21296 }));
21297 (eq, result)
21298 } else {
21299 // Non-literal (column ref, expression) -> null-safe comparison
21300 let needs_paren = matches!(
21301 &search,
21302 Expression::Eq(_)
21303 | Expression::Neq(_)
21304 | Expression::Gt(_)
21305 | Expression::Gte(_)
21306 | Expression::Lt(_)
21307 | Expression::Lte(_)
21308 );
21309 let search_for_eq = if needs_paren {
21310 Expression::Paren(Box::new(
21311 crate::expressions::Paren {
21312 this: search.clone(),
21313 trailing_comments: Vec::new(),
21314 },
21315 ))
21316 } else {
21317 search.clone()
21318 };
21319 let eq = Expression::Eq(Box::new(BinaryOp {
21320 left: this_expr.clone(),
21321 right: search_for_eq,
21322 left_comments: Vec::new(),
21323 operator_comments: Vec::new(),
21324 trailing_comments: Vec::new(),
21325 inferred_type: None,
21326 }));
21327 let search_for_null = if needs_paren {
21328 Expression::Paren(Box::new(
21329 crate::expressions::Paren {
21330 this: search.clone(),
21331 trailing_comments: Vec::new(),
21332 },
21333 ))
21334 } else {
21335 search.clone()
21336 };
21337 let x_is_null = Expression::Is(Box::new(BinaryOp {
21338 left: this_expr.clone(),
21339 right: Expression::Null(crate::expressions::Null),
21340 left_comments: Vec::new(),
21341 operator_comments: Vec::new(),
21342 trailing_comments: Vec::new(),
21343 inferred_type: None,
21344 }));
21345 let s_is_null = Expression::Is(Box::new(BinaryOp {
21346 left: search_for_null,
21347 right: Expression::Null(crate::expressions::Null),
21348 left_comments: Vec::new(),
21349 operator_comments: Vec::new(),
21350 trailing_comments: Vec::new(),
21351 inferred_type: None,
21352 }));
21353 let both_null = Expression::And(Box::new(BinaryOp {
21354 left: x_is_null,
21355 right: s_is_null,
21356 left_comments: Vec::new(),
21357 operator_comments: Vec::new(),
21358 trailing_comments: Vec::new(),
21359 inferred_type: None,
21360 }));
21361 let condition = Expression::Or(Box::new(BinaryOp {
21362 left: eq,
21363 right: Expression::Paren(Box::new(
21364 crate::expressions::Paren {
21365 this: both_null,
21366 trailing_comments: Vec::new(),
21367 },
21368 )),
21369 left_comments: Vec::new(),
21370 operator_comments: Vec::new(),
21371 trailing_comments: Vec::new(),
21372 inferred_type: None,
21373 }));
21374 (condition, result)
21375 }
21376 })
21377 .collect();
21378 Ok(Expression::Case(Box::new(Case {
21379 operand: None,
21380 whens,
21381 else_: default,
21382 comments: Vec::new(),
21383 inferred_type: None,
21384 })))
21385 }
21386 // LEVENSHTEIN(a, b, ...) -> dialect-specific
21387 "LEVENSHTEIN" => {
21388 match target {
21389 DialectType::BigQuery => {
21390 let mut new_f = *f;
21391 new_f.name = "EDIT_DISTANCE".to_string();
21392 Ok(Expression::Function(Box::new(new_f)))
21393 }
21394 DialectType::Drill => {
21395 let mut new_f = *f;
21396 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
21397 Ok(Expression::Function(Box::new(new_f)))
21398 }
21399 DialectType::PostgreSQL if f.args.len() == 6 => {
21400 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
21401 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
21402 let mut new_f = *f;
21403 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
21404 Ok(Expression::Function(Box::new(new_f)))
21405 }
21406 _ => Ok(Expression::Function(f)),
21407 }
21408 }
21409 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
21410 "ARRAY_MAX" => {
21411 let name = match target {
21412 DialectType::ClickHouse => "arrayMax",
21413 DialectType::DuckDB => "LIST_MAX",
21414 _ => "ARRAY_MAX",
21415 };
21416 let mut new_f = *f;
21417 new_f.name = name.to_string();
21418 Ok(Expression::Function(Box::new(new_f)))
21419 }
21420 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
21421 "ARRAY_MIN" => {
21422 let name = match target {
21423 DialectType::ClickHouse => "arrayMin",
21424 DialectType::DuckDB => "LIST_MIN",
21425 _ => "ARRAY_MIN",
21426 };
21427 let mut new_f = *f;
21428 new_f.name = name.to_string();
21429 Ok(Expression::Function(Box::new(new_f)))
21430 }
21431 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
21432 // -> CAST(JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) * 100 AS INT) for DuckDB
21433 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
21434 let mut args = f.args;
21435 let b = args.pop().unwrap();
21436 let a = args.pop().unwrap();
21437 match target {
21438 DialectType::ClickHouse => {
21439 let upper_a = Expression::Upper(Box::new(
21440 crate::expressions::UnaryFunc::new(a),
21441 ));
21442 let upper_b = Expression::Upper(Box::new(
21443 crate::expressions::UnaryFunc::new(b),
21444 ));
21445 Ok(Expression::Function(Box::new(Function::new(
21446 "jaroWinklerSimilarity".to_string(),
21447 vec![upper_a, upper_b],
21448 ))))
21449 }
21450 DialectType::DuckDB => {
21451 let upper_a = Expression::Upper(Box::new(
21452 crate::expressions::UnaryFunc::new(a),
21453 ));
21454 let upper_b = Expression::Upper(Box::new(
21455 crate::expressions::UnaryFunc::new(b),
21456 ));
21457 let score = Expression::Function(Box::new(Function::new(
21458 "JARO_WINKLER_SIMILARITY".to_string(),
21459 vec![upper_a, upper_b],
21460 )));
21461 let scaled = Expression::Mul(Box::new(BinaryOp {
21462 left: score,
21463 right: Expression::number(100),
21464 left_comments: Vec::new(),
21465 operator_comments: Vec::new(),
21466 trailing_comments: Vec::new(),
21467 inferred_type: None,
21468 }));
21469 Ok(Expression::Cast(Box::new(Cast {
21470 this: scaled,
21471 to: DataType::Int {
21472 length: None,
21473 integer_spelling: false,
21474 },
21475 trailing_comments: Vec::new(),
21476 double_colon_syntax: false,
21477 format: None,
21478 default: None,
21479 inferred_type: None,
21480 })))
21481 }
21482 _ => Ok(Expression::Function(Box::new(Function::new(
21483 "JAROWINKLER_SIMILARITY".to_string(),
21484 vec![a, b],
21485 )))),
21486 }
21487 }
21488 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
21489 "CURRENT_SCHEMAS" => match target {
21490 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21491 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
21492 ))),
21493 _ => Ok(Expression::Function(f)),
21494 },
21495 // TRUNC/TRUNCATE (numeric) -> dialect-specific
21496 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
21497 match target {
21498 DialectType::TSQL | DialectType::Fabric => {
21499 // ROUND(x, decimals, 1) - the 1 flag means truncation
21500 let mut args = f.args;
21501 let this = if args.is_empty() {
21502 return Ok(Expression::Function(Box::new(
21503 Function::new("TRUNC".to_string(), args),
21504 )));
21505 } else {
21506 args.remove(0)
21507 };
21508 let decimals = if args.is_empty() {
21509 Expression::Literal(Box::new(Literal::Number(
21510 "0".to_string(),
21511 )))
21512 } else {
21513 args.remove(0)
21514 };
21515 Ok(Expression::Function(Box::new(Function::new(
21516 "ROUND".to_string(),
21517 vec![
21518 this,
21519 decimals,
21520 Expression::Literal(Box::new(Literal::Number(
21521 "1".to_string(),
21522 ))),
21523 ],
21524 ))))
21525 }
21526 DialectType::Presto
21527 | DialectType::Trino
21528 | DialectType::Athena => {
21529 // TRUNCATE(x, decimals)
21530 let mut new_f = *f;
21531 new_f.name = "TRUNCATE".to_string();
21532 Ok(Expression::Function(Box::new(new_f)))
21533 }
21534 DialectType::MySQL
21535 | DialectType::SingleStore
21536 | DialectType::TiDB => {
21537 // TRUNCATE(x, decimals)
21538 let mut new_f = *f;
21539 new_f.name = "TRUNCATE".to_string();
21540 Ok(Expression::Function(Box::new(new_f)))
21541 }
21542 DialectType::DuckDB => {
21543 // DuckDB supports TRUNC(x, decimals) — preserve both args
21544 let mut args = f.args;
21545 // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
21546 if args.len() == 2
21547 && matches!(source, DialectType::Snowflake)
21548 {
21549 let decimals = args.remove(1);
21550 let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
21551 || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
21552 let wrapped = if !is_int {
21553 Expression::Cast(Box::new(
21554 crate::expressions::Cast {
21555 this: decimals,
21556 to: DataType::Int {
21557 length: None,
21558 integer_spelling: false,
21559 },
21560 double_colon_syntax: false,
21561 trailing_comments: Vec::new(),
21562 format: None,
21563 default: None,
21564 inferred_type: None,
21565 },
21566 ))
21567 } else {
21568 decimals
21569 };
21570 args.push(wrapped);
21571 }
21572 Ok(Expression::Function(Box::new(Function::new(
21573 "TRUNC".to_string(),
21574 args,
21575 ))))
21576 }
21577 DialectType::ClickHouse => {
21578 // trunc(x, decimals) - lowercase
21579 let mut new_f = *f;
21580 new_f.name = "trunc".to_string();
21581 Ok(Expression::Function(Box::new(new_f)))
21582 }
21583 DialectType::Spark | DialectType::Databricks => {
21584 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
21585 let this = f.args.into_iter().next().unwrap_or(
21586 Expression::Literal(Box::new(Literal::Number(
21587 "0".to_string(),
21588 ))),
21589 );
21590 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
21591 this,
21592 to: crate::expressions::DataType::BigInt {
21593 length: None,
21594 },
21595 double_colon_syntax: false,
21596 trailing_comments: Vec::new(),
21597 format: None,
21598 default: None,
21599 inferred_type: None,
21600 })))
21601 }
21602 _ => {
21603 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
21604 let mut new_f = *f;
21605 new_f.name = "TRUNC".to_string();
21606 Ok(Expression::Function(Box::new(new_f)))
21607 }
21608 }
21609 }
21610 // CURRENT_VERSION() -> VERSION() for most dialects
21611 "CURRENT_VERSION" => match target {
21612 DialectType::Snowflake
21613 | DialectType::Databricks
21614 | DialectType::StarRocks => Ok(Expression::Function(f)),
21615 DialectType::SQLite => {
21616 let mut new_f = *f;
21617 new_f.name = "SQLITE_VERSION".to_string();
21618 Ok(Expression::Function(Box::new(new_f)))
21619 }
21620 _ => {
21621 let mut new_f = *f;
21622 new_f.name = "VERSION".to_string();
21623 Ok(Expression::Function(Box::new(new_f)))
21624 }
21625 },
21626 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
21627 "ARRAY_REVERSE" => match target {
21628 DialectType::ClickHouse => {
21629 let mut new_f = *f;
21630 new_f.name = "arrayReverse".to_string();
21631 Ok(Expression::Function(Box::new(new_f)))
21632 }
21633 _ => Ok(Expression::Function(f)),
21634 },
21635 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
21636 "GENERATE_DATE_ARRAY" => {
21637 let mut args = f.args;
21638 if matches!(target, DialectType::BigQuery) {
21639 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
21640 if args.len() == 2 {
21641 let default_interval = Expression::Interval(Box::new(
21642 crate::expressions::Interval {
21643 this: Some(Expression::Literal(Box::new(
21644 Literal::String("1".to_string()),
21645 ))),
21646 unit: Some(
21647 crate::expressions::IntervalUnitSpec::Simple {
21648 unit: crate::expressions::IntervalUnit::Day,
21649 use_plural: false,
21650 },
21651 ),
21652 },
21653 ));
21654 args.push(default_interval);
21655 }
21656 Ok(Expression::Function(Box::new(Function::new(
21657 "GENERATE_DATE_ARRAY".to_string(),
21658 args,
21659 ))))
21660 } else if matches!(target, DialectType::DuckDB) {
21661 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
21662 let start = args.get(0).cloned();
21663 let end = args.get(1).cloned();
21664 let step = args.get(2).cloned().or_else(|| {
21665 Some(Expression::Interval(Box::new(
21666 crate::expressions::Interval {
21667 this: Some(Expression::Literal(Box::new(
21668 Literal::String("1".to_string()),
21669 ))),
21670 unit: Some(
21671 crate::expressions::IntervalUnitSpec::Simple {
21672 unit: crate::expressions::IntervalUnit::Day,
21673 use_plural: false,
21674 },
21675 ),
21676 },
21677 )))
21678 });
21679 let gen_series = Expression::GenerateSeries(Box::new(
21680 crate::expressions::GenerateSeries {
21681 start: start.map(Box::new),
21682 end: end.map(Box::new),
21683 step: step.map(Box::new),
21684 is_end_exclusive: None,
21685 },
21686 ));
21687 Ok(Expression::Cast(Box::new(Cast {
21688 this: gen_series,
21689 to: DataType::Array {
21690 element_type: Box::new(DataType::Date),
21691 dimension: None,
21692 },
21693 trailing_comments: vec![],
21694 double_colon_syntax: false,
21695 format: None,
21696 default: None,
21697 inferred_type: None,
21698 })))
21699 } else if matches!(
21700 target,
21701 DialectType::Presto | DialectType::Trino | DialectType::Athena
21702 ) {
21703 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
21704 let start = args.get(0).cloned();
21705 let end = args.get(1).cloned();
21706 let step = args.get(2).cloned().or_else(|| {
21707 Some(Expression::Interval(Box::new(
21708 crate::expressions::Interval {
21709 this: Some(Expression::Literal(Box::new(
21710 Literal::String("1".to_string()),
21711 ))),
21712 unit: Some(
21713 crate::expressions::IntervalUnitSpec::Simple {
21714 unit: crate::expressions::IntervalUnit::Day,
21715 use_plural: false,
21716 },
21717 ),
21718 },
21719 )))
21720 });
21721 let gen_series = Expression::GenerateSeries(Box::new(
21722 crate::expressions::GenerateSeries {
21723 start: start.map(Box::new),
21724 end: end.map(Box::new),
21725 step: step.map(Box::new),
21726 is_end_exclusive: None,
21727 },
21728 ));
21729 Ok(gen_series)
21730 } else if matches!(
21731 target,
21732 DialectType::Spark | DialectType::Databricks
21733 ) {
21734 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
21735 let start = args.get(0).cloned();
21736 let end = args.get(1).cloned();
21737 let step = args.get(2).cloned().or_else(|| {
21738 Some(Expression::Interval(Box::new(
21739 crate::expressions::Interval {
21740 this: Some(Expression::Literal(Box::new(
21741 Literal::String("1".to_string()),
21742 ))),
21743 unit: Some(
21744 crate::expressions::IntervalUnitSpec::Simple {
21745 unit: crate::expressions::IntervalUnit::Day,
21746 use_plural: false,
21747 },
21748 ),
21749 },
21750 )))
21751 });
21752 let gen_series = Expression::GenerateSeries(Box::new(
21753 crate::expressions::GenerateSeries {
21754 start: start.map(Box::new),
21755 end: end.map(Box::new),
21756 step: step.map(Box::new),
21757 is_end_exclusive: None,
21758 },
21759 ));
21760 Ok(gen_series)
21761 } else if matches!(target, DialectType::Snowflake) {
21762 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
21763 if args.len() == 2 {
21764 let default_interval = Expression::Interval(Box::new(
21765 crate::expressions::Interval {
21766 this: Some(Expression::Literal(Box::new(
21767 Literal::String("1".to_string()),
21768 ))),
21769 unit: Some(
21770 crate::expressions::IntervalUnitSpec::Simple {
21771 unit: crate::expressions::IntervalUnit::Day,
21772 use_plural: false,
21773 },
21774 ),
21775 },
21776 ));
21777 args.push(default_interval);
21778 }
21779 Ok(Expression::Function(Box::new(Function::new(
21780 "GENERATE_DATE_ARRAY".to_string(),
21781 args,
21782 ))))
21783 } else if matches!(
21784 target,
21785 DialectType::MySQL
21786 | DialectType::TSQL
21787 | DialectType::Fabric
21788 | DialectType::Redshift
21789 ) {
21790 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
21791 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
21792 Ok(Expression::Function(Box::new(Function::new(
21793 "GENERATE_DATE_ARRAY".to_string(),
21794 args,
21795 ))))
21796 } else {
21797 // PostgreSQL/others: convert to GenerateSeries
21798 let start = args.get(0).cloned();
21799 let end = args.get(1).cloned();
21800 let step = args.get(2).cloned().or_else(|| {
21801 Some(Expression::Interval(Box::new(
21802 crate::expressions::Interval {
21803 this: Some(Expression::Literal(Box::new(
21804 Literal::String("1".to_string()),
21805 ))),
21806 unit: Some(
21807 crate::expressions::IntervalUnitSpec::Simple {
21808 unit: crate::expressions::IntervalUnit::Day,
21809 use_plural: false,
21810 },
21811 ),
21812 },
21813 )))
21814 });
21815 Ok(Expression::GenerateSeries(Box::new(
21816 crate::expressions::GenerateSeries {
21817 start: start.map(Box::new),
21818 end: end.map(Box::new),
21819 step: step.map(Box::new),
21820 is_end_exclusive: None,
21821 },
21822 )))
21823 }
21824 }
21825 // ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
21826 // (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
21827 "ARRAYS_OVERLAP"
21828 if f.args.len() == 2
21829 && matches!(source, DialectType::Snowflake)
21830 && matches!(target, DialectType::DuckDB) =>
21831 {
21832 let mut args = f.args;
21833 let arr1 = args.remove(0);
21834 let arr2 = args.remove(0);
21835
21836 // (arr1 && arr2)
21837 let overlap = Expression::Paren(Box::new(Paren {
21838 this: Expression::ArrayOverlaps(Box::new(BinaryOp {
21839 left: arr1.clone(),
21840 right: arr2.clone(),
21841 left_comments: vec![],
21842 operator_comments: vec![],
21843 trailing_comments: vec![],
21844 inferred_type: None,
21845 })),
21846 trailing_comments: vec![],
21847 }));
21848
21849 // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
21850 let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
21851 left: Expression::Function(Box::new(Function::new(
21852 "ARRAY_LENGTH".to_string(),
21853 vec![arr1.clone()],
21854 ))),
21855 right: Expression::Function(Box::new(Function::new(
21856 "LIST_COUNT".to_string(),
21857 vec![arr1],
21858 ))),
21859 left_comments: vec![],
21860 operator_comments: vec![],
21861 trailing_comments: vec![],
21862 inferred_type: None,
21863 }));
21864
21865 // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
21866 let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
21867 left: Expression::Function(Box::new(Function::new(
21868 "ARRAY_LENGTH".to_string(),
21869 vec![arr2.clone()],
21870 ))),
21871 right: Expression::Function(Box::new(Function::new(
21872 "LIST_COUNT".to_string(),
21873 vec![arr2],
21874 ))),
21875 left_comments: vec![],
21876 operator_comments: vec![],
21877 trailing_comments: vec![],
21878 inferred_type: None,
21879 }));
21880
21881 // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
21882 let null_check = Expression::Paren(Box::new(Paren {
21883 this: Expression::And(Box::new(BinaryOp {
21884 left: arr1_has_null,
21885 right: arr2_has_null,
21886 left_comments: vec![],
21887 operator_comments: vec![],
21888 trailing_comments: vec![],
21889 inferred_type: None,
21890 })),
21891 trailing_comments: vec![],
21892 }));
21893
21894 // (arr1 && arr2) OR (null_check)
21895 Ok(Expression::Or(Box::new(BinaryOp {
21896 left: overlap,
21897 right: null_check,
21898 left_comments: vec![],
21899 operator_comments: vec![],
21900 trailing_comments: vec![],
21901 inferred_type: None,
21902 })))
21903 }
21904 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
21905 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
21906 "ARRAY_INTERSECTION"
21907 if f.args.len() == 2
21908 && matches!(source, DialectType::Snowflake)
21909 && matches!(target, DialectType::DuckDB) =>
21910 {
21911 let mut args = f.args;
21912 let arr1 = args.remove(0);
21913 let arr2 = args.remove(0);
21914
21915 // Build: arr1 IS NULL
21916 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
21917 this: arr1.clone(),
21918 not: false,
21919 postfix_form: false,
21920 }));
21921 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
21922 this: arr2.clone(),
21923 not: false,
21924 postfix_form: false,
21925 }));
21926 let null_check = Expression::Or(Box::new(BinaryOp {
21927 left: arr1_is_null,
21928 right: arr2_is_null,
21929 left_comments: vec![],
21930 operator_comments: vec![],
21931 trailing_comments: vec![],
21932 inferred_type: None,
21933 }));
21934
21935 // GENERATE_SERIES(1, LENGTH(arr1))
21936 let gen_series = Expression::Function(Box::new(Function::new(
21937 "GENERATE_SERIES".to_string(),
21938 vec![
21939 Expression::number(1),
21940 Expression::Function(Box::new(Function::new(
21941 "LENGTH".to_string(),
21942 vec![arr1.clone()],
21943 ))),
21944 ],
21945 )));
21946
21947 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
21948 let list_zip = Expression::Function(Box::new(Function::new(
21949 "LIST_ZIP".to_string(),
21950 vec![arr1.clone(), gen_series],
21951 )));
21952
21953 // pair[1] and pair[2]
21954 let pair_col = Expression::column("pair");
21955 let pair_1 = Expression::Subscript(Box::new(
21956 crate::expressions::Subscript {
21957 this: pair_col.clone(),
21958 index: Expression::number(1),
21959 },
21960 ));
21961 let pair_2 = Expression::Subscript(Box::new(
21962 crate::expressions::Subscript {
21963 this: pair_col.clone(),
21964 index: Expression::number(2),
21965 },
21966 ));
21967
21968 // arr1[1:pair[2]]
21969 let arr1_slice = Expression::ArraySlice(Box::new(
21970 crate::expressions::ArraySlice {
21971 this: arr1.clone(),
21972 start: Some(Expression::number(1)),
21973 end: Some(pair_2),
21974 },
21975 ));
21976
21977 // e IS NOT DISTINCT FROM pair[1]
21978 let e_col = Expression::column("e");
21979 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
21980 left: e_col.clone(),
21981 right: pair_1.clone(),
21982 left_comments: vec![],
21983 operator_comments: vec![],
21984 trailing_comments: vec![],
21985 inferred_type: None,
21986 }));
21987
21988 // e -> e IS NOT DISTINCT FROM pair[1]
21989 let inner_lambda1 =
21990 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21991 parameters: vec![crate::expressions::Identifier::new("e")],
21992 body: is_not_distinct,
21993 colon: false,
21994 parameter_types: vec![],
21995 }));
21996
21997 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
21998 let inner_filter1 = Expression::Function(Box::new(Function::new(
21999 "LIST_FILTER".to_string(),
22000 vec![arr1_slice, inner_lambda1],
22001 )));
22002
22003 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
22004 let len1 = Expression::Function(Box::new(Function::new(
22005 "LENGTH".to_string(),
22006 vec![inner_filter1],
22007 )));
22008
22009 // e -> e IS NOT DISTINCT FROM pair[1]
22010 let inner_lambda2 =
22011 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22012 parameters: vec![crate::expressions::Identifier::new("e")],
22013 body: Expression::NullSafeEq(Box::new(BinaryOp {
22014 left: e_col,
22015 right: pair_1.clone(),
22016 left_comments: vec![],
22017 operator_comments: vec![],
22018 trailing_comments: vec![],
22019 inferred_type: None,
22020 })),
22021 colon: false,
22022 parameter_types: vec![],
22023 }));
22024
22025 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
22026 let inner_filter2 = Expression::Function(Box::new(Function::new(
22027 "LIST_FILTER".to_string(),
22028 vec![arr2.clone(), inner_lambda2],
22029 )));
22030
22031 // LENGTH(LIST_FILTER(arr2, ...))
22032 let len2 = Expression::Function(Box::new(Function::new(
22033 "LENGTH".to_string(),
22034 vec![inner_filter2],
22035 )));
22036
22037 // LENGTH(...) <= LENGTH(...)
22038 let cond = Expression::Paren(Box::new(Paren {
22039 this: Expression::Lte(Box::new(BinaryOp {
22040 left: len1,
22041 right: len2,
22042 left_comments: vec![],
22043 operator_comments: vec![],
22044 trailing_comments: vec![],
22045 inferred_type: None,
22046 })),
22047 trailing_comments: vec![],
22048 }));
22049
22050 // pair -> (condition)
22051 let filter_lambda =
22052 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22053 parameters: vec![crate::expressions::Identifier::new(
22054 "pair",
22055 )],
22056 body: cond,
22057 colon: false,
22058 parameter_types: vec![],
22059 }));
22060
22061 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
22062 let outer_filter = Expression::Function(Box::new(Function::new(
22063 "LIST_FILTER".to_string(),
22064 vec![list_zip, filter_lambda],
22065 )));
22066
22067 // pair -> pair[1]
22068 let transform_lambda =
22069 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22070 parameters: vec![crate::expressions::Identifier::new(
22071 "pair",
22072 )],
22073 body: pair_1,
22074 colon: false,
22075 parameter_types: vec![],
22076 }));
22077
22078 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
22079 let list_transform = Expression::Function(Box::new(Function::new(
22080 "LIST_TRANSFORM".to_string(),
22081 vec![outer_filter, transform_lambda],
22082 )));
22083
22084 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
22085 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
22086 // END
22087 Ok(Expression::Case(Box::new(Case {
22088 operand: None,
22089 whens: vec![(null_check, Expression::Null(Null))],
22090 else_: Some(list_transform),
22091 comments: vec![],
22092 inferred_type: None,
22093 })))
22094 }
22095 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
22096 "ARRAY_CONSTRUCT" => {
22097 if matches!(target, DialectType::Snowflake) {
22098 Ok(Expression::Function(f))
22099 } else {
22100 Ok(Expression::Array(Box::new(crate::expressions::Array {
22101 expressions: f.args,
22102 })))
22103 }
22104 }
22105 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
22106 "ARRAY"
22107 if !f.args.iter().any(|a| {
22108 matches!(a, Expression::Select(_) | Expression::Subquery(_))
22109 }) =>
22110 {
22111 match target {
22112 DialectType::DuckDB
22113 | DialectType::Snowflake
22114 | DialectType::Presto
22115 | DialectType::Trino
22116 | DialectType::Athena => {
22117 Ok(Expression::Array(Box::new(crate::expressions::Array {
22118 expressions: f.args,
22119 })))
22120 }
22121 _ => Ok(Expression::Function(f)),
22122 }
22123 }
22124 _ => Ok(Expression::Function(f)),
22125 }
22126 } else if let Expression::AggregateFunction(mut af) = e {
22127 let name = af.name.to_ascii_uppercase();
22128 match name.as_str() {
22129 "ARBITRARY" if af.args.len() == 1 => {
22130 let arg = af.args.into_iter().next().unwrap();
22131 Ok(convert_arbitrary(arg, target))
22132 }
22133 "JSON_ARRAYAGG" => {
22134 match target {
22135 DialectType::PostgreSQL => {
22136 af.name = "JSON_AGG".to_string();
22137 // Add NULLS FIRST to ORDER BY items for PostgreSQL
22138 for ordered in af.order_by.iter_mut() {
22139 if ordered.nulls_first.is_none() {
22140 ordered.nulls_first = Some(true);
22141 }
22142 }
22143 Ok(Expression::AggregateFunction(af))
22144 }
22145 _ => Ok(Expression::AggregateFunction(af)),
22146 }
22147 }
22148 _ => Ok(Expression::AggregateFunction(af)),
22149 }
22150 } else if let Expression::JSONArrayAgg(ja) = e {
22151 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
22152 match target {
22153 DialectType::PostgreSQL => {
22154 let mut order_by = Vec::new();
22155 if let Some(order_expr) = ja.order {
22156 if let Expression::OrderBy(ob) = *order_expr {
22157 for mut ordered in ob.expressions {
22158 if ordered.nulls_first.is_none() {
22159 ordered.nulls_first = Some(true);
22160 }
22161 order_by.push(ordered);
22162 }
22163 }
22164 }
22165 Ok(Expression::AggregateFunction(Box::new(
22166 crate::expressions::AggregateFunction {
22167 name: "JSON_AGG".to_string(),
22168 args: vec![*ja.this],
22169 distinct: false,
22170 filter: None,
22171 order_by,
22172 limit: None,
22173 ignore_nulls: None,
22174 inferred_type: None,
22175 },
22176 )))
22177 }
22178 _ => Ok(Expression::JSONArrayAgg(ja)),
22179 }
22180 } else if let Expression::JSONArray(ja) = e {
22181 match target {
22182 DialectType::Snowflake
22183 if ja.null_handling.is_none()
22184 && ja.return_type.is_none()
22185 && ja.strict.is_none() =>
22186 {
22187 let array_construct = Expression::ArrayFunc(Box::new(
22188 crate::expressions::ArrayConstructor {
22189 expressions: ja.expressions,
22190 bracket_notation: false,
22191 use_list_keyword: false,
22192 },
22193 ));
22194 Ok(Expression::Function(Box::new(Function::new(
22195 "TO_VARIANT".to_string(),
22196 vec![array_construct],
22197 ))))
22198 }
22199 _ => Ok(Expression::JSONArray(ja)),
22200 }
22201 } else if let Expression::JsonArray(f) = e {
22202 match target {
22203 DialectType::Snowflake => {
22204 let array_construct = Expression::ArrayFunc(Box::new(
22205 crate::expressions::ArrayConstructor {
22206 expressions: f.expressions,
22207 bracket_notation: false,
22208 use_list_keyword: false,
22209 },
22210 ));
22211 Ok(Expression::Function(Box::new(Function::new(
22212 "TO_VARIANT".to_string(),
22213 vec![array_construct],
22214 ))))
22215 }
22216 _ => Ok(Expression::JsonArray(f)),
22217 }
22218 } else if let Expression::CombinedParameterizedAgg(cpa) = e {
22219 let function_name = match cpa.this.as_ref() {
22220 Expression::Identifier(ident) => Some(ident.name.as_str()),
22221 _ => None,
22222 };
22223 match function_name {
22224 Some(name)
22225 if name.eq_ignore_ascii_case("groupConcat")
22226 && cpa.expressions.len() == 1 =>
22227 {
22228 match target {
22229 DialectType::MySQL | DialectType::SingleStore => {
22230 let this = cpa.expressions[0].clone();
22231 let separator = cpa.params.first().cloned();
22232 Ok(Expression::GroupConcat(Box::new(
22233 crate::expressions::GroupConcatFunc {
22234 this,
22235 separator,
22236 order_by: None,
22237 distinct: false,
22238 filter: None,
22239 limit: None,
22240 inferred_type: None,
22241 },
22242 )))
22243 }
22244 DialectType::DuckDB => Ok(Expression::ListAgg(Box::new({
22245 let this = cpa.expressions[0].clone();
22246 let separator = cpa.params.first().cloned();
22247 crate::expressions::ListAggFunc {
22248 this,
22249 separator,
22250 on_overflow: None,
22251 order_by: None,
22252 distinct: false,
22253 filter: None,
22254 inferred_type: None,
22255 }
22256 }))),
22257 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22258 }
22259 }
22260 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22261 }
22262 } else if let Expression::ToNumber(tn) = e {
22263 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
22264 let arg = *tn.this;
22265 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
22266 this: arg,
22267 to: crate::expressions::DataType::Double {
22268 precision: None,
22269 scale: None,
22270 },
22271 double_colon_syntax: false,
22272 trailing_comments: Vec::new(),
22273 format: None,
22274 default: None,
22275 inferred_type: None,
22276 })))
22277 } else {
22278 Ok(e)
22279 }
22280 }
22281
22282 Action::RegexpLikeToDuckDB => {
22283 if let Expression::RegexpLike(f) = e {
22284 let mut args = vec![f.this, f.pattern];
22285 if let Some(flags) = f.flags {
22286 args.push(flags);
22287 }
22288 Ok(Expression::Function(Box::new(Function::new(
22289 "REGEXP_MATCHES".to_string(),
22290 args,
22291 ))))
22292 } else {
22293 Ok(e)
22294 }
22295 }
22296 Action::EpochConvert => {
22297 if let Expression::Epoch(f) = e {
22298 let arg = f.this;
22299 let name = match target {
22300 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
22301 "UNIX_TIMESTAMP"
22302 }
22303 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
22304 DialectType::BigQuery => "TIME_TO_UNIX",
22305 _ => "EPOCH",
22306 };
22307 Ok(Expression::Function(Box::new(Function::new(
22308 name.to_string(),
22309 vec![arg],
22310 ))))
22311 } else {
22312 Ok(e)
22313 }
22314 }
22315 Action::EpochMsConvert => {
22316 use crate::expressions::{BinaryOp, Cast};
22317 if let Expression::EpochMs(f) = e {
22318 let arg = f.this;
22319 match target {
22320 DialectType::Spark | DialectType::Databricks => {
22321 Ok(Expression::Function(Box::new(Function::new(
22322 "TIMESTAMP_MILLIS".to_string(),
22323 vec![arg],
22324 ))))
22325 }
22326 DialectType::BigQuery => Ok(Expression::Function(Box::new(
22327 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
22328 ))),
22329 DialectType::Presto | DialectType::Trino => {
22330 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
22331 let cast_arg = Expression::Cast(Box::new(Cast {
22332 this: arg,
22333 to: DataType::Double {
22334 precision: None,
22335 scale: None,
22336 },
22337 trailing_comments: Vec::new(),
22338 double_colon_syntax: false,
22339 format: None,
22340 default: None,
22341 inferred_type: None,
22342 }));
22343 let div = Expression::Div(Box::new(BinaryOp::new(
22344 cast_arg,
22345 Expression::Function(Box::new(Function::new(
22346 "POW".to_string(),
22347 vec![Expression::number(10), Expression::number(3)],
22348 ))),
22349 )));
22350 Ok(Expression::Function(Box::new(Function::new(
22351 "FROM_UNIXTIME".to_string(),
22352 vec![div],
22353 ))))
22354 }
22355 DialectType::MySQL => {
22356 // FROM_UNIXTIME(x / POWER(10, 3))
22357 let div = Expression::Div(Box::new(BinaryOp::new(
22358 arg,
22359 Expression::Function(Box::new(Function::new(
22360 "POWER".to_string(),
22361 vec![Expression::number(10), Expression::number(3)],
22362 ))),
22363 )));
22364 Ok(Expression::Function(Box::new(Function::new(
22365 "FROM_UNIXTIME".to_string(),
22366 vec![div],
22367 ))))
22368 }
22369 DialectType::PostgreSQL | DialectType::Redshift => {
22370 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
22371 let cast_arg = Expression::Cast(Box::new(Cast {
22372 this: arg,
22373 to: DataType::Custom {
22374 name: "DOUBLE PRECISION".to_string(),
22375 },
22376 trailing_comments: Vec::new(),
22377 double_colon_syntax: false,
22378 format: None,
22379 default: None,
22380 inferred_type: None,
22381 }));
22382 let div = Expression::Div(Box::new(BinaryOp::new(
22383 cast_arg,
22384 Expression::Function(Box::new(Function::new(
22385 "POWER".to_string(),
22386 vec![Expression::number(10), Expression::number(3)],
22387 ))),
22388 )));
22389 Ok(Expression::Function(Box::new(Function::new(
22390 "TO_TIMESTAMP".to_string(),
22391 vec![div],
22392 ))))
22393 }
22394 DialectType::ClickHouse => {
22395 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
22396 let cast_arg = Expression::Cast(Box::new(Cast {
22397 this: arg,
22398 to: DataType::Nullable {
22399 inner: Box::new(DataType::BigInt { length: None }),
22400 },
22401 trailing_comments: Vec::new(),
22402 double_colon_syntax: false,
22403 format: None,
22404 default: None,
22405 inferred_type: None,
22406 }));
22407 Ok(Expression::Function(Box::new(Function::new(
22408 "fromUnixTimestamp64Milli".to_string(),
22409 vec![cast_arg],
22410 ))))
22411 }
22412 _ => Ok(Expression::Function(Box::new(Function::new(
22413 "EPOCH_MS".to_string(),
22414 vec![arg],
22415 )))),
22416 }
22417 } else {
22418 Ok(e)
22419 }
22420 }
22421 Action::TSQLTypeNormalize => {
22422 if let Expression::DataType(dt) = e {
22423 let new_dt = match &dt {
22424 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
22425 DataType::Decimal {
22426 precision: Some(15),
22427 scale: Some(4),
22428 }
22429 }
22430 DataType::Custom { name }
22431 if name.eq_ignore_ascii_case("SMALLMONEY") =>
22432 {
22433 DataType::Decimal {
22434 precision: Some(6),
22435 scale: Some(4),
22436 }
22437 }
22438 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
22439 DataType::Timestamp {
22440 timezone: false,
22441 precision: None,
22442 }
22443 }
22444 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
22445 DataType::Float {
22446 precision: None,
22447 scale: None,
22448 real_spelling: false,
22449 }
22450 }
22451 DataType::Float {
22452 real_spelling: true,
22453 ..
22454 } => DataType::Float {
22455 precision: None,
22456 scale: None,
22457 real_spelling: false,
22458 },
22459 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
22460 DataType::Custom {
22461 name: "BLOB".to_string(),
22462 }
22463 }
22464 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
22465 DataType::Boolean
22466 }
22467 DataType::Custom { name }
22468 if name.eq_ignore_ascii_case("ROWVERSION") =>
22469 {
22470 DataType::Custom {
22471 name: "BINARY".to_string(),
22472 }
22473 }
22474 DataType::Custom { name }
22475 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
22476 {
22477 match target {
22478 DialectType::Spark
22479 | DialectType::Databricks
22480 | DialectType::Hive => DataType::Custom {
22481 name: "STRING".to_string(),
22482 },
22483 _ => DataType::VarChar {
22484 length: Some(36),
22485 parenthesized_length: true,
22486 },
22487 }
22488 }
22489 DataType::Custom { name }
22490 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
22491 {
22492 match target {
22493 DialectType::Spark
22494 | DialectType::Databricks
22495 | DialectType::Hive => DataType::Timestamp {
22496 timezone: false,
22497 precision: None,
22498 },
22499 _ => DataType::Timestamp {
22500 timezone: true,
22501 precision: None,
22502 },
22503 }
22504 }
22505 DataType::Custom { ref name }
22506 if name.len() >= 10
22507 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
22508 {
22509 // DATETIME2(n) -> TIMESTAMP
22510 DataType::Timestamp {
22511 timezone: false,
22512 precision: None,
22513 }
22514 }
22515 DataType::Custom { ref name }
22516 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
22517 {
22518 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
22519 match target {
22520 DialectType::Spark
22521 | DialectType::Databricks
22522 | DialectType::Hive => DataType::Timestamp {
22523 timezone: false,
22524 precision: None,
22525 },
22526 _ => return Ok(Expression::DataType(dt)),
22527 }
22528 }
22529 DataType::Custom { ref name }
22530 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
22531 {
22532 // Parse NUMERIC(p,s) back to Decimal(p,s)
22533 let upper = name.to_ascii_uppercase();
22534 if let Some(inner) = upper
22535 .strip_prefix("NUMERIC(")
22536 .and_then(|s| s.strip_suffix(')'))
22537 {
22538 let parts: Vec<&str> = inner.split(',').collect();
22539 let precision =
22540 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
22541 let scale =
22542 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
22543 DataType::Decimal { precision, scale }
22544 } else if upper == "NUMERIC" {
22545 DataType::Decimal {
22546 precision: None,
22547 scale: None,
22548 }
22549 } else {
22550 return Ok(Expression::DataType(dt));
22551 }
22552 }
22553 DataType::Float {
22554 precision: Some(p), ..
22555 } => {
22556 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
22557 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
22558 let boundary = match target {
22559 DialectType::Hive
22560 | DialectType::Spark
22561 | DialectType::Databricks => 32,
22562 _ => 24,
22563 };
22564 if *p <= boundary {
22565 DataType::Float {
22566 precision: None,
22567 scale: None,
22568 real_spelling: false,
22569 }
22570 } else {
22571 DataType::Double {
22572 precision: None,
22573 scale: None,
22574 }
22575 }
22576 }
22577 DataType::TinyInt { .. } => match target {
22578 DialectType::DuckDB => DataType::Custom {
22579 name: "UTINYINT".to_string(),
22580 },
22581 DialectType::Hive
22582 | DialectType::Spark
22583 | DialectType::Databricks => DataType::SmallInt { length: None },
22584 _ => return Ok(Expression::DataType(dt)),
22585 },
22586 // INTEGER -> INT for Spark/Databricks
22587 DataType::Int {
22588 length,
22589 integer_spelling: true,
22590 } => DataType::Int {
22591 length: *length,
22592 integer_spelling: false,
22593 },
22594 _ => return Ok(Expression::DataType(dt)),
22595 };
22596 Ok(Expression::DataType(new_dt))
22597 } else {
22598 Ok(e)
22599 }
22600 }
22601 Action::MySQLSafeDivide => {
22602 use crate::expressions::{BinaryOp, Cast};
22603 if let Expression::Div(op) = e {
22604 let left = op.left;
22605 let right = op.right;
22606 // For SQLite: CAST left as REAL but NO NULLIF wrapping
22607 if matches!(target, DialectType::SQLite) {
22608 let new_left = Expression::Cast(Box::new(Cast {
22609 this: left,
22610 to: DataType::Float {
22611 precision: None,
22612 scale: None,
22613 real_spelling: true,
22614 },
22615 trailing_comments: Vec::new(),
22616 double_colon_syntax: false,
22617 format: None,
22618 default: None,
22619 inferred_type: None,
22620 }));
22621 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
22622 }
22623 // Wrap right in NULLIF(right, 0)
22624 let nullif_right = Expression::Function(Box::new(Function::new(
22625 "NULLIF".to_string(),
22626 vec![right, Expression::number(0)],
22627 )));
22628 // For some dialects, also CAST the left side
22629 let new_left = match target {
22630 DialectType::PostgreSQL
22631 | DialectType::Redshift
22632 | DialectType::Teradata
22633 | DialectType::Materialize
22634 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
22635 this: left,
22636 to: DataType::Custom {
22637 name: "DOUBLE PRECISION".to_string(),
22638 },
22639 trailing_comments: Vec::new(),
22640 double_colon_syntax: false,
22641 format: None,
22642 default: None,
22643 inferred_type: None,
22644 })),
22645 DialectType::Drill
22646 | DialectType::Trino
22647 | DialectType::Presto
22648 | DialectType::Athena => Expression::Cast(Box::new(Cast {
22649 this: left,
22650 to: DataType::Double {
22651 precision: None,
22652 scale: None,
22653 },
22654 trailing_comments: Vec::new(),
22655 double_colon_syntax: false,
22656 format: None,
22657 default: None,
22658 inferred_type: None,
22659 })),
22660 DialectType::TSQL => Expression::Cast(Box::new(Cast {
22661 this: left,
22662 to: DataType::Float {
22663 precision: None,
22664 scale: None,
22665 real_spelling: false,
22666 },
22667 trailing_comments: Vec::new(),
22668 double_colon_syntax: false,
22669 format: None,
22670 default: None,
22671 inferred_type: None,
22672 })),
22673 _ => left,
22674 };
22675 Ok(Expression::Div(Box::new(BinaryOp::new(
22676 new_left,
22677 nullif_right,
22678 ))))
22679 } else {
22680 Ok(e)
22681 }
22682 }
22683 Action::AlterTableRenameStripSchema => {
22684 if let Expression::AlterTable(mut at) = e {
22685 if let Some(crate::expressions::AlterTableAction::RenameTable(
22686 ref mut new_tbl,
22687 )) = at.actions.first_mut()
22688 {
22689 new_tbl.schema = None;
22690 new_tbl.catalog = None;
22691 }
22692 Ok(Expression::AlterTable(at))
22693 } else {
22694 Ok(e)
22695 }
22696 }
22697 Action::NullsOrdering => {
22698 // Fill in the source dialect's implied null ordering default.
22699 // This makes implicit null ordering explicit so the target generator
22700 // can correctly strip or keep it.
22701 //
22702 // Dialect null ordering categories:
22703 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
22704 // ASC -> NULLS LAST, DESC -> NULLS FIRST
22705 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
22706 // ASC -> NULLS FIRST, DESC -> NULLS LAST
22707 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
22708 // NULLS LAST always (both ASC and DESC)
22709 if let Expression::Ordered(mut o) = e {
22710 let is_asc = !o.desc;
22711
22712 let is_source_nulls_large = matches!(
22713 source,
22714 DialectType::Oracle
22715 | DialectType::PostgreSQL
22716 | DialectType::Redshift
22717 | DialectType::Snowflake
22718 );
22719 let is_source_nulls_last = matches!(
22720 source,
22721 DialectType::DuckDB
22722 | DialectType::Presto
22723 | DialectType::Trino
22724 | DialectType::Dremio
22725 | DialectType::Athena
22726 | DialectType::ClickHouse
22727 | DialectType::Drill
22728 | DialectType::Exasol
22729 | DialectType::DataFusion
22730 );
22731
22732 // Determine target category to check if default matches
22733 let is_target_nulls_large = matches!(
22734 target,
22735 DialectType::Oracle
22736 | DialectType::PostgreSQL
22737 | DialectType::Redshift
22738 | DialectType::Snowflake
22739 );
22740 let is_target_nulls_last = matches!(
22741 target,
22742 DialectType::DuckDB
22743 | DialectType::Presto
22744 | DialectType::Trino
22745 | DialectType::Dremio
22746 | DialectType::Athena
22747 | DialectType::ClickHouse
22748 | DialectType::Drill
22749 | DialectType::Exasol
22750 | DialectType::DataFusion
22751 );
22752
22753 // Compute the implied nulls_first for source
22754 let source_nulls_first = if is_source_nulls_large {
22755 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
22756 } else if is_source_nulls_last {
22757 false // NULLS LAST always
22758 } else {
22759 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
22760 };
22761
22762 // Compute the target's default
22763 let target_nulls_first = if is_target_nulls_large {
22764 !is_asc
22765 } else if is_target_nulls_last {
22766 false
22767 } else {
22768 is_asc
22769 };
22770
22771 // Only add explicit nulls ordering if source and target defaults differ
22772 if source_nulls_first != target_nulls_first {
22773 o.nulls_first = Some(source_nulls_first);
22774 }
22775 // If they match, leave nulls_first as None so the generator won't output it
22776
22777 Ok(Expression::Ordered(o))
22778 } else {
22779 Ok(e)
22780 }
22781 }
22782 Action::StringAggConvert => {
22783 match e {
22784 Expression::WithinGroup(wg) => {
22785 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
22786 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
22787 let (x_opt, sep_opt, distinct) = match wg.this {
22788 Expression::AggregateFunction(ref af)
22789 if af.name.eq_ignore_ascii_case("STRING_AGG")
22790 && af.args.len() >= 2 =>
22791 {
22792 (
22793 Some(af.args[0].clone()),
22794 Some(af.args[1].clone()),
22795 af.distinct,
22796 )
22797 }
22798 Expression::Function(ref f)
22799 if f.name.eq_ignore_ascii_case("STRING_AGG")
22800 && f.args.len() >= 2 =>
22801 {
22802 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
22803 }
22804 Expression::StringAgg(ref sa) => {
22805 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
22806 }
22807 _ => (None, None, false),
22808 };
22809 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
22810 let order_by = wg.order_by;
22811
22812 match target {
22813 DialectType::TSQL | DialectType::Fabric => {
22814 // Keep as WithinGroup(StringAgg) for TSQL
22815 Ok(Expression::WithinGroup(Box::new(
22816 crate::expressions::WithinGroup {
22817 this: Expression::StringAgg(Box::new(
22818 crate::expressions::StringAggFunc {
22819 this: x,
22820 separator: Some(sep),
22821 order_by: None, // order_by goes in WithinGroup, not StringAgg
22822 distinct,
22823 filter: None,
22824 limit: None,
22825 inferred_type: None,
22826 },
22827 )),
22828 order_by,
22829 },
22830 )))
22831 }
22832 DialectType::MySQL
22833 | DialectType::SingleStore
22834 | DialectType::Doris
22835 | DialectType::StarRocks => {
22836 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
22837 Ok(Expression::GroupConcat(Box::new(
22838 crate::expressions::GroupConcatFunc {
22839 this: x,
22840 separator: Some(sep),
22841 order_by: Some(order_by),
22842 distinct,
22843 filter: None,
22844 limit: None,
22845 inferred_type: None,
22846 },
22847 )))
22848 }
22849 DialectType::SQLite => {
22850 // GROUP_CONCAT(x, sep) - no ORDER BY support
22851 Ok(Expression::GroupConcat(Box::new(
22852 crate::expressions::GroupConcatFunc {
22853 this: x,
22854 separator: Some(sep),
22855 order_by: None,
22856 distinct,
22857 filter: None,
22858 limit: None,
22859 inferred_type: None,
22860 },
22861 )))
22862 }
22863 DialectType::PostgreSQL | DialectType::Redshift => {
22864 // STRING_AGG(x, sep ORDER BY z)
22865 Ok(Expression::StringAgg(Box::new(
22866 crate::expressions::StringAggFunc {
22867 this: x,
22868 separator: Some(sep),
22869 order_by: Some(order_by),
22870 distinct,
22871 filter: None,
22872 limit: None,
22873 inferred_type: None,
22874 },
22875 )))
22876 }
22877 _ => {
22878 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
22879 Ok(Expression::StringAgg(Box::new(
22880 crate::expressions::StringAggFunc {
22881 this: x,
22882 separator: Some(sep),
22883 order_by: Some(order_by),
22884 distinct,
22885 filter: None,
22886 limit: None,
22887 inferred_type: None,
22888 },
22889 )))
22890 }
22891 }
22892 } else {
22893 Ok(Expression::WithinGroup(wg))
22894 }
22895 }
22896 Expression::StringAgg(sa) => {
22897 match target {
22898 DialectType::MySQL
22899 | DialectType::SingleStore
22900 | DialectType::Doris
22901 | DialectType::StarRocks => {
22902 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
22903 Ok(Expression::GroupConcat(Box::new(
22904 crate::expressions::GroupConcatFunc {
22905 this: sa.this,
22906 separator: sa.separator,
22907 order_by: sa.order_by,
22908 distinct: sa.distinct,
22909 filter: sa.filter,
22910 limit: None,
22911 inferred_type: None,
22912 },
22913 )))
22914 }
22915 DialectType::SQLite => {
22916 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
22917 Ok(Expression::GroupConcat(Box::new(
22918 crate::expressions::GroupConcatFunc {
22919 this: sa.this,
22920 separator: sa.separator,
22921 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
22922 distinct: sa.distinct,
22923 filter: sa.filter,
22924 limit: None,
22925 inferred_type: None,
22926 },
22927 )))
22928 }
22929 DialectType::Spark | DialectType::Databricks => {
22930 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
22931 Ok(Expression::ListAgg(Box::new(
22932 crate::expressions::ListAggFunc {
22933 this: sa.this,
22934 separator: sa.separator,
22935 on_overflow: None,
22936 order_by: sa.order_by,
22937 distinct: sa.distinct,
22938 filter: None,
22939 inferred_type: None,
22940 },
22941 )))
22942 }
22943 _ => Ok(Expression::StringAgg(sa)),
22944 }
22945 }
22946 _ => Ok(e),
22947 }
22948 }
22949 Action::GroupConcatConvert => {
22950 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
22951 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
22952 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
22953 if let Expression::Function(ref f) = expr {
22954 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22955 let mut result = f.args[0].clone();
22956 for arg in &f.args[1..] {
22957 result = Expression::Concat(Box::new(BinaryOp {
22958 left: result,
22959 right: arg.clone(),
22960 left_comments: vec![],
22961 operator_comments: vec![],
22962 trailing_comments: vec![],
22963 inferred_type: None,
22964 }));
22965 }
22966 return result;
22967 }
22968 }
22969 expr
22970 }
22971 fn expand_concat_to_plus(expr: Expression) -> Expression {
22972 if let Expression::Function(ref f) = expr {
22973 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22974 let mut result = f.args[0].clone();
22975 for arg in &f.args[1..] {
22976 result = Expression::Add(Box::new(BinaryOp {
22977 left: result,
22978 right: arg.clone(),
22979 left_comments: vec![],
22980 operator_comments: vec![],
22981 trailing_comments: vec![],
22982 inferred_type: None,
22983 }));
22984 }
22985 return result;
22986 }
22987 }
22988 expr
22989 }
22990 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
22991 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
22992 if let Expression::Function(ref f) = expr {
22993 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22994 let new_args: Vec<Expression> = f
22995 .args
22996 .iter()
22997 .map(|arg| {
22998 Expression::Cast(Box::new(crate::expressions::Cast {
22999 this: arg.clone(),
23000 to: crate::expressions::DataType::VarChar {
23001 length: None,
23002 parenthesized_length: false,
23003 },
23004 trailing_comments: Vec::new(),
23005 double_colon_syntax: false,
23006 format: None,
23007 default: None,
23008 inferred_type: None,
23009 }))
23010 })
23011 .collect();
23012 return Expression::Function(Box::new(
23013 crate::expressions::Function::new(
23014 "CONCAT".to_string(),
23015 new_args,
23016 ),
23017 ));
23018 }
23019 }
23020 expr
23021 }
23022 if let Expression::GroupConcat(gc) = e {
23023 match target {
23024 DialectType::Presto => {
23025 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
23026 let sep = gc.separator.unwrap_or(Expression::string(","));
23027 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
23028 let this = wrap_concat_args_in_varchar_cast(gc.this);
23029 let array_agg =
23030 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
23031 this,
23032 distinct: gc.distinct,
23033 filter: gc.filter,
23034 order_by: gc.order_by.unwrap_or_default(),
23035 name: None,
23036 ignore_nulls: None,
23037 having_max: None,
23038 limit: None,
23039 inferred_type: None,
23040 }));
23041 Ok(Expression::ArrayJoin(Box::new(
23042 crate::expressions::ArrayJoinFunc {
23043 this: array_agg,
23044 separator: sep,
23045 null_replacement: None,
23046 },
23047 )))
23048 }
23049 DialectType::Trino => {
23050 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
23051 let sep = gc.separator.unwrap_or(Expression::string(","));
23052 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
23053 let this = wrap_concat_args_in_varchar_cast(gc.this);
23054 Ok(Expression::ListAgg(Box::new(
23055 crate::expressions::ListAggFunc {
23056 this,
23057 separator: Some(sep),
23058 on_overflow: None,
23059 order_by: gc.order_by,
23060 distinct: gc.distinct,
23061 filter: gc.filter,
23062 inferred_type: None,
23063 },
23064 )))
23065 }
23066 DialectType::PostgreSQL
23067 | DialectType::Redshift
23068 | DialectType::Snowflake
23069 | DialectType::DuckDB
23070 | DialectType::Hive
23071 | DialectType::ClickHouse => {
23072 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
23073 let sep = gc.separator.unwrap_or(Expression::string(","));
23074 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
23075 let this = expand_concat_to_dpipe(gc.this);
23076 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
23077 let order_by = if target == DialectType::PostgreSQL {
23078 gc.order_by.map(|ords| {
23079 ords.into_iter()
23080 .map(|mut o| {
23081 if o.nulls_first.is_none() {
23082 if o.desc {
23083 o.nulls_first = Some(false);
23084 // NULLS LAST
23085 } else {
23086 o.nulls_first = Some(true);
23087 // NULLS FIRST
23088 }
23089 }
23090 o
23091 })
23092 .collect()
23093 })
23094 } else {
23095 gc.order_by
23096 };
23097 Ok(Expression::StringAgg(Box::new(
23098 crate::expressions::StringAggFunc {
23099 this,
23100 separator: Some(sep),
23101 order_by,
23102 distinct: gc.distinct,
23103 filter: gc.filter,
23104 limit: None,
23105 inferred_type: None,
23106 },
23107 )))
23108 }
23109 DialectType::TSQL => {
23110 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
23111 // TSQL doesn't support DISTINCT in STRING_AGG
23112 let sep = gc.separator.unwrap_or(Expression::string(","));
23113 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
23114 let this = expand_concat_to_plus(gc.this);
23115 Ok(Expression::StringAgg(Box::new(
23116 crate::expressions::StringAggFunc {
23117 this,
23118 separator: Some(sep),
23119 order_by: gc.order_by,
23120 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
23121 filter: gc.filter,
23122 limit: None,
23123 inferred_type: None,
23124 },
23125 )))
23126 }
23127 DialectType::SQLite => {
23128 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
23129 // SQLite GROUP_CONCAT doesn't support ORDER BY
23130 // Expand CONCAT(a,b,c) -> a || b || c
23131 let this = expand_concat_to_dpipe(gc.this);
23132 Ok(Expression::GroupConcat(Box::new(
23133 crate::expressions::GroupConcatFunc {
23134 this,
23135 separator: gc.separator,
23136 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
23137 distinct: gc.distinct,
23138 filter: gc.filter,
23139 limit: None,
23140 inferred_type: None,
23141 },
23142 )))
23143 }
23144 DialectType::Spark | DialectType::Databricks => {
23145 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
23146 let sep = gc.separator.unwrap_or(Expression::string(","));
23147 Ok(Expression::ListAgg(Box::new(
23148 crate::expressions::ListAggFunc {
23149 this: gc.this,
23150 separator: Some(sep),
23151 on_overflow: None,
23152 order_by: gc.order_by,
23153 distinct: gc.distinct,
23154 filter: None,
23155 inferred_type: None,
23156 },
23157 )))
23158 }
23159 DialectType::MySQL
23160 | DialectType::SingleStore
23161 | DialectType::StarRocks => {
23162 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
23163 if gc.separator.is_none() {
23164 let mut gc = gc;
23165 gc.separator = Some(Expression::string(","));
23166 Ok(Expression::GroupConcat(gc))
23167 } else {
23168 Ok(Expression::GroupConcat(gc))
23169 }
23170 }
23171 _ => Ok(Expression::GroupConcat(gc)),
23172 }
23173 } else {
23174 Ok(e)
23175 }
23176 }
23177 Action::TempTableHash => {
23178 match e {
23179 Expression::CreateTable(mut ct) => {
23180 // TSQL #table -> TEMPORARY TABLE with # stripped from name
23181 let name = &ct.name.name.name;
23182 if name.starts_with('#') {
23183 ct.name.name.name = name.trim_start_matches('#').to_string();
23184 }
23185 // Set temporary flag
23186 ct.temporary = true;
23187 Ok(Expression::CreateTable(ct))
23188 }
23189 Expression::Table(mut tr) => {
23190 // Strip # from table references
23191 let name = &tr.name.name;
23192 if name.starts_with('#') {
23193 tr.name.name = name.trim_start_matches('#').to_string();
23194 }
23195 Ok(Expression::Table(tr))
23196 }
23197 Expression::DropTable(mut dt) => {
23198 // Strip # from DROP TABLE names
23199 for table_ref in &mut dt.names {
23200 if table_ref.name.name.starts_with('#') {
23201 table_ref.name.name =
23202 table_ref.name.name.trim_start_matches('#').to_string();
23203 }
23204 }
23205 Ok(Expression::DropTable(dt))
23206 }
23207 _ => Ok(e),
23208 }
23209 }
23210 Action::NvlClearOriginal => {
23211 if let Expression::Nvl(mut f) = e {
23212 f.original_name = None;
23213 Ok(Expression::Nvl(f))
23214 } else {
23215 Ok(e)
23216 }
23217 }
23218 Action::HiveCastToTryCast => {
23219 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
23220 if let Expression::Cast(mut c) = e {
23221 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
23222 // (Spark's TIMESTAMP is always timezone-aware)
23223 if matches!(target, DialectType::DuckDB)
23224 && matches!(source, DialectType::Spark | DialectType::Databricks)
23225 && matches!(
23226 c.to,
23227 DataType::Timestamp {
23228 timezone: false,
23229 ..
23230 }
23231 )
23232 {
23233 c.to = DataType::Custom {
23234 name: "TIMESTAMPTZ".to_string(),
23235 };
23236 }
23237 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
23238 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
23239 if matches!(target, DialectType::Databricks | DialectType::Spark)
23240 && matches!(
23241 source,
23242 DialectType::Spark | DialectType::Databricks | DialectType::Hive
23243 )
23244 && Self::has_varchar_char_type(&c.to)
23245 {
23246 c.to = Self::normalize_varchar_to_string(c.to);
23247 }
23248 Ok(Expression::TryCast(c))
23249 } else {
23250 Ok(e)
23251 }
23252 }
23253 Action::XorExpand => {
23254 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
23255 // Snowflake: use BOOLXOR(a, b) instead
23256 if let Expression::Xor(xor) = e {
23257 // Collect all XOR operands
23258 let mut operands = Vec::new();
23259 if let Some(this) = xor.this {
23260 operands.push(*this);
23261 }
23262 if let Some(expr) = xor.expression {
23263 operands.push(*expr);
23264 }
23265 operands.extend(xor.expressions);
23266
23267 // Snowflake: use BOOLXOR(a, b)
23268 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
23269 let a = operands.remove(0);
23270 let b = operands.remove(0);
23271 return Ok(Expression::Function(Box::new(Function::new(
23272 "BOOLXOR".to_string(),
23273 vec![a, b],
23274 ))));
23275 }
23276
23277 // Helper to build (a AND NOT b) OR (NOT a AND b)
23278 let make_xor = |a: Expression, b: Expression| -> Expression {
23279 let not_b = Expression::Not(Box::new(
23280 crate::expressions::UnaryOp::new(b.clone()),
23281 ));
23282 let not_a = Expression::Not(Box::new(
23283 crate::expressions::UnaryOp::new(a.clone()),
23284 ));
23285 let left_and = Expression::And(Box::new(BinaryOp {
23286 left: a,
23287 right: Expression::Paren(Box::new(Paren {
23288 this: not_b,
23289 trailing_comments: Vec::new(),
23290 })),
23291 left_comments: Vec::new(),
23292 operator_comments: Vec::new(),
23293 trailing_comments: Vec::new(),
23294 inferred_type: None,
23295 }));
23296 let right_and = Expression::And(Box::new(BinaryOp {
23297 left: Expression::Paren(Box::new(Paren {
23298 this: not_a,
23299 trailing_comments: Vec::new(),
23300 })),
23301 right: b,
23302 left_comments: Vec::new(),
23303 operator_comments: Vec::new(),
23304 trailing_comments: Vec::new(),
23305 inferred_type: None,
23306 }));
23307 Expression::Or(Box::new(BinaryOp {
23308 left: Expression::Paren(Box::new(Paren {
23309 this: left_and,
23310 trailing_comments: Vec::new(),
23311 })),
23312 right: Expression::Paren(Box::new(Paren {
23313 this: right_and,
23314 trailing_comments: Vec::new(),
23315 })),
23316 left_comments: Vec::new(),
23317 operator_comments: Vec::new(),
23318 trailing_comments: Vec::new(),
23319 inferred_type: None,
23320 }))
23321 };
23322
23323 if operands.len() >= 2 {
23324 let mut result = make_xor(operands.remove(0), operands.remove(0));
23325 for operand in operands {
23326 result = make_xor(result, operand);
23327 }
23328 Ok(result)
23329 } else if operands.len() == 1 {
23330 Ok(operands.remove(0))
23331 } else {
23332 // No operands - return FALSE (shouldn't happen)
23333 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
23334 value: false,
23335 }))
23336 }
23337 } else {
23338 Ok(e)
23339 }
23340 }
23341 Action::DatePartUnquote => {
23342 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
23343 // Convert the quoted string first arg to a bare Column/Identifier
23344 if let Expression::Function(mut f) = e {
23345 if let Some(Expression::Literal(lit)) = f.args.first() {
23346 if let crate::expressions::Literal::String(s) = lit.as_ref() {
23347 let bare_name = s.to_ascii_lowercase();
23348 f.args[0] =
23349 Expression::Column(Box::new(crate::expressions::Column {
23350 name: Identifier::new(bare_name),
23351 table: None,
23352 join_mark: false,
23353 trailing_comments: Vec::new(),
23354 span: None,
23355 inferred_type: None,
23356 }));
23357 }
23358 }
23359 Ok(Expression::Function(f))
23360 } else {
23361 Ok(e)
23362 }
23363 }
23364 Action::ArrayLengthConvert => {
23365 // Extract the argument from the expression
23366 let arg = match e {
23367 Expression::Cardinality(ref f) => f.this.clone(),
23368 Expression::ArrayLength(ref f) => f.this.clone(),
23369 Expression::ArraySize(ref f) => f.this.clone(),
23370 _ => return Ok(e),
23371 };
23372 match target {
23373 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23374 Ok(Expression::Function(Box::new(Function::new(
23375 "SIZE".to_string(),
23376 vec![arg],
23377 ))))
23378 }
23379 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23380 Ok(Expression::Cardinality(Box::new(
23381 crate::expressions::UnaryFunc::new(arg),
23382 )))
23383 }
23384 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
23385 crate::expressions::UnaryFunc::new(arg),
23386 ))),
23387 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
23388 crate::expressions::UnaryFunc::new(arg),
23389 ))),
23390 DialectType::PostgreSQL | DialectType::Redshift => {
23391 // PostgreSQL ARRAY_LENGTH requires dimension arg
23392 Ok(Expression::Function(Box::new(Function::new(
23393 "ARRAY_LENGTH".to_string(),
23394 vec![arg, Expression::number(1)],
23395 ))))
23396 }
23397 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
23398 crate::expressions::UnaryFunc::new(arg),
23399 ))),
23400 _ => Ok(e), // Keep original
23401 }
23402 }
23403
23404 Action::JsonExtractToArrow => {
23405 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
23406 if let Expression::JsonExtract(mut f) = e {
23407 f.arrow_syntax = true;
23408 // Transform path: convert bracket notation to dot notation
23409 // SQLite strips wildcards, DuckDB preserves them
23410 if let Expression::Literal(ref lit) = f.path {
23411 if let Literal::String(ref s) = lit.as_ref() {
23412 let mut transformed = s.clone();
23413 if matches!(target, DialectType::SQLite) {
23414 transformed = Self::strip_json_wildcards(&transformed);
23415 }
23416 transformed = Self::bracket_to_dot_notation(&transformed);
23417 if transformed != *s {
23418 f.path = Expression::string(&transformed);
23419 }
23420 }
23421 }
23422 Ok(Expression::JsonExtract(f))
23423 } else {
23424 Ok(e)
23425 }
23426 }
23427
23428 Action::JsonExtractToGetJsonObject => {
23429 if let Expression::JsonExtract(f) = e {
23430 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
23431 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
23432 // Use proper decomposition that handles brackets
23433 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
23434 if let Literal::String(ref s) = lit.as_ref() {
23435 let parts = Self::decompose_json_path(s);
23436 parts.into_iter().map(|k| Expression::string(&k)).collect()
23437 } else {
23438 vec![]
23439 }
23440 } else {
23441 vec![f.path]
23442 };
23443 let func_name = if matches!(target, DialectType::Redshift) {
23444 "JSON_EXTRACT_PATH_TEXT"
23445 } else {
23446 "JSON_EXTRACT_PATH"
23447 };
23448 let mut args = vec![f.this];
23449 args.extend(keys);
23450 Ok(Expression::Function(Box::new(Function::new(
23451 func_name.to_string(),
23452 args,
23453 ))))
23454 } else {
23455 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23456 // Convert bracket double quotes to single quotes
23457 let path = if let Expression::Literal(ref lit) = f.path {
23458 if let Literal::String(ref s) = lit.as_ref() {
23459 let normalized = Self::bracket_to_single_quotes(s);
23460 if normalized != *s {
23461 Expression::string(&normalized)
23462 } else {
23463 f.path.clone()
23464 }
23465 } else {
23466 f.path.clone()
23467 }
23468 } else {
23469 f.path.clone()
23470 };
23471 Ok(Expression::Function(Box::new(Function::new(
23472 "GET_JSON_OBJECT".to_string(),
23473 vec![f.this, path],
23474 ))))
23475 }
23476 } else {
23477 Ok(e)
23478 }
23479 }
23480
23481 Action::JsonExtractScalarToGetJsonObject => {
23482 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23483 if let Expression::JsonExtractScalar(f) = e {
23484 Ok(Expression::Function(Box::new(Function::new(
23485 "GET_JSON_OBJECT".to_string(),
23486 vec![f.this, f.path],
23487 ))))
23488 } else {
23489 Ok(e)
23490 }
23491 }
23492
23493 Action::JsonExtractToTsql => {
23494 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
23495 let (this, path) = match e {
23496 Expression::JsonExtract(f) => (f.this, f.path),
23497 Expression::JsonExtractScalar(f) => (f.this, f.path),
23498 _ => return Ok(e),
23499 };
23500 // Transform path: strip wildcards, convert bracket notation to dot notation
23501 let transformed_path = if let Expression::Literal(ref lit) = path {
23502 if let Literal::String(ref s) = lit.as_ref() {
23503 let stripped = Self::strip_json_wildcards(s);
23504 let dotted = Self::bracket_to_dot_notation(&stripped);
23505 Expression::string(&dotted)
23506 } else {
23507 path.clone()
23508 }
23509 } else {
23510 path
23511 };
23512 let json_query = Expression::Function(Box::new(Function::new(
23513 "JSON_QUERY".to_string(),
23514 vec![this.clone(), transformed_path.clone()],
23515 )));
23516 let json_value = Expression::Function(Box::new(Function::new(
23517 "JSON_VALUE".to_string(),
23518 vec![this, transformed_path],
23519 )));
23520 Ok(Expression::Function(Box::new(Function::new(
23521 "ISNULL".to_string(),
23522 vec![json_query, json_value],
23523 ))))
23524 }
23525
23526 Action::JsonExtractToClickHouse => {
23527 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
23528 let (this, path) = match e {
23529 Expression::JsonExtract(f) => (f.this, f.path),
23530 Expression::JsonExtractScalar(f) => (f.this, f.path),
23531 _ => return Ok(e),
23532 };
23533 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
23534 if let Literal::String(ref s) = lit.as_ref() {
23535 let parts = Self::decompose_json_path(s);
23536 let mut result = vec![this];
23537 for part in parts {
23538 // ClickHouse uses 1-based integer indices for array access
23539 if let Ok(idx) = part.parse::<i64>() {
23540 result.push(Expression::number(idx + 1));
23541 } else {
23542 result.push(Expression::string(&part));
23543 }
23544 }
23545 result
23546 } else {
23547 vec![]
23548 }
23549 } else {
23550 vec![this, path]
23551 };
23552 Ok(Expression::Function(Box::new(Function::new(
23553 "JSONExtractString".to_string(),
23554 args,
23555 ))))
23556 }
23557
23558 Action::JsonExtractScalarConvert => {
23559 // JSON_EXTRACT_SCALAR -> target-specific
23560 if let Expression::JsonExtractScalar(f) = e {
23561 match target {
23562 DialectType::PostgreSQL | DialectType::Redshift => {
23563 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
23564 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
23565 {
23566 if let Literal::String(ref s) = lit.as_ref() {
23567 let parts = Self::decompose_json_path(s);
23568 parts.into_iter().map(|k| Expression::string(&k)).collect()
23569 } else {
23570 vec![]
23571 }
23572 } else {
23573 vec![f.path]
23574 };
23575 let mut args = vec![f.this];
23576 args.extend(keys);
23577 Ok(Expression::Function(Box::new(Function::new(
23578 "JSON_EXTRACT_PATH_TEXT".to_string(),
23579 args,
23580 ))))
23581 }
23582 DialectType::Snowflake => {
23583 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
23584 let stripped_path = if let Expression::Literal(ref lit) = f.path {
23585 if let Literal::String(ref s) = lit.as_ref() {
23586 let stripped = Self::strip_json_dollar_prefix(s);
23587 Expression::string(&stripped)
23588 } else {
23589 f.path.clone()
23590 }
23591 } else {
23592 f.path
23593 };
23594 Ok(Expression::Function(Box::new(Function::new(
23595 "JSON_EXTRACT_PATH_TEXT".to_string(),
23596 vec![f.this, stripped_path],
23597 ))))
23598 }
23599 DialectType::SQLite | DialectType::DuckDB => {
23600 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
23601 Ok(Expression::JsonExtractScalar(Box::new(
23602 crate::expressions::JsonExtractFunc {
23603 this: f.this,
23604 path: f.path,
23605 returning: f.returning,
23606 arrow_syntax: true,
23607 hash_arrow_syntax: false,
23608 wrapper_option: None,
23609 quotes_option: None,
23610 on_scalar_string: false,
23611 on_error: None,
23612 },
23613 )))
23614 }
23615 _ => Ok(Expression::JsonExtractScalar(f)),
23616 }
23617 } else {
23618 Ok(e)
23619 }
23620 }
23621
23622 Action::JsonPathNormalize => {
23623 // Normalize JSON path format for BigQuery, MySQL, etc.
23624 if let Expression::JsonExtract(mut f) = e {
23625 if let Expression::Literal(ref lit) = f.path {
23626 if let Literal::String(ref s) = lit.as_ref() {
23627 let mut normalized = s.clone();
23628 // Convert bracket notation and handle wildcards per dialect
23629 match target {
23630 DialectType::BigQuery => {
23631 // BigQuery strips wildcards and uses single quotes in brackets
23632 normalized = Self::strip_json_wildcards(&normalized);
23633 normalized = Self::bracket_to_single_quotes(&normalized);
23634 }
23635 DialectType::MySQL => {
23636 // MySQL preserves wildcards, converts brackets to dot notation
23637 normalized = Self::bracket_to_dot_notation(&normalized);
23638 }
23639 _ => {}
23640 }
23641 if normalized != *s {
23642 f.path = Expression::string(&normalized);
23643 }
23644 }
23645 }
23646 Ok(Expression::JsonExtract(f))
23647 } else {
23648 Ok(e)
23649 }
23650 }
23651
23652 Action::JsonQueryValueConvert => {
23653 // JsonQuery/JsonValue -> target-specific
23654 let (f, is_query) = match e {
23655 Expression::JsonQuery(f) => (f, true),
23656 Expression::JsonValue(f) => (f, false),
23657 _ => return Ok(e),
23658 };
23659 match target {
23660 DialectType::TSQL | DialectType::Fabric => {
23661 // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
23662 let json_query = Expression::Function(Box::new(Function::new(
23663 "JSON_QUERY".to_string(),
23664 vec![f.this.clone(), f.path.clone()],
23665 )));
23666 let json_value = Expression::Function(Box::new(Function::new(
23667 "JSON_VALUE".to_string(),
23668 vec![f.this, f.path],
23669 )));
23670 Ok(Expression::Function(Box::new(Function::new(
23671 "ISNULL".to_string(),
23672 vec![json_query, json_value],
23673 ))))
23674 }
23675 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23676 Ok(Expression::Function(Box::new(Function::new(
23677 "GET_JSON_OBJECT".to_string(),
23678 vec![f.this, f.path],
23679 ))))
23680 }
23681 DialectType::PostgreSQL | DialectType::Redshift => {
23682 Ok(Expression::Function(Box::new(Function::new(
23683 "JSON_EXTRACT_PATH_TEXT".to_string(),
23684 vec![f.this, f.path],
23685 ))))
23686 }
23687 DialectType::DuckDB | DialectType::SQLite => {
23688 // json -> path arrow syntax
23689 Ok(Expression::JsonExtract(Box::new(
23690 crate::expressions::JsonExtractFunc {
23691 this: f.this,
23692 path: f.path,
23693 returning: f.returning,
23694 arrow_syntax: true,
23695 hash_arrow_syntax: false,
23696 wrapper_option: f.wrapper_option,
23697 quotes_option: f.quotes_option,
23698 on_scalar_string: f.on_scalar_string,
23699 on_error: f.on_error,
23700 },
23701 )))
23702 }
23703 DialectType::Snowflake => {
23704 // GET_PATH(PARSE_JSON(json), 'path')
23705 // Strip $. prefix from path
23706 // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
23707 let json_expr = match &f.this {
23708 Expression::Function(ref inner_f)
23709 if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
23710 {
23711 f.this
23712 }
23713 Expression::ParseJson(_) => {
23714 // Already a ParseJson expression, which generates as PARSE_JSON(...)
23715 f.this
23716 }
23717 _ => Expression::Function(Box::new(Function::new(
23718 "PARSE_JSON".to_string(),
23719 vec![f.this],
23720 ))),
23721 };
23722 let path_str = match &f.path {
23723 Expression::Literal(lit)
23724 if matches!(lit.as_ref(), Literal::String(_)) =>
23725 {
23726 let Literal::String(s) = lit.as_ref() else {
23727 unreachable!()
23728 };
23729 let stripped = s.strip_prefix("$.").unwrap_or(s);
23730 Expression::Literal(Box::new(Literal::String(
23731 stripped.to_string(),
23732 )))
23733 }
23734 other => other.clone(),
23735 };
23736 Ok(Expression::Function(Box::new(Function::new(
23737 "GET_PATH".to_string(),
23738 vec![json_expr, path_str],
23739 ))))
23740 }
23741 _ => {
23742 // Default: keep as JSON_QUERY/JSON_VALUE function
23743 let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
23744 Ok(Expression::Function(Box::new(Function::new(
23745 func_name.to_string(),
23746 vec![f.this, f.path],
23747 ))))
23748 }
23749 }
23750 }
23751
23752 Action::JsonLiteralToJsonParse => {
23753 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
23754 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
23755 if let Expression::Cast(c) = e {
23756 let func_name = if matches!(target, DialectType::Snowflake) {
23757 "PARSE_JSON"
23758 } else {
23759 "JSON_PARSE"
23760 };
23761 Ok(Expression::Function(Box::new(Function::new(
23762 func_name.to_string(),
23763 vec![c.this],
23764 ))))
23765 } else {
23766 Ok(e)
23767 }
23768 }
23769
23770 Action::DuckDBCastJsonToVariant => {
23771 if let Expression::Cast(c) = e {
23772 Ok(Expression::Cast(Box::new(Cast {
23773 this: c.this,
23774 to: DataType::Custom {
23775 name: "VARIANT".to_string(),
23776 },
23777 trailing_comments: c.trailing_comments,
23778 double_colon_syntax: false,
23779 format: None,
23780 default: None,
23781 inferred_type: None,
23782 })))
23783 } else {
23784 Ok(e)
23785 }
23786 }
23787
23788 Action::DuckDBTryCastJsonToTryJsonParse => {
23789 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
23790 if let Expression::TryCast(c) = e {
23791 let json_parse = Expression::Function(Box::new(Function::new(
23792 "JSON_PARSE".to_string(),
23793 vec![c.this],
23794 )));
23795 Ok(Expression::Function(Box::new(Function::new(
23796 "TRY".to_string(),
23797 vec![json_parse],
23798 ))))
23799 } else {
23800 Ok(e)
23801 }
23802 }
23803
23804 Action::DuckDBJsonFuncToJsonParse => {
23805 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
23806 if let Expression::Function(f) = e {
23807 let args = f.args;
23808 Ok(Expression::Function(Box::new(Function::new(
23809 "JSON_PARSE".to_string(),
23810 args,
23811 ))))
23812 } else {
23813 Ok(e)
23814 }
23815 }
23816
23817 Action::DuckDBJsonValidToIsJson => {
23818 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
23819 if let Expression::Function(mut f) = e {
23820 let arg = f.args.remove(0);
23821 Ok(Expression::IsJson(Box::new(crate::expressions::IsJson {
23822 this: arg,
23823 json_type: None,
23824 unique_keys: None,
23825 negated: false,
23826 })))
23827 } else {
23828 Ok(e)
23829 }
23830 }
23831
23832 Action::AtTimeZoneConvert => {
23833 // AT TIME ZONE -> target-specific conversion
23834 if let Expression::AtTimeZone(atz) = e {
23835 match target {
23836 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23837 Ok(Expression::Function(Box::new(Function::new(
23838 "AT_TIMEZONE".to_string(),
23839 vec![atz.this, atz.zone],
23840 ))))
23841 }
23842 DialectType::Spark | DialectType::Databricks => {
23843 Ok(Expression::Function(Box::new(Function::new(
23844 "FROM_UTC_TIMESTAMP".to_string(),
23845 vec![atz.this, atz.zone],
23846 ))))
23847 }
23848 DialectType::Snowflake => {
23849 // CONVERT_TIMEZONE('zone', expr)
23850 Ok(Expression::Function(Box::new(Function::new(
23851 "CONVERT_TIMEZONE".to_string(),
23852 vec![atz.zone, atz.this],
23853 ))))
23854 }
23855 DialectType::BigQuery => {
23856 // TIMESTAMP(DATETIME(expr, 'zone'))
23857 let datetime_call = Expression::Function(Box::new(Function::new(
23858 "DATETIME".to_string(),
23859 vec![atz.this, atz.zone],
23860 )));
23861 Ok(Expression::Function(Box::new(Function::new(
23862 "TIMESTAMP".to_string(),
23863 vec![datetime_call],
23864 ))))
23865 }
23866 _ => Ok(Expression::Function(Box::new(Function::new(
23867 "AT_TIMEZONE".to_string(),
23868 vec![atz.this, atz.zone],
23869 )))),
23870 }
23871 } else {
23872 Ok(e)
23873 }
23874 }
23875
23876 Action::DayOfWeekConvert => {
23877 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
23878 if let Expression::DayOfWeek(f) = e {
23879 match target {
23880 DialectType::DuckDB => Ok(Expression::Function(Box::new(
23881 Function::new("ISODOW".to_string(), vec![f.this]),
23882 ))),
23883 DialectType::Spark | DialectType::Databricks => {
23884 // ((DAYOFWEEK(x) % 7) + 1)
23885 let dayofweek = Expression::Function(Box::new(Function::new(
23886 "DAYOFWEEK".to_string(),
23887 vec![f.this],
23888 )));
23889 let modulo = Expression::Mod(Box::new(BinaryOp {
23890 left: dayofweek,
23891 right: Expression::number(7),
23892 left_comments: Vec::new(),
23893 operator_comments: Vec::new(),
23894 trailing_comments: Vec::new(),
23895 inferred_type: None,
23896 }));
23897 let paren_mod = Expression::Paren(Box::new(Paren {
23898 this: modulo,
23899 trailing_comments: Vec::new(),
23900 }));
23901 let add_one = Expression::Add(Box::new(BinaryOp {
23902 left: paren_mod,
23903 right: Expression::number(1),
23904 left_comments: Vec::new(),
23905 operator_comments: Vec::new(),
23906 trailing_comments: Vec::new(),
23907 inferred_type: None,
23908 }));
23909 Ok(Expression::Paren(Box::new(Paren {
23910 this: add_one,
23911 trailing_comments: Vec::new(),
23912 })))
23913 }
23914 _ => Ok(Expression::DayOfWeek(f)),
23915 }
23916 } else {
23917 Ok(e)
23918 }
23919 }
23920
23921 Action::MaxByMinByConvert => {
23922 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
23923 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
23924 // Handle both Expression::Function and Expression::AggregateFunction
23925 let (is_max, args) = match &e {
23926 Expression::Function(f) => {
23927 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
23928 }
23929 Expression::AggregateFunction(af) => {
23930 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
23931 }
23932 _ => return Ok(e),
23933 };
23934 match target {
23935 DialectType::ClickHouse => {
23936 let name = if is_max { "argMax" } else { "argMin" };
23937 let mut args = args;
23938 args.truncate(2);
23939 Ok(Expression::Function(Box::new(Function::new(
23940 name.to_string(),
23941 args,
23942 ))))
23943 }
23944 DialectType::DuckDB => {
23945 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
23946 Ok(Expression::Function(Box::new(Function::new(
23947 name.to_string(),
23948 args,
23949 ))))
23950 }
23951 DialectType::Spark | DialectType::Databricks => {
23952 let mut args = args;
23953 args.truncate(2);
23954 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
23955 Ok(Expression::Function(Box::new(Function::new(
23956 name.to_string(),
23957 args,
23958 ))))
23959 }
23960 _ => Ok(e),
23961 }
23962 }
23963
23964 Action::ElementAtConvert => {
23965 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
23966 let (arr, idx) = if let Expression::ElementAt(bf) = e {
23967 (bf.this, bf.expression)
23968 } else if let Expression::Function(ref f) = e {
23969 if f.args.len() >= 2 {
23970 if let Expression::Function(f) = e {
23971 let mut args = f.args;
23972 let arr = args.remove(0);
23973 let idx = args.remove(0);
23974 (arr, idx)
23975 } else {
23976 unreachable!("outer condition already matched Expression::Function")
23977 }
23978 } else {
23979 return Ok(e);
23980 }
23981 } else {
23982 return Ok(e);
23983 };
23984 match target {
23985 DialectType::PostgreSQL => {
23986 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
23987 let arr_expr = Expression::Paren(Box::new(Paren {
23988 this: arr,
23989 trailing_comments: vec![],
23990 }));
23991 Ok(Expression::Subscript(Box::new(
23992 crate::expressions::Subscript {
23993 this: arr_expr,
23994 index: idx,
23995 },
23996 )))
23997 }
23998 DialectType::BigQuery => {
23999 // BigQuery: convert ARRAY[...] to bare [...] for subscript
24000 let arr_expr = match arr {
24001 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
24002 crate::expressions::ArrayConstructor {
24003 expressions: af.expressions,
24004 bracket_notation: true,
24005 use_list_keyword: false,
24006 },
24007 )),
24008 other => other,
24009 };
24010 let safe_ordinal = Expression::Function(Box::new(Function::new(
24011 "SAFE_ORDINAL".to_string(),
24012 vec![idx],
24013 )));
24014 Ok(Expression::Subscript(Box::new(
24015 crate::expressions::Subscript {
24016 this: arr_expr,
24017 index: safe_ordinal,
24018 },
24019 )))
24020 }
24021 _ => Ok(Expression::Function(Box::new(Function::new(
24022 "ELEMENT_AT".to_string(),
24023 vec![arr, idx],
24024 )))),
24025 }
24026 }
24027
24028 Action::CurrentUserParens => {
24029 // CURRENT_USER -> CURRENT_USER() for Snowflake
24030 Ok(Expression::Function(Box::new(Function::new(
24031 "CURRENT_USER".to_string(),
24032 vec![],
24033 ))))
24034 }
24035
24036 Action::ArrayAggToCollectList => {
24037 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
24038 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
24039 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
24040 match e {
24041 Expression::AggregateFunction(mut af) => {
24042 let is_simple =
24043 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
24044 let args = if af.args.is_empty() {
24045 vec![]
24046 } else {
24047 vec![af.args[0].clone()]
24048 };
24049 af.name = "COLLECT_LIST".to_string();
24050 af.args = args;
24051 if is_simple {
24052 af.order_by = Vec::new();
24053 }
24054 Ok(Expression::AggregateFunction(af))
24055 }
24056 Expression::ArrayAgg(agg) => {
24057 let is_simple =
24058 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
24059 Ok(Expression::AggregateFunction(Box::new(
24060 crate::expressions::AggregateFunction {
24061 name: "COLLECT_LIST".to_string(),
24062 args: vec![agg.this.clone()],
24063 distinct: agg.distinct,
24064 filter: agg.filter.clone(),
24065 order_by: if is_simple {
24066 Vec::new()
24067 } else {
24068 agg.order_by.clone()
24069 },
24070 limit: agg.limit.clone(),
24071 ignore_nulls: agg.ignore_nulls,
24072 inferred_type: None,
24073 },
24074 )))
24075 }
24076 _ => Ok(e),
24077 }
24078 }
24079
24080 Action::ArraySyntaxConvert => {
24081 match e {
24082 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
24083 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
24084 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
24085 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
24086 expressions: arr.expressions,
24087 bracket_notation: true,
24088 use_list_keyword: false,
24089 })),
24090 ),
24091 // ARRAY(y) function style -> ArrayFunc for target dialect
24092 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
24093 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
24094 let bracket = matches!(
24095 target,
24096 DialectType::BigQuery
24097 | DialectType::DuckDB
24098 | DialectType::Snowflake
24099 | DialectType::ClickHouse
24100 | DialectType::StarRocks
24101 );
24102 Ok(Expression::ArrayFunc(Box::new(
24103 crate::expressions::ArrayConstructor {
24104 expressions: f.args,
24105 bracket_notation: bracket,
24106 use_list_keyword: false,
24107 },
24108 )))
24109 }
24110 _ => Ok(e),
24111 }
24112 }
24113
24114 Action::CastToJsonForSpark => {
24115 // CAST(x AS JSON) -> TO_JSON(x) for Spark
24116 if let Expression::Cast(c) = e {
24117 Ok(Expression::Function(Box::new(Function::new(
24118 "TO_JSON".to_string(),
24119 vec![c.this],
24120 ))))
24121 } else {
24122 Ok(e)
24123 }
24124 }
24125
24126 Action::CastJsonToFromJson => {
24127 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
24128 if let Expression::Cast(c) = e {
24129 // Extract the string literal from ParseJson
24130 let literal_expr = if let Expression::ParseJson(pj) = c.this {
24131 pj.this
24132 } else {
24133 c.this
24134 };
24135 // Convert the target DataType to Spark's type string format
24136 let type_str = Self::data_type_to_spark_string(&c.to);
24137 Ok(Expression::Function(Box::new(Function::new(
24138 "FROM_JSON".to_string(),
24139 vec![
24140 literal_expr,
24141 Expression::Literal(Box::new(Literal::String(type_str))),
24142 ],
24143 ))))
24144 } else {
24145 Ok(e)
24146 }
24147 }
24148
24149 Action::ToJsonConvert => {
24150 // TO_JSON(x) -> target-specific conversion
24151 if let Expression::ToJson(f) = e {
24152 let arg = f.this;
24153 match target {
24154 DialectType::Presto | DialectType::Trino => {
24155 // JSON_FORMAT(CAST(x AS JSON))
24156 let cast_json = Expression::Cast(Box::new(Cast {
24157 this: arg,
24158 to: DataType::Custom {
24159 name: "JSON".to_string(),
24160 },
24161 trailing_comments: vec![],
24162 double_colon_syntax: false,
24163 format: None,
24164 default: None,
24165 inferred_type: None,
24166 }));
24167 Ok(Expression::Function(Box::new(Function::new(
24168 "JSON_FORMAT".to_string(),
24169 vec![cast_json],
24170 ))))
24171 }
24172 DialectType::BigQuery => Ok(Expression::Function(Box::new(
24173 Function::new("TO_JSON_STRING".to_string(), vec![arg]),
24174 ))),
24175 DialectType::DuckDB => {
24176 // CAST(TO_JSON(x) AS TEXT)
24177 let to_json =
24178 Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
24179 this: arg,
24180 original_name: None,
24181 inferred_type: None,
24182 }));
24183 Ok(Expression::Cast(Box::new(Cast {
24184 this: to_json,
24185 to: DataType::Text,
24186 trailing_comments: vec![],
24187 double_colon_syntax: false,
24188 format: None,
24189 default: None,
24190 inferred_type: None,
24191 })))
24192 }
24193 _ => Ok(Expression::ToJson(Box::new(
24194 crate::expressions::UnaryFunc {
24195 this: arg,
24196 original_name: None,
24197 inferred_type: None,
24198 },
24199 ))),
24200 }
24201 } else {
24202 Ok(e)
24203 }
24204 }
24205
24206 Action::VarianceToClickHouse => {
24207 if let Expression::Variance(f) = e {
24208 Ok(Expression::Function(Box::new(Function::new(
24209 "varSamp".to_string(),
24210 vec![f.this],
24211 ))))
24212 } else {
24213 Ok(e)
24214 }
24215 }
24216
24217 Action::StddevToClickHouse => {
24218 if let Expression::Stddev(f) = e {
24219 Ok(Expression::Function(Box::new(Function::new(
24220 "stddevSamp".to_string(),
24221 vec![f.this],
24222 ))))
24223 } else {
24224 Ok(e)
24225 }
24226 }
24227
24228 Action::ApproxQuantileConvert => {
24229 if let Expression::ApproxQuantile(aq) = e {
24230 let mut args = vec![*aq.this];
24231 if let Some(q) = aq.quantile {
24232 args.push(*q);
24233 }
24234 Ok(Expression::Function(Box::new(Function::new(
24235 "APPROX_PERCENTILE".to_string(),
24236 args,
24237 ))))
24238 } else {
24239 Ok(e)
24240 }
24241 }
24242
24243 Action::DollarParamConvert => {
24244 if let Expression::Parameter(p) = e {
24245 Ok(Expression::Parameter(Box::new(
24246 crate::expressions::Parameter {
24247 name: p.name,
24248 index: p.index,
24249 style: crate::expressions::ParameterStyle::At,
24250 quoted: p.quoted,
24251 string_quoted: p.string_quoted,
24252 expression: p.expression,
24253 },
24254 )))
24255 } else {
24256 Ok(e)
24257 }
24258 }
24259
24260 Action::EscapeStringNormalize => {
24261 if let Expression::Literal(ref lit) = e {
24262 if let Literal::EscapeString(s) = lit.as_ref() {
24263 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
24264 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
24265 s[2..].to_string()
24266 } else {
24267 s.clone()
24268 };
24269 let normalized = stripped
24270 .replace('\n', "\\n")
24271 .replace('\r', "\\r")
24272 .replace('\t', "\\t");
24273 match target {
24274 DialectType::BigQuery => {
24275 // BigQuery: e'...' -> CAST(b'...' AS STRING)
24276 // Use Raw for the b'...' part to avoid double-escaping
24277 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
24278 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
24279 }
24280 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
24281 normalized,
24282 )))),
24283 }
24284 } else {
24285 Ok(e)
24286 }
24287 } else {
24288 Ok(e)
24289 }
24290 }
24291
24292 Action::StraightJoinCase => {
24293 // straight_join: keep lowercase for DuckDB, quote for MySQL
24294 if let Expression::Column(col) = e {
24295 if col.name.name == "STRAIGHT_JOIN" {
24296 let mut new_col = col;
24297 new_col.name.name = "straight_join".to_string();
24298 if matches!(target, DialectType::MySQL) {
24299 // MySQL: needs quoting since it's a reserved keyword
24300 new_col.name.quoted = true;
24301 }
24302 Ok(Expression::Column(new_col))
24303 } else {
24304 Ok(Expression::Column(col))
24305 }
24306 } else {
24307 Ok(e)
24308 }
24309 }
24310
24311 Action::TablesampleReservoir => {
24312 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
24313 if let Expression::TableSample(mut ts) = e {
24314 if let Some(ref mut sample) = ts.sample {
24315 sample.method = crate::expressions::SampleMethod::Reservoir;
24316 sample.explicit_method = true;
24317 }
24318 Ok(Expression::TableSample(ts))
24319 } else {
24320 Ok(e)
24321 }
24322 }
24323
24324 Action::TablesampleSnowflakeStrip => {
24325 // Strip method and PERCENT for Snowflake target from non-Snowflake source
24326 match e {
24327 Expression::TableSample(mut ts) => {
24328 if let Some(ref mut sample) = ts.sample {
24329 sample.suppress_method_output = true;
24330 sample.unit_after_size = false;
24331 sample.is_percent = false;
24332 }
24333 Ok(Expression::TableSample(ts))
24334 }
24335 Expression::Table(mut t) => {
24336 if let Some(ref mut sample) = t.table_sample {
24337 sample.suppress_method_output = true;
24338 sample.unit_after_size = false;
24339 sample.is_percent = false;
24340 }
24341 Ok(Expression::Table(t))
24342 }
24343 _ => Ok(e),
24344 }
24345 }
24346
24347 Action::FirstToAnyValue => {
24348 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
24349 if let Expression::First(mut agg) = e {
24350 agg.ignore_nulls = None;
24351 agg.name = Some("ANY_VALUE".to_string());
24352 Ok(Expression::AnyValue(agg))
24353 } else {
24354 Ok(e)
24355 }
24356 }
24357
24358 Action::ArrayIndexConvert => {
24359 // Subscript index: 1-based to 0-based for BigQuery
24360 if let Expression::Subscript(mut sub) = e {
24361 if let Expression::Literal(ref lit) = sub.index {
24362 if let Literal::Number(ref n) = lit.as_ref() {
24363 if let Ok(val) = n.parse::<i64>() {
24364 sub.index = Expression::Literal(Box::new(Literal::Number(
24365 (val - 1).to_string(),
24366 )));
24367 }
24368 }
24369 }
24370 Ok(Expression::Subscript(sub))
24371 } else {
24372 Ok(e)
24373 }
24374 }
24375
24376 Action::AnyValueIgnoreNulls => {
24377 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
24378 if let Expression::AnyValue(mut av) = e {
24379 if av.ignore_nulls.is_none() {
24380 av.ignore_nulls = Some(true);
24381 }
24382 Ok(Expression::AnyValue(av))
24383 } else {
24384 Ok(e)
24385 }
24386 }
24387
24388 Action::BigQueryNullsOrdering => {
24389 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
24390 if let Expression::WindowFunction(mut wf) = e {
24391 for o in &mut wf.over.order_by {
24392 o.nulls_first = None;
24393 }
24394 Ok(Expression::WindowFunction(wf))
24395 } else if let Expression::Ordered(mut o) = e {
24396 o.nulls_first = None;
24397 Ok(Expression::Ordered(o))
24398 } else {
24399 Ok(e)
24400 }
24401 }
24402
24403 Action::SnowflakeFloatProtect => {
24404 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
24405 // Snowflake's target transform from converting it to DOUBLE.
24406 // Non-Snowflake sources should keep their FLOAT spelling.
24407 if let Expression::DataType(DataType::Float { .. }) = e {
24408 Ok(Expression::DataType(DataType::Custom {
24409 name: "FLOAT".to_string(),
24410 }))
24411 } else {
24412 Ok(e)
24413 }
24414 }
24415
24416 Action::MysqlNullsOrdering => {
24417 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
24418 if let Expression::Ordered(mut o) = e {
24419 let nulls_last = o.nulls_first == Some(false);
24420 let desc = o.desc;
24421 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
24422 // If requested ordering matches default, just strip NULLS clause
24423 let matches_default = if desc {
24424 // DESC default is NULLS FIRST, so nulls_first=true matches
24425 o.nulls_first == Some(true)
24426 } else {
24427 // ASC default is NULLS LAST, so nulls_first=false matches
24428 nulls_last
24429 };
24430 if matches_default {
24431 o.nulls_first = None;
24432 Ok(Expression::Ordered(o))
24433 } else {
24434 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
24435 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
24436 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
24437 let null_val = if desc { 1 } else { 0 };
24438 let non_null_val = if desc { 0 } else { 1 };
24439 let _case_expr = Expression::Case(Box::new(Case {
24440 operand: None,
24441 whens: vec![(
24442 Expression::IsNull(Box::new(crate::expressions::IsNull {
24443 this: o.this.clone(),
24444 not: false,
24445 postfix_form: false,
24446 })),
24447 Expression::number(null_val),
24448 )],
24449 else_: Some(Expression::number(non_null_val)),
24450 comments: Vec::new(),
24451 inferred_type: None,
24452 }));
24453 o.nulls_first = None;
24454 // Return a tuple of [case_expr, ordered_expr]
24455 // We need to return both as part of the ORDER BY
24456 // But since transform_recursive processes individual expressions,
24457 // we can't easily add extra ORDER BY items here.
24458 // Instead, strip the nulls_first
24459 o.nulls_first = None;
24460 Ok(Expression::Ordered(o))
24461 }
24462 } else {
24463 Ok(e)
24464 }
24465 }
24466
24467 Action::MysqlNullsLastRewrite => {
24468 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
24469 // to simulate NULLS LAST for ASC ordering
24470 if let Expression::WindowFunction(mut wf) = e {
24471 let mut new_order_by = Vec::new();
24472 for o in wf.over.order_by {
24473 if !o.desc {
24474 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
24475 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
24476 let case_expr = Expression::Case(Box::new(Case {
24477 operand: None,
24478 whens: vec![(
24479 Expression::IsNull(Box::new(crate::expressions::IsNull {
24480 this: o.this.clone(),
24481 not: false,
24482 postfix_form: false,
24483 })),
24484 Expression::Literal(Box::new(Literal::Number(
24485 "1".to_string(),
24486 ))),
24487 )],
24488 else_: Some(Expression::Literal(Box::new(Literal::Number(
24489 "0".to_string(),
24490 )))),
24491 comments: Vec::new(),
24492 inferred_type: None,
24493 }));
24494 new_order_by.push(crate::expressions::Ordered {
24495 this: case_expr,
24496 desc: false,
24497 nulls_first: None,
24498 explicit_asc: false,
24499 with_fill: None,
24500 });
24501 let mut ordered = o;
24502 ordered.nulls_first = None;
24503 new_order_by.push(ordered);
24504 } else {
24505 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
24506 // No change needed
24507 let mut ordered = o;
24508 ordered.nulls_first = None;
24509 new_order_by.push(ordered);
24510 }
24511 }
24512 wf.over.order_by = new_order_by;
24513 Ok(Expression::WindowFunction(wf))
24514 } else {
24515 Ok(e)
24516 }
24517 }
24518
24519 Action::RespectNullsConvert => {
24520 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
24521 if let Expression::WindowFunction(mut wf) = e {
24522 match &mut wf.this {
24523 Expression::FirstValue(ref mut vf) => {
24524 if vf.ignore_nulls == Some(false) {
24525 vf.ignore_nulls = None;
24526 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
24527 // but that's handled by the generator's NULLS ordering
24528 }
24529 }
24530 Expression::LastValue(ref mut vf) => {
24531 if vf.ignore_nulls == Some(false) {
24532 vf.ignore_nulls = None;
24533 }
24534 }
24535 _ => {}
24536 }
24537 Ok(Expression::WindowFunction(wf))
24538 } else {
24539 Ok(e)
24540 }
24541 }
24542
24543 Action::SnowflakeWindowFrameStrip => {
24544 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24545 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
24546 if let Expression::WindowFunction(mut wf) = e {
24547 wf.over.frame = None;
24548 Ok(Expression::WindowFunction(wf))
24549 } else {
24550 Ok(e)
24551 }
24552 }
24553
24554 Action::SnowflakeWindowFrameAdd => {
24555 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24556 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
24557 if let Expression::WindowFunction(mut wf) = e {
24558 wf.over.frame = Some(crate::expressions::WindowFrame {
24559 kind: crate::expressions::WindowFrameKind::Rows,
24560 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
24561 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
24562 exclude: None,
24563 kind_text: None,
24564 start_side_text: None,
24565 end_side_text: None,
24566 });
24567 Ok(Expression::WindowFunction(wf))
24568 } else {
24569 Ok(e)
24570 }
24571 }
24572
24573 Action::CreateTableStripComment => {
24574 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
24575 if let Expression::CreateTable(mut ct) = e {
24576 for col in &mut ct.columns {
24577 col.comment = None;
24578 col.constraints.retain(|c| {
24579 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
24580 });
24581 // Also remove Comment from constraint_order
24582 col.constraint_order.retain(|c| {
24583 !matches!(c, crate::expressions::ConstraintType::Comment)
24584 });
24585 }
24586 // Strip properties (USING, PARTITIONED BY, etc.)
24587 ct.properties.clear();
24588 Ok(Expression::CreateTable(ct))
24589 } else {
24590 Ok(e)
24591 }
24592 }
24593
24594 Action::AlterTableToSpRename => {
24595 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
24596 if let Expression::AlterTable(ref at) = e {
24597 if let Some(crate::expressions::AlterTableAction::RenameTable(
24598 ref new_tbl,
24599 )) = at.actions.first()
24600 {
24601 // Build the old table name using TSQL bracket quoting
24602 let old_name = if let Some(ref schema) = at.name.schema {
24603 if at.name.name.quoted || schema.quoted {
24604 format!("[{}].[{}]", schema.name, at.name.name.name)
24605 } else {
24606 format!("{}.{}", schema.name, at.name.name.name)
24607 }
24608 } else {
24609 if at.name.name.quoted {
24610 format!("[{}]", at.name.name.name)
24611 } else {
24612 at.name.name.name.clone()
24613 }
24614 };
24615 let new_name = new_tbl.name.name.clone();
24616 // EXEC sp_rename 'old_name', 'new_name'
24617 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
24618 Ok(Expression::Raw(crate::expressions::Raw { sql }))
24619 } else {
24620 Ok(e)
24621 }
24622 } else {
24623 Ok(e)
24624 }
24625 }
24626
24627 Action::SnowflakeIntervalFormat => {
24628 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
24629 if let Expression::Interval(mut iv) = e {
24630 if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
24631 (&iv.this, &iv.unit)
24632 {
24633 if let Literal::String(ref val) = lit.as_ref() {
24634 let unit_str = match unit_spec {
24635 crate::expressions::IntervalUnitSpec::Simple {
24636 unit, ..
24637 } => match unit {
24638 crate::expressions::IntervalUnit::Year => "YEAR",
24639 crate::expressions::IntervalUnit::Quarter => "QUARTER",
24640 crate::expressions::IntervalUnit::Month => "MONTH",
24641 crate::expressions::IntervalUnit::Week => "WEEK",
24642 crate::expressions::IntervalUnit::Day => "DAY",
24643 crate::expressions::IntervalUnit::Hour => "HOUR",
24644 crate::expressions::IntervalUnit::Minute => "MINUTE",
24645 crate::expressions::IntervalUnit::Second => "SECOND",
24646 crate::expressions::IntervalUnit::Millisecond => {
24647 "MILLISECOND"
24648 }
24649 crate::expressions::IntervalUnit::Microsecond => {
24650 "MICROSECOND"
24651 }
24652 crate::expressions::IntervalUnit::Nanosecond => {
24653 "NANOSECOND"
24654 }
24655 },
24656 _ => "",
24657 };
24658 if !unit_str.is_empty() {
24659 let combined = format!("{} {}", val, unit_str);
24660 iv.this = Some(Expression::Literal(Box::new(Literal::String(
24661 combined,
24662 ))));
24663 iv.unit = None;
24664 }
24665 }
24666 }
24667 Ok(Expression::Interval(iv))
24668 } else {
24669 Ok(e)
24670 }
24671 }
24672
24673 Action::ArrayConcatBracketConvert => {
24674 // Expression::Array/ArrayFunc -> target-specific
24675 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
24676 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
24677 match e {
24678 Expression::Array(arr) => {
24679 if matches!(target, DialectType::Redshift) {
24680 Ok(Expression::Function(Box::new(Function::new(
24681 "ARRAY".to_string(),
24682 arr.expressions,
24683 ))))
24684 } else {
24685 Ok(Expression::ArrayFunc(Box::new(
24686 crate::expressions::ArrayConstructor {
24687 expressions: arr.expressions,
24688 bracket_notation: false,
24689 use_list_keyword: false,
24690 },
24691 )))
24692 }
24693 }
24694 Expression::ArrayFunc(arr) => {
24695 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
24696 if matches!(target, DialectType::Redshift) {
24697 Ok(Expression::Function(Box::new(Function::new(
24698 "ARRAY".to_string(),
24699 arr.expressions,
24700 ))))
24701 } else {
24702 Ok(Expression::ArrayFunc(arr))
24703 }
24704 }
24705 _ => Ok(e),
24706 }
24707 }
24708
24709 Action::BitAggFloatCast => {
24710 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
24711 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24712 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24713 let int_type = DataType::Int {
24714 length: None,
24715 integer_spelling: false,
24716 };
24717 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
24718 if let Expression::Cast(c) = agg_this {
24719 match &c.to {
24720 DataType::Float { .. }
24721 | DataType::Double { .. }
24722 | DataType::Custom { .. } => {
24723 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24724 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
24725 let inner_type = match &c.to {
24726 DataType::Float {
24727 precision, scale, ..
24728 } => DataType::Float {
24729 precision: *precision,
24730 scale: *scale,
24731 real_spelling: true,
24732 },
24733 other => other.clone(),
24734 };
24735 let inner_cast =
24736 Expression::Cast(Box::new(crate::expressions::Cast {
24737 this: c.this.clone(),
24738 to: inner_type,
24739 trailing_comments: Vec::new(),
24740 double_colon_syntax: false,
24741 format: None,
24742 default: None,
24743 inferred_type: None,
24744 }));
24745 let rounded = Expression::Function(Box::new(Function::new(
24746 "ROUND".to_string(),
24747 vec![inner_cast],
24748 )));
24749 Expression::Cast(Box::new(crate::expressions::Cast {
24750 this: rounded,
24751 to: int_dt,
24752 trailing_comments: Vec::new(),
24753 double_colon_syntax: false,
24754 format: None,
24755 default: None,
24756 inferred_type: None,
24757 }))
24758 }
24759 DataType::Decimal { .. } => {
24760 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24761 Expression::Cast(Box::new(crate::expressions::Cast {
24762 this: Expression::Cast(c),
24763 to: int_dt,
24764 trailing_comments: Vec::new(),
24765 double_colon_syntax: false,
24766 format: None,
24767 default: None,
24768 inferred_type: None,
24769 }))
24770 }
24771 _ => Expression::Cast(c),
24772 }
24773 } else {
24774 agg_this
24775 }
24776 };
24777 match e {
24778 Expression::BitwiseOrAgg(mut f) => {
24779 f.this = wrap_agg(f.this, int_type);
24780 Ok(Expression::BitwiseOrAgg(f))
24781 }
24782 Expression::BitwiseAndAgg(mut f) => {
24783 let int_type = DataType::Int {
24784 length: None,
24785 integer_spelling: false,
24786 };
24787 f.this = wrap_agg(f.this, int_type);
24788 Ok(Expression::BitwiseAndAgg(f))
24789 }
24790 Expression::BitwiseXorAgg(mut f) => {
24791 let int_type = DataType::Int {
24792 length: None,
24793 integer_spelling: false,
24794 };
24795 f.this = wrap_agg(f.this, int_type);
24796 Ok(Expression::BitwiseXorAgg(f))
24797 }
24798 _ => Ok(e),
24799 }
24800 }
24801
24802 Action::BitAggSnowflakeRename => {
24803 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
24804 match e {
24805 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
24806 Function::new("BITORAGG".to_string(), vec![f.this]),
24807 ))),
24808 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
24809 Function::new("BITANDAGG".to_string(), vec![f.this]),
24810 ))),
24811 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
24812 Function::new("BITXORAGG".to_string(), vec![f.this]),
24813 ))),
24814 _ => Ok(e),
24815 }
24816 }
24817
24818 Action::StrftimeCastTimestamp => {
24819 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
24820 if let Expression::Cast(mut c) = e {
24821 if matches!(
24822 c.to,
24823 DataType::Timestamp {
24824 timezone: false,
24825 ..
24826 }
24827 ) {
24828 c.to = DataType::Custom {
24829 name: "TIMESTAMP_NTZ".to_string(),
24830 };
24831 }
24832 Ok(Expression::Cast(c))
24833 } else {
24834 Ok(e)
24835 }
24836 }
24837
24838 Action::DecimalDefaultPrecision => {
24839 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
24840 if let Expression::Cast(mut c) = e {
24841 if matches!(
24842 c.to,
24843 DataType::Decimal {
24844 precision: None,
24845 ..
24846 }
24847 ) {
24848 c.to = DataType::Decimal {
24849 precision: Some(18),
24850 scale: Some(3),
24851 };
24852 }
24853 Ok(Expression::Cast(c))
24854 } else {
24855 Ok(e)
24856 }
24857 }
24858
24859 Action::FilterToIff => {
24860 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
24861 if let Expression::Filter(f) = e {
24862 let condition = *f.expression;
24863 let agg = *f.this;
24864 // Strip WHERE from condition
24865 let cond = match condition {
24866 Expression::Where(w) => w.this,
24867 other => other,
24868 };
24869 // Extract the aggregate function and its argument
24870 // We want AVG(IFF(condition, x, NULL))
24871 match agg {
24872 Expression::Function(mut func) => {
24873 if !func.args.is_empty() {
24874 let orig_arg = func.args[0].clone();
24875 let iff_call = Expression::Function(Box::new(Function::new(
24876 "IFF".to_string(),
24877 vec![cond, orig_arg, Expression::Null(Null)],
24878 )));
24879 func.args[0] = iff_call;
24880 Ok(Expression::Function(func))
24881 } else {
24882 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
24883 this: Box::new(Expression::Function(func)),
24884 expression: Box::new(cond),
24885 })))
24886 }
24887 }
24888 Expression::Avg(mut avg) => {
24889 let iff_call = Expression::Function(Box::new(Function::new(
24890 "IFF".to_string(),
24891 vec![cond, avg.this.clone(), Expression::Null(Null)],
24892 )));
24893 avg.this = iff_call;
24894 Ok(Expression::Avg(avg))
24895 }
24896 Expression::Sum(mut s) => {
24897 let iff_call = Expression::Function(Box::new(Function::new(
24898 "IFF".to_string(),
24899 vec![cond, s.this.clone(), Expression::Null(Null)],
24900 )));
24901 s.this = iff_call;
24902 Ok(Expression::Sum(s))
24903 }
24904 Expression::Count(mut c) => {
24905 if let Some(ref this_expr) = c.this {
24906 let iff_call = Expression::Function(Box::new(Function::new(
24907 "IFF".to_string(),
24908 vec![cond, this_expr.clone(), Expression::Null(Null)],
24909 )));
24910 c.this = Some(iff_call);
24911 }
24912 Ok(Expression::Count(c))
24913 }
24914 other => {
24915 // Fallback: keep as Filter
24916 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
24917 this: Box::new(other),
24918 expression: Box::new(cond),
24919 })))
24920 }
24921 }
24922 } else {
24923 Ok(e)
24924 }
24925 }
24926
24927 Action::AggFilterToIff => {
24928 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
24929 // Helper macro to handle the common AggFunc case
24930 macro_rules! handle_agg_filter_to_iff {
24931 ($variant:ident, $agg:expr) => {{
24932 let mut agg = $agg;
24933 if let Some(filter_cond) = agg.filter.take() {
24934 let iff_call = Expression::Function(Box::new(Function::new(
24935 "IFF".to_string(),
24936 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
24937 )));
24938 agg.this = iff_call;
24939 }
24940 Ok(Expression::$variant(agg))
24941 }};
24942 }
24943
24944 match e {
24945 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
24946 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
24947 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
24948 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
24949 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
24950 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
24951 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
24952 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
24953 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
24954 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
24955 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
24956 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
24957 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
24958 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
24959 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
24960 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
24961 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
24962 Expression::ApproxDistinct(agg) => {
24963 handle_agg_filter_to_iff!(ApproxDistinct, agg)
24964 }
24965 Expression::Count(mut c) => {
24966 if let Some(filter_cond) = c.filter.take() {
24967 if let Some(ref this_expr) = c.this {
24968 let iff_call = Expression::Function(Box::new(Function::new(
24969 "IFF".to_string(),
24970 vec![
24971 filter_cond,
24972 this_expr.clone(),
24973 Expression::Null(Null),
24974 ],
24975 )));
24976 c.this = Some(iff_call);
24977 }
24978 }
24979 Ok(Expression::Count(c))
24980 }
24981 other => Ok(other),
24982 }
24983 }
24984
24985 Action::JsonToGetPath => {
24986 // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
24987 if let Expression::JsonExtract(je) = e {
24988 // Convert to PARSE_JSON() wrapper:
24989 // - JSON(x) -> PARSE_JSON(x)
24990 // - PARSE_JSON(x) -> keep as-is
24991 // - anything else -> wrap in PARSE_JSON()
24992 let this = match &je.this {
24993 Expression::Function(f)
24994 if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
24995 {
24996 Expression::Function(Box::new(Function::new(
24997 "PARSE_JSON".to_string(),
24998 f.args.clone(),
24999 )))
25000 }
25001 Expression::Function(f)
25002 if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
25003 {
25004 je.this.clone()
25005 }
25006 // GET_PATH result is already JSON, don't wrap
25007 Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
25008 je.this.clone()
25009 }
25010 other => {
25011 // Wrap non-JSON expressions in PARSE_JSON()
25012 Expression::Function(Box::new(Function::new(
25013 "PARSE_JSON".to_string(),
25014 vec![other.clone()],
25015 )))
25016 }
25017 };
25018 // Convert path: extract key from JSONPath or strip $. prefix from string
25019 let path = match &je.path {
25020 Expression::JSONPath(jp) => {
25021 // Extract the key from JSONPath: $root.key -> 'key'
25022 let mut key_parts = Vec::new();
25023 for expr in &jp.expressions {
25024 match expr {
25025 Expression::JSONPathRoot(_) => {} // skip root
25026 Expression::JSONPathKey(k) => {
25027 if let Expression::Literal(lit) = &*k.this {
25028 if let Literal::String(s) = lit.as_ref() {
25029 key_parts.push(s.clone());
25030 }
25031 }
25032 }
25033 _ => {}
25034 }
25035 }
25036 if !key_parts.is_empty() {
25037 Expression::Literal(Box::new(Literal::String(
25038 key_parts.join("."),
25039 )))
25040 } else {
25041 je.path.clone()
25042 }
25043 }
25044 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
25045 {
25046 let Literal::String(s) = lit.as_ref() else {
25047 unreachable!()
25048 };
25049 let stripped = Self::strip_json_wildcards(&s[2..].to_string());
25050 Expression::Literal(Box::new(Literal::String(stripped)))
25051 }
25052 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
25053 {
25054 let Literal::String(s) = lit.as_ref() else {
25055 unreachable!()
25056 };
25057 let stripped = Self::strip_json_wildcards(&s[1..].to_string());
25058 Expression::Literal(Box::new(Literal::String(stripped)))
25059 }
25060 _ => je.path.clone(),
25061 };
25062 Ok(Expression::Function(Box::new(Function::new(
25063 "GET_PATH".to_string(),
25064 vec![this, path],
25065 ))))
25066 } else {
25067 Ok(e)
25068 }
25069 }
25070
25071 Action::StructToRow => {
25072 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
25073 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
25074
25075 // Extract key-value pairs from either Struct or MapFunc
25076 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
25077 Expression::Struct(s) => Some(
25078 s.fields
25079 .iter()
25080 .map(|(opt_name, field_expr)| {
25081 if let Some(name) = opt_name {
25082 (name.clone(), field_expr.clone())
25083 } else if let Expression::NamedArgument(na) = field_expr {
25084 (na.name.name.clone(), na.value.clone())
25085 } else {
25086 (String::new(), field_expr.clone())
25087 }
25088 })
25089 .collect(),
25090 ),
25091 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25092 m.keys
25093 .iter()
25094 .zip(m.values.iter())
25095 .map(|(key, value)| {
25096 let key_name = match key {
25097 Expression::Literal(lit)
25098 if matches!(lit.as_ref(), Literal::String(_)) =>
25099 {
25100 let Literal::String(s) = lit.as_ref() else {
25101 unreachable!()
25102 };
25103 s.clone()
25104 }
25105 Expression::Identifier(id) => id.name.clone(),
25106 _ => String::new(),
25107 };
25108 (key_name, value.clone())
25109 })
25110 .collect(),
25111 ),
25112 _ => None,
25113 };
25114
25115 if let Some(pairs) = kv_pairs {
25116 let mut named_args = Vec::new();
25117 for (key_name, value) in pairs {
25118 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
25119 named_args.push(Expression::Alias(Box::new(
25120 crate::expressions::Alias::new(
25121 value,
25122 Identifier::new(key_name),
25123 ),
25124 )));
25125 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25126 named_args.push(value);
25127 } else {
25128 named_args.push(value);
25129 }
25130 }
25131
25132 if matches!(target, DialectType::BigQuery) {
25133 Ok(Expression::Function(Box::new(Function::new(
25134 "STRUCT".to_string(),
25135 named_args,
25136 ))))
25137 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25138 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
25139 let row_func = Expression::Function(Box::new(Function::new(
25140 "ROW".to_string(),
25141 named_args,
25142 )));
25143
25144 // Try to infer types for each pair
25145 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
25146 Expression::Struct(s) => Some(
25147 s.fields
25148 .iter()
25149 .map(|(opt_name, field_expr)| {
25150 if let Some(name) = opt_name {
25151 (name.clone(), field_expr.clone())
25152 } else if let Expression::NamedArgument(na) = field_expr
25153 {
25154 (na.name.name.clone(), na.value.clone())
25155 } else {
25156 (String::new(), field_expr.clone())
25157 }
25158 })
25159 .collect(),
25160 ),
25161 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25162 m.keys
25163 .iter()
25164 .zip(m.values.iter())
25165 .map(|(key, value)| {
25166 let key_name = match key {
25167 Expression::Literal(lit)
25168 if matches!(
25169 lit.as_ref(),
25170 Literal::String(_)
25171 ) =>
25172 {
25173 let Literal::String(s) = lit.as_ref() else {
25174 unreachable!()
25175 };
25176 s.clone()
25177 }
25178 Expression::Identifier(id) => id.name.clone(),
25179 _ => String::new(),
25180 };
25181 (key_name, value.clone())
25182 })
25183 .collect(),
25184 ),
25185 _ => None,
25186 };
25187
25188 if let Some(pairs) = kv_pairs_again {
25189 // Infer types for all values
25190 let mut all_inferred = true;
25191 let mut fields = Vec::new();
25192 for (name, value) in &pairs {
25193 let inferred_type = match value {
25194 Expression::Literal(lit)
25195 if matches!(lit.as_ref(), Literal::Number(_)) =>
25196 {
25197 let Literal::Number(n) = lit.as_ref() else {
25198 unreachable!()
25199 };
25200 if n.contains('.') {
25201 Some(DataType::Double {
25202 precision: None,
25203 scale: None,
25204 })
25205 } else {
25206 Some(DataType::Int {
25207 length: None,
25208 integer_spelling: true,
25209 })
25210 }
25211 }
25212 Expression::Literal(lit)
25213 if matches!(lit.as_ref(), Literal::String(_)) =>
25214 {
25215 Some(DataType::VarChar {
25216 length: None,
25217 parenthesized_length: false,
25218 })
25219 }
25220 Expression::Boolean(_) => Some(DataType::Boolean),
25221 _ => None,
25222 };
25223 if let Some(dt) = inferred_type {
25224 fields.push(crate::expressions::StructField::new(
25225 name.clone(),
25226 dt,
25227 ));
25228 } else {
25229 all_inferred = false;
25230 break;
25231 }
25232 }
25233
25234 if all_inferred && !fields.is_empty() {
25235 let row_type = DataType::Struct {
25236 fields,
25237 nested: true,
25238 };
25239 Ok(Expression::Cast(Box::new(Cast {
25240 this: row_func,
25241 to: row_type,
25242 trailing_comments: Vec::new(),
25243 double_colon_syntax: false,
25244 format: None,
25245 default: None,
25246 inferred_type: None,
25247 })))
25248 } else {
25249 Ok(row_func)
25250 }
25251 } else {
25252 Ok(row_func)
25253 }
25254 } else {
25255 Ok(Expression::Function(Box::new(Function::new(
25256 "ROW".to_string(),
25257 named_args,
25258 ))))
25259 }
25260 } else {
25261 Ok(e)
25262 }
25263 }
25264
25265 Action::SparkStructConvert => {
25266 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
25267 // or DuckDB {'name': val, ...}
25268 if let Expression::Function(f) = e {
25269 // Extract name-value pairs from aliased args
25270 let mut pairs: Vec<(String, Expression)> = Vec::new();
25271 for arg in &f.args {
25272 match arg {
25273 Expression::Alias(a) => {
25274 pairs.push((a.alias.name.clone(), a.this.clone()));
25275 }
25276 _ => {
25277 pairs.push((String::new(), arg.clone()));
25278 }
25279 }
25280 }
25281
25282 match target {
25283 DialectType::DuckDB => {
25284 // Convert to DuckDB struct literal {'name': value, ...}
25285 let mut keys = Vec::new();
25286 let mut values = Vec::new();
25287 for (name, value) in &pairs {
25288 keys.push(Expression::Literal(Box::new(Literal::String(
25289 name.clone(),
25290 ))));
25291 values.push(value.clone());
25292 }
25293 Ok(Expression::MapFunc(Box::new(
25294 crate::expressions::MapConstructor {
25295 keys,
25296 values,
25297 curly_brace_syntax: true,
25298 with_map_keyword: false,
25299 },
25300 )))
25301 }
25302 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25303 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
25304 let row_args: Vec<Expression> =
25305 pairs.iter().map(|(_, v)| v.clone()).collect();
25306 let row_func = Expression::Function(Box::new(Function::new(
25307 "ROW".to_string(),
25308 row_args,
25309 )));
25310
25311 // Infer types
25312 let mut all_inferred = true;
25313 let mut fields = Vec::new();
25314 for (name, value) in &pairs {
25315 let inferred_type = match value {
25316 Expression::Literal(lit)
25317 if matches!(lit.as_ref(), Literal::Number(_)) =>
25318 {
25319 let Literal::Number(n) = lit.as_ref() else {
25320 unreachable!()
25321 };
25322 if n.contains('.') {
25323 Some(DataType::Double {
25324 precision: None,
25325 scale: None,
25326 })
25327 } else {
25328 Some(DataType::Int {
25329 length: None,
25330 integer_spelling: true,
25331 })
25332 }
25333 }
25334 Expression::Literal(lit)
25335 if matches!(lit.as_ref(), Literal::String(_)) =>
25336 {
25337 Some(DataType::VarChar {
25338 length: None,
25339 parenthesized_length: false,
25340 })
25341 }
25342 Expression::Boolean(_) => Some(DataType::Boolean),
25343 _ => None,
25344 };
25345 if let Some(dt) = inferred_type {
25346 fields.push(crate::expressions::StructField::new(
25347 name.clone(),
25348 dt,
25349 ));
25350 } else {
25351 all_inferred = false;
25352 break;
25353 }
25354 }
25355
25356 if all_inferred && !fields.is_empty() {
25357 let row_type = DataType::Struct {
25358 fields,
25359 nested: true,
25360 };
25361 Ok(Expression::Cast(Box::new(Cast {
25362 this: row_func,
25363 to: row_type,
25364 trailing_comments: Vec::new(),
25365 double_colon_syntax: false,
25366 format: None,
25367 default: None,
25368 inferred_type: None,
25369 })))
25370 } else {
25371 Ok(row_func)
25372 }
25373 }
25374 _ => Ok(Expression::Function(f)),
25375 }
25376 } else {
25377 Ok(e)
25378 }
25379 }
25380
25381 Action::ApproxCountDistinctToApproxDistinct => {
25382 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
25383 if let Expression::ApproxCountDistinct(f) = e {
25384 Ok(Expression::ApproxDistinct(f))
25385 } else {
25386 Ok(e)
25387 }
25388 }
25389
25390 Action::CollectListToArrayAgg => {
25391 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
25392 if let Expression::AggregateFunction(f) = e {
25393 let filter_expr = if !f.args.is_empty() {
25394 let arg = f.args[0].clone();
25395 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
25396 this: arg,
25397 not: true,
25398 postfix_form: false,
25399 })))
25400 } else {
25401 None
25402 };
25403 let agg = crate::expressions::AggFunc {
25404 this: if f.args.is_empty() {
25405 Expression::Null(crate::expressions::Null)
25406 } else {
25407 f.args[0].clone()
25408 },
25409 distinct: f.distinct,
25410 order_by: f.order_by.clone(),
25411 filter: filter_expr,
25412 ignore_nulls: None,
25413 name: None,
25414 having_max: None,
25415 limit: None,
25416 inferred_type: None,
25417 };
25418 Ok(Expression::ArrayAgg(Box::new(agg)))
25419 } else {
25420 Ok(e)
25421 }
25422 }
25423
25424 Action::CollectSetConvert => {
25425 // COLLECT_SET(x) -> target-specific
25426 if let Expression::AggregateFunction(f) = e {
25427 match target {
25428 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
25429 crate::expressions::AggregateFunction {
25430 name: "SET_AGG".to_string(),
25431 args: f.args,
25432 distinct: false,
25433 order_by: f.order_by,
25434 filter: f.filter,
25435 limit: f.limit,
25436 ignore_nulls: f.ignore_nulls,
25437 inferred_type: None,
25438 },
25439 ))),
25440 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
25441 crate::expressions::AggregateFunction {
25442 name: "ARRAY_UNIQUE_AGG".to_string(),
25443 args: f.args,
25444 distinct: false,
25445 order_by: f.order_by,
25446 filter: f.filter,
25447 limit: f.limit,
25448 ignore_nulls: f.ignore_nulls,
25449 inferred_type: None,
25450 },
25451 ))),
25452 DialectType::Trino | DialectType::DuckDB => {
25453 let agg = crate::expressions::AggFunc {
25454 this: if f.args.is_empty() {
25455 Expression::Null(crate::expressions::Null)
25456 } else {
25457 f.args[0].clone()
25458 },
25459 distinct: true,
25460 order_by: Vec::new(),
25461 filter: None,
25462 ignore_nulls: None,
25463 name: None,
25464 having_max: None,
25465 limit: None,
25466 inferred_type: None,
25467 };
25468 Ok(Expression::ArrayAgg(Box::new(agg)))
25469 }
25470 _ => Ok(Expression::AggregateFunction(f)),
25471 }
25472 } else {
25473 Ok(e)
25474 }
25475 }
25476
25477 Action::PercentileConvert => {
25478 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
25479 if let Expression::AggregateFunction(f) = e {
25480 let name = match target {
25481 DialectType::DuckDB => "QUANTILE",
25482 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
25483 _ => "PERCENTILE",
25484 };
25485 Ok(Expression::AggregateFunction(Box::new(
25486 crate::expressions::AggregateFunction {
25487 name: name.to_string(),
25488 args: f.args,
25489 distinct: f.distinct,
25490 order_by: f.order_by,
25491 filter: f.filter,
25492 limit: f.limit,
25493 ignore_nulls: f.ignore_nulls,
25494 inferred_type: None,
25495 },
25496 )))
25497 } else {
25498 Ok(e)
25499 }
25500 }
25501
25502 Action::CorrIsnanWrap => {
25503 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
25504 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
25505 let corr_clone = e.clone();
25506 let isnan = Expression::Function(Box::new(Function::new(
25507 "ISNAN".to_string(),
25508 vec![corr_clone.clone()],
25509 )));
25510 let case_expr = Expression::Case(Box::new(Case {
25511 operand: None,
25512 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
25513 else_: Some(corr_clone),
25514 comments: Vec::new(),
25515 inferred_type: None,
25516 }));
25517 Ok(case_expr)
25518 }
25519
25520 Action::TruncToDateTrunc => {
25521 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
25522 if let Expression::Function(f) = e {
25523 if f.args.len() == 2 {
25524 let timestamp = f.args[0].clone();
25525 let unit_expr = f.args[1].clone();
25526
25527 if matches!(target, DialectType::ClickHouse) {
25528 // For ClickHouse, produce Expression::DateTrunc which the generator
25529 // outputs as DATE_TRUNC(...) without going through the ClickHouse
25530 // target transform that would convert it to dateTrunc
25531 let unit_str = Self::get_unit_str_static(&unit_expr);
25532 let dt_field = match unit_str.as_str() {
25533 "YEAR" => DateTimeField::Year,
25534 "MONTH" => DateTimeField::Month,
25535 "DAY" => DateTimeField::Day,
25536 "HOUR" => DateTimeField::Hour,
25537 "MINUTE" => DateTimeField::Minute,
25538 "SECOND" => DateTimeField::Second,
25539 "WEEK" => DateTimeField::Week,
25540 "QUARTER" => DateTimeField::Quarter,
25541 _ => DateTimeField::Custom(unit_str),
25542 };
25543 Ok(Expression::DateTrunc(Box::new(
25544 crate::expressions::DateTruncFunc {
25545 this: timestamp,
25546 unit: dt_field,
25547 },
25548 )))
25549 } else {
25550 let new_args = vec![unit_expr, timestamp];
25551 Ok(Expression::Function(Box::new(Function::new(
25552 "DATE_TRUNC".to_string(),
25553 new_args,
25554 ))))
25555 }
25556 } else {
25557 Ok(Expression::Function(f))
25558 }
25559 } else {
25560 Ok(e)
25561 }
25562 }
25563
25564 Action::ArrayContainsConvert => {
25565 if let Expression::ArrayContains(f) = e {
25566 match target {
25567 DialectType::Presto | DialectType::Trino => {
25568 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
25569 Ok(Expression::Function(Box::new(Function::new(
25570 "CONTAINS".to_string(),
25571 vec![f.this, f.expression],
25572 ))))
25573 }
25574 DialectType::Snowflake => {
25575 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
25576 let cast_val =
25577 Expression::Cast(Box::new(crate::expressions::Cast {
25578 this: f.expression,
25579 to: crate::expressions::DataType::Custom {
25580 name: "VARIANT".to_string(),
25581 },
25582 trailing_comments: Vec::new(),
25583 double_colon_syntax: false,
25584 format: None,
25585 default: None,
25586 inferred_type: None,
25587 }));
25588 Ok(Expression::Function(Box::new(Function::new(
25589 "ARRAY_CONTAINS".to_string(),
25590 vec![cast_val, f.this],
25591 ))))
25592 }
25593 _ => Ok(Expression::ArrayContains(f)),
25594 }
25595 } else {
25596 Ok(e)
25597 }
25598 }
25599
25600 Action::ArrayExceptConvert => {
25601 if let Expression::ArrayExcept(f) = e {
25602 let source_arr = f.this;
25603 let exclude_arr = f.expression;
25604 match target {
25605 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
25606 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
25607 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25608 // ELSE LIST_TRANSFORM(LIST_FILTER(
25609 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
25610 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
25611 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
25612 // pair -> pair[1])
25613 // END
25614
25615 // Build null check
25616 let source_is_null =
25617 Expression::IsNull(Box::new(crate::expressions::IsNull {
25618 this: source_arr.clone(),
25619 not: false,
25620 postfix_form: false,
25621 }));
25622 let exclude_is_null =
25623 Expression::IsNull(Box::new(crate::expressions::IsNull {
25624 this: exclude_arr.clone(),
25625 not: false,
25626 postfix_form: false,
25627 }));
25628 let null_check =
25629 Expression::Or(Box::new(crate::expressions::BinaryOp {
25630 left: source_is_null,
25631 right: exclude_is_null,
25632 left_comments: vec![],
25633 operator_comments: vec![],
25634 trailing_comments: vec![],
25635 inferred_type: None,
25636 }));
25637
25638 // GENERATE_SERIES(1, LENGTH(source))
25639 let gen_series = Expression::Function(Box::new(Function::new(
25640 "GENERATE_SERIES".to_string(),
25641 vec![
25642 Expression::number(1),
25643 Expression::Function(Box::new(Function::new(
25644 "LENGTH".to_string(),
25645 vec![source_arr.clone()],
25646 ))),
25647 ],
25648 )));
25649
25650 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
25651 let list_zip = Expression::Function(Box::new(Function::new(
25652 "LIST_ZIP".to_string(),
25653 vec![source_arr.clone(), gen_series],
25654 )));
25655
25656 // pair[1] and pair[2]
25657 let pair_col = Expression::column("pair");
25658 let pair_1 = Expression::Subscript(Box::new(
25659 crate::expressions::Subscript {
25660 this: pair_col.clone(),
25661 index: Expression::number(1),
25662 },
25663 ));
25664 let pair_2 = Expression::Subscript(Box::new(
25665 crate::expressions::Subscript {
25666 this: pair_col.clone(),
25667 index: Expression::number(2),
25668 },
25669 ));
25670
25671 // source[1:pair[2]]
25672 let source_slice = Expression::ArraySlice(Box::new(
25673 crate::expressions::ArraySlice {
25674 this: source_arr.clone(),
25675 start: Some(Expression::number(1)),
25676 end: Some(pair_2),
25677 },
25678 ));
25679
25680 let e_col = Expression::column("e");
25681
25682 // e -> e IS NOT DISTINCT FROM pair[1]
25683 let inner_lambda1 =
25684 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25685 parameters: vec![crate::expressions::Identifier::new("e")],
25686 body: Expression::NullSafeEq(Box::new(
25687 crate::expressions::BinaryOp {
25688 left: e_col.clone(),
25689 right: pair_1.clone(),
25690 left_comments: vec![],
25691 operator_comments: vec![],
25692 trailing_comments: vec![],
25693 inferred_type: None,
25694 },
25695 )),
25696 colon: false,
25697 parameter_types: vec![],
25698 }));
25699
25700 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
25701 let inner_filter1 = Expression::Function(Box::new(Function::new(
25702 "LIST_FILTER".to_string(),
25703 vec![source_slice, inner_lambda1],
25704 )));
25705
25706 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
25707 let len1 = Expression::Function(Box::new(Function::new(
25708 "LENGTH".to_string(),
25709 vec![inner_filter1],
25710 )));
25711
25712 // e -> e IS NOT DISTINCT FROM pair[1]
25713 let inner_lambda2 =
25714 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25715 parameters: vec![crate::expressions::Identifier::new("e")],
25716 body: Expression::NullSafeEq(Box::new(
25717 crate::expressions::BinaryOp {
25718 left: e_col,
25719 right: pair_1.clone(),
25720 left_comments: vec![],
25721 operator_comments: vec![],
25722 trailing_comments: vec![],
25723 inferred_type: None,
25724 },
25725 )),
25726 colon: false,
25727 parameter_types: vec![],
25728 }));
25729
25730 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
25731 let inner_filter2 = Expression::Function(Box::new(Function::new(
25732 "LIST_FILTER".to_string(),
25733 vec![exclude_arr.clone(), inner_lambda2],
25734 )));
25735
25736 // LENGTH(LIST_FILTER(exclude, ...))
25737 let len2 = Expression::Function(Box::new(Function::new(
25738 "LENGTH".to_string(),
25739 vec![inner_filter2],
25740 )));
25741
25742 // (LENGTH(...) > LENGTH(...))
25743 let cond = Expression::Paren(Box::new(Paren {
25744 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
25745 left: len1,
25746 right: len2,
25747 left_comments: vec![],
25748 operator_comments: vec![],
25749 trailing_comments: vec![],
25750 inferred_type: None,
25751 })),
25752 trailing_comments: vec![],
25753 }));
25754
25755 // pair -> (condition)
25756 let filter_lambda =
25757 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25758 parameters: vec![crate::expressions::Identifier::new(
25759 "pair",
25760 )],
25761 body: cond,
25762 colon: false,
25763 parameter_types: vec![],
25764 }));
25765
25766 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
25767 let outer_filter = Expression::Function(Box::new(Function::new(
25768 "LIST_FILTER".to_string(),
25769 vec![list_zip, filter_lambda],
25770 )));
25771
25772 // pair -> pair[1]
25773 let transform_lambda =
25774 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25775 parameters: vec![crate::expressions::Identifier::new(
25776 "pair",
25777 )],
25778 body: pair_1,
25779 colon: false,
25780 parameter_types: vec![],
25781 }));
25782
25783 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
25784 let list_transform = Expression::Function(Box::new(Function::new(
25785 "LIST_TRANSFORM".to_string(),
25786 vec![outer_filter, transform_lambda],
25787 )));
25788
25789 Ok(Expression::Case(Box::new(Case {
25790 operand: None,
25791 whens: vec![(null_check, Expression::Null(Null))],
25792 else_: Some(list_transform),
25793 comments: Vec::new(),
25794 inferred_type: None,
25795 })))
25796 }
25797 DialectType::DuckDB => {
25798 // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
25799 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25800 // ELSE LIST_FILTER(LIST_DISTINCT(source),
25801 // e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
25802 // END
25803
25804 // Build: source IS NULL
25805 let source_is_null =
25806 Expression::IsNull(Box::new(crate::expressions::IsNull {
25807 this: source_arr.clone(),
25808 not: false,
25809 postfix_form: false,
25810 }));
25811 // Build: exclude IS NULL
25812 let exclude_is_null =
25813 Expression::IsNull(Box::new(crate::expressions::IsNull {
25814 this: exclude_arr.clone(),
25815 not: false,
25816 postfix_form: false,
25817 }));
25818 // source IS NULL OR exclude IS NULL
25819 let null_check =
25820 Expression::Or(Box::new(crate::expressions::BinaryOp {
25821 left: source_is_null,
25822 right: exclude_is_null,
25823 left_comments: vec![],
25824 operator_comments: vec![],
25825 trailing_comments: vec![],
25826 inferred_type: None,
25827 }));
25828
25829 // LIST_DISTINCT(source)
25830 let list_distinct = Expression::Function(Box::new(Function::new(
25831 "LIST_DISTINCT".to_string(),
25832 vec![source_arr.clone()],
25833 )));
25834
25835 // x IS NOT DISTINCT FROM e
25836 let x_col = Expression::column("x");
25837 let e_col = Expression::column("e");
25838 let is_not_distinct = Expression::NullSafeEq(Box::new(
25839 crate::expressions::BinaryOp {
25840 left: x_col,
25841 right: e_col.clone(),
25842 left_comments: vec![],
25843 operator_comments: vec![],
25844 trailing_comments: vec![],
25845 inferred_type: None,
25846 },
25847 ));
25848
25849 // x -> x IS NOT DISTINCT FROM e
25850 let inner_lambda =
25851 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25852 parameters: vec![crate::expressions::Identifier::new("x")],
25853 body: is_not_distinct,
25854 colon: false,
25855 parameter_types: vec![],
25856 }));
25857
25858 // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
25859 let inner_list_filter =
25860 Expression::Function(Box::new(Function::new(
25861 "LIST_FILTER".to_string(),
25862 vec![exclude_arr.clone(), inner_lambda],
25863 )));
25864
25865 // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
25866 let len_inner = Expression::Function(Box::new(Function::new(
25867 "LENGTH".to_string(),
25868 vec![inner_list_filter],
25869 )));
25870
25871 // LENGTH(...) = 0
25872 let eq_zero =
25873 Expression::Eq(Box::new(crate::expressions::BinaryOp {
25874 left: len_inner,
25875 right: Expression::number(0),
25876 left_comments: vec![],
25877 operator_comments: vec![],
25878 trailing_comments: vec![],
25879 inferred_type: None,
25880 }));
25881
25882 // e -> LENGTH(LIST_FILTER(...)) = 0
25883 let outer_lambda =
25884 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25885 parameters: vec![crate::expressions::Identifier::new("e")],
25886 body: eq_zero,
25887 colon: false,
25888 parameter_types: vec![],
25889 }));
25890
25891 // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
25892 let outer_list_filter =
25893 Expression::Function(Box::new(Function::new(
25894 "LIST_FILTER".to_string(),
25895 vec![list_distinct, outer_lambda],
25896 )));
25897
25898 // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
25899 Ok(Expression::Case(Box::new(Case {
25900 operand: None,
25901 whens: vec![(null_check, Expression::Null(Null))],
25902 else_: Some(outer_list_filter),
25903 comments: Vec::new(),
25904 inferred_type: None,
25905 })))
25906 }
25907 DialectType::Snowflake => {
25908 // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
25909 Ok(Expression::ArrayExcept(Box::new(
25910 crate::expressions::BinaryFunc {
25911 this: source_arr,
25912 expression: exclude_arr,
25913 original_name: None,
25914 inferred_type: None,
25915 },
25916 )))
25917 }
25918 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25919 // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
25920 Ok(Expression::Function(Box::new(Function::new(
25921 "ARRAY_EXCEPT".to_string(),
25922 vec![source_arr, exclude_arr],
25923 ))))
25924 }
25925 _ => Ok(Expression::ArrayExcept(Box::new(
25926 crate::expressions::BinaryFunc {
25927 this: source_arr,
25928 expression: exclude_arr,
25929 original_name: None,
25930 inferred_type: None,
25931 },
25932 ))),
25933 }
25934 } else {
25935 Ok(e)
25936 }
25937 }
25938
25939 Action::RegexpLikeExasolAnchor => {
25940 // RegexpLike -> Exasol: wrap pattern with .*...*
25941 // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
25942 // dialects does partial match, so we need to anchor with .* on both sides
25943 if let Expression::RegexpLike(mut f) = e {
25944 match &f.pattern {
25945 Expression::Literal(lit)
25946 if matches!(lit.as_ref(), Literal::String(_)) =>
25947 {
25948 let Literal::String(s) = lit.as_ref() else {
25949 unreachable!()
25950 };
25951 // String literal: wrap with .*...*
25952 f.pattern = Expression::Literal(Box::new(Literal::String(
25953 format!(".*{}.*", s),
25954 )));
25955 }
25956 _ => {
25957 // Non-literal: wrap with CONCAT('.*', pattern, '.*')
25958 f.pattern =
25959 Expression::Paren(Box::new(crate::expressions::Paren {
25960 this: Expression::Concat(Box::new(
25961 crate::expressions::BinaryOp {
25962 left: Expression::Concat(Box::new(
25963 crate::expressions::BinaryOp {
25964 left: Expression::Literal(Box::new(
25965 Literal::String(".*".to_string()),
25966 )),
25967 right: f.pattern,
25968 left_comments: vec![],
25969 operator_comments: vec![],
25970 trailing_comments: vec![],
25971 inferred_type: None,
25972 },
25973 )),
25974 right: Expression::Literal(Box::new(
25975 Literal::String(".*".to_string()),
25976 )),
25977 left_comments: vec![],
25978 operator_comments: vec![],
25979 trailing_comments: vec![],
25980 inferred_type: None,
25981 },
25982 )),
25983 trailing_comments: vec![],
25984 }));
25985 }
25986 }
25987 Ok(Expression::RegexpLike(f))
25988 } else {
25989 Ok(e)
25990 }
25991 }
25992
25993 Action::ArrayPositionSnowflakeSwap => {
25994 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
25995 if let Expression::ArrayPosition(f) = e {
25996 Ok(Expression::ArrayPosition(Box::new(
25997 crate::expressions::BinaryFunc {
25998 this: f.expression,
25999 expression: f.this,
26000 original_name: f.original_name,
26001 inferred_type: f.inferred_type,
26002 },
26003 )))
26004 } else {
26005 Ok(e)
26006 }
26007 }
26008
26009 Action::SnowflakeArrayPositionToDuckDB => {
26010 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
26011 // Snowflake uses 0-based indexing, DuckDB uses 1-based
26012 // The parser has this=value, expression=array (Snowflake order)
26013 if let Expression::ArrayPosition(f) = e {
26014 // Create ARRAY_POSITION(array, value) in standard order
26015 let standard_pos =
26016 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
26017 this: f.expression, // array
26018 expression: f.this, // value
26019 original_name: f.original_name,
26020 inferred_type: f.inferred_type,
26021 }));
26022 // Subtract 1 for zero-based indexing
26023 Ok(Expression::Sub(Box::new(BinaryOp {
26024 left: standard_pos,
26025 right: Expression::number(1),
26026 left_comments: vec![],
26027 operator_comments: vec![],
26028 trailing_comments: vec![],
26029 inferred_type: None,
26030 })))
26031 } else {
26032 Ok(e)
26033 }
26034 }
26035
26036 Action::ArrayDistinctConvert => {
26037 // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
26038 // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
26039 // THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
26040 // ELSE LIST_DISTINCT(arr)
26041 // END
26042 if let Expression::ArrayDistinct(f) = e {
26043 let arr = f.this;
26044
26045 // ARRAY_LENGTH(arr)
26046 let array_length = Expression::Function(Box::new(Function::new(
26047 "ARRAY_LENGTH".to_string(),
26048 vec![arr.clone()],
26049 )));
26050 // LIST_COUNT(arr)
26051 let list_count = Expression::Function(Box::new(Function::new(
26052 "LIST_COUNT".to_string(),
26053 vec![arr.clone()],
26054 )));
26055 // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
26056 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
26057 left: array_length,
26058 right: list_count,
26059 left_comments: vec![],
26060 operator_comments: vec![],
26061 trailing_comments: vec![],
26062 inferred_type: None,
26063 }));
26064
26065 // _u column
26066 let u_col = Expression::column("_u");
26067 // NOT _u IS NULL
26068 let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
26069 this: u_col.clone(),
26070 not: false,
26071 postfix_form: false,
26072 }));
26073 let not_u_is_null =
26074 Expression::Not(Box::new(crate::expressions::UnaryOp {
26075 this: u_is_null,
26076 inferred_type: None,
26077 }));
26078 // _u -> NOT _u IS NULL
26079 let filter_lambda =
26080 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26081 parameters: vec![crate::expressions::Identifier::new("_u")],
26082 body: not_u_is_null,
26083 colon: false,
26084 parameter_types: vec![],
26085 }));
26086 // LIST_FILTER(arr, _u -> NOT _u IS NULL)
26087 let list_filter = Expression::Function(Box::new(Function::new(
26088 "LIST_FILTER".to_string(),
26089 vec![arr.clone(), filter_lambda],
26090 )));
26091 // LIST_DISTINCT(LIST_FILTER(arr, ...))
26092 let list_distinct_filtered = Expression::Function(Box::new(Function::new(
26093 "LIST_DISTINCT".to_string(),
26094 vec![list_filter],
26095 )));
26096 // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
26097 let list_append = Expression::Function(Box::new(Function::new(
26098 "LIST_APPEND".to_string(),
26099 vec![list_distinct_filtered, Expression::Null(Null)],
26100 )));
26101
26102 // LIST_DISTINCT(arr)
26103 let list_distinct = Expression::Function(Box::new(Function::new(
26104 "LIST_DISTINCT".to_string(),
26105 vec![arr],
26106 )));
26107
26108 // CASE WHEN neq THEN list_append ELSE list_distinct END
26109 Ok(Expression::Case(Box::new(Case {
26110 operand: None,
26111 whens: vec![(neq, list_append)],
26112 else_: Some(list_distinct),
26113 comments: Vec::new(),
26114 inferred_type: None,
26115 })))
26116 } else {
26117 Ok(e)
26118 }
26119 }
26120
26121 Action::ArrayDistinctClickHouse => {
26122 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
26123 if let Expression::ArrayDistinct(f) = e {
26124 Ok(Expression::Function(Box::new(Function::new(
26125 "arrayDistinct".to_string(),
26126 vec![f.this],
26127 ))))
26128 } else {
26129 Ok(e)
26130 }
26131 }
26132
26133 Action::ArrayContainsDuckDBConvert => {
26134 // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
26135 // CASE WHEN value IS NULL
26136 // THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26137 // ELSE ARRAY_CONTAINS(array, value)
26138 // END
26139 // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
26140 if let Expression::ArrayContains(f) = e {
26141 let value = f.this;
26142 let array = f.expression;
26143
26144 // value IS NULL
26145 let value_is_null =
26146 Expression::IsNull(Box::new(crate::expressions::IsNull {
26147 this: value.clone(),
26148 not: false,
26149 postfix_form: false,
26150 }));
26151
26152 // ARRAY_LENGTH(array)
26153 let array_length = Expression::Function(Box::new(Function::new(
26154 "ARRAY_LENGTH".to_string(),
26155 vec![array.clone()],
26156 )));
26157 // LIST_COUNT(array)
26158 let list_count = Expression::Function(Box::new(Function::new(
26159 "LIST_COUNT".to_string(),
26160 vec![array.clone()],
26161 )));
26162 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
26163 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
26164 left: array_length,
26165 right: list_count,
26166 left_comments: vec![],
26167 operator_comments: vec![],
26168 trailing_comments: vec![],
26169 inferred_type: None,
26170 }));
26171 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26172 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
26173 this: Box::new(neq),
26174 expression: Box::new(Expression::Boolean(
26175 crate::expressions::BooleanLiteral { value: false },
26176 )),
26177 }));
26178
26179 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
26180 let array_contains = Expression::Function(Box::new(Function::new(
26181 "ARRAY_CONTAINS".to_string(),
26182 vec![array, value],
26183 )));
26184
26185 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
26186 Ok(Expression::Case(Box::new(Case {
26187 operand: None,
26188 whens: vec![(value_is_null, nullif)],
26189 else_: Some(array_contains),
26190 comments: Vec::new(),
26191 inferred_type: None,
26192 })))
26193 } else {
26194 Ok(e)
26195 }
26196 }
26197
26198 Action::StrPositionExpand => {
26199 // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
26200 // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26201 // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26202 if let Expression::StrPosition(sp) = e {
26203 let crate::expressions::StrPosition {
26204 this,
26205 substr,
26206 position,
26207 occurrence,
26208 } = *sp;
26209 let string = *this;
26210 let substr_expr = match substr {
26211 Some(s) => *s,
26212 None => Expression::Null(Null),
26213 };
26214 let pos = match position {
26215 Some(p) => *p,
26216 None => Expression::number(1),
26217 };
26218
26219 // SUBSTRING(string, pos)
26220 let substring_call = Expression::Function(Box::new(Function::new(
26221 "SUBSTRING".to_string(),
26222 vec![string.clone(), pos.clone()],
26223 )));
26224 // STRPOS(SUBSTRING(string, pos), substr)
26225 let strpos_call = Expression::Function(Box::new(Function::new(
26226 "STRPOS".to_string(),
26227 vec![substring_call, substr_expr.clone()],
26228 )));
26229 // STRPOS(...) + pos - 1
26230 let pos_adjusted =
26231 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
26232 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
26233 strpos_call.clone(),
26234 pos.clone(),
26235 ))),
26236 Expression::number(1),
26237 )));
26238 // STRPOS(...) = 0
26239 let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
26240 strpos_call.clone(),
26241 Expression::number(0),
26242 )));
26243
26244 match target {
26245 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26246 // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26247 Ok(Expression::Function(Box::new(Function::new(
26248 "IF".to_string(),
26249 vec![is_zero, Expression::number(0), pos_adjusted],
26250 ))))
26251 }
26252 DialectType::DuckDB => {
26253 // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26254 Ok(Expression::Case(Box::new(Case {
26255 operand: None,
26256 whens: vec![(is_zero, Expression::number(0))],
26257 else_: Some(pos_adjusted),
26258 comments: Vec::new(),
26259 inferred_type: None,
26260 })))
26261 }
26262 _ => {
26263 // Reconstruct StrPosition
26264 Ok(Expression::StrPosition(Box::new(
26265 crate::expressions::StrPosition {
26266 this: Box::new(string),
26267 substr: Some(Box::new(substr_expr)),
26268 position: Some(Box::new(pos)),
26269 occurrence,
26270 },
26271 )))
26272 }
26273 }
26274 } else {
26275 Ok(e)
26276 }
26277 }
26278
26279 Action::MonthsBetweenConvert => {
26280 if let Expression::MonthsBetween(mb) = e {
26281 let crate::expressions::BinaryFunc {
26282 this: end_date,
26283 expression: start_date,
26284 ..
26285 } = *mb;
26286 match target {
26287 DialectType::DuckDB => {
26288 let cast_end = Self::ensure_cast_date(end_date);
26289 let cast_start = Self::ensure_cast_date(start_date);
26290 let dd = Expression::Function(Box::new(Function::new(
26291 "DATE_DIFF".to_string(),
26292 vec![
26293 Expression::string("MONTH"),
26294 cast_start.clone(),
26295 cast_end.clone(),
26296 ],
26297 )));
26298 let day_end = Expression::Function(Box::new(Function::new(
26299 "DAY".to_string(),
26300 vec![cast_end.clone()],
26301 )));
26302 let day_start = Expression::Function(Box::new(Function::new(
26303 "DAY".to_string(),
26304 vec![cast_start.clone()],
26305 )));
26306 let last_day_end = Expression::Function(Box::new(Function::new(
26307 "LAST_DAY".to_string(),
26308 vec![cast_end.clone()],
26309 )));
26310 let last_day_start = Expression::Function(Box::new(Function::new(
26311 "LAST_DAY".to_string(),
26312 vec![cast_start.clone()],
26313 )));
26314 let day_last_end = Expression::Function(Box::new(Function::new(
26315 "DAY".to_string(),
26316 vec![last_day_end],
26317 )));
26318 let day_last_start = Expression::Function(Box::new(Function::new(
26319 "DAY".to_string(),
26320 vec![last_day_start],
26321 )));
26322 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
26323 day_end.clone(),
26324 day_last_end,
26325 )));
26326 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
26327 day_start.clone(),
26328 day_last_start,
26329 )));
26330 let both_cond =
26331 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
26332 let day_diff =
26333 Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
26334 let day_diff_paren =
26335 Expression::Paren(Box::new(crate::expressions::Paren {
26336 this: day_diff,
26337 trailing_comments: Vec::new(),
26338 }));
26339 let frac = Expression::Div(Box::new(BinaryOp::new(
26340 day_diff_paren,
26341 Expression::Literal(Box::new(Literal::Number(
26342 "31.0".to_string(),
26343 ))),
26344 )));
26345 let case_expr = Expression::Case(Box::new(Case {
26346 operand: None,
26347 whens: vec![(both_cond, Expression::number(0))],
26348 else_: Some(frac),
26349 comments: Vec::new(),
26350 inferred_type: None,
26351 }));
26352 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
26353 }
26354 DialectType::Snowflake | DialectType::Redshift => {
26355 let unit = Expression::Identifier(Identifier::new("MONTH"));
26356 Ok(Expression::Function(Box::new(Function::new(
26357 "DATEDIFF".to_string(),
26358 vec![unit, start_date, end_date],
26359 ))))
26360 }
26361 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26362 Ok(Expression::Function(Box::new(Function::new(
26363 "DATE_DIFF".to_string(),
26364 vec![Expression::string("MONTH"), start_date, end_date],
26365 ))))
26366 }
26367 _ => Ok(Expression::MonthsBetween(Box::new(
26368 crate::expressions::BinaryFunc {
26369 this: end_date,
26370 expression: start_date,
26371 original_name: None,
26372 inferred_type: None,
26373 },
26374 ))),
26375 }
26376 } else {
26377 Ok(e)
26378 }
26379 }
26380
26381 Action::AddMonthsConvert => {
26382 if let Expression::AddMonths(am) = e {
26383 let date = am.this;
26384 let val = am.expression;
26385 match target {
26386 DialectType::TSQL | DialectType::Fabric => {
26387 let cast_date = Self::ensure_cast_datetime2(date);
26388 Ok(Expression::Function(Box::new(Function::new(
26389 "DATEADD".to_string(),
26390 vec![
26391 Expression::Identifier(Identifier::new("MONTH")),
26392 val,
26393 cast_date,
26394 ],
26395 ))))
26396 }
26397 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
26398 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26399 // Optionally wrapped in CAST(... AS type) if the input had a specific type
26400
26401 // Determine the cast type from the date expression
26402 let (cast_date, return_type) = match &date {
26403 Expression::Literal(lit)
26404 if matches!(lit.as_ref(), Literal::String(_)) =>
26405 {
26406 // String literal: CAST(str AS TIMESTAMP), no outer CAST
26407 (
26408 Expression::Cast(Box::new(Cast {
26409 this: date.clone(),
26410 to: DataType::Timestamp {
26411 precision: None,
26412 timezone: false,
26413 },
26414 trailing_comments: Vec::new(),
26415 double_colon_syntax: false,
26416 format: None,
26417 default: None,
26418 inferred_type: None,
26419 })),
26420 None,
26421 )
26422 }
26423 Expression::Cast(c) => {
26424 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
26425 (date.clone(), Some(c.to.clone()))
26426 }
26427 _ => {
26428 // Expression or NULL::TYPE - keep as-is, check for cast type
26429 if let Expression::Cast(c) = &date {
26430 (date.clone(), Some(c.to.clone()))
26431 } else {
26432 (date.clone(), None)
26433 }
26434 }
26435 };
26436
26437 // Build the interval expression
26438 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
26439 // For integer values, use INTERVAL val MONTH
26440 let is_non_integer_val = match &val {
26441 Expression::Literal(lit)
26442 if matches!(lit.as_ref(), Literal::Number(_)) =>
26443 {
26444 let Literal::Number(n) = lit.as_ref() else {
26445 unreachable!()
26446 };
26447 n.contains('.')
26448 }
26449 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
26450 Expression::Neg(n) => {
26451 if let Expression::Literal(lit) = &n.this {
26452 if let Literal::Number(s) = lit.as_ref() {
26453 s.contains('.')
26454 } else {
26455 false
26456 }
26457 } else {
26458 false
26459 }
26460 }
26461 _ => false,
26462 };
26463
26464 let add_interval = if is_non_integer_val {
26465 // TO_MONTHS(CAST(ROUND(val) AS INT))
26466 let round_val = Expression::Function(Box::new(Function::new(
26467 "ROUND".to_string(),
26468 vec![val.clone()],
26469 )));
26470 let cast_int = Expression::Cast(Box::new(Cast {
26471 this: round_val,
26472 to: DataType::Int {
26473 length: None,
26474 integer_spelling: false,
26475 },
26476 trailing_comments: Vec::new(),
26477 double_colon_syntax: false,
26478 format: None,
26479 default: None,
26480 inferred_type: None,
26481 }));
26482 Expression::Function(Box::new(Function::new(
26483 "TO_MONTHS".to_string(),
26484 vec![cast_int],
26485 )))
26486 } else {
26487 // INTERVAL val MONTH
26488 // For negative numbers, wrap in parens
26489 let interval_val = match &val {
26490 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
26491 {
26492 let Literal::Number(_) = lit.as_ref() else {
26493 unreachable!()
26494 };
26495 Expression::Paren(Box::new(Paren {
26496 this: val.clone(),
26497 trailing_comments: Vec::new(),
26498 }))
26499 }
26500 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
26501 this: val.clone(),
26502 trailing_comments: Vec::new(),
26503 })),
26504 Expression::Null(_) => Expression::Paren(Box::new(Paren {
26505 this: val.clone(),
26506 trailing_comments: Vec::new(),
26507 })),
26508 _ => val.clone(),
26509 };
26510 Expression::Interval(Box::new(crate::expressions::Interval {
26511 this: Some(interval_val),
26512 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26513 unit: crate::expressions::IntervalUnit::Month,
26514 use_plural: false,
26515 }),
26516 }))
26517 };
26518
26519 // Build: date + interval
26520 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
26521 cast_date.clone(),
26522 add_interval.clone(),
26523 )));
26524
26525 // Build LAST_DAY(date)
26526 let last_day_date = Expression::Function(Box::new(Function::new(
26527 "LAST_DAY".to_string(),
26528 vec![cast_date.clone()],
26529 )));
26530
26531 // Build LAST_DAY(date + interval)
26532 let last_day_date_plus =
26533 Expression::Function(Box::new(Function::new(
26534 "LAST_DAY".to_string(),
26535 vec![date_plus_interval.clone()],
26536 )));
26537
26538 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26539 let case_expr = Expression::Case(Box::new(Case {
26540 operand: None,
26541 whens: vec![(
26542 Expression::Eq(Box::new(BinaryOp::new(
26543 last_day_date,
26544 cast_date.clone(),
26545 ))),
26546 last_day_date_plus,
26547 )],
26548 else_: Some(date_plus_interval),
26549 comments: Vec::new(),
26550 inferred_type: None,
26551 }));
26552
26553 // Wrap in CAST(... AS type) if needed
26554 if let Some(dt) = return_type {
26555 Ok(Expression::Cast(Box::new(Cast {
26556 this: case_expr,
26557 to: dt,
26558 trailing_comments: Vec::new(),
26559 double_colon_syntax: false,
26560 format: None,
26561 default: None,
26562 inferred_type: None,
26563 })))
26564 } else {
26565 Ok(case_expr)
26566 }
26567 }
26568 DialectType::DuckDB => {
26569 // Non-Snowflake source: simple date + INTERVAL
26570 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26571 {
26572 Expression::Cast(Box::new(Cast {
26573 this: date,
26574 to: DataType::Timestamp {
26575 precision: None,
26576 timezone: false,
26577 },
26578 trailing_comments: Vec::new(),
26579 double_colon_syntax: false,
26580 format: None,
26581 default: None,
26582 inferred_type: None,
26583 }))
26584 } else {
26585 date
26586 };
26587 let interval =
26588 Expression::Interval(Box::new(crate::expressions::Interval {
26589 this: Some(val),
26590 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26591 unit: crate::expressions::IntervalUnit::Month,
26592 use_plural: false,
26593 }),
26594 }));
26595 Ok(Expression::Add(Box::new(BinaryOp::new(
26596 cast_date, interval,
26597 ))))
26598 }
26599 DialectType::Snowflake => {
26600 // Keep ADD_MONTHS when source is also Snowflake
26601 if matches!(source, DialectType::Snowflake) {
26602 Ok(Expression::Function(Box::new(Function::new(
26603 "ADD_MONTHS".to_string(),
26604 vec![date, val],
26605 ))))
26606 } else {
26607 Ok(Expression::Function(Box::new(Function::new(
26608 "DATEADD".to_string(),
26609 vec![
26610 Expression::Identifier(Identifier::new("MONTH")),
26611 val,
26612 date,
26613 ],
26614 ))))
26615 }
26616 }
26617 DialectType::Redshift => {
26618 Ok(Expression::Function(Box::new(Function::new(
26619 "DATEADD".to_string(),
26620 vec![
26621 Expression::Identifier(Identifier::new("MONTH")),
26622 val,
26623 date,
26624 ],
26625 ))))
26626 }
26627 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26628 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26629 {
26630 Expression::Cast(Box::new(Cast {
26631 this: date,
26632 to: DataType::Timestamp {
26633 precision: None,
26634 timezone: false,
26635 },
26636 trailing_comments: Vec::new(),
26637 double_colon_syntax: false,
26638 format: None,
26639 default: None,
26640 inferred_type: None,
26641 }))
26642 } else {
26643 date
26644 };
26645 Ok(Expression::Function(Box::new(Function::new(
26646 "DATE_ADD".to_string(),
26647 vec![Expression::string("MONTH"), val, cast_date],
26648 ))))
26649 }
26650 DialectType::BigQuery => {
26651 let interval =
26652 Expression::Interval(Box::new(crate::expressions::Interval {
26653 this: Some(val),
26654 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26655 unit: crate::expressions::IntervalUnit::Month,
26656 use_plural: false,
26657 }),
26658 }));
26659 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26660 {
26661 Expression::Cast(Box::new(Cast {
26662 this: date,
26663 to: DataType::Custom {
26664 name: "DATETIME".to_string(),
26665 },
26666 trailing_comments: Vec::new(),
26667 double_colon_syntax: false,
26668 format: None,
26669 default: None,
26670 inferred_type: None,
26671 }))
26672 } else {
26673 date
26674 };
26675 Ok(Expression::Function(Box::new(Function::new(
26676 "DATE_ADD".to_string(),
26677 vec![cast_date, interval],
26678 ))))
26679 }
26680 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
26681 Ok(Expression::Function(Box::new(Function::new(
26682 "ADD_MONTHS".to_string(),
26683 vec![date, val],
26684 ))))
26685 }
26686 _ => {
26687 // Default: keep as AddMonths expression
26688 Ok(Expression::AddMonths(Box::new(
26689 crate::expressions::BinaryFunc {
26690 this: date,
26691 expression: val,
26692 original_name: None,
26693 inferred_type: None,
26694 },
26695 )))
26696 }
26697 }
26698 } else {
26699 Ok(e)
26700 }
26701 }
26702
26703 Action::PercentileContConvert => {
26704 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
26705 // Presto/Trino: APPROX_PERCENTILE(col, p)
26706 // Spark/Databricks: PERCENTILE_APPROX(col, p)
26707 if let Expression::WithinGroup(wg) = e {
26708 // Extract percentile value and order by column
26709 let (percentile, _is_disc) = match &wg.this {
26710 Expression::Function(f) => {
26711 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26712 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
26713 Box::new(Literal::Number("0.5".to_string())),
26714 ));
26715 (pct, is_disc)
26716 }
26717 Expression::AggregateFunction(af) => {
26718 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26719 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
26720 Box::new(Literal::Number("0.5".to_string())),
26721 ));
26722 (pct, is_disc)
26723 }
26724 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
26725 _ => return Ok(Expression::WithinGroup(wg)),
26726 };
26727 let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
26728 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
26729 );
26730
26731 let func_name = match target {
26732 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26733 "APPROX_PERCENTILE"
26734 }
26735 _ => "PERCENTILE_APPROX", // Spark, Databricks
26736 };
26737 Ok(Expression::Function(Box::new(Function::new(
26738 func_name.to_string(),
26739 vec![col, percentile],
26740 ))))
26741 } else {
26742 Ok(e)
26743 }
26744 }
26745
26746 Action::CurrentUserSparkParens => {
26747 // CURRENT_USER -> CURRENT_USER() for Spark
26748 if let Expression::CurrentUser(_) = e {
26749 Ok(Expression::Function(Box::new(Function::new(
26750 "CURRENT_USER".to_string(),
26751 vec![],
26752 ))))
26753 } else {
26754 Ok(e)
26755 }
26756 }
26757
26758 Action::SparkDateFuncCast => {
26759 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
26760 let cast_arg = |arg: Expression| -> Expression {
26761 match target {
26762 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26763 Self::double_cast_timestamp_date(arg)
26764 }
26765 _ => {
26766 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
26767 Self::ensure_cast_date(arg)
26768 }
26769 }
26770 };
26771 match e {
26772 Expression::Month(f) => Ok(Expression::Month(Box::new(
26773 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26774 ))),
26775 Expression::Year(f) => Ok(Expression::Year(Box::new(
26776 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26777 ))),
26778 Expression::Day(f) => Ok(Expression::Day(Box::new(
26779 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26780 ))),
26781 other => Ok(other),
26782 }
26783 }
26784
26785 Action::MapFromArraysConvert => {
26786 // Expression::MapFromArrays -> target-specific
26787 if let Expression::MapFromArrays(mfa) = e {
26788 let keys = mfa.this;
26789 let values = mfa.expression;
26790 match target {
26791 DialectType::Snowflake => Ok(Expression::Function(Box::new(
26792 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
26793 ))),
26794 _ => {
26795 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
26796 Ok(Expression::Function(Box::new(Function::new(
26797 "MAP".to_string(),
26798 vec![keys, values],
26799 ))))
26800 }
26801 }
26802 } else {
26803 Ok(e)
26804 }
26805 }
26806
26807 Action::AnyToExists => {
26808 if let Expression::Any(q) = e {
26809 if let Some(op) = q.op.clone() {
26810 let lambda_param = crate::expressions::Identifier::new("x");
26811 let rhs = Expression::Identifier(lambda_param.clone());
26812 let body = match op {
26813 crate::expressions::QuantifiedOp::Eq => {
26814 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
26815 }
26816 crate::expressions::QuantifiedOp::Neq => {
26817 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
26818 }
26819 crate::expressions::QuantifiedOp::Lt => {
26820 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
26821 }
26822 crate::expressions::QuantifiedOp::Lte => {
26823 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
26824 }
26825 crate::expressions::QuantifiedOp::Gt => {
26826 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
26827 }
26828 crate::expressions::QuantifiedOp::Gte => {
26829 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
26830 }
26831 };
26832 let lambda =
26833 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26834 parameters: vec![lambda_param],
26835 body,
26836 colon: false,
26837 parameter_types: Vec::new(),
26838 }));
26839 Ok(Expression::Function(Box::new(Function::new(
26840 "EXISTS".to_string(),
26841 vec![q.subquery, lambda],
26842 ))))
26843 } else {
26844 Ok(Expression::Any(q))
26845 }
26846 } else {
26847 Ok(e)
26848 }
26849 }
26850
26851 Action::GenerateSeriesConvert => {
26852 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
26853 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
26854 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
26855 if let Expression::Function(f) = e {
26856 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
26857 let start = f.args[0].clone();
26858 let end = f.args[1].clone();
26859 let step = f.args.get(2).cloned();
26860
26861 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
26862 let step = step.map(|s| Self::normalize_interval_string(s, target));
26863
26864 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
26865 let maybe_cast_timestamp = |arg: Expression| -> Expression {
26866 if matches!(
26867 target,
26868 DialectType::Presto
26869 | DialectType::Trino
26870 | DialectType::Athena
26871 | DialectType::Spark
26872 | DialectType::Databricks
26873 | DialectType::Hive
26874 ) {
26875 match &arg {
26876 Expression::CurrentTimestamp(_) => {
26877 Expression::Cast(Box::new(Cast {
26878 this: arg,
26879 to: DataType::Timestamp {
26880 precision: None,
26881 timezone: false,
26882 },
26883 trailing_comments: Vec::new(),
26884 double_colon_syntax: false,
26885 format: None,
26886 default: None,
26887 inferred_type: None,
26888 }))
26889 }
26890 _ => arg,
26891 }
26892 } else {
26893 arg
26894 }
26895 };
26896
26897 let start = maybe_cast_timestamp(start);
26898 let end = maybe_cast_timestamp(end);
26899
26900 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
26901 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
26902 let mut gs_args = vec![start, end];
26903 if let Some(step) = step {
26904 gs_args.push(step);
26905 }
26906 return Ok(Expression::Function(Box::new(Function::new(
26907 "GENERATE_SERIES".to_string(),
26908 gs_args,
26909 ))));
26910 }
26911
26912 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
26913 if matches!(target, DialectType::DuckDB) {
26914 let mut gs_args = vec![start, end];
26915 if let Some(step) = step {
26916 gs_args.push(step);
26917 }
26918 let gs = Expression::Function(Box::new(Function::new(
26919 "GENERATE_SERIES".to_string(),
26920 gs_args,
26921 )));
26922 return Ok(Expression::Function(Box::new(Function::new(
26923 "UNNEST".to_string(),
26924 vec![gs],
26925 ))));
26926 }
26927
26928 let mut seq_args = vec![start, end];
26929 if let Some(step) = step {
26930 seq_args.push(step);
26931 }
26932
26933 let seq = Expression::Function(Box::new(Function::new(
26934 "SEQUENCE".to_string(),
26935 seq_args,
26936 )));
26937
26938 match target {
26939 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26940 // Wrap in UNNEST
26941 Ok(Expression::Function(Box::new(Function::new(
26942 "UNNEST".to_string(),
26943 vec![seq],
26944 ))))
26945 }
26946 DialectType::Spark
26947 | DialectType::Databricks
26948 | DialectType::Hive => {
26949 // Wrap in EXPLODE
26950 Ok(Expression::Function(Box::new(Function::new(
26951 "EXPLODE".to_string(),
26952 vec![seq],
26953 ))))
26954 }
26955 _ => {
26956 // Just SEQUENCE for others
26957 Ok(seq)
26958 }
26959 }
26960 } else {
26961 Ok(Expression::Function(f))
26962 }
26963 } else {
26964 Ok(e)
26965 }
26966 }
26967
26968 Action::ConcatCoalesceWrap => {
26969 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
26970 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
26971 if let Expression::Function(f) = e {
26972 if f.name.eq_ignore_ascii_case("CONCAT") {
26973 let new_args: Vec<Expression> = f
26974 .args
26975 .into_iter()
26976 .map(|arg| {
26977 let cast_arg = if matches!(
26978 target,
26979 DialectType::Presto
26980 | DialectType::Trino
26981 | DialectType::Athena
26982 ) {
26983 Expression::Cast(Box::new(Cast {
26984 this: arg,
26985 to: DataType::VarChar {
26986 length: None,
26987 parenthesized_length: false,
26988 },
26989 trailing_comments: Vec::new(),
26990 double_colon_syntax: false,
26991 format: None,
26992 default: None,
26993 inferred_type: None,
26994 }))
26995 } else {
26996 arg
26997 };
26998 Expression::Function(Box::new(Function::new(
26999 "COALESCE".to_string(),
27000 vec![cast_arg, Expression::string("")],
27001 )))
27002 })
27003 .collect();
27004 Ok(Expression::Function(Box::new(Function::new(
27005 "CONCAT".to_string(),
27006 new_args,
27007 ))))
27008 } else {
27009 Ok(Expression::Function(f))
27010 }
27011 } else {
27012 Ok(e)
27013 }
27014 }
27015
27016 Action::PipeConcatToConcat => {
27017 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
27018 if let Expression::Concat(op) = e {
27019 let cast_left = Expression::Cast(Box::new(Cast {
27020 this: op.left,
27021 to: DataType::VarChar {
27022 length: None,
27023 parenthesized_length: false,
27024 },
27025 trailing_comments: Vec::new(),
27026 double_colon_syntax: false,
27027 format: None,
27028 default: None,
27029 inferred_type: None,
27030 }));
27031 let cast_right = Expression::Cast(Box::new(Cast {
27032 this: op.right,
27033 to: DataType::VarChar {
27034 length: None,
27035 parenthesized_length: false,
27036 },
27037 trailing_comments: Vec::new(),
27038 double_colon_syntax: false,
27039 format: None,
27040 default: None,
27041 inferred_type: None,
27042 }));
27043 Ok(Expression::Function(Box::new(Function::new(
27044 "CONCAT".to_string(),
27045 vec![cast_left, cast_right],
27046 ))))
27047 } else {
27048 Ok(e)
27049 }
27050 }
27051
27052 Action::DivFuncConvert => {
27053 // DIV(a, b) -> target-specific integer division
27054 if let Expression::Function(f) = e {
27055 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
27056 let a = f.args[0].clone();
27057 let b = f.args[1].clone();
27058 match target {
27059 DialectType::DuckDB => {
27060 // DIV(a, b) -> CAST(a // b AS DECIMAL)
27061 let int_div = Expression::IntDiv(Box::new(
27062 crate::expressions::BinaryFunc {
27063 this: a,
27064 expression: b,
27065 original_name: None,
27066 inferred_type: None,
27067 },
27068 ));
27069 Ok(Expression::Cast(Box::new(Cast {
27070 this: int_div,
27071 to: DataType::Decimal {
27072 precision: None,
27073 scale: None,
27074 },
27075 trailing_comments: Vec::new(),
27076 double_colon_syntax: false,
27077 format: None,
27078 default: None,
27079 inferred_type: None,
27080 })))
27081 }
27082 DialectType::BigQuery => {
27083 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
27084 let div_func = Expression::Function(Box::new(Function::new(
27085 "DIV".to_string(),
27086 vec![a, b],
27087 )));
27088 Ok(Expression::Cast(Box::new(Cast {
27089 this: div_func,
27090 to: DataType::Custom {
27091 name: "NUMERIC".to_string(),
27092 },
27093 trailing_comments: Vec::new(),
27094 double_colon_syntax: false,
27095 format: None,
27096 default: None,
27097 inferred_type: None,
27098 })))
27099 }
27100 DialectType::SQLite => {
27101 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
27102 let cast_a = Expression::Cast(Box::new(Cast {
27103 this: a,
27104 to: DataType::Custom {
27105 name: "REAL".to_string(),
27106 },
27107 trailing_comments: Vec::new(),
27108 double_colon_syntax: false,
27109 format: None,
27110 default: None,
27111 inferred_type: None,
27112 }));
27113 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
27114 let cast_int = Expression::Cast(Box::new(Cast {
27115 this: div,
27116 to: DataType::Int {
27117 length: None,
27118 integer_spelling: true,
27119 },
27120 trailing_comments: Vec::new(),
27121 double_colon_syntax: false,
27122 format: None,
27123 default: None,
27124 inferred_type: None,
27125 }));
27126 Ok(Expression::Cast(Box::new(Cast {
27127 this: cast_int,
27128 to: DataType::Custom {
27129 name: "REAL".to_string(),
27130 },
27131 trailing_comments: Vec::new(),
27132 double_colon_syntax: false,
27133 format: None,
27134 default: None,
27135 inferred_type: None,
27136 })))
27137 }
27138 _ => Ok(Expression::Function(f)),
27139 }
27140 } else {
27141 Ok(Expression::Function(f))
27142 }
27143 } else {
27144 Ok(e)
27145 }
27146 }
27147
27148 Action::JsonObjectAggConvert => {
27149 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
27150 match e {
27151 Expression::Function(f) => Ok(Expression::Function(Box::new(
27152 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
27153 ))),
27154 Expression::AggregateFunction(af) => {
27155 // AggregateFunction stores all args in the `args` vec
27156 Ok(Expression::Function(Box::new(Function::new(
27157 "JSON_GROUP_OBJECT".to_string(),
27158 af.args,
27159 ))))
27160 }
27161 other => Ok(other),
27162 }
27163 }
27164
27165 Action::JsonbExistsConvert => {
27166 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
27167 if let Expression::Function(f) = e {
27168 if f.args.len() == 2 {
27169 let json_expr = f.args[0].clone();
27170 let key = match &f.args[1] {
27171 Expression::Literal(lit)
27172 if matches!(
27173 lit.as_ref(),
27174 crate::expressions::Literal::String(_)
27175 ) =>
27176 {
27177 let crate::expressions::Literal::String(s) = lit.as_ref()
27178 else {
27179 unreachable!()
27180 };
27181 format!("$.{}", s)
27182 }
27183 _ => return Ok(Expression::Function(f)),
27184 };
27185 Ok(Expression::Function(Box::new(Function::new(
27186 "JSON_EXISTS".to_string(),
27187 vec![json_expr, Expression::string(&key)],
27188 ))))
27189 } else {
27190 Ok(Expression::Function(f))
27191 }
27192 } else {
27193 Ok(e)
27194 }
27195 }
27196
27197 Action::DateBinConvert => {
27198 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
27199 if let Expression::Function(f) = e {
27200 Ok(Expression::Function(Box::new(Function::new(
27201 "TIME_BUCKET".to_string(),
27202 f.args,
27203 ))))
27204 } else {
27205 Ok(e)
27206 }
27207 }
27208
27209 Action::MysqlCastCharToText => {
27210 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
27211 if let Expression::Cast(mut c) = e {
27212 c.to = DataType::Text;
27213 Ok(Expression::Cast(c))
27214 } else {
27215 Ok(e)
27216 }
27217 }
27218
27219 Action::SparkCastVarcharToString => {
27220 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
27221 match e {
27222 Expression::Cast(mut c) => {
27223 c.to = Self::normalize_varchar_to_string(c.to);
27224 Ok(Expression::Cast(c))
27225 }
27226 Expression::TryCast(mut c) => {
27227 c.to = Self::normalize_varchar_to_string(c.to);
27228 Ok(Expression::TryCast(c))
27229 }
27230 _ => Ok(e),
27231 }
27232 }
27233
27234 Action::MinMaxToLeastGreatest => {
27235 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
27236 if let Expression::Function(f) = e {
27237 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
27238 "LEAST"
27239 } else if f.name.eq_ignore_ascii_case("MAX") {
27240 "GREATEST"
27241 } else {
27242 return Ok(Expression::Function(f));
27243 };
27244 Ok(Expression::Function(Box::new(Function::new(
27245 new_name.to_string(),
27246 f.args,
27247 ))))
27248 } else {
27249 Ok(e)
27250 }
27251 }
27252
27253 Action::ClickHouseUniqToApproxCountDistinct => {
27254 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
27255 if let Expression::Function(f) = e {
27256 Ok(Expression::Function(Box::new(Function::new(
27257 "APPROX_COUNT_DISTINCT".to_string(),
27258 f.args,
27259 ))))
27260 } else {
27261 Ok(e)
27262 }
27263 }
27264
27265 Action::ClickHouseAnyToAnyValue => {
27266 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
27267 if let Expression::Function(f) = e {
27268 Ok(Expression::Function(Box::new(Function::new(
27269 "ANY_VALUE".to_string(),
27270 f.args,
27271 ))))
27272 } else {
27273 Ok(e)
27274 }
27275 }
27276
27277 Action::OracleVarchar2ToVarchar => {
27278 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
27279 if let Expression::DataType(DataType::Custom { ref name }) = e {
27280 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
27281 let starts_varchar2 =
27282 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
27283 let starts_nvarchar2 =
27284 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
27285 let inner = if starts_varchar2 || starts_nvarchar2 {
27286 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
27287 let end = name.len() - 1; // skip trailing ")"
27288 Some(&name[start..end])
27289 } else {
27290 Option::None
27291 };
27292 if let Some(inner_str) = inner {
27293 // Parse the number part, ignoring BYTE/CHAR qualifier
27294 let num_str = inner_str.split_whitespace().next().unwrap_or("");
27295 if let Ok(n) = num_str.parse::<u32>() {
27296 Ok(Expression::DataType(DataType::VarChar {
27297 length: Some(n),
27298 parenthesized_length: false,
27299 }))
27300 } else {
27301 Ok(e)
27302 }
27303 } else {
27304 // Plain VARCHAR2 / NVARCHAR2 without parens
27305 Ok(Expression::DataType(DataType::VarChar {
27306 length: Option::None,
27307 parenthesized_length: false,
27308 }))
27309 }
27310 } else {
27311 Ok(e)
27312 }
27313 }
27314
27315 Action::Nvl2Expand => {
27316 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
27317 // But keep as NVL2 for dialects that support it natively
27318 let nvl2_native = matches!(
27319 target,
27320 DialectType::Oracle
27321 | DialectType::Snowflake
27322 | DialectType::Redshift
27323 | DialectType::Teradata
27324 | DialectType::Spark
27325 | DialectType::Databricks
27326 );
27327 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
27328 if nvl2_native {
27329 return Ok(Expression::Nvl2(nvl2));
27330 }
27331 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
27332 } else if let Expression::Function(f) = e {
27333 if nvl2_native {
27334 return Ok(Expression::Function(Box::new(Function::new(
27335 "NVL2".to_string(),
27336 f.args,
27337 ))));
27338 }
27339 if f.args.len() < 2 {
27340 return Ok(Expression::Function(f));
27341 }
27342 let mut args = f.args;
27343 let a = args.remove(0);
27344 let b = args.remove(0);
27345 let c = if !args.is_empty() {
27346 Some(args.remove(0))
27347 } else {
27348 Option::None
27349 };
27350 (a, b, c)
27351 } else {
27352 return Ok(e);
27353 };
27354 // Build: NOT (a IS NULL)
27355 let is_null = Expression::IsNull(Box::new(IsNull {
27356 this: a,
27357 not: false,
27358 postfix_form: false,
27359 }));
27360 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
27361 this: is_null,
27362 inferred_type: None,
27363 }));
27364 Ok(Expression::Case(Box::new(Case {
27365 operand: Option::None,
27366 whens: vec![(not_null, b)],
27367 else_: c,
27368 comments: Vec::new(),
27369 inferred_type: None,
27370 })))
27371 }
27372
27373 Action::IfnullToCoalesce => {
27374 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
27375 if let Expression::Coalesce(mut cf) = e {
27376 cf.original_name = Option::None;
27377 Ok(Expression::Coalesce(cf))
27378 } else if let Expression::Function(f) = e {
27379 Ok(Expression::Function(Box::new(Function::new(
27380 "COALESCE".to_string(),
27381 f.args,
27382 ))))
27383 } else {
27384 Ok(e)
27385 }
27386 }
27387
27388 Action::IsAsciiConvert => {
27389 // IS_ASCII(x) -> dialect-specific ASCII check
27390 if let Expression::Function(f) = e {
27391 let arg = f.args.into_iter().next().unwrap();
27392 match target {
27393 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
27394 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
27395 Ok(Expression::Function(Box::new(Function::new(
27396 "REGEXP_LIKE".to_string(),
27397 vec![
27398 arg,
27399 Expression::Literal(Box::new(Literal::String(
27400 "^[[:ascii:]]*$".to_string(),
27401 ))),
27402 ],
27403 ))))
27404 }
27405 DialectType::PostgreSQL
27406 | DialectType::Redshift
27407 | DialectType::Materialize
27408 | DialectType::RisingWave => {
27409 // (x ~ '^[[:ascii:]]*$')
27410 Ok(Expression::Paren(Box::new(Paren {
27411 this: Expression::RegexpLike(Box::new(
27412 crate::expressions::RegexpFunc {
27413 this: arg,
27414 pattern: Expression::Literal(Box::new(
27415 Literal::String("^[[:ascii:]]*$".to_string()),
27416 )),
27417 flags: Option::None,
27418 },
27419 )),
27420 trailing_comments: Vec::new(),
27421 })))
27422 }
27423 DialectType::SQLite => {
27424 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
27425 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
27426 "2a5b5e012d7f5d2a".to_string(),
27427 )));
27428 let cast_expr = Expression::Cast(Box::new(Cast {
27429 this: hex_lit,
27430 to: DataType::Text,
27431 trailing_comments: Vec::new(),
27432 double_colon_syntax: false,
27433 format: Option::None,
27434 default: Option::None,
27435 inferred_type: None,
27436 }));
27437 let glob = Expression::Glob(Box::new(BinaryOp {
27438 left: arg,
27439 right: cast_expr,
27440 left_comments: Vec::new(),
27441 operator_comments: Vec::new(),
27442 trailing_comments: Vec::new(),
27443 inferred_type: None,
27444 }));
27445 Ok(Expression::Paren(Box::new(Paren {
27446 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
27447 this: glob,
27448 inferred_type: None,
27449 })),
27450 trailing_comments: Vec::new(),
27451 })))
27452 }
27453 DialectType::TSQL | DialectType::Fabric => {
27454 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
27455 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
27456 "255b5e002d7f5d25".to_string(),
27457 )));
27458 let convert_expr = Expression::Convert(Box::new(
27459 crate::expressions::ConvertFunc {
27460 this: hex_lit,
27461 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
27462 style: None,
27463 },
27464 ));
27465 let collated = Expression::Collation(Box::new(
27466 crate::expressions::CollationExpr {
27467 this: convert_expr,
27468 collation: "Latin1_General_BIN".to_string(),
27469 quoted: false,
27470 double_quoted: false,
27471 },
27472 ));
27473 let patindex = Expression::Function(Box::new(Function::new(
27474 "PATINDEX".to_string(),
27475 vec![collated, arg],
27476 )));
27477 let zero =
27478 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27479 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27480 left: patindex,
27481 right: zero,
27482 left_comments: Vec::new(),
27483 operator_comments: Vec::new(),
27484 trailing_comments: Vec::new(),
27485 inferred_type: None,
27486 }));
27487 Ok(Expression::Paren(Box::new(Paren {
27488 this: eq_zero,
27489 trailing_comments: Vec::new(),
27490 })))
27491 }
27492 DialectType::Oracle => {
27493 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
27494 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27495 let s1 = Expression::Literal(Box::new(Literal::String(
27496 "^[".to_string(),
27497 )));
27498 let chr1 = Expression::Function(Box::new(Function::new(
27499 "CHR".to_string(),
27500 vec![Expression::Literal(Box::new(Literal::Number(
27501 "1".to_string(),
27502 )))],
27503 )));
27504 let dash =
27505 Expression::Literal(Box::new(Literal::String("-".to_string())));
27506 let chr127 = Expression::Function(Box::new(Function::new(
27507 "CHR".to_string(),
27508 vec![Expression::Literal(Box::new(Literal::Number(
27509 "127".to_string(),
27510 )))],
27511 )));
27512 let s2 = Expression::Literal(Box::new(Literal::String(
27513 "]*$".to_string(),
27514 )));
27515 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27516 let concat1 =
27517 Expression::DPipe(Box::new(crate::expressions::DPipe {
27518 this: Box::new(s1),
27519 expression: Box::new(chr1),
27520 safe: None,
27521 }));
27522 let concat2 =
27523 Expression::DPipe(Box::new(crate::expressions::DPipe {
27524 this: Box::new(concat1),
27525 expression: Box::new(dash),
27526 safe: None,
27527 }));
27528 let concat3 =
27529 Expression::DPipe(Box::new(crate::expressions::DPipe {
27530 this: Box::new(concat2),
27531 expression: Box::new(chr127),
27532 safe: None,
27533 }));
27534 let concat4 =
27535 Expression::DPipe(Box::new(crate::expressions::DPipe {
27536 this: Box::new(concat3),
27537 expression: Box::new(s2),
27538 safe: None,
27539 }));
27540 let regexp_like = Expression::Function(Box::new(Function::new(
27541 "REGEXP_LIKE".to_string(),
27542 vec![arg, concat4],
27543 )));
27544 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
27545 let true_expr =
27546 Expression::Column(Box::new(crate::expressions::Column {
27547 name: Identifier {
27548 name: "TRUE".to_string(),
27549 quoted: false,
27550 trailing_comments: Vec::new(),
27551 span: None,
27552 },
27553 table: None,
27554 join_mark: false,
27555 trailing_comments: Vec::new(),
27556 span: None,
27557 inferred_type: None,
27558 }));
27559 let nvl = Expression::Function(Box::new(Function::new(
27560 "NVL".to_string(),
27561 vec![regexp_like, true_expr],
27562 )));
27563 Ok(nvl)
27564 }
27565 _ => Ok(Expression::Function(Box::new(Function::new(
27566 "IS_ASCII".to_string(),
27567 vec![arg],
27568 )))),
27569 }
27570 } else {
27571 Ok(e)
27572 }
27573 }
27574
27575 Action::StrPositionConvert => {
27576 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
27577 if let Expression::Function(f) = e {
27578 if f.args.len() < 2 {
27579 return Ok(Expression::Function(f));
27580 }
27581 let mut args = f.args;
27582
27583 let haystack = args.remove(0);
27584 let needle = args.remove(0);
27585 let position = if !args.is_empty() {
27586 Some(args.remove(0))
27587 } else {
27588 Option::None
27589 };
27590 let occurrence = if !args.is_empty() {
27591 Some(args.remove(0))
27592 } else {
27593 Option::None
27594 };
27595
27596 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
27597 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
27598 fn build_position_expansion(
27599 haystack: Expression,
27600 needle: Expression,
27601 pos: Expression,
27602 occurrence: Option<Expression>,
27603 inner_func: &str,
27604 wrapper: &str, // "CASE", "IF", "IIF"
27605 ) -> Expression {
27606 let substr = Expression::Function(Box::new(Function::new(
27607 "SUBSTRING".to_string(),
27608 vec![haystack, pos.clone()],
27609 )));
27610 let mut inner_args = vec![substr, needle];
27611 if let Some(occ) = occurrence {
27612 inner_args.push(occ);
27613 }
27614 let inner_call = Expression::Function(Box::new(Function::new(
27615 inner_func.to_string(),
27616 inner_args,
27617 )));
27618 let zero =
27619 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27620 let one =
27621 Expression::Literal(Box::new(Literal::Number("1".to_string())));
27622 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27623 left: inner_call.clone(),
27624 right: zero.clone(),
27625 left_comments: Vec::new(),
27626 operator_comments: Vec::new(),
27627 trailing_comments: Vec::new(),
27628 inferred_type: None,
27629 }));
27630 let add_pos = Expression::Add(Box::new(BinaryOp {
27631 left: inner_call,
27632 right: pos,
27633 left_comments: Vec::new(),
27634 operator_comments: Vec::new(),
27635 trailing_comments: Vec::new(),
27636 inferred_type: None,
27637 }));
27638 let sub_one = Expression::Sub(Box::new(BinaryOp {
27639 left: add_pos,
27640 right: one,
27641 left_comments: Vec::new(),
27642 operator_comments: Vec::new(),
27643 trailing_comments: Vec::new(),
27644 inferred_type: None,
27645 }));
27646
27647 match wrapper {
27648 "CASE" => Expression::Case(Box::new(Case {
27649 operand: Option::None,
27650 whens: vec![(eq_zero, zero)],
27651 else_: Some(sub_one),
27652 comments: Vec::new(),
27653 inferred_type: None,
27654 })),
27655 "IIF" => Expression::Function(Box::new(Function::new(
27656 "IIF".to_string(),
27657 vec![eq_zero, zero, sub_one],
27658 ))),
27659 _ => Expression::Function(Box::new(Function::new(
27660 "IF".to_string(),
27661 vec![eq_zero, zero, sub_one],
27662 ))),
27663 }
27664 }
27665
27666 match target {
27667 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
27668 DialectType::Athena
27669 | DialectType::DuckDB
27670 | DialectType::Presto
27671 | DialectType::Trino
27672 | DialectType::Drill => {
27673 if let Some(pos) = position {
27674 let wrapper = if matches!(target, DialectType::DuckDB) {
27675 "CASE"
27676 } else {
27677 "IF"
27678 };
27679 let result = build_position_expansion(
27680 haystack, needle, pos, occurrence, "STRPOS", wrapper,
27681 );
27682 if matches!(target, DialectType::Drill) {
27683 // Drill uses backtick-quoted `IF`
27684 if let Expression::Function(mut f) = result {
27685 f.name = "`IF`".to_string();
27686 Ok(Expression::Function(f))
27687 } else {
27688 Ok(result)
27689 }
27690 } else {
27691 Ok(result)
27692 }
27693 } else {
27694 Ok(Expression::Function(Box::new(Function::new(
27695 "STRPOS".to_string(),
27696 vec![haystack, needle],
27697 ))))
27698 }
27699 }
27700 // SQLite: IIF wrapper
27701 DialectType::SQLite => {
27702 if let Some(pos) = position {
27703 Ok(build_position_expansion(
27704 haystack, needle, pos, occurrence, "INSTR", "IIF",
27705 ))
27706 } else {
27707 Ok(Expression::Function(Box::new(Function::new(
27708 "INSTR".to_string(),
27709 vec![haystack, needle],
27710 ))))
27711 }
27712 }
27713 // INSTR group: Teradata, BigQuery, Oracle
27714 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
27715 let mut a = vec![haystack, needle];
27716 if let Some(pos) = position {
27717 a.push(pos);
27718 }
27719 if let Some(occ) = occurrence {
27720 a.push(occ);
27721 }
27722 Ok(Expression::Function(Box::new(Function::new(
27723 "INSTR".to_string(),
27724 a,
27725 ))))
27726 }
27727 // CHARINDEX group: Snowflake, TSQL
27728 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
27729 let mut a = vec![needle, haystack];
27730 if let Some(pos) = position {
27731 a.push(pos);
27732 }
27733 Ok(Expression::Function(Box::new(Function::new(
27734 "CHARINDEX".to_string(),
27735 a,
27736 ))))
27737 }
27738 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
27739 DialectType::PostgreSQL
27740 | DialectType::Materialize
27741 | DialectType::RisingWave
27742 | DialectType::Redshift => {
27743 if let Some(pos) = position {
27744 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
27745 // ELSE POSITION(...) + pos - 1 END
27746 let substr = Expression::Substring(Box::new(
27747 crate::expressions::SubstringFunc {
27748 this: haystack,
27749 start: pos.clone(),
27750 length: Option::None,
27751 from_for_syntax: true,
27752 },
27753 ));
27754 let pos_in = Expression::StrPosition(Box::new(
27755 crate::expressions::StrPosition {
27756 this: Box::new(substr),
27757 substr: Some(Box::new(needle)),
27758 position: Option::None,
27759 occurrence: Option::None,
27760 },
27761 ));
27762 let zero = Expression::Literal(Box::new(Literal::Number(
27763 "0".to_string(),
27764 )));
27765 let one = Expression::Literal(Box::new(Literal::Number(
27766 "1".to_string(),
27767 )));
27768 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27769 left: pos_in.clone(),
27770 right: zero.clone(),
27771 left_comments: Vec::new(),
27772 operator_comments: Vec::new(),
27773 trailing_comments: Vec::new(),
27774 inferred_type: None,
27775 }));
27776 let add_pos = Expression::Add(Box::new(BinaryOp {
27777 left: pos_in,
27778 right: pos,
27779 left_comments: Vec::new(),
27780 operator_comments: Vec::new(),
27781 trailing_comments: Vec::new(),
27782 inferred_type: None,
27783 }));
27784 let sub_one = Expression::Sub(Box::new(BinaryOp {
27785 left: add_pos,
27786 right: one,
27787 left_comments: Vec::new(),
27788 operator_comments: Vec::new(),
27789 trailing_comments: Vec::new(),
27790 inferred_type: None,
27791 }));
27792 Ok(Expression::Case(Box::new(Case {
27793 operand: Option::None,
27794 whens: vec![(eq_zero, zero)],
27795 else_: Some(sub_one),
27796 comments: Vec::new(),
27797 inferred_type: None,
27798 })))
27799 } else {
27800 Ok(Expression::StrPosition(Box::new(
27801 crate::expressions::StrPosition {
27802 this: Box::new(haystack),
27803 substr: Some(Box::new(needle)),
27804 position: Option::None,
27805 occurrence: Option::None,
27806 },
27807 )))
27808 }
27809 }
27810 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
27811 DialectType::MySQL
27812 | DialectType::SingleStore
27813 | DialectType::TiDB
27814 | DialectType::Hive
27815 | DialectType::Spark
27816 | DialectType::Databricks
27817 | DialectType::Doris
27818 | DialectType::StarRocks => {
27819 let mut a = vec![needle, haystack];
27820 if let Some(pos) = position {
27821 a.push(pos);
27822 }
27823 Ok(Expression::Function(Box::new(Function::new(
27824 "LOCATE".to_string(),
27825 a,
27826 ))))
27827 }
27828 // ClickHouse: POSITION(haystack, needle[, position])
27829 DialectType::ClickHouse => {
27830 let mut a = vec![haystack, needle];
27831 if let Some(pos) = position {
27832 a.push(pos);
27833 }
27834 Ok(Expression::Function(Box::new(Function::new(
27835 "POSITION".to_string(),
27836 a,
27837 ))))
27838 }
27839 _ => {
27840 let mut a = vec![haystack, needle];
27841 if let Some(pos) = position {
27842 a.push(pos);
27843 }
27844 if let Some(occ) = occurrence {
27845 a.push(occ);
27846 }
27847 Ok(Expression::Function(Box::new(Function::new(
27848 "STR_POSITION".to_string(),
27849 a,
27850 ))))
27851 }
27852 }
27853 } else {
27854 Ok(e)
27855 }
27856 }
27857
27858 Action::ArraySumConvert => {
27859 // ARRAY_SUM(arr) -> dialect-specific
27860 if let Expression::Function(f) = e {
27861 let args = f.args;
27862 match target {
27863 DialectType::DuckDB => Ok(Expression::Function(Box::new(
27864 Function::new("LIST_SUM".to_string(), args),
27865 ))),
27866 DialectType::Spark | DialectType::Databricks => {
27867 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
27868 let arr = args.into_iter().next().unwrap();
27869 let zero =
27870 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27871 let acc_id = Identifier::new("acc");
27872 let x_id = Identifier::new("x");
27873 let acc = Expression::Identifier(acc_id.clone());
27874 let x = Expression::Identifier(x_id.clone());
27875 let add = Expression::Add(Box::new(BinaryOp {
27876 left: acc.clone(),
27877 right: x,
27878 left_comments: Vec::new(),
27879 operator_comments: Vec::new(),
27880 trailing_comments: Vec::new(),
27881 inferred_type: None,
27882 }));
27883 let lambda1 =
27884 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27885 parameters: vec![acc_id.clone(), x_id],
27886 body: add,
27887 colon: false,
27888 parameter_types: Vec::new(),
27889 }));
27890 let lambda2 =
27891 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27892 parameters: vec![acc_id],
27893 body: acc,
27894 colon: false,
27895 parameter_types: Vec::new(),
27896 }));
27897 Ok(Expression::Function(Box::new(Function::new(
27898 "AGGREGATE".to_string(),
27899 vec![arr, zero, lambda1, lambda2],
27900 ))))
27901 }
27902 DialectType::Presto | DialectType::Athena => {
27903 // Presto/Athena keep ARRAY_SUM natively
27904 Ok(Expression::Function(Box::new(Function::new(
27905 "ARRAY_SUM".to_string(),
27906 args,
27907 ))))
27908 }
27909 DialectType::Trino => {
27910 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
27911 if args.len() == 1 {
27912 let arr = args.into_iter().next().unwrap();
27913 let zero = Expression::Literal(Box::new(Literal::Number(
27914 "0".to_string(),
27915 )));
27916 let acc_id = Identifier::new("acc");
27917 let x_id = Identifier::new("x");
27918 let acc = Expression::Identifier(acc_id.clone());
27919 let x = Expression::Identifier(x_id.clone());
27920 let add = Expression::Add(Box::new(BinaryOp {
27921 left: acc.clone(),
27922 right: x,
27923 left_comments: Vec::new(),
27924 operator_comments: Vec::new(),
27925 trailing_comments: Vec::new(),
27926 inferred_type: None,
27927 }));
27928 let lambda1 = Expression::Lambda(Box::new(
27929 crate::expressions::LambdaExpr {
27930 parameters: vec![acc_id.clone(), x_id],
27931 body: add,
27932 colon: false,
27933 parameter_types: Vec::new(),
27934 },
27935 ));
27936 let lambda2 = Expression::Lambda(Box::new(
27937 crate::expressions::LambdaExpr {
27938 parameters: vec![acc_id],
27939 body: acc,
27940 colon: false,
27941 parameter_types: Vec::new(),
27942 },
27943 ));
27944 Ok(Expression::Function(Box::new(Function::new(
27945 "REDUCE".to_string(),
27946 vec![arr, zero, lambda1, lambda2],
27947 ))))
27948 } else {
27949 Ok(Expression::Function(Box::new(Function::new(
27950 "ARRAY_SUM".to_string(),
27951 args,
27952 ))))
27953 }
27954 }
27955 DialectType::ClickHouse => {
27956 // arraySum(lambda, arr) or arraySum(arr)
27957 Ok(Expression::Function(Box::new(Function::new(
27958 "arraySum".to_string(),
27959 args,
27960 ))))
27961 }
27962 _ => Ok(Expression::Function(Box::new(Function::new(
27963 "ARRAY_SUM".to_string(),
27964 args,
27965 )))),
27966 }
27967 } else {
27968 Ok(e)
27969 }
27970 }
27971
27972 Action::ArraySizeConvert => {
27973 if let Expression::Function(f) = e {
27974 Ok(Expression::Function(Box::new(Function::new(
27975 "REPEATED_COUNT".to_string(),
27976 f.args,
27977 ))))
27978 } else {
27979 Ok(e)
27980 }
27981 }
27982
27983 Action::ArrayAnyConvert => {
27984 if let Expression::Function(f) = e {
27985 let mut args = f.args;
27986 if args.len() == 2 {
27987 let arr = args.remove(0);
27988 let lambda = args.remove(0);
27989
27990 // Extract lambda parameter name and body
27991 let (param_name, pred_body) =
27992 if let Expression::Lambda(ref lam) = lambda {
27993 let name = if let Some(p) = lam.parameters.first() {
27994 p.name.clone()
27995 } else {
27996 "x".to_string()
27997 };
27998 (name, lam.body.clone())
27999 } else {
28000 ("x".to_string(), lambda.clone())
28001 };
28002
28003 // Helper: build a function call Expression
28004 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
28005 Expression::Function(Box::new(Function::new(
28006 name.to_string(),
28007 args,
28008 )))
28009 };
28010
28011 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
28012 let build_filter_pattern = |len_func: &str,
28013 len_args_extra: Vec<Expression>,
28014 filter_expr: Expression|
28015 -> Expression {
28016 // len_func(arr, ...extra) = 0
28017 let mut len_arr_args = vec![arr.clone()];
28018 len_arr_args.extend(len_args_extra.clone());
28019 let len_arr = make_func(len_func, len_arr_args);
28020 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
28021 len_arr,
28022 Expression::number(0),
28023 )));
28024
28025 // len_func(filter_expr, ...extra) <> 0
28026 let mut len_filter_args = vec![filter_expr];
28027 len_filter_args.extend(len_args_extra);
28028 let len_filter = make_func(len_func, len_filter_args);
28029 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
28030 len_filter,
28031 Expression::number(0),
28032 )));
28033
28034 // (eq_zero OR neq_zero)
28035 let or_expr =
28036 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
28037 Expression::Paren(Box::new(Paren {
28038 this: or_expr,
28039 trailing_comments: Vec::new(),
28040 }))
28041 };
28042
28043 match target {
28044 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
28045 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
28046 }
28047 DialectType::ClickHouse => {
28048 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
28049 // ClickHouse arrayFilter takes lambda first, then array
28050 let filter_expr =
28051 make_func("arrayFilter", vec![lambda, arr.clone()]);
28052 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
28053 }
28054 DialectType::Databricks | DialectType::Spark => {
28055 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
28056 let filter_expr =
28057 make_func("FILTER", vec![arr.clone(), lambda]);
28058 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
28059 }
28060 DialectType::DuckDB => {
28061 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
28062 let filter_expr =
28063 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
28064 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
28065 }
28066 DialectType::Teradata => {
28067 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
28068 let filter_expr =
28069 make_func("FILTER", vec![arr.clone(), lambda]);
28070 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
28071 }
28072 DialectType::BigQuery => {
28073 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
28074 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
28075 let param_col = Expression::column(¶m_name);
28076 let unnest_expr = Expression::Unnest(Box::new(
28077 crate::expressions::UnnestFunc {
28078 this: arr.clone(),
28079 expressions: vec![],
28080 with_ordinality: false,
28081 alias: Some(Identifier::new(¶m_name)),
28082 offset_alias: None,
28083 },
28084 ));
28085 let mut sel = crate::expressions::Select::default();
28086 sel.expressions = vec![param_col];
28087 sel.from = Some(crate::expressions::From {
28088 expressions: vec![unnest_expr],
28089 });
28090 sel.where_clause =
28091 Some(crate::expressions::Where { this: pred_body });
28092 let array_subquery =
28093 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28094 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
28095 }
28096 DialectType::PostgreSQL => {
28097 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
28098 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
28099 let param_col = Expression::column(¶m_name);
28100 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
28101 let unnest_with_alias =
28102 Expression::Alias(Box::new(crate::expressions::Alias {
28103 this: Expression::Unnest(Box::new(
28104 crate::expressions::UnnestFunc {
28105 this: arr.clone(),
28106 expressions: vec![],
28107 with_ordinality: false,
28108 alias: None,
28109 offset_alias: None,
28110 },
28111 )),
28112 alias: Identifier::new("_t0"),
28113 column_aliases: vec![Identifier::new(¶m_name)],
28114 alias_explicit_as: false,
28115 alias_keyword: None,
28116 pre_alias_comments: Vec::new(),
28117 trailing_comments: Vec::new(),
28118 inferred_type: None,
28119 }));
28120 let mut sel = crate::expressions::Select::default();
28121 sel.expressions = vec![param_col];
28122 sel.from = Some(crate::expressions::From {
28123 expressions: vec![unnest_with_alias],
28124 });
28125 sel.where_clause =
28126 Some(crate::expressions::Where { this: pred_body });
28127 let array_subquery =
28128 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28129 Ok(build_filter_pattern(
28130 "ARRAY_LENGTH",
28131 vec![Expression::number(1)],
28132 array_subquery,
28133 ))
28134 }
28135 _ => Ok(Expression::Function(Box::new(Function::new(
28136 "ARRAY_ANY".to_string(),
28137 vec![arr, lambda],
28138 )))),
28139 }
28140 } else {
28141 Ok(Expression::Function(Box::new(Function::new(
28142 "ARRAY_ANY".to_string(),
28143 args,
28144 ))))
28145 }
28146 } else {
28147 Ok(e)
28148 }
28149 }
28150
28151 Action::DecodeSimplify => {
28152 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
28153 // For literal search values: CASE WHEN x = search THEN result
28154 // For NULL search: CASE WHEN x IS NULL THEN result
28155 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
/// Reports whether `e` is a `Literal`, `Boolean` or `Neg` expression node.
///
/// Only the outermost node is inspected; the operand of a `Neg` is not
/// checked.
fn is_decode_literal(e: &Expression) -> bool {
    matches!(
        e,
        Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
    )
}
28162
28163 let build_decode_case =
28164 |this_expr: Expression,
28165 pairs: Vec<(Expression, Expression)>,
28166 default: Option<Expression>| {
28167 let whens: Vec<(Expression, Expression)> = pairs
28168 .into_iter()
28169 .map(|(search, result)| {
28170 if matches!(&search, Expression::Null(_)) {
28171 // NULL search -> IS NULL
28172 let condition = Expression::Is(Box::new(BinaryOp {
28173 left: this_expr.clone(),
28174 right: Expression::Null(crate::expressions::Null),
28175 left_comments: Vec::new(),
28176 operator_comments: Vec::new(),
28177 trailing_comments: Vec::new(),
28178 inferred_type: None,
28179 }));
28180 (condition, result)
28181 } else if is_decode_literal(&search)
28182 || is_decode_literal(&this_expr)
28183 {
28184 // At least one side is a literal -> simple equality (no NULL check needed)
28185 let eq = Expression::Eq(Box::new(BinaryOp {
28186 left: this_expr.clone(),
28187 right: search,
28188 left_comments: Vec::new(),
28189 operator_comments: Vec::new(),
28190 trailing_comments: Vec::new(),
28191 inferred_type: None,
28192 }));
28193 (eq, result)
28194 } else {
28195 // Non-literal -> null-safe comparison
28196 let needs_paren = matches!(
28197 &search,
28198 Expression::Eq(_)
28199 | Expression::Neq(_)
28200 | Expression::Gt(_)
28201 | Expression::Gte(_)
28202 | Expression::Lt(_)
28203 | Expression::Lte(_)
28204 );
28205 let search_ref = if needs_paren {
28206 Expression::Paren(Box::new(crate::expressions::Paren {
28207 this: search.clone(),
28208 trailing_comments: Vec::new(),
28209 }))
28210 } else {
28211 search.clone()
28212 };
28213 // Build: x = search OR (x IS NULL AND search IS NULL)
28214 let eq = Expression::Eq(Box::new(BinaryOp {
28215 left: this_expr.clone(),
28216 right: search_ref,
28217 left_comments: Vec::new(),
28218 operator_comments: Vec::new(),
28219 trailing_comments: Vec::new(),
28220 inferred_type: None,
28221 }));
28222 let search_in_null = if needs_paren {
28223 Expression::Paren(Box::new(crate::expressions::Paren {
28224 this: search.clone(),
28225 trailing_comments: Vec::new(),
28226 }))
28227 } else {
28228 search.clone()
28229 };
28230 let x_is_null = Expression::Is(Box::new(BinaryOp {
28231 left: this_expr.clone(),
28232 right: Expression::Null(crate::expressions::Null),
28233 left_comments: Vec::new(),
28234 operator_comments: Vec::new(),
28235 trailing_comments: Vec::new(),
28236 inferred_type: None,
28237 }));
28238 let search_is_null = Expression::Is(Box::new(BinaryOp {
28239 left: search_in_null,
28240 right: Expression::Null(crate::expressions::Null),
28241 left_comments: Vec::new(),
28242 operator_comments: Vec::new(),
28243 trailing_comments: Vec::new(),
28244 inferred_type: None,
28245 }));
28246 let both_null = Expression::And(Box::new(BinaryOp {
28247 left: x_is_null,
28248 right: search_is_null,
28249 left_comments: Vec::new(),
28250 operator_comments: Vec::new(),
28251 trailing_comments: Vec::new(),
28252 inferred_type: None,
28253 }));
28254 let condition = Expression::Or(Box::new(BinaryOp {
28255 left: eq,
28256 right: Expression::Paren(Box::new(
28257 crate::expressions::Paren {
28258 this: both_null,
28259 trailing_comments: Vec::new(),
28260 },
28261 )),
28262 left_comments: Vec::new(),
28263 operator_comments: Vec::new(),
28264 trailing_comments: Vec::new(),
28265 inferred_type: None,
28266 }));
28267 (condition, result)
28268 }
28269 })
28270 .collect();
28271 Expression::Case(Box::new(Case {
28272 operand: None,
28273 whens,
28274 else_: default,
28275 comments: Vec::new(),
28276 inferred_type: None,
28277 }))
28278 };
28279
28280 if let Expression::Decode(decode) = e {
28281 Ok(build_decode_case(
28282 decode.this,
28283 decode.search_results,
28284 decode.default,
28285 ))
28286 } else if let Expression::DecodeCase(dc) = e {
28287 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
28288 let mut exprs = dc.expressions;
28289 if exprs.len() < 3 {
28290 return Ok(Expression::DecodeCase(Box::new(
28291 crate::expressions::DecodeCase { expressions: exprs },
28292 )));
28293 }
28294 let this_expr = exprs.remove(0);
28295 let mut pairs = Vec::new();
28296 let mut default = None;
28297 let mut i = 0;
28298 while i + 1 < exprs.len() {
28299 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
28300 i += 2;
28301 }
28302 if i < exprs.len() {
28303 // Odd remaining element is the default
28304 default = Some(exprs[i].clone());
28305 }
28306 Ok(build_decode_case(this_expr, pairs, default))
28307 } else {
28308 Ok(e)
28309 }
28310 }
28311
28312 Action::CreateTableLikeToCtas => {
28313 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
28314 if let Expression::CreateTable(ct) = e {
28315 let like_source = ct.constraints.iter().find_map(|c| {
28316 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28317 Some(source.clone())
28318 } else {
28319 None
28320 }
28321 });
28322 if let Some(source_table) = like_source {
28323 let mut new_ct = *ct;
28324 new_ct.constraints.clear();
28325 // Build: SELECT * FROM b LIMIT 0
28326 let select = Expression::Select(Box::new(crate::expressions::Select {
28327 expressions: vec![Expression::Star(crate::expressions::Star {
28328 table: None,
28329 except: None,
28330 replace: None,
28331 rename: None,
28332 trailing_comments: Vec::new(),
28333 span: None,
28334 })],
28335 from: Some(crate::expressions::From {
28336 expressions: vec![Expression::Table(Box::new(source_table))],
28337 }),
28338 limit: Some(crate::expressions::Limit {
28339 this: Expression::Literal(Box::new(Literal::Number(
28340 "0".to_string(),
28341 ))),
28342 percent: false,
28343 comments: Vec::new(),
28344 }),
28345 ..Default::default()
28346 }));
28347 new_ct.as_select = Some(select);
28348 Ok(Expression::CreateTable(Box::new(new_ct)))
28349 } else {
28350 Ok(Expression::CreateTable(ct))
28351 }
28352 } else {
28353 Ok(e)
28354 }
28355 }
28356
28357 Action::CreateTableLikeToSelectInto => {
28358 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
28359 if let Expression::CreateTable(ct) = e {
28360 let like_source = ct.constraints.iter().find_map(|c| {
28361 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28362 Some(source.clone())
28363 } else {
28364 None
28365 }
28366 });
28367 if let Some(source_table) = like_source {
28368 let mut aliased_source = source_table;
28369 aliased_source.alias = Some(Identifier::new("temp"));
28370 // Build: SELECT TOP 0 * INTO a FROM b AS temp
28371 let select = Expression::Select(Box::new(crate::expressions::Select {
28372 expressions: vec![Expression::Star(crate::expressions::Star {
28373 table: None,
28374 except: None,
28375 replace: None,
28376 rename: None,
28377 trailing_comments: Vec::new(),
28378 span: None,
28379 })],
28380 from: Some(crate::expressions::From {
28381 expressions: vec![Expression::Table(Box::new(aliased_source))],
28382 }),
28383 into: Some(crate::expressions::SelectInto {
28384 this: Expression::Table(Box::new(ct.name.clone())),
28385 temporary: false,
28386 unlogged: false,
28387 bulk_collect: false,
28388 expressions: Vec::new(),
28389 }),
28390 top: Some(crate::expressions::Top {
28391 this: Expression::Literal(Box::new(Literal::Number(
28392 "0".to_string(),
28393 ))),
28394 percent: false,
28395 with_ties: false,
28396 parenthesized: false,
28397 }),
28398 ..Default::default()
28399 }));
28400 Ok(select)
28401 } else {
28402 Ok(Expression::CreateTable(ct))
28403 }
28404 } else {
28405 Ok(e)
28406 }
28407 }
28408
28409 Action::CreateTableLikeToAs => {
28410 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
28411 if let Expression::CreateTable(ct) = e {
28412 let like_source = ct.constraints.iter().find_map(|c| {
28413 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28414 Some(source.clone())
28415 } else {
28416 None
28417 }
28418 });
28419 if let Some(source_table) = like_source {
28420 let mut new_ct = *ct;
28421 new_ct.constraints.clear();
28422 // AS b (just a table reference, not a SELECT)
28423 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
28424 Ok(Expression::CreateTable(Box::new(new_ct)))
28425 } else {
28426 Ok(Expression::CreateTable(ct))
28427 }
28428 } else {
28429 Ok(e)
28430 }
28431 }
28432
28433 Action::TsOrDsToDateConvert => {
28434 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
28435 if let Expression::Function(f) = e {
28436 let mut args = f.args;
28437 let this = args.remove(0);
28438 let fmt = if !args.is_empty() {
28439 match &args[0] {
28440 Expression::Literal(lit)
28441 if matches!(lit.as_ref(), Literal::String(_)) =>
28442 {
28443 let Literal::String(s) = lit.as_ref() else {
28444 unreachable!()
28445 };
28446 Some(s.clone())
28447 }
28448 _ => None,
28449 }
28450 } else {
28451 None
28452 };
28453 Ok(Expression::TsOrDsToDate(Box::new(
28454 crate::expressions::TsOrDsToDate {
28455 this: Box::new(this),
28456 format: fmt,
28457 safe: None,
28458 },
28459 )))
28460 } else {
28461 Ok(e)
28462 }
28463 }
28464
28465 Action::TsOrDsToDateStrConvert => {
28466 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
28467 if let Expression::Function(f) = e {
28468 let arg = f.args.into_iter().next().unwrap();
28469 let str_type = match target {
28470 DialectType::DuckDB
28471 | DialectType::PostgreSQL
28472 | DialectType::Materialize => DataType::Text,
28473 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28474 DataType::Custom {
28475 name: "STRING".to_string(),
28476 }
28477 }
28478 DialectType::Presto
28479 | DialectType::Trino
28480 | DialectType::Athena
28481 | DialectType::Drill => DataType::VarChar {
28482 length: None,
28483 parenthesized_length: false,
28484 },
28485 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
28486 DataType::Custom {
28487 name: "STRING".to_string(),
28488 }
28489 }
28490 _ => DataType::VarChar {
28491 length: None,
28492 parenthesized_length: false,
28493 },
28494 };
28495 let cast_expr = Expression::Cast(Box::new(Cast {
28496 this: arg,
28497 to: str_type,
28498 double_colon_syntax: false,
28499 trailing_comments: Vec::new(),
28500 format: None,
28501 default: None,
28502 inferred_type: None,
28503 }));
28504 Ok(Expression::Substring(Box::new(
28505 crate::expressions::SubstringFunc {
28506 this: cast_expr,
28507 start: Expression::number(1),
28508 length: Some(Expression::number(10)),
28509 from_for_syntax: false,
28510 },
28511 )))
28512 } else {
28513 Ok(e)
28514 }
28515 }
28516
28517 Action::DateStrToDateConvert => {
28518 // DATE_STR_TO_DATE(x) -> dialect-specific
28519 if let Expression::Function(f) = e {
28520 let arg = f.args.into_iter().next().unwrap();
28521 match target {
28522 DialectType::SQLite => {
28523 // SQLite: just the bare expression (dates are strings)
28524 Ok(arg)
28525 }
28526 _ => Ok(Expression::Cast(Box::new(Cast {
28527 this: arg,
28528 to: DataType::Date,
28529 double_colon_syntax: false,
28530 trailing_comments: Vec::new(),
28531 format: None,
28532 default: None,
28533 inferred_type: None,
28534 }))),
28535 }
28536 } else {
28537 Ok(e)
28538 }
28539 }
28540
28541 Action::TimeStrToDateConvert => {
28542 // TIME_STR_TO_DATE(x) -> dialect-specific
28543 if let Expression::Function(f) = e {
28544 let arg = f.args.into_iter().next().unwrap();
28545 match target {
28546 DialectType::Hive
28547 | DialectType::Doris
28548 | DialectType::StarRocks
28549 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
28550 Function::new("TO_DATE".to_string(), vec![arg]),
28551 ))),
28552 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28553 // Presto: CAST(x AS TIMESTAMP)
28554 Ok(Expression::Cast(Box::new(Cast {
28555 this: arg,
28556 to: DataType::Timestamp {
28557 timezone: false,
28558 precision: None,
28559 },
28560 double_colon_syntax: false,
28561 trailing_comments: Vec::new(),
28562 format: None,
28563 default: None,
28564 inferred_type: None,
28565 })))
28566 }
28567 _ => {
28568 // Default: CAST(x AS DATE)
28569 Ok(Expression::Cast(Box::new(Cast {
28570 this: arg,
28571 to: DataType::Date,
28572 double_colon_syntax: false,
28573 trailing_comments: Vec::new(),
28574 format: None,
28575 default: None,
28576 inferred_type: None,
28577 })))
28578 }
28579 }
28580 } else {
28581 Ok(e)
28582 }
28583 }
28584
28585 Action::TimeStrToTimeConvert => {
28586 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
28587 if let Expression::Function(f) = e {
28588 let mut args = f.args;
28589 let this = args.remove(0);
28590 let zone = if !args.is_empty() {
28591 match &args[0] {
28592 Expression::Literal(lit)
28593 if matches!(lit.as_ref(), Literal::String(_)) =>
28594 {
28595 let Literal::String(s) = lit.as_ref() else {
28596 unreachable!()
28597 };
28598 Some(s.clone())
28599 }
28600 _ => None,
28601 }
28602 } else {
28603 None
28604 };
28605 let has_zone = zone.is_some();
28606
28607 match target {
28608 DialectType::SQLite => {
28609 // SQLite: just the bare expression
28610 Ok(this)
28611 }
28612 DialectType::MySQL => {
28613 if has_zone {
28614 // MySQL with zone: TIMESTAMP(x)
28615 Ok(Expression::Function(Box::new(Function::new(
28616 "TIMESTAMP".to_string(),
28617 vec![this],
28618 ))))
28619 } else {
28620 // MySQL: CAST(x AS DATETIME) or with precision
28621 // Use DataType::Custom to avoid MySQL's transform_cast converting
28622 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
28623 let precision = if let Expression::Literal(ref lit) = this {
28624 if let Literal::String(ref s) = lit.as_ref() {
28625 if let Some(dot_pos) = s.rfind('.') {
28626 let frac = &s[dot_pos + 1..];
28627 let digit_count = frac
28628 .chars()
28629 .take_while(|c| c.is_ascii_digit())
28630 .count();
28631 if digit_count > 0 {
28632 Some(digit_count)
28633 } else {
28634 None
28635 }
28636 } else {
28637 None
28638 }
28639 } else {
28640 None
28641 }
28642 } else {
28643 None
28644 };
28645 let type_name = match precision {
28646 Some(p) => format!("DATETIME({})", p),
28647 None => "DATETIME".to_string(),
28648 };
28649 Ok(Expression::Cast(Box::new(Cast {
28650 this,
28651 to: DataType::Custom { name: type_name },
28652 double_colon_syntax: false,
28653 trailing_comments: Vec::new(),
28654 format: None,
28655 default: None,
28656 inferred_type: None,
28657 })))
28658 }
28659 }
28660 DialectType::ClickHouse => {
28661 if has_zone {
28662 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
28663 // We need to strip the timezone offset from the literal if present
28664 let clean_this = if let Expression::Literal(ref lit) = this {
28665 if let Literal::String(ref s) = lit.as_ref() {
28666 // Strip timezone offset like "-08:00" or "+00:00"
28667 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
28668 if let Some(offset_pos) = re_offset {
28669 if offset_pos > 10 {
28670 // After the date part
28671 let trimmed = s[..offset_pos].to_string();
28672 Expression::Literal(Box::new(Literal::String(
28673 trimmed,
28674 )))
28675 } else {
28676 this.clone()
28677 }
28678 } else {
28679 this.clone()
28680 }
28681 } else {
28682 this.clone()
28683 }
28684 } else {
28685 this.clone()
28686 };
28687 let zone_str = zone.unwrap();
28688 // Build: CAST(x AS DateTime64(6, 'zone'))
28689 let type_name = format!("DateTime64(6, '{}')", zone_str);
28690 Ok(Expression::Cast(Box::new(Cast {
28691 this: clean_this,
28692 to: DataType::Custom { name: type_name },
28693 double_colon_syntax: false,
28694 trailing_comments: Vec::new(),
28695 format: None,
28696 default: None,
28697 inferred_type: None,
28698 })))
28699 } else {
28700 Ok(Expression::Cast(Box::new(Cast {
28701 this,
28702 to: DataType::Custom {
28703 name: "DateTime64(6)".to_string(),
28704 },
28705 double_colon_syntax: false,
28706 trailing_comments: Vec::new(),
28707 format: None,
28708 default: None,
28709 inferred_type: None,
28710 })))
28711 }
28712 }
28713 DialectType::BigQuery => {
28714 if has_zone {
28715 // BigQuery with zone: CAST(x AS TIMESTAMP)
28716 Ok(Expression::Cast(Box::new(Cast {
28717 this,
28718 to: DataType::Timestamp {
28719 timezone: false,
28720 precision: None,
28721 },
28722 double_colon_syntax: false,
28723 trailing_comments: Vec::new(),
28724 format: None,
28725 default: None,
28726 inferred_type: None,
28727 })))
28728 } else {
28729 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
28730 Ok(Expression::Cast(Box::new(Cast {
28731 this,
28732 to: DataType::Custom {
28733 name: "DATETIME".to_string(),
28734 },
28735 double_colon_syntax: false,
28736 trailing_comments: Vec::new(),
28737 format: None,
28738 default: None,
28739 inferred_type: None,
28740 })))
28741 }
28742 }
28743 DialectType::Doris => {
28744 // Doris: CAST(x AS DATETIME)
28745 Ok(Expression::Cast(Box::new(Cast {
28746 this,
28747 to: DataType::Custom {
28748 name: "DATETIME".to_string(),
28749 },
28750 double_colon_syntax: false,
28751 trailing_comments: Vec::new(),
28752 format: None,
28753 default: None,
28754 inferred_type: None,
28755 })))
28756 }
28757 DialectType::TSQL | DialectType::Fabric => {
28758 if has_zone {
28759 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
28760 let cast_expr = Expression::Cast(Box::new(Cast {
28761 this,
28762 to: DataType::Custom {
28763 name: "DATETIMEOFFSET".to_string(),
28764 },
28765 double_colon_syntax: false,
28766 trailing_comments: Vec::new(),
28767 format: None,
28768 default: None,
28769 inferred_type: None,
28770 }));
28771 Ok(Expression::AtTimeZone(Box::new(
28772 crate::expressions::AtTimeZone {
28773 this: cast_expr,
28774 zone: Expression::Literal(Box::new(Literal::String(
28775 "UTC".to_string(),
28776 ))),
28777 },
28778 )))
28779 } else {
28780 // TSQL: CAST(x AS DATETIME2)
28781 Ok(Expression::Cast(Box::new(Cast {
28782 this,
28783 to: DataType::Custom {
28784 name: "DATETIME2".to_string(),
28785 },
28786 double_colon_syntax: false,
28787 trailing_comments: Vec::new(),
28788 format: None,
28789 default: None,
28790 inferred_type: None,
28791 })))
28792 }
28793 }
28794 DialectType::DuckDB => {
28795 if has_zone {
28796 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
28797 Ok(Expression::Cast(Box::new(Cast {
28798 this,
28799 to: DataType::Timestamp {
28800 timezone: true,
28801 precision: None,
28802 },
28803 double_colon_syntax: false,
28804 trailing_comments: Vec::new(),
28805 format: None,
28806 default: None,
28807 inferred_type: None,
28808 })))
28809 } else {
28810 // DuckDB: CAST(x AS TIMESTAMP)
28811 Ok(Expression::Cast(Box::new(Cast {
28812 this,
28813 to: DataType::Timestamp {
28814 timezone: false,
28815 precision: None,
28816 },
28817 double_colon_syntax: false,
28818 trailing_comments: Vec::new(),
28819 format: None,
28820 default: None,
28821 inferred_type: None,
28822 })))
28823 }
28824 }
28825 DialectType::PostgreSQL
28826 | DialectType::Materialize
28827 | DialectType::RisingWave => {
28828 if has_zone {
28829 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
28830 Ok(Expression::Cast(Box::new(Cast {
28831 this,
28832 to: DataType::Timestamp {
28833 timezone: true,
28834 precision: None,
28835 },
28836 double_colon_syntax: false,
28837 trailing_comments: Vec::new(),
28838 format: None,
28839 default: None,
28840 inferred_type: None,
28841 })))
28842 } else {
28843 // PostgreSQL: CAST(x AS TIMESTAMP)
28844 Ok(Expression::Cast(Box::new(Cast {
28845 this,
28846 to: DataType::Timestamp {
28847 timezone: false,
28848 precision: None,
28849 },
28850 double_colon_syntax: false,
28851 trailing_comments: Vec::new(),
28852 format: None,
28853 default: None,
28854 inferred_type: None,
28855 })))
28856 }
28857 }
28858 DialectType::Snowflake => {
28859 if has_zone {
28860 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
28861 Ok(Expression::Cast(Box::new(Cast {
28862 this,
28863 to: DataType::Timestamp {
28864 timezone: true,
28865 precision: None,
28866 },
28867 double_colon_syntax: false,
28868 trailing_comments: Vec::new(),
28869 format: None,
28870 default: None,
28871 inferred_type: None,
28872 })))
28873 } else {
28874 // Snowflake: CAST(x AS TIMESTAMP)
28875 Ok(Expression::Cast(Box::new(Cast {
28876 this,
28877 to: DataType::Timestamp {
28878 timezone: false,
28879 precision: None,
28880 },
28881 double_colon_syntax: false,
28882 trailing_comments: Vec::new(),
28883 format: None,
28884 default: None,
28885 inferred_type: None,
28886 })))
28887 }
28888 }
28889 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28890 if has_zone {
28891 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
28892 // Check for precision from sub-second digits
28893 let precision = if let Expression::Literal(ref lit) = this {
28894 if let Literal::String(ref s) = lit.as_ref() {
28895 if let Some(dot_pos) = s.rfind('.') {
28896 let frac = &s[dot_pos + 1..];
28897 let digit_count = frac
28898 .chars()
28899 .take_while(|c| c.is_ascii_digit())
28900 .count();
28901 if digit_count > 0
28902 && matches!(target, DialectType::Trino)
28903 {
28904 Some(digit_count as u32)
28905 } else {
28906 None
28907 }
28908 } else {
28909 None
28910 }
28911 } else {
28912 None
28913 }
28914 } else {
28915 None
28916 };
28917 let dt = if let Some(prec) = precision {
28918 DataType::Timestamp {
28919 timezone: true,
28920 precision: Some(prec),
28921 }
28922 } else {
28923 DataType::Timestamp {
28924 timezone: true,
28925 precision: None,
28926 }
28927 };
28928 Ok(Expression::Cast(Box::new(Cast {
28929 this,
28930 to: dt,
28931 double_colon_syntax: false,
28932 trailing_comments: Vec::new(),
28933 format: None,
28934 default: None,
28935 inferred_type: None,
28936 })))
28937 } else {
28938 // Check for sub-second precision for Trino
28939 let precision = if let Expression::Literal(ref lit) = this {
28940 if let Literal::String(ref s) = lit.as_ref() {
28941 if let Some(dot_pos) = s.rfind('.') {
28942 let frac = &s[dot_pos + 1..];
28943 let digit_count = frac
28944 .chars()
28945 .take_while(|c| c.is_ascii_digit())
28946 .count();
28947 if digit_count > 0
28948 && matches!(target, DialectType::Trino)
28949 {
28950 Some(digit_count as u32)
28951 } else {
28952 None
28953 }
28954 } else {
28955 None
28956 }
28957 } else {
28958 None
28959 }
28960 } else {
28961 None
28962 };
28963 let dt = DataType::Timestamp {
28964 timezone: false,
28965 precision,
28966 };
28967 Ok(Expression::Cast(Box::new(Cast {
28968 this,
28969 to: dt,
28970 double_colon_syntax: false,
28971 trailing_comments: Vec::new(),
28972 format: None,
28973 default: None,
28974 inferred_type: None,
28975 })))
28976 }
28977 }
28978 DialectType::Redshift => {
28979 if has_zone {
28980 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
28981 Ok(Expression::Cast(Box::new(Cast {
28982 this,
28983 to: DataType::Timestamp {
28984 timezone: true,
28985 precision: None,
28986 },
28987 double_colon_syntax: false,
28988 trailing_comments: Vec::new(),
28989 format: None,
28990 default: None,
28991 inferred_type: None,
28992 })))
28993 } else {
28994 // Redshift: CAST(x AS TIMESTAMP)
28995 Ok(Expression::Cast(Box::new(Cast {
28996 this,
28997 to: DataType::Timestamp {
28998 timezone: false,
28999 precision: None,
29000 },
29001 double_colon_syntax: false,
29002 trailing_comments: Vec::new(),
29003 format: None,
29004 default: None,
29005 inferred_type: None,
29006 })))
29007 }
29008 }
29009 _ => {
29010 // Default: CAST(x AS TIMESTAMP)
29011 Ok(Expression::Cast(Box::new(Cast {
29012 this,
29013 to: DataType::Timestamp {
29014 timezone: false,
29015 precision: None,
29016 },
29017 double_colon_syntax: false,
29018 trailing_comments: Vec::new(),
29019 format: None,
29020 default: None,
29021 inferred_type: None,
29022 })))
29023 }
29024 }
29025 } else {
29026 Ok(e)
29027 }
29028 }
29029
29030 Action::DateToDateStrConvert => {
29031 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
29032 if let Expression::Function(f) = e {
29033 let arg = f.args.into_iter().next().unwrap();
29034 let str_type = match target {
29035 DialectType::DuckDB => DataType::Text,
29036 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29037 DataType::Custom {
29038 name: "STRING".to_string(),
29039 }
29040 }
29041 DialectType::Presto
29042 | DialectType::Trino
29043 | DialectType::Athena
29044 | DialectType::Drill => DataType::VarChar {
29045 length: None,
29046 parenthesized_length: false,
29047 },
29048 _ => DataType::VarChar {
29049 length: None,
29050 parenthesized_length: false,
29051 },
29052 };
29053 Ok(Expression::Cast(Box::new(Cast {
29054 this: arg,
29055 to: str_type,
29056 double_colon_syntax: false,
29057 trailing_comments: Vec::new(),
29058 format: None,
29059 default: None,
29060 inferred_type: None,
29061 })))
29062 } else {
29063 Ok(e)
29064 }
29065 }
29066
29067 Action::DateToDiConvert => {
29068 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
29069 if let Expression::Function(f) = e {
29070 let arg = f.args.into_iter().next().unwrap();
29071 let inner = match target {
29072 DialectType::DuckDB => {
29073 // STRFTIME(x, '%Y%m%d')
29074 Expression::Function(Box::new(Function::new(
29075 "STRFTIME".to_string(),
29076 vec![arg, Expression::string("%Y%m%d")],
29077 )))
29078 }
29079 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29080 // DATE_FORMAT(x, 'yyyyMMdd')
29081 Expression::Function(Box::new(Function::new(
29082 "DATE_FORMAT".to_string(),
29083 vec![arg, Expression::string("yyyyMMdd")],
29084 )))
29085 }
29086 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29087 // DATE_FORMAT(x, '%Y%m%d')
29088 Expression::Function(Box::new(Function::new(
29089 "DATE_FORMAT".to_string(),
29090 vec![arg, Expression::string("%Y%m%d")],
29091 )))
29092 }
29093 DialectType::Drill => {
29094 // TO_DATE(x, 'yyyyMMdd')
29095 Expression::Function(Box::new(Function::new(
29096 "TO_DATE".to_string(),
29097 vec![arg, Expression::string("yyyyMMdd")],
29098 )))
29099 }
29100 _ => {
29101 // Default: STRFTIME(x, '%Y%m%d')
29102 Expression::Function(Box::new(Function::new(
29103 "STRFTIME".to_string(),
29104 vec![arg, Expression::string("%Y%m%d")],
29105 )))
29106 }
29107 };
29108 // Use INT (not INTEGER) for Presto/Trino
29109 let int_type = match target {
29110 DialectType::Presto
29111 | DialectType::Trino
29112 | DialectType::Athena
29113 | DialectType::TSQL
29114 | DialectType::Fabric
29115 | DialectType::SQLite
29116 | DialectType::Redshift => DataType::Custom {
29117 name: "INT".to_string(),
29118 },
29119 _ => DataType::Int {
29120 length: None,
29121 integer_spelling: false,
29122 },
29123 };
29124 Ok(Expression::Cast(Box::new(Cast {
29125 this: inner,
29126 to: int_type,
29127 double_colon_syntax: false,
29128 trailing_comments: Vec::new(),
29129 format: None,
29130 default: None,
29131 inferred_type: None,
29132 })))
29133 } else {
29134 Ok(e)
29135 }
29136 }
29137
29138 Action::DiToDateConvert => {
29139 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
29140 if let Expression::Function(f) = e {
29141 let arg = f.args.into_iter().next().unwrap();
29142 match target {
29143 DialectType::DuckDB => {
29144 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
29145 let cast_text = Expression::Cast(Box::new(Cast {
29146 this: arg,
29147 to: DataType::Text,
29148 double_colon_syntax: false,
29149 trailing_comments: Vec::new(),
29150 format: None,
29151 default: None,
29152 inferred_type: None,
29153 }));
29154 let strptime = Expression::Function(Box::new(Function::new(
29155 "STRPTIME".to_string(),
29156 vec![cast_text, Expression::string("%Y%m%d")],
29157 )));
29158 Ok(Expression::Cast(Box::new(Cast {
29159 this: strptime,
29160 to: DataType::Date,
29161 double_colon_syntax: false,
29162 trailing_comments: Vec::new(),
29163 format: None,
29164 default: None,
29165 inferred_type: None,
29166 })))
29167 }
29168 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29169 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
29170 let cast_str = Expression::Cast(Box::new(Cast {
29171 this: arg,
29172 to: DataType::Custom {
29173 name: "STRING".to_string(),
29174 },
29175 double_colon_syntax: false,
29176 trailing_comments: Vec::new(),
29177 format: None,
29178 default: None,
29179 inferred_type: None,
29180 }));
29181 Ok(Expression::Function(Box::new(Function::new(
29182 "TO_DATE".to_string(),
29183 vec![cast_str, Expression::string("yyyyMMdd")],
29184 ))))
29185 }
29186 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29187 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
29188 let cast_varchar = Expression::Cast(Box::new(Cast {
29189 this: arg,
29190 to: DataType::VarChar {
29191 length: None,
29192 parenthesized_length: false,
29193 },
29194 double_colon_syntax: false,
29195 trailing_comments: Vec::new(),
29196 format: None,
29197 default: None,
29198 inferred_type: None,
29199 }));
29200 let date_parse = Expression::Function(Box::new(Function::new(
29201 "DATE_PARSE".to_string(),
29202 vec![cast_varchar, Expression::string("%Y%m%d")],
29203 )));
29204 Ok(Expression::Cast(Box::new(Cast {
29205 this: date_parse,
29206 to: DataType::Date,
29207 double_colon_syntax: false,
29208 trailing_comments: Vec::new(),
29209 format: None,
29210 default: None,
29211 inferred_type: None,
29212 })))
29213 }
29214 DialectType::Drill => {
29215 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
29216 let cast_varchar = Expression::Cast(Box::new(Cast {
29217 this: arg,
29218 to: DataType::VarChar {
29219 length: None,
29220 parenthesized_length: false,
29221 },
29222 double_colon_syntax: false,
29223 trailing_comments: Vec::new(),
29224 format: None,
29225 default: None,
29226 inferred_type: None,
29227 }));
29228 Ok(Expression::Function(Box::new(Function::new(
29229 "TO_DATE".to_string(),
29230 vec![cast_varchar, Expression::string("yyyyMMdd")],
29231 ))))
29232 }
29233 _ => Ok(Expression::Function(Box::new(Function::new(
29234 "DI_TO_DATE".to_string(),
29235 vec![arg],
29236 )))),
29237 }
29238 } else {
29239 Ok(e)
29240 }
29241 }
29242
29243 Action::TsOrDiToDiConvert => {
29244 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
29245 if let Expression::Function(f) = e {
29246 let arg = f.args.into_iter().next().unwrap();
29247 let str_type = match target {
29248 DialectType::DuckDB => DataType::Text,
29249 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29250 DataType::Custom {
29251 name: "STRING".to_string(),
29252 }
29253 }
29254 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29255 DataType::VarChar {
29256 length: None,
29257 parenthesized_length: false,
29258 }
29259 }
29260 _ => DataType::VarChar {
29261 length: None,
29262 parenthesized_length: false,
29263 },
29264 };
29265 let cast_str = Expression::Cast(Box::new(Cast {
29266 this: arg,
29267 to: str_type,
29268 double_colon_syntax: false,
29269 trailing_comments: Vec::new(),
29270 format: None,
29271 default: None,
29272 inferred_type: None,
29273 }));
29274 let replace_expr = Expression::Function(Box::new(Function::new(
29275 "REPLACE".to_string(),
29276 vec![cast_str, Expression::string("-"), Expression::string("")],
29277 )));
29278 let substr_name = match target {
29279 DialectType::DuckDB
29280 | DialectType::Hive
29281 | DialectType::Spark
29282 | DialectType::Databricks => "SUBSTR",
29283 _ => "SUBSTR",
29284 };
29285 let substr = Expression::Function(Box::new(Function::new(
29286 substr_name.to_string(),
29287 vec![replace_expr, Expression::number(1), Expression::number(8)],
29288 )));
29289 // Use INT (not INTEGER) for Presto/Trino etc.
29290 let int_type = match target {
29291 DialectType::Presto
29292 | DialectType::Trino
29293 | DialectType::Athena
29294 | DialectType::TSQL
29295 | DialectType::Fabric
29296 | DialectType::SQLite
29297 | DialectType::Redshift => DataType::Custom {
29298 name: "INT".to_string(),
29299 },
29300 _ => DataType::Int {
29301 length: None,
29302 integer_spelling: false,
29303 },
29304 };
29305 Ok(Expression::Cast(Box::new(Cast {
29306 this: substr,
29307 to: int_type,
29308 double_colon_syntax: false,
29309 trailing_comments: Vec::new(),
29310 format: None,
29311 default: None,
29312 inferred_type: None,
29313 })))
29314 } else {
29315 Ok(e)
29316 }
29317 }
29318
29319 Action::UnixToStrConvert => {
29320 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
29321 if let Expression::Function(f) = e {
29322 let mut args = f.args;
29323 let this = args.remove(0);
29324 let fmt_expr = if !args.is_empty() {
29325 Some(args.remove(0))
29326 } else {
29327 None
29328 };
29329
29330 // Check if format is a string literal
29331 let fmt_str = fmt_expr.as_ref().and_then(|f| {
29332 if let Expression::Literal(lit) = f {
29333 if let Literal::String(s) = lit.as_ref() {
29334 Some(s.clone())
29335 } else {
29336 None
29337 }
29338 } else {
29339 None
29340 }
29341 });
29342
29343 if let Some(fmt_string) = fmt_str {
29344 // String literal format -> use UnixToStr expression (generator handles it)
29345 Ok(Expression::UnixToStr(Box::new(
29346 crate::expressions::UnixToStr {
29347 this: Box::new(this),
29348 format: Some(fmt_string),
29349 },
29350 )))
29351 } else if let Some(fmt_e) = fmt_expr {
29352 // Non-literal format (e.g., identifier `y`) -> build target expression directly
29353 match target {
29354 DialectType::DuckDB => {
29355 // STRFTIME(TO_TIMESTAMP(x), y)
29356 let to_ts = Expression::Function(Box::new(Function::new(
29357 "TO_TIMESTAMP".to_string(),
29358 vec![this],
29359 )));
29360 Ok(Expression::Function(Box::new(Function::new(
29361 "STRFTIME".to_string(),
29362 vec![to_ts, fmt_e],
29363 ))))
29364 }
29365 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29366 // DATE_FORMAT(FROM_UNIXTIME(x), y)
29367 let from_unix = Expression::Function(Box::new(Function::new(
29368 "FROM_UNIXTIME".to_string(),
29369 vec![this],
29370 )));
29371 Ok(Expression::Function(Box::new(Function::new(
29372 "DATE_FORMAT".to_string(),
29373 vec![from_unix, fmt_e],
29374 ))))
29375 }
29376 DialectType::Hive
29377 | DialectType::Spark
29378 | DialectType::Databricks
29379 | DialectType::Doris
29380 | DialectType::StarRocks => {
29381 // FROM_UNIXTIME(x, y)
29382 Ok(Expression::Function(Box::new(Function::new(
29383 "FROM_UNIXTIME".to_string(),
29384 vec![this, fmt_e],
29385 ))))
29386 }
29387 _ => {
29388 // Default: keep as UNIX_TO_STR(x, y)
29389 Ok(Expression::Function(Box::new(Function::new(
29390 "UNIX_TO_STR".to_string(),
29391 vec![this, fmt_e],
29392 ))))
29393 }
29394 }
29395 } else {
29396 Ok(Expression::UnixToStr(Box::new(
29397 crate::expressions::UnixToStr {
29398 this: Box::new(this),
29399 format: None,
29400 },
29401 )))
29402 }
29403 } else {
29404 Ok(e)
29405 }
29406 }
29407
29408 Action::UnixToTimeConvert => {
29409 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
29410 if let Expression::Function(f) = e {
29411 let arg = f.args.into_iter().next().unwrap();
29412 Ok(Expression::UnixToTime(Box::new(
29413 crate::expressions::UnixToTime {
29414 this: Box::new(arg),
29415 scale: None,
29416 zone: None,
29417 hours: None,
29418 minutes: None,
29419 format: None,
29420 target_type: None,
29421 },
29422 )))
29423 } else {
29424 Ok(e)
29425 }
29426 }
29427
29428 Action::UnixToTimeStrConvert => {
29429 // UNIX_TO_TIME_STR(x) -> dialect-specific
29430 if let Expression::Function(f) = e {
29431 let arg = f.args.into_iter().next().unwrap();
29432 match target {
29433 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29434 // FROM_UNIXTIME(x)
29435 Ok(Expression::Function(Box::new(Function::new(
29436 "FROM_UNIXTIME".to_string(),
29437 vec![arg],
29438 ))))
29439 }
29440 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29441 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
29442 let from_unix = Expression::Function(Box::new(Function::new(
29443 "FROM_UNIXTIME".to_string(),
29444 vec![arg],
29445 )));
29446 Ok(Expression::Cast(Box::new(Cast {
29447 this: from_unix,
29448 to: DataType::VarChar {
29449 length: None,
29450 parenthesized_length: false,
29451 },
29452 double_colon_syntax: false,
29453 trailing_comments: Vec::new(),
29454 format: None,
29455 default: None,
29456 inferred_type: None,
29457 })))
29458 }
29459 DialectType::DuckDB => {
29460 // CAST(TO_TIMESTAMP(x) AS TEXT)
29461 let to_ts = Expression::Function(Box::new(Function::new(
29462 "TO_TIMESTAMP".to_string(),
29463 vec![arg],
29464 )));
29465 Ok(Expression::Cast(Box::new(Cast {
29466 this: to_ts,
29467 to: DataType::Text,
29468 double_colon_syntax: false,
29469 trailing_comments: Vec::new(),
29470 format: None,
29471 default: None,
29472 inferred_type: None,
29473 })))
29474 }
29475 _ => Ok(Expression::Function(Box::new(Function::new(
29476 "UNIX_TO_TIME_STR".to_string(),
29477 vec![arg],
29478 )))),
29479 }
29480 } else {
29481 Ok(e)
29482 }
29483 }
29484
29485 Action::TimeToUnixConvert => {
29486 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
29487 if let Expression::Function(f) = e {
29488 let arg = f.args.into_iter().next().unwrap();
29489 Ok(Expression::TimeToUnix(Box::new(
29490 crate::expressions::UnaryFunc {
29491 this: arg,
29492 original_name: None,
29493 inferred_type: None,
29494 },
29495 )))
29496 } else {
29497 Ok(e)
29498 }
29499 }
29500
29501 Action::TimeToStrConvert => {
29502 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
29503 if let Expression::Function(f) = e {
29504 let mut args = f.args;
29505 let this = args.remove(0);
29506 let fmt = match args.remove(0) {
29507 Expression::Literal(lit)
29508 if matches!(lit.as_ref(), Literal::String(_)) =>
29509 {
29510 let Literal::String(s) = lit.as_ref() else {
29511 unreachable!()
29512 };
29513 s.clone()
29514 }
29515 other => {
29516 return Ok(Expression::Function(Box::new(Function::new(
29517 "TIME_TO_STR".to_string(),
29518 vec![this, other],
29519 ))));
29520 }
29521 };
29522 Ok(Expression::TimeToStr(Box::new(
29523 crate::expressions::TimeToStr {
29524 this: Box::new(this),
29525 format: fmt,
29526 culture: None,
29527 zone: None,
29528 },
29529 )))
29530 } else {
29531 Ok(e)
29532 }
29533 }
29534
29535 Action::StrToUnixConvert => {
29536 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
29537 if let Expression::Function(f) = e {
29538 let mut args = f.args;
29539 let this = args.remove(0);
29540 let fmt = match args.remove(0) {
29541 Expression::Literal(lit)
29542 if matches!(lit.as_ref(), Literal::String(_)) =>
29543 {
29544 let Literal::String(s) = lit.as_ref() else {
29545 unreachable!()
29546 };
29547 s.clone()
29548 }
29549 other => {
29550 return Ok(Expression::Function(Box::new(Function::new(
29551 "STR_TO_UNIX".to_string(),
29552 vec![this, other],
29553 ))));
29554 }
29555 };
29556 Ok(Expression::StrToUnix(Box::new(
29557 crate::expressions::StrToUnix {
29558 this: Some(Box::new(this)),
29559 format: Some(fmt),
29560 },
29561 )))
29562 } else {
29563 Ok(e)
29564 }
29565 }
29566
29567 Action::TimeStrToUnixConvert => {
29568 // TIME_STR_TO_UNIX(x) -> dialect-specific
29569 if let Expression::Function(f) = e {
29570 let arg = f.args.into_iter().next().unwrap();
29571 match target {
29572 DialectType::DuckDB => {
29573 // EPOCH(CAST(x AS TIMESTAMP))
29574 let cast_ts = Expression::Cast(Box::new(Cast {
29575 this: arg,
29576 to: DataType::Timestamp {
29577 timezone: false,
29578 precision: None,
29579 },
29580 double_colon_syntax: false,
29581 trailing_comments: Vec::new(),
29582 format: None,
29583 default: None,
29584 inferred_type: None,
29585 }));
29586 Ok(Expression::Function(Box::new(Function::new(
29587 "EPOCH".to_string(),
29588 vec![cast_ts],
29589 ))))
29590 }
29591 DialectType::Hive
29592 | DialectType::Doris
29593 | DialectType::StarRocks
29594 | DialectType::MySQL => {
29595 // UNIX_TIMESTAMP(x)
29596 Ok(Expression::Function(Box::new(Function::new(
29597 "UNIX_TIMESTAMP".to_string(),
29598 vec![arg],
29599 ))))
29600 }
29601 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29602 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
29603 let date_parse = Expression::Function(Box::new(Function::new(
29604 "DATE_PARSE".to_string(),
29605 vec![arg, Expression::string("%Y-%m-%d %T")],
29606 )));
29607 Ok(Expression::Function(Box::new(Function::new(
29608 "TO_UNIXTIME".to_string(),
29609 vec![date_parse],
29610 ))))
29611 }
29612 _ => Ok(Expression::Function(Box::new(Function::new(
29613 "TIME_STR_TO_UNIX".to_string(),
29614 vec![arg],
29615 )))),
29616 }
29617 } else {
29618 Ok(e)
29619 }
29620 }
29621
29622 Action::TimeToTimeStrConvert => {
29623 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
29624 if let Expression::Function(f) = e {
29625 let arg = f.args.into_iter().next().unwrap();
29626 let str_type = match target {
29627 DialectType::DuckDB => DataType::Text,
29628 DialectType::Hive
29629 | DialectType::Spark
29630 | DialectType::Databricks
29631 | DialectType::Doris
29632 | DialectType::StarRocks => DataType::Custom {
29633 name: "STRING".to_string(),
29634 },
29635 DialectType::Redshift => DataType::Custom {
29636 name: "VARCHAR(MAX)".to_string(),
29637 },
29638 _ => DataType::VarChar {
29639 length: None,
29640 parenthesized_length: false,
29641 },
29642 };
29643 Ok(Expression::Cast(Box::new(Cast {
29644 this: arg,
29645 to: str_type,
29646 double_colon_syntax: false,
29647 trailing_comments: Vec::new(),
29648 format: None,
29649 default: None,
29650 inferred_type: None,
29651 })))
29652 } else {
29653 Ok(e)
29654 }
29655 }
29656
29657 Action::DateTruncSwapArgs => {
29658 // DATE_TRUNC('unit', x) from Generic -> target-specific
29659 if let Expression::Function(f) = e {
29660 if f.args.len() == 2 {
29661 let unit_arg = f.args[0].clone();
29662 let expr_arg = f.args[1].clone();
29663 // Extract unit string from the first arg
29664 let unit_str = match &unit_arg {
29665 Expression::Literal(lit)
29666 if matches!(lit.as_ref(), Literal::String(_)) =>
29667 {
29668 let Literal::String(s) = lit.as_ref() else {
29669 unreachable!()
29670 };
29671 s.to_ascii_uppercase()
29672 }
29673 _ => return Ok(Expression::Function(f)),
29674 };
29675 match target {
29676 DialectType::BigQuery => {
29677 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
29678 let unit_ident =
29679 Expression::Column(Box::new(crate::expressions::Column {
29680 name: crate::expressions::Identifier::new(unit_str),
29681 table: None,
29682 join_mark: false,
29683 trailing_comments: Vec::new(),
29684 span: None,
29685 inferred_type: None,
29686 }));
29687 Ok(Expression::Function(Box::new(Function::new(
29688 "DATE_TRUNC".to_string(),
29689 vec![expr_arg, unit_ident],
29690 ))))
29691 }
29692 DialectType::Doris => {
29693 // Doris: DATE_TRUNC(x, 'UNIT')
29694 Ok(Expression::Function(Box::new(Function::new(
29695 "DATE_TRUNC".to_string(),
29696 vec![expr_arg, Expression::string(&unit_str)],
29697 ))))
29698 }
29699 DialectType::StarRocks => {
29700 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
29701 Ok(Expression::Function(Box::new(Function::new(
29702 "DATE_TRUNC".to_string(),
29703 vec![Expression::string(&unit_str), expr_arg],
29704 ))))
29705 }
29706 DialectType::Spark | DialectType::Databricks => {
29707 // Spark: TRUNC(x, 'UNIT')
29708 Ok(Expression::Function(Box::new(Function::new(
29709 "TRUNC".to_string(),
29710 vec![expr_arg, Expression::string(&unit_str)],
29711 ))))
29712 }
29713 DialectType::MySQL => {
29714 // MySQL: complex expansion based on unit
29715 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
29716 }
29717 _ => Ok(Expression::Function(f)),
29718 }
29719 } else {
29720 Ok(Expression::Function(f))
29721 }
29722 } else {
29723 Ok(e)
29724 }
29725 }
29726
29727 Action::TimestampTruncConvert => {
29728 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
29729 if let Expression::Function(f) = e {
29730 if f.args.len() >= 2 {
29731 let expr_arg = f.args[0].clone();
29732 let unit_arg = f.args[1].clone();
29733 let tz_arg = if f.args.len() >= 3 {
29734 Some(f.args[2].clone())
29735 } else {
29736 None
29737 };
29738 // Extract unit string
29739 let unit_str = match &unit_arg {
29740 Expression::Literal(lit)
29741 if matches!(lit.as_ref(), Literal::String(_)) =>
29742 {
29743 let Literal::String(s) = lit.as_ref() else {
29744 unreachable!()
29745 };
29746 s.to_ascii_uppercase()
29747 }
29748 Expression::Column(c) => c.name.name.to_ascii_uppercase(),
29749 _ => {
29750 return Ok(Expression::Function(f));
29751 }
29752 };
29753 match target {
29754 DialectType::Spark | DialectType::Databricks => {
29755 // Spark: DATE_TRUNC('UNIT', x)
29756 Ok(Expression::Function(Box::new(Function::new(
29757 "DATE_TRUNC".to_string(),
29758 vec![Expression::string(&unit_str), expr_arg],
29759 ))))
29760 }
29761 DialectType::Doris | DialectType::StarRocks => {
29762 // Doris: DATE_TRUNC(x, 'UNIT')
29763 Ok(Expression::Function(Box::new(Function::new(
29764 "DATE_TRUNC".to_string(),
29765 vec![expr_arg, Expression::string(&unit_str)],
29766 ))))
29767 }
29768 DialectType::BigQuery => {
29769 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
29770 let unit_ident =
29771 Expression::Column(Box::new(crate::expressions::Column {
29772 name: crate::expressions::Identifier::new(unit_str),
29773 table: None,
29774 join_mark: false,
29775 trailing_comments: Vec::new(),
29776 span: None,
29777 inferred_type: None,
29778 }));
29779 let mut args = vec![expr_arg, unit_ident];
29780 if let Some(tz) = tz_arg {
29781 args.push(tz);
29782 }
29783 Ok(Expression::Function(Box::new(Function::new(
29784 "TIMESTAMP_TRUNC".to_string(),
29785 args,
29786 ))))
29787 }
29788 DialectType::DuckDB => {
29789 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
29790 if let Some(tz) = tz_arg {
29791 let tz_str = match &tz {
29792 Expression::Literal(lit)
29793 if matches!(lit.as_ref(), Literal::String(_)) =>
29794 {
29795 let Literal::String(s) = lit.as_ref() else {
29796 unreachable!()
29797 };
29798 s.clone()
29799 }
29800 _ => "UTC".to_string(),
29801 };
29802 // x AT TIME ZONE 'tz'
29803 let at_tz = Expression::AtTimeZone(Box::new(
29804 crate::expressions::AtTimeZone {
29805 this: expr_arg,
29806 zone: Expression::string(&tz_str),
29807 },
29808 ));
29809 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
29810 let trunc = Expression::Function(Box::new(Function::new(
29811 "DATE_TRUNC".to_string(),
29812 vec![Expression::string(&unit_str), at_tz],
29813 )));
29814 // DATE_TRUNC(...) AT TIME ZONE 'tz'
29815 Ok(Expression::AtTimeZone(Box::new(
29816 crate::expressions::AtTimeZone {
29817 this: trunc,
29818 zone: Expression::string(&tz_str),
29819 },
29820 )))
29821 } else {
29822 Ok(Expression::Function(Box::new(Function::new(
29823 "DATE_TRUNC".to_string(),
29824 vec![Expression::string(&unit_str), expr_arg],
29825 ))))
29826 }
29827 }
29828 DialectType::Presto
29829 | DialectType::Trino
29830 | DialectType::Athena
29831 | DialectType::Snowflake => {
29832 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
29833 Ok(Expression::Function(Box::new(Function::new(
29834 "DATE_TRUNC".to_string(),
29835 vec![Expression::string(&unit_str), expr_arg],
29836 ))))
29837 }
29838 _ => {
29839 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
29840 let mut args = vec![Expression::string(&unit_str), expr_arg];
29841 if let Some(tz) = tz_arg {
29842 args.push(tz);
29843 }
29844 Ok(Expression::Function(Box::new(Function::new(
29845 "DATE_TRUNC".to_string(),
29846 args,
29847 ))))
29848 }
29849 }
29850 } else {
29851 Ok(Expression::Function(f))
29852 }
29853 } else {
29854 Ok(e)
29855 }
29856 }
29857
29858 Action::StrToDateConvert => {
29859 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
29860 if let Expression::Function(f) = e {
29861 if f.args.len() == 2 {
29862 let mut args = f.args;
29863 let this = args.remove(0);
29864 let fmt_expr = args.remove(0);
29865 let fmt_str = match &fmt_expr {
29866 Expression::Literal(lit)
29867 if matches!(lit.as_ref(), Literal::String(_)) =>
29868 {
29869 let Literal::String(s) = lit.as_ref() else {
29870 unreachable!()
29871 };
29872 Some(s.clone())
29873 }
29874 _ => None,
29875 };
29876 let default_date = "%Y-%m-%d";
29877 let default_time = "%Y-%m-%d %H:%M:%S";
29878 let is_default = fmt_str
29879 .as_ref()
29880 .map_or(false, |f| f == default_date || f == default_time);
29881
29882 if is_default {
29883 // Default format: handle per-dialect
29884 match target {
29885 DialectType::MySQL
29886 | DialectType::Doris
29887 | DialectType::StarRocks => {
29888 // Keep STR_TO_DATE(x, fmt) as-is
29889 Ok(Expression::Function(Box::new(Function::new(
29890 "STR_TO_DATE".to_string(),
29891 vec![this, fmt_expr],
29892 ))))
29893 }
29894 DialectType::Hive => {
29895 // Hive: CAST(x AS DATE)
29896 Ok(Expression::Cast(Box::new(Cast {
29897 this,
29898 to: DataType::Date,
29899 double_colon_syntax: false,
29900 trailing_comments: Vec::new(),
29901 format: None,
29902 default: None,
29903 inferred_type: None,
29904 })))
29905 }
29906 DialectType::Presto
29907 | DialectType::Trino
29908 | DialectType::Athena => {
29909 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
29910 let date_parse =
29911 Expression::Function(Box::new(Function::new(
29912 "DATE_PARSE".to_string(),
29913 vec![this, fmt_expr],
29914 )));
29915 Ok(Expression::Cast(Box::new(Cast {
29916 this: date_parse,
29917 to: DataType::Date,
29918 double_colon_syntax: false,
29919 trailing_comments: Vec::new(),
29920 format: None,
29921 default: None,
29922 inferred_type: None,
29923 })))
29924 }
29925 _ => {
29926 // Others: TsOrDsToDate (delegates to generator)
29927 Ok(Expression::TsOrDsToDate(Box::new(
29928 crate::expressions::TsOrDsToDate {
29929 this: Box::new(this),
29930 format: None,
29931 safe: None,
29932 },
29933 )))
29934 }
29935 }
29936 } else if let Some(fmt) = fmt_str {
29937 match target {
29938 DialectType::Doris
29939 | DialectType::StarRocks
29940 | DialectType::MySQL => {
29941 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
29942 let mut normalized = fmt.clone();
29943 normalized = normalized.replace("%-d", "%e");
29944 normalized = normalized.replace("%-m", "%c");
29945 normalized = normalized.replace("%H:%M:%S", "%T");
29946 Ok(Expression::Function(Box::new(Function::new(
29947 "STR_TO_DATE".to_string(),
29948 vec![this, Expression::string(&normalized)],
29949 ))))
29950 }
29951 DialectType::Hive => {
29952 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
29953 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
29954 let unix_ts =
29955 Expression::Function(Box::new(Function::new(
29956 "UNIX_TIMESTAMP".to_string(),
29957 vec![this, Expression::string(&java_fmt)],
29958 )));
29959 let from_unix =
29960 Expression::Function(Box::new(Function::new(
29961 "FROM_UNIXTIME".to_string(),
29962 vec![unix_ts],
29963 )));
29964 Ok(Expression::Cast(Box::new(Cast {
29965 this: from_unix,
29966 to: DataType::Date,
29967 double_colon_syntax: false,
29968 trailing_comments: Vec::new(),
29969 format: None,
29970 default: None,
29971 inferred_type: None,
29972 })))
29973 }
29974 DialectType::Spark | DialectType::Databricks => {
29975 // Spark: TO_DATE(x, java_fmt)
29976 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
29977 Ok(Expression::Function(Box::new(Function::new(
29978 "TO_DATE".to_string(),
29979 vec![this, Expression::string(&java_fmt)],
29980 ))))
29981 }
29982 DialectType::Drill => {
29983 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
29984 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
29985 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
29986 let java_fmt = java_fmt.replace('T', "'T'");
29987 Ok(Expression::Function(Box::new(Function::new(
29988 "TO_DATE".to_string(),
29989 vec![this, Expression::string(&java_fmt)],
29990 ))))
29991 }
29992 _ => {
29993 // For other dialects: use TsOrDsToDate which delegates to generator
29994 Ok(Expression::TsOrDsToDate(Box::new(
29995 crate::expressions::TsOrDsToDate {
29996 this: Box::new(this),
29997 format: Some(fmt),
29998 safe: None,
29999 },
30000 )))
30001 }
30002 }
30003 } else {
30004 // Non-string format - keep as-is
30005 let mut new_args = Vec::new();
30006 new_args.push(this);
30007 new_args.push(fmt_expr);
30008 Ok(Expression::Function(Box::new(Function::new(
30009 "STR_TO_DATE".to_string(),
30010 new_args,
30011 ))))
30012 }
30013 } else {
30014 Ok(Expression::Function(f))
30015 }
30016 } else {
30017 Ok(e)
30018 }
30019 }
30020
30021 Action::TsOrDsAddConvert => {
30022 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
30023 if let Expression::Function(f) = e {
30024 if f.args.len() == 3 {
30025 let mut args = f.args;
30026 let x = args.remove(0);
30027 let n = args.remove(0);
30028 let unit_expr = args.remove(0);
30029 let unit_str = match &unit_expr {
30030 Expression::Literal(lit)
30031 if matches!(lit.as_ref(), Literal::String(_)) =>
30032 {
30033 let Literal::String(s) = lit.as_ref() else {
30034 unreachable!()
30035 };
30036 s.to_ascii_uppercase()
30037 }
30038 _ => "DAY".to_string(),
30039 };
30040
30041 match target {
30042 DialectType::Hive
30043 | DialectType::Spark
30044 | DialectType::Databricks => {
30045 // DATE_ADD(x, n) - only supports DAY unit
30046 Ok(Expression::Function(Box::new(Function::new(
30047 "DATE_ADD".to_string(),
30048 vec![x, n],
30049 ))))
30050 }
30051 DialectType::MySQL => {
30052 // DATE_ADD(x, INTERVAL n UNIT)
30053 let iu = match unit_str.as_str() {
30054 "YEAR" => crate::expressions::IntervalUnit::Year,
30055 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30056 "MONTH" => crate::expressions::IntervalUnit::Month,
30057 "WEEK" => crate::expressions::IntervalUnit::Week,
30058 "HOUR" => crate::expressions::IntervalUnit::Hour,
30059 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30060 "SECOND" => crate::expressions::IntervalUnit::Second,
30061 _ => crate::expressions::IntervalUnit::Day,
30062 };
30063 let interval = Expression::Interval(Box::new(
30064 crate::expressions::Interval {
30065 this: Some(n),
30066 unit: Some(
30067 crate::expressions::IntervalUnitSpec::Simple {
30068 unit: iu,
30069 use_plural: false,
30070 },
30071 ),
30072 },
30073 ));
30074 Ok(Expression::Function(Box::new(Function::new(
30075 "DATE_ADD".to_string(),
30076 vec![x, interval],
30077 ))))
30078 }
30079 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30080 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
30081 let cast_ts = Expression::Cast(Box::new(Cast {
30082 this: x,
30083 to: DataType::Timestamp {
30084 precision: None,
30085 timezone: false,
30086 },
30087 double_colon_syntax: false,
30088 trailing_comments: Vec::new(),
30089 format: None,
30090 default: None,
30091 inferred_type: None,
30092 }));
30093 let cast_date = Expression::Cast(Box::new(Cast {
30094 this: cast_ts,
30095 to: DataType::Date,
30096 double_colon_syntax: false,
30097 trailing_comments: Vec::new(),
30098 format: None,
30099 default: None,
30100 inferred_type: None,
30101 }));
30102 Ok(Expression::Function(Box::new(Function::new(
30103 "DATE_ADD".to_string(),
30104 vec![Expression::string(&unit_str), n, cast_date],
30105 ))))
30106 }
30107 DialectType::DuckDB => {
30108 // CAST(x AS DATE) + INTERVAL n UNIT
30109 let cast_date = Expression::Cast(Box::new(Cast {
30110 this: x,
30111 to: DataType::Date,
30112 double_colon_syntax: false,
30113 trailing_comments: Vec::new(),
30114 format: None,
30115 default: None,
30116 inferred_type: None,
30117 }));
30118 let iu = match unit_str.as_str() {
30119 "YEAR" => crate::expressions::IntervalUnit::Year,
30120 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30121 "MONTH" => crate::expressions::IntervalUnit::Month,
30122 "WEEK" => crate::expressions::IntervalUnit::Week,
30123 "HOUR" => crate::expressions::IntervalUnit::Hour,
30124 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30125 "SECOND" => crate::expressions::IntervalUnit::Second,
30126 _ => crate::expressions::IntervalUnit::Day,
30127 };
30128 let interval = Expression::Interval(Box::new(
30129 crate::expressions::Interval {
30130 this: Some(n),
30131 unit: Some(
30132 crate::expressions::IntervalUnitSpec::Simple {
30133 unit: iu,
30134 use_plural: false,
30135 },
30136 ),
30137 },
30138 ));
30139 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
30140 left: cast_date,
30141 right: interval,
30142 left_comments: Vec::new(),
30143 operator_comments: Vec::new(),
30144 trailing_comments: Vec::new(),
30145 inferred_type: None,
30146 })))
30147 }
30148 DialectType::Drill => {
30149 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
30150 let cast_date = Expression::Cast(Box::new(Cast {
30151 this: x,
30152 to: DataType::Date,
30153 double_colon_syntax: false,
30154 trailing_comments: Vec::new(),
30155 format: None,
30156 default: None,
30157 inferred_type: None,
30158 }));
30159 let iu = match unit_str.as_str() {
30160 "YEAR" => crate::expressions::IntervalUnit::Year,
30161 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30162 "MONTH" => crate::expressions::IntervalUnit::Month,
30163 "WEEK" => crate::expressions::IntervalUnit::Week,
30164 "HOUR" => crate::expressions::IntervalUnit::Hour,
30165 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30166 "SECOND" => crate::expressions::IntervalUnit::Second,
30167 _ => crate::expressions::IntervalUnit::Day,
30168 };
30169 let interval = Expression::Interval(Box::new(
30170 crate::expressions::Interval {
30171 this: Some(n),
30172 unit: Some(
30173 crate::expressions::IntervalUnitSpec::Simple {
30174 unit: iu,
30175 use_plural: false,
30176 },
30177 ),
30178 },
30179 ));
30180 Ok(Expression::Function(Box::new(Function::new(
30181 "DATE_ADD".to_string(),
30182 vec![cast_date, interval],
30183 ))))
30184 }
30185 _ => {
30186 // Default: keep as TS_OR_DS_ADD
30187 Ok(Expression::Function(Box::new(Function::new(
30188 "TS_OR_DS_ADD".to_string(),
30189 vec![x, n, unit_expr],
30190 ))))
30191 }
30192 }
30193 } else {
30194 Ok(Expression::Function(f))
30195 }
30196 } else {
30197 Ok(e)
30198 }
30199 }
30200
30201 Action::DateFromUnixDateConvert => {
30202 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30203 if let Expression::Function(f) = e {
30204 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
30205 if matches!(
30206 target,
30207 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
30208 ) {
30209 return Ok(Expression::Function(Box::new(Function::new(
30210 "DATE_FROM_UNIX_DATE".to_string(),
30211 f.args,
30212 ))));
30213 }
30214 let n = f.args.into_iter().next().unwrap();
30215 let epoch_date = Expression::Cast(Box::new(Cast {
30216 this: Expression::string("1970-01-01"),
30217 to: DataType::Date,
30218 double_colon_syntax: false,
30219 trailing_comments: Vec::new(),
30220 format: None,
30221 default: None,
30222 inferred_type: None,
30223 }));
30224 match target {
30225 DialectType::DuckDB => {
30226 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
30227 let interval =
30228 Expression::Interval(Box::new(crate::expressions::Interval {
30229 this: Some(n),
30230 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30231 unit: crate::expressions::IntervalUnit::Day,
30232 use_plural: false,
30233 }),
30234 }));
30235 Ok(Expression::Add(Box::new(
30236 crate::expressions::BinaryOp::new(epoch_date, interval),
30237 )))
30238 }
30239 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30240 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
30241 Ok(Expression::Function(Box::new(Function::new(
30242 "DATE_ADD".to_string(),
30243 vec![Expression::string("DAY"), n, epoch_date],
30244 ))))
30245 }
30246 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
30247 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30248 Ok(Expression::Function(Box::new(Function::new(
30249 "DATEADD".to_string(),
30250 vec![
30251 Expression::Identifier(Identifier::new("DAY")),
30252 n,
30253 epoch_date,
30254 ],
30255 ))))
30256 }
30257 DialectType::BigQuery => {
30258 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30259 let interval =
30260 Expression::Interval(Box::new(crate::expressions::Interval {
30261 this: Some(n),
30262 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30263 unit: crate::expressions::IntervalUnit::Day,
30264 use_plural: false,
30265 }),
30266 }));
30267 Ok(Expression::Function(Box::new(Function::new(
30268 "DATE_ADD".to_string(),
30269 vec![epoch_date, interval],
30270 ))))
30271 }
30272 DialectType::MySQL
30273 | DialectType::Doris
30274 | DialectType::StarRocks
30275 | DialectType::Drill => {
30276 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30277 let interval =
30278 Expression::Interval(Box::new(crate::expressions::Interval {
30279 this: Some(n),
30280 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30281 unit: crate::expressions::IntervalUnit::Day,
30282 use_plural: false,
30283 }),
30284 }));
30285 Ok(Expression::Function(Box::new(Function::new(
30286 "DATE_ADD".to_string(),
30287 vec![epoch_date, interval],
30288 ))))
30289 }
30290 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30291 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
30292 Ok(Expression::Function(Box::new(Function::new(
30293 "DATE_ADD".to_string(),
30294 vec![epoch_date, n],
30295 ))))
30296 }
30297 DialectType::PostgreSQL
30298 | DialectType::Materialize
30299 | DialectType::RisingWave => {
30300 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
30301 let n_str = match &n {
30302 Expression::Literal(lit)
30303 if matches!(lit.as_ref(), Literal::Number(_)) =>
30304 {
30305 let Literal::Number(s) = lit.as_ref() else {
30306 unreachable!()
30307 };
30308 s.clone()
30309 }
30310 _ => Self::expr_to_string_static(&n),
30311 };
30312 let interval =
30313 Expression::Interval(Box::new(crate::expressions::Interval {
30314 this: Some(Expression::string(&format!("{} DAY", n_str))),
30315 unit: None,
30316 }));
30317 Ok(Expression::Add(Box::new(
30318 crate::expressions::BinaryOp::new(epoch_date, interval),
30319 )))
30320 }
30321 _ => {
30322 // Default: keep as-is
30323 Ok(Expression::Function(Box::new(Function::new(
30324 "DATE_FROM_UNIX_DATE".to_string(),
30325 vec![n],
30326 ))))
30327 }
30328 }
30329 } else {
30330 Ok(e)
30331 }
30332 }
30333
30334 Action::ArrayRemoveConvert => {
30335 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
30336 if let Expression::ArrayRemove(bf) = e {
30337 let arr = bf.this;
30338 let target_val = bf.expression;
30339 match target {
30340 DialectType::DuckDB => {
30341 let u_id = crate::expressions::Identifier::new("_u");
30342 let lambda =
30343 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30344 parameters: vec![u_id.clone()],
30345 body: Expression::Neq(Box::new(BinaryOp {
30346 left: Expression::Identifier(u_id),
30347 right: target_val,
30348 left_comments: Vec::new(),
30349 operator_comments: Vec::new(),
30350 trailing_comments: Vec::new(),
30351 inferred_type: None,
30352 })),
30353 colon: false,
30354 parameter_types: Vec::new(),
30355 }));
30356 Ok(Expression::Function(Box::new(Function::new(
30357 "LIST_FILTER".to_string(),
30358 vec![arr, lambda],
30359 ))))
30360 }
30361 DialectType::ClickHouse => {
30362 let u_id = crate::expressions::Identifier::new("_u");
30363 let lambda =
30364 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30365 parameters: vec![u_id.clone()],
30366 body: Expression::Neq(Box::new(BinaryOp {
30367 left: Expression::Identifier(u_id),
30368 right: target_val,
30369 left_comments: Vec::new(),
30370 operator_comments: Vec::new(),
30371 trailing_comments: Vec::new(),
30372 inferred_type: None,
30373 })),
30374 colon: false,
30375 parameter_types: Vec::new(),
30376 }));
30377 Ok(Expression::Function(Box::new(Function::new(
30378 "arrayFilter".to_string(),
30379 vec![lambda, arr],
30380 ))))
30381 }
30382 DialectType::BigQuery => {
30383 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
30384 let u_id = crate::expressions::Identifier::new("_u");
30385 let u_col =
30386 Expression::Column(Box::new(crate::expressions::Column {
30387 name: u_id.clone(),
30388 table: None,
30389 join_mark: false,
30390 trailing_comments: Vec::new(),
30391 span: None,
30392 inferred_type: None,
30393 }));
30394 let unnest_expr =
30395 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
30396 this: arr,
30397 expressions: Vec::new(),
30398 with_ordinality: false,
30399 alias: None,
30400 offset_alias: None,
30401 }));
30402 let aliased_unnest =
30403 Expression::Alias(Box::new(crate::expressions::Alias {
30404 this: unnest_expr,
30405 alias: u_id.clone(),
30406 column_aliases: Vec::new(),
30407 alias_explicit_as: false,
30408 alias_keyword: None,
30409 pre_alias_comments: Vec::new(),
30410 trailing_comments: Vec::new(),
30411 inferred_type: None,
30412 }));
30413 let where_cond = Expression::Neq(Box::new(BinaryOp {
30414 left: u_col.clone(),
30415 right: target_val,
30416 left_comments: Vec::new(),
30417 operator_comments: Vec::new(),
30418 trailing_comments: Vec::new(),
30419 inferred_type: None,
30420 }));
30421 let subquery = Expression::Select(Box::new(
30422 crate::expressions::Select::new()
30423 .column(u_col)
30424 .from(aliased_unnest)
30425 .where_(where_cond),
30426 ));
30427 Ok(Expression::ArrayFunc(Box::new(
30428 crate::expressions::ArrayConstructor {
30429 expressions: vec![subquery],
30430 bracket_notation: false,
30431 use_list_keyword: false,
30432 },
30433 )))
30434 }
30435 _ => Ok(Expression::ArrayRemove(Box::new(
30436 crate::expressions::BinaryFunc {
30437 original_name: None,
30438 this: arr,
30439 expression: target_val,
30440 inferred_type: None,
30441 },
30442 ))),
30443 }
30444 } else {
30445 Ok(e)
30446 }
30447 }
30448
30449 Action::ArrayReverseConvert => {
30450 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
30451 if let Expression::ArrayReverse(af) = e {
30452 Ok(Expression::Function(Box::new(Function::new(
30453 "arrayReverse".to_string(),
30454 vec![af.this],
30455 ))))
30456 } else {
30457 Ok(e)
30458 }
30459 }
30460
30461 Action::JsonKeysConvert => {
30462 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
30463 if let Expression::JsonKeys(uf) = e {
30464 match target {
30465 DialectType::Spark | DialectType::Databricks => {
30466 Ok(Expression::Function(Box::new(Function::new(
30467 "JSON_OBJECT_KEYS".to_string(),
30468 vec![uf.this],
30469 ))))
30470 }
30471 DialectType::Snowflake => Ok(Expression::Function(Box::new(
30472 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
30473 ))),
30474 _ => Ok(Expression::JsonKeys(uf)),
30475 }
30476 } else {
30477 Ok(e)
30478 }
30479 }
30480
30481 Action::ParseJsonStrip => {
30482 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
30483 if let Expression::ParseJson(uf) = e {
30484 Ok(uf.this)
30485 } else {
30486 Ok(e)
30487 }
30488 }
30489
30490 Action::ArraySizeDrill => {
30491 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
30492 if let Expression::ArraySize(uf) = e {
30493 Ok(Expression::Function(Box::new(Function::new(
30494 "REPEATED_COUNT".to_string(),
30495 vec![uf.this],
30496 ))))
30497 } else {
30498 Ok(e)
30499 }
30500 }
30501
30502 Action::WeekOfYearToWeekIso => {
30503 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
30504 if let Expression::WeekOfYear(uf) = e {
30505 Ok(Expression::Function(Box::new(Function::new(
30506 "WEEKISO".to_string(),
30507 vec![uf.this],
30508 ))))
30509 } else {
30510 Ok(e)
30511 }
30512 }
30513 }
30514 })
30515 }
30516
30517 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
30518 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
30519 use crate::expressions::Function;
30520 match unit {
30521 "DAY" => {
30522 // DATE(x)
30523 Ok(Expression::Function(Box::new(Function::new(
30524 "DATE".to_string(),
30525 vec![expr.clone()],
30526 ))))
30527 }
30528 "WEEK" => {
30529 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
30530 let year_x = Expression::Function(Box::new(Function::new(
30531 "YEAR".to_string(),
30532 vec![expr.clone()],
30533 )));
30534 let week_x = Expression::Function(Box::new(Function::new(
30535 "WEEK".to_string(),
30536 vec![expr.clone(), Expression::number(1)],
30537 )));
30538 let concat_args = vec![
30539 year_x,
30540 Expression::string(" "),
30541 week_x,
30542 Expression::string(" 1"),
30543 ];
30544 let concat = Expression::Function(Box::new(Function::new(
30545 "CONCAT".to_string(),
30546 concat_args,
30547 )));
30548 Ok(Expression::Function(Box::new(Function::new(
30549 "STR_TO_DATE".to_string(),
30550 vec![concat, Expression::string("%Y %u %w")],
30551 ))))
30552 }
30553 "MONTH" => {
30554 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
30555 let year_x = Expression::Function(Box::new(Function::new(
30556 "YEAR".to_string(),
30557 vec![expr.clone()],
30558 )));
30559 let month_x = Expression::Function(Box::new(Function::new(
30560 "MONTH".to_string(),
30561 vec![expr.clone()],
30562 )));
30563 let concat_args = vec![
30564 year_x,
30565 Expression::string(" "),
30566 month_x,
30567 Expression::string(" 1"),
30568 ];
30569 let concat = Expression::Function(Box::new(Function::new(
30570 "CONCAT".to_string(),
30571 concat_args,
30572 )));
30573 Ok(Expression::Function(Box::new(Function::new(
30574 "STR_TO_DATE".to_string(),
30575 vec![concat, Expression::string("%Y %c %e")],
30576 ))))
30577 }
30578 "QUARTER" => {
30579 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
30580 let year_x = Expression::Function(Box::new(Function::new(
30581 "YEAR".to_string(),
30582 vec![expr.clone()],
30583 )));
30584 let quarter_x = Expression::Function(Box::new(Function::new(
30585 "QUARTER".to_string(),
30586 vec![expr.clone()],
30587 )));
30588 // QUARTER(x) * 3 - 2
30589 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
30590 left: quarter_x,
30591 right: Expression::number(3),
30592 left_comments: Vec::new(),
30593 operator_comments: Vec::new(),
30594 trailing_comments: Vec::new(),
30595 inferred_type: None,
30596 }));
30597 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
30598 left: mul,
30599 right: Expression::number(2),
30600 left_comments: Vec::new(),
30601 operator_comments: Vec::new(),
30602 trailing_comments: Vec::new(),
30603 inferred_type: None,
30604 }));
30605 let concat_args = vec![
30606 year_x,
30607 Expression::string(" "),
30608 sub,
30609 Expression::string(" 1"),
30610 ];
30611 let concat = Expression::Function(Box::new(Function::new(
30612 "CONCAT".to_string(),
30613 concat_args,
30614 )));
30615 Ok(Expression::Function(Box::new(Function::new(
30616 "STR_TO_DATE".to_string(),
30617 vec![concat, Expression::string("%Y %c %e")],
30618 ))))
30619 }
30620 "YEAR" => {
30621 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
30622 let year_x = Expression::Function(Box::new(Function::new(
30623 "YEAR".to_string(),
30624 vec![expr.clone()],
30625 )));
30626 let concat_args = vec![year_x, Expression::string(" 1 1")];
30627 let concat = Expression::Function(Box::new(Function::new(
30628 "CONCAT".to_string(),
30629 concat_args,
30630 )));
30631 Ok(Expression::Function(Box::new(Function::new(
30632 "STR_TO_DATE".to_string(),
30633 vec![concat, Expression::string("%Y %c %e")],
30634 ))))
30635 }
30636 _ => {
30637 // Unsupported unit -> keep as DATE_TRUNC
30638 Ok(Expression::Function(Box::new(Function::new(
30639 "DATE_TRUNC".to_string(),
30640 vec![Expression::string(unit), expr.clone()],
30641 ))))
30642 }
30643 }
30644 }
30645
30646 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
30647 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
30648 use crate::expressions::DataType;
30649 match dt {
30650 DataType::VarChar { .. } | DataType::Char { .. } => true,
30651 DataType::Struct { fields, .. } => fields
30652 .iter()
30653 .any(|f| Self::has_varchar_char_type(&f.data_type)),
30654 _ => false,
30655 }
30656 }
30657
30658 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
30659 fn normalize_varchar_to_string(
30660 dt: crate::expressions::DataType,
30661 ) -> crate::expressions::DataType {
30662 use crate::expressions::DataType;
30663 match dt {
30664 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
30665 name: "STRING".to_string(),
30666 },
30667 DataType::Struct { fields, nested } => {
30668 let fields = fields
30669 .into_iter()
30670 .map(|mut f| {
30671 f.data_type = Self::normalize_varchar_to_string(f.data_type);
30672 f
30673 })
30674 .collect();
30675 DataType::Struct { fields, nested }
30676 }
30677 other => other,
30678 }
30679 }
30680
30681 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
30682 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
30683 if let Expression::Literal(ref lit) = expr {
30684 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
30685 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
30686 let trimmed = s.trim();
30687
30688 // Find where digits end and unit text begins
30689 let digit_end = trimmed
30690 .find(|c: char| !c.is_ascii_digit())
30691 .unwrap_or(trimmed.len());
30692 if digit_end == 0 || digit_end == trimmed.len() {
30693 return expr;
30694 }
30695 let num = &trimmed[..digit_end];
30696 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
30697 if unit_text.is_empty() {
30698 return expr;
30699 }
30700
30701 let known_units = [
30702 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
30703 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
30704 ];
30705 if !known_units.contains(&unit_text.as_str()) {
30706 return expr;
30707 }
30708
30709 let unit_str = unit_text.clone();
30710 // Singularize
30711 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
30712 &unit_str[..unit_str.len() - 1]
30713 } else {
30714 &unit_str
30715 };
30716 let unit = unit_singular;
30717
30718 match target {
30719 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30720 // INTERVAL '2' DAY
30721 let iu = match unit {
30722 "DAY" => crate::expressions::IntervalUnit::Day,
30723 "HOUR" => crate::expressions::IntervalUnit::Hour,
30724 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30725 "SECOND" => crate::expressions::IntervalUnit::Second,
30726 "WEEK" => crate::expressions::IntervalUnit::Week,
30727 "MONTH" => crate::expressions::IntervalUnit::Month,
30728 "YEAR" => crate::expressions::IntervalUnit::Year,
30729 _ => return expr,
30730 };
30731 return Expression::Interval(Box::new(crate::expressions::Interval {
30732 this: Some(Expression::string(num)),
30733 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30734 unit: iu,
30735 use_plural: false,
30736 }),
30737 }));
30738 }
30739 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
30740 // INTERVAL '2 DAYS'
30741 let plural = if num != "1" && !unit_str.ends_with('S') {
30742 format!("{} {}S", num, unit)
30743 } else if unit_str.ends_with('S') {
30744 format!("{} {}", num, unit_str)
30745 } else {
30746 format!("{} {}", num, unit)
30747 };
30748 return Expression::Interval(Box::new(crate::expressions::Interval {
30749 this: Some(Expression::string(&plural)),
30750 unit: None,
30751 }));
30752 }
30753 _ => {
30754 // Spark/Databricks/Hive: INTERVAL '1' DAY
30755 let iu = match unit {
30756 "DAY" => crate::expressions::IntervalUnit::Day,
30757 "HOUR" => crate::expressions::IntervalUnit::Hour,
30758 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30759 "SECOND" => crate::expressions::IntervalUnit::Second,
30760 "WEEK" => crate::expressions::IntervalUnit::Week,
30761 "MONTH" => crate::expressions::IntervalUnit::Month,
30762 "YEAR" => crate::expressions::IntervalUnit::Year,
30763 _ => return expr,
30764 };
30765 return Expression::Interval(Box::new(crate::expressions::Interval {
30766 this: Some(Expression::string(num)),
30767 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30768 unit: iu,
30769 use_plural: false,
30770 }),
30771 }));
30772 }
30773 }
30774 }
30775 }
30776 // If it's already an INTERVAL expression, pass through
30777 expr
30778 }
30779
/// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
/// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
/// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
/// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
///
/// The rewrite works on a clone of `select` and:
/// 1. replaces the projection list with one aliased `IF`/`IFF` per projection that
///    contains an UNNEST (projections without an UNNEST are not carried over),
/// 2. adds a position-series source (as FROM when the query has none, otherwise as a
///    CROSS JOIN) followed by one CROSS JOIN per unnested array,
/// 3. ANDs a position-matching predicate onto the WHERE clause.
///
/// Returns `None` when no projection contains an UNNEST, or when `target` is not one of
/// BigQuery, Presto, Trino or Snowflake.
fn rewrite_unnest_expansion(
    select: &crate::expressions::Select,
    target: DialectType,
) -> Option<crate::expressions::Select> {
    use crate::expressions::{
        Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
        UnnestFunc,
    };

    // First position value of the generated series: 1 for Presto/Trino, 0 otherwise.
    let index_offset: i64 = match target {
        DialectType::Presto | DialectType::Trino => 1,
        _ => 0, // BigQuery, Snowflake
    };

    // Name of the conditional function emitted in the projection list.
    let if_func_name = match target {
        DialectType::Snowflake => "IFF",
        _ => "IF",
    };

    // Name of the function used to measure each unnested array.
    let array_length_func = match target {
        DialectType::BigQuery => "ARRAY_LENGTH",
        DialectType::Presto | DialectType::Trino => "CARDINALITY",
        DialectType::Snowflake => "ARRAY_SIZE",
        _ => "ARRAY_LENGTH",
    };

    // When set, column references are qualified with their source alias (`_u.pos`).
    let use_table_aliases = matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Snowflake
    );
    // When set, the conditional gets an explicit NULL as its third argument.
    let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

    // Build a column reference: a qualified `Column` when `table` is given, otherwise a
    // bare `Identifier` expression.
    fn make_col(name: &str, table: Option<&str>) -> Expression {
        if let Some(tbl) = table {
            Expression::boxed_column(Column {
                name: Identifier::new(name.to_string()),
                table: Some(Identifier::new(tbl.to_string())),
                join_mark: false,
                trailing_comments: Vec::new(),
                span: None,
                inferred_type: None,
            })
        } else {
            Expression::Identifier(Identifier::new(name.to_string()))
        }
    }

    // Build a condition-less CROSS JOIN against `this`.
    fn make_join(this: Expression) -> Join {
        Join {
            this,
            on: None,
            using: Vec::new(),
            kind: JoinKind::Cross,
            use_inner_keyword: false,
            use_outer_keyword: false,
            deferred_condition: false,
            join_hint: None,
            match_condition: None,
            pivots: Vec::new(),
            comments: Vec::new(),
            nesting_group: 0,
            directed: false,
        }
    }

    // Collect UNNEST info from SELECT expressions
    struct UnnestInfo {
        // The array expression passed to UNNEST.
        arr_expr: Expression,
        // Generated value-column name: `col`, `col_2`, ...
        col_alias: String,
        // Generated position-column name: `pos_2`, `pos_3`, ...
        pos_alias: String,
        // Generated source alias: `_u_2`, `_u_3`, ...
        source_alias: String,
        // The projection as written, used as the template for the rewritten one.
        original_expr: Expression,
        // Alias of the projection when it was written as `... AS name`.
        has_outer_alias: Option<String>,
    }

    let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
    // Counters are incremented before use. `pos` and `_u` (no suffix) are taken by the
    // series source, so generated position/source names start at suffix 2.
    let mut col_counter = 0usize;
    let mut pos_counter = 1usize;
    let mut source_counter = 1usize;

    // Find the array argument of an UNNEST in `expr`, looking through an alias and
    // through +, -, *, / operands (left operand first). Only the first match is returned.
    fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
        match expr {
            Expression::Unnest(u) => Some(u.this.clone()),
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
            {
                Some(f.args[0].clone())
            }
            Expression::Alias(a) => extract_unnest_arg(&a.this),
            Expression::Add(op)
            | Expression::Sub(op)
            | Expression::Mul(op)
            | Expression::Div(op) => {
                extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
            }
            _ => None,
        }
    }

    // Alias name of a top-level `Alias` node, if `expr` is one.
    fn get_alias_name(expr: &Expression) -> Option<String> {
        if let Expression::Alias(a) = expr {
            Some(a.alias.name.clone())
        } else {
            None
        }
    }

    for sel_expr in &select.expressions {
        if let Some(arr) = extract_unnest_arg(sel_expr) {
            col_counter += 1;
            pos_counter += 1;
            source_counter += 1;

            let col_alias = if col_counter == 1 {
                "col".to_string()
            } else {
                format!("col_{}", col_counter)
            };
            let pos_alias = format!("pos_{}", pos_counter);
            let source_alias = format!("_u_{}", source_counter);
            let has_outer_alias = get_alias_name(sel_expr);

            unnest_infos.push(UnnestInfo {
                arr_expr: arr,
                col_alias,
                pos_alias,
                source_alias,
                original_expr: sel_expr.clone(),
                has_outer_alias,
            });
        }
    }

    // Nothing to rewrite.
    if unnest_infos.is_empty() {
        return None;
    }

    // Column name and source alias of the generated position series.
    let series_alias = "pos".to_string();
    let series_source_alias = "_u".to_string();
    let tbl_ref = if use_table_aliases {
        Some(series_source_alias.as_str())
    } else {
        None
    };

    // Build new SELECT expressions
    let mut new_select_exprs = Vec::new();
    for info in &unnest_infos {
        // A user-supplied alias takes precedence over the generated column name.
        let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
        let src_ref = if use_table_aliases {
            Some(info.source_alias.as_str())
        } else {
            None
        };

        let pos_col = make_col(&series_alias, tbl_ref);
        let unnest_pos_col = make_col(&info.pos_alias, src_ref);
        let col_ref = make_col(actual_col_name, src_ref);

        // IF(pos = pos_n, col[, NULL])
        let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
            pos_col.clone(),
            unnest_pos_col.clone(),
        )));
        let mut if_args = vec![eq_cond, col_ref];
        if null_third_arg {
            if_args.push(Expression::Null(crate::expressions::Null));
        }

        let if_expr =
            Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
        // Substitute the conditional for the UNNEST call inside the original projection,
        // then alias the result.
        let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

        new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
            final_expr,
            Identifier::new(actual_col_name.clone()),
        ))));
    }

    // Build array size expressions for GREATEST
    let size_exprs: Vec<Expression> = unnest_infos
        .iter()
        .map(|info| {
            Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )))
        })
        .collect();

    let greatest =
        Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

    // Last position of the series: GREATEST(...) - 1 when the series starts at 0,
    // GREATEST(...) when it starts at 1.
    let series_end = if index_offset == 0 {
        Expression::Sub(Box::new(BinaryOp::new(
            greatest,
            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
        )))
    } else {
        greatest
    };

    // Build the position array source
    let series_unnest_expr = match target {
        DialectType::BigQuery => {
            // UNNEST(GENERATE_ARRAY(0, GREATEST(...) - 1))
            let gen_array = Expression::Function(Box::new(Function::new(
                "GENERATE_ARRAY".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    series_end,
                ],
            )));
            Expression::Unnest(Box::new(UnnestFunc {
                this: gen_array,
                expressions: Vec::new(),
                with_ordinality: false,
                alias: None,
                offset_alias: None,
            }))
        }
        DialectType::Presto | DialectType::Trino => {
            // UNNEST(SEQUENCE(1, GREATEST(...)))
            let sequence = Expression::Function(Box::new(Function::new(
                "SEQUENCE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                    series_end,
                ],
            )));
            Expression::Unnest(Box::new(UnnestFunc {
                this: sequence,
                expressions: Vec::new(),
                with_ordinality: false,
                alias: None,
                offset_alias: None,
            }))
        }
        DialectType::Snowflake => {
            // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(...) - 1) + 1)))
            let range_end = Expression::Add(Box::new(BinaryOp::new(
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: series_end,
                    trailing_comments: Vec::new(),
                })),
                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            )));
            let gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    range_end,
                ],
            )));
            let flatten_arg =
                Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                    name: Identifier::new("INPUT".to_string()),
                    value: gen_range,
                    separator: crate::expressions::NamedArgSeparator::DArrow,
                }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_arg],
            )));
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
        }
        // Targets without a series construction here are not rewritten.
        _ => return None,
    };

    // Build series alias expression
    let series_alias_expr = if use_table_aliases {
        let col_aliases = if matches!(target, DialectType::Snowflake) {
            // NOTE(review): presumably mirrors the output column order of Snowflake's
            // FLATTEN, with the value column renamed to the series alias — confirm.
            vec![
                Identifier::new("seq".to_string()),
                Identifier::new("key".to_string()),
                Identifier::new("path".to_string()),
                Identifier::new("index".to_string()),
                Identifier::new(series_alias.clone()),
                Identifier::new("this".to_string()),
            ]
        } else {
            vec![Identifier::new(series_alias.clone())]
        };
        // `<series> AS _u(<column aliases>)`
        Expression::Alias(Box::new(Alias {
            this: series_unnest_expr,
            alias: Identifier::new(series_source_alias.clone()),
            column_aliases: col_aliases,
            alias_explicit_as: false,
            alias_keyword: None,
            pre_alias_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        }))
    } else {
        // `<series> AS pos`
        Expression::Alias(Box::new(Alias::new(
            series_unnest_expr,
            Identifier::new(series_alias.clone()),
        )))
    };

    // Build CROSS JOINs for each UNNEST
    let mut joins = Vec::new();
    for info in &unnest_infos {
        let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

        let unnest_join_expr = match target {
            DialectType::BigQuery => {
                // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                let unnest = UnnestFunc {
                    this: info.arr_expr.clone(),
                    expressions: Vec::new(),
                    with_ordinality: true,
                    alias: Some(Identifier::new(actual_col_name.clone())),
                    offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                };
                Expression::Unnest(Box::new(unnest))
            }
            DialectType::Presto | DialectType::Trino => {
                // Ordinality-enabled UNNEST aliased as `_u_n(col, pos_n)`.
                let unnest = UnnestFunc {
                    this: info.arr_expr.clone(),
                    expressions: Vec::new(),
                    with_ordinality: true,
                    alias: None,
                    offset_alias: None,
                };
                Expression::Alias(Box::new(Alias {
                    this: Expression::Unnest(Box::new(unnest)),
                    alias: Identifier::new(info.source_alias.clone()),
                    column_aliases: vec![
                        Identifier::new(actual_col_name.clone()),
                        Identifier::new(info.pos_alias.clone()),
                    ],
                    alias_explicit_as: false,
                    alias_keyword: None,
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => arr)) AS _u_n(seq, key, path, pos_n, col, this)
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: info.arr_expr.clone(),
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                let table_fn = Expression::Function(Box::new(Function::new(
                    "TABLE".to_string(),
                    vec![flatten],
                )));
                Expression::Alias(Box::new(Alias {
                    this: table_fn,
                    alias: Identifier::new(info.source_alias.clone()),
                    column_aliases: vec![
                        Identifier::new("seq".to_string()),
                        Identifier::new("key".to_string()),
                        Identifier::new("path".to_string()),
                        Identifier::new(info.pos_alias.clone()),
                        Identifier::new(actual_col_name.clone()),
                        Identifier::new("this".to_string()),
                    ],
                    alias_explicit_as: false,
                    alias_keyword: None,
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }))
            }
            _ => return None,
        };

        joins.push(make_join(unnest_join_expr));
    }

    // Build WHERE clause
    // One predicate per unnested array:
    //   pos = pos_n OR (pos > <last> AND pos_n = <last>)
    // where <last> is `(size - 1)` for 0-based series and `size` for 1-based ones.
    let mut where_conditions: Vec<Expression> = Vec::new();
    for info in &unnest_infos {
        let src_ref = if use_table_aliases {
            Some(info.source_alias.as_str())
        } else {
            None
        };
        let pos_col = make_col(&series_alias, tbl_ref);
        let unnest_pos_col = make_col(&info.pos_alias, src_ref);

        let arr_size = Expression::Function(Box::new(Function::new(
            array_length_func.to_string(),
            vec![info.arr_expr.clone()],
        )));

        let size_ref = if index_offset == 0 {
            Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::Sub(Box::new(BinaryOp::new(
                    arr_size,
                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                ))),
                trailing_comments: Vec::new(),
            }))
        } else {
            arr_size
        };

        let eq = Expression::Eq(Box::new(BinaryOp::new(
            pos_col.clone(),
            unnest_pos_col.clone(),
        )));
        let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
        let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
        let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
        let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
            this: and_cond,
            trailing_comments: Vec::new(),
        }));
        let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

        where_conditions.push(or_cond);
    }

    // `where_conditions` has one entry per UnnestInfo and `unnest_infos` is non-empty
    // here, so the `unwrap()` calls below cannot hit an empty iterator.
    let where_expr = if where_conditions.len() == 1 {
        // Single condition: no parens needed
        where_conditions.into_iter().next().unwrap()
    } else {
        // Multiple conditions: wrap each OR in parens, then combine with AND
        let wrap = |e: Expression| {
            Expression::Paren(Box::new(crate::expressions::Paren {
                this: e,
                trailing_comments: Vec::new(),
            }))
        };
        let mut iter = where_conditions.into_iter();
        let first = wrap(iter.next().unwrap());
        let second = wrap(iter.next().unwrap());
        // The first pair is additionally wrapped in an outer pair of parentheses.
        let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
            this: Expression::And(Box::new(BinaryOp::new(first, second))),
            trailing_comments: Vec::new(),
        }));
        for cond in iter {
            combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
        }
        combined
    };

    // Build the new SELECT
    let mut new_select = select.clone();
    new_select.expressions = new_select_exprs;

    if new_select.from.is_some() {
        // Existing FROM is kept; the series and every UNNEST source are cross-joined.
        let mut all_joins = vec![make_join(series_alias_expr)];
        all_joins.extend(joins);
        new_select.joins.extend(all_joins);
    } else {
        // No FROM: the series becomes the FROM source.
        new_select.from = Some(From {
            expressions: vec![series_alias_expr],
        });
        new_select.joins.extend(joins);
    }

    // NOTE(review): neither operand is wrapped in Paren here. With a single UNNEST,
    // `where_expr` is a bare OR, and the existing WHERE may itself be an OR. Confirm
    // the generator parenthesizes by precedence, otherwise `a AND b OR c` is emitted.
    if let Some(ref existing_where) = new_select.where_clause {
        let combined = Expression::And(Box::new(BinaryOp::new(
            existing_where.this.clone(),
            where_expr,
        )));
        new_select.where_clause = Some(crate::expressions::Where { this: combined });
    } else {
        new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
    }

    Some(new_select)
}
31253
31254 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
31255 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
31256 match original {
31257 Expression::Unnest(_) => replacement.clone(),
31258 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
31259 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
31260 Expression::Add(op) => {
31261 let left = Self::replace_unnest_with_if(&op.left, replacement);
31262 let right = Self::replace_unnest_with_if(&op.right, replacement);
31263 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
31264 }
31265 Expression::Sub(op) => {
31266 let left = Self::replace_unnest_with_if(&op.left, replacement);
31267 let right = Self::replace_unnest_with_if(&op.right, replacement);
31268 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
31269 }
31270 Expression::Mul(op) => {
31271 let left = Self::replace_unnest_with_if(&op.left, replacement);
31272 let right = Self::replace_unnest_with_if(&op.right, replacement);
31273 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
31274 }
31275 Expression::Div(op) => {
31276 let left = Self::replace_unnest_with_if(&op.left, replacement);
31277 let right = Self::replace_unnest_with_if(&op.right, replacement);
31278 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
31279 }
31280 _ => original.clone(),
31281 }
31282 }
31283
/// Split a JSON path such as `$.y[0].z` into its segments: `["y", "0", "z"]`.
///
/// A leading `$.` or `$` is removed first. Dots separate segments. Bracketed content
/// becomes its own segment, with surrounding single or double quotes removed (so
/// `["a b"]` yields `a b`). A bracket whose content is exactly `*` is dropped. An
/// unterminated bracket or quote simply runs to the end of the input.
fn decompose_json_path(path: &str) -> Vec<String> {
    let body = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);

    let mut segments = Vec::new();
    let mut pending = String::new();
    let mut stream = body.chars();

    while let Some(ch) = stream.next() {
        match ch {
            '.' | '[' => {
                // Both separators close the segment collected so far.
                if !pending.is_empty() {
                    segments.push(std::mem::take(&mut pending));
                }
                if ch == '[' {
                    let mut inside = String::new();
                    while let Some(c) = stream.next() {
                        match c {
                            ']' => break,
                            '"' | '\'' => {
                                // Copy verbatim up to the matching quote, which is
                                // consumed but not stored.
                                for quoted in stream.by_ref() {
                                    if quoted == c {
                                        break;
                                    }
                                    inside.push(quoted);
                                }
                            }
                            plain => inside.push(plain),
                        }
                    }
                    if inside != "*" {
                        segments.push(inside);
                    }
                }
            }
            other => pending.push(other),
        }
    }

    if !pending.is_empty() {
        segments.push(pending);
    }
    segments
}
31351
/// Drop the JSON path root marker, returning the remainder as an owned string.
///
/// A leading `$.` is removed if present, otherwise a leading `$`; paths without either
/// are returned as-is: `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`.
fn strip_json_dollar_prefix(path: &str) -> String {
    let remainder = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);
    remainder.to_owned()
}
31363
/// Remove every `[*]` wildcard from a JSON path.
/// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
///
/// After the wildcards are removed, each `..` pair is collapsed to a single `.` and
/// trailing dots are trimmed. Note that the collapse applies to any `..` in the input,
/// not only those left behind by a removed wildcard.
fn strip_json_wildcards(path: &str) -> String {
    let without_wildcards = path.replace("[*]", "");
    let single_dotted = without_wildcards.replace("..", ".");
    single_dotted.trim_end_matches('.').to_owned()
}
31372
/// Rewrite quoted bracket segments of a JSON path as quoted dot segments.
/// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$."key"`, `$['k']` -> `$."k"`
///
/// Per bracket:
/// * content equal to `*` is emitted as `[*]`,
/// * content that contained a quoted part is emitted as `."content"` (always with
///   double quotes),
/// * anything else (e.g. a numeric index) is emitted as `[content]`.
///
/// Characters outside brackets are copied through unchanged. An unterminated bracket is
/// read to the end of the input and still emitted in one of the forms above.
fn bracket_to_dot_notation(path: &str) -> String {
    let mut out = String::new();
    let mut stream = path.chars();

    while let Some(ch) = stream.next() {
        if ch != '[' {
            out.push(ch);
            continue;
        }

        // Gather the bracket's content, remembering whether any of it was quoted.
        let mut inside = String::new();
        let mut saw_quote = false;
        while let Some(c) = stream.next() {
            match c {
                ']' => break,
                '"' | '\'' => {
                    saw_quote = true;
                    // Copy up to the matching quote; the closing quote is consumed.
                    for quoted in stream.by_ref() {
                        if quoted == c {
                            break;
                        }
                        inside.push(quoted);
                    }
                }
                plain => inside.push(plain),
            }
        }

        if inside == "*" {
            // Keep wildcard as-is
            out.push_str("[*]");
        } else if saw_quote {
            // Quoted bracket -> dot notation with quotes
            out.push('.');
            out.push('"');
            out.push_str(&inside);
            out.push('"');
        } else {
            // Unquoted content (such as a numeric index) stays in brackets
            out.push('[');
            out.push_str(&inside);
            out.push(']');
        }
    }

    out
}
31428
/// Switch double-quoted bracket keys in a JSON path to single quotes.
///
/// Only a `[` immediately followed by `"` is rewritten; the key text up to the next `"` is
/// copied verbatim and wrapped in single quotes. Everything else, including the closing `]`,
/// passes through unchanged. If the closing `"` is missing, the rest of the input is treated as
/// the key.
///
/// `$["a b"]` -> `$['a b']`
fn bracket_to_single_quotes(path: &str) -> String {
    let mut out = String::new();
    let mut rest = path.chars().peekable();
    while let Some(ch) = rest.next() {
        let opens_quoted_key = ch == '[' && rest.peek() == Some(&'"');
        if !opens_quoted_key {
            out.push(ch);
            continue;
        }
        rest.next(); // skip the opening "
        out.push_str("['");
        // `take_while` consumes the closing " as well, without emitting it.
        out.extend(rest.by_ref().take_while(|&c| c != '"'));
        out.push('\'');
    }
    out
}
31455
31456 /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
31457 /// or PostgreSQL #temp -> TEMPORARY.
31458 /// Also strips # from INSERT INTO #table for non-TSQL targets.
31459 fn transform_select_into(
31460 expr: Expression,
31461 _source: DialectType,
31462 target: DialectType,
31463 ) -> Expression {
31464 use crate::expressions::{CreateTable, Expression, TableRef};
31465
31466 // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
31467 if let Expression::Insert(ref insert) = expr {
31468 if insert.table.name.name.starts_with('#')
31469 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
31470 {
31471 let mut new_insert = insert.clone();
31472 new_insert.table.name.name =
31473 insert.table.name.name.trim_start_matches('#').to_string();
31474 return Expression::Insert(new_insert);
31475 }
31476 return expr;
31477 }
31478
31479 if let Expression::Select(ref select) = expr {
31480 if let Some(ref into) = select.into {
31481 let table_name_raw = match &into.this {
31482 Expression::Table(tr) => tr.name.name.clone(),
31483 Expression::Identifier(id) => id.name.clone(),
31484 _ => String::new(),
31485 };
31486 let is_temp = table_name_raw.starts_with('#') || into.temporary;
31487 let clean_name = table_name_raw.trim_start_matches('#').to_string();
31488
31489 match target {
31490 DialectType::DuckDB | DialectType::Snowflake => {
31491 // SELECT INTO -> CREATE TABLE AS SELECT
31492 let mut new_select = select.clone();
31493 new_select.into = None;
31494 let ct = CreateTable {
31495 name: TableRef::new(clean_name),
31496 on_cluster: None,
31497 columns: Vec::new(),
31498 constraints: Vec::new(),
31499 if_not_exists: false,
31500 temporary: is_temp,
31501 or_replace: false,
31502 table_modifier: None,
31503 as_select: Some(Expression::Select(new_select)),
31504 as_select_parenthesized: false,
31505 on_commit: None,
31506 clone_source: None,
31507 clone_at_clause: None,
31508 shallow_clone: false,
31509 deep_clone: false,
31510 is_copy: false,
31511 leading_comments: Vec::new(),
31512 with_properties: Vec::new(),
31513 teradata_post_name_options: Vec::new(),
31514 with_data: None,
31515 with_statistics: None,
31516 teradata_indexes: Vec::new(),
31517 with_cte: None,
31518 properties: Vec::new(),
31519 partition_of: None,
31520 post_table_properties: Vec::new(),
31521 mysql_table_options: Vec::new(),
31522 inherits: Vec::new(),
31523 on_property: None,
31524 copy_grants: false,
31525 using_template: None,
31526 rollup: None,
31527 uuid: None,
31528 with_partition_columns: Vec::new(),
31529 with_connection: None,
31530 };
31531 return Expression::CreateTable(Box::new(ct));
31532 }
31533 DialectType::PostgreSQL | DialectType::Redshift => {
31534 // PostgreSQL: #foo -> INTO TEMPORARY foo
31535 if is_temp && !into.temporary {
31536 let mut new_select = select.clone();
31537 let mut new_into = into.clone();
31538 new_into.temporary = true;
31539 new_into.unlogged = false;
31540 new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
31541 new_select.into = Some(new_into);
31542 Expression::Select(new_select)
31543 } else {
31544 expr
31545 }
31546 }
31547 _ => expr,
31548 }
31549 } else {
31550 expr
31551 }
31552 } else {
31553 expr
31554 }
31555 }
31556
31557 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
31558 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
31559 fn transform_create_table_properties(
31560 ct: &mut crate::expressions::CreateTable,
31561 _source: DialectType,
31562 target: DialectType,
31563 ) {
31564 use crate::expressions::{
31565 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
31566 Properties,
31567 };
31568
31569 // Helper to convert a raw property value string to the correct Expression
31570 let value_to_expr = |v: &str| -> Expression {
31571 let trimmed = v.trim();
31572 // Check if it's a quoted string (starts and ends with ')
31573 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
31574 Expression::Literal(Box::new(Literal::String(
31575 trimmed[1..trimmed.len() - 1].to_string(),
31576 )))
31577 }
31578 // Check if it's a number
31579 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
31580 Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
31581 }
31582 // Check if it's ARRAY[...] or ARRAY(...)
31583 else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31584 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
31585 let inner = trimmed
31586 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
31587 .trim_start_matches('[')
31588 .trim_start_matches('(')
31589 .trim_end_matches(']')
31590 .trim_end_matches(')');
31591 let elements: Vec<Expression> = inner
31592 .split(',')
31593 .map(|e| {
31594 let elem = e.trim().trim_matches('\'');
31595 Expression::Literal(Box::new(Literal::String(elem.to_string())))
31596 })
31597 .collect();
31598 Expression::Function(Box::new(crate::expressions::Function::new(
31599 "ARRAY".to_string(),
31600 elements,
31601 )))
31602 }
31603 // Otherwise, just output as identifier (unquoted)
31604 else {
31605 Expression::Identifier(Identifier::new(trimmed.to_string()))
31606 }
31607 };
31608
31609 if ct.with_properties.is_empty() && ct.properties.is_empty() {
31610 return;
31611 }
31612
31613 // Handle Presto-style WITH properties
31614 if !ct.with_properties.is_empty() {
31615 // Extract FORMAT property and remaining properties
31616 let mut format_value: Option<String> = None;
31617 let mut partitioned_by: Option<String> = None;
31618 let mut other_props: Vec<(String, String)> = Vec::new();
31619
31620 for (key, value) in ct.with_properties.drain(..) {
31621 if key.eq_ignore_ascii_case("FORMAT") {
31622 // Strip surrounding quotes from value if present
31623 format_value = Some(value.trim_matches('\'').to_string());
31624 } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
31625 partitioned_by = Some(value);
31626 } else {
31627 other_props.push((key, value));
31628 }
31629 }
31630
31631 match target {
31632 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31633 // Presto: keep WITH properties but lowercase 'format' key
31634 if let Some(fmt) = format_value {
31635 ct.with_properties
31636 .push(("format".to_string(), format!("'{}'", fmt)));
31637 }
31638 if let Some(part) = partitioned_by {
31639 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
31640 let trimmed = part.trim();
31641 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
31642 // Also handle ARRAY['...'] format - keep as-is
31643 if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31644 ct.with_properties
31645 .push(("PARTITIONED_BY".to_string(), part));
31646 } else {
31647 // Parse column names from the parenthesized list
31648 let cols: Vec<&str> = inner
31649 .split(',')
31650 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
31651 .collect();
31652 let array_val = format!(
31653 "ARRAY[{}]",
31654 cols.iter()
31655 .map(|c| format!("'{}'", c))
31656 .collect::<Vec<_>>()
31657 .join(", ")
31658 );
31659 ct.with_properties
31660 .push(("PARTITIONED_BY".to_string(), array_val));
31661 }
31662 }
31663 ct.with_properties.extend(other_props);
31664 }
31665 DialectType::Hive => {
31666 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
31667 if let Some(fmt) = format_value {
31668 ct.properties.push(Expression::FileFormatProperty(Box::new(
31669 FileFormatProperty {
31670 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31671 expressions: vec![],
31672 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
31673 value: true,
31674 }))),
31675 },
31676 )));
31677 }
31678 if let Some(_part) = partitioned_by {
31679 // PARTITIONED_BY handling is complex - move columns to partitioned by
31680 // For now, the partition columns are extracted from the column list
31681 Self::apply_partitioned_by(ct, &_part, target);
31682 }
31683 if !other_props.is_empty() {
31684 let eq_exprs: Vec<Expression> = other_props
31685 .into_iter()
31686 .map(|(k, v)| {
31687 Expression::Eq(Box::new(BinaryOp::new(
31688 Expression::Literal(Box::new(Literal::String(k))),
31689 value_to_expr(&v),
31690 )))
31691 })
31692 .collect();
31693 ct.properties
31694 .push(Expression::Properties(Box::new(Properties {
31695 expressions: eq_exprs,
31696 })));
31697 }
31698 }
31699 DialectType::Spark | DialectType::Databricks => {
31700 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
31701 if let Some(fmt) = format_value {
31702 ct.properties.push(Expression::FileFormatProperty(Box::new(
31703 FileFormatProperty {
31704 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31705 expressions: vec![],
31706 hive_format: None, // None means USING syntax
31707 },
31708 )));
31709 }
31710 if let Some(_part) = partitioned_by {
31711 Self::apply_partitioned_by(ct, &_part, target);
31712 }
31713 if !other_props.is_empty() {
31714 let eq_exprs: Vec<Expression> = other_props
31715 .into_iter()
31716 .map(|(k, v)| {
31717 Expression::Eq(Box::new(BinaryOp::new(
31718 Expression::Literal(Box::new(Literal::String(k))),
31719 value_to_expr(&v),
31720 )))
31721 })
31722 .collect();
31723 ct.properties
31724 .push(Expression::Properties(Box::new(Properties {
31725 expressions: eq_exprs,
31726 })));
31727 }
31728 }
31729 DialectType::DuckDB => {
31730 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
31731 // Keep nothing
31732 }
31733 _ => {
31734 // For other dialects, keep WITH properties as-is
31735 if let Some(fmt) = format_value {
31736 ct.with_properties
31737 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
31738 }
31739 if let Some(part) = partitioned_by {
31740 ct.with_properties
31741 .push(("PARTITIONED_BY".to_string(), part));
31742 }
31743 ct.with_properties.extend(other_props);
31744 }
31745 }
31746 }
31747
31748 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
31749 // and Hive STORED AS -> Presto WITH (format=...) conversion
31750 if !ct.properties.is_empty() {
31751 let is_presto_target = matches!(
31752 target,
31753 DialectType::Presto | DialectType::Trino | DialectType::Athena
31754 );
31755 let is_duckdb_target = matches!(target, DialectType::DuckDB);
31756
31757 if is_presto_target || is_duckdb_target {
31758 let mut new_properties = Vec::new();
31759 for prop in ct.properties.drain(..) {
31760 match &prop {
31761 Expression::FileFormatProperty(ffp) => {
31762 if is_presto_target {
31763 // Convert STORED AS/USING to WITH (format=...)
31764 if let Some(ref fmt_expr) = ffp.this {
31765 let fmt_str = match fmt_expr.as_ref() {
31766 Expression::Identifier(id) => id.name.clone(),
31767 Expression::Literal(lit)
31768 if matches!(lit.as_ref(), Literal::String(_)) =>
31769 {
31770 let Literal::String(s) = lit.as_ref() else {
31771 unreachable!()
31772 };
31773 s.clone()
31774 }
31775 _ => {
31776 new_properties.push(prop);
31777 continue;
31778 }
31779 };
31780 ct.with_properties
31781 .push(("format".to_string(), format!("'{}'", fmt_str)));
31782 }
31783 }
31784 // DuckDB: just strip file format properties
31785 }
31786 // Convert TBLPROPERTIES to WITH properties for Presto target
31787 Expression::Properties(props) if is_presto_target => {
31788 for expr in &props.expressions {
31789 if let Expression::Eq(eq) = expr {
31790 // Extract key and value from the Eq expression
31791 let key = match &eq.left {
31792 Expression::Literal(lit)
31793 if matches!(lit.as_ref(), Literal::String(_)) =>
31794 {
31795 let Literal::String(s) = lit.as_ref() else {
31796 unreachable!()
31797 };
31798 s.clone()
31799 }
31800 Expression::Identifier(id) => id.name.clone(),
31801 _ => continue,
31802 };
31803 let value = match &eq.right {
31804 Expression::Literal(lit)
31805 if matches!(lit.as_ref(), Literal::String(_)) =>
31806 {
31807 let Literal::String(s) = lit.as_ref() else {
31808 unreachable!()
31809 };
31810 format!("'{}'", s)
31811 }
31812 Expression::Literal(lit)
31813 if matches!(lit.as_ref(), Literal::Number(_)) =>
31814 {
31815 let Literal::Number(n) = lit.as_ref() else {
31816 unreachable!()
31817 };
31818 n.clone()
31819 }
31820 Expression::Identifier(id) => id.name.clone(),
31821 _ => continue,
31822 };
31823 ct.with_properties.push((key, value));
31824 }
31825 }
31826 }
31827 // Convert PartitionedByProperty for Presto target
31828 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
31829 // Check if it contains ColumnDef expressions (Hive-style with types)
31830 if let Expression::Tuple(ref tuple) = *pbp.this {
31831 let mut col_names: Vec<String> = Vec::new();
31832 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
31833 let mut has_col_defs = false;
31834 for expr in &tuple.expressions {
31835 if let Expression::ColumnDef(ref cd) = expr {
31836 has_col_defs = true;
31837 col_names.push(cd.name.name.clone());
31838 col_defs.push(*cd.clone());
31839 } else if let Expression::Column(ref col) = expr {
31840 col_names.push(col.name.name.clone());
31841 } else if let Expression::Identifier(ref id) = expr {
31842 col_names.push(id.name.clone());
31843 } else {
31844 // For function expressions like MONTHS(y), serialize to SQL
31845 let generic = Dialect::get(DialectType::Generic);
31846 if let Ok(sql) = generic.generate(expr) {
31847 col_names.push(sql);
31848 }
31849 }
31850 }
31851 if has_col_defs {
31852 // Merge partition column defs into the main column list
31853 for cd in col_defs {
31854 ct.columns.push(cd);
31855 }
31856 }
31857 if !col_names.is_empty() {
31858 // Add PARTITIONED_BY property
31859 let array_val = format!(
31860 "ARRAY[{}]",
31861 col_names
31862 .iter()
31863 .map(|n| format!("'{}'", n))
31864 .collect::<Vec<_>>()
31865 .join(", ")
31866 );
31867 ct.with_properties
31868 .push(("PARTITIONED_BY".to_string(), array_val));
31869 }
31870 }
31871 // Skip - don't keep in properties
31872 }
31873 _ => {
31874 if !is_duckdb_target {
31875 new_properties.push(prop);
31876 }
31877 }
31878 }
31879 }
31880 ct.properties = new_properties;
31881 } else {
31882 // For Hive/Spark targets, unquote format names in STORED AS
31883 for prop in &mut ct.properties {
31884 if let Expression::FileFormatProperty(ref mut ffp) = prop {
31885 if let Some(ref mut fmt_expr) = ffp.this {
31886 if let Expression::Literal(lit) = fmt_expr.as_ref() {
31887 if let Literal::String(s) = lit.as_ref() {
31888 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
31889 let unquoted = s.clone();
31890 *fmt_expr =
31891 Box::new(Expression::Identifier(Identifier::new(unquoted)));
31892 }
31893 }
31894 }
31895 }
31896 }
31897 }
31898 }
31899 }
31900
31901 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
31902 fn apply_partitioned_by(
31903 ct: &mut crate::expressions::CreateTable,
31904 partitioned_by_value: &str,
31905 target: DialectType,
31906 ) {
31907 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
31908
31909 // Parse the ARRAY['col1', 'col2'] value to extract column names
31910 let mut col_names: Vec<String> = Vec::new();
31911 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
31912 let inner = partitioned_by_value
31913 .trim()
31914 .trim_start_matches("ARRAY")
31915 .trim_start_matches('[')
31916 .trim_start_matches('(')
31917 .trim_end_matches(']')
31918 .trim_end_matches(')');
31919 for part in inner.split(',') {
31920 let col = part.trim().trim_matches('\'').trim_matches('"');
31921 if !col.is_empty() {
31922 col_names.push(col.to_string());
31923 }
31924 }
31925
31926 if col_names.is_empty() {
31927 return;
31928 }
31929
31930 if matches!(target, DialectType::Hive) {
31931 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
31932 let mut partition_col_defs = Vec::new();
31933 for col_name in &col_names {
31934 // Find and remove from columns
31935 if let Some(pos) = ct
31936 .columns
31937 .iter()
31938 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
31939 {
31940 let col_def = ct.columns.remove(pos);
31941 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
31942 }
31943 }
31944 if !partition_col_defs.is_empty() {
31945 ct.properties
31946 .push(Expression::PartitionedByProperty(Box::new(
31947 PartitionedByProperty {
31948 this: Box::new(Expression::Tuple(Box::new(Tuple {
31949 expressions: partition_col_defs,
31950 }))),
31951 },
31952 )));
31953 }
31954 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
31955 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
31956 // Use quoted identifiers to match the quoting style of the original column definitions
31957 let partition_exprs: Vec<Expression> = col_names
31958 .iter()
31959 .map(|name| {
31960 // Check if the column exists in the column list and use its quoting
31961 let is_quoted = ct
31962 .columns
31963 .iter()
31964 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
31965 let ident = if is_quoted {
31966 Identifier::quoted(name.clone())
31967 } else {
31968 Identifier::new(name.clone())
31969 };
31970 Expression::boxed_column(Column {
31971 name: ident,
31972 table: None,
31973 join_mark: false,
31974 trailing_comments: Vec::new(),
31975 span: None,
31976 inferred_type: None,
31977 })
31978 })
31979 .collect();
31980 ct.properties
31981 .push(Expression::PartitionedByProperty(Box::new(
31982 PartitionedByProperty {
31983 this: Box::new(Expression::Tuple(Box::new(Tuple {
31984 expressions: partition_exprs,
31985 }))),
31986 },
31987 )));
31988 }
31989 // DuckDB: strip partitioned_by entirely (already handled)
31990 }
31991
31992 /// Convert a DataType to Spark's type string format (using angle brackets)
31993 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
31994 use crate::expressions::DataType;
31995 match dt {
31996 DataType::Int { .. } => "INT".to_string(),
31997 DataType::BigInt { .. } => "BIGINT".to_string(),
31998 DataType::SmallInt { .. } => "SMALLINT".to_string(),
31999 DataType::TinyInt { .. } => "TINYINT".to_string(),
32000 DataType::Float { .. } => "FLOAT".to_string(),
32001 DataType::Double { .. } => "DOUBLE".to_string(),
32002 DataType::Decimal {
32003 precision: Some(p),
32004 scale: Some(s),
32005 } => format!("DECIMAL({}, {})", p, s),
32006 DataType::Decimal {
32007 precision: Some(p), ..
32008 } => format!("DECIMAL({})", p),
32009 DataType::Decimal { .. } => "DECIMAL".to_string(),
32010 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
32011 "STRING".to_string()
32012 }
32013 DataType::Char { .. } => "STRING".to_string(),
32014 DataType::Boolean => "BOOLEAN".to_string(),
32015 DataType::Date => "DATE".to_string(),
32016 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
32017 DataType::Json | DataType::JsonB => "STRING".to_string(),
32018 DataType::Binary { .. } => "BINARY".to_string(),
32019 DataType::Array { element_type, .. } => {
32020 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
32021 }
32022 DataType::Map {
32023 key_type,
32024 value_type,
32025 } => format!(
32026 "MAP<{}, {}>",
32027 Self::data_type_to_spark_string(key_type),
32028 Self::data_type_to_spark_string(value_type)
32029 ),
32030 DataType::Struct { fields, .. } => {
32031 let field_strs: Vec<String> = fields
32032 .iter()
32033 .map(|f| {
32034 if f.name.is_empty() {
32035 Self::data_type_to_spark_string(&f.data_type)
32036 } else {
32037 format!(
32038 "{}: {}",
32039 f.name,
32040 Self::data_type_to_spark_string(&f.data_type)
32041 )
32042 }
32043 })
32044 .collect();
32045 format!("STRUCT<{}>", field_strs.join(", "))
32046 }
32047 DataType::Custom { name } => name.clone(),
32048 _ => format!("{:?}", dt),
32049 }
32050 }
32051
32052 /// Extract value and unit from an Interval expression
32053 /// Returns (value_expression, IntervalUnit)
32054 fn extract_interval_parts(
32055 interval_expr: &Expression,
32056 ) -> Option<(Expression, crate::expressions::IntervalUnit)> {
32057 use crate::expressions::{DataType, IntervalUnit, IntervalUnitSpec, Literal};
32058
32059 fn unit_from_str(unit: &str) -> Option<IntervalUnit> {
32060 match unit.trim().to_ascii_uppercase().as_str() {
32061 "YEAR" | "YEARS" => Some(IntervalUnit::Year),
32062 "QUARTER" | "QUARTERS" => Some(IntervalUnit::Quarter),
32063 "MONTH" | "MONTHS" => Some(IntervalUnit::Month),
32064 "WEEK" | "WEEKS" | "ISOWEEK" => Some(IntervalUnit::Week),
32065 "DAY" | "DAYS" => Some(IntervalUnit::Day),
32066 "HOUR" | "HOURS" => Some(IntervalUnit::Hour),
32067 "MINUTE" | "MINUTES" => Some(IntervalUnit::Minute),
32068 "SECOND" | "SECONDS" => Some(IntervalUnit::Second),
32069 "MILLISECOND" | "MILLISECONDS" => Some(IntervalUnit::Millisecond),
32070 "MICROSECOND" | "MICROSECONDS" => Some(IntervalUnit::Microsecond),
32071 "NANOSECOND" | "NANOSECONDS" => Some(IntervalUnit::Nanosecond),
32072 _ => None,
32073 }
32074 }
32075
32076 fn parts_from_literal_string(s: &str) -> Option<(Expression, IntervalUnit)> {
32077 let mut parts = s.split_whitespace();
32078 let value = parts.next()?;
32079 let unit = unit_from_str(parts.next()?)?;
32080 Some((
32081 Expression::Literal(Box::new(Literal::String(value.to_string()))),
32082 unit,
32083 ))
32084 }
32085
32086 fn unit_from_spec(unit: &IntervalUnitSpec) -> Option<IntervalUnit> {
32087 match unit {
32088 IntervalUnitSpec::Simple { unit, .. } => Some(*unit),
32089 IntervalUnitSpec::Expr(expr) => match expr.as_ref() {
32090 Expression::Day(_) => Some(IntervalUnit::Day),
32091 Expression::Month(_) => Some(IntervalUnit::Month),
32092 Expression::Year(_) => Some(IntervalUnit::Year),
32093 Expression::Identifier(id) => unit_from_str(&id.name),
32094 Expression::Var(v) => unit_from_str(&v.this),
32095 Expression::Column(col) => unit_from_str(&col.name.name),
32096 _ => None,
32097 },
32098 _ => None,
32099 }
32100 }
32101
32102 match interval_expr {
32103 Expression::Interval(iv) => {
32104 let val = iv.this.clone().unwrap_or(Expression::number(0));
32105 if let Expression::Literal(lit) = &val {
32106 if let Literal::String(s) = lit.as_ref() {
32107 if let Some(parts) = parts_from_literal_string(s) {
32108 return Some(parts);
32109 }
32110 }
32111 }
32112 let unit = iv
32113 .unit
32114 .as_ref()
32115 .and_then(unit_from_spec)
32116 .unwrap_or(IntervalUnit::Day);
32117 Some((val, unit))
32118 }
32119 Expression::Cast(cast) if matches!(cast.to, DataType::Interval { .. }) => {
32120 if let Expression::Literal(lit) = &cast.this {
32121 if let Literal::String(s) = lit.as_ref() {
32122 if let Some(parts) = parts_from_literal_string(s) {
32123 return Some(parts);
32124 }
32125 }
32126 }
32127 let unit = match &cast.to {
32128 DataType::Interval {
32129 unit: Some(unit), ..
32130 } => unit_from_str(unit).unwrap_or(IntervalUnit::Day),
32131 _ => IntervalUnit::Day,
32132 };
32133 Some((cast.this.clone(), unit))
32134 }
32135 _ => None,
32136 }
32137 }
32138
32139 fn rewrite_tsql_interval_arithmetic(expr: &Expression) -> Option<Expression> {
32140 match expr {
32141 Expression::Add(op) => {
32142 Self::extract_interval_parts(&op.right)?;
32143 Some(Self::build_tsql_dateadd_from_interval(
32144 op.left.clone(),
32145 &op.right,
32146 false,
32147 ))
32148 }
32149 Expression::Sub(op) => {
32150 Self::extract_interval_parts(&op.right)?;
32151 Some(Self::build_tsql_dateadd_from_interval(
32152 op.left.clone(),
32153 &op.right,
32154 true,
32155 ))
32156 }
32157 _ => None,
32158 }
32159 }
32160
32161 fn build_tsql_dateadd_from_interval(
32162 date: Expression,
32163 interval: &Expression,
32164 subtract: bool,
32165 ) -> Expression {
32166 let (value, unit) = Self::extract_interval_parts(interval)
32167 .unwrap_or_else(|| (interval.clone(), crate::expressions::IntervalUnit::Day));
32168 let unit = Self::interval_unit_to_string(&unit);
32169 let amount = Self::tsql_dateadd_amount(value, subtract);
32170
32171 Expression::Function(Box::new(Function::new(
32172 "DATEADD".to_string(),
32173 vec![Expression::Identifier(Identifier::new(unit)), amount, date],
32174 )))
32175 }
32176
32177 fn tsql_dateadd_amount(value: Expression, negate: bool) -> Expression {
32178 use crate::expressions::{Parameter, ParameterStyle, UnaryOp};
32179
32180 fn numeric_literal_value(value: &Expression) -> Option<&str> {
32181 match value {
32182 Expression::Literal(lit) => match lit.as_ref() {
32183 crate::expressions::Literal::Number(n)
32184 | crate::expressions::Literal::String(n) => Some(n.as_str()),
32185 _ => None,
32186 },
32187 _ => None,
32188 }
32189 }
32190
32191 fn colon_parameter(value: &Expression) -> Option<Expression> {
32192 let Expression::Literal(lit) = value else {
32193 return None;
32194 };
32195 let crate::expressions::Literal::String(s) = lit.as_ref() else {
32196 return None;
32197 };
32198 let name = s.strip_prefix(':')?;
32199 if name.is_empty()
32200 || !name
32201 .chars()
32202 .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
32203 {
32204 return None;
32205 }
32206
32207 Some(Expression::Parameter(Box::new(Parameter {
32208 name: if name.chars().all(|ch| ch.is_ascii_digit()) {
32209 None
32210 } else {
32211 Some(name.to_string())
32212 },
32213 index: name.parse::<u32>().ok(),
32214 style: ParameterStyle::Colon,
32215 quoted: false,
32216 string_quoted: false,
32217 expression: None,
32218 })))
32219 }
32220
32221 let value = colon_parameter(&value).unwrap_or(value);
32222
32223 if let Some(n) = numeric_literal_value(&value) {
32224 if let Ok(parsed) = n.parse::<f64>() {
32225 let normalized = if negate { -parsed } else { parsed };
32226 let rendered = if normalized.fract() == 0.0 {
32227 format!("{}", normalized as i64)
32228 } else {
32229 normalized.to_string()
32230 };
32231 return Expression::Literal(Box::new(crate::expressions::Literal::Number(
32232 rendered,
32233 )));
32234 }
32235 }
32236
32237 if !negate {
32238 return value;
32239 }
32240
32241 match value {
32242 Expression::Neg(op) => op.this,
32243 other => Expression::Neg(Box::new(UnaryOp {
32244 this: other,
32245 inferred_type: None,
32246 })),
32247 }
32248 }
32249
32250 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
32251 fn normalize_bigquery_function(
32252 e: Expression,
32253 source: DialectType,
32254 target: DialectType,
32255 ) -> Result<Expression> {
32256 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
32257
32258 let f = if let Expression::Function(f) = e {
32259 *f
32260 } else {
32261 return Ok(e);
32262 };
32263 let name = f.name.to_ascii_uppercase();
32264 let mut args = f.args;
32265
32266 /// Helper to extract unit string from an identifier, column, or literal expression
32267 fn get_unit_str(expr: &Expression) -> String {
32268 match expr {
32269 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
32270 Expression::Var(v) => v.this.to_ascii_uppercase(),
32271 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
32272 let Literal::String(s) = lit.as_ref() else {
32273 unreachable!()
32274 };
32275 s.to_ascii_uppercase()
32276 }
32277 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
32278 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
32279 Expression::Function(f) => {
32280 let base = f.name.to_ascii_uppercase();
32281 if !f.args.is_empty() {
32282 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
32283 let inner = get_unit_str(&f.args[0]);
32284 format!("{}({})", base, inner)
32285 } else {
32286 base
32287 }
32288 }
32289 _ => "DAY".to_string(),
32290 }
32291 }
32292
32293 /// Parse unit string to IntervalUnit
32294 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
32295 match s {
32296 "YEAR" => crate::expressions::IntervalUnit::Year,
32297 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
32298 "MONTH" => crate::expressions::IntervalUnit::Month,
32299 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
32300 "DAY" => crate::expressions::IntervalUnit::Day,
32301 "HOUR" => crate::expressions::IntervalUnit::Hour,
32302 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32303 "SECOND" => crate::expressions::IntervalUnit::Second,
32304 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
32305 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
32306 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
32307 _ => crate::expressions::IntervalUnit::Day,
32308 }
32309 }
32310
32311 match name.as_str() {
32312 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
32313 // (BigQuery: result = date1 - date2, Standard: result = end - start)
32314 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
32315 let date1 = args.remove(0);
32316 let date2 = args.remove(0);
32317 let unit_expr = args.remove(0);
32318 let unit_str = get_unit_str(&unit_expr);
32319
32320 if matches!(target, DialectType::BigQuery) {
32321 // BigQuery -> BigQuery: just uppercase the unit
32322 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
32323 return Ok(Expression::Function(Box::new(Function::new(
32324 f.name,
32325 vec![date1, date2, unit],
32326 ))));
32327 }
32328
32329 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
32330 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
32331 if matches!(target, DialectType::Snowflake) {
32332 return Ok(Expression::TimestampDiff(Box::new(
32333 crate::expressions::TimestampDiff {
32334 this: Box::new(date2),
32335 expression: Box::new(date1),
32336 unit: Some(unit_str),
32337 },
32338 )));
32339 }
32340
32341 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
32342 if matches!(target, DialectType::DuckDB) {
32343 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
32344 // CAST to TIME
32345 let cast_fn = |e: Expression| -> Expression {
32346 match e {
32347 Expression::Literal(lit)
32348 if matches!(lit.as_ref(), Literal::String(_)) =>
32349 {
32350 let Literal::String(s) = lit.as_ref() else {
32351 unreachable!()
32352 };
32353 Expression::Cast(Box::new(Cast {
32354 this: Expression::Literal(Box::new(Literal::String(
32355 s.clone(),
32356 ))),
32357 to: DataType::Custom {
32358 name: "TIME".to_string(),
32359 },
32360 trailing_comments: vec![],
32361 double_colon_syntax: false,
32362 format: None,
32363 default: None,
32364 inferred_type: None,
32365 }))
32366 }
32367 other => other,
32368 }
32369 };
32370 (cast_fn(date1), cast_fn(date2))
32371 } else if name == "DATETIME_DIFF" {
32372 // CAST to TIMESTAMP
32373 (
32374 Self::ensure_cast_timestamp(date1),
32375 Self::ensure_cast_timestamp(date2),
32376 )
32377 } else {
32378 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
32379 (
32380 Self::ensure_cast_timestamptz(date1),
32381 Self::ensure_cast_timestamptz(date2),
32382 )
32383 };
32384 return Ok(Expression::Function(Box::new(Function::new(
32385 "DATE_DIFF".to_string(),
32386 vec![
32387 Expression::Literal(Box::new(Literal::String(unit_str))),
32388 cast_d2,
32389 cast_d1,
32390 ],
32391 ))));
32392 }
32393
32394 // Convert to standard TIMESTAMPDIFF(unit, start, end)
32395 let unit = Expression::Identifier(Identifier::new(unit_str));
32396 Ok(Expression::Function(Box::new(Function::new(
32397 "TIMESTAMPDIFF".to_string(),
32398 vec![unit, date2, date1],
32399 ))))
32400 }
32401
32402 // DATEDIFF(unit, start, end) -> target-specific form
32403 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
32404 "DATEDIFF" if args.len() == 3 => {
32405 let arg0 = args.remove(0);
32406 let arg1 = args.remove(0);
32407 let arg2 = args.remove(0);
32408 let unit_str = get_unit_str(&arg0);
32409
32410 // Redshift DATEDIFF(unit, start, end) order: result = end - start
32411 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
32412 // TSQL DATEDIFF(unit, start, end) order: result = end - start
32413
32414 if matches!(target, DialectType::Snowflake) {
32415 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
32416 let unit = Expression::Identifier(Identifier::new(unit_str));
32417 return Ok(Expression::Function(Box::new(Function::new(
32418 "DATEDIFF".to_string(),
32419 vec![unit, arg1, arg2],
32420 ))));
32421 }
32422
32423 if matches!(target, DialectType::DuckDB) {
32424 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
32425 let cast_d1 = Self::ensure_cast_timestamp(arg1);
32426 let cast_d2 = Self::ensure_cast_timestamp(arg2);
32427 return Ok(Expression::Function(Box::new(Function::new(
32428 "DATE_DIFF".to_string(),
32429 vec![
32430 Expression::Literal(Box::new(Literal::String(unit_str))),
32431 cast_d1,
32432 cast_d2,
32433 ],
32434 ))));
32435 }
32436
32437 if matches!(target, DialectType::BigQuery) {
32438 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
32439 let cast_d1 = Self::ensure_cast_datetime(arg1);
32440 let cast_d2 = Self::ensure_cast_datetime(arg2);
32441 let unit = Expression::Identifier(Identifier::new(unit_str));
32442 return Ok(Expression::Function(Box::new(Function::new(
32443 "DATE_DIFF".to_string(),
32444 vec![cast_d2, cast_d1, unit],
32445 ))));
32446 }
32447
32448 if matches!(target, DialectType::Spark | DialectType::Databricks) {
32449 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
32450 let unit = Expression::Identifier(Identifier::new(unit_str));
32451 return Ok(Expression::Function(Box::new(Function::new(
32452 "DATEDIFF".to_string(),
32453 vec![unit, arg1, arg2],
32454 ))));
32455 }
32456
32457 if matches!(target, DialectType::Hive) {
32458 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
32459 match unit_str.as_str() {
32460 "MONTH" => {
32461 return Ok(Expression::Function(Box::new(Function::new(
32462 "CAST".to_string(),
32463 vec![Expression::Function(Box::new(Function::new(
32464 "MONTHS_BETWEEN".to_string(),
32465 vec![arg2, arg1],
32466 )))],
32467 ))));
32468 }
32469 "WEEK" => {
32470 return Ok(Expression::Cast(Box::new(Cast {
32471 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
32472 Expression::Function(Box::new(Function::new(
32473 "DATEDIFF".to_string(),
32474 vec![arg2, arg1],
32475 ))),
32476 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
32477 ))),
32478 to: DataType::Int {
32479 length: None,
32480 integer_spelling: false,
32481 },
32482 trailing_comments: vec![],
32483 double_colon_syntax: false,
32484 format: None,
32485 default: None,
32486 inferred_type: None,
32487 })));
32488 }
32489 _ => {
32490 // Default: DATEDIFF(end, start) for DAY
32491 return Ok(Expression::Function(Box::new(Function::new(
32492 "DATEDIFF".to_string(),
32493 vec![arg2, arg1],
32494 ))));
32495 }
32496 }
32497 }
32498
32499 if matches!(
32500 target,
32501 DialectType::Presto | DialectType::Trino | DialectType::Athena
32502 ) {
32503 // Presto/Trino: DATE_DIFF('UNIT', start, end)
32504 return Ok(Expression::Function(Box::new(Function::new(
32505 "DATE_DIFF".to_string(),
32506 vec![
32507 Expression::Literal(Box::new(Literal::String(unit_str))),
32508 arg1,
32509 arg2,
32510 ],
32511 ))));
32512 }
32513
32514 if matches!(target, DialectType::TSQL) {
32515 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
32516 let cast_d2 = Self::ensure_cast_datetime2(arg2);
32517 let unit = Expression::Identifier(Identifier::new(unit_str));
32518 return Ok(Expression::Function(Box::new(Function::new(
32519 "DATEDIFF".to_string(),
32520 vec![unit, arg1, cast_d2],
32521 ))));
32522 }
32523
32524 if matches!(target, DialectType::PostgreSQL) {
32525 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
32526 // For now, use DATEDIFF (passthrough) with uppercased unit
32527 let unit = Expression::Identifier(Identifier::new(unit_str));
32528 return Ok(Expression::Function(Box::new(Function::new(
32529 "DATEDIFF".to_string(),
32530 vec![unit, arg1, arg2],
32531 ))));
32532 }
32533
32534 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
32535 let unit = Expression::Identifier(Identifier::new(unit_str));
32536 Ok(Expression::Function(Box::new(Function::new(
32537 "DATEDIFF".to_string(),
32538 vec![unit, arg1, arg2],
32539 ))))
32540 }
32541
32542 // DATE_DIFF(date1, date2, unit) -> standard form
32543 "DATE_DIFF" if args.len() == 3 => {
32544 let date1 = args.remove(0);
32545 let date2 = args.remove(0);
32546 let unit_expr = args.remove(0);
32547 let unit_str = get_unit_str(&unit_expr);
32548
32549 if matches!(target, DialectType::BigQuery) {
32550 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
32551 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
32552 "WEEK".to_string()
32553 } else {
32554 unit_str
32555 };
32556 let norm_d1 = Self::date_literal_to_cast(date1);
32557 let norm_d2 = Self::date_literal_to_cast(date2);
32558 let unit = Expression::Identifier(Identifier::new(norm_unit));
32559 return Ok(Expression::Function(Box::new(Function::new(
32560 f.name,
32561 vec![norm_d1, norm_d2, unit],
32562 ))));
32563 }
32564
32565 if matches!(target, DialectType::MySQL) {
32566 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
32567 let norm_d1 = Self::date_literal_to_cast(date1);
32568 let norm_d2 = Self::date_literal_to_cast(date2);
32569 return Ok(Expression::Function(Box::new(Function::new(
32570 "DATEDIFF".to_string(),
32571 vec![norm_d1, norm_d2],
32572 ))));
32573 }
32574
32575 if matches!(target, DialectType::StarRocks) {
32576 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
32577 let norm_d1 = Self::date_literal_to_cast(date1);
32578 let norm_d2 = Self::date_literal_to_cast(date2);
32579 return Ok(Expression::Function(Box::new(Function::new(
32580 "DATE_DIFF".to_string(),
32581 vec![
32582 Expression::Literal(Box::new(Literal::String(unit_str))),
32583 norm_d1,
32584 norm_d2,
32585 ],
32586 ))));
32587 }
32588
32589 if matches!(target, DialectType::DuckDB) {
32590 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
32591 let norm_d1 = Self::ensure_cast_date(date1);
32592 let norm_d2 = Self::ensure_cast_date(date2);
32593
32594 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
32595 let is_week_variant = unit_str == "WEEK"
32596 || unit_str.starts_with("WEEK(")
32597 || unit_str == "ISOWEEK";
32598 if is_week_variant {
32599 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
32600 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
32601 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
32602 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
32603 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
32604 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
32605 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
32606 Some("1") // Shift Sunday to Monday alignment
32607 } else if unit_str == "WEEK(SATURDAY)" {
32608 Some("-5")
32609 } else if unit_str == "WEEK(TUESDAY)" {
32610 Some("-1")
32611 } else if unit_str == "WEEK(WEDNESDAY)" {
32612 Some("-2")
32613 } else if unit_str == "WEEK(THURSDAY)" {
32614 Some("-3")
32615 } else if unit_str == "WEEK(FRIDAY)" {
32616 Some("-4")
32617 } else {
32618 Some("1") // default to Sunday
32619 };
32620
32621 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
32622 let shifted = if let Some(off) = offset {
32623 let interval =
32624 Expression::Interval(Box::new(crate::expressions::Interval {
32625 this: Some(Expression::Literal(Box::new(Literal::String(
32626 off.to_string(),
32627 )))),
32628 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32629 unit: crate::expressions::IntervalUnit::Day,
32630 use_plural: false,
32631 }),
32632 }));
32633 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
32634 date, interval,
32635 )))
32636 } else {
32637 date
32638 };
32639 Expression::Function(Box::new(Function::new(
32640 "DATE_TRUNC".to_string(),
32641 vec![
32642 Expression::Literal(Box::new(Literal::String(
32643 "WEEK".to_string(),
32644 ))),
32645 shifted,
32646 ],
32647 )))
32648 };
32649
32650 let trunc_d2 = make_trunc(norm_d2, day_offset);
32651 let trunc_d1 = make_trunc(norm_d1, day_offset);
32652 return Ok(Expression::Function(Box::new(Function::new(
32653 "DATE_DIFF".to_string(),
32654 vec![
32655 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
32656 trunc_d2,
32657 trunc_d1,
32658 ],
32659 ))));
32660 }
32661
32662 return Ok(Expression::Function(Box::new(Function::new(
32663 "DATE_DIFF".to_string(),
32664 vec![
32665 Expression::Literal(Box::new(Literal::String(unit_str))),
32666 norm_d2,
32667 norm_d1,
32668 ],
32669 ))));
32670 }
32671
32672 // Default: DATEDIFF(unit, date2, date1)
32673 let unit = Expression::Identifier(Identifier::new(unit_str));
32674 Ok(Expression::Function(Box::new(Function::new(
32675 "DATEDIFF".to_string(),
32676 vec![unit, date2, date1],
32677 ))))
32678 }
32679
32680 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
32681 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
32682 let ts = args.remove(0);
32683 let interval_expr = args.remove(0);
32684 let (val, unit) =
32685 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
32686 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
32687 });
32688
32689 match target {
32690 DialectType::Snowflake => {
32691 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
32692 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
32693 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
32694 let unit_str = Self::interval_unit_to_string(&unit);
32695 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
32696 Ok(Expression::TimestampAdd(Box::new(
32697 crate::expressions::TimestampAdd {
32698 this: Box::new(val),
32699 expression: Box::new(cast_ts),
32700 unit: Some(unit_str.to_string()),
32701 },
32702 )))
32703 }
32704 DialectType::Spark | DialectType::Databricks => {
32705 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
32706 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
32707 let interval =
32708 Expression::Interval(Box::new(crate::expressions::Interval {
32709 this: Some(val),
32710 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32711 unit,
32712 use_plural: false,
32713 }),
32714 }));
32715 Ok(Expression::Add(Box::new(
32716 crate::expressions::BinaryOp::new(ts, interval),
32717 )))
32718 } else if name == "DATETIME_ADD"
32719 && matches!(target, DialectType::Databricks)
32720 {
32721 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
32722 let unit_str = Self::interval_unit_to_string(&unit);
32723 Ok(Expression::Function(Box::new(Function::new(
32724 "TIMESTAMPADD".to_string(),
32725 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
32726 ))))
32727 } else {
32728 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
32729 let unit_str = Self::interval_unit_to_string(&unit);
32730 let cast_ts =
32731 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
32732 Self::maybe_cast_ts(ts)
32733 } else {
32734 ts
32735 };
32736 Ok(Expression::Function(Box::new(Function::new(
32737 "DATE_ADD".to_string(),
32738 vec![
32739 Expression::Identifier(Identifier::new(unit_str)),
32740 val,
32741 cast_ts,
32742 ],
32743 ))))
32744 }
32745 }
32746 DialectType::MySQL => {
32747 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
32748 let mysql_ts = if name.starts_with("TIMESTAMP") {
32749 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
32750 match &ts {
32751 Expression::Function(ref inner_f)
32752 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
32753 {
32754 // Already wrapped, keep as-is
32755 ts
32756 }
32757 _ => {
32758 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
32759 let unwrapped = match ts {
32760 Expression::Literal(lit)
32761 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
32762 {
32763 let Literal::Timestamp(s) = lit.as_ref() else {
32764 unreachable!()
32765 };
32766 Expression::Literal(Box::new(Literal::String(
32767 s.clone(),
32768 )))
32769 }
32770 other => other,
32771 };
32772 Expression::Function(Box::new(Function::new(
32773 "TIMESTAMP".to_string(),
32774 vec![unwrapped],
32775 )))
32776 }
32777 }
32778 } else {
32779 ts
32780 };
32781 Ok(Expression::DateAdd(Box::new(
32782 crate::expressions::DateAddFunc {
32783 this: mysql_ts,
32784 interval: val,
32785 unit,
32786 },
32787 )))
32788 }
32789 _ => {
32790 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
32791 let cast_ts = if matches!(target, DialectType::DuckDB) {
32792 if name == "DATETIME_ADD" {
32793 Self::ensure_cast_timestamp(ts)
32794 } else if name.starts_with("TIMESTAMP") {
32795 Self::maybe_cast_ts_to_tz(ts, &name)
32796 } else {
32797 ts
32798 }
32799 } else {
32800 ts
32801 };
32802 Ok(Expression::DateAdd(Box::new(
32803 crate::expressions::DateAddFunc {
32804 this: cast_ts,
32805 interval: val,
32806 unit,
32807 },
32808 )))
32809 }
32810 }
32811 }
32812
32813 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
32814 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
32815 let ts = args.remove(0);
32816 let interval_expr = args.remove(0);
32817 let (val, unit) =
32818 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
32819 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
32820 });
32821
32822 match target {
32823 DialectType::Snowflake => {
32824 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
32825 let unit_str = Self::interval_unit_to_string(&unit);
32826 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
32827 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
32828 val,
32829 Expression::Neg(Box::new(crate::expressions::UnaryOp {
32830 this: Expression::number(1),
32831 inferred_type: None,
32832 })),
32833 )));
32834 Ok(Expression::TimestampAdd(Box::new(
32835 crate::expressions::TimestampAdd {
32836 this: Box::new(neg_val),
32837 expression: Box::new(cast_ts),
32838 unit: Some(unit_str.to_string()),
32839 },
32840 )))
32841 }
32842 DialectType::Spark | DialectType::Databricks => {
32843 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
32844 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
32845 {
32846 // Spark: ts - INTERVAL val UNIT
32847 let cast_ts = if name.starts_with("TIMESTAMP") {
32848 Self::maybe_cast_ts(ts)
32849 } else {
32850 ts
32851 };
32852 let interval =
32853 Expression::Interval(Box::new(crate::expressions::Interval {
32854 this: Some(val),
32855 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32856 unit,
32857 use_plural: false,
32858 }),
32859 }));
32860 Ok(Expression::Sub(Box::new(
32861 crate::expressions::BinaryOp::new(cast_ts, interval),
32862 )))
32863 } else {
32864 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
32865 let unit_str = Self::interval_unit_to_string(&unit);
32866 let neg_val =
32867 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
32868 val,
32869 Expression::Neg(Box::new(crate::expressions::UnaryOp {
32870 this: Expression::number(1),
32871 inferred_type: None,
32872 })),
32873 )));
32874 Ok(Expression::Function(Box::new(Function::new(
32875 "TIMESTAMPADD".to_string(),
32876 vec![
32877 Expression::Identifier(Identifier::new(unit_str)),
32878 neg_val,
32879 ts,
32880 ],
32881 ))))
32882 }
32883 }
32884 DialectType::MySQL => {
32885 let mysql_ts = if name.starts_with("TIMESTAMP") {
32886 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
32887 match &ts {
32888 Expression::Function(ref inner_f)
32889 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
32890 {
32891 // Already wrapped, keep as-is
32892 ts
32893 }
32894 _ => {
32895 let unwrapped = match ts {
32896 Expression::Literal(lit)
32897 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
32898 {
32899 let Literal::Timestamp(s) = lit.as_ref() else {
32900 unreachable!()
32901 };
32902 Expression::Literal(Box::new(Literal::String(
32903 s.clone(),
32904 )))
32905 }
32906 other => other,
32907 };
32908 Expression::Function(Box::new(Function::new(
32909 "TIMESTAMP".to_string(),
32910 vec![unwrapped],
32911 )))
32912 }
32913 }
32914 } else {
32915 ts
32916 };
32917 Ok(Expression::DateSub(Box::new(
32918 crate::expressions::DateAddFunc {
32919 this: mysql_ts,
32920 interval: val,
32921 unit,
32922 },
32923 )))
32924 }
32925 _ => {
32926 let cast_ts = if matches!(target, DialectType::DuckDB) {
32927 if name == "DATETIME_SUB" {
32928 Self::ensure_cast_timestamp(ts)
32929 } else if name.starts_with("TIMESTAMP") {
32930 Self::maybe_cast_ts_to_tz(ts, &name)
32931 } else {
32932 ts
32933 }
32934 } else {
32935 ts
32936 };
32937 Ok(Expression::DateSub(Box::new(
32938 crate::expressions::DateAddFunc {
32939 this: cast_ts,
32940 interval: val,
32941 unit,
32942 },
32943 )))
32944 }
32945 }
32946 }
32947
32948 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
32949 "DATE_SUB" if args.len() == 2 => {
32950 let date = args.remove(0);
32951 let interval_expr = args.remove(0);
32952 let (val, unit) =
32953 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
32954 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
32955 });
32956
32957 match target {
32958 DialectType::Databricks | DialectType::Spark => {
32959 // Databricks/Spark: DATE_ADD(date, -val)
32960 // Use DateAdd expression with negative val so it generates correctly
32961 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
32962 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
32963 // Instead, we directly output as a simple negated DateSub
32964 Ok(Expression::DateSub(Box::new(
32965 crate::expressions::DateAddFunc {
32966 this: date,
32967 interval: val,
32968 unit,
32969 },
32970 )))
32971 }
32972 DialectType::DuckDB => {
32973 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
32974 let cast_date = Self::ensure_cast_date(date);
32975 let interval =
32976 Expression::Interval(Box::new(crate::expressions::Interval {
32977 this: Some(val),
32978 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32979 unit,
32980 use_plural: false,
32981 }),
32982 }));
32983 Ok(Expression::Sub(Box::new(
32984 crate::expressions::BinaryOp::new(cast_date, interval),
32985 )))
32986 }
32987 DialectType::Snowflake => {
32988 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
32989 // Just ensure the date is cast properly
32990 let cast_date = Self::ensure_cast_date(date);
32991 Ok(Expression::DateSub(Box::new(
32992 crate::expressions::DateAddFunc {
32993 this: cast_date,
32994 interval: val,
32995 unit,
32996 },
32997 )))
32998 }
32999 DialectType::PostgreSQL => {
33000 // PostgreSQL: date - INTERVAL 'val UNIT'
33001 let unit_str = Self::interval_unit_to_string(&unit);
33002 let interval =
33003 Expression::Interval(Box::new(crate::expressions::Interval {
33004 this: Some(Expression::Literal(Box::new(Literal::String(
33005 format!("{} {}", Self::expr_to_string(&val), unit_str),
33006 )))),
33007 unit: None,
33008 }));
33009 Ok(Expression::Sub(Box::new(
33010 crate::expressions::BinaryOp::new(date, interval),
33011 )))
33012 }
33013 _ => Ok(Expression::DateSub(Box::new(
33014 crate::expressions::DateAddFunc {
33015 this: date,
33016 interval: val,
33017 unit,
33018 },
33019 ))),
33020 }
33021 }
33022
33023 // DATEADD(unit, val, date) -> target-specific form
33024 // Used by: Redshift, Snowflake, TSQL, ClickHouse
33025 "DATEADD" if args.len() == 3 => {
33026 let arg0 = args.remove(0);
33027 let arg1 = args.remove(0);
33028 let arg2 = args.remove(0);
33029 let unit_str = get_unit_str(&arg0);
33030
33031 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
33032 // Keep DATEADD(UNIT, val, date) with uppercased unit
33033 let unit = Expression::Identifier(Identifier::new(unit_str));
33034 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
33035 let date = if matches!(target, DialectType::TSQL)
33036 && !matches!(
33037 source,
33038 DialectType::Spark | DialectType::Databricks | DialectType::Hive
33039 ) {
33040 Self::ensure_cast_datetime2(arg2)
33041 } else {
33042 arg2
33043 };
33044 return Ok(Expression::Function(Box::new(Function::new(
33045 "DATEADD".to_string(),
33046 vec![unit, arg1, date],
33047 ))));
33048 }
33049
33050 if matches!(target, DialectType::DuckDB) {
33051 // DuckDB: date + INTERVAL 'val' UNIT
33052 let iu = parse_interval_unit(&unit_str);
33053 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33054 this: Some(arg1),
33055 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33056 unit: iu,
33057 use_plural: false,
33058 }),
33059 }));
33060 let cast_date = Self::ensure_cast_timestamp(arg2);
33061 return Ok(Expression::Add(Box::new(
33062 crate::expressions::BinaryOp::new(cast_date, interval),
33063 )));
33064 }
33065
33066 if matches!(target, DialectType::BigQuery) {
33067 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
33068 let iu = parse_interval_unit(&unit_str);
33069 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33070 this: Some(arg1),
33071 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33072 unit: iu,
33073 use_plural: false,
33074 }),
33075 }));
33076 return Ok(Expression::Function(Box::new(Function::new(
33077 "DATE_ADD".to_string(),
33078 vec![arg2, interval],
33079 ))));
33080 }
33081
33082 if matches!(target, DialectType::Databricks) {
33083 // Databricks: keep DATEADD(UNIT, val, date) format
33084 let unit = Expression::Identifier(Identifier::new(unit_str));
33085 return Ok(Expression::Function(Box::new(Function::new(
33086 "DATEADD".to_string(),
33087 vec![unit, arg1, arg2],
33088 ))));
33089 }
33090
33091 if matches!(target, DialectType::Spark) {
33092 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
33093 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
33094 if let Expression::Literal(lit) = &expr {
33095 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
33096 if let Ok(val) = n.parse::<i64>() {
33097 return Expression::Literal(Box::new(
33098 crate::expressions::Literal::Number(
33099 (val * factor).to_string(),
33100 ),
33101 ));
33102 }
33103 }
33104 }
33105 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
33106 expr,
33107 Expression::Literal(Box::new(crate::expressions::Literal::Number(
33108 factor.to_string(),
33109 ))),
33110 )))
33111 }
33112 match unit_str.as_str() {
33113 "YEAR" => {
33114 let months = multiply_expr_dateadd(arg1, 12);
33115 return Ok(Expression::Function(Box::new(Function::new(
33116 "ADD_MONTHS".to_string(),
33117 vec![arg2, months],
33118 ))));
33119 }
33120 "QUARTER" => {
33121 let months = multiply_expr_dateadd(arg1, 3);
33122 return Ok(Expression::Function(Box::new(Function::new(
33123 "ADD_MONTHS".to_string(),
33124 vec![arg2, months],
33125 ))));
33126 }
33127 "MONTH" => {
33128 return Ok(Expression::Function(Box::new(Function::new(
33129 "ADD_MONTHS".to_string(),
33130 vec![arg2, arg1],
33131 ))));
33132 }
33133 "WEEK" => {
33134 let days = multiply_expr_dateadd(arg1, 7);
33135 return Ok(Expression::Function(Box::new(Function::new(
33136 "DATE_ADD".to_string(),
33137 vec![arg2, days],
33138 ))));
33139 }
33140 "DAY" => {
33141 return Ok(Expression::Function(Box::new(Function::new(
33142 "DATE_ADD".to_string(),
33143 vec![arg2, arg1],
33144 ))));
33145 }
33146 _ => {
33147 let unit = Expression::Identifier(Identifier::new(unit_str));
33148 return Ok(Expression::Function(Box::new(Function::new(
33149 "DATE_ADD".to_string(),
33150 vec![unit, arg1, arg2],
33151 ))));
33152 }
33153 }
33154 }
33155
33156 if matches!(target, DialectType::Hive) {
33157 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
33158 match unit_str.as_str() {
33159 "DAY" => {
33160 return Ok(Expression::Function(Box::new(Function::new(
33161 "DATE_ADD".to_string(),
33162 vec![arg2, arg1],
33163 ))));
33164 }
33165 "MONTH" => {
33166 return Ok(Expression::Function(Box::new(Function::new(
33167 "ADD_MONTHS".to_string(),
33168 vec![arg2, arg1],
33169 ))));
33170 }
33171 _ => {
33172 let iu = parse_interval_unit(&unit_str);
33173 let interval =
33174 Expression::Interval(Box::new(crate::expressions::Interval {
33175 this: Some(arg1),
33176 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33177 unit: iu,
33178 use_plural: false,
33179 }),
33180 }));
33181 return Ok(Expression::Add(Box::new(
33182 crate::expressions::BinaryOp::new(arg2, interval),
33183 )));
33184 }
33185 }
33186 }
33187
33188 if matches!(target, DialectType::PostgreSQL) {
33189 // PostgreSQL: date + INTERVAL 'val UNIT'
33190 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33191 this: Some(Expression::Literal(Box::new(Literal::String(format!(
33192 "{} {}",
33193 Self::expr_to_string(&arg1),
33194 unit_str
33195 ))))),
33196 unit: None,
33197 }));
33198 return Ok(Expression::Add(Box::new(
33199 crate::expressions::BinaryOp::new(arg2, interval),
33200 )));
33201 }
33202
33203 if matches!(
33204 target,
33205 DialectType::Presto | DialectType::Trino | DialectType::Athena
33206 ) {
33207 // Presto/Trino: DATE_ADD('UNIT', val, date)
33208 return Ok(Expression::Function(Box::new(Function::new(
33209 "DATE_ADD".to_string(),
33210 vec![
33211 Expression::Literal(Box::new(Literal::String(unit_str))),
33212 arg1,
33213 arg2,
33214 ],
33215 ))));
33216 }
33217
33218 if matches!(target, DialectType::ClickHouse) {
33219 // ClickHouse: DATE_ADD(UNIT, val, date)
33220 let unit = Expression::Identifier(Identifier::new(unit_str));
33221 return Ok(Expression::Function(Box::new(Function::new(
33222 "DATE_ADD".to_string(),
33223 vec![unit, arg1, arg2],
33224 ))));
33225 }
33226
33227 // Default: keep DATEADD with uppercased unit
33228 let unit = Expression::Identifier(Identifier::new(unit_str));
33229 Ok(Expression::Function(Box::new(Function::new(
33230 "DATEADD".to_string(),
33231 vec![unit, arg1, arg2],
33232 ))))
33233 }
33234
33235 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
33236 "DATE_ADD" if args.len() == 3 => {
33237 let arg0 = args.remove(0);
33238 let arg1 = args.remove(0);
33239 let arg2 = args.remove(0);
33240 let unit_str = get_unit_str(&arg0);
33241
33242 if matches!(
33243 target,
33244 DialectType::Presto | DialectType::Trino | DialectType::Athena
33245 ) {
33246 // Presto/Trino: DATE_ADD('UNIT', val, date)
33247 return Ok(Expression::Function(Box::new(Function::new(
33248 "DATE_ADD".to_string(),
33249 vec![
33250 Expression::Literal(Box::new(Literal::String(unit_str))),
33251 arg1,
33252 arg2,
33253 ],
33254 ))));
33255 }
33256
33257 if matches!(
33258 target,
33259 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
33260 ) {
33261 // DATEADD(UNIT, val, date)
33262 let unit = Expression::Identifier(Identifier::new(unit_str));
33263 let date = if matches!(target, DialectType::TSQL) {
33264 Self::ensure_cast_datetime2(arg2)
33265 } else {
33266 arg2
33267 };
33268 return Ok(Expression::Function(Box::new(Function::new(
33269 "DATEADD".to_string(),
33270 vec![unit, arg1, date],
33271 ))));
33272 }
33273
33274 if matches!(target, DialectType::DuckDB) {
33275 // DuckDB: date + INTERVAL val UNIT
33276 let iu = parse_interval_unit(&unit_str);
33277 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33278 this: Some(arg1),
33279 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33280 unit: iu,
33281 use_plural: false,
33282 }),
33283 }));
33284 return Ok(Expression::Add(Box::new(
33285 crate::expressions::BinaryOp::new(arg2, interval),
33286 )));
33287 }
33288
33289 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33290 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
33291 let unit = Expression::Identifier(Identifier::new(unit_str));
33292 return Ok(Expression::Function(Box::new(Function::new(
33293 "DATE_ADD".to_string(),
33294 vec![unit, arg1, arg2],
33295 ))));
33296 }
33297
33298 // Default: DATE_ADD(UNIT, val, date)
33299 let unit = Expression::Identifier(Identifier::new(unit_str));
33300 Ok(Expression::Function(Box::new(Function::new(
33301 "DATE_ADD".to_string(),
33302 vec![unit, arg1, arg2],
33303 ))))
33304 }
33305
33306 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
33307 "DATE_ADD" if args.len() == 2 => {
33308 let date = args.remove(0);
33309 let interval_expr = args.remove(0);
33310 let (val, unit) =
33311 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
33312 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
33313 });
33314 let unit_str = Self::interval_unit_to_string(&unit);
33315
33316 match target {
33317 DialectType::DuckDB => {
33318 // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
33319 let cast_date = Self::ensure_cast_date(date);
33320 let quoted_val = Self::quote_interval_val(&val);
33321 let interval =
33322 Expression::Interval(Box::new(crate::expressions::Interval {
33323 this: Some(quoted_val),
33324 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33325 unit,
33326 use_plural: false,
33327 }),
33328 }));
33329 Ok(Expression::Add(Box::new(
33330 crate::expressions::BinaryOp::new(cast_date, interval),
33331 )))
33332 }
33333 DialectType::PostgreSQL => {
33334 // PostgreSQL: date + INTERVAL 'val UNIT'
33335 let interval =
33336 Expression::Interval(Box::new(crate::expressions::Interval {
33337 this: Some(Expression::Literal(Box::new(Literal::String(
33338 format!("{} {}", Self::expr_to_string(&val), unit_str),
33339 )))),
33340 unit: None,
33341 }));
33342 Ok(Expression::Add(Box::new(
33343 crate::expressions::BinaryOp::new(date, interval),
33344 )))
33345 }
33346 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
33347 // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
33348 let val_str = Self::expr_to_string(&val);
33349 Ok(Expression::Function(Box::new(Function::new(
33350 "DATE_ADD".to_string(),
33351 vec![
33352 Expression::Literal(Box::new(Literal::String(
33353 unit_str.to_string(),
33354 ))),
33355 Expression::Cast(Box::new(Cast {
33356 this: Expression::Literal(Box::new(Literal::String(val_str))),
33357 to: DataType::BigInt { length: None },
33358 trailing_comments: vec![],
33359 double_colon_syntax: false,
33360 format: None,
33361 default: None,
33362 inferred_type: None,
33363 })),
33364 date,
33365 ],
33366 ))))
33367 }
33368 DialectType::Spark | DialectType::Hive => {
33369 // Spark/Hive: DATE_ADD(date, val) for DAY
33370 match unit_str {
33371 "DAY" => Ok(Expression::Function(Box::new(Function::new(
33372 "DATE_ADD".to_string(),
33373 vec![date, val],
33374 )))),
33375 "MONTH" => Ok(Expression::Function(Box::new(Function::new(
33376 "ADD_MONTHS".to_string(),
33377 vec![date, val],
33378 )))),
33379 _ => {
33380 let iu = parse_interval_unit(&unit_str);
33381 let interval =
33382 Expression::Interval(Box::new(crate::expressions::Interval {
33383 this: Some(val),
33384 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33385 unit: iu,
33386 use_plural: false,
33387 }),
33388 }));
33389 Ok(Expression::Function(Box::new(Function::new(
33390 "DATE_ADD".to_string(),
33391 vec![date, interval],
33392 ))))
33393 }
33394 }
33395 }
33396 DialectType::Snowflake => {
33397 // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
33398 let cast_date = Self::ensure_cast_date(date);
33399 let val_str = Self::expr_to_string(&val);
33400 Ok(Expression::Function(Box::new(Function::new(
33401 "DATEADD".to_string(),
33402 vec![
33403 Expression::Identifier(Identifier::new(unit_str)),
33404 Expression::Literal(Box::new(Literal::String(val_str))),
33405 cast_date,
33406 ],
33407 ))))
33408 }
33409 DialectType::TSQL | DialectType::Fabric => {
33410 let cast_date = Self::ensure_cast_datetime2(date);
33411 Ok(Expression::Function(Box::new(Function::new(
33412 "DATEADD".to_string(),
33413 vec![
33414 Expression::Identifier(Identifier::new(unit_str)),
33415 val,
33416 cast_date,
33417 ],
33418 ))))
33419 }
33420 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
33421 "DATEADD".to_string(),
33422 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33423 )))),
33424 DialectType::MySQL => {
33425 // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
33426 let quoted_val = Self::quote_interval_val(&val);
33427 let iu = parse_interval_unit(&unit_str);
33428 let interval =
33429 Expression::Interval(Box::new(crate::expressions::Interval {
33430 this: Some(quoted_val),
33431 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33432 unit: iu,
33433 use_plural: false,
33434 }),
33435 }));
33436 Ok(Expression::Function(Box::new(Function::new(
33437 "DATE_ADD".to_string(),
33438 vec![date, interval],
33439 ))))
33440 }
33441 DialectType::BigQuery => {
33442 // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
33443 let quoted_val = Self::quote_interval_val(&val);
33444 let iu = parse_interval_unit(&unit_str);
33445 let interval =
33446 Expression::Interval(Box::new(crate::expressions::Interval {
33447 this: Some(quoted_val),
33448 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33449 unit: iu,
33450 use_plural: false,
33451 }),
33452 }));
33453 Ok(Expression::Function(Box::new(Function::new(
33454 "DATE_ADD".to_string(),
33455 vec![date, interval],
33456 ))))
33457 }
33458 DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
33459 "DATEADD".to_string(),
33460 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33461 )))),
33462 _ => {
33463 // Default: keep as DATE_ADD with decomposed interval
33464 Ok(Expression::DateAdd(Box::new(
33465 crate::expressions::DateAddFunc {
33466 this: date,
33467 interval: val,
33468 unit,
33469 },
33470 )))
33471 }
33472 }
33473 }
33474
33475 // ADD_MONTHS(date, val) -> target-specific form
33476 "ADD_MONTHS" if args.len() == 2 => {
33477 let date = args.remove(0);
33478 let val = args.remove(0);
33479
33480 if matches!(target, DialectType::TSQL) {
33481 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
33482 let cast_date = Self::ensure_cast_datetime2(date);
33483 return Ok(Expression::Function(Box::new(Function::new(
33484 "DATEADD".to_string(),
33485 vec![
33486 Expression::Identifier(Identifier::new("MONTH")),
33487 val,
33488 cast_date,
33489 ],
33490 ))));
33491 }
33492
33493 if matches!(target, DialectType::DuckDB) {
33494 // DuckDB: date + INTERVAL val MONTH
33495 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33496 this: Some(val),
33497 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33498 unit: crate::expressions::IntervalUnit::Month,
33499 use_plural: false,
33500 }),
33501 }));
33502 return Ok(Expression::Add(Box::new(
33503 crate::expressions::BinaryOp::new(date, interval),
33504 )));
33505 }
33506
33507 if matches!(target, DialectType::Snowflake) {
33508 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
33509 if matches!(source, DialectType::Snowflake) {
33510 return Ok(Expression::Function(Box::new(Function::new(
33511 "ADD_MONTHS".to_string(),
33512 vec![date, val],
33513 ))));
33514 }
33515 return Ok(Expression::Function(Box::new(Function::new(
33516 "DATEADD".to_string(),
33517 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
33518 ))));
33519 }
33520
33521 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33522 // Spark: ADD_MONTHS(date, val) - keep as is
33523 return Ok(Expression::Function(Box::new(Function::new(
33524 "ADD_MONTHS".to_string(),
33525 vec![date, val],
33526 ))));
33527 }
33528
33529 if matches!(target, DialectType::Hive) {
33530 return Ok(Expression::Function(Box::new(Function::new(
33531 "ADD_MONTHS".to_string(),
33532 vec![date, val],
33533 ))));
33534 }
33535
33536 if matches!(
33537 target,
33538 DialectType::Presto | DialectType::Trino | DialectType::Athena
33539 ) {
33540 // Presto: DATE_ADD('MONTH', val, date)
33541 return Ok(Expression::Function(Box::new(Function::new(
33542 "DATE_ADD".to_string(),
33543 vec![
33544 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
33545 val,
33546 date,
33547 ],
33548 ))));
33549 }
33550
33551 // Default: keep ADD_MONTHS
33552 Ok(Expression::Function(Box::new(Function::new(
33553 "ADD_MONTHS".to_string(),
33554 vec![date, val],
33555 ))))
33556 }
33557
33558 // SAFE_DIVIDE(x, y) -> target-specific form directly
33559 "SAFE_DIVIDE" if args.len() == 2 => {
33560 let x = args.remove(0);
33561 let y = args.remove(0);
33562 // Wrap x and y in parens if they're complex expressions
33563 let y_ref = match &y {
33564 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33565 y.clone()
33566 }
33567 _ => Expression::Paren(Box::new(Paren {
33568 this: y.clone(),
33569 trailing_comments: vec![],
33570 })),
33571 };
33572 let x_ref = match &x {
33573 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33574 x.clone()
33575 }
33576 _ => Expression::Paren(Box::new(Paren {
33577 this: x.clone(),
33578 trailing_comments: vec![],
33579 })),
33580 };
33581 let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
33582 y_ref.clone(),
33583 Expression::number(0),
33584 )));
33585 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33586 x_ref.clone(),
33587 y_ref.clone(),
33588 )));
33589
33590 match target {
33591 DialectType::Spark | DialectType::Databricks => Ok(Expression::Function(
33592 Box::new(Function::new("TRY_DIVIDE".to_string(), vec![x, y])),
33593 )),
33594 DialectType::DuckDB | DialectType::PostgreSQL => {
33595 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
33596 let result_div = if matches!(target, DialectType::PostgreSQL) {
33597 let cast_x = Expression::Cast(Box::new(Cast {
33598 this: x_ref,
33599 to: DataType::Custom {
33600 name: "DOUBLE PRECISION".to_string(),
33601 },
33602 trailing_comments: vec![],
33603 double_colon_syntax: false,
33604 format: None,
33605 default: None,
33606 inferred_type: None,
33607 }));
33608 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33609 cast_x, y_ref,
33610 )))
33611 } else {
33612 div_expr
33613 };
33614 Ok(Expression::Case(Box::new(crate::expressions::Case {
33615 operand: None,
33616 whens: vec![(condition, result_div)],
33617 else_: Some(Expression::Null(crate::expressions::Null)),
33618 comments: Vec::new(),
33619 inferred_type: None,
33620 })))
33621 }
33622 DialectType::Snowflake => {
33623 // IFF(y <> 0, x / y, NULL)
33624 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33625 condition,
33626 true_value: div_expr,
33627 false_value: Some(Expression::Null(crate::expressions::Null)),
33628 original_name: Some("IFF".to_string()),
33629 inferred_type: None,
33630 })))
33631 }
33632 DialectType::Presto | DialectType::Trino => {
33633 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
33634 let cast_x = Expression::Cast(Box::new(Cast {
33635 this: x_ref,
33636 to: DataType::Double {
33637 precision: None,
33638 scale: None,
33639 },
33640 trailing_comments: vec![],
33641 double_colon_syntax: false,
33642 format: None,
33643 default: None,
33644 inferred_type: None,
33645 }));
33646 let cast_div = Expression::Div(Box::new(
33647 crate::expressions::BinaryOp::new(cast_x, y_ref),
33648 ));
33649 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33650 condition,
33651 true_value: cast_div,
33652 false_value: Some(Expression::Null(crate::expressions::Null)),
33653 original_name: None,
33654 inferred_type: None,
33655 })))
33656 }
33657 _ => {
33658 // IF(y <> 0, x / y, NULL)
33659 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33660 condition,
33661 true_value: div_expr,
33662 false_value: Some(Expression::Null(crate::expressions::Null)),
33663 original_name: None,
33664 inferred_type: None,
33665 })))
33666 }
33667 }
33668 }
33669
33670 // GENERATE_UUID() -> UUID() with CAST to string
33671 "GENERATE_UUID" => {
33672 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
33673 this: None,
33674 name: None,
33675 is_string: None,
33676 }));
33677 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
33678 let cast_type = match target {
33679 DialectType::DuckDB => Some(DataType::Text),
33680 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
33681 length: None,
33682 parenthesized_length: false,
33683 }),
33684 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33685 Some(DataType::String { length: None })
33686 }
33687 _ => None,
33688 };
33689 if let Some(dt) = cast_type {
33690 Ok(Expression::Cast(Box::new(Cast {
33691 this: uuid_expr,
33692 to: dt,
33693 trailing_comments: vec![],
33694 double_colon_syntax: false,
33695 format: None,
33696 default: None,
33697 inferred_type: None,
33698 })))
33699 } else {
33700 Ok(uuid_expr)
33701 }
33702 }
33703
33704 // COUNTIF(x) -> CountIf expression
33705 "COUNTIF" if args.len() == 1 => {
33706 let arg = args.remove(0);
33707 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
33708 this: arg,
33709 distinct: false,
33710 filter: None,
33711 order_by: vec![],
33712 name: None,
33713 ignore_nulls: None,
33714 having_max: None,
33715 limit: None,
33716 inferred_type: None,
33717 })))
33718 }
33719
33720 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
33721 "EDIT_DISTANCE" => {
33722 // Strip named arguments (max_distance => N) and pass as positional
33723 let mut positional_args: Vec<Expression> = vec![];
33724 for arg in args {
33725 match arg {
33726 Expression::NamedArgument(na) => {
33727 positional_args.push(na.value);
33728 }
33729 other => positional_args.push(other),
33730 }
33731 }
33732 if positional_args.len() >= 2 {
33733 let col1 = positional_args.remove(0);
33734 let col2 = positional_args.remove(0);
33735 let levenshtein = crate::expressions::BinaryFunc {
33736 this: col1,
33737 expression: col2,
33738 original_name: None,
33739 inferred_type: None,
33740 };
33741 // Pass extra args through a function wrapper with all args
33742 if !positional_args.is_empty() {
33743 let max_dist = positional_args.remove(0);
33744 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
33745 if matches!(target, DialectType::DuckDB) {
33746 let lev = Expression::Function(Box::new(Function::new(
33747 "LEVENSHTEIN".to_string(),
33748 vec![levenshtein.this, levenshtein.expression],
33749 )));
33750 let lev_is_null =
33751 Expression::IsNull(Box::new(crate::expressions::IsNull {
33752 this: lev.clone(),
33753 not: false,
33754 postfix_form: false,
33755 }));
33756 let max_is_null =
33757 Expression::IsNull(Box::new(crate::expressions::IsNull {
33758 this: max_dist.clone(),
33759 not: false,
33760 postfix_form: false,
33761 }));
33762 let null_check =
33763 Expression::Or(Box::new(crate::expressions::BinaryOp {
33764 left: lev_is_null,
33765 right: max_is_null,
33766 left_comments: Vec::new(),
33767 operator_comments: Vec::new(),
33768 trailing_comments: Vec::new(),
33769 inferred_type: None,
33770 }));
33771 let least =
33772 Expression::Least(Box::new(crate::expressions::VarArgFunc {
33773 expressions: vec![lev, max_dist],
33774 original_name: None,
33775 inferred_type: None,
33776 }));
33777 return Ok(Expression::Case(Box::new(crate::expressions::Case {
33778 operand: None,
33779 whens: vec![(
33780 null_check,
33781 Expression::Null(crate::expressions::Null),
33782 )],
33783 else_: Some(least),
33784 comments: Vec::new(),
33785 inferred_type: None,
33786 })));
33787 }
33788 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
33789 all_args.extend(positional_args);
33790 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
33791 let func_name = if matches!(target, DialectType::PostgreSQL) {
33792 "LEVENSHTEIN_LESS_EQUAL"
33793 } else {
33794 "LEVENSHTEIN"
33795 };
33796 return Ok(Expression::Function(Box::new(Function::new(
33797 func_name.to_string(),
33798 all_args,
33799 ))));
33800 }
33801 Ok(Expression::Levenshtein(Box::new(levenshtein)))
33802 } else {
33803 Ok(Expression::Function(Box::new(Function::new(
33804 "EDIT_DISTANCE".to_string(),
33805 positional_args,
33806 ))))
33807 }
33808 }
33809
33810 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
33811 "TIMESTAMP_SECONDS" if args.len() == 1 => {
33812 let arg = args.remove(0);
33813 Ok(Expression::UnixToTime(Box::new(
33814 crate::expressions::UnixToTime {
33815 this: Box::new(arg),
33816 scale: Some(0),
33817 zone: None,
33818 hours: None,
33819 minutes: None,
33820 format: None,
33821 target_type: None,
33822 },
33823 )))
33824 }
33825
33826 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
33827 "TIMESTAMP_MILLIS" if args.len() == 1 => {
33828 let arg = args.remove(0);
33829 Ok(Expression::UnixToTime(Box::new(
33830 crate::expressions::UnixToTime {
33831 this: Box::new(arg),
33832 scale: Some(3),
33833 zone: None,
33834 hours: None,
33835 minutes: None,
33836 format: None,
33837 target_type: None,
33838 },
33839 )))
33840 }
33841
33842 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
33843 "TIMESTAMP_MICROS" if args.len() == 1 => {
33844 let arg = args.remove(0);
33845 Ok(Expression::UnixToTime(Box::new(
33846 crate::expressions::UnixToTime {
33847 this: Box::new(arg),
33848 scale: Some(6),
33849 zone: None,
33850 hours: None,
33851 minutes: None,
33852 format: None,
33853 target_type: None,
33854 },
33855 )))
33856 }
33857
33858 // DIV(x, y) -> IntDiv expression
33859 "DIV" if args.len() == 2 => {
33860 let x = args.remove(0);
33861 let y = args.remove(0);
33862 Ok(Expression::IntDiv(Box::new(
33863 crate::expressions::BinaryFunc {
33864 this: x,
33865 expression: y,
33866 original_name: None,
33867 inferred_type: None,
33868 },
33869 )))
33870 }
33871
33872 // TO_HEX(x) -> target-specific form
33873 "TO_HEX" if args.len() == 1 => {
33874 let arg = args.remove(0);
33875 // Check if inner function already returns hex string in certain targets
33876 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
33877 if matches!(target, DialectType::BigQuery) {
33878 // BQ->BQ: keep as TO_HEX
33879 Ok(Expression::Function(Box::new(Function::new(
33880 "TO_HEX".to_string(),
33881 vec![arg],
33882 ))))
33883 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
33884 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
33885 Ok(arg)
33886 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
33887 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
33888 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
33889 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
33890 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
33891 if let Expression::Function(ref inner_f) = arg {
33892 let inner_args = inner_f.args.clone();
33893 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
33894 "SHA1" => Expression::Function(Box::new(Function::new(
33895 "SHA1_BINARY".to_string(),
33896 inner_args,
33897 ))),
33898 "MD5" => Expression::Function(Box::new(Function::new(
33899 "MD5_BINARY".to_string(),
33900 inner_args,
33901 ))),
33902 "SHA256" => {
33903 let mut a = inner_args;
33904 a.push(Expression::number(256));
33905 Expression::Function(Box::new(Function::new(
33906 "SHA2_BINARY".to_string(),
33907 a,
33908 )))
33909 }
33910 "SHA512" => {
33911 let mut a = inner_args;
33912 a.push(Expression::number(512));
33913 Expression::Function(Box::new(Function::new(
33914 "SHA2_BINARY".to_string(),
33915 a,
33916 )))
33917 }
33918 _ => arg.clone(),
33919 };
33920 Ok(Expression::Function(Box::new(Function::new(
33921 "TO_CHAR".to_string(),
33922 vec![binary_func],
33923 ))))
33924 } else {
33925 let inner = Expression::Function(Box::new(Function::new(
33926 "HEX".to_string(),
33927 vec![arg],
33928 )));
33929 Ok(Expression::Lower(Box::new(
33930 crate::expressions::UnaryFunc::new(inner),
33931 )))
33932 }
33933 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
33934 let inner = Expression::Function(Box::new(Function::new(
33935 "TO_HEX".to_string(),
33936 vec![arg],
33937 )));
33938 Ok(Expression::Lower(Box::new(
33939 crate::expressions::UnaryFunc::new(inner),
33940 )))
33941 } else {
33942 let inner =
33943 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
33944 Ok(Expression::Lower(Box::new(
33945 crate::expressions::UnaryFunc::new(inner),
33946 )))
33947 }
33948 }
33949
33950 // LAST_DAY(date, unit) -> strip unit for most targets, or transform for PostgreSQL
33951 "LAST_DAY" if args.len() == 2 => {
33952 let date = args.remove(0);
33953 let _unit = args.remove(0); // Strip the unit (MONTH is default)
33954 Ok(Expression::Function(Box::new(Function::new(
33955 "LAST_DAY".to_string(),
33956 vec![date],
33957 ))))
33958 }
33959
33960 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
33961 "GENERATE_ARRAY" => {
33962 let start = args.get(0).cloned();
33963 let end = args.get(1).cloned();
33964 let step = args.get(2).cloned();
33965 Ok(Expression::GenerateSeries(Box::new(
33966 crate::expressions::GenerateSeries {
33967 start: start.map(Box::new),
33968 end: end.map(Box::new),
33969 step: step.map(Box::new),
33970 is_end_exclusive: None,
33971 },
33972 )))
33973 }
33974
33975 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
33976 "GENERATE_TIMESTAMP_ARRAY" => {
33977 let start = args.get(0).cloned();
33978 let end = args.get(1).cloned();
33979 let step = args.get(2).cloned();
33980
33981 if matches!(target, DialectType::DuckDB) {
33982 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
33983 // Only cast string literals - leave columns/expressions as-is
33984 let maybe_cast_ts = |expr: Expression| -> Expression {
33985 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
33986 {
33987 Expression::Cast(Box::new(Cast {
33988 this: expr,
33989 to: DataType::Timestamp {
33990 precision: None,
33991 timezone: false,
33992 },
33993 trailing_comments: vec![],
33994 double_colon_syntax: false,
33995 format: None,
33996 default: None,
33997 inferred_type: None,
33998 }))
33999 } else {
34000 expr
34001 }
34002 };
34003 let cast_start = start.map(maybe_cast_ts);
34004 let cast_end = end.map(maybe_cast_ts);
34005 Ok(Expression::GenerateSeries(Box::new(
34006 crate::expressions::GenerateSeries {
34007 start: cast_start.map(Box::new),
34008 end: cast_end.map(Box::new),
34009 step: step.map(Box::new),
34010 is_end_exclusive: None,
34011 },
34012 )))
34013 } else {
34014 Ok(Expression::GenerateSeries(Box::new(
34015 crate::expressions::GenerateSeries {
34016 start: start.map(Box::new),
34017 end: end.map(Box::new),
34018 step: step.map(Box::new),
34019 is_end_exclusive: None,
34020 },
34021 )))
34022 }
34023 }
34024
34025 // TO_JSON(x) -> target-specific (from Spark/Hive)
34026 "TO_JSON" => {
34027 match target {
34028 DialectType::Presto | DialectType::Trino => {
34029 // JSON_FORMAT(CAST(x AS JSON))
34030 let arg = args
34031 .into_iter()
34032 .next()
34033 .unwrap_or(Expression::Null(crate::expressions::Null));
34034 let cast_json = Expression::Cast(Box::new(Cast {
34035 this: arg,
34036 to: DataType::Custom {
34037 name: "JSON".to_string(),
34038 },
34039 trailing_comments: vec![],
34040 double_colon_syntax: false,
34041 format: None,
34042 default: None,
34043 inferred_type: None,
34044 }));
34045 Ok(Expression::Function(Box::new(Function::new(
34046 "JSON_FORMAT".to_string(),
34047 vec![cast_json],
34048 ))))
34049 }
34050 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
34051 "TO_JSON_STRING".to_string(),
34052 args,
34053 )))),
34054 DialectType::DuckDB => {
34055 // CAST(TO_JSON(x) AS TEXT)
34056 let arg = args
34057 .into_iter()
34058 .next()
34059 .unwrap_or(Expression::Null(crate::expressions::Null));
34060 let to_json = Expression::Function(Box::new(Function::new(
34061 "TO_JSON".to_string(),
34062 vec![arg],
34063 )));
34064 Ok(Expression::Cast(Box::new(Cast {
34065 this: to_json,
34066 to: DataType::Text,
34067 trailing_comments: vec![],
34068 double_colon_syntax: false,
34069 format: None,
34070 default: None,
34071 inferred_type: None,
34072 })))
34073 }
34074 _ => Ok(Expression::Function(Box::new(Function::new(
34075 "TO_JSON".to_string(),
34076 args,
34077 )))),
34078 }
34079 }
34080
34081 // TO_JSON_STRING(x) -> target-specific
34082 "TO_JSON_STRING" => {
34083 match target {
34084 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
34085 Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
34086 ),
34087 DialectType::Presto | DialectType::Trino => {
34088 // JSON_FORMAT(CAST(x AS JSON))
34089 let arg = args
34090 .into_iter()
34091 .next()
34092 .unwrap_or(Expression::Null(crate::expressions::Null));
34093 let cast_json = Expression::Cast(Box::new(Cast {
34094 this: arg,
34095 to: DataType::Custom {
34096 name: "JSON".to_string(),
34097 },
34098 trailing_comments: vec![],
34099 double_colon_syntax: false,
34100 format: None,
34101 default: None,
34102 inferred_type: None,
34103 }));
34104 Ok(Expression::Function(Box::new(Function::new(
34105 "JSON_FORMAT".to_string(),
34106 vec![cast_json],
34107 ))))
34108 }
34109 DialectType::DuckDB => {
34110 // CAST(TO_JSON(x) AS TEXT)
34111 let arg = args
34112 .into_iter()
34113 .next()
34114 .unwrap_or(Expression::Null(crate::expressions::Null));
34115 let to_json = Expression::Function(Box::new(Function::new(
34116 "TO_JSON".to_string(),
34117 vec![arg],
34118 )));
34119 Ok(Expression::Cast(Box::new(Cast {
34120 this: to_json,
34121 to: DataType::Text,
34122 trailing_comments: vec![],
34123 double_colon_syntax: false,
34124 format: None,
34125 default: None,
34126 inferred_type: None,
34127 })))
34128 }
34129 DialectType::Snowflake => {
34130 // TO_JSON(x)
34131 Ok(Expression::Function(Box::new(Function::new(
34132 "TO_JSON".to_string(),
34133 args,
34134 ))))
34135 }
34136 _ => Ok(Expression::Function(Box::new(Function::new(
34137 "TO_JSON_STRING".to_string(),
34138 args,
34139 )))),
34140 }
34141 }
34142
34143 // SAFE_ADD(x, y) -> SafeAdd expression
34144 "SAFE_ADD" if args.len() == 2 => {
34145 let x = args.remove(0);
34146 let y = args.remove(0);
34147 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
34148 this: Box::new(x),
34149 expression: Box::new(y),
34150 })))
34151 }
34152
34153 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
34154 "SAFE_SUBTRACT" if args.len() == 2 => {
34155 let x = args.remove(0);
34156 let y = args.remove(0);
34157 Ok(Expression::SafeSubtract(Box::new(
34158 crate::expressions::SafeSubtract {
34159 this: Box::new(x),
34160 expression: Box::new(y),
34161 },
34162 )))
34163 }
34164
34165 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
34166 "SAFE_MULTIPLY" if args.len() == 2 => {
34167 let x = args.remove(0);
34168 let y = args.remove(0);
34169 Ok(Expression::SafeMultiply(Box::new(
34170 crate::expressions::SafeMultiply {
34171 this: Box::new(x),
34172 expression: Box::new(y),
34173 },
34174 )))
34175 }
34176
34177 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
34178 "REGEXP_CONTAINS" if args.len() == 2 => {
34179 let str_expr = args.remove(0);
34180 let pattern = args.remove(0);
34181 Ok(Expression::RegexpLike(Box::new(
34182 crate::expressions::RegexpFunc {
34183 this: str_expr,
34184 pattern,
34185 flags: None,
34186 },
34187 )))
34188 }
34189
34190 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
34191 "CONTAINS_SUBSTR" if args.len() == 2 => {
34192 let a = args.remove(0);
34193 let b = args.remove(0);
34194 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
34195 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
34196 Ok(Expression::Function(Box::new(Function::new(
34197 "CONTAINS".to_string(),
34198 vec![lower_a, lower_b],
34199 ))))
34200 }
34201
34202 // INT64(x) -> CAST(x AS BIGINT)
34203 "INT64" if args.len() == 1 => {
34204 let arg = args.remove(0);
34205 Ok(Expression::Cast(Box::new(Cast {
34206 this: arg,
34207 to: DataType::BigInt { length: None },
34208 trailing_comments: vec![],
34209 double_colon_syntax: false,
34210 format: None,
34211 default: None,
34212 inferred_type: None,
34213 })))
34214 }
34215
34216 // INSTR(str, substr) -> target-specific
34217 "INSTR" if args.len() >= 2 => {
34218 let str_expr = args.remove(0);
34219 let substr = args.remove(0);
34220 if matches!(target, DialectType::Snowflake) {
34221 // CHARINDEX(substr, str)
34222 Ok(Expression::Function(Box::new(Function::new(
34223 "CHARINDEX".to_string(),
34224 vec![substr, str_expr],
34225 ))))
34226 } else if matches!(target, DialectType::BigQuery) {
34227 // Keep as INSTR
34228 Ok(Expression::Function(Box::new(Function::new(
34229 "INSTR".to_string(),
34230 vec![str_expr, substr],
34231 ))))
34232 } else {
34233 // Default: keep as INSTR
34234 Ok(Expression::Function(Box::new(Function::new(
34235 "INSTR".to_string(),
34236 vec![str_expr, substr],
34237 ))))
34238 }
34239 }
34240
34241 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
34242 "DATE_TRUNC" if args.len() == 2 => {
34243 let expr = args.remove(0);
34244 let unit_expr = args.remove(0);
34245 let unit_str = get_unit_str(&unit_expr);
34246
34247 match target {
34248 DialectType::DuckDB
34249 | DialectType::Snowflake
34250 | DialectType::PostgreSQL
34251 | DialectType::Presto
34252 | DialectType::Trino
34253 | DialectType::Databricks
34254 | DialectType::Spark
34255 | DialectType::Redshift
34256 | DialectType::ClickHouse
34257 | DialectType::TSQL => {
34258 // Standard: DATE_TRUNC('UNIT', expr)
34259 Ok(Expression::Function(Box::new(Function::new(
34260 "DATE_TRUNC".to_string(),
34261 vec![
34262 Expression::Literal(Box::new(Literal::String(unit_str))),
34263 expr,
34264 ],
34265 ))))
34266 }
34267 _ => {
34268 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
34269 Ok(Expression::Function(Box::new(Function::new(
34270 "DATE_TRUNC".to_string(),
34271 vec![expr, unit_expr],
34272 ))))
34273 }
34274 }
34275 }
34276
34277 // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
34278 "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
34279 // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
34280 let ts = args.remove(0);
34281 let unit_expr = args.remove(0);
34282 let tz = if !args.is_empty() {
34283 Some(args.remove(0))
34284 } else {
34285 None
34286 };
34287 let unit_str = get_unit_str(&unit_expr);
34288
34289 match target {
34290 DialectType::DuckDB => {
34291 // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34292 // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
34293 // Without timezone for MINUTE+ granularity: just DATE_TRUNC
34294 let is_coarse = matches!(
34295 unit_str.as_str(),
34296 "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
34297 );
34298 // For DATETIME_TRUNC, cast string args to TIMESTAMP
34299 let cast_ts = if name == "DATETIME_TRUNC" {
34300 match ts {
34301 Expression::Literal(ref lit)
34302 if matches!(lit.as_ref(), Literal::String(ref _s)) =>
34303 {
34304 Expression::Cast(Box::new(Cast {
34305 this: ts,
34306 to: DataType::Timestamp {
34307 precision: None,
34308 timezone: false,
34309 },
34310 trailing_comments: vec![],
34311 double_colon_syntax: false,
34312 format: None,
34313 default: None,
34314 inferred_type: None,
34315 }))
34316 }
34317 _ => Self::maybe_cast_ts_to_tz(ts, &name),
34318 }
34319 } else {
34320 Self::maybe_cast_ts_to_tz(ts, &name)
34321 };
34322
34323 if let Some(tz_arg) = tz {
34324 if is_coarse {
34325 // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
34326 let at_tz = Expression::AtTimeZone(Box::new(
34327 crate::expressions::AtTimeZone {
34328 this: cast_ts,
34329 zone: tz_arg.clone(),
34330 },
34331 ));
34332 let date_trunc = Expression::Function(Box::new(Function::new(
34333 "DATE_TRUNC".to_string(),
34334 vec![
34335 Expression::Literal(Box::new(Literal::String(unit_str))),
34336 at_tz,
34337 ],
34338 )));
34339 Ok(Expression::AtTimeZone(Box::new(
34340 crate::expressions::AtTimeZone {
34341 this: date_trunc,
34342 zone: tz_arg,
34343 },
34344 )))
34345 } else {
34346 // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
34347 Ok(Expression::Function(Box::new(Function::new(
34348 "DATE_TRUNC".to_string(),
34349 vec![
34350 Expression::Literal(Box::new(Literal::String(unit_str))),
34351 cast_ts,
34352 ],
34353 ))))
34354 }
34355 } else {
34356 // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34357 Ok(Expression::Function(Box::new(Function::new(
34358 "DATE_TRUNC".to_string(),
34359 vec![
34360 Expression::Literal(Box::new(Literal::String(unit_str))),
34361 cast_ts,
34362 ],
34363 ))))
34364 }
34365 }
34366 DialectType::Databricks | DialectType::Spark => {
34367 // Databricks/Spark: DATE_TRUNC('UNIT', ts)
34368 Ok(Expression::Function(Box::new(Function::new(
34369 "DATE_TRUNC".to_string(),
34370 vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
34371 ))))
34372 }
34373 _ => {
34374 // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
34375 let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
34376 let mut date_trunc_args = vec![unit, ts];
34377 if let Some(tz_arg) = tz {
34378 date_trunc_args.push(tz_arg);
34379 }
34380 Ok(Expression::Function(Box::new(Function::new(
34381 "TIMESTAMP_TRUNC".to_string(),
34382 date_trunc_args,
34383 ))))
34384 }
34385 }
34386 }
34387
34388 // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
34389 "TIME" => {
34390 if args.len() == 3 {
34391 // TIME(h, m, s) constructor
34392 match target {
34393 DialectType::TSQL => {
34394 // TIMEFROMPARTS(h, m, s, 0, 0)
34395 args.push(Expression::number(0));
34396 args.push(Expression::number(0));
34397 Ok(Expression::Function(Box::new(Function::new(
34398 "TIMEFROMPARTS".to_string(),
34399 args,
34400 ))))
34401 }
34402 DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
34403 "MAKETIME".to_string(),
34404 args,
34405 )))),
34406 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
34407 Function::new("MAKE_TIME".to_string(), args),
34408 ))),
34409 _ => Ok(Expression::Function(Box::new(Function::new(
34410 "TIME".to_string(),
34411 args,
34412 )))),
34413 }
34414 } else if args.len() == 1 {
34415 let arg = args.remove(0);
34416 if matches!(target, DialectType::Spark) {
34417 // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
34418 Ok(Expression::Cast(Box::new(Cast {
34419 this: arg,
34420 to: DataType::Timestamp {
34421 timezone: false,
34422 precision: None,
34423 },
34424 trailing_comments: vec![],
34425 double_colon_syntax: false,
34426 format: None,
34427 default: None,
34428 inferred_type: None,
34429 })))
34430 } else {
34431 // Most targets: CAST(x AS TIME)
34432 Ok(Expression::Cast(Box::new(Cast {
34433 this: arg,
34434 to: DataType::Time {
34435 precision: None,
34436 timezone: false,
34437 },
34438 trailing_comments: vec![],
34439 double_colon_syntax: false,
34440 format: None,
34441 default: None,
34442 inferred_type: None,
34443 })))
34444 }
34445 } else if args.len() == 2 {
34446 // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
34447 let expr = args.remove(0);
34448 let tz = args.remove(0);
34449 let cast_tstz = Expression::Cast(Box::new(Cast {
34450 this: expr,
34451 to: DataType::Timestamp {
34452 timezone: true,
34453 precision: None,
34454 },
34455 trailing_comments: vec![],
34456 double_colon_syntax: false,
34457 format: None,
34458 default: None,
34459 inferred_type: None,
34460 }));
34461 let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34462 this: cast_tstz,
34463 zone: tz,
34464 }));
34465 Ok(Expression::Cast(Box::new(Cast {
34466 this: at_tz,
34467 to: DataType::Time {
34468 precision: None,
34469 timezone: false,
34470 },
34471 trailing_comments: vec![],
34472 double_colon_syntax: false,
34473 format: None,
34474 default: None,
34475 inferred_type: None,
34476 })))
34477 } else {
34478 Ok(Expression::Function(Box::new(Function::new(
34479 "TIME".to_string(),
34480 args,
34481 ))))
34482 }
34483 }
34484
34485 // DATETIME('string') -> CAST('string' AS TIMESTAMP)
34486 // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34487 // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
34488 // DATETIME(y, m, d, h, min, s) -> target-specific
34489 "DATETIME" => {
34490 // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
34491 if matches!(target, DialectType::BigQuery) {
34492 if args.len() == 2 {
34493 let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34494 if has_time_literal {
34495 let first = args.remove(0);
34496 let second = args.remove(0);
34497 let time_as_cast = match second {
34498 Expression::Literal(lit)
34499 if matches!(lit.as_ref(), Literal::Time(_)) =>
34500 {
34501 let Literal::Time(s) = lit.as_ref() else {
34502 unreachable!()
34503 };
34504 Expression::Cast(Box::new(Cast {
34505 this: Expression::Literal(Box::new(Literal::String(
34506 s.clone(),
34507 ))),
34508 to: DataType::Time {
34509 precision: None,
34510 timezone: false,
34511 },
34512 trailing_comments: vec![],
34513 double_colon_syntax: false,
34514 format: None,
34515 default: None,
34516 inferred_type: None,
34517 }))
34518 }
34519 other => other,
34520 };
34521 return Ok(Expression::Function(Box::new(Function::new(
34522 "DATETIME".to_string(),
34523 vec![first, time_as_cast],
34524 ))));
34525 }
34526 }
34527 return Ok(Expression::Function(Box::new(Function::new(
34528 "DATETIME".to_string(),
34529 args,
34530 ))));
34531 }
34532
34533 if args.len() == 1 {
34534 let arg = args.remove(0);
34535 Ok(Expression::Cast(Box::new(Cast {
34536 this: arg,
34537 to: DataType::Timestamp {
34538 timezone: false,
34539 precision: None,
34540 },
34541 trailing_comments: vec![],
34542 double_colon_syntax: false,
34543 format: None,
34544 default: None,
34545 inferred_type: None,
34546 })))
34547 } else if args.len() == 2 {
34548 let first = args.remove(0);
34549 let second = args.remove(0);
34550 // Check if second arg is a TIME literal
34551 let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34552 if is_time_literal {
34553 // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34554 let cast_date = Expression::Cast(Box::new(Cast {
34555 this: first,
34556 to: DataType::Date,
34557 trailing_comments: vec![],
34558 double_colon_syntax: false,
34559 format: None,
34560 default: None,
34561 inferred_type: None,
34562 }));
34563 // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
34564 let time_as_string = match second {
34565 Expression::Literal(lit)
34566 if matches!(lit.as_ref(), Literal::Time(_)) =>
34567 {
34568 let Literal::Time(s) = lit.as_ref() else {
34569 unreachable!()
34570 };
34571 Expression::Literal(Box::new(Literal::String(s.clone())))
34572 }
34573 other => other,
34574 };
34575 let cast_time = Expression::Cast(Box::new(Cast {
34576 this: time_as_string,
34577 to: DataType::Time {
34578 precision: None,
34579 timezone: false,
34580 },
34581 trailing_comments: vec![],
34582 double_colon_syntax: false,
34583 format: None,
34584 default: None,
34585 inferred_type: None,
34586 }));
34587 let add_expr =
34588 Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
34589 Ok(Expression::Cast(Box::new(Cast {
34590 this: add_expr,
34591 to: DataType::Timestamp {
34592 timezone: false,
34593 precision: None,
34594 },
34595 trailing_comments: vec![],
34596 double_colon_syntax: false,
34597 format: None,
34598 default: None,
34599 inferred_type: None,
34600 })))
34601 } else {
34602 // DATETIME('string', 'timezone')
34603 let cast_tstz = Expression::Cast(Box::new(Cast {
34604 this: first,
34605 to: DataType::Timestamp {
34606 timezone: true,
34607 precision: None,
34608 },
34609 trailing_comments: vec![],
34610 double_colon_syntax: false,
34611 format: None,
34612 default: None,
34613 inferred_type: None,
34614 }));
34615 let at_tz =
34616 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34617 this: cast_tstz,
34618 zone: second,
34619 }));
34620 Ok(Expression::Cast(Box::new(Cast {
34621 this: at_tz,
34622 to: DataType::Timestamp {
34623 timezone: false,
34624 precision: None,
34625 },
34626 trailing_comments: vec![],
34627 double_colon_syntax: false,
34628 format: None,
34629 default: None,
34630 inferred_type: None,
34631 })))
34632 }
34633 } else if args.len() >= 3 {
34634 // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
34635 // Other targets: the call is kept as DATETIME(...) unchanged
34636 if matches!(target, DialectType::Snowflake) {
34637 Ok(Expression::Function(Box::new(Function::new(
34638 "TIMESTAMP_FROM_PARTS".to_string(),
34639 args,
34640 ))))
34641 } else {
34642 Ok(Expression::Function(Box::new(Function::new(
34643 "DATETIME".to_string(),
34644 args,
34645 ))))
34646 }
34647 } else {
34648 Ok(Expression::Function(Box::new(Function::new(
34649 "DATETIME".to_string(),
34650 args,
34651 ))))
34652 }
34653 }
34654
34655 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
34656 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz (e.g. DuckDB); Snowflake: CONVERT_TIMEZONE(tz, CAST(x AS TIMESTAMP))
34657 "TIMESTAMP" => {
34658 if args.len() == 1 {
34659 let arg = args.remove(0);
34660 Ok(Expression::Cast(Box::new(Cast {
34661 this: arg,
34662 to: DataType::Timestamp {
34663 timezone: true,
34664 precision: None,
34665 },
34666 trailing_comments: vec![],
34667 double_colon_syntax: false,
34668 format: None,
34669 default: None,
34670 inferred_type: None,
34671 })))
34672 } else if args.len() == 2 {
34673 let arg = args.remove(0);
34674 let tz = args.remove(0);
34675 let cast_ts = Expression::Cast(Box::new(Cast {
34676 this: arg,
34677 to: DataType::Timestamp {
34678 timezone: false,
34679 precision: None,
34680 },
34681 trailing_comments: vec![],
34682 double_colon_syntax: false,
34683 format: None,
34684 default: None,
34685 inferred_type: None,
34686 }));
34687 if matches!(target, DialectType::Snowflake) {
34688 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
34689 Ok(Expression::Function(Box::new(Function::new(
34690 "CONVERT_TIMEZONE".to_string(),
34691 vec![tz, cast_ts],
34692 ))))
34693 } else {
34694 Ok(Expression::AtTimeZone(Box::new(
34695 crate::expressions::AtTimeZone {
34696 this: cast_ts,
34697 zone: tz,
34698 },
34699 )))
34700 }
34701 } else {
34702 Ok(Expression::Function(Box::new(Function::new(
34703 "TIMESTAMP".to_string(),
34704 args,
34705 ))))
34706 }
34707 }
34708
34709 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
34710 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
34711 "STRING" => {
34712 if args.len() == 1 {
34713 let arg = args.remove(0);
34714 let cast_type = match target {
34715 DialectType::DuckDB => DataType::Text,
34716 _ => DataType::VarChar {
34717 length: None,
34718 parenthesized_length: false,
34719 },
34720 };
34721 Ok(Expression::Cast(Box::new(Cast {
34722 this: arg,
34723 to: cast_type,
34724 trailing_comments: vec![],
34725 double_colon_syntax: false,
34726 format: None,
34727 default: None,
34728 inferred_type: None,
34729 })))
34730 } else if args.len() == 2 {
34731 let arg = args.remove(0);
34732 let tz = args.remove(0);
34733 let cast_type = match target {
34734 DialectType::DuckDB => DataType::Text,
34735 _ => DataType::VarChar {
34736 length: None,
34737 parenthesized_length: false,
34738 },
34739 };
34740 if matches!(target, DialectType::Snowflake) {
34741 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
34742 let convert_tz = Expression::Function(Box::new(Function::new(
34743 "CONVERT_TIMEZONE".to_string(),
34744 vec![
34745 Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
34746 tz,
34747 arg,
34748 ],
34749 )));
34750 Ok(Expression::Cast(Box::new(Cast {
34751 this: convert_tz,
34752 to: cast_type,
34753 trailing_comments: vec![],
34754 double_colon_syntax: false,
34755 format: None,
34756 default: None,
34757 inferred_type: None,
34758 })))
34759 } else {
34760 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
34761 let cast_ts = Expression::Cast(Box::new(Cast {
34762 this: arg,
34763 to: DataType::Timestamp {
34764 timezone: false,
34765 precision: None,
34766 },
34767 trailing_comments: vec![],
34768 double_colon_syntax: false,
34769 format: None,
34770 default: None,
34771 inferred_type: None,
34772 }));
34773 let at_utc =
34774 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34775 this: cast_ts,
34776 zone: Expression::Literal(Box::new(Literal::String(
34777 "UTC".to_string(),
34778 ))),
34779 }));
34780 let at_tz =
34781 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34782 this: at_utc,
34783 zone: tz,
34784 }));
34785 Ok(Expression::Cast(Box::new(Cast {
34786 this: at_tz,
34787 to: cast_type,
34788 trailing_comments: vec![],
34789 double_colon_syntax: false,
34790 format: None,
34791 default: None,
34792 inferred_type: None,
34793 })))
34794 }
34795 } else {
34796 Ok(Expression::Function(Box::new(Function::new(
34797 "STRING".to_string(),
34798 args,
34799 ))))
34800 }
34801 }
34802
34803 // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
34804 "UNIX_SECONDS" if args.len() == 1 => {
34805 let ts = args.remove(0);
34806 match target {
34807 DialectType::DuckDB => {
34808 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
34809 let cast_ts = Self::ensure_cast_timestamptz(ts);
34810 let epoch = Expression::Function(Box::new(Function::new(
34811 "EPOCH".to_string(),
34812 vec![cast_ts],
34813 )));
34814 Ok(Expression::Cast(Box::new(Cast {
34815 this: epoch,
34816 to: DataType::BigInt { length: None },
34817 trailing_comments: vec![],
34818 double_colon_syntax: false,
34819 format: None,
34820 default: None,
34821 inferred_type: None,
34822 })))
34823 }
34824 DialectType::Snowflake => {
34825 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
34826 let epoch = Expression::Cast(Box::new(Cast {
34827 this: Expression::Literal(Box::new(Literal::String(
34828 "1970-01-01 00:00:00+00".to_string(),
34829 ))),
34830 to: DataType::Timestamp {
34831 timezone: true,
34832 precision: None,
34833 },
34834 trailing_comments: vec![],
34835 double_colon_syntax: false,
34836 format: None,
34837 default: None,
34838 inferred_type: None,
34839 }));
34840 Ok(Expression::TimestampDiff(Box::new(
34841 crate::expressions::TimestampDiff {
34842 this: Box::new(epoch),
34843 expression: Box::new(ts),
34844 unit: Some("SECONDS".to_string()),
34845 },
34846 )))
34847 }
34848 _ => Ok(Expression::Function(Box::new(Function::new(
34849 "UNIX_SECONDS".to_string(),
34850 vec![ts],
34851 )))),
34852 }
34853 }
34854
34855 "UNIX_MILLIS" if args.len() == 1 => {
34856 let ts = args.remove(0);
34857 match target {
34858 DialectType::DuckDB => {
34859 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
34860 let cast_ts = Self::ensure_cast_timestamptz(ts);
34861 Ok(Expression::Function(Box::new(Function::new(
34862 "EPOCH_MS".to_string(),
34863 vec![cast_ts],
34864 ))))
34865 }
34866 _ => Ok(Expression::Function(Box::new(Function::new(
34867 "UNIX_MILLIS".to_string(),
34868 vec![ts],
34869 )))),
34870 }
34871 }
34872
34873 "UNIX_MICROS" if args.len() == 1 => {
34874 let ts = args.remove(0);
34875 match target {
34876 DialectType::DuckDB => {
34877 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
34878 let cast_ts = Self::ensure_cast_timestamptz(ts);
34879 Ok(Expression::Function(Box::new(Function::new(
34880 "EPOCH_US".to_string(),
34881 vec![cast_ts],
34882 ))))
34883 }
34884 _ => Ok(Expression::Function(Box::new(Function::new(
34885 "UNIX_MICROS".to_string(),
34886 vec![ts],
34887 )))),
34888 }
34889 }
34890
34891 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
34892 "ARRAY_CONCAT" | "LIST_CONCAT" => {
34893 match target {
34894 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
34895 // CONCAT(arr1, arr2, ...)
34896 Ok(Expression::Function(Box::new(Function::new(
34897 "CONCAT".to_string(),
34898 args,
34899 ))))
34900 }
34901 DialectType::Presto | DialectType::Trino => {
34902 // CONCAT(arr1, arr2, ...)
34903 Ok(Expression::Function(Box::new(Function::new(
34904 "CONCAT".to_string(),
34905 args,
34906 ))))
34907 }
34908 DialectType::Snowflake => {
34909 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
34910 if args.len() == 1 {
34911 // ARRAY_CAT requires 2 args, add empty array as []
34912 let empty_arr = Expression::ArrayFunc(Box::new(
34913 crate::expressions::ArrayConstructor {
34914 expressions: vec![],
34915 bracket_notation: true,
34916 use_list_keyword: false,
34917 },
34918 ));
34919 let mut new_args = args;
34920 new_args.push(empty_arr);
34921 Ok(Expression::Function(Box::new(Function::new(
34922 "ARRAY_CAT".to_string(),
34923 new_args,
34924 ))))
34925 } else if args.is_empty() {
34926 Ok(Expression::Function(Box::new(Function::new(
34927 "ARRAY_CAT".to_string(),
34928 args,
34929 ))))
34930 } else {
34931 let mut it = args.into_iter().rev();
34932 let mut result = it.next().unwrap();
34933 for arr in it {
34934 result = Expression::Function(Box::new(Function::new(
34935 "ARRAY_CAT".to_string(),
34936 vec![arr, result],
34937 )));
34938 }
34939 Ok(result)
34940 }
34941 }
34942 DialectType::PostgreSQL => {
34943 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
34944 if args.len() <= 1 {
34945 Ok(Expression::Function(Box::new(Function::new(
34946 "ARRAY_CAT".to_string(),
34947 args,
34948 ))))
34949 } else {
34950 let mut it = args.into_iter().rev();
34951 let mut result = it.next().unwrap();
34952 for arr in it {
34953 result = Expression::Function(Box::new(Function::new(
34954 "ARRAY_CAT".to_string(),
34955 vec![arr, result],
34956 )));
34957 }
34958 Ok(result)
34959 }
34960 }
34961 DialectType::Redshift => {
34962 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
34963 if args.len() <= 2 {
34964 Ok(Expression::Function(Box::new(Function::new(
34965 "ARRAY_CONCAT".to_string(),
34966 args,
34967 ))))
34968 } else {
34969 let mut it = args.into_iter().rev();
34970 let mut result = it.next().unwrap();
34971 for arr in it {
34972 result = Expression::Function(Box::new(Function::new(
34973 "ARRAY_CONCAT".to_string(),
34974 vec![arr, result],
34975 )));
34976 }
34977 Ok(result)
34978 }
34979 }
34980 DialectType::DuckDB => {
34981 // LIST_CONCAT supports multiple args natively in DuckDB
34982 Ok(Expression::Function(Box::new(Function::new(
34983 "LIST_CONCAT".to_string(),
34984 args,
34985 ))))
34986 }
34987 _ => Ok(Expression::Function(Box::new(Function::new(
34988 "ARRAY_CONCAT".to_string(),
34989 args,
34990 )))),
34991 }
34992 }
34993
34994 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
34995 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
34996 let arg = args.remove(0);
34997 match target {
34998 DialectType::Snowflake => {
34999 let array_agg =
35000 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
35001 this: arg,
35002 distinct: false,
35003 filter: None,
35004 order_by: vec![],
35005 name: None,
35006 ignore_nulls: None,
35007 having_max: None,
35008 limit: None,
35009 inferred_type: None,
35010 }));
35011 Ok(Expression::Function(Box::new(Function::new(
35012 "ARRAY_FLATTEN".to_string(),
35013 vec![array_agg],
35014 ))))
35015 }
35016 _ => Ok(Expression::Function(Box::new(Function::new(
35017 "ARRAY_CONCAT_AGG".to_string(),
35018 vec![arg],
35019 )))),
35020 }
35021 }
35022
35023 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
35024 "MD5" if args.len() == 1 => {
35025 let arg = args.remove(0);
35026 match target {
35027 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35028 // UNHEX(MD5(x))
35029 let md5 = Expression::Function(Box::new(Function::new(
35030 "MD5".to_string(),
35031 vec![arg],
35032 )));
35033 Ok(Expression::Function(Box::new(Function::new(
35034 "UNHEX".to_string(),
35035 vec![md5],
35036 ))))
35037 }
35038 DialectType::Snowflake => {
35039 // MD5_BINARY(x)
35040 Ok(Expression::Function(Box::new(Function::new(
35041 "MD5_BINARY".to_string(),
35042 vec![arg],
35043 ))))
35044 }
35045 _ => Ok(Expression::Function(Box::new(Function::new(
35046 "MD5".to_string(),
35047 vec![arg],
35048 )))),
35049 }
35050 }
35051
35052 "SHA1" if args.len() == 1 => {
35053 let arg = args.remove(0);
35054 match target {
35055 DialectType::DuckDB => {
35056 // UNHEX(SHA1(x))
35057 let sha1 = Expression::Function(Box::new(Function::new(
35058 "SHA1".to_string(),
35059 vec![arg],
35060 )));
35061 Ok(Expression::Function(Box::new(Function::new(
35062 "UNHEX".to_string(),
35063 vec![sha1],
35064 ))))
35065 }
35066 _ => Ok(Expression::Function(Box::new(Function::new(
35067 "SHA1".to_string(),
35068 vec![arg],
35069 )))),
35070 }
35071 }
35072
35073 "SHA256" if args.len() == 1 => {
35074 let arg = args.remove(0);
35075 match target {
35076 DialectType::DuckDB => {
35077 // UNHEX(SHA256(x))
35078 let sha = Expression::Function(Box::new(Function::new(
35079 "SHA256".to_string(),
35080 vec![arg],
35081 )));
35082 Ok(Expression::Function(Box::new(Function::new(
35083 "UNHEX".to_string(),
35084 vec![sha],
35085 ))))
35086 }
35087 DialectType::Snowflake => {
35088 // SHA2_BINARY(x, 256)
35089 Ok(Expression::Function(Box::new(Function::new(
35090 "SHA2_BINARY".to_string(),
35091 vec![arg, Expression::number(256)],
35092 ))))
35093 }
35094 DialectType::Redshift | DialectType::Spark => {
35095 // SHA2(x, 256)
35096 Ok(Expression::Function(Box::new(Function::new(
35097 "SHA2".to_string(),
35098 vec![arg, Expression::number(256)],
35099 ))))
35100 }
35101 _ => Ok(Expression::Function(Box::new(Function::new(
35102 "SHA256".to_string(),
35103 vec![arg],
35104 )))),
35105 }
35106 }
35107
35108 "SHA512" if args.len() == 1 => {
35109 let arg = args.remove(0);
35110 match target {
35111 DialectType::Snowflake => {
35112 // SHA2_BINARY(x, 512)
35113 Ok(Expression::Function(Box::new(Function::new(
35114 "SHA2_BINARY".to_string(),
35115 vec![arg, Expression::number(512)],
35116 ))))
35117 }
35118 DialectType::Redshift | DialectType::Spark => {
35119 // SHA2(x, 512)
35120 Ok(Expression::Function(Box::new(Function::new(
35121 "SHA2".to_string(),
35122 vec![arg, Expression::number(512)],
35123 ))))
35124 }
35125 _ => Ok(Expression::Function(Box::new(Function::new(
35126 "SHA512".to_string(),
35127 vec![arg],
35128 )))),
35129 }
35130 }
35131
35132 // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
35133 "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
35134 let str_expr = args.remove(0);
35135 let pattern = args.remove(0);
35136
35137 // Heuristic: a string-literal pattern containing both '(' and ')' is treated as having a capturing group
35138 let has_groups = match &pattern {
35139 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
35140 let Literal::String(s) = lit.as_ref() else {
35141 unreachable!()
35142 };
35143 s.contains('(') && s.contains(')')
35144 }
35145 _ => false,
35146 };
35147
35148 match target {
35149 DialectType::DuckDB => {
35150 let group = if has_groups {
35151 Expression::number(1)
35152 } else {
35153 Expression::number(0)
35154 };
35155 Ok(Expression::Function(Box::new(Function::new(
35156 "REGEXP_EXTRACT_ALL".to_string(),
35157 vec![str_expr, pattern, group],
35158 ))))
35159 }
35160 DialectType::Spark | DialectType::Databricks => {
35161 // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
35162 if has_groups {
35163 Ok(Expression::Function(Box::new(Function::new(
35164 "REGEXP_EXTRACT_ALL".to_string(),
35165 vec![str_expr, pattern],
35166 ))))
35167 } else {
35168 Ok(Expression::Function(Box::new(Function::new(
35169 "REGEXP_EXTRACT_ALL".to_string(),
35170 vec![str_expr, pattern, Expression::number(0)],
35171 ))))
35172 }
35173 }
35174 DialectType::Presto | DialectType::Trino => {
35175 if has_groups {
35176 Ok(Expression::Function(Box::new(Function::new(
35177 "REGEXP_EXTRACT_ALL".to_string(),
35178 vec![str_expr, pattern, Expression::number(1)],
35179 ))))
35180 } else {
35181 Ok(Expression::Function(Box::new(Function::new(
35182 "REGEXP_EXTRACT_ALL".to_string(),
35183 vec![str_expr, pattern],
35184 ))))
35185 }
35186 }
35187 DialectType::Snowflake => {
35188 if has_groups {
35189 // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
35190 Ok(Expression::Function(Box::new(Function::new(
35191 "REGEXP_EXTRACT_ALL".to_string(),
35192 vec![
35193 str_expr,
35194 pattern,
35195 Expression::number(1),
35196 Expression::number(1),
35197 Expression::Literal(Box::new(Literal::String("c".to_string()))),
35198 Expression::number(1),
35199 ],
35200 ))))
35201 } else {
35202 Ok(Expression::Function(Box::new(Function::new(
35203 "REGEXP_EXTRACT_ALL".to_string(),
35204 vec![str_expr, pattern],
35205 ))))
35206 }
35207 }
35208 _ => Ok(Expression::Function(Box::new(Function::new(
35209 "REGEXP_EXTRACT_ALL".to_string(),
35210 vec![str_expr, pattern],
35211 )))),
35212 }
35213 }
35214
35215 // MOD(x, y) -> x % y for PostgreSQL/DuckDB/Presto/Trino/Athena/Snowflake/Hive/Spark/Databricks
35216 "MOD" if args.len() == 2 => {
35217 match target {
35218 DialectType::PostgreSQL
35219 | DialectType::DuckDB
35220 | DialectType::Presto
35221 | DialectType::Trino
35222 | DialectType::Athena
35223 | DialectType::Snowflake => {
35224 let x = args.remove(0);
35225 let y = args.remove(0);
35226 // Wrap complex expressions in parens to preserve precedence
35227 let needs_paren = |e: &Expression| {
35228 matches!(
35229 e,
35230 Expression::Add(_)
35231 | Expression::Sub(_)
35232 | Expression::Mul(_)
35233 | Expression::Div(_)
35234 )
35235 };
35236 let x = if needs_paren(&x) {
35237 Expression::Paren(Box::new(crate::expressions::Paren {
35238 this: x,
35239 trailing_comments: vec![],
35240 }))
35241 } else {
35242 x
35243 };
35244 let y = if needs_paren(&y) {
35245 Expression::Paren(Box::new(crate::expressions::Paren {
35246 this: y,
35247 trailing_comments: vec![],
35248 }))
35249 } else {
35250 y
35251 };
35252 Ok(Expression::Mod(Box::new(
35253 crate::expressions::BinaryOp::new(x, y),
35254 )))
35255 }
35256 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
35257 // Hive/Spark: a % b
35258 let x = args.remove(0);
35259 let y = args.remove(0);
35260 let needs_paren = |e: &Expression| {
35261 matches!(
35262 e,
35263 Expression::Add(_)
35264 | Expression::Sub(_)
35265 | Expression::Mul(_)
35266 | Expression::Div(_)
35267 )
35268 };
35269 let x = if needs_paren(&x) {
35270 Expression::Paren(Box::new(crate::expressions::Paren {
35271 this: x,
35272 trailing_comments: vec![],
35273 }))
35274 } else {
35275 x
35276 };
35277 let y = if needs_paren(&y) {
35278 Expression::Paren(Box::new(crate::expressions::Paren {
35279 this: y,
35280 trailing_comments: vec![],
35281 }))
35282 } else {
35283 y
35284 };
35285 Ok(Expression::Mod(Box::new(
35286 crate::expressions::BinaryOp::new(x, y),
35287 )))
35288 }
35289 _ => Ok(Expression::Function(Box::new(Function::new(
35290 "MOD".to_string(),
35291 args,
35292 )))),
35293 }
35294 }
35295
35296 // ARRAY_FILTER(arr, lambda) -> LIST_FILTER for DuckDB, ARRAY_FILTER for StarRocks, FILTER otherwise (e.g. Hive/Spark/Presto)
35297 "ARRAY_FILTER" if args.len() == 2 => {
35298 let name = match target {
35299 DialectType::DuckDB => "LIST_FILTER",
35300 DialectType::StarRocks => "ARRAY_FILTER",
35301 _ => "FILTER",
35302 };
35303 Ok(Expression::Function(Box::new(Function::new(
35304 name.to_string(),
35305 args,
35306 ))))
35307 }
35308 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
35309 "FILTER" if args.len() == 2 => {
35310 let name = match target {
35311 DialectType::DuckDB => "LIST_FILTER",
35312 DialectType::StarRocks => "ARRAY_FILTER",
35313 _ => "FILTER",
35314 };
35315 Ok(Expression::Function(Box::new(Function::new(
35316 name.to_string(),
35317 args,
35318 ))))
35319 }
35320 // REDUCE(arr, init, lambda1[, lambda2]) -> AGGREGATE for Spark/Databricks
35321 "REDUCE" if args.len() >= 3 => {
35322 let name = match target {
35323 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
35324 _ => "REDUCE",
35325 };
35326 Ok(Expression::Function(Box::new(Function::new(
35327 name.to_string(),
35328 args,
35329 ))))
35330 }
35331 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
35332 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
35333 Function::new("ARRAY_REVERSE".to_string(), args),
35334 ))),
35335
35336 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
35337 "CONCAT" if args.len() > 2 => match target {
35338 DialectType::DuckDB => {
35339 let mut it = args.into_iter();
35340 let mut result = it.next().unwrap();
35341 for arg in it {
35342 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
35343 this: Box::new(result),
35344 expression: Box::new(arg),
35345 safe: None,
35346 }));
35347 }
35348 Ok(result)
35349 }
35350 _ => Ok(Expression::Function(Box::new(Function::new(
35351 "CONCAT".to_string(),
35352 args,
35353 )))),
35354 },
35355
35356 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
35357 "GENERATE_DATE_ARRAY" => {
35358 if matches!(target, DialectType::BigQuery) {
35359 // BQ->BQ: add default interval if not present
35360 if args.len() == 2 {
35361 let start = args.remove(0);
35362 let end = args.remove(0);
35363 let default_interval =
35364 Expression::Interval(Box::new(crate::expressions::Interval {
35365 this: Some(Expression::Literal(Box::new(Literal::String(
35366 "1".to_string(),
35367 )))),
35368 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35369 unit: crate::expressions::IntervalUnit::Day,
35370 use_plural: false,
35371 }),
35372 }));
35373 Ok(Expression::Function(Box::new(Function::new(
35374 "GENERATE_DATE_ARRAY".to_string(),
35375 vec![start, end, default_interval],
35376 ))))
35377 } else {
35378 Ok(Expression::Function(Box::new(Function::new(
35379 "GENERATE_DATE_ARRAY".to_string(),
35380 args,
35381 ))))
35382 }
35383 } else if matches!(target, DialectType::DuckDB) {
35384 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
35385 let start = args.get(0).cloned();
35386 let end = args.get(1).cloned();
35387 let step = args.get(2).cloned().or_else(|| {
35388 Some(Expression::Interval(Box::new(
35389 crate::expressions::Interval {
35390 this: Some(Expression::Literal(Box::new(Literal::String(
35391 "1".to_string(),
35392 )))),
35393 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35394 unit: crate::expressions::IntervalUnit::Day,
35395 use_plural: false,
35396 }),
35397 },
35398 )))
35399 });
35400
35401 // Wrap start/end in CAST(... AS DATE) only for string literals
35402 let maybe_cast_date = |expr: Expression| -> Expression {
35403 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
35404 {
35405 Expression::Cast(Box::new(Cast {
35406 this: expr,
35407 to: DataType::Date,
35408 trailing_comments: vec![],
35409 double_colon_syntax: false,
35410 format: None,
35411 default: None,
35412 inferred_type: None,
35413 }))
35414 } else {
35415 expr
35416 }
35417 };
35418 let cast_start = start.map(maybe_cast_date);
35419 let cast_end = end.map(maybe_cast_date);
35420
35421 let gen_series =
35422 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
35423 start: cast_start.map(Box::new),
35424 end: cast_end.map(Box::new),
35425 step: step.map(Box::new),
35426 is_end_exclusive: None,
35427 }));
35428
35429 // Wrap in CAST(... AS DATE[])
35430 Ok(Expression::Cast(Box::new(Cast {
35431 this: gen_series,
35432 to: DataType::Array {
35433 element_type: Box::new(DataType::Date),
35434 dimension: None,
35435 },
35436 trailing_comments: vec![],
35437 double_colon_syntax: false,
35438 format: None,
35439 default: None,
35440 inferred_type: None,
35441 })))
35442 } else if matches!(target, DialectType::Snowflake) {
35443 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
35444 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
35445 if args.len() == 2 {
35446 let start = args.remove(0);
35447 let end = args.remove(0);
35448 let default_interval =
35449 Expression::Interval(Box::new(crate::expressions::Interval {
35450 this: Some(Expression::Literal(Box::new(Literal::String(
35451 "1".to_string(),
35452 )))),
35453 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35454 unit: crate::expressions::IntervalUnit::Day,
35455 use_plural: false,
35456 }),
35457 }));
35458 Ok(Expression::Function(Box::new(Function::new(
35459 "GENERATE_DATE_ARRAY".to_string(),
35460 vec![start, end, default_interval],
35461 ))))
35462 } else {
35463 Ok(Expression::Function(Box::new(Function::new(
35464 "GENERATE_DATE_ARRAY".to_string(),
35465 args,
35466 ))))
35467 }
35468 } else {
35469 // Convert to GenerateSeries for other targets
35470 let start = args.get(0).cloned();
35471 let end = args.get(1).cloned();
35472 let step = args.get(2).cloned().or_else(|| {
35473 Some(Expression::Interval(Box::new(
35474 crate::expressions::Interval {
35475 this: Some(Expression::Literal(Box::new(Literal::String(
35476 "1".to_string(),
35477 )))),
35478 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35479 unit: crate::expressions::IntervalUnit::Day,
35480 use_plural: false,
35481 }),
35482 },
35483 )))
35484 });
35485 Ok(Expression::GenerateSeries(Box::new(
35486 crate::expressions::GenerateSeries {
35487 start: start.map(Box::new),
35488 end: end.map(Box::new),
35489 step: step.map(Box::new),
35490 is_end_exclusive: None,
35491 },
35492 )))
35493 }
35494 }
35495
35496 // PARSE_DATE(format, str) -> target-specific
35497 "PARSE_DATE" if args.len() == 2 => {
35498 let format = args.remove(0);
35499 let str_expr = args.remove(0);
35500 match target {
35501 DialectType::DuckDB => {
35502 // CAST(STRPTIME(str, duck_format) AS DATE)
35503 let duck_format = Self::bq_format_to_duckdb(&format);
35504 let strptime = Expression::Function(Box::new(Function::new(
35505 "STRPTIME".to_string(),
35506 vec![str_expr, duck_format],
35507 )));
35508 Ok(Expression::Cast(Box::new(Cast {
35509 this: strptime,
35510 to: DataType::Date,
35511 trailing_comments: vec![],
35512 double_colon_syntax: false,
35513 format: None,
35514 default: None,
35515 inferred_type: None,
35516 })))
35517 }
35518 DialectType::Snowflake => {
35519 // _POLYGLOT_DATE(str, snowflake_format)
35520 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
35521 let sf_format = Self::bq_format_to_snowflake(&format);
35522 Ok(Expression::Function(Box::new(Function::new(
35523 "_POLYGLOT_DATE".to_string(),
35524 vec![str_expr, sf_format],
35525 ))))
35526 }
35527 _ => Ok(Expression::Function(Box::new(Function::new(
35528 "PARSE_DATE".to_string(),
35529 vec![format, str_expr],
35530 )))),
35531 }
35532 }
35533
35534 // PARSE_TIMESTAMP(format, str[, tz]) -> target-specific (the DuckDB branch drops tz)
35535 "PARSE_TIMESTAMP" if args.len() >= 2 => {
35536 let format = args.remove(0);
35537 let str_expr = args.remove(0);
35538 let tz = if !args.is_empty() {
35539 Some(args.remove(0))
35540 } else {
35541 None
35542 };
35543 match target {
35544 DialectType::DuckDB => {
35545 let duck_format = Self::bq_format_to_duckdb(&format);
35546 let strptime = Expression::Function(Box::new(Function::new(
35547 "STRPTIME".to_string(),
35548 vec![str_expr, duck_format],
35549 )));
35550 Ok(strptime)
35551 }
35552 _ => {
35553 let mut result_args = vec![format, str_expr];
35554 if let Some(tz_arg) = tz {
35555 result_args.push(tz_arg);
35556 }
35557 Ok(Expression::Function(Box::new(Function::new(
35558 "PARSE_TIMESTAMP".to_string(),
35559 result_args,
35560 ))))
35561 }
35562 }
35563 }
35564
35565 // FORMAT_DATE(format, date) -> target-specific
35566 "FORMAT_DATE" if args.len() == 2 => {
35567 let format = args.remove(0);
35568 let date_expr = args.remove(0);
35569 match target {
35570 DialectType::DuckDB => {
35571 // STRFTIME(CAST(date AS DATE), format)
35572 let cast_date = Expression::Cast(Box::new(Cast {
35573 this: date_expr,
35574 to: DataType::Date,
35575 trailing_comments: vec![],
35576 double_colon_syntax: false,
35577 format: None,
35578 default: None,
35579 inferred_type: None,
35580 }));
35581 Ok(Expression::Function(Box::new(Function::new(
35582 "STRFTIME".to_string(),
35583 vec![cast_date, format],
35584 ))))
35585 }
35586 _ => Ok(Expression::Function(Box::new(Function::new(
35587 "FORMAT_DATE".to_string(),
35588 vec![format, date_expr],
35589 )))),
35590 }
35591 }
35592
35593 // FORMAT_DATETIME(format, datetime) -> target-specific
35594 "FORMAT_DATETIME" if args.len() == 2 => {
35595 let format = args.remove(0);
35596 let dt_expr = args.remove(0);
35597
35598 if matches!(target, DialectType::BigQuery) {
35599 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
35600 let norm_format = Self::bq_format_normalize_bq(&format);
35601 // Also rewrite typed timestamp literals as CAST('...' AS DATETIME)
35602 let norm_dt = match dt_expr {
35603 Expression::Literal(lit)
35604 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
35605 {
35606 let Literal::Timestamp(s) = lit.as_ref() else {
35607 unreachable!()
35608 };
35609 Expression::Cast(Box::new(Cast {
35610 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35611 to: DataType::Custom {
35612 name: "DATETIME".to_string(),
35613 },
35614 trailing_comments: vec![],
35615 double_colon_syntax: false,
35616 format: None,
35617 default: None,
35618 inferred_type: None,
35619 }))
35620 }
35621 other => other,
35622 };
35623 return Ok(Expression::Function(Box::new(Function::new(
35624 "FORMAT_DATETIME".to_string(),
35625 vec![norm_format, norm_dt],
35626 ))));
35627 }
35628
35629 match target {
35630 DialectType::DuckDB => {
35631 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
35632 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
35633 let duck_format = Self::bq_format_to_duckdb(&format);
35634 Ok(Expression::Function(Box::new(Function::new(
35635 "STRFTIME".to_string(),
35636 vec![cast_dt, duck_format],
35637 ))))
35638 }
35639 _ => Ok(Expression::Function(Box::new(Function::new(
35640 "FORMAT_DATETIME".to_string(),
35641 vec![format, dt_expr],
35642 )))),
35643 }
35644 }
35645
35646 // FORMAT_TIMESTAMP(format, ts) -> target-specific
35647 "FORMAT_TIMESTAMP" if args.len() == 2 => {
35648 let format = args.remove(0);
35649 let ts_expr = args.remove(0);
35650 match target {
35651 DialectType::DuckDB => {
35652 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
35653 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35654 let cast_ts = Expression::Cast(Box::new(Cast {
35655 this: cast_tstz,
35656 to: DataType::Timestamp {
35657 timezone: false,
35658 precision: None,
35659 },
35660 trailing_comments: vec![],
35661 double_colon_syntax: false,
35662 format: None,
35663 default: None,
35664 inferred_type: None,
35665 }));
35666 Ok(Expression::Function(Box::new(Function::new(
35667 "STRFTIME".to_string(),
35668 vec![cast_ts, format],
35669 ))))
35670 }
35671 DialectType::Snowflake => {
35672 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
35673 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35674 let cast_ts = Expression::Cast(Box::new(Cast {
35675 this: cast_tstz,
35676 to: DataType::Timestamp {
35677 timezone: false,
35678 precision: None,
35679 },
35680 trailing_comments: vec![],
35681 double_colon_syntax: false,
35682 format: None,
35683 default: None,
35684 inferred_type: None,
35685 }));
35686 let sf_format = Self::bq_format_to_snowflake(&format);
35687 Ok(Expression::Function(Box::new(Function::new(
35688 "TO_CHAR".to_string(),
35689 vec![cast_ts, sf_format],
35690 ))))
35691 }
35692 _ => Ok(Expression::Function(Box::new(Function::new(
35693 "FORMAT_TIMESTAMP".to_string(),
35694 vec![format, ts_expr],
35695 )))),
35696 }
35697 }
35698
35699 // UNIX_DATE(date) -> DATE_DIFF('DAY', CAST('1970-01-01' AS DATE), date) for DuckDB
35700 "UNIX_DATE" if args.len() == 1 => {
35701 let date = args.remove(0);
35702 match target {
35703 DialectType::DuckDB => {
35704 let epoch = Expression::Cast(Box::new(Cast {
35705 this: Expression::Literal(Box::new(Literal::String(
35706 "1970-01-01".to_string(),
35707 ))),
35708 to: DataType::Date,
35709 trailing_comments: vec![],
35710 double_colon_syntax: false,
35711 format: None,
35712 default: None,
35713 inferred_type: None,
35714 }));
35715 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
35716 // Need to convert DATE literal to CAST
35717 let norm_date = Self::date_literal_to_cast(date);
35718 Ok(Expression::Function(Box::new(Function::new(
35719 "DATE_DIFF".to_string(),
35720 vec![
35721 Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
35722 epoch,
35723 norm_date,
35724 ],
35725 ))))
35726 }
35727 _ => Ok(Expression::Function(Box::new(Function::new(
35728 "UNIX_DATE".to_string(),
35729 vec![date],
35730 )))),
35731 }
35732 }
35733
35734 // UNIX_SECONDS(ts) -> target-specific
35735 "UNIX_SECONDS" if args.len() == 1 => {
35736 let ts = args.remove(0);
35737 match target {
35738 DialectType::DuckDB => {
35739 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
35740 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35741 let epoch = Expression::Function(Box::new(Function::new(
35742 "EPOCH".to_string(),
35743 vec![norm_ts],
35744 )));
35745 Ok(Expression::Cast(Box::new(Cast {
35746 this: epoch,
35747 to: DataType::BigInt { length: None },
35748 trailing_comments: vec![],
35749 double_colon_syntax: false,
35750 format: None,
35751 default: None,
35752 inferred_type: None,
35753 })))
35754 }
35755 DialectType::Snowflake => {
35756 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
35757 let epoch = Expression::Cast(Box::new(Cast {
35758 this: Expression::Literal(Box::new(Literal::String(
35759 "1970-01-01 00:00:00+00".to_string(),
35760 ))),
35761 to: DataType::Timestamp {
35762 timezone: true,
35763 precision: None,
35764 },
35765 trailing_comments: vec![],
35766 double_colon_syntax: false,
35767 format: None,
35768 default: None,
35769 inferred_type: None,
35770 }));
35771 Ok(Expression::Function(Box::new(Function::new(
35772 "TIMESTAMPDIFF".to_string(),
35773 vec![
35774 Expression::Identifier(Identifier::new("SECONDS".to_string())),
35775 epoch,
35776 ts,
35777 ],
35778 ))))
35779 }
35780 _ => Ok(Expression::Function(Box::new(Function::new(
35781 "UNIX_SECONDS".to_string(),
35782 vec![ts],
35783 )))),
35784 }
35785 }
35786
35787 // UNIX_MILLIS(ts) -> target-specific
35788 "UNIX_MILLIS" if args.len() == 1 => {
35789 let ts = args.remove(0);
35790 match target {
35791 DialectType::DuckDB => {
35792 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35793 Ok(Expression::Function(Box::new(Function::new(
35794 "EPOCH_MS".to_string(),
35795 vec![norm_ts],
35796 ))))
35797 }
35798 _ => Ok(Expression::Function(Box::new(Function::new(
35799 "UNIX_MILLIS".to_string(),
35800 vec![ts],
35801 )))),
35802 }
35803 }
35804
35805 // UNIX_MICROS(ts) -> target-specific
35806 "UNIX_MICROS" if args.len() == 1 => {
35807 let ts = args.remove(0);
35808 match target {
35809 DialectType::DuckDB => {
35810 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35811 Ok(Expression::Function(Box::new(Function::new(
35812 "EPOCH_US".to_string(),
35813 vec![norm_ts],
35814 ))))
35815 }
35816 _ => Ok(Expression::Function(Box::new(Function::new(
35817 "UNIX_MICROS".to_string(),
35818 vec![ts],
35819 )))),
35820 }
35821 }
35822
35823 // INSTR(str, substr) -> target-specific
35824 "INSTR" => {
35825 if matches!(target, DialectType::BigQuery) {
35826 // BQ->BQ: keep as INSTR
35827 Ok(Expression::Function(Box::new(Function::new(
35828 "INSTR".to_string(),
35829 args,
35830 ))))
35831 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
35832 // Snowflake: CHARINDEX(substr, str) - swap args
35833 let str_expr = args.remove(0);
35834 let substr = args.remove(0);
35835 Ok(Expression::Function(Box::new(Function::new(
35836 "CHARINDEX".to_string(),
35837 vec![substr, str_expr],
35838 ))))
35839 } else {
35840 // Keep as INSTR for other targets
35841 Ok(Expression::Function(Box::new(Function::new(
35842 "INSTR".to_string(),
35843 args,
35844 ))))
35845 }
35846 }
35847
35848 // CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_DATETIME / CURRENT_TIME handling - parens normalization and timezone
35849 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
35850 if matches!(target, DialectType::BigQuery) {
35851 // BQ->BQ: always output with parens (function form), keep any timezone arg
35852 Ok(Expression::Function(Box::new(Function::new(name, args))))
35853 } else if name == "CURRENT_DATE" && args.len() == 1 {
35854 // CURRENT_DATE('UTC') - has timezone arg
35855 let tz_arg = args.remove(0);
35856 match target {
35857 DialectType::DuckDB => {
35858 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
35859 let ct = Expression::CurrentTimestamp(
35860 crate::expressions::CurrentTimestamp {
35861 precision: None,
35862 sysdate: false,
35863 },
35864 );
35865 let at_tz =
35866 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
35867 this: ct,
35868 zone: tz_arg,
35869 }));
35870 Ok(Expression::Cast(Box::new(Cast {
35871 this: at_tz,
35872 to: DataType::Date,
35873 trailing_comments: vec![],
35874 double_colon_syntax: false,
35875 format: None,
35876 default: None,
35877 inferred_type: None,
35878 })))
35879 }
35880 DialectType::Snowflake => {
35881 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
35882 let ct = Expression::Function(Box::new(Function::new(
35883 "CURRENT_TIMESTAMP".to_string(),
35884 vec![],
35885 )));
35886 let convert = Expression::Function(Box::new(Function::new(
35887 "CONVERT_TIMEZONE".to_string(),
35888 vec![tz_arg, ct],
35889 )));
35890 Ok(Expression::Cast(Box::new(Cast {
35891 this: convert,
35892 to: DataType::Date,
35893 trailing_comments: vec![],
35894 double_colon_syntax: false,
35895 format: None,
35896 default: None,
35897 inferred_type: None,
35898 })))
35899 }
35900 _ => {
35901 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
35902 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
35903 Ok(Expression::AtTimeZone(Box::new(
35904 crate::expressions::AtTimeZone {
35905 this: cd,
35906 zone: tz_arg,
35907 },
35908 )))
35909 }
35910 }
35911 } else if (name == "CURRENT_TIMESTAMP"
35912 || name == "CURRENT_TIME"
35913 || name == "CURRENT_DATE")
35914 && args.is_empty()
35915 && matches!(
35916 target,
35917 DialectType::PostgreSQL
35918 | DialectType::DuckDB
35919 | DialectType::Presto
35920 | DialectType::Trino
35921 )
35922 {
35923 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
35924 if name == "CURRENT_TIMESTAMP" {
35925 Ok(Expression::CurrentTimestamp(
35926 crate::expressions::CurrentTimestamp {
35927 precision: None,
35928 sysdate: false,
35929 },
35930 ))
35931 } else if name == "CURRENT_DATE" {
35932 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
35933 } else {
35934 // CURRENT_TIME
35935 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
35936 precision: None,
35937 }))
35938 }
35939 } else {
35940 // All other targets: keep as function (with parens)
35941 Ok(Expression::Function(Box::new(Function::new(name, args))))
35942 }
35943 }
35944
35945 // JSON_QUERY(json, path) -> target-specific
35946 "JSON_QUERY" if args.len() == 2 => {
35947 match target {
35948 DialectType::DuckDB | DialectType::SQLite => {
35949 // json -> path syntax
35950 let json_expr = args.remove(0);
35951 let path = args.remove(0);
35952 Ok(Expression::JsonExtract(Box::new(
35953 crate::expressions::JsonExtractFunc {
35954 this: json_expr,
35955 path,
35956 returning: None,
35957 arrow_syntax: true,
35958 hash_arrow_syntax: false,
35959 wrapper_option: None,
35960 quotes_option: None,
35961 on_scalar_string: false,
35962 on_error: None,
35963 },
35964 )))
35965 }
35966 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35967 Ok(Expression::Function(Box::new(Function::new(
35968 "GET_JSON_OBJECT".to_string(),
35969 args,
35970 ))))
35971 }
35972 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
35973 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
35974 )),
35975 _ => Ok(Expression::Function(Box::new(Function::new(
35976 "JSON_QUERY".to_string(),
35977 args,
35978 )))),
35979 }
35980 }
35981
35982 // JSON_VALUE_ARRAY(json, path) -> target-specific
35983 "JSON_VALUE_ARRAY" if args.len() == 2 => {
35984 match target {
35985 DialectType::DuckDB => {
35986 // CAST(json -> path AS TEXT[])
35987 let json_expr = args.remove(0);
35988 let path = args.remove(0);
35989 let arrow = Expression::JsonExtract(Box::new(
35990 crate::expressions::JsonExtractFunc {
35991 this: json_expr,
35992 path,
35993 returning: None,
35994 arrow_syntax: true,
35995 hash_arrow_syntax: false,
35996 wrapper_option: None,
35997 quotes_option: None,
35998 on_scalar_string: false,
35999 on_error: None,
36000 },
36001 ));
36002 Ok(Expression::Cast(Box::new(Cast {
36003 this: arrow,
36004 to: DataType::Array {
36005 element_type: Box::new(DataType::Text),
36006 dimension: None,
36007 },
36008 trailing_comments: vec![],
36009 double_colon_syntax: false,
36010 format: None,
36011 default: None,
36012 inferred_type: None,
36013 })))
36014 }
36015 DialectType::Snowflake => {
36016 let json_expr = args.remove(0);
36017 let path_expr = args.remove(0);
36018 // Convert JSON path from $.path to just path
36019 let sf_path = if let Expression::Literal(ref lit) = path_expr {
36020 if let Literal::String(ref s) = lit.as_ref() {
36021 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
36022 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
36023 } else {
36024 path_expr.clone()
36025 }
36026 } else {
36027 path_expr
36028 };
36029 let parse_json = Expression::Function(Box::new(Function::new(
36030 "PARSE_JSON".to_string(),
36031 vec![json_expr],
36032 )));
36033 let get_path = Expression::Function(Box::new(Function::new(
36034 "GET_PATH".to_string(),
36035 vec![parse_json, sf_path],
36036 )));
36037 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
36038 let cast_expr = Expression::Cast(Box::new(Cast {
36039 this: Expression::Identifier(Identifier::new("x")),
36040 to: DataType::VarChar {
36041 length: None,
36042 parenthesized_length: false,
36043 },
36044 trailing_comments: vec![],
36045 double_colon_syntax: false,
36046 format: None,
36047 default: None,
36048 inferred_type: None,
36049 }));
36050 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36051 parameters: vec![Identifier::new("x")],
36052 body: cast_expr,
36053 colon: false,
36054 parameter_types: vec![],
36055 }));
36056 Ok(Expression::Function(Box::new(Function::new(
36057 "TRANSFORM".to_string(),
36058 vec![get_path, lambda],
36059 ))))
36060 }
36061 _ => Ok(Expression::Function(Box::new(Function::new(
36062 "JSON_VALUE_ARRAY".to_string(),
36063 args,
36064 )))),
36065 }
36066 }
36067
36068 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
36069 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
36070 // This is different from Hive/Spark where 3rd arg is "group_index"
36071 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
36072 match target {
36073 DialectType::DuckDB
36074 | DialectType::Presto
36075 | DialectType::Trino
36076 | DialectType::Athena => {
36077 if args.len() == 2 {
36078 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
36079 args.push(Expression::number(1));
36080 Ok(Expression::Function(Box::new(Function::new(
36081 "REGEXP_EXTRACT".to_string(),
36082 args,
36083 ))))
36084 } else if args.len() == 3 {
36085 let val = args.remove(0);
36086 let regex = args.remove(0);
36087 let position = args.remove(0);
36088 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36089 if is_pos_1 {
36090 Ok(Expression::Function(Box::new(Function::new(
36091 "REGEXP_EXTRACT".to_string(),
36092 vec![val, regex, Expression::number(1)],
36093 ))))
36094 } else {
36095 let substring_expr = Expression::Function(Box::new(Function::new(
36096 "SUBSTRING".to_string(),
36097 vec![val, position],
36098 )));
36099 let nullif_expr = Expression::Function(Box::new(Function::new(
36100 "NULLIF".to_string(),
36101 vec![
36102 substring_expr,
36103 Expression::Literal(Box::new(Literal::String(
36104 String::new(),
36105 ))),
36106 ],
36107 )));
36108 Ok(Expression::Function(Box::new(Function::new(
36109 "REGEXP_EXTRACT".to_string(),
36110 vec![nullif_expr, regex, Expression::number(1)],
36111 ))))
36112 }
36113 } else if args.len() == 4 {
36114 let val = args.remove(0);
36115 let regex = args.remove(0);
36116 let position = args.remove(0);
36117 let occurrence = args.remove(0);
36118 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36119 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36120 if is_pos_1 && is_occ_1 {
36121 Ok(Expression::Function(Box::new(Function::new(
36122 "REGEXP_EXTRACT".to_string(),
36123 vec![val, regex, Expression::number(1)],
36124 ))))
36125 } else {
36126 let subject = if is_pos_1 {
36127 val
36128 } else {
36129 let substring_expr = Expression::Function(Box::new(
36130 Function::new("SUBSTRING".to_string(), vec![val, position]),
36131 ));
36132 Expression::Function(Box::new(Function::new(
36133 "NULLIF".to_string(),
36134 vec![
36135 substring_expr,
36136 Expression::Literal(Box::new(Literal::String(
36137 String::new(),
36138 ))),
36139 ],
36140 )))
36141 };
36142 let extract_all = Expression::Function(Box::new(Function::new(
36143 "REGEXP_EXTRACT_ALL".to_string(),
36144 vec![subject, regex, Expression::number(1)],
36145 )));
36146 Ok(Expression::Function(Box::new(Function::new(
36147 "ARRAY_EXTRACT".to_string(),
36148 vec![extract_all, occurrence],
36149 ))))
36150 }
36151 } else {
36152 Ok(Expression::Function(Box::new(Function {
36153 name: f.name,
36154 args,
36155 distinct: f.distinct,
36156 trailing_comments: f.trailing_comments,
36157 use_bracket_syntax: f.use_bracket_syntax,
36158 no_parens: f.no_parens,
36159 quoted: f.quoted,
36160 span: None,
36161 inferred_type: None,
36162 })))
36163 }
36164 }
36165 DialectType::Snowflake => {
36166 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
36167 Ok(Expression::Function(Box::new(Function::new(
36168 "REGEXP_SUBSTR".to_string(),
36169 args,
36170 ))))
36171 }
36172 _ => {
36173 // For other targets (Hive/Spark/BigQuery): pass through as-is
36174 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
36175 Ok(Expression::Function(Box::new(Function {
36176 name: f.name,
36177 args,
36178 distinct: f.distinct,
36179 trailing_comments: f.trailing_comments,
36180 use_bracket_syntax: f.use_bracket_syntax,
36181 no_parens: f.no_parens,
36182 quoted: f.quoted,
36183 span: None,
36184 inferred_type: None,
36185 })))
36186 }
36187 }
36188 }
36189
36190 // BigQuery STRUCT(args) -> target-specific struct expression
36191 "STRUCT" => {
36192 // Convert Function args to Struct fields
36193 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
36194 for (i, arg) in args.into_iter().enumerate() {
36195 match arg {
36196 Expression::Alias(a) => {
36197 // Named field: expr AS name
36198 fields.push((Some(a.alias.name.clone()), a.this));
36199 }
36200 other => {
36201 // Unnamed field: keep as None unless the target is Snowflake or DuckDB
36202 // For Snowflake, auto-name as _N
36203 // For DuckDB, use column name for column refs, _N for others
36204 if matches!(target, DialectType::Snowflake) {
36205 fields.push((Some(format!("_{}", i)), other));
36206 } else if matches!(target, DialectType::DuckDB) {
36207 let auto_name = match &other {
36208 Expression::Column(col) => col.name.name.clone(),
36209 _ => format!("_{}", i),
36210 };
36211 fields.push((Some(auto_name), other));
36212 } else {
36213 fields.push((None, other));
36214 }
36215 }
36216 }
36217 }
36218
36219 match target {
36220 DialectType::Snowflake => {
36221 // OBJECT_CONSTRUCT('name', value, ...)
36222 let mut oc_args = Vec::new();
36223 for (name, val) in &fields {
36224 if let Some(n) = name {
36225 oc_args.push(Expression::Literal(Box::new(Literal::String(
36226 n.clone(),
36227 ))));
36228 oc_args.push(val.clone());
36229 } else {
36230 oc_args.push(val.clone());
36231 }
36232 }
36233 Ok(Expression::Function(Box::new(Function::new(
36234 "OBJECT_CONSTRUCT".to_string(),
36235 oc_args,
36236 ))))
36237 }
36238 DialectType::DuckDB => {
36239 // {'name': value, ...}
36240 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36241 fields,
36242 })))
36243 }
36244 DialectType::Hive => {
36245 // STRUCT(val1, val2, ...) - strip aliases
36246 let hive_fields: Vec<(Option<String>, Expression)> =
36247 fields.into_iter().map(|(_, v)| (None, v)).collect();
36248 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36249 fields: hive_fields,
36250 })))
36251 }
36252 DialectType::Spark | DialectType::Databricks => {
36253 // Use Expression::Struct to bypass Spark target transform auto-naming
36254 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36255 fields,
36256 })))
36257 }
36258 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
36259 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
36260 let all_named =
36261 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
36262 let all_types_inferable = all_named
36263 && fields
36264 .iter()
36265 .all(|(_, val)| Self::can_infer_presto_type(val));
36266 let row_args: Vec<Expression> =
36267 fields.iter().map(|(_, v)| v.clone()).collect();
36268 let row_expr = Expression::Function(Box::new(Function::new(
36269 "ROW".to_string(),
36270 row_args,
36271 )));
36272 if all_named && all_types_inferable {
36273 // Build ROW type with inferred types
36274 let mut row_type_fields = Vec::new();
36275 for (name, val) in &fields {
36276 if let Some(n) = name {
36277 let type_str = Self::infer_sql_type_for_presto(val);
36278 row_type_fields.push(crate::expressions::StructField::new(
36279 n.clone(),
36280 crate::expressions::DataType::Custom { name: type_str },
36281 ));
36282 }
36283 }
36284 let row_type = crate::expressions::DataType::Struct {
36285 fields: row_type_fields,
36286 nested: true,
36287 };
36288 Ok(Expression::Cast(Box::new(Cast {
36289 this: row_expr,
36290 to: row_type,
36291 trailing_comments: Vec::new(),
36292 double_colon_syntax: false,
36293 format: None,
36294 default: None,
36295 inferred_type: None,
36296 })))
36297 } else {
36298 Ok(row_expr)
36299 }
36300 }
36301 _ => {
36302 // Default: keep as STRUCT function with original args
36303 let mut new_args = Vec::new();
36304 for (name, val) in fields {
36305 if let Some(n) = name {
36306 new_args.push(Expression::Alias(Box::new(
36307 crate::expressions::Alias::new(val, Identifier::new(n)),
36308 )));
36309 } else {
36310 new_args.push(val);
36311 }
36312 }
36313 Ok(Expression::Function(Box::new(Function::new(
36314 "STRUCT".to_string(),
36315 new_args,
36316 ))))
36317 }
36318 }
36319 }
36320
36321 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
36322 "ROUND" if args.len() == 3 => {
36323 let x = args.remove(0);
36324 let n = args.remove(0);
36325 let mode = args.remove(0);
36326 // Check if mode is 'ROUND_HALF_EVEN'
36327 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
36328 if is_half_even && matches!(target, DialectType::DuckDB) {
36329 Ok(Expression::Function(Box::new(Function::new(
36330 "ROUND_EVEN".to_string(),
36331 vec![x, n],
36332 ))))
36333 } else {
36334 // Pass through with all args
36335 Ok(Expression::Function(Box::new(Function::new(
36336 "ROUND".to_string(),
36337 vec![x, n, mode],
36338 ))))
36339 }
36340 }
36341
36342 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
36343 "MAKE_INTERVAL" => {
36344 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
36345 // The positional args are: year, month
36346 // Named args are: day =>, minute =>, etc.
36347 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
36348 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
36349 // For BigQuery->BigQuery: reorder named args (day before minute)
36350 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
36351 let mut parts: Vec<(String, String)> = Vec::new();
36352 let mut pos_idx = 0;
36353 let pos_units = ["year", "month"];
36354 for arg in &args {
36355 if let Expression::NamedArgument(na) = arg {
36356 // Named arg like minute => 5
36357 let unit = na.name.name.clone();
36358 if let Expression::Literal(lit) = &na.value {
36359 if let Literal::Number(n) = lit.as_ref() {
36360 parts.push((unit, n.clone()));
36361 }
36362 }
36363 } else if pos_idx < pos_units.len() {
36364 if let Expression::Literal(lit) = arg {
36365 if let Literal::Number(n) = lit.as_ref() {
36366 parts.push((pos_units[pos_idx].to_string(), n.clone()));
36367 }
36368 }
36369 pos_idx += 1;
36370 }
36371 }
36372 // Don't sort - preserve original argument order
36373 let separator = if matches!(target, DialectType::Snowflake) {
36374 ", "
36375 } else {
36376 " "
36377 };
36378 let interval_str = parts
36379 .iter()
36380 .map(|(u, v)| format!("{} {}", v, u))
36381 .collect::<Vec<_>>()
36382 .join(separator);
36383 Ok(Expression::Interval(Box::new(
36384 crate::expressions::Interval {
36385 this: Some(Expression::Literal(Box::new(Literal::String(
36386 interval_str,
36387 )))),
36388 unit: None,
36389 },
36390 )))
36391 } else if matches!(target, DialectType::BigQuery) {
36392 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
36393 let mut positional = Vec::new();
36394 let mut named: Vec<(
36395 String,
36396 Expression,
36397 crate::expressions::NamedArgSeparator,
36398 )> = Vec::new();
36399 let _pos_units = ["year", "month"];
36400 let mut _pos_idx = 0;
36401 for arg in args {
36402 if let Expression::NamedArgument(na) = arg {
36403 named.push((na.name.name.clone(), na.value, na.separator));
36404 } else {
36405 positional.push(arg);
36406 _pos_idx += 1;
36407 }
36408 }
36409 // Sort named args by: day, hour, minute, second; any other name sorts last (stable sort)
36410 let unit_order = |u: &str| -> usize {
36411 match u.to_ascii_lowercase().as_str() {
36412 "day" => 0,
36413 "hour" => 1,
36414 "minute" => 2,
36415 "second" => 3,
36416 _ => 4,
36417 }
36418 };
36419 named.sort_by_key(|(u, _, _)| unit_order(u));
36420 let mut result_args = positional;
36421 for (name, value, sep) in named {
36422 result_args.push(Expression::NamedArgument(Box::new(
36423 crate::expressions::NamedArgument {
36424 name: Identifier::new(&name),
36425 value,
36426 separator: sep,
36427 },
36428 )));
36429 }
36430 Ok(Expression::Function(Box::new(Function::new(
36431 "MAKE_INTERVAL".to_string(),
36432 result_args,
36433 ))))
36434 } else {
36435 Ok(Expression::Function(Box::new(Function::new(
36436 "MAKE_INTERVAL".to_string(),
36437 args,
36438 ))))
36439 }
36440 }
36441
36442 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
36443 "ARRAY_TO_STRING" if args.len() == 3 => {
36444 let arr = args.remove(0);
36445 let sep = args.remove(0);
36446 let null_text = args.remove(0);
36447 match target {
36448 DialectType::DuckDB => {
36449 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
36450 let _lambda_param =
36451 Expression::Identifier(crate::expressions::Identifier::new("x"));
36452 let coalesce =
36453 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
36454 original_name: None,
36455 expressions: vec![
36456 Expression::Identifier(crate::expressions::Identifier::new(
36457 "x",
36458 )),
36459 null_text,
36460 ],
36461 inferred_type: None,
36462 }));
36463 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36464 parameters: vec![crate::expressions::Identifier::new("x")],
36465 body: coalesce,
36466 colon: false,
36467 parameter_types: vec![],
36468 }));
36469 let list_transform = Expression::Function(Box::new(Function::new(
36470 "LIST_TRANSFORM".to_string(),
36471 vec![arr, lambda],
36472 )));
36473 Ok(Expression::Function(Box::new(Function::new(
36474 "ARRAY_TO_STRING".to_string(),
36475 vec![list_transform, sep],
36476 ))))
36477 }
36478 _ => Ok(Expression::Function(Box::new(Function::new(
36479 "ARRAY_TO_STRING".to_string(),
36480 vec![arr, sep, null_text],
36481 )))),
36482 }
36483 }
36484
36485 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
36486 "LENGTH" if args.len() == 1 => {
36487 let arg = args.remove(0);
36488 match target {
36489 DialectType::DuckDB => {
36490 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
36491 let typeof_func = Expression::Function(Box::new(Function::new(
36492 "TYPEOF".to_string(),
36493 vec![arg.clone()],
36494 )));
36495 let blob_cast = Expression::Cast(Box::new(Cast {
36496 this: arg.clone(),
36497 to: DataType::VarBinary { length: None },
36498 trailing_comments: vec![],
36499 double_colon_syntax: false,
36500 format: None,
36501 default: None,
36502 inferred_type: None,
36503 }));
36504 let octet_length = Expression::Function(Box::new(Function::new(
36505 "OCTET_LENGTH".to_string(),
36506 vec![blob_cast],
36507 )));
36508 let text_cast = Expression::Cast(Box::new(Cast {
36509 this: arg,
36510 to: DataType::Text,
36511 trailing_comments: vec![],
36512 double_colon_syntax: false,
36513 format: None,
36514 default: None,
36515 inferred_type: None,
36516 }));
36517 let length_text = Expression::Function(Box::new(Function::new(
36518 "LENGTH".to_string(),
36519 vec![text_cast],
36520 )));
36521 Ok(Expression::Case(Box::new(crate::expressions::Case {
36522 operand: Some(typeof_func),
36523 whens: vec![(
36524 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
36525 octet_length,
36526 )],
36527 else_: Some(length_text),
36528 comments: Vec::new(),
36529 inferred_type: None,
36530 })))
36531 }
36532 _ => Ok(Expression::Function(Box::new(Function::new(
36533 "LENGTH".to_string(),
36534 vec![arg],
36535 )))),
36536 }
36537 }
36538
36539 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
36540 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
36541 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
36542 // The args should be [x, fraction] with the null handling stripped
36543 // For DuckDB: QUANTILE_CONT(x, fraction)
36544 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
36545 match target {
36546 DialectType::DuckDB => {
36547 // Strip down to just 2 args, rename to QUANTILE_CONT
36548 let x = args[0].clone();
36549 let frac = args[1].clone();
36550 Ok(Expression::Function(Box::new(Function::new(
36551 "QUANTILE_CONT".to_string(),
36552 vec![x, frac],
36553 ))))
36554 }
36555 _ => Ok(Expression::Function(Box::new(Function::new(
36556 "PERCENTILE_CONT".to_string(),
36557 args,
36558 )))),
36559 }
36560 }
36561
36562 // All others: pass through
36563 _ => Ok(Expression::Function(Box::new(Function {
36564 name: f.name,
36565 args,
36566 distinct: f.distinct,
36567 trailing_comments: f.trailing_comments,
36568 use_bracket_syntax: f.use_bracket_syntax,
36569 no_parens: f.no_parens,
36570 quoted: f.quoted,
36571 span: None,
36572 inferred_type: None,
36573 }))),
36574 }
36575 }
36576
36577 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
36578 /// Returns false for column references and other non-literal expressions where the type is unknown.
36579 fn can_infer_presto_type(expr: &Expression) -> bool {
36580 match expr {
36581 Expression::Literal(_) => true,
36582 Expression::Boolean(_) => true,
36583 Expression::Array(_) | Expression::ArrayFunc(_) => true,
36584 Expression::Struct(_) | Expression::StructFunc(_) => true,
36585 Expression::Function(f) => {
36586 f.name.eq_ignore_ascii_case("STRUCT")
36587 || f.name.eq_ignore_ascii_case("ROW")
36588 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
36589 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36590 || f.name.eq_ignore_ascii_case("NOW")
36591 }
36592 Expression::Cast(_) => true,
36593 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
36594 _ => false,
36595 }
36596 }
36597
36598 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
36599 fn infer_sql_type_for_presto(expr: &Expression) -> String {
36600 use crate::expressions::Literal;
36601 match expr {
36602 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36603 "VARCHAR".to_string()
36604 }
36605 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36606 let Literal::Number(n) = lit.as_ref() else {
36607 unreachable!()
36608 };
36609 if n.contains('.') {
36610 "DOUBLE".to_string()
36611 } else {
36612 "INTEGER".to_string()
36613 }
36614 }
36615 Expression::Boolean(_) => "BOOLEAN".to_string(),
36616 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36617 "DATE".to_string()
36618 }
36619 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36620 "TIMESTAMP".to_string()
36621 }
36622 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
36623 "TIMESTAMP".to_string()
36624 }
36625 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
36626 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
36627 Expression::Function(f) => {
36628 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
36629 "ROW".to_string()
36630 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
36631 "DATE".to_string()
36632 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36633 || f.name.eq_ignore_ascii_case("NOW")
36634 {
36635 "TIMESTAMP".to_string()
36636 } else {
36637 "VARCHAR".to_string()
36638 }
36639 }
36640 Expression::Cast(c) => {
36641 // If already cast, use the target type
36642 Self::data_type_to_presto_string(&c.to)
36643 }
36644 _ => "VARCHAR".to_string(),
36645 }
36646 }
36647
36648 /// Convert a DataType to its Presto/Trino string representation for ROW type
36649 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
36650 use crate::expressions::DataType;
36651 match dt {
36652 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
36653 "VARCHAR".to_string()
36654 }
36655 DataType::Int { .. }
36656 | DataType::BigInt { .. }
36657 | DataType::SmallInt { .. }
36658 | DataType::TinyInt { .. } => "INTEGER".to_string(),
36659 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
36660 DataType::Boolean => "BOOLEAN".to_string(),
36661 DataType::Date => "DATE".to_string(),
36662 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
36663 DataType::Struct { fields, .. } => {
36664 let field_strs: Vec<String> = fields
36665 .iter()
36666 .map(|f| {
36667 format!(
36668 "{} {}",
36669 f.name,
36670 Self::data_type_to_presto_string(&f.data_type)
36671 )
36672 })
36673 .collect();
36674 format!("ROW({})", field_strs.join(", "))
36675 }
36676 DataType::Array { element_type, .. } => {
36677 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
36678 }
36679 DataType::Custom { name } => {
36680 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
36681 name.clone()
36682 }
36683 _ => "VARCHAR".to_string(),
36684 }
36685 }
36686
36687 /// Convert IntervalUnit to string
36688 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
36689 match unit {
36690 crate::expressions::IntervalUnit::Year => "YEAR",
36691 crate::expressions::IntervalUnit::Quarter => "QUARTER",
36692 crate::expressions::IntervalUnit::Month => "MONTH",
36693 crate::expressions::IntervalUnit::Week => "WEEK",
36694 crate::expressions::IntervalUnit::Day => "DAY",
36695 crate::expressions::IntervalUnit::Hour => "HOUR",
36696 crate::expressions::IntervalUnit::Minute => "MINUTE",
36697 crate::expressions::IntervalUnit::Second => "SECOND",
36698 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
36699 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
36700 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
36701 }
36702 }
36703
36704 /// Extract unit string from an expression (uppercased)
36705 fn get_unit_str_static(expr: &Expression) -> String {
36706 use crate::expressions::Literal;
36707 match expr {
36708 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
36709 Expression::Var(v) => v.this.to_ascii_uppercase(),
36710 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36711 let Literal::String(s) = lit.as_ref() else {
36712 unreachable!()
36713 };
36714 s.to_ascii_uppercase()
36715 }
36716 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
36717 Expression::Function(f) => {
36718 let base = f.name.to_ascii_uppercase();
36719 if !f.args.is_empty() {
36720 let inner = Self::get_unit_str_static(&f.args[0]);
36721 format!("{}({})", base, inner)
36722 } else {
36723 base
36724 }
36725 }
36726 _ => "DAY".to_string(),
36727 }
36728 }
36729
36730 /// Parse unit string to IntervalUnit
36731 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
36732 match s {
36733 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
36734 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
36735 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
36736 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
36737 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
36738 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
36739 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
36740 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
36741 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
36742 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
36743 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
36744 _ => crate::expressions::IntervalUnit::Day,
36745 }
36746 }
36747
36748 /// Convert expression to simple string for interval building
36749 fn expr_to_string_static(expr: &Expression) -> String {
36750 use crate::expressions::Literal;
36751 match expr {
36752 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36753 let Literal::Number(s) = lit.as_ref() else {
36754 unreachable!()
36755 };
36756 s.clone()
36757 }
36758 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36759 let Literal::String(s) = lit.as_ref() else {
36760 unreachable!()
36761 };
36762 s.clone()
36763 }
36764 Expression::Identifier(id) => id.name.clone(),
36765 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
36766 _ => "1".to_string(),
36767 }
36768 }
36769
36770 /// Extract a simple string representation from a literal expression
36771 fn expr_to_string(expr: &Expression) -> String {
36772 use crate::expressions::Literal;
36773 match expr {
36774 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36775 let Literal::Number(s) = lit.as_ref() else {
36776 unreachable!()
36777 };
36778 s.clone()
36779 }
36780 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36781 let Literal::String(s) = lit.as_ref() else {
36782 unreachable!()
36783 };
36784 s.clone()
36785 }
36786 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
36787 Expression::Identifier(id) => id.name.clone(),
36788 _ => "1".to_string(),
36789 }
36790 }
36791
36792 /// Quote an interval value expression as a string literal if it's a number (or negated number)
36793 fn quote_interval_val(expr: &Expression) -> Expression {
36794 use crate::expressions::Literal;
36795 match expr {
36796 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36797 let Literal::Number(n) = lit.as_ref() else {
36798 unreachable!()
36799 };
36800 Expression::Literal(Box::new(Literal::String(n.clone())))
36801 }
36802 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
36803 Expression::Neg(inner) => {
36804 if let Expression::Literal(lit) = &inner.this {
36805 if let Literal::Number(n) = lit.as_ref() {
36806 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
36807 } else {
36808 inner.this.clone()
36809 }
36810 } else {
36811 expr.clone()
36812 }
36813 }
36814 _ => expr.clone(),
36815 }
36816 }
36817
/// Check whether a timestamp string ends with timezone information.
///
/// After trimming surrounding whitespace, the string is considered zoned when either:
/// * the text after the last space is `+` or `-` followed by one or more characters that
///   are all ASCII digits or `:` (e.g. `+2`, `-05:00`), or
/// * it ends (ASCII case-insensitively) with a space followed by one of the abbreviations
///   `utc`, `gmt`, `cet`, `est`, `pst`, `cst`, `mst`.
///
/// Offsets that are not separated from the time by a space are not detected.
fn timestamp_string_has_timezone(ts: &str) -> bool {
    let candidate = ts.trim();

    // Numeric offset in the last space-separated token.
    let has_numeric_offset = candidate
        .rsplit_once(' ')
        .and_then(|(_, tail)| tail.strip_prefix(|sign: char| sign == '+' || sign == '-'))
        .map_or(false, |offset| {
            !offset.is_empty() && offset.chars().all(|ch| ch.is_ascii_digit() || ch == ':')
        });
    if has_numeric_offset {
        return true;
    }

    // Named timezone abbreviations.
    const ZONE_SUFFIXES: [&str; 7] = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
    let lowered = candidate.to_ascii_lowercase();
    ZONE_SUFFIXES.iter().any(|&suffix| lowered.ends_with(suffix))
}
36841
36842 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
36843 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
36844 use crate::expressions::{Cast, DataType, Literal};
36845 match expr {
36846 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36847 let Literal::Timestamp(s) = lit.as_ref() else {
36848 unreachable!()
36849 };
36850 let tz = func_name.starts_with("TIMESTAMP");
36851 Expression::Cast(Box::new(Cast {
36852 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36853 to: if tz {
36854 DataType::Timestamp {
36855 timezone: true,
36856 precision: None,
36857 }
36858 } else {
36859 DataType::Timestamp {
36860 timezone: false,
36861 precision: None,
36862 }
36863 },
36864 trailing_comments: vec![],
36865 double_colon_syntax: false,
36866 format: None,
36867 default: None,
36868 inferred_type: None,
36869 }))
36870 }
36871 other => other,
36872 }
36873 }
36874
36875 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
36876 fn maybe_cast_ts(expr: Expression) -> Expression {
36877 use crate::expressions::{Cast, DataType, Literal};
36878 match expr {
36879 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36880 let Literal::Timestamp(s) = lit.as_ref() else {
36881 unreachable!()
36882 };
36883 Expression::Cast(Box::new(Cast {
36884 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36885 to: DataType::Timestamp {
36886 timezone: false,
36887 precision: None,
36888 },
36889 trailing_comments: vec![],
36890 double_colon_syntax: false,
36891 format: None,
36892 default: None,
36893 inferred_type: None,
36894 }))
36895 }
36896 other => other,
36897 }
36898 }
36899
36900 /// Convert DATE 'x' literal to CAST('x' AS DATE)
36901 fn date_literal_to_cast(expr: Expression) -> Expression {
36902 use crate::expressions::{Cast, DataType, Literal};
36903 match expr {
36904 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36905 let Literal::Date(s) = lit.as_ref() else {
36906 unreachable!()
36907 };
36908 Expression::Cast(Box::new(Cast {
36909 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36910 to: DataType::Date,
36911 trailing_comments: vec![],
36912 double_colon_syntax: false,
36913 format: None,
36914 default: None,
36915 inferred_type: None,
36916 }))
36917 }
36918 other => other,
36919 }
36920 }
36921
36922 /// Ensure an expression that should be a date is CAST(... AS DATE).
36923 /// Handles both DATE literals and string literals that look like dates.
36924 fn ensure_cast_date(expr: Expression) -> Expression {
36925 use crate::expressions::{Cast, DataType, Literal};
36926 match expr {
36927 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36928 let Literal::Date(s) = lit.as_ref() else {
36929 unreachable!()
36930 };
36931 Expression::Cast(Box::new(Cast {
36932 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36933 to: DataType::Date,
36934 trailing_comments: vec![],
36935 double_colon_syntax: false,
36936 format: None,
36937 default: None,
36938 inferred_type: None,
36939 }))
36940 }
36941 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
36942 // String literal that should be a date -> CAST('s' AS DATE)
36943 Expression::Cast(Box::new(Cast {
36944 this: expr,
36945 to: DataType::Date,
36946 trailing_comments: vec![],
36947 double_colon_syntax: false,
36948 format: None,
36949 default: None,
36950 inferred_type: None,
36951 }))
36952 }
36953 // Already a CAST or other expression -> leave as-is
36954 other => other,
36955 }
36956 }
36957
36958 /// Force CAST(expr AS DATE) for any expression (not just literals)
36959 /// Skips if the expression is already a CAST to DATE
36960 fn force_cast_date(expr: Expression) -> Expression {
36961 use crate::expressions::{Cast, DataType};
36962 // If it's already a CAST to DATE, don't double-wrap
36963 if let Expression::Cast(ref c) = expr {
36964 if matches!(c.to, DataType::Date) {
36965 return expr;
36966 }
36967 }
36968 Expression::Cast(Box::new(Cast {
36969 this: expr,
36970 to: DataType::Date,
36971 trailing_comments: vec![],
36972 double_colon_syntax: false,
36973 format: None,
36974 default: None,
36975 inferred_type: None,
36976 }))
36977 }
36978
36979 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
36980 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
36981 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
36982 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
36983
36984 fn ensure_to_date_preserved(expr: Expression) -> Expression {
36985 use crate::expressions::{Function, Literal};
36986 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
36987 {
36988 Expression::Function(Box::new(Function::new(
36989 Self::PRESERVED_TO_DATE.to_string(),
36990 vec![expr],
36991 )))
36992 } else {
36993 expr
36994 }
36995 }
36996
36997 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
36998 fn try_cast_date(expr: Expression) -> Expression {
36999 use crate::expressions::{Cast, DataType};
37000 Expression::TryCast(Box::new(Cast {
37001 this: expr,
37002 to: DataType::Date,
37003 trailing_comments: vec![],
37004 double_colon_syntax: false,
37005 format: None,
37006 default: None,
37007 inferred_type: None,
37008 }))
37009 }
37010
37011 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
37012 fn double_cast_timestamp_date(expr: Expression) -> Expression {
37013 use crate::expressions::{Cast, DataType};
37014 let inner = Expression::Cast(Box::new(Cast {
37015 this: expr,
37016 to: DataType::Timestamp {
37017 timezone: false,
37018 precision: None,
37019 },
37020 trailing_comments: vec![],
37021 double_colon_syntax: false,
37022 format: None,
37023 default: None,
37024 inferred_type: None,
37025 }));
37026 Expression::Cast(Box::new(Cast {
37027 this: inner,
37028 to: DataType::Date,
37029 trailing_comments: vec![],
37030 double_colon_syntax: false,
37031 format: None,
37032 default: None,
37033 inferred_type: None,
37034 }))
37035 }
37036
37037 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
37038 fn double_cast_datetime_date(expr: Expression) -> Expression {
37039 use crate::expressions::{Cast, DataType};
37040 let inner = Expression::Cast(Box::new(Cast {
37041 this: expr,
37042 to: DataType::Custom {
37043 name: "DATETIME".to_string(),
37044 },
37045 trailing_comments: vec![],
37046 double_colon_syntax: false,
37047 format: None,
37048 default: None,
37049 inferred_type: None,
37050 }));
37051 Expression::Cast(Box::new(Cast {
37052 this: inner,
37053 to: DataType::Date,
37054 trailing_comments: vec![],
37055 double_colon_syntax: false,
37056 format: None,
37057 default: None,
37058 inferred_type: None,
37059 }))
37060 }
37061
37062 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
37063 fn double_cast_datetime2_date(expr: Expression) -> Expression {
37064 use crate::expressions::{Cast, DataType};
37065 let inner = Expression::Cast(Box::new(Cast {
37066 this: expr,
37067 to: DataType::Custom {
37068 name: "DATETIME2".to_string(),
37069 },
37070 trailing_comments: vec![],
37071 double_colon_syntax: false,
37072 format: None,
37073 default: None,
37074 inferred_type: None,
37075 }));
37076 Expression::Cast(Box::new(Cast {
37077 this: inner,
37078 to: DataType::Date,
37079 trailing_comments: vec![],
37080 double_colon_syntax: false,
37081 format: None,
37082 default: None,
37083 inferred_type: None,
37084 }))
37085 }
37086
/// Convert a Hive/Java-style date format string to a C-style (strftime) format,
/// e.g. `"yyyy-MM-dd'T'HH"` -> `"%Y-%m-%d'T'%H"`.
///
/// Each run of a repeated pattern letter is translated as one unit:
///
/// | Letter | Output                                                             |
/// |--------|--------------------------------------------------------------------|
/// | `y`    | `%y` for exactly two letters, otherwise `%Y`                       |
/// | `M`    | `%m` for one or two letters, `%b` for three, `%B` for four or more |
/// | `d`    | `%d`                                                               |
/// | `H`    | `%H`                                                               |
/// | `h`    | `%I`                                                               |
/// | `m`    | `%M`                                                               |
/// | `s`    | `%S`                                                               |
/// | `S`    | `%f` (fractional seconds, regardless of run length)                |
/// | `a`    | `%p`                                                               |
/// | `E`    | `%a` for up to three letters, `%A` for four or more                |
///
/// Single-quoted literal text is copied through verbatim, quotes included; an unterminated
/// quote copies the rest of the input. Every other character is copied unchanged.
fn hive_format_to_c_format(fmt: &str) -> String {
    let chars: Vec<char> = fmt.chars().collect();
    let mut result = String::with_capacity(fmt.len());
    let mut i = 0;

    while i < chars.len() {
        let current = chars[i];

        // Quoted literal text - pass through the quotes and content.
        if current == '\'' {
            result.push('\'');
            i += 1;
            while i < chars.len() && chars[i] != '\'' {
                result.push(chars[i]);
                i += 1;
            }
            if i < chars.len() {
                result.push('\'');
                i += 1;
            }
            continue;
        }

        // Length of the run of identical letters starting at `i` (always >= 1).
        let run = chars[i..].iter().take_while(|&&ch| ch == current).count();

        let directive = match current {
            'y' => {
                if run == 2 {
                    "%y"
                } else {
                    "%Y"
                }
            }
            'M' => match run {
                1 | 2 => "%m",
                // MMM is the abbreviated month name, MMMM (or longer) the full name.
                3 => "%b",
                _ => "%B",
            },
            'd' => "%d",
            'H' => "%H",
            'h' => "%I",
            'm' => "%M",
            's' => "%S",
            'S' => "%f",
            'a' => "%p",
            'E' => {
                if run >= 4 {
                    "%A"
                } else {
                    "%a"
                }
            }
            other => {
                // Not a pattern letter: copy a single character and move on.
                result.push(other);
                i += 1;
                continue;
            }
        };

        result.push_str(directive);
        i += run;
    }

    result
}
37210
37211 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
37212 fn hive_format_to_presto_format(fmt: &str) -> String {
37213 let c_fmt = Self::hive_format_to_c_format(fmt);
37214 // Presto uses %T for HH:MM:SS
37215 c_fmt.replace("%H:%M:%S", "%T")
37216 }
37217
37218 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
37219 fn ensure_cast_timestamp(expr: Expression) -> Expression {
37220 use crate::expressions::{Cast, DataType, Literal};
37221 match expr {
37222 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37223 let Literal::Timestamp(s) = lit.as_ref() else {
37224 unreachable!()
37225 };
37226 Expression::Cast(Box::new(Cast {
37227 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37228 to: DataType::Timestamp {
37229 timezone: false,
37230 precision: None,
37231 },
37232 trailing_comments: vec![],
37233 double_colon_syntax: false,
37234 format: None,
37235 default: None,
37236 inferred_type: None,
37237 }))
37238 }
37239 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37240 Expression::Cast(Box::new(Cast {
37241 this: expr,
37242 to: DataType::Timestamp {
37243 timezone: false,
37244 precision: None,
37245 },
37246 trailing_comments: vec![],
37247 double_colon_syntax: false,
37248 format: None,
37249 default: None,
37250 inferred_type: None,
37251 }))
37252 }
37253 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37254 let Literal::Datetime(s) = lit.as_ref() else {
37255 unreachable!()
37256 };
37257 Expression::Cast(Box::new(Cast {
37258 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37259 to: DataType::Timestamp {
37260 timezone: false,
37261 precision: None,
37262 },
37263 trailing_comments: vec![],
37264 double_colon_syntax: false,
37265 format: None,
37266 default: None,
37267 inferred_type: None,
37268 }))
37269 }
37270 other => other,
37271 }
37272 }
37273
37274 /// Force CAST to TIMESTAMP for any expression (not just literals)
37275 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
37276 fn force_cast_timestamp(expr: Expression) -> Expression {
37277 use crate::expressions::{Cast, DataType};
37278 // Don't double-wrap if already a CAST to TIMESTAMP
37279 if let Expression::Cast(ref c) = expr {
37280 if matches!(c.to, DataType::Timestamp { .. }) {
37281 return expr;
37282 }
37283 }
37284 Expression::Cast(Box::new(Cast {
37285 this: expr,
37286 to: DataType::Timestamp {
37287 timezone: false,
37288 precision: None,
37289 },
37290 trailing_comments: vec![],
37291 double_colon_syntax: false,
37292 format: None,
37293 default: None,
37294 inferred_type: None,
37295 }))
37296 }
37297
37298 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
37299 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
37300 use crate::expressions::{Cast, DataType, Literal};
37301 match expr {
37302 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37303 let Literal::Timestamp(s) = lit.as_ref() else {
37304 unreachable!()
37305 };
37306 Expression::Cast(Box::new(Cast {
37307 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37308 to: DataType::Timestamp {
37309 timezone: true,
37310 precision: None,
37311 },
37312 trailing_comments: vec![],
37313 double_colon_syntax: false,
37314 format: None,
37315 default: None,
37316 inferred_type: None,
37317 }))
37318 }
37319 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37320 Expression::Cast(Box::new(Cast {
37321 this: expr,
37322 to: DataType::Timestamp {
37323 timezone: true,
37324 precision: None,
37325 },
37326 trailing_comments: vec![],
37327 double_colon_syntax: false,
37328 format: None,
37329 default: None,
37330 inferred_type: None,
37331 }))
37332 }
37333 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37334 let Literal::Datetime(s) = lit.as_ref() else {
37335 unreachable!()
37336 };
37337 Expression::Cast(Box::new(Cast {
37338 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37339 to: DataType::Timestamp {
37340 timezone: true,
37341 precision: None,
37342 },
37343 trailing_comments: vec![],
37344 double_colon_syntax: false,
37345 format: None,
37346 default: None,
37347 inferred_type: None,
37348 }))
37349 }
37350 other => other,
37351 }
37352 }
37353
37354 /// Ensure expression is CAST to DATETIME (for BigQuery)
37355 fn ensure_cast_datetime(expr: Expression) -> Expression {
37356 use crate::expressions::{Cast, DataType, Literal};
37357 match expr {
37358 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37359 Expression::Cast(Box::new(Cast {
37360 this: expr,
37361 to: DataType::Custom {
37362 name: "DATETIME".to_string(),
37363 },
37364 trailing_comments: vec![],
37365 double_colon_syntax: false,
37366 format: None,
37367 default: None,
37368 inferred_type: None,
37369 }))
37370 }
37371 other => other,
37372 }
37373 }
37374
37375 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
37376 fn force_cast_datetime(expr: Expression) -> Expression {
37377 use crate::expressions::{Cast, DataType};
37378 if let Expression::Cast(ref c) = expr {
37379 if let DataType::Custom { ref name } = c.to {
37380 if name.eq_ignore_ascii_case("DATETIME") {
37381 return expr;
37382 }
37383 }
37384 }
37385 Expression::Cast(Box::new(Cast {
37386 this: expr,
37387 to: DataType::Custom {
37388 name: "DATETIME".to_string(),
37389 },
37390 trailing_comments: vec![],
37391 double_colon_syntax: false,
37392 format: None,
37393 default: None,
37394 inferred_type: None,
37395 }))
37396 }
37397
37398 /// Ensure expression is CAST to DATETIME2 (for TSQL)
37399 fn ensure_cast_datetime2(expr: Expression) -> Expression {
37400 use crate::expressions::{Cast, DataType, Literal};
37401 match expr {
37402 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37403 Expression::Cast(Box::new(Cast {
37404 this: expr,
37405 to: DataType::Custom {
37406 name: "DATETIME2".to_string(),
37407 },
37408 trailing_comments: vec![],
37409 double_colon_syntax: false,
37410 format: None,
37411 default: None,
37412 inferred_type: None,
37413 }))
37414 }
37415 other => other,
37416 }
37417 }
37418
37419 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
37420 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
37421 use crate::expressions::{Cast, DataType, Literal};
37422 match expr {
37423 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37424 let Literal::Timestamp(s) = lit.as_ref() else {
37425 unreachable!()
37426 };
37427 Expression::Cast(Box::new(Cast {
37428 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37429 to: DataType::Timestamp {
37430 timezone: true,
37431 precision: None,
37432 },
37433 trailing_comments: vec![],
37434 double_colon_syntax: false,
37435 format: None,
37436 default: None,
37437 inferred_type: None,
37438 }))
37439 }
37440 other => other,
37441 }
37442 }
37443
37444 /// Convert BigQuery format string to Snowflake format string
37445 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
37446 use crate::expressions::Literal;
37447 if let Expression::Literal(lit) = format_expr {
37448 if let Literal::String(s) = lit.as_ref() {
37449 let sf = s
37450 .replace("%Y", "yyyy")
37451 .replace("%m", "mm")
37452 .replace("%d", "DD")
37453 .replace("%H", "HH24")
37454 .replace("%M", "MI")
37455 .replace("%S", "SS")
37456 .replace("%b", "mon")
37457 .replace("%B", "Month")
37458 .replace("%e", "FMDD");
37459 Expression::Literal(Box::new(Literal::String(sf)))
37460 } else {
37461 format_expr.clone()
37462 }
37463 } else {
37464 format_expr.clone()
37465 }
37466 }
37467
37468 /// Convert BigQuery format string to DuckDB format string
37469 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
37470 use crate::expressions::Literal;
37471 if let Expression::Literal(lit) = format_expr {
37472 if let Literal::String(s) = lit.as_ref() {
37473 let duck = s
37474 .replace("%T", "%H:%M:%S")
37475 .replace("%F", "%Y-%m-%d")
37476 .replace("%D", "%m/%d/%y")
37477 .replace("%x", "%m/%d/%y")
37478 .replace("%c", "%a %b %-d %H:%M:%S %Y")
37479 .replace("%e", "%-d")
37480 .replace("%E6S", "%S.%f");
37481 Expression::Literal(Box::new(Literal::String(duck)))
37482 } else {
37483 format_expr.clone()
37484 }
37485 } else {
37486 format_expr.clone()
37487 }
37488 }
37489
37490 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
37491 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
37492 use crate::expressions::Literal;
37493 if let Expression::Literal(lit) = format_expr {
37494 if let Literal::String(s) = lit.as_ref() {
37495 // Replace format elements from longest to shortest to avoid partial matches
37496 let result = s
37497 .replace("YYYYMMDD", "%Y%m%d")
37498 .replace("YYYY", "%Y")
37499 .replace("YY", "%y")
37500 .replace("MONTH", "%B")
37501 .replace("MON", "%b")
37502 .replace("MM", "%m")
37503 .replace("DD", "%d")
37504 .replace("HH24", "%H")
37505 .replace("HH12", "%I")
37506 .replace("HH", "%I")
37507 .replace("MI", "%M")
37508 .replace("SSTZH", "%S%z")
37509 .replace("SS", "%S")
37510 .replace("TZH", "%z");
37511 Expression::Literal(Box::new(Literal::String(result)))
37512 } else {
37513 format_expr.clone()
37514 }
37515 } else {
37516 format_expr.clone()
37517 }
37518 }
37519
37520 /// Normalize BigQuery format strings for BQ->BQ output
37521 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
37522 use crate::expressions::Literal;
37523 if let Expression::Literal(lit) = format_expr {
37524 if let Literal::String(s) = lit.as_ref() {
37525 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
37526 Expression::Literal(Box::new(Literal::String(norm)))
37527 } else {
37528 format_expr.clone()
37529 }
37530 } else {
37531 format_expr.clone()
37532 }
37533 }
37534}
37535
#[cfg(test)]
mod tests {
    use super::*;

    /// `UNNEST(GENERATE_DATE_ARRAY(...))` query shared by the per-target date-array tests.
    const GDA_UNNEST_SQL: &str = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";

    /// The same date-array query wrapped in a CTE.
    const GDA_CTE_SQL: &str = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";

    /// Transpiles `sql` with an existing `dialect` towards `target` and returns the
    /// first generated statement.
    ///
    /// Panics (failing the calling test) when transpilation errors or yields no output.
    #[track_caller]
    fn first_output(dialect: &Dialect, sql: &str, target: DialectType) -> String {
        let statements = dialect.transpile(sql, target).unwrap();
        statements[0].clone()
    }

    /// Transpiles `sql` from the `source` dialect to `target` and returns the first
    /// generated statement. Panics under the same conditions as [`first_output`].
    #[track_caller]
    fn transpile_first(source: DialectType, sql: &str, target: DialectType) -> String {
        first_output(&Dialect::get(source), sql, target)
    }

    #[test]
    fn test_dialect_type_from_str() {
        let parsed = |name: &str| name.parse::<DialectType>().unwrap();

        assert_eq!(parsed("postgres"), DialectType::PostgreSQL);
        assert_eq!(parsed("postgresql"), DialectType::PostgreSQL);
        assert_eq!(parsed("mysql"), DialectType::MySQL);
        assert_eq!(parsed("bigquery"), DialectType::BigQuery);
    }

    #[test]
    fn test_basic_transpile() {
        let statements = Dialect::get(DialectType::Generic)
            .transpile("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(statements.len(), 1);
        assert_eq!(statements[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // MySQL output is expected to spell NVL as IFNULL.
        assert_eq!(
            transpile_first(DialectType::Generic, "SELECT NVL(a, b)", DialectType::MySQL),
            "SELECT IFNULL(a, b)"
        );
    }

    #[test]
    fn test_get_path_duckdb() {
        // Prints each stage so the intermediate forms can be inspected; only a
        // transpile failure makes this test fail.
        let snowflake = Dialect::get(DialectType::Snowflake);
        let colon_sql = "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit";

        // Stage 1: colon path syntax, Snowflake round trip.
        let round_trip = first_output(&snowflake, colon_sql, DialectType::Snowflake);
        eprintln!("Snowflake->Snowflake colon: {}", round_trip);

        // Stage 2: colon path syntax towards DuckDB.
        let to_duckdb = first_output(&snowflake, colon_sql, DialectType::DuckDB);
        eprintln!("Snowflake->DuckDB colon: {}", to_duckdb);

        // Stage 3: explicit GET_PATH call towards DuckDB.
        let explicit = first_output(
            &snowflake,
            "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
            DialectType::DuckDB,
        );
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", explicit);
    }

    #[test]
    fn test_function_transformation_postgres() {
        let generic = Dialect::get(DialectType::Generic);

        // Both IFNULL and NVL are expected to come out as COALESCE for PostgreSQL.
        assert_eq!(
            first_output(&generic, "SELECT IFNULL(a, b)", DialectType::PostgreSQL),
            "SELECT COALESCE(a, b)"
        );
        assert_eq!(
            first_output(&generic, "SELECT NVL(a, b)", DialectType::PostgreSQL),
            "SELECT COALESCE(a, b)"
        );
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // A Hive CAST is expected to become TRY_CAST on targets that offer it.
        let hive = Dialect::get(DialectType::Hive);

        assert_eq!(
            first_output(&hive, "CAST(1 AS INT)", DialectType::DuckDB),
            "TRY_CAST(1 AS INT)"
        );
        assert_eq!(
            first_output(&hive, "CAST(1 AS INT)", DialectType::Presto),
            "TRY_CAST(1 AS INTEGER)"
        );
    }

    #[test]
    fn test_hive_array_identity() {
        // The angle-bracket form ARRAY<DATE> must survive a Hive -> Hive round trip.
        let hive = Dialect::get(DialectType::Hive);
        let ddl = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";

        // Path 1: the single-call transpile API.
        let transpiled = first_output(&hive, ddl, DialectType::Hive);
        eprintln!("Hive ARRAY via transpile: {}", transpiled);
        assert!(
            transpiled.contains("ARRAY<DATE>"),
            "transpile: Expected ARRAY<DATE>, got: {}",
            transpiled
        );

        // Path 2: explicit parse -> transform -> generate.
        let parsed = hive.parse(ddl).unwrap();
        let rewritten = hive.transform(parsed[0].clone()).unwrap();
        let regenerated = hive.generate(&rewritten).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", regenerated);
        assert!(
            regenerated.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            regenerated
        );
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks does not accept BETWEEN inside DELETE statements.
        let generic = Dialect::get(DialectType::Generic);

        // DELETE ... BETWEEN is expanded into a >= / <= pair.
        assert_eq!(
            first_output(
                &generic,
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            ),
            "DELETE FROM t WHERE a >= b AND a <= c"
        );

        // DELETE ... NOT BETWEEN is expanded into a < / > pair joined by OR.
        assert_eq!(
            first_output(
                &generic,
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            ),
            "DELETE FROM t WHERE a < b OR a > c"
        );

        // In a SELECT the BETWEEN predicate is supported and must stay as written.
        let select_output = first_output(
            &generic,
            "SELECT * FROM t WHERE a BETWEEN b AND c",
            DialectType::StarRocks,
        );
        assert!(
            select_output.contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }

    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let outcome = Dialect::get(DialectType::Snowflake)
            .transpile("SELECT LTRIM(RTRIM(col)) FROM t1", DialectType::DuckDB);
        match outcome.as_ref() {
            Ok(statements) => eprintln!("LTRIM/RTRIM result: {}", statements[0]),
            Err(err) => eprintln!("LTRIM/RTRIM error: {}", err),
        }
        assert!(
            outcome.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            outcome.err()
        );
    }

    #[test]
    fn test_duckdb_count_if_parse() {
        let outcome =
            Dialect::get(DialectType::DuckDB).transpile("COUNT_IF(x)", DialectType::DuckDB);
        match outcome.as_ref() {
            Ok(statements) => eprintln!("COUNT_IF result: {}", statements[0]),
            Err(err) => eprintln!("COUNT_IF error: {}", err),
        }
        assert!(
            outcome.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            outcome.err()
        );
    }

    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let outcome =
            Dialect::get(DialectType::TSQL).transpile("CAST(X AS TINYINT)", DialectType::DuckDB);
        match outcome.as_ref() {
            Ok(statements) => eprintln!("TSQL CAST TINYINT result: {}", statements[0]),
            Err(err) => eprintln!("TSQL CAST TINYINT error: {}", err),
        }
        assert!(
            outcome.is_ok(),
            "Expected successful transpile, got error: {:?}",
            outcome.err()
        );
    }

    #[test]
    fn test_pg_hash_bitwise_xor() {
        assert_eq!(
            transpile_first(DialectType::PostgreSQL, "x # y", DialectType::PostgreSQL),
            "x # y"
        );
    }

    #[test]
    fn test_pg_array_to_duckdb() {
        assert_eq!(
            transpile_first(
                DialectType::PostgreSQL,
                "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]",
                DialectType::DuckDB,
            ),
            "SELECT [1, 2, 3] @> [1, 2]"
        );
    }

    #[test]
    fn test_array_remove_bigquery() {
        assert_eq!(
            transpile_first(
                DialectType::Generic,
                "ARRAY_REMOVE(the_array, target)",
                DialectType::BigQuery,
            ),
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }

    #[test]
    fn test_map_clickhouse_case() {
        // Diagnostic test: prints the parsed tree and the ClickHouse rendering.
        let generic = Dialect::get(DialectType::Generic);
        let map_cast = "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))";

        let tree = generic.parse(map_cast).unwrap();
        eprintln!("MAP parsed: {:?}", tree);

        let rendered = first_output(&generic, map_cast, DialectType::ClickHouse);
        eprintln!("MAP result: {}", rendered);
    }

    #[test]
    fn test_generate_date_array_presto() {
        let output = transpile_first(DialectType::Generic, GDA_UNNEST_SQL, DialectType::Presto);
        eprintln!("GDA -> Presto: {}", output);
        assert_eq!(output, "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }

    #[test]
    fn test_generate_date_array_postgres() {
        let output =
            transpile_first(DialectType::Generic, GDA_UNNEST_SQL, DialectType::PostgreSQL);
        eprintln!("GDA -> PostgreSQL: {}", output);
    }

    #[test]
    fn test_generate_date_array_snowflake() {
        let output =
            transpile_first(DialectType::Generic, GDA_UNNEST_SQL, DialectType::Snowflake);
        eprintln!("GDA -> Snowflake: {}", output);
    }

    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        let output = transpile_first(
            DialectType::Generic,
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        );
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", output);
    }

    #[test]
    fn test_generate_date_array_mysql() {
        let output = transpile_first(DialectType::Generic, GDA_UNNEST_SQL, DialectType::MySQL);
        eprintln!("GDA -> MySQL: {}", output);
    }

    #[test]
    fn test_generate_date_array_redshift() {
        let output = transpile_first(DialectType::Generic, GDA_UNNEST_SQL, DialectType::Redshift);
        eprintln!("GDA -> Redshift: {}", output);
    }

    #[test]
    fn test_generate_date_array_tsql() {
        let output = transpile_first(DialectType::Generic, GDA_UNNEST_SQL, DialectType::TSQL);
        eprintln!("GDA -> TSQL: {}", output);
    }

    #[test]
    fn test_struct_colon_syntax() {
        // Diagnostic test: both outcomes are only reported, never asserted on.
        let generic = Dialect::get(DialectType::Generic);

        // Field list written without colons.
        let plain = generic.transpile(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match plain {
            Ok(statements) => eprintln!("STRUCT no colon -> ClickHouse: {}", statements[0]),
            Err(err) => eprintln!("STRUCT no colon error: {}", err),
        }

        // Field list written with `name: TYPE` colons.
        let with_colon = generic.transpile(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match with_colon {
            Ok(statements) => eprintln!("STRUCT colon -> ClickHouse: {}", statements[0]),
            Err(err) => eprintln!("STRUCT colon error: {}", err),
        }
    }

    #[test]
    fn test_generate_date_array_cte_wrapped_mysql() {
        let output = transpile_first(DialectType::Generic, GDA_CTE_SQL, DialectType::MySQL);
        eprintln!("GDA CTE -> MySQL: {}", output);
    }

    #[test]
    fn test_generate_date_array_cte_wrapped_tsql() {
        let output = transpile_first(DialectType::Generic, GDA_CTE_SQL, DialectType::TSQL);
        eprintln!("GDA CTE -> TSQL: {}", output);
    }

    #[test]
    fn test_decode_literal_no_null_check() {
        // All-literal Oracle DECODE: plain equality, no IS NULL branch.
        assert_eq!(
            transpile_first(
                DialectType::Oracle,
                "SELECT decode(1,2,3,4)",
                DialectType::DuckDB,
            ),
            "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
            "Literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_literal_no_null_check() {
        // Column compared against a literal: plain equality (matches sqlglot).
        assert_eq!(
            transpile_first(
                DialectType::Oracle,
                "SELECT decode(col, 2, 3, 4) FROM t",
                DialectType::DuckDB,
            ),
            "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
            "Column vs literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_column_keeps_null_check() {
        // Column compared against a column: the null-safe comparison must remain.
        let output = transpile_first(
            DialectType::Oracle,
            "SELECT decode(col, col2, 3, 4) FROM t",
            DialectType::DuckDB,
        );
        assert!(
            output.contains("IS NULL"),
            "Column vs column DECODE should have IS NULL checks, got: {}",
            output
        );
    }

    #[test]
    fn test_decode_null_search() {
        // A NULL search value is expected to be rendered as an IS NULL test.
        assert_eq!(
            transpile_first(
                DialectType::Oracle,
                "SELECT decode(col, NULL, 3, 4) FROM t",
                DialectType::DuckDB,
            ),
            "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
        );
    }

    // =========================================================================
    // REGEXP function transpilation tests
    // =========================================================================

    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_2arg() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR(s, 'pattern')",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1)",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR(s, 'pattern', 3)",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)",
                DialectType::DuckDB,
            ),
            "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
        );
    }

    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_snowflake_identity_strip_group0() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::Snowflake,
            ),
            "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')"
        );
    }

    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_substr_all_snowflake_identity_strip_group0() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::Snowflake,
            ),
            "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
        );
    }

    #[test]
    fn test_regexp_count_snowflake_to_duckdb_2arg() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_COUNT(s, 'pattern')",
                DialectType::DuckDB,
            ),
            "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
        );
    }

    #[test]
    fn test_regexp_count_snowflake_to_duckdb_3arg() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_COUNT(s, 'pattern', 3)",
                DialectType::DuckDB,
            ),
            "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
        );
    }

    #[test]
    fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
                DialectType::DuckDB,
            ),
            "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
        );
    }

    #[test]
    fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
                DialectType::DuckDB,
            ),
            "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
        );
    }

    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')"
        );
    }

    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
                DialectType::DuckDB,
            ),
            "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
        );
    }

    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
                DialectType::DuckDB,
            ),
            "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
        );
    }

    #[test]
    fn test_rlike_snowflake_to_duckdb_2arg() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT RLIKE(a, b)",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_FULL_MATCH(a, b)"
        );
    }

    #[test]
    fn test_rlike_snowflake_to_duckdb_3arg_flags() {
        assert_eq!(
            transpile_first(
                DialectType::Snowflake,
                "SELECT RLIKE(a, b, 'i')",
                DialectType::DuckDB,
            ),
            "SELECT REGEXP_FULL_MATCH(a, b, 'i')"
        );
    }

    #[test]
    fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
        assert_eq!(
            transpile_first(
                DialectType::BigQuery,
                "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
                DialectType::Snowflake,
            ),
            "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')"
        );
    }

    #[test]
    fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
        assert_eq!(
            transpile_first(
                DialectType::BigQuery,
                "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
                DialectType::Snowflake,
            ),
            "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
        );
    }

    #[test]
    fn test_regexp_instr_snowflake_to_duckdb_2arg() {
        let output = transpile_first(
            DialectType::Snowflake,
            "SELECT REGEXP_INSTR(s, 'pattern')",
            DialectType::DuckDB,
        );
        assert!(
            output.contains("CASE WHEN"),
            "Expected CASE WHEN in result: {}",
            output
        );
        assert!(
            output.contains("LIST_SUM"),
            "Expected LIST_SUM in result: {}",
            output
        );
    }

    #[test]
    fn test_array_except_generic_to_duckdb() {
        let output = transpile_first(
            DialectType::Generic,
            "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
            DialectType::DuckDB,
        );
        eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", output);
        assert!(
            output.contains("CASE WHEN"),
            "Expected CASE WHEN: {}",
            output
        );
        assert!(
            output.contains("LIST_FILTER"),
            "Expected LIST_FILTER: {}",
            output
        );
        assert!(
            output.contains("LIST_DISTINCT"),
            "Expected LIST_DISTINCT: {}",
            output
        );
        assert!(
            output.contains("IS NOT DISTINCT FROM"),
            "Expected IS NOT DISTINCT FROM: {}",
            output
        );
        assert!(
            output.contains("= 0"),
            "Expected = 0 filter: {}",
            output
        );
    }

    #[test]
    fn test_array_except_generic_to_snowflake() {
        let output = transpile_first(
            DialectType::Generic,
            "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
            DialectType::Snowflake,
        );
        eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", output);
        assert_eq!(output, "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
    }

    #[test]
    fn test_array_except_generic_to_presto() {
        let output = transpile_first(
            DialectType::Generic,
            "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
            DialectType::Presto,
        );
        eprintln!("ARRAY_EXCEPT Generic->Presto: {}", output);
        assert_eq!(output, "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
    }

    #[test]
    fn test_array_except_snowflake_to_duckdb() {
        let output = transpile_first(
            DialectType::Snowflake,
            "SELECT ARRAY_EXCEPT([1, 2, 3], [2])",
            DialectType::DuckDB,
        );
        eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", output);
        assert!(
            output.contains("CASE WHEN"),
            "Expected CASE WHEN: {}",
            output
        );
        assert!(
            output.contains("LIST_TRANSFORM"),
            "Expected LIST_TRANSFORM: {}",
            output
        );
    }

    #[test]
    fn test_array_contains_snowflake_to_snowflake() {
        let output = transpile_first(
            DialectType::Snowflake,
            "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
            DialectType::Snowflake,
        );
        eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", output);
        assert_eq!(output, "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
    }

    #[test]
    fn test_array_contains_snowflake_to_duckdb() {
        let output = transpile_first(
            DialectType::Snowflake,
            "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
            DialectType::DuckDB,
        );
        eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", output);
        assert!(
            output.contains("CASE WHEN"),
            "Expected CASE WHEN: {}",
            output
        );
        assert!(
            output.contains("NULLIF"),
            "Expected NULLIF: {}",
            output
        );
        assert!(
            output.contains("ARRAY_CONTAINS"),
            "Expected ARRAY_CONTAINS: {}",
            output
        );
    }

    #[test]
    fn test_array_distinct_snowflake_to_duckdb() {
        let output = transpile_first(
            DialectType::Snowflake,
            "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])",
            DialectType::DuckDB,
        );
        eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", output);
        assert!(
            output.contains("CASE WHEN"),
            "Expected CASE WHEN: {}",
            output
        );
        assert!(
            output.contains("LIST_DISTINCT"),
            "Expected LIST_DISTINCT: {}",
            output
        );
        assert!(
            output.contains("LIST_APPEND"),
            "Expected LIST_APPEND: {}",
            output
        );
        assert!(
            output.contains("LIST_FILTER"),
            "Expected LIST_FILTER: {}",
            output
        );
    }
}