datafusion_expr/
expr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Logical Expressions: [`Expr`]
19
20use std::cmp::Ordering;
21use std::collections::HashSet;
22use std::fmt::{self, Display, Formatter, Write};
23use std::hash::{Hash, Hasher};
24use std::mem;
25use std::sync::Arc;
26
27use crate::expr_fn::binary_expr;
28use crate::function::WindowFunctionSimplification;
29use crate::logical_plan::Subquery;
30use crate::{AggregateUDF, Volatility};
31use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
32
33use arrow::datatypes::{DataType, Field, FieldRef};
34use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
35use datafusion_common::tree_node::{
36    Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
37};
38use datafusion_common::{
39    Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference,
40};
41use datafusion_functions_window_common::field::WindowUDFFieldArgs;
42#[cfg(feature = "sql")]
43use sqlparser::ast::{
44    display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
45    RenameSelectItem, ReplaceSelectElement,
46};
47
48// Moved in 51.0.0 to datafusion_common
49pub use datafusion_common::metadata::FieldMetadata;
50use datafusion_common::metadata::ScalarAndMetadata;
51
/// SQL null treatment clause (`IGNORE NULLS` / `RESPECT NULLS`).
///
/// This mirrors `sqlparser::ast::NullTreatment`, but we need our own variant
/// for when the `sql` feature is disabled.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub enum NullTreatment {
    /// Displayed as `IGNORE NULLS`
    IgnoreNulls,
    /// Displayed as `RESPECT NULLS`
    RespectNulls,
}

impl Display for NullTreatment {
    /// Writes the SQL keyword form of the null treatment.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let keyword = match *self {
            Self::IgnoreNulls => "IGNORE NULLS",
            Self::RespectNulls => "RESPECT NULLS",
        };
        f.write_str(keyword)
    }
}
68
/// Converts the `sqlparser` null treatment into the equivalent local variant.
#[cfg(feature = "sql")]
impl From<sqlparser::ast::NullTreatment> for NullTreatment {
    fn from(treatment: sqlparser::ast::NullTreatment) -> Self {
        use sqlparser::ast::NullTreatment as SqlNullTreatment;

        match treatment {
            SqlNullTreatment::IgnoreNulls => NullTreatment::IgnoreNulls,
            SqlNullTreatment::RespectNulls => NullTreatment::RespectNulls,
        }
    }
}
78
79/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
80///
81/// For example the expression `A + 1` will be represented as
82///
83///```text
84///  BinaryExpr {
85///    left: Expr::Column("A"),
86///    op: Operator::Plus,
87///    right: Expr::Literal(ScalarValue::Int32(Some(1)), None)
88/// }
89/// ```
90///
91/// # Creating Expressions
92///
93/// `Expr`s can be created directly, but it is often easier and less verbose to
94/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
96///
97/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
98///
99/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
100///
101/// # Printing Expressions
102///
103/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
104/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
105///
/// If you need SQL to pass to other systems, consider using [`Unparser`].
107///
108/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
109///
110/// # Schema Access
111///
112/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
113/// of an `Expr`.
114///
115/// # Visiting and Rewriting `Expr`s
116///
117/// The `Expr` struct implements the [`TreeNode`] trait for walking and
118/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
119/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
120/// the examples below and [`TreeNode`] for more information.
121///
122/// # Examples: Creating and Using `Expr`s
123///
124/// ## Column References and Literals
125///
126/// [`Expr::Column`] refer to the values of columns and are often created with
127/// the [`col`] function. For example to create an expression `c1` referring to
128/// column named "c1":
129///
130/// [`col`]: crate::expr_fn::col
131///
132/// ```
133/// # use datafusion_common::Column;
134/// # use datafusion_expr::{lit, col, Expr};
135/// let expr = col("c1");
136/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
137/// ```
138///
139/// [`Expr::Literal`] refer to literal, or constant, values. These are created
140/// with the [`lit`] function. For example to create an expression `42`:
141///
142/// [`lit`]: crate::lit
143///
144/// ```
145/// # use datafusion_common::{Column, ScalarValue};
146/// # use datafusion_expr::{lit, col, Expr};
147/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
148/// let expr = lit(42i64);
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
151/// // To make a (typed) NULL:
152/// let expr = Expr::Literal(ScalarValue::Int64(None), None);
153/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
154/// let expr = lit(ScalarValue::Null);
155/// ```
156///
157/// ## Binary Expressions
158///
159/// Exprs implement traits that allow easy to understand construction of more
160/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
161/// "c2" together
162///
163/// ```
164/// # use datafusion_expr::{lit, col, Operator, Expr};
165/// // Use the `+` operator to add two columns together
166/// let expr = col("c1") + col("c2");
167/// assert!(matches!(expr, Expr::BinaryExpr { .. }));
168/// if let Expr::BinaryExpr(binary_expr) = expr {
169///     assert_eq!(*binary_expr.left, col("c1"));
170///     assert_eq!(*binary_expr.right, col("c2"));
171///     assert_eq!(binary_expr.op, Operator::Plus);
172/// }
173/// ```
174///
/// The expression `c1 = 42` compares the value in column "c1" to the
176/// literal value `42`:
177///
178/// ```
179/// # use datafusion_common::ScalarValue;
180/// # use datafusion_expr::{lit, col, Operator, Expr};
181/// let expr = col("c1").eq(lit(42_i32));
182/// assert!(matches!(expr, Expr::BinaryExpr { .. }));
183/// if let Expr::BinaryExpr(binary_expr) = expr {
184///     assert_eq!(*binary_expr.left, col("c1"));
185///     let scalar = ScalarValue::Int32(Some(42));
186///     assert_eq!(*binary_expr.right, Expr::Literal(scalar, None));
187///     assert_eq!(binary_expr.op, Operator::Eq);
188/// }
189/// ```
190///
191/// Here is how to implement the equivalent of `SELECT *` to select all
192/// [`Expr::Column`] from a [`DFSchema`]'s columns:
193///
194/// ```
195/// # use arrow::datatypes::{DataType, Field, Schema};
196/// # use datafusion_common::{DFSchema, Column};
197/// # use datafusion_expr::Expr;
198/// // Create a schema c1(int, c2 float)
199/// let arrow_schema = Schema::new(vec![
200///     Field::new("c1", DataType::Int32, false),
201///     Field::new("c2", DataType::Float64, false),
202/// ]);
/// // DFSchema is an Arrow schema with optional relation name
204/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap();
205///
206/// // Form Vec<Expr> with an expression for each column in the schema
207/// let exprs: Vec<_> = df_schema.iter().map(Expr::from).collect();
208///
209/// assert_eq!(
210///     exprs,
211///     vec![
212///         Expr::from(Column::from_qualified_name("t1.c1")),
213///         Expr::from(Column::from_qualified_name("t1.c2")),
214///     ]
215/// );
216/// ```
217///
218/// # Examples: Displaying `Exprs`
219///
220/// There are three ways to print an `Expr` depending on the usecase.
221///
222/// ## Use `Debug` trait
223///
224/// Following Rust conventions, the `Debug` implementation prints out the
225/// internal structure of the expression, which is useful for debugging.
226///
227/// ```
228/// # use datafusion_expr::{lit, col};
229/// let expr = col("c1") + lit(42);
230/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42), None) })");
231/// ```
232///
233/// ## Use the `Display` trait  (detailed expression)
234///
235/// The `Display` implementation prints out the expression in a SQL-like form,
236/// but has additional details such as the data type of literals. This is useful
237/// for understanding the expression in more detail and is used for the low level
238/// [`ExplainFormat::Indent`] explain plan format.
239///
240/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
241///
242/// ```
243/// # use datafusion_expr::{lit, col};
244/// let expr = col("c1") + lit(42);
245/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
246/// ```
247///
248/// ## Use [`Self::human_display`] (human readable)
249///
250/// [`Self::human_display`]  prints out the expression in a SQL-like form, optimized
251/// for human consumption by end users. It is used for the
252/// [`ExplainFormat::Tree`] explain plan format.
253///
254/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
255///
256///```
257/// # use datafusion_expr::{lit, col};
258/// let expr = col("c1") + lit(42);
259/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
260/// ```
261///
262/// # Examples: Visiting and Rewriting `Expr`s
263///
264/// Here is an example that finds all literals in an `Expr` tree:
265/// ```
266/// # use std::collections::{HashSet};
267/// use datafusion_common::ScalarValue;
268/// # use datafusion_expr::{col, Expr, lit};
269/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
270/// // Expression a = 5 AND b = 6
271/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
/// // find all literals in a HashSet
273/// let mut scalars = HashSet::new();
274/// // apply recursively visits all nodes in the expression tree
275/// expr.apply(|e| {
276///     if let Expr::Literal(scalar, _) = e {
277///         scalars.insert(scalar);
278///     }
279///     // The return value controls whether to continue visiting the tree
280///     Ok(TreeNodeRecursion::Continue)
281/// })
282/// .unwrap();
283/// // All subtrees have been visited and literals found
284/// assert_eq!(scalars.len(), 2);
285/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
286/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
287/// ```
288///
/// Rewrite an expression, replacing references to column "a" with
/// the literal `42`:
291///
292///  ```
293/// # use datafusion_common::tree_node::{Transformed, TreeNode};
294/// # use datafusion_expr::{col, Expr, lit};
295/// // expression a = 5 AND b = 6
296/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
297/// // rewrite all references to column "a" to the literal 42
298/// let rewritten = expr.transform(|e| {
299///   if let Expr::Column(c) = &e {
300///     if &c.name == "a" {
301///       // return Transformed::yes to indicate the node was changed
302///       return Ok(Transformed::yes(lit(42)))
303///     }
304///   }
305///   // return Transformed::no to indicate the node was not changed
306///   Ok(Transformed::no(e))
307/// }).unwrap();
308/// // The expression has been rewritten
309/// assert!(rewritten.transformed);
310/// // to 42 = 5 AND b = 6
/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
/// ```
#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
pub enum Expr {
    /// An expression with a specific name.
    Alias(Alias),
    /// A named reference to a qualified field in a schema.
    Column(Column),
    /// A named reference to a variable in a registry.
    ScalarVariable(DataType, Vec<String>),
    /// A constant value along with associated [`FieldMetadata`].
    Literal(ScalarValue, Option<FieldMetadata>),
    /// A binary expression such as "age > 21"
    BinaryExpr(BinaryExpr),
    /// LIKE expression
    Like(Like),
    /// `SIMILAR TO` expression: a LIKE-style expression that uses regular expressions
    SimilarTo(Like),
    /// Negation of an expression. The expression's type must be a boolean to make sense.
    Not(Box<Expr>),
    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
    IsNotNull(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsNull(Box<Expr>),
    /// True if argument is true, false otherwise. This expression itself is never NULL.
    IsTrue(Box<Expr>),
    /// True if argument is false, false otherwise. This expression itself is never NULL.
    IsFalse(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsUnknown(Box<Expr>),
    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
    IsNotTrue(Box<Expr>),
    /// True if argument is TRUE or NULL, false otherwise. This expression itself is never NULL.
    IsNotFalse(Box<Expr>),
    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
    IsNotUnknown(Box<Expr>),
    /// Arithmetic negation of an expression, the operand must be of a signed numeric data type
    Negative(Box<Expr>),
    /// Whether an expression is between a given range.
    Between(Between),
    /// A CASE expression (see docs on [`Case`])
    Case(Case),
    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    Cast(Cast),
    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    TryCast(TryCast),
    /// Call a scalar function with a set of arguments.
    ScalarFunction(ScalarFunction),
    /// Calls an aggregate function with arguments, and optional
    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
    ///
    /// See also [`ExprFunctionExt`] to set these fields.
    ///
    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
    AggregateFunction(AggregateFunction),
    /// Call a window function with a set of arguments.
    ///
    /// The payload is boxed; use `Expr::from(window_function)` to wrap a
    /// [`WindowFunction`] without boxing it by hand.
    WindowFunction(Box<WindowFunction>),
    /// Returns whether the list contains the expr value.
    InList(InList),
    /// EXISTS subquery
    Exists(Exists),
    /// IN subquery
    InSubquery(InSubquery),
    /// Scalar subquery
    ScalarSubquery(Subquery),
    /// Represents a reference to all available fields in a specific schema,
    /// with an optional (schema) qualifier.
    ///
    /// This expr has to be resolved to a list of columns before translating logical
    /// plan into physical plan.
    #[deprecated(
        since = "46.0.0",
        note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765"
    )]
    Wildcard {
        /// Optional qualifier restricting which fields are referenced
        qualifier: Option<TableReference>,
        /// Additional wildcard options (see [`WildcardOptions`])
        options: Box<WildcardOptions>,
    },
    /// List of grouping set expressions. Only valid in the context of an aggregate
    /// GROUP BY expression list
    GroupingSet(GroupingSet),
    /// A placeholder for parameters in a prepared statement
    /// (e.g. `$foo` or `$1`)
    Placeholder(Placeholder),
    /// A placeholder which holds a reference to a qualified field
    /// in the outer query, used for correlated sub queries.
    OuterReferenceColumn(FieldRef, Column),
    /// Unnest expression
    Unnest(Unnest),
}
402
403impl Default for Expr {
404    fn default() -> Self {
405        Expr::Literal(ScalarValue::Null, None)
406    }
407}
408
/// Allows an [`Expr`] to be used where an `AsRef<Expr>` is expected.
impl AsRef<Expr> for Expr {
    fn as_ref(&self) -> &Expr {
        // An `Expr` is trivially a reference to itself
        self
    }
}
414
415/// Create an [`Expr`] from a [`Column`]
416impl From<Column> for Expr {
417    fn from(value: Column) -> Self {
418        Expr::Column(value)
419    }
420}
421
422/// Create an [`Expr`] from a [`WindowFunction`]
423impl From<WindowFunction> for Expr {
424    fn from(value: WindowFunction) -> Self {
425        Expr::WindowFunction(Box::new(value))
426    }
427}
428
429/// Create an [`Expr`] from an [`ScalarAndMetadata`]
430impl From<ScalarAndMetadata> for Expr {
431    fn from(value: ScalarAndMetadata) -> Self {
432        let (value, metadata) = value.into_inner();
433        Expr::Literal(value, metadata)
434    }
435}
436
437/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
438/// useful for creating [`Expr`] from a [`DFSchema`].
439///
440/// See example on [`Expr`]
441impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
442    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
443        Expr::from(Column::from(value))
444    }
445}
446
447impl<'a> TreeNodeContainer<'a, Self> for Expr {
448    fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
449        &'a self,
450        mut f: F,
451    ) -> Result<TreeNodeRecursion> {
452        f(self)
453    }
454
455    fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>(
456        self,
457        mut f: F,
458    ) -> Result<Transformed<Self>> {
459        f(self)
460    }
461}
462
/// The metadata used in [`Field::metadata`].
///
/// This is the key-value (string to string) metadata associated with an
/// Arrow [`Field`].
///
/// # Common Use Cases
///
/// Field metadata is commonly used to store:
/// - Default values for columns when data is missing
/// - Column descriptions or documentation
/// - Data lineage information
/// - Custom application-specific annotations
/// - Encoding hints or display formatting preferences
///
/// # Example: Storing Default Values
///
/// A practical example of using field metadata is storing default values for columns
/// that may be missing in the physical data but present in the logical schema.
/// See the [default_column_values.rs] example implementation.
///
/// [default_column_values.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/default_column_values.rs
pub type SchemaFieldMetadata = std::collections::HashMap<String, String>;

/// Computes the metadata shared by every input, as needed for UNION operations.
///
/// Only entries whose key is present in ALL inputs with an identical value
/// survive.
///
/// # Union Metadata Behavior
///
/// - A key missing from any input is dropped
/// - A key whose value differs between inputs is dropped
/// - If any input has no metadata, the result is empty
/// - If there are no inputs at all, the result is empty
///
/// # Arguments
///
/// * `metadatas` - An iterator of `SchemaFieldMetadata` instances to intersect
///
/// # Returns
///
/// A new `SchemaFieldMetadata` containing only the intersected metadata
pub fn intersect_metadata_for_union<'a>(
    metadatas: impl IntoIterator<Item = &'a SchemaFieldMetadata>,
) -> SchemaFieldMetadata {
    let mut inputs = metadatas.into_iter();

    match inputs.next() {
        // Nothing to intersect
        None => SchemaFieldMetadata::default(),
        // Start from a copy of the first input and narrow it with each later one
        Some(first) => inputs.fold(first.clone(), |mut common, other| {
            common.retain(|key, value| other.get(key) == Some(value));
            common
        }),
    }
}
521
522/// UNNEST expression.
523#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
524pub struct Unnest {
525    pub expr: Box<Expr>,
526}
527
528impl Unnest {
529    /// Create a new Unnest expression.
530    pub fn new(expr: Expr) -> Self {
531        Self {
532            expr: Box::new(expr),
533        }
534    }
535
536    /// Create a new Unnest expression.
537    pub fn new_boxed(boxed: Box<Expr>) -> Self {
538        Self { expr: boxed }
539    }
540}
541
/// Alias expression
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Alias {
    /// The expression being aliased
    pub expr: Box<Expr>,
    /// Optional qualifier for the alias
    pub relation: Option<TableReference>,
    /// The alias name
    pub name: String,
    /// Optional [`FieldMetadata`] associated with the alias
    pub metadata: Option<FieldMetadata>,
}
550
551impl Hash for Alias {
552    fn hash<H: Hasher>(&self, state: &mut H) {
553        self.expr.hash(state);
554        self.relation.hash(state);
555        self.name.hash(state);
556    }
557}
558
559impl PartialOrd for Alias {
560    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
561        let cmp = self.expr.partial_cmp(&other.expr);
562        let Some(Ordering::Equal) = cmp else {
563            return cmp;
564        };
565        let cmp = self.relation.partial_cmp(&other.relation);
566        let Some(Ordering::Equal) = cmp else {
567            return cmp;
568        };
569        self.name
570            .partial_cmp(&other.name)
571            // TODO (https://github.com/apache/datafusion/issues/17477) avoid recomparing all fields
572            .filter(|cmp| *cmp != Ordering::Equal || self == other)
573    }
574}
575
576impl Alias {
577    /// Create an alias with an optional schema/field qualifier.
578    pub fn new(
579        expr: Expr,
580        relation: Option<impl Into<TableReference>>,
581        name: impl Into<String>,
582    ) -> Self {
583        Self {
584            expr: Box::new(expr),
585            relation: relation.map(|r| r.into()),
586            name: name.into(),
587            metadata: None,
588        }
589    }
590
591    pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self {
592        self.metadata = metadata;
593        self
594    }
595}
596
597/// Binary expression
598#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
599pub struct BinaryExpr {
600    /// Left-hand side of the expression
601    pub left: Box<Expr>,
602    /// The comparison operator
603    pub op: Operator,
604    /// Right-hand side of the expression
605    pub right: Box<Expr>,
606}
607
608impl BinaryExpr {
609    /// Create a new binary expression
610    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
611        Self { left, op, right }
612    }
613}
614
615impl Display for BinaryExpr {
616    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
617        // Put parentheses around child binary expressions so that we can see the difference
618        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
619        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
620        // equivalent and the parentheses are not necessary.
621
622        fn write_child(
623            f: &mut Formatter<'_>,
624            expr: &Expr,
625            precedence: u8,
626        ) -> fmt::Result {
627            match expr {
628                Expr::BinaryExpr(child) => {
629                    let p = child.op.precedence();
630                    if p == 0 || p < precedence {
631                        write!(f, "({child})")?;
632                    } else {
633                        write!(f, "{child}")?;
634                    }
635                }
636                _ => write!(f, "{expr}")?,
637            }
638            Ok(())
639        }
640
641        let precedence = self.op.precedence();
642        write_child(f, self.left.as_ref(), precedence)?;
643        write!(f, " {} ", self.op)?;
644        write_child(f, self.right.as_ref(), precedence)
645    }
646}
647
648/// CASE expression
649///
650/// The CASE expression is similar to a series of nested if/else and there are two forms that
651/// can be used. The first form consists of a series of boolean "when" expressions with
652/// corresponding "then" expressions, and an optional "else" expression.
653///
654/// ```text
655/// CASE WHEN condition THEN result
656///      [WHEN ...]
657///      [ELSE result]
658/// END
659/// ```
660///
661/// The second form uses a base expression and then a series of "when" clauses that match on a
662/// literal value.
663///
664/// ```text
665/// CASE expression
666///     WHEN value THEN result
667///     [WHEN ...]
668///     [ELSE result]
669/// END
670/// ```
671#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
672pub struct Case {
673    /// Optional base expression that can be compared to literal values in the "when" expressions
674    pub expr: Option<Box<Expr>>,
675    /// One or more when/then expressions
676    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
677    /// Optional "else" expression
678    pub else_expr: Option<Box<Expr>>,
679}
680
681impl Case {
682    /// Create a new Case expression
683    pub fn new(
684        expr: Option<Box<Expr>>,
685        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
686        else_expr: Option<Box<Expr>>,
687    ) -> Self {
688        Self {
689            expr,
690            when_then_expr,
691            else_expr,
692        }
693    }
694}
695
696/// LIKE expression
697#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
698pub struct Like {
699    pub negated: bool,
700    pub expr: Box<Expr>,
701    pub pattern: Box<Expr>,
702    pub escape_char: Option<char>,
703    /// Whether to ignore case on comparing
704    pub case_insensitive: bool,
705}
706
707impl Like {
708    /// Create a new Like expression
709    pub fn new(
710        negated: bool,
711        expr: Box<Expr>,
712        pattern: Box<Expr>,
713        escape_char: Option<char>,
714        case_insensitive: bool,
715    ) -> Self {
716        Self {
717            negated,
718            expr,
719            pattern,
720            escape_char,
721            case_insensitive,
722        }
723    }
724}
725
726/// BETWEEN expression
727#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
728pub struct Between {
729    /// The value to compare
730    pub expr: Box<Expr>,
731    /// Whether the expression is negated
732    pub negated: bool,
733    /// The low end of the range
734    pub low: Box<Expr>,
735    /// The high end of the range
736    pub high: Box<Expr>,
737}
738
739impl Between {
740    /// Create a new Between expression
741    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
742        Self {
743            expr,
744            negated,
745            low,
746            high,
747        }
748    }
749}
750
751/// Invoke a [`ScalarUDF`] with a set of arguments
752///
753/// [`ScalarUDF`]: crate::ScalarUDF
754#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
755pub struct ScalarFunction {
756    /// The function
757    pub func: Arc<crate::ScalarUDF>,
758    /// List of expressions to feed to the functions as arguments
759    pub args: Vec<Expr>,
760}
761
762impl ScalarFunction {
763    // return the Function's name
764    pub fn name(&self) -> &str {
765        self.func.name()
766    }
767}
768
769impl ScalarFunction {
770    /// Create a new `ScalarFunction` from a [`ScalarUDF`]
771    ///
772    /// [`ScalarUDF`]: crate::ScalarUDF
773    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
774        Self { func: udf, args }
775    }
776}
777
/// Describes how to access a sub field of a nested type, such as a field of
/// a struct or element(s) of a list
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
    /// Named field, for example `struct["name"]`
    NamedStructField { name: ScalarValue },
    /// Single list index, for example: `list[i]`
    ListIndex { key: Box<Expr> },
    /// List stride, for example `list[i:j:k]`
    ListRange {
        /// The `i` in `list[i:j:k]`
        start: Box<Expr>,
        /// The `j` in `list[i:j:k]`
        stop: Box<Expr>,
        /// The `k` in `list[i:j:k]`
        stride: Box<Expr>,
    },
}
792
793/// Cast expression
794#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
795pub struct Cast {
796    /// The expression being cast
797    pub expr: Box<Expr>,
798    /// The `DataType` the expression will yield
799    pub data_type: DataType,
800}
801
802impl Cast {
803    /// Create a new Cast expression
804    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
805        Self { expr, data_type }
806    }
807}
808
809/// TryCast Expression
810#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
811pub struct TryCast {
812    /// The expression being cast
813    pub expr: Box<Expr>,
814    /// The `DataType` the expression will yield
815    pub data_type: DataType,
816}
817
818impl TryCast {
819    /// Create a new TryCast expression
820    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
821        Self { expr, data_type }
822    }
823}
824
825/// SORT expression
826#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
827pub struct Sort {
828    /// The expression to sort on
829    pub expr: Expr,
830    /// The direction of the sort
831    pub asc: bool,
832    /// Whether to put Nulls before all other data values
833    pub nulls_first: bool,
834}
835
836impl Sort {
837    /// Create a new Sort expression
838    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
839        Self {
840            expr,
841            asc,
842            nulls_first,
843        }
844    }
845
846    /// Create a new Sort expression with the opposite sort direction
847    pub fn reverse(&self) -> Self {
848        Self {
849            expr: self.expr.clone(),
850            asc: !self.asc,
851            nulls_first: !self.nulls_first,
852        }
853    }
854
855    /// Replaces the Sort expressions with `expr`
856    pub fn with_expr(&self, expr: Expr) -> Self {
857        Self {
858            expr,
859            asc: self.asc,
860            nulls_first: self.nulls_first,
861        }
862    }
863}
864
865impl Display for Sort {
866    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
867        write!(f, "{}", self.expr)?;
868        if self.asc {
869            write!(f, " ASC")?;
870        } else {
871            write!(f, " DESC")?;
872        }
873        if self.nulls_first {
874            write!(f, " NULLS FIRST")?;
875        } else {
876            write!(f, " NULLS LAST")?;
877        }
878        Ok(())
879    }
880}
881
882impl<'a> TreeNodeContainer<'a, Expr> for Sort {
883    fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>(
884        &'a self,
885        f: F,
886    ) -> Result<TreeNodeRecursion> {
887        self.expr.apply_elements(f)
888    }
889
890    fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
891        self,
892        f: F,
893    ) -> Result<Transformed<Self>> {
894        self.expr
895            .map_elements(f)?
896            .map_data(|expr| Ok(Self { expr, ..self }))
897    }
898}
899
/// Aggregate function
///
/// See also  [`ExprFunctionExt`] to set these fields on `Expr`
///
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunction {
    /// The aggregate function being called
    pub func: Arc<AggregateUDF>,
    /// Arguments and modifiers of the call (see [`AggregateFunctionParams`])
    pub params: AggregateFunctionParams,
}
911
/// Arguments and modifiers of an [`AggregateFunction`] call.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunctionParams {
    /// List of expressions to feed to the function as arguments
    pub args: Vec<Expr>,
    /// Whether this is a DISTINCT aggregation or not
    pub distinct: bool,
    /// Optional filter
    pub filter: Option<Box<Expr>>,
    /// Optional ordering
    pub order_by: Vec<Sort>,
    /// Optional NULL treatment for the call, see [`NullTreatment`]
    pub null_treatment: Option<NullTreatment>,
}
923
924impl AggregateFunction {
925    /// Create a new AggregateFunction expression with a user-defined function (UDF)
926    pub fn new_udf(
927        func: Arc<AggregateUDF>,
928        args: Vec<Expr>,
929        distinct: bool,
930        filter: Option<Box<Expr>>,
931        order_by: Vec<Sort>,
932        null_treatment: Option<NullTreatment>,
933    ) -> Self {
934        Self {
935            func,
936            params: AggregateFunctionParams {
937                args,
938                distinct,
939                filter,
940                order_by,
941                null_treatment,
942            },
943        }
944    }
945}
946
/// A function used as a SQL window function
///
/// In SQL, you can use:
/// - Actual window functions ([`WindowUDF`])
/// - Normal aggregate functions ([`AggregateUDF`])
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum WindowFunctionDefinition {
    /// A user defined aggregate function
    AggregateUDF(Arc<AggregateUDF>),
    /// A user defined window function
    WindowUDF(Arc<WindowUDF>),
}
959
960impl WindowFunctionDefinition {
961    /// Returns the datatype of the window function
962    pub fn return_field(
963        &self,
964        input_expr_fields: &[FieldRef],
965        display_name: &str,
966    ) -> Result<FieldRef> {
967        match self {
968            WindowFunctionDefinition::AggregateUDF(fun) => {
969                fun.return_field(input_expr_fields)
970            }
971            WindowFunctionDefinition::WindowUDF(fun) => {
972                fun.field(WindowUDFFieldArgs::new(input_expr_fields, display_name))
973            }
974        }
975    }
976
977    /// The signatures supported by the function `fun`.
978    pub fn signature(&self) -> Signature {
979        match self {
980            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
981            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
982        }
983    }
984
985    /// Function's name for display
986    pub fn name(&self) -> &str {
987        match self {
988            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
989            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
990        }
991    }
992
993    /// Return the inner window simplification function, if any
994    ///
995    /// See [`WindowFunctionSimplification`] for more information
996    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
997        match self {
998            WindowFunctionDefinition::AggregateUDF(_) => None,
999            WindowFunctionDefinition::WindowUDF(udwf) => udwf.simplify(),
1000        }
1001    }
1002}
1003
1004impl Display for WindowFunctionDefinition {
1005    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1006        match self {
1007            WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f),
1008            WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f),
1009        }
1010    }
1011}
1012
1013impl From<Arc<AggregateUDF>> for WindowFunctionDefinition {
1014    fn from(value: Arc<AggregateUDF>) -> Self {
1015        Self::AggregateUDF(value)
1016    }
1017}
1018
1019impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
1020    fn from(value: Arc<WindowUDF>) -> Self {
1021        Self::WindowUDF(value)
1022    }
1023}
1024
/// Window function
///
/// Holds the actual function to call ([`WindowFunctionDefinition`]) as well
/// as its arguments (`args`) and the contents of the `OVER` clause:
///
/// 1. `PARTITION BY`
/// 2. `ORDER BY`
/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
///
/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
///
/// [`ExprFunctionExt`]: crate::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunction {
    /// The function to call
    pub fun: WindowFunctionDefinition,
    /// Arguments and `OVER` clause contents, see [`WindowFunctionParams`]
    pub params: WindowFunctionParams,
}
1043
/// Arguments and `OVER` clause contents of a [`WindowFunction`] call.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunctionParams {
    /// List of expressions to feed to the functions as arguments
    pub args: Vec<Expr>,
    /// List of partition by expressions
    pub partition_by: Vec<Expr>,
    /// List of order by expressions
    pub order_by: Vec<Sort>,
    /// Window frame
    pub window_frame: WindowFrame,
    /// Optional filter expression (FILTER (WHERE ...))
    pub filter: Option<Box<Expr>>,
    /// Specifies how NULL value is treated: ignore or respect
    pub null_treatment: Option<NullTreatment>,
    /// Distinct flag
    pub distinct: bool,
}
1061
1062impl WindowFunction {
1063    /// Create a new Window expression with the specified argument an
1064    /// empty `OVER` clause
1065    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
1066        Self {
1067            fun: fun.into(),
1068            params: WindowFunctionParams {
1069                args,
1070                partition_by: Vec::default(),
1071                order_by: Vec::default(),
1072                window_frame: WindowFrame::new(None),
1073                filter: None,
1074                null_treatment: None,
1075                distinct: false,
1076            },
1077        }
1078    }
1079
1080    /// Return the inner window simplification function, if any
1081    ///
1082    /// See [`WindowFunctionSimplification`] for more information
1083    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1084        self.fun.simplify()
1085    }
1086}
1087
/// EXISTS expression
///
/// Represents `EXISTS <subquery>`, or `NOT EXISTS <subquery>` when `negated`
/// is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Exists {
    /// Subquery that will produce a single column of data
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
1096
1097impl Exists {
1098    // Create a new Exists expression.
1099    pub fn new(subquery: Subquery, negated: bool) -> Self {
1100        Self { subquery, negated }
1101    }
1102}
1103
/// InList expression
///
/// Represents `expr IN (list...)`, or `expr NOT IN (list...)` when `negated`
/// is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InList {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// The list of values to compare against
    pub list: Vec<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
}
1114
1115impl InList {
1116    /// Create a new InList expression
1117    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
1118        Self {
1119            expr,
1120            list,
1121            negated,
1122        }
1123    }
1124}
1125
/// IN subquery
///
/// Represents `expr IN (<subquery>)`, or `expr NOT IN (<subquery>)` when
/// `negated` is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InSubquery {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// Subquery that will produce a single column of data to compare against
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
1136
1137impl InSubquery {
1138    /// Create a new InSubquery expression
1139    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
1140        Self {
1141            expr,
1142            subquery,
1143            negated,
1144        }
1145    }
1146}
1147
/// Placeholder, representing bind parameter values such as `$1` or `$name`.
///
/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
/// or can be specified directly using `PREPARE` statements.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Placeholder {
    /// The identifier of the parameter, including the leading `$` (e.g., `"$1"` or `"$foo"`)
    pub id: String,
    /// The field (and therefore the type) the parameter will be filled in
    /// with, if one has been provided
    pub field: Option<FieldRef>,
}
1159
1160impl Placeholder {
1161    /// Create a new Placeholder expression
1162    #[deprecated(since = "51.0.0", note = "Use new_with_field instead")]
1163    pub fn new(id: String, data_type: Option<DataType>) -> Self {
1164        Self {
1165            id,
1166            field: data_type.map(|dt| Arc::new(Field::new("", dt, true))),
1167        }
1168    }
1169
1170    /// Create a new Placeholder expression from a Field
1171    pub fn new_with_field(id: String, field: Option<FieldRef>) -> Self {
1172        Self { id, field }
1173    }
1174}
1175
/// Grouping sets
///
/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
/// for Postgres definition.
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
/// for Apache Spark definition.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum GroupingSet {
    /// Rollup grouping sets (`ROLLUP`), holding the list of exprs
    Rollup(Vec<Expr>),
    /// Cube grouping sets (`CUBE`), holding the list of exprs
    Cube(Vec<Expr>),
    /// User-defined grouping sets, one inner list per set
    GroupingSets(Vec<Vec<Expr>>),
}
1191
1192impl GroupingSet {
1193    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
1194    /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate
1195    /// the exprs in the underlying sets.
1196    pub fn distinct_expr(&self) -> Vec<&Expr> {
1197        match self {
1198            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
1199                exprs.iter().collect()
1200            }
1201            GroupingSet::GroupingSets(groups) => {
1202                let mut exprs: Vec<&Expr> = vec![];
1203                for exp in groups.iter().flatten() {
1204                    if !exprs.contains(&exp) {
1205                        exprs.push(exp);
1206                    }
1207                }
1208                exprs
1209            }
1210        }
1211    }
1212}
1213
/// Stand-in for the `ILIKE` wildcard option, compiled only when the `sql`
/// feature is disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct IlikeSelectItem {
    /// The pattern, rendered inside single quotes
    pub pattern: String,
}
/// Renders as `ILIKE '<pattern>'`.
#[cfg(not(feature = "sql"))]
impl Display for IlikeSelectItem {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let Self { pattern } = self;
        write!(f, "ILIKE '{pattern}'")
    }
}
1226#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1227#[cfg(not(feature = "sql"))]
1228pub enum ExcludeSelectItem {
1229    Single(Ident),
1230    Multiple(Vec<Ident>),
1231}
1232#[cfg(not(feature = "sql"))]
1233impl Display for ExcludeSelectItem {
1234    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1235        write!(f, "EXCLUDE")?;
1236        match self {
1237            Self::Single(column) => {
1238                write!(f, " {column}")?;
1239            }
1240            Self::Multiple(columns) => {
1241                write!(f, " ({})", display_comma_separated(columns))?;
1242            }
1243        }
1244        Ok(())
1245    }
1246}
1247#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1248#[cfg(not(feature = "sql"))]
1249pub struct ExceptSelectItem {
1250    pub first_element: Ident,
1251    pub additional_elements: Vec<Ident>,
1252}
1253#[cfg(not(feature = "sql"))]
1254impl Display for ExceptSelectItem {
1255    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1256        write!(f, "EXCEPT ")?;
1257        if self.additional_elements.is_empty() {
1258            write!(f, "({})", self.first_element)?;
1259        } else {
1260            write!(
1261                f,
1262                "({}, {})",
1263                self.first_element,
1264                display_comma_separated(&self.additional_elements)
1265            )?;
1266        }
1267        Ok(())
1268    }
1269}
1270
/// Joins the `Display` output of every element of `slice` with `", "`.
///
/// Returns an empty string for an empty slice. Only compiled when the `sql`
/// feature is disabled.
///
/// Each element is written straight into the result buffer, so no temporary
/// `String` is allocated per element.
///
/// # Panics
///
/// Panics if an element's `Display` implementation returns an error.
#[cfg(not(feature = "sql"))]
pub fn display_comma_separated<T>(slice: &[T]) -> String
where
    T: Display,
{
    let mut joined = String::new();
    for (index, item) in slice.iter().enumerate() {
        if index > 0 {
            joined.push_str(", ");
        }
        // Writing into a `String` cannot fail on its own; an error here can
        // only come from the element's `Display` implementation.
        write!(joined, "{item}")
            .expect("a Display implementation returned an error unexpectedly");
    }
    joined
}
1279
1280#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1281#[cfg(not(feature = "sql"))]
1282pub enum RenameSelectItem {
1283    Single(String),
1284    Multiple(Vec<String>),
1285}
1286#[cfg(not(feature = "sql"))]
1287impl Display for RenameSelectItem {
1288    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1289        write!(f, "RENAME")?;
1290        match self {
1291            Self::Single(column) => {
1292                write!(f, " {column}")?;
1293            }
1294            Self::Multiple(columns) => {
1295                write!(f, " ({})", display_comma_separated(columns))?;
1296            }
1297        }
1298        Ok(())
1299    }
1300}
1301
/// Stand-in identifier type, compiled only when the `sql` feature is
/// disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct Ident {
    /// The value of the identifier without quotes.
    pub value: String,
    /// The starting quote if any. Valid quote characters are the single quote,
    /// double quote, backtick, and opening square bracket.
    pub quote_style: Option<char>,
    /// The span of the identifier in the original SQL string.
    pub span: String,
}
/// Renders as `[<value>]`; `quote_style` and `span` are not part of the
/// output.
#[cfg(not(feature = "sql"))]
impl Display for Ident {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let Self { value, .. } = self;
        write!(f, "[{value}]")
    }
}
1319
1320#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1321#[cfg(not(feature = "sql"))]
1322pub struct ReplaceSelectElement {
1323    pub expr: String,
1324    pub column_name: Ident,
1325    pub as_keyword: bool,
1326}
1327#[cfg(not(feature = "sql"))]
1328impl Display for ReplaceSelectElement {
1329    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1330        if self.as_keyword {
1331            write!(f, "{} AS {}", self.expr, self.column_name)
1332        } else {
1333            write!(f, "{} {}", self.expr, self.column_name)
1334        }
1335    }
1336}
1337
/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and BigQuery `EXCEPT`.
///
/// Every option is optional; the `Default` value has none of them set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct WildcardOptions {
    /// `[ILIKE...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub ilike: Option<IlikeSelectItem>,
    /// `[EXCLUDE...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub exclude: Option<ExcludeSelectItem>,
    /// `[EXCEPT...]`.
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
    pub except: Option<ExceptSelectItem>,
    /// `[REPLACE]`
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub replace: Option<PlannedReplaceSelectItem>,
    /// `[RENAME ...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub rename: Option<RenameSelectItem>,
}
1360
1361impl WildcardOptions {
1362    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1363        WildcardOptions {
1364            ilike: self.ilike,
1365            exclude: self.exclude,
1366            except: self.except,
1367            replace: Some(replace),
1368            rename: self.rename,
1369        }
1370    }
1371}
1372
1373impl Display for WildcardOptions {
1374    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1375        if let Some(ilike) = &self.ilike {
1376            write!(f, " {ilike}")?;
1377        }
1378        if let Some(exclude) = &self.exclude {
1379            write!(f, " {exclude}")?;
1380        }
1381        if let Some(except) = &self.except {
1382            write!(f, " {except}")?;
1383        }
1384        if let Some(replace) = &self.replace {
1385            write!(f, " {replace}")?;
1386        }
1387        if let Some(rename) = &self.rename {
1388            write!(f, " {rename}")?;
1389        }
1390        Ok(())
1391    }
1392}
1393
/// The planned expressions for `REPLACE`
///
/// Keeps the original `REPLACE` elements next to the expressions planned
/// from them.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct PlannedReplaceSelectItem {
    /// The original ast nodes
    pub items: Vec<ReplaceSelectElement>,
    /// The expressions planned from the ast nodes. They will be used when expanding the wildcard.
    pub planned_expressions: Vec<Expr>,
}
1402
1403impl Display for PlannedReplaceSelectItem {
1404    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1405        write!(f, "REPLACE")?;
1406        write!(f, " ({})", display_comma_separated(&self.items))?;
1407        Ok(())
1408    }
1409}
1410
1411impl PlannedReplaceSelectItem {
1412    pub fn items(&self) -> &[ReplaceSelectElement] {
1413        &self.items
1414    }
1415
1416    pub fn expressions(&self) -> &[Expr] {
1417        &self.planned_expressions
1418    }
1419}
1420
1421impl Expr {
1422    /// The name of the column (field) that this `Expr` will produce.
1423    ///
1424    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
1425    /// [`Schema`] will have a field with this name.
1426    ///
1427    /// Note that the resulting string is subtlety different from the `Display`
1428    /// representation for certain `Expr`. Some differences:
1429    ///
1430    /// 1. [`Expr::Alias`], which shows only the alias itself
1431    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
1432    ///
1433    /// # Example
1434    /// ```
1435    /// # use datafusion_expr::{col, lit};
1436    /// let expr = col("foo").eq(lit(42));
1437    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
1438    ///
1439    /// let expr = col("foo").alias("bar").eq(lit(11));
1440    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
1441    /// ```
1442    ///
1443    /// [`Schema`]: arrow::datatypes::Schema
1444    pub fn schema_name(&self) -> impl Display + '_ {
1445        SchemaDisplay(self)
1446    }
1447
1448    /// Human readable display formatting for this expression.
1449    ///
1450    /// This function is primarily used in printing the explain tree output,
1451    /// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
1452    /// show how expressions are used in physical and logical plans. See the
1453    /// [`Expr`] for other ways to format expressions
1454    ///
1455    /// Note this format is intended for human consumption rather than SQL for
1456    /// other systems. If you need  SQL to pass to other systems, consider using
1457    /// [`Unparser`].
1458    ///
1459    /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
1460    ///
1461    /// # Example
1462    /// ```
1463    /// # use datafusion_expr::{col, lit};
1464    /// let expr = col("foo") + lit(42);
1465    /// // For EXPLAIN output:
1466    /// // "foo + 42"
1467    /// println!("{}", expr.human_display());
1468    /// ```
1469    pub fn human_display(&self) -> impl Display + '_ {
1470        SqlDisplay(self)
1471    }
1472
1473    /// Returns the qualifier and the schema name of this expression.
1474    ///
1475    /// Used when the expression forms the output field of a certain plan.
1476    /// The result is the field's qualifier and field name in the plan's
1477    /// output schema. We can use this qualified name to reference the field.
1478    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1479        match self {
1480            Expr::Column(Column {
1481                relation,
1482                name,
1483                spans: _,
1484            }) => (relation.clone(), name.clone()),
1485            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1486            _ => (None, self.schema_name().to_string()),
1487        }
1488    }
1489
1490    /// Return String representation of the variant represented by `self`
1491    /// Useful for non-rust based bindings
1492    pub fn variant_name(&self) -> &str {
1493        match self {
1494            Expr::AggregateFunction { .. } => "AggregateFunction",
1495            Expr::Alias(..) => "Alias",
1496            Expr::Between { .. } => "Between",
1497            Expr::BinaryExpr { .. } => "BinaryExpr",
1498            Expr::Case { .. } => "Case",
1499            Expr::Cast { .. } => "Cast",
1500            Expr::Column(..) => "Column",
1501            Expr::OuterReferenceColumn(_, _) => "Outer",
1502            Expr::Exists { .. } => "Exists",
1503            Expr::GroupingSet(..) => "GroupingSet",
1504            Expr::InList { .. } => "InList",
1505            Expr::InSubquery(..) => "InSubquery",
1506            Expr::IsNotNull(..) => "IsNotNull",
1507            Expr::IsNull(..) => "IsNull",
1508            Expr::Like { .. } => "Like",
1509            Expr::SimilarTo { .. } => "RLike",
1510            Expr::IsTrue(..) => "IsTrue",
1511            Expr::IsFalse(..) => "IsFalse",
1512            Expr::IsUnknown(..) => "IsUnknown",
1513            Expr::IsNotTrue(..) => "IsNotTrue",
1514            Expr::IsNotFalse(..) => "IsNotFalse",
1515            Expr::IsNotUnknown(..) => "IsNotUnknown",
1516            Expr::Literal(..) => "Literal",
1517            Expr::Negative(..) => "Negative",
1518            Expr::Not(..) => "Not",
1519            Expr::Placeholder(_) => "Placeholder",
1520            Expr::ScalarFunction(..) => "ScalarFunction",
1521            Expr::ScalarSubquery { .. } => "ScalarSubquery",
1522            Expr::ScalarVariable(..) => "ScalarVariable",
1523            Expr::TryCast { .. } => "TryCast",
1524            Expr::WindowFunction { .. } => "WindowFunction",
1525            #[expect(deprecated)]
1526            Expr::Wildcard { .. } => "Wildcard",
1527            Expr::Unnest { .. } => "Unnest",
1528        }
1529    }
1530
1531    /// Return `self == other`
1532    pub fn eq(self, other: Expr) -> Expr {
1533        binary_expr(self, Operator::Eq, other)
1534    }
1535
1536    /// Return `self != other`
1537    pub fn not_eq(self, other: Expr) -> Expr {
1538        binary_expr(self, Operator::NotEq, other)
1539    }
1540
1541    /// Return `self > other`
1542    pub fn gt(self, other: Expr) -> Expr {
1543        binary_expr(self, Operator::Gt, other)
1544    }
1545
1546    /// Return `self >= other`
1547    pub fn gt_eq(self, other: Expr) -> Expr {
1548        binary_expr(self, Operator::GtEq, other)
1549    }
1550
1551    /// Return `self < other`
1552    pub fn lt(self, other: Expr) -> Expr {
1553        binary_expr(self, Operator::Lt, other)
1554    }
1555
1556    /// Return `self <= other`
1557    pub fn lt_eq(self, other: Expr) -> Expr {
1558        binary_expr(self, Operator::LtEq, other)
1559    }
1560
1561    /// Return `self && other`
1562    pub fn and(self, other: Expr) -> Expr {
1563        binary_expr(self, Operator::And, other)
1564    }
1565
1566    /// Return `self || other`
1567    pub fn or(self, other: Expr) -> Expr {
1568        binary_expr(self, Operator::Or, other)
1569    }
1570
1571    /// Return `self LIKE other`
1572    pub fn like(self, other: Expr) -> Expr {
1573        Expr::Like(Like::new(
1574            false,
1575            Box::new(self),
1576            Box::new(other),
1577            None,
1578            false,
1579        ))
1580    }
1581
1582    /// Return `self NOT LIKE other`
1583    pub fn not_like(self, other: Expr) -> Expr {
1584        Expr::Like(Like::new(
1585            true,
1586            Box::new(self),
1587            Box::new(other),
1588            None,
1589            false,
1590        ))
1591    }
1592
1593    /// Return `self ILIKE other`
1594    pub fn ilike(self, other: Expr) -> Expr {
1595        Expr::Like(Like::new(
1596            false,
1597            Box::new(self),
1598            Box::new(other),
1599            None,
1600            true,
1601        ))
1602    }
1603
1604    /// Return `self NOT ILIKE other`
1605    pub fn not_ilike(self, other: Expr) -> Expr {
1606        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1607    }
1608
1609    /// Return the name to use for the specific Expr
1610    pub fn name_for_alias(&self) -> Result<String> {
1611        Ok(self.schema_name().to_string())
1612    }
1613
1614    /// Ensure `expr` has the name as `original_name` by adding an
1615    /// alias if necessary.
1616    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1617        let new_name = self.name_for_alias()?;
1618        if new_name == original_name {
1619            return Ok(self);
1620        }
1621
1622        Ok(self.alias(original_name))
1623    }
1624
1625    /// Return `self AS name` alias expression
1626    pub fn alias(self, name: impl Into<String>) -> Expr {
1627        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1628    }
1629
1630    /// Return `self AS name` alias expression with metadata
1631    ///
1632    /// The metadata will be attached to the Arrow Schema field when the expression
1633    /// is converted to a field via `Expr.to_field()`.
1634    ///
1635    /// # Example
1636    /// ```
1637    /// # use datafusion_expr::col;
1638    /// # use std::collections::HashMap;
1639    /// # use datafusion_common::metadata::FieldMetadata;
1640    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1641    /// let metadata = FieldMetadata::from(metadata);
1642    /// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
1643    /// ```
1644    pub fn alias_with_metadata(
1645        self,
1646        name: impl Into<String>,
1647        metadata: Option<FieldMetadata>,
1648    ) -> Expr {
1649        Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
1650    }
1651
1652    /// Return `self AS name` alias expression with a specific qualifier
1653    pub fn alias_qualified(
1654        self,
1655        relation: Option<impl Into<TableReference>>,
1656        name: impl Into<String>,
1657    ) -> Expr {
1658        Expr::Alias(Alias::new(self, relation, name.into()))
1659    }
1660
1661    /// Return `self AS name` alias expression with a specific qualifier and metadata
1662    ///
1663    /// The metadata will be attached to the Arrow Schema field when the expression
1664    /// is converted to a field via `Expr.to_field()`.
1665    ///
1666    /// # Example
1667    /// ```
1668    /// # use datafusion_expr::col;
1669    /// # use std::collections::HashMap;
1670    /// # use datafusion_common::metadata::FieldMetadata;
1671    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1672    /// let metadata = FieldMetadata::from(metadata);
1673    /// let expr =
1674    ///     col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
1675    /// ```
1676    pub fn alias_qualified_with_metadata(
1677        self,
1678        relation: Option<impl Into<TableReference>>,
1679        name: impl Into<String>,
1680        metadata: Option<FieldMetadata>,
1681    ) -> Expr {
1682        Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
1683    }
1684
1685    /// Remove an alias from an expression if one exists.
1686    ///
1687    /// If the expression is not an alias, the expression is returned unchanged.
1688    /// This method does not remove aliases from nested expressions.
1689    ///
1690    /// # Example
1691    /// ```
1692    /// # use datafusion_expr::col;
1693    /// // `foo as "bar"` is unaliased to `foo`
1694    /// let expr = col("foo").alias("bar");
1695    /// assert_eq!(expr.unalias(), col("foo"));
1696    ///
1697    /// // `foo as "bar" + baz` is not unaliased
1698    /// let expr = col("foo").alias("bar") + col("baz");
1699    /// assert_eq!(expr.clone().unalias(), expr);
1700    ///
1701    /// // `foo as "bar" as "baz" is unaliased to foo as "bar"
1702    /// let expr = col("foo").alias("bar").alias("baz");
1703    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1704    /// ```
1705    pub fn unalias(self) -> Expr {
1706        match self {
1707            Expr::Alias(alias) => *alias.expr,
1708            _ => self,
1709        }
1710    }
1711
1712    /// Recursively removed potentially multiple aliases from an expression.
1713    ///
1714    /// This method removes nested aliases and returns [`Transformed`]
1715    /// to signal if the expression was changed.
1716    ///
1717    /// # Example
1718    /// ```
1719    /// # use datafusion_expr::col;
1720    /// // `foo as "bar"` is unaliased to `foo`
1721    /// let expr = col("foo").alias("bar");
1722    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1723    ///
1724    /// // `foo as "bar" + baz` is  unaliased
1725    /// let expr = col("foo").alias("bar") + col("baz");
1726    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1727    ///
1728    /// // `foo as "bar" as "baz" is unalaised to foo
1729    /// let expr = col("foo").alias("bar").alias("baz");
1730    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1731    /// ```
1732    pub fn unalias_nested(self) -> Transformed<Expr> {
1733        self.transform_down_up(
1734            |expr| {
1735                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1736                let recursion = if matches!(
1737                    expr,
1738                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1739                ) {
1740                    // Subqueries could contain aliases so don't recurse into those
1741                    TreeNodeRecursion::Jump
1742                } else {
1743                    TreeNodeRecursion::Continue
1744                };
1745                Ok(Transformed::new(expr, false, recursion))
1746            },
1747            |expr| {
1748                // f_up: unalias on up so we can remove nested aliases like
1749                // `(x as foo) as bar`
1750                if let Expr::Alias(alias) = expr {
1751                    match alias
1752                        .metadata
1753                        .as_ref()
1754                        .map(|h| h.is_empty())
1755                        .unwrap_or(true)
1756                    {
1757                        true => Ok(Transformed::yes(*alias.expr)),
1758                        false => Ok(Transformed::no(Expr::Alias(alias))),
1759                    }
1760                } else {
1761                    Ok(Transformed::no(expr))
1762                }
1763            },
1764        )
1765        // Unreachable code: internal closure doesn't return err
1766        .unwrap()
1767    }
1768
1769    /// Return `self IN <list>` if `negated` is false, otherwise
1770    /// return `self NOT IN <list>`.a
1771    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
1772        Expr::InList(InList::new(Box::new(self), list, negated))
1773    }
1774
1775    /// Return `IsNull(Box(self))
1776    pub fn is_null(self) -> Expr {
1777        Expr::IsNull(Box::new(self))
1778    }
1779
1780    /// Return `IsNotNull(Box(self))
1781    pub fn is_not_null(self) -> Expr {
1782        Expr::IsNotNull(Box::new(self))
1783    }
1784
1785    /// Create a sort configuration from an existing expression.
1786    ///
1787    /// ```
1788    /// # use datafusion_expr::col;
1789    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
1790    /// ```
1791    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
1792        Sort::new(self, asc, nulls_first)
1793    }
1794
1795    /// Return `IsTrue(Box(self))`
1796    pub fn is_true(self) -> Expr {
1797        Expr::IsTrue(Box::new(self))
1798    }
1799
1800    /// Return `IsNotTrue(Box(self))`
1801    pub fn is_not_true(self) -> Expr {
1802        Expr::IsNotTrue(Box::new(self))
1803    }
1804
1805    /// Return `IsFalse(Box(self))`
1806    pub fn is_false(self) -> Expr {
1807        Expr::IsFalse(Box::new(self))
1808    }
1809
1810    /// Return `IsNotFalse(Box(self))`
1811    pub fn is_not_false(self) -> Expr {
1812        Expr::IsNotFalse(Box::new(self))
1813    }
1814
1815    /// Return `IsUnknown(Box(self))`
1816    pub fn is_unknown(self) -> Expr {
1817        Expr::IsUnknown(Box::new(self))
1818    }
1819
1820    /// Return `IsNotUnknown(Box(self))`
1821    pub fn is_not_unknown(self) -> Expr {
1822        Expr::IsNotUnknown(Box::new(self))
1823    }
1824
1825    /// return `self BETWEEN low AND high`
1826    pub fn between(self, low: Expr, high: Expr) -> Expr {
1827        Expr::Between(Between::new(
1828            Box::new(self),
1829            false,
1830            Box::new(low),
1831            Box::new(high),
1832        ))
1833    }
1834
1835    /// Return `self NOT BETWEEN low AND high`
1836    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1837        Expr::Between(Between::new(
1838            Box::new(self),
1839            true,
1840            Box::new(low),
1841            Box::new(high),
1842        ))
1843    }
1844    /// Return a reference to the inner `Column` if any
1845    ///
1846    /// returns `None` if the expression is not a `Column`
1847    ///
1848    /// Note: None may be returned for expressions that are not `Column` but
1849    /// are convertible to `Column` such as `Cast` expressions.
1850    ///
1851    /// Example
1852    /// ```
1853    /// # use datafusion_common::Column;
1854    /// use datafusion_expr::{col, Expr};
1855    /// let expr = col("foo");
1856    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
1857    ///
1858    /// let expr = col("foo").alias("bar");
1859    /// assert_eq!(expr.try_as_col(), None);
1860    /// ```
1861    pub fn try_as_col(&self) -> Option<&Column> {
1862        if let Expr::Column(it) = self {
1863            Some(it)
1864        } else {
1865            None
1866        }
1867    }
1868
1869    /// Returns the inner `Column` if any. This is a specialized version of
1870    /// [`Self::try_as_col`] that take Cast expressions into account when the
1871    /// expression is as on condition for joins.
1872    ///
1873    /// Called this method when you are sure that the expression is a `Column`
1874    /// or a `Cast` expression that wraps a `Column`.
1875    pub fn get_as_join_column(&self) -> Option<&Column> {
1876        match self {
1877            Expr::Column(c) => Some(c),
1878            Expr::Cast(Cast { expr, .. }) => match &**expr {
1879                Expr::Column(c) => Some(c),
1880                _ => None,
1881            },
1882            _ => None,
1883        }
1884    }
1885
1886    /// Return all references to columns in this expression.
1887    ///
1888    /// # Example
1889    /// ```
1890    /// # use std::collections::HashSet;
1891    /// # use datafusion_common::Column;
1892    /// # use datafusion_expr::col;
1893    /// // For an expression `a + (b * a)`
1894    /// let expr = col("a") + (col("b") * col("a"));
1895    /// let refs = expr.column_refs();
1896    /// // refs contains "a" and "b"
1897    /// assert_eq!(refs.len(), 2);
1898    /// assert!(refs.contains(&Column::new_unqualified("a")));
1899    /// assert!(refs.contains(&Column::new_unqualified("b")));
1900    /// ```
1901    pub fn column_refs(&self) -> HashSet<&Column> {
1902        let mut using_columns = HashSet::new();
1903        self.add_column_refs(&mut using_columns);
1904        using_columns
1905    }
1906
1907    /// Adds references to all columns in this expression to the set
1908    ///
1909    /// See [`Self::column_refs`] for details
1910    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
1911        self.apply(|expr| {
1912            if let Expr::Column(col) = expr {
1913                set.insert(col);
1914            }
1915            Ok(TreeNodeRecursion::Continue)
1916        })
1917        .expect("traversal is infallible");
1918    }
1919
1920    /// Return all references to columns and their occurrence counts in the expression.
1921    ///
1922    /// # Example
1923    /// ```
1924    /// # use std::collections::HashMap;
1925    /// # use datafusion_common::Column;
1926    /// # use datafusion_expr::col;
1927    /// // For an expression `a + (b * a)`
1928    /// let expr = col("a") + (col("b") * col("a"));
1929    /// let mut refs = expr.column_refs_counts();
1930    /// // refs contains "a" and "b"
1931    /// assert_eq!(refs.len(), 2);
1932    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
1933    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
1934    /// ```
1935    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
1936        let mut map = HashMap::new();
1937        self.add_column_ref_counts(&mut map);
1938        map
1939    }
1940
1941    /// Adds references to all columns and their occurrence counts in the expression to
1942    /// the map.
1943    ///
1944    /// See [`Self::column_refs_counts`] for details
1945    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
1946        self.apply(|expr| {
1947            if let Expr::Column(col) = expr {
1948                *map.entry(col).or_default() += 1;
1949            }
1950            Ok(TreeNodeRecursion::Continue)
1951        })
1952        .expect("traversal is infallible");
1953    }
1954
1955    /// Returns true if there are any column references in this Expr
1956    pub fn any_column_refs(&self) -> bool {
1957        self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
1958            .expect("exists closure is infallible")
1959    }
1960
1961    /// Return true if the expression contains out reference(correlated) expressions.
1962    pub fn contains_outer(&self) -> bool {
1963        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
1964            .expect("exists closure is infallible")
1965    }
1966
1967    /// Returns true if the expression node is volatile, i.e. whether it can return
1968    /// different results when evaluated multiple times with the same input.
1969    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
1970    /// - `rand()` returns `true`,
1971    /// - `a + rand()` returns `false`
1972    pub fn is_volatile_node(&self) -> bool {
1973        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
1974    }
1975
1976    /// Returns true if the expression is volatile, i.e. whether it can return different
1977    /// results when evaluated multiple times with the same input.
1978    ///
1979    /// For example the function call `RANDOM()` is volatile as each call will
1980    /// return a different value.
1981    ///
1982    /// See [`Volatility`] for more information.
1983    pub fn is_volatile(&self) -> bool {
1984        self.exists(|expr| Ok(expr.is_volatile_node()))
1985            .expect("exists closure is infallible")
1986    }
1987
1988    /// Recursively find all [`Expr::Placeholder`] expressions, and
1989    /// to infer their [`DataType`] from the context of their use.
1990    ///
1991    /// For example, given an expression like `<int32> = $0` will infer `$0` to
1992    /// have type `int32`.
1993    ///
1994    /// Returns transformed expression and flag that is true if expression contains
1995    /// at least one placeholder.
1996    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
1997        let mut has_placeholder = false;
1998        self.transform(|mut expr| {
1999            match &mut expr {
2000                // Default to assuming the arguments are the same type
2001                Expr::BinaryExpr(BinaryExpr { left, op: _, right }) => {
2002                    rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
2003                    rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
2004                }
2005                Expr::Between(Between {
2006                    expr,
2007                    negated: _,
2008                    low,
2009                    high,
2010                }) => {
2011                    rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
2012                    rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
2013                }
2014                Expr::InList(InList {
2015                    expr,
2016                    list,
2017                    negated: _,
2018                }) => {
2019                    for item in list.iter_mut() {
2020                        rewrite_placeholder(item, expr.as_ref(), schema)?;
2021                    }
2022                }
2023                Expr::Like(Like { expr, pattern, .. })
2024                | Expr::SimilarTo(Like { expr, pattern, .. }) => {
2025                    rewrite_placeholder(pattern.as_mut(), expr.as_ref(), schema)?;
2026                }
2027                Expr::Placeholder(_) => {
2028                    has_placeholder = true;
2029                }
2030                _ => {}
2031            }
2032            Ok(Transformed::yes(expr))
2033        })
2034        .data()
2035        .map(|data| (data, has_placeholder))
2036    }
2037
2038    /// Returns true if some of this `exprs` subexpressions may not be evaluated
2039    /// and thus any side effects (like divide by zero) may not be encountered
2040    pub fn short_circuits(&self) -> bool {
2041        match self {
2042            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
2043            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
2044                matches!(op, Operator::And | Operator::Or)
2045            }
2046            Expr::Case { .. } => true,
2047            // Use explicit pattern match instead of a default
2048            // implementation, so that in the future if someone adds
2049            // new Expr types, they will check here as well
2050            // TODO: remove the next line after `Expr::Wildcard` is removed
2051            #[expect(deprecated)]
2052            Expr::AggregateFunction(..)
2053            | Expr::Alias(..)
2054            | Expr::Between(..)
2055            | Expr::Cast(..)
2056            | Expr::Column(..)
2057            | Expr::Exists(..)
2058            | Expr::GroupingSet(..)
2059            | Expr::InList(..)
2060            | Expr::InSubquery(..)
2061            | Expr::IsFalse(..)
2062            | Expr::IsNotFalse(..)
2063            | Expr::IsNotNull(..)
2064            | Expr::IsNotTrue(..)
2065            | Expr::IsNotUnknown(..)
2066            | Expr::IsNull(..)
2067            | Expr::IsTrue(..)
2068            | Expr::IsUnknown(..)
2069            | Expr::Like(..)
2070            | Expr::ScalarSubquery(..)
2071            | Expr::ScalarVariable(_, _)
2072            | Expr::SimilarTo(..)
2073            | Expr::Not(..)
2074            | Expr::Negative(..)
2075            | Expr::OuterReferenceColumn(_, _)
2076            | Expr::TryCast(..)
2077            | Expr::Unnest(..)
2078            | Expr::Wildcard { .. }
2079            | Expr::WindowFunction(..)
2080            | Expr::Literal(..)
2081            | Expr::Placeholder(..) => false,
2082        }
2083    }
2084
2085    /// Returns a reference to the set of locations in the SQL query where this
2086    /// expression appears, if known. [`None`] is returned if the expression
2087    /// type doesn't support tracking locations yet.
2088    pub fn spans(&self) -> Option<&Spans> {
2089        match self {
2090            Expr::Column(col) => Some(&col.spans),
2091            _ => None,
2092        }
2093    }
2094
2095    /// Check if the Expr is literal and get the literal value if it is.
2096    pub fn as_literal(&self) -> Option<&ScalarValue> {
2097        if let Expr::Literal(lit, _) = self {
2098            Some(lit)
2099        } else {
2100            None
2101        }
2102    }
2103}
2104
2105impl Normalizeable for Expr {
2106    fn can_normalize(&self) -> bool {
2107        #[allow(clippy::match_like_matches_macro)]
2108        match self {
2109            Expr::BinaryExpr(BinaryExpr {
2110                op:
2111                    _op @ (Operator::Plus
2112                    | Operator::Multiply
2113                    | Operator::BitwiseAnd
2114                    | Operator::BitwiseOr
2115                    | Operator::BitwiseXor
2116                    | Operator::Eq
2117                    | Operator::NotEq),
2118                ..
2119            }) => true,
2120            _ => false,
2121        }
2122    }
2123}
2124
2125impl NormalizeEq for Expr {
2126    fn normalize_eq(&self, other: &Self) -> bool {
2127        match (self, other) {
2128            (
2129                Expr::BinaryExpr(BinaryExpr {
2130                    left: self_left,
2131                    op: self_op,
2132                    right: self_right,
2133                }),
2134                Expr::BinaryExpr(BinaryExpr {
2135                    left: other_left,
2136                    op: other_op,
2137                    right: other_right,
2138                }),
2139            ) => {
2140                if self_op != other_op {
2141                    return false;
2142                }
2143
2144                if matches!(
2145                    self_op,
2146                    Operator::Plus
2147                        | Operator::Multiply
2148                        | Operator::BitwiseAnd
2149                        | Operator::BitwiseOr
2150                        | Operator::BitwiseXor
2151                        | Operator::Eq
2152                        | Operator::NotEq
2153                ) {
2154                    (self_left.normalize_eq(other_left)
2155                        && self_right.normalize_eq(other_right))
2156                        || (self_left.normalize_eq(other_right)
2157                            && self_right.normalize_eq(other_left))
2158                } else {
2159                    self_left.normalize_eq(other_left)
2160                        && self_right.normalize_eq(other_right)
2161                }
2162            }
2163            (
2164                Expr::Alias(Alias {
2165                    expr: self_expr,
2166                    relation: self_relation,
2167                    name: self_name,
2168                    ..
2169                }),
2170                Expr::Alias(Alias {
2171                    expr: other_expr,
2172                    relation: other_relation,
2173                    name: other_name,
2174                    ..
2175                }),
2176            ) => {
2177                self_name == other_name
2178                    && self_relation == other_relation
2179                    && self_expr.normalize_eq(other_expr)
2180            }
2181            (
2182                Expr::Like(Like {
2183                    negated: self_negated,
2184                    expr: self_expr,
2185                    pattern: self_pattern,
2186                    escape_char: self_escape_char,
2187                    case_insensitive: self_case_insensitive,
2188                }),
2189                Expr::Like(Like {
2190                    negated: other_negated,
2191                    expr: other_expr,
2192                    pattern: other_pattern,
2193                    escape_char: other_escape_char,
2194                    case_insensitive: other_case_insensitive,
2195                }),
2196            )
2197            | (
2198                Expr::SimilarTo(Like {
2199                    negated: self_negated,
2200                    expr: self_expr,
2201                    pattern: self_pattern,
2202                    escape_char: self_escape_char,
2203                    case_insensitive: self_case_insensitive,
2204                }),
2205                Expr::SimilarTo(Like {
2206                    negated: other_negated,
2207                    expr: other_expr,
2208                    pattern: other_pattern,
2209                    escape_char: other_escape_char,
2210                    case_insensitive: other_case_insensitive,
2211                }),
2212            ) => {
2213                self_negated == other_negated
2214                    && self_escape_char == other_escape_char
2215                    && self_case_insensitive == other_case_insensitive
2216                    && self_expr.normalize_eq(other_expr)
2217                    && self_pattern.normalize_eq(other_pattern)
2218            }
2219            (Expr::Not(self_expr), Expr::Not(other_expr))
2220            | (Expr::IsNull(self_expr), Expr::IsNull(other_expr))
2221            | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr))
2222            | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr))
2223            | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr))
2224            | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr))
2225            | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr))
2226            | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr))
2227            | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr))
2228            | (Expr::Negative(self_expr), Expr::Negative(other_expr))
2229            | (
2230                Expr::Unnest(Unnest { expr: self_expr }),
2231                Expr::Unnest(Unnest { expr: other_expr }),
2232            ) => self_expr.normalize_eq(other_expr),
2233            (
2234                Expr::Between(Between {
2235                    expr: self_expr,
2236                    negated: self_negated,
2237                    low: self_low,
2238                    high: self_high,
2239                }),
2240                Expr::Between(Between {
2241                    expr: other_expr,
2242                    negated: other_negated,
2243                    low: other_low,
2244                    high: other_high,
2245                }),
2246            ) => {
2247                self_negated == other_negated
2248                    && self_expr.normalize_eq(other_expr)
2249                    && self_low.normalize_eq(other_low)
2250                    && self_high.normalize_eq(other_high)
2251            }
2252            (
2253                Expr::Cast(Cast {
2254                    expr: self_expr,
2255                    data_type: self_data_type,
2256                }),
2257                Expr::Cast(Cast {
2258                    expr: other_expr,
2259                    data_type: other_data_type,
2260                }),
2261            )
2262            | (
2263                Expr::TryCast(TryCast {
2264                    expr: self_expr,
2265                    data_type: self_data_type,
2266                }),
2267                Expr::TryCast(TryCast {
2268                    expr: other_expr,
2269                    data_type: other_data_type,
2270                }),
2271            ) => self_data_type == other_data_type && self_expr.normalize_eq(other_expr),
2272            (
2273                Expr::ScalarFunction(ScalarFunction {
2274                    func: self_func,
2275                    args: self_args,
2276                }),
2277                Expr::ScalarFunction(ScalarFunction {
2278                    func: other_func,
2279                    args: other_args,
2280                }),
2281            ) => {
2282                self_func.name() == other_func.name()
2283                    && self_args.len() == other_args.len()
2284                    && self_args
2285                        .iter()
2286                        .zip(other_args.iter())
2287                        .all(|(a, b)| a.normalize_eq(b))
2288            }
2289            (
2290                Expr::AggregateFunction(AggregateFunction {
2291                    func: self_func,
2292                    params:
2293                        AggregateFunctionParams {
2294                            args: self_args,
2295                            distinct: self_distinct,
2296                            filter: self_filter,
2297                            order_by: self_order_by,
2298                            null_treatment: self_null_treatment,
2299                        },
2300                }),
2301                Expr::AggregateFunction(AggregateFunction {
2302                    func: other_func,
2303                    params:
2304                        AggregateFunctionParams {
2305                            args: other_args,
2306                            distinct: other_distinct,
2307                            filter: other_filter,
2308                            order_by: other_order_by,
2309                            null_treatment: other_null_treatment,
2310                        },
2311                }),
2312            ) => {
2313                self_func.name() == other_func.name()
2314                    && self_distinct == other_distinct
2315                    && self_null_treatment == other_null_treatment
2316                    && self_args.len() == other_args.len()
2317                    && self_args
2318                        .iter()
2319                        .zip(other_args.iter())
2320                        .all(|(a, b)| a.normalize_eq(b))
2321                    && match (self_filter, other_filter) {
2322                        (Some(self_filter), Some(other_filter)) => {
2323                            self_filter.normalize_eq(other_filter)
2324                        }
2325                        (None, None) => true,
2326                        _ => false,
2327                    }
2328                    && self_order_by
2329                        .iter()
2330                        .zip(other_order_by.iter())
2331                        .all(|(a, b)| {
2332                            a.asc == b.asc
2333                                && a.nulls_first == b.nulls_first
2334                                && a.expr.normalize_eq(&b.expr)
2335                        })
2336                    && self_order_by.len() == other_order_by.len()
2337            }
2338            (Expr::WindowFunction(left), Expr::WindowFunction(other)) => {
2339                let WindowFunction {
2340                    fun: self_fun,
2341                    params:
2342                        WindowFunctionParams {
2343                            args: self_args,
2344                            window_frame: self_window_frame,
2345                            partition_by: self_partition_by,
2346                            order_by: self_order_by,
2347                            filter: self_filter,
2348                            null_treatment: self_null_treatment,
2349                            distinct: self_distinct,
2350                        },
2351                } = left.as_ref();
2352                let WindowFunction {
2353                    fun: other_fun,
2354                    params:
2355                        WindowFunctionParams {
2356                            args: other_args,
2357                            window_frame: other_window_frame,
2358                            partition_by: other_partition_by,
2359                            order_by: other_order_by,
2360                            filter: other_filter,
2361                            null_treatment: other_null_treatment,
2362                            distinct: other_distinct,
2363                        },
2364                } = other.as_ref();
2365
2366                self_fun.name() == other_fun.name()
2367                    && self_window_frame == other_window_frame
2368                    && match (self_filter, other_filter) {
2369                        (Some(a), Some(b)) => a.normalize_eq(b),
2370                        (None, None) => true,
2371                        _ => false,
2372                    }
2373                    && self_null_treatment == other_null_treatment
2374                    && self_args.len() == other_args.len()
2375                    && self_args
2376                        .iter()
2377                        .zip(other_args.iter())
2378                        .all(|(a, b)| a.normalize_eq(b))
2379                    && self_partition_by
2380                        .iter()
2381                        .zip(other_partition_by.iter())
2382                        .all(|(a, b)| a.normalize_eq(b))
2383                    && self_order_by
2384                        .iter()
2385                        .zip(other_order_by.iter())
2386                        .all(|(a, b)| {
2387                            a.asc == b.asc
2388                                && a.nulls_first == b.nulls_first
2389                                && a.expr.normalize_eq(&b.expr)
2390                        })
2391                    && self_distinct == other_distinct
2392            }
2393            (
2394                Expr::Exists(Exists {
2395                    subquery: self_subquery,
2396                    negated: self_negated,
2397                }),
2398                Expr::Exists(Exists {
2399                    subquery: other_subquery,
2400                    negated: other_negated,
2401                }),
2402            ) => {
2403                self_negated == other_negated
2404                    && self_subquery.normalize_eq(other_subquery)
2405            }
2406            (
2407                Expr::InSubquery(InSubquery {
2408                    expr: self_expr,
2409                    subquery: self_subquery,
2410                    negated: self_negated,
2411                }),
2412                Expr::InSubquery(InSubquery {
2413                    expr: other_expr,
2414                    subquery: other_subquery,
2415                    negated: other_negated,
2416                }),
2417            ) => {
2418                self_negated == other_negated
2419                    && self_expr.normalize_eq(other_expr)
2420                    && self_subquery.normalize_eq(other_subquery)
2421            }
2422            (
2423                Expr::ScalarSubquery(self_subquery),
2424                Expr::ScalarSubquery(other_subquery),
2425            ) => self_subquery.normalize_eq(other_subquery),
2426            (
2427                Expr::GroupingSet(GroupingSet::Rollup(self_exprs)),
2428                Expr::GroupingSet(GroupingSet::Rollup(other_exprs)),
2429            )
2430            | (
2431                Expr::GroupingSet(GroupingSet::Cube(self_exprs)),
2432                Expr::GroupingSet(GroupingSet::Cube(other_exprs)),
2433            ) => {
2434                self_exprs.len() == other_exprs.len()
2435                    && self_exprs
2436                        .iter()
2437                        .zip(other_exprs.iter())
2438                        .all(|(a, b)| a.normalize_eq(b))
2439            }
2440            (
2441                Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)),
2442                Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)),
2443            ) => {
2444                self_exprs.len() == other_exprs.len()
2445                    && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| {
2446                        a.len() == b.len()
2447                            && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y))
2448                    })
2449            }
2450            (
2451                Expr::InList(InList {
2452                    expr: self_expr,
2453                    list: self_list,
2454                    negated: self_negated,
2455                }),
2456                Expr::InList(InList {
2457                    expr: other_expr,
2458                    list: other_list,
2459                    negated: other_negated,
2460                }),
2461            ) => {
2462                // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)`
2463                self_negated == other_negated
2464                    && self_expr.normalize_eq(other_expr)
2465                    && self_list.len() == other_list.len()
2466                    && self_list
2467                        .iter()
2468                        .zip(other_list.iter())
2469                        .all(|(a, b)| a.normalize_eq(b))
2470            }
2471            (
2472                Expr::Case(Case {
2473                    expr: self_expr,
2474                    when_then_expr: self_when_then_expr,
2475                    else_expr: self_else_expr,
2476                }),
2477                Expr::Case(Case {
2478                    expr: other_expr,
2479                    when_then_expr: other_when_then_expr,
2480                    else_expr: other_else_expr,
2481                }),
2482            ) => {
2483                // TODO: normalize_eq for when_then_expr
2484                // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END`
2485                self_when_then_expr.len() == other_when_then_expr.len()
2486                    && self_when_then_expr
2487                        .iter()
2488                        .zip(other_when_then_expr.iter())
2489                        .all(|((self_when, self_then), (other_when, other_then))| {
2490                            self_when.normalize_eq(other_when)
2491                                && self_then.normalize_eq(other_then)
2492                        })
2493                    && match (self_expr, other_expr) {
2494                        (Some(self_expr), Some(other_expr)) => {
2495                            self_expr.normalize_eq(other_expr)
2496                        }
2497                        (None, None) => true,
2498                        (_, _) => false,
2499                    }
2500                    && match (self_else_expr, other_else_expr) {
2501                        (Some(self_else_expr), Some(other_else_expr)) => {
2502                            self_else_expr.normalize_eq(other_else_expr)
2503                        }
2504                        (None, None) => true,
2505                        (_, _) => false,
2506                    }
2507            }
2508            (_, _) => self == other,
2509        }
2510    }
2511}
2512
2513impl HashNode for Expr {
2514    /// As it is pretty easy to forget changing this method when `Expr` changes the
2515    /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes
2516    /// compile time.
2517    fn hash_node<H: Hasher>(&self, state: &mut H) {
2518        mem::discriminant(self).hash(state);
2519        match self {
2520            Expr::Alias(Alias {
2521                expr: _expr,
2522                relation,
2523                name,
2524                ..
2525            }) => {
2526                relation.hash(state);
2527                name.hash(state);
2528            }
2529            Expr::Column(column) => {
2530                column.hash(state);
2531            }
2532            Expr::ScalarVariable(data_type, name) => {
2533                data_type.hash(state);
2534                name.hash(state);
2535            }
2536            Expr::Literal(scalar_value, _) => {
2537                scalar_value.hash(state);
2538            }
2539            Expr::BinaryExpr(BinaryExpr {
2540                left: _left,
2541                op,
2542                right: _right,
2543            }) => {
2544                op.hash(state);
2545            }
2546            Expr::Like(Like {
2547                negated,
2548                expr: _expr,
2549                pattern: _pattern,
2550                escape_char,
2551                case_insensitive,
2552            })
2553            | Expr::SimilarTo(Like {
2554                negated,
2555                expr: _expr,
2556                pattern: _pattern,
2557                escape_char,
2558                case_insensitive,
2559            }) => {
2560                negated.hash(state);
2561                escape_char.hash(state);
2562                case_insensitive.hash(state);
2563            }
2564            Expr::Not(_expr)
2565            | Expr::IsNotNull(_expr)
2566            | Expr::IsNull(_expr)
2567            | Expr::IsTrue(_expr)
2568            | Expr::IsFalse(_expr)
2569            | Expr::IsUnknown(_expr)
2570            | Expr::IsNotTrue(_expr)
2571            | Expr::IsNotFalse(_expr)
2572            | Expr::IsNotUnknown(_expr)
2573            | Expr::Negative(_expr) => {}
2574            Expr::Between(Between {
2575                expr: _expr,
2576                negated,
2577                low: _low,
2578                high: _high,
2579            }) => {
2580                negated.hash(state);
2581            }
2582            Expr::Case(Case {
2583                expr: _expr,
2584                when_then_expr: _when_then_expr,
2585                else_expr: _else_expr,
2586            }) => {}
2587            Expr::Cast(Cast {
2588                expr: _expr,
2589                data_type,
2590            })
2591            | Expr::TryCast(TryCast {
2592                expr: _expr,
2593                data_type,
2594            }) => {
2595                data_type.hash(state);
2596            }
2597            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
2598                func.hash(state);
2599            }
2600            Expr::AggregateFunction(AggregateFunction {
2601                func,
2602                params:
2603                    AggregateFunctionParams {
2604                        args: _args,
2605                        distinct,
2606                        filter: _,
2607                        order_by: _,
2608                        null_treatment,
2609                    },
2610            }) => {
2611                func.hash(state);
2612                distinct.hash(state);
2613                null_treatment.hash(state);
2614            }
2615            Expr::WindowFunction(window_fun) => {
2616                let WindowFunction {
2617                    fun,
2618                    params:
2619                        WindowFunctionParams {
2620                            args: _args,
2621                            partition_by: _,
2622                            order_by: _,
2623                            window_frame,
2624                            filter,
2625                            null_treatment,
2626                            distinct,
2627                        },
2628                } = window_fun.as_ref();
2629                fun.hash(state);
2630                window_frame.hash(state);
2631                filter.hash(state);
2632                null_treatment.hash(state);
2633                distinct.hash(state);
2634            }
2635            Expr::InList(InList {
2636                expr: _expr,
2637                list: _list,
2638                negated,
2639            }) => {
2640                negated.hash(state);
2641            }
2642            Expr::Exists(Exists { subquery, negated }) => {
2643                subquery.hash(state);
2644                negated.hash(state);
2645            }
2646            Expr::InSubquery(InSubquery {
2647                expr: _expr,
2648                subquery,
2649                negated,
2650            }) => {
2651                subquery.hash(state);
2652                negated.hash(state);
2653            }
2654            Expr::ScalarSubquery(subquery) => {
2655                subquery.hash(state);
2656            }
2657            #[expect(deprecated)]
2658            Expr::Wildcard { qualifier, options } => {
2659                qualifier.hash(state);
2660                options.hash(state);
2661            }
2662            Expr::GroupingSet(grouping_set) => {
2663                mem::discriminant(grouping_set).hash(state);
2664                match grouping_set {
2665                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
2666                    GroupingSet::GroupingSets(_exprs) => {}
2667                }
2668            }
2669            Expr::Placeholder(place_holder) => {
2670                place_holder.hash(state);
2671            }
2672            Expr::OuterReferenceColumn(field, column) => {
2673                field.hash(state);
2674                column.hash(state);
2675            }
2676            Expr::Unnest(Unnest { expr: _expr }) => {}
2677        };
2678    }
2679}
2680
2681// Modifies expr to match the DataType, metadata, and nullability of other if it is
2682// a placeholder with previously unspecified type information (i.e., most placeholders)
2683fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
2684    if let Expr::Placeholder(Placeholder { id: _, field }) = expr {
2685        if field.is_none() {
2686            let other_field = other.to_field(schema);
2687            match other_field {
2688                Err(e) => {
2689                    Err(e.context(format!(
2690                        "Can not find type of {other} needed to infer type of {expr}"
2691                    )))?;
2692                }
2693                Ok((_, other_field)) => {
2694                    // We can't infer the nullability of the future parameter that might
2695                    // be bound, so ensure this is set to true
2696                    *field =
2697                        Some(other_field.as_ref().clone().with_nullable(true).into());
2698                }
2699            }
2700        };
2701    }
2702    Ok(())
2703}
2704
/// Renders every element of `$ARRAY` with its `Display` implementation and joins
/// the results with `", "`.
///
/// `$ARRAY` only needs an `iter()` method whose items implement `Display`; an
/// empty collection produces an empty `String`.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> =
            $ARRAY.iter().map(|item| item.to_string()).collect();
        rendered.join(", ")
    }};
}
2715
2716struct SchemaDisplay<'a>(&'a Expr);
2717impl Display for SchemaDisplay<'_> {
2718    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2719        match self.0 {
2720            // The same as Display
2721            // TODO: remove the next line after `Expr::Wildcard` is removed
2722            #[expect(deprecated)]
2723            Expr::Column(_)
2724            | Expr::Literal(_, _)
2725            | Expr::ScalarVariable(..)
2726            | Expr::OuterReferenceColumn(..)
2727            | Expr::Placeholder(_)
2728            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
2729            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2730                match func.schema_name(params) {
2731                    Ok(name) => {
2732                        write!(f, "{name}")
2733                    }
2734                    Err(e) => {
2735                        write!(f, "got error from schema_name {e}")
2736                    }
2737                }
2738            }
2739            // Expr is not shown since it is aliased
2740            Expr::Alias(Alias {
2741                name,
2742                relation: Some(relation),
2743                ..
2744            }) => write!(f, "{relation}.{name}"),
2745            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2746            Expr::Between(Between {
2747                expr,
2748                negated,
2749                low,
2750                high,
2751            }) => {
2752                if *negated {
2753                    write!(
2754                        f,
2755                        "{} NOT BETWEEN {} AND {}",
2756                        SchemaDisplay(expr),
2757                        SchemaDisplay(low),
2758                        SchemaDisplay(high),
2759                    )
2760                } else {
2761                    write!(
2762                        f,
2763                        "{} BETWEEN {} AND {}",
2764                        SchemaDisplay(expr),
2765                        SchemaDisplay(low),
2766                        SchemaDisplay(high),
2767                    )
2768                }
2769            }
2770            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2771                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
2772            }
2773            Expr::Case(Case {
2774                expr,
2775                when_then_expr,
2776                else_expr,
2777            }) => {
2778                write!(f, "CASE ")?;
2779
2780                if let Some(e) = expr {
2781                    write!(f, "{} ", SchemaDisplay(e))?;
2782                }
2783
2784                for (when, then) in when_then_expr {
2785                    write!(
2786                        f,
2787                        "WHEN {} THEN {} ",
2788                        SchemaDisplay(when),
2789                        SchemaDisplay(then),
2790                    )?;
2791                }
2792
2793                if let Some(e) = else_expr {
2794                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
2795                }
2796
2797                write!(f, "END")
2798            }
2799            // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
2800            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2801                write!(f, "{}", SchemaDisplay(expr))
2802            }
2803            Expr::InList(InList {
2804                expr,
2805                list,
2806                negated,
2807            }) => {
2808                let inlist_name = schema_name_from_exprs(list)?;
2809
2810                if *negated {
2811                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
2812                } else {
2813                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
2814                }
2815            }
2816            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
2817            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
2818            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2819                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2820            }
2821            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2822                write!(f, "GROUPING SETS (")?;
2823                for exprs in lists_of_exprs.iter() {
2824                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
2825                }
2826                write!(f, ")")
2827            }
2828            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2829                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2830            }
2831            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
2832            Expr::IsNotNull(expr) => {
2833                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
2834            }
2835            Expr::IsUnknown(expr) => {
2836                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
2837            }
2838            Expr::IsNotUnknown(expr) => {
2839                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
2840            }
2841            Expr::InSubquery(InSubquery { negated: true, .. }) => {
2842                write!(f, "NOT IN")
2843            }
2844            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
2845            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
2846            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
2847            Expr::IsNotTrue(expr) => {
2848                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
2849            }
2850            Expr::IsNotFalse(expr) => {
2851                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
2852            }
2853            Expr::Like(Like {
2854                negated,
2855                expr,
2856                pattern,
2857                escape_char,
2858                case_insensitive,
2859            }) => {
2860                write!(
2861                    f,
2862                    "{} {}{} {}",
2863                    SchemaDisplay(expr),
2864                    if *negated { "NOT " } else { "" },
2865                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2866                    SchemaDisplay(pattern),
2867                )?;
2868
2869                if let Some(char) = escape_char {
2870                    write!(f, " CHAR '{char}'")?;
2871                }
2872
2873                Ok(())
2874            }
2875            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
2876            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
2877            Expr::Unnest(Unnest { expr }) => {
2878                write!(f, "UNNEST({})", SchemaDisplay(expr))
2879            }
2880            Expr::ScalarFunction(ScalarFunction { func, args }) => {
2881                match func.schema_name(args) {
2882                    Ok(name) => {
2883                        write!(f, "{name}")
2884                    }
2885                    Err(e) => {
2886                        write!(f, "got error from schema_name {e}")
2887                    }
2888                }
2889            }
2890            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
2891                write!(f, "{}", subquery.schema().field(0).name())
2892            }
2893            Expr::SimilarTo(Like {
2894                negated,
2895                expr,
2896                pattern,
2897                escape_char,
2898                ..
2899            }) => {
2900                write!(
2901                    f,
2902                    "{} {} {}",
2903                    SchemaDisplay(expr),
2904                    if *negated {
2905                        "NOT SIMILAR TO"
2906                    } else {
2907                        "SIMILAR TO"
2908                    },
2909                    SchemaDisplay(pattern),
2910                )?;
2911                if let Some(char) = escape_char {
2912                    write!(f, " CHAR '{char}'")?;
2913                }
2914
2915                Ok(())
2916            }
2917            Expr::WindowFunction(window_fun) => {
2918                let WindowFunction { fun, params } = window_fun.as_ref();
2919                match fun {
2920                    WindowFunctionDefinition::AggregateUDF(fun) => {
2921                        match fun.window_function_schema_name(params) {
2922                            Ok(name) => {
2923                                write!(f, "{name}")
2924                            }
2925                            Err(e) => {
2926                                write!(
2927                                    f,
2928                                    "got error from window_function_schema_name {e}"
2929                                )
2930                            }
2931                        }
2932                    }
2933                    _ => {
2934                        let WindowFunctionParams {
2935                            args,
2936                            partition_by,
2937                            order_by,
2938                            window_frame,
2939                            filter,
2940                            null_treatment,
2941                            distinct,
2942                        } = params;
2943
2944                        // Write function name and open parenthesis
2945                        write!(f, "{fun}(")?;
2946
2947                        // If DISTINCT, emit the keyword
2948                        if *distinct {
2949                            write!(f, "DISTINCT ")?;
2950                        }
2951
2952                        // Write the comma‑separated argument list
2953                        write!(
2954                            f,
2955                            "{}",
2956                            schema_name_from_exprs_comma_separated_without_space(args)?
2957                        )?;
2958
2959                        // **Close the argument parenthesis**
2960                        write!(f, ")")?;
2961
2962                        if let Some(null_treatment) = null_treatment {
2963                            write!(f, " {null_treatment}")?;
2964                        }
2965
2966                        if let Some(filter) = filter {
2967                            write!(f, " FILTER (WHERE {filter})")?;
2968                        }
2969
2970                        if !partition_by.is_empty() {
2971                            write!(
2972                                f,
2973                                " PARTITION BY [{}]",
2974                                schema_name_from_exprs(partition_by)?
2975                            )?;
2976                        }
2977
2978                        if !order_by.is_empty() {
2979                            write!(
2980                                f,
2981                                " ORDER BY [{}]",
2982                                schema_name_from_sorts(order_by)?
2983                            )?;
2984                        };
2985
2986                        write!(f, " {window_frame}")
2987                    }
2988                }
2989            }
2990        }
2991    }
2992}
2993
2994/// A helper struct for displaying an `Expr` as an SQL-like string.
2995struct SqlDisplay<'a>(&'a Expr);
2996
2997impl Display for SqlDisplay<'_> {
2998    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2999        match self.0 {
3000            Expr::Literal(scalar, _) => scalar.fmt(f),
3001            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
3002            Expr::Between(Between {
3003                expr,
3004                negated,
3005                low,
3006                high,
3007            }) => {
3008                if *negated {
3009                    write!(
3010                        f,
3011                        "{} NOT BETWEEN {} AND {}",
3012                        SqlDisplay(expr),
3013                        SqlDisplay(low),
3014                        SqlDisplay(high),
3015                    )
3016                } else {
3017                    write!(
3018                        f,
3019                        "{} BETWEEN {} AND {}",
3020                        SqlDisplay(expr),
3021                        SqlDisplay(low),
3022                        SqlDisplay(high),
3023                    )
3024                }
3025            }
3026            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
3027                write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
3028            }
3029            Expr::Case(Case {
3030                expr,
3031                when_then_expr,
3032                else_expr,
3033            }) => {
3034                write!(f, "CASE ")?;
3035
3036                if let Some(e) = expr {
3037                    write!(f, "{} ", SqlDisplay(e))?;
3038                }
3039
3040                for (when, then) in when_then_expr {
3041                    write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
3042                }
3043
3044                if let Some(e) = else_expr {
3045                    write!(f, "ELSE {} ", SqlDisplay(e))?;
3046                }
3047
3048                write!(f, "END")
3049            }
3050            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
3051                write!(f, "{}", SqlDisplay(expr))
3052            }
3053            Expr::InList(InList {
3054                expr,
3055                list,
3056                negated,
3057            }) => {
3058                write!(
3059                    f,
3060                    "{}{} IN {}",
3061                    SqlDisplay(expr),
3062                    if *negated { " NOT" } else { "" },
3063                    ExprListDisplay::comma_separated(list.as_slice())
3064                )
3065            }
3066            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
3067                write!(
3068                    f,
3069                    "ROLLUP ({})",
3070                    ExprListDisplay::comma_separated(exprs.as_slice())
3071                )
3072            }
3073            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
3074                write!(f, "GROUPING SETS (")?;
3075                for exprs in lists_of_exprs.iter() {
3076                    write!(
3077                        f,
3078                        "({})",
3079                        ExprListDisplay::comma_separated(exprs.as_slice())
3080                    )?;
3081                }
3082                write!(f, ")")
3083            }
3084            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
3085                write!(
3086                    f,
3087                    "ROLLUP ({})",
3088                    ExprListDisplay::comma_separated(exprs.as_slice())
3089                )
3090            }
3091            Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
3092            Expr::IsNotNull(expr) => {
3093                write!(f, "{} IS NOT NULL", SqlDisplay(expr))
3094            }
3095            Expr::IsUnknown(expr) => {
3096                write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
3097            }
3098            Expr::IsNotUnknown(expr) => {
3099                write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
3100            }
3101            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
3102            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
3103            Expr::IsNotTrue(expr) => {
3104                write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
3105            }
3106            Expr::IsNotFalse(expr) => {
3107                write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
3108            }
3109            Expr::Like(Like {
3110                negated,
3111                expr,
3112                pattern,
3113                escape_char,
3114                case_insensitive,
3115            }) => {
3116                write!(
3117                    f,
3118                    "{} {}{} {}",
3119                    SqlDisplay(expr),
3120                    if *negated { "NOT " } else { "" },
3121                    if *case_insensitive { "ILIKE" } else { "LIKE" },
3122                    SqlDisplay(pattern),
3123                )?;
3124
3125                if let Some(char) = escape_char {
3126                    write!(f, " CHAR '{char}'")?;
3127                }
3128
3129                Ok(())
3130            }
3131            Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
3132            Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
3133            Expr::Unnest(Unnest { expr }) => {
3134                write!(f, "UNNEST({})", SqlDisplay(expr))
3135            }
3136            Expr::SimilarTo(Like {
3137                negated,
3138                expr,
3139                pattern,
3140                escape_char,
3141                ..
3142            }) => {
3143                write!(
3144                    f,
3145                    "{} {} {}",
3146                    SqlDisplay(expr),
3147                    if *negated {
3148                        "NOT SIMILAR TO"
3149                    } else {
3150                        "SIMILAR TO"
3151                    },
3152                    SqlDisplay(pattern),
3153                )?;
3154                if let Some(char) = escape_char {
3155                    write!(f, " CHAR '{char}'")?;
3156                }
3157
3158                Ok(())
3159            }
3160            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3161                match func.human_display(params) {
3162                    Ok(name) => {
3163                        write!(f, "{name}")
3164                    }
3165                    Err(e) => {
3166                        write!(f, "got error from schema_name {e}")
3167                    }
3168                }
3169            }
3170            _ => write!(f, "{}", self.0),
3171        }
3172    }
3173}
3174
3175/// Get schema_name for Vector of expressions
3176///
3177/// Internal usage. Please call `schema_name_from_exprs` instead
3178// TODO: Use ", " to standardize the formatting of Vec<Expr>,
3179// <https://github.com/apache/datafusion/issues/10364>
3180pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
3181    exprs: &[Expr],
3182) -> Result<String, fmt::Error> {
3183    schema_name_from_exprs_inner(exprs, ",")
3184}
3185
3186/// Formats a list of `&Expr` with a custom separator using SQL display format
3187pub struct ExprListDisplay<'a> {
3188    exprs: &'a [Expr],
3189    sep: &'a str,
3190}
3191
3192impl<'a> ExprListDisplay<'a> {
3193    /// Create a new display struct with the given expressions and separator
3194    pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
3195        Self { exprs, sep }
3196    }
3197
3198    /// Create a new display struct with comma-space separator
3199    pub fn comma_separated(exprs: &'a [Expr]) -> Self {
3200        Self::new(exprs, ", ")
3201    }
3202}
3203
3204impl Display for ExprListDisplay<'_> {
3205    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3206        let mut first = true;
3207        for expr in self.exprs {
3208            if !first {
3209                write!(f, "{}", self.sep)?;
3210            }
3211            write!(f, "{}", SqlDisplay(expr))?;
3212            first = false;
3213        }
3214        Ok(())
3215    }
3216}
3217
3218/// Get schema_name for Vector of expressions
3219pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
3220    schema_name_from_exprs_inner(exprs, ", ")
3221}
3222
3223fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
3224    let mut s = String::new();
3225    for (i, e) in exprs.iter().enumerate() {
3226        if i > 0 {
3227            write!(&mut s, "{sep}")?;
3228        }
3229        write!(&mut s, "{}", SchemaDisplay(e))?;
3230    }
3231
3232    Ok(s)
3233}
3234
3235pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
3236    let mut s = String::new();
3237    for (i, e) in sorts.iter().enumerate() {
3238        if i > 0 {
3239            write!(&mut s, ", ")?;
3240        }
3241        let ordering = if e.asc { "ASC" } else { "DESC" };
3242        let nulls_ordering = if e.nulls_first {
3243            "NULLS FIRST"
3244        } else {
3245            "NULLS LAST"
3246        };
3247        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
3248    }
3249
3250    Ok(s)
3251}
3252
/// Prefix written in front of the parenthesised column when an
/// `Expr::OuterReferenceColumn` is displayed, i.e. `outer_ref(<column>)`.
pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";
/// Prefix string `"UNNEST"`.
// NOTE(review): not referenced in this part of the file; presumably used when
// naming unnest output columns — confirm against callers.
pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
3255
3256/// Format expressions for display as part of a logical plan. In many cases, this will produce
3257/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
3258impl Display for Expr {
3259    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3260        match self {
3261            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
3262            Expr::Column(c) => write!(f, "{c}"),
3263            Expr::OuterReferenceColumn(_, c) => {
3264                write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
3265            }
3266            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
3267            Expr::Literal(v, metadata) => {
3268                match metadata.as_ref().map(|m| m.is_empty()).unwrap_or(true) {
3269                    false => write!(f, "{v:?} {:?}", metadata.as_ref().unwrap()),
3270                    true => write!(f, "{v:?}"),
3271                }
3272            }
3273            Expr::Case(case) => {
3274                write!(f, "CASE ")?;
3275                if let Some(e) = &case.expr {
3276                    write!(f, "{e} ")?;
3277                }
3278                for (w, t) in &case.when_then_expr {
3279                    write!(f, "WHEN {w} THEN {t} ")?;
3280                }
3281                if let Some(e) = &case.else_expr {
3282                    write!(f, "ELSE {e} ")?;
3283                }
3284                write!(f, "END")
3285            }
3286            Expr::Cast(Cast { expr, data_type }) => {
3287                write!(f, "CAST({expr} AS {data_type})")
3288            }
3289            Expr::TryCast(TryCast { expr, data_type }) => {
3290                write!(f, "TRY_CAST({expr} AS {data_type})")
3291            }
3292            Expr::Not(expr) => write!(f, "NOT {expr}"),
3293            Expr::Negative(expr) => write!(f, "(- {expr})"),
3294            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
3295            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
3296            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
3297            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
3298            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
3299            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
3300            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
3301            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
3302            Expr::Exists(Exists {
3303                subquery,
3304                negated: true,
3305            }) => write!(f, "NOT EXISTS ({subquery:?})"),
3306            Expr::Exists(Exists {
3307                subquery,
3308                negated: false,
3309            }) => write!(f, "EXISTS ({subquery:?})"),
3310            Expr::InSubquery(InSubquery {
3311                expr,
3312                subquery,
3313                negated: true,
3314            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
3315            Expr::InSubquery(InSubquery {
3316                expr,
3317                subquery,
3318                negated: false,
3319            }) => write!(f, "{expr} IN ({subquery:?})"),
3320            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
3321            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
3322            Expr::ScalarFunction(fun) => {
3323                fmt_function(f, fun.name(), false, &fun.args, true)
3324            }
3325            Expr::WindowFunction(window_fun) => {
3326                let WindowFunction { fun, params } = window_fun.as_ref();
3327                match fun {
3328                    WindowFunctionDefinition::AggregateUDF(fun) => {
3329                        match fun.window_function_display_name(params) {
3330                            Ok(name) => {
3331                                write!(f, "{name}")
3332                            }
3333                            Err(e) => {
3334                                write!(
3335                                    f,
3336                                    "got error from window_function_display_name {e}"
3337                                )
3338                            }
3339                        }
3340                    }
3341                    WindowFunctionDefinition::WindowUDF(fun) => {
3342                        let WindowFunctionParams {
3343                            args,
3344                            partition_by,
3345                            order_by,
3346                            window_frame,
3347                            filter,
3348                            null_treatment,
3349                            distinct,
3350                        } = params;
3351
3352                        fmt_function(f, &fun.to_string(), *distinct, args, true)?;
3353
3354                        if let Some(nt) = null_treatment {
3355                            write!(f, "{nt}")?;
3356                        }
3357
3358                        if let Some(fe) = filter {
3359                            write!(f, " FILTER (WHERE {fe})")?;
3360                        }
3361
3362                        if !partition_by.is_empty() {
3363                            write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
3364                        }
3365                        if !order_by.is_empty() {
3366                            write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
3367                        }
3368                        write!(
3369                            f,
3370                            " {} BETWEEN {} AND {}",
3371                            window_frame.units,
3372                            window_frame.start_bound,
3373                            window_frame.end_bound
3374                        )
3375                    }
3376                }
3377            }
3378            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3379                match func.display_name(params) {
3380                    Ok(name) => {
3381                        write!(f, "{name}")
3382                    }
3383                    Err(e) => {
3384                        write!(f, "got error from display_name {e}")
3385                    }
3386                }
3387            }
3388            Expr::Between(Between {
3389                expr,
3390                negated,
3391                low,
3392                high,
3393            }) => {
3394                if *negated {
3395                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
3396                } else {
3397                    write!(f, "{expr} BETWEEN {low} AND {high}")
3398                }
3399            }
3400            Expr::Like(Like {
3401                negated,
3402                expr,
3403                pattern,
3404                escape_char,
3405                case_insensitive,
3406            }) => {
3407                write!(f, "{expr}")?;
3408                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
3409                if *negated {
3410                    write!(f, " NOT")?;
3411                }
3412                if let Some(char) = escape_char {
3413                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
3414                } else {
3415                    write!(f, " {op_name} {pattern}")
3416                }
3417            }
3418            Expr::SimilarTo(Like {
3419                negated,
3420                expr,
3421                pattern,
3422                escape_char,
3423                case_insensitive: _,
3424            }) => {
3425                write!(f, "{expr}")?;
3426                if *negated {
3427                    write!(f, " NOT")?;
3428                }
3429                if let Some(char) = escape_char {
3430                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
3431                } else {
3432                    write!(f, " SIMILAR TO {pattern}")
3433                }
3434            }
3435            Expr::InList(InList {
3436                expr,
3437                list,
3438                negated,
3439            }) => {
3440                if *negated {
3441                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
3442                } else {
3443                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
3444                }
3445            }
3446            #[expect(deprecated)]
3447            Expr::Wildcard { qualifier, options } => match qualifier {
3448                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
3449                None => write!(f, "*{options}"),
3450            },
3451            Expr::GroupingSet(grouping_sets) => match grouping_sets {
3452                GroupingSet::Rollup(exprs) => {
3453                    // ROLLUP (c0, c1, c2)
3454                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
3455                }
3456                GroupingSet::Cube(exprs) => {
3457                    // CUBE (c0, c1, c2)
3458                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
3459                }
3460                GroupingSet::GroupingSets(lists_of_exprs) => {
3461                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
3462                    write!(
3463                        f,
3464                        "GROUPING SETS ({})",
3465                        lists_of_exprs
3466                            .iter()
3467                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
3468                            .collect::<Vec<String>>()
3469                            .join(", ")
3470                    )
3471                }
3472            },
3473            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
3474            Expr::Unnest(Unnest { expr }) => {
3475                write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
3476            }
3477        }
3478    }
3479}
3480
3481fn fmt_function(
3482    f: &mut Formatter,
3483    fun: &str,
3484    distinct: bool,
3485    args: &[Expr],
3486    display: bool,
3487) -> fmt::Result {
3488    let args: Vec<String> = match display {
3489        true => args.iter().map(|arg| format!("{arg}")).collect(),
3490        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
3491    };
3492
3493    let distinct_str = match distinct {
3494        true => "DISTINCT ",
3495        false => "",
3496    };
3497    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
3498}
3499
3500/// The name of the column (field) that this `Expr` will produce in the physical plan.
3501/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
3502pub fn physical_name(expr: &Expr) -> Result<String> {
3503    match expr {
3504        Expr::Column(col) => Ok(col.name.clone()),
3505        Expr::Alias(alias) => Ok(alias.name.clone()),
3506        _ => Ok(expr.schema_name().to_string()),
3507    }
3508}
3509
#[cfg(test)]
mod test {
    use super::*;
    use crate::expr_fn::col;
    use crate::{
        case, lit, placeholder, qualified_wildcard, wildcard, wildcard_with_options,
        ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
    };
    use arrow::datatypes::{Field, Schema};
    use sqlparser::ast::{self, Ident, IdentWithAlias};
    use std::any::Any;

    /// Builds a placeholder expression (e.g. `$1`) that has no field attached yet.
    fn untyped_placeholder(id: &str) -> Expr {
        Expr::Placeholder(Placeholder {
            id: id.to_string(),
            field: None,
        })
    }

    /// Assembles a [`WildcardOptions`] from its five optional components.
    fn wildcard_options(
        ilike: Option<IlikeSelectItem>,
        exclude: Option<ExcludeSelectItem>,
        except: Option<ExceptSelectItem>,
        replace: Option<PlannedReplaceSelectItem>,
        rename: Option<RenameSelectItem>,
    ) -> WildcardOptions {
        WildcardOptions {
            ilike,
            exclude,
            except,
            replace,
            rename,
        }
    }

    #[test]
    fn infer_placeholder_in_clause() {
        // SELECT * FROM employees WHERE department_id IN ($1, $2, $3);
        let in_list = Expr::InList(InList {
            expr: Box::new(col("department_id")),
            list: ["$1", "$2", "$3"]
                .iter()
                .map(|id| untyped_placeholder(id))
                .collect(),
            negated: false,
        });

        let fields = vec![
            Field::new("name", DataType::Utf8, true),
            Field::new("department_id", DataType::Int32, true),
        ];
        let df_schema = DFSchema::try_from(Arc::new(Schema::new(fields))).unwrap();

        let (inferred_expr, contains_placeholder) =
            in_list.infer_placeholder_types(&df_schema).unwrap();

        assert!(contains_placeholder);

        let Expr::InList(inferred_list) = inferred_expr else {
            panic!("Expected InList expression")
        };
        // Every list element must still be a placeholder, now typed like the column.
        for item in inferred_list.list {
            let Expr::Placeholder(placeholder) = item else {
                panic!("Expected Placeholder expression")
            };
            assert_eq!(
                placeholder.field.unwrap().data_type(),
                &DataType::Int32,
                "Placeholder {} should infer Int32",
                placeholder.id
            );
        }
    }

    #[test]
    fn infer_placeholder_like_and_similar_to() {
        let fields = vec![Field::new("name", DataType::Utf8, true)];
        let df_schema = DFSchema::try_from(Arc::new(Schema::new(fields))).unwrap();

        // The same operands are reused for both the LIKE and SIMILAR TO forms.
        let like = Like {
            expr: Box::new(col("name")),
            pattern: Box::new(untyped_placeholder("$1")),
            negated: false,
            case_insensitive: false,
            escape_char: None,
        };

        // name LIKE $1
        let (inferred_expr, _) = Expr::Like(like.clone())
            .infer_placeholder_types(&df_schema)
            .unwrap();
        let Expr::Like(inferred_like) = inferred_expr else {
            panic!("Expected Like")
        };
        let Expr::Placeholder(placeholder) = *inferred_like.pattern else {
            panic!("Expected Placeholder")
        };
        assert_eq!(placeholder.field.unwrap().data_type(), &DataType::Utf8);

        // name SIMILAR TO $1
        let (inferred_expr, _) = Expr::SimilarTo(like)
            .infer_placeholder_types(&df_schema)
            .unwrap();
        let Expr::SimilarTo(inferred_similar) = inferred_expr else {
            panic!("Expected SimilarTo expression")
        };
        let Expr::Placeholder(placeholder) = *inferred_similar.pattern else {
            panic!("Expected Placeholder expression")
        };
        assert_eq!(
            placeholder.field.unwrap().data_type(),
            &DataType::Utf8,
            "Placeholder {} should infer Utf8",
            placeholder.id
        );
    }

    #[test]
    fn infer_placeholder_with_metadata() {
        // name == $1, where name is a non-nullable string
        let name_field = Field::new("name", DataType::Utf8, false)
            .with_metadata([("some_key".to_string(), "some_value".to_string())].into());
        let df_schema =
            DFSchema::try_from(Arc::new(Schema::new(vec![name_field]))).unwrap();

        let expr = binary_expr(col("name"), Operator::Eq, placeholder("$1"));

        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
        let Expr::BinaryExpr(BinaryExpr { right, .. }) = inferred_expr else {
            panic!("Expected BinaryExpr")
        };
        let Expr::Placeholder(placeholder) = *right else {
            panic!("Expected Placeholder")
        };

        let inferred_field = placeholder.field.as_ref().unwrap();
        assert_eq!(inferred_field.data_type(), &DataType::Utf8);
        assert_eq!(inferred_field.metadata(), df_schema.field(0).metadata());
        // Inferred placeholder should still be nullable
        assert!(inferred_field.is_nullable());
    }

    #[test]
    fn format_case_when() -> Result<()> {
        let case_expr = case(col("a"))
            .when(lit(1), lit(true))
            .when(lit(0), lit(false))
            .otherwise(lit(ScalarValue::Null))?;

        let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";
        assert_eq!(expected, case_expr.to_string());
        Ok(())
    }

    #[test]
    fn format_cast() -> Result<()> {
        let literal = Expr::Literal(ScalarValue::Float32(Some(1.23)), None);
        let cast_expr = Expr::Cast(Cast::new(Box::new(literal), DataType::Utf8));

        let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
        assert_eq!(expected_canonical, cast_expr.to_string());
        // CAST deliberately keeps the name of the expression it wraps, so the
        // schema name differs from the `Display` representation above.
        assert_eq!("Float32(1.23)", cast_expr.schema_name().to_string());
        Ok(())
    }

    #[test]
    fn test_partial_ord() {
        // Only checks that a partial order exists for Expr; this is not an
        // exhaustive test of every comparison.
        let a_plus_one = col("a") + lit(1);
        let a_plus_two = col("a") + lit(2);
        let not_a_plus_two = !(col("a") + lit(2));

        assert!(a_plus_one < a_plus_two);
        assert!(not_a_plus_two > a_plus_two);
        assert!(a_plus_one < not_a_plus_two);
    }

    #[test]
    fn test_collect_expr() -> Result<()> {
        // single column
        {
            let cast_expr = &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
            let referenced = cast_expr.column_refs();
            assert_eq!(1, referenced.len());
            assert!(referenced.contains(&Column::from_name("a")));
        }

        // multiple columns
        {
            let sum_expr = col("a") + col("b") + lit(1);
            let referenced = sum_expr.column_refs();
            assert_eq!(2, referenced.len());
            for name in ["a", "b"] {
                assert!(referenced.contains(&Column::from_name(name)));
            }
        }

        Ok(())
    }

    #[test]
    fn test_logical_ops() {
        let one = || lit(1u32);
        let two = || lit(2u32);

        // (expression, expected `Display` output)
        let cases = [
            (one().eq(two()), "UInt32(1) = UInt32(2)"),
            (one().not_eq(two()), "UInt32(1) != UInt32(2)"),
            (one().gt(two()), "UInt32(1) > UInt32(2)"),
            (one().gt_eq(two()), "UInt32(1) >= UInt32(2)"),
            (one().lt(two()), "UInt32(1) < UInt32(2)"),
            (one().lt_eq(two()), "UInt32(1) <= UInt32(2)"),
            (one().and(two()), "UInt32(1) AND UInt32(2)"),
            (one().or(two()), "UInt32(1) OR UInt32(2)"),
        ];

        for (expr, expected) in cases {
            assert_eq!(format!("{expr}"), expected);
        }
    }

    #[test]
    fn test_is_volatile_scalar_func() {
        // Minimal UDF whose only configurable part is its signature.
        #[derive(Debug, PartialEq, Eq, Hash)]
        struct TestScalarUDF {
            signature: Signature,
        }

        impl ScalarUDFImpl for TestScalarUDF {
            fn as_any(&self) -> &dyn Any {
                self
            }

            fn name(&self) -> &str {
                "TestScalarUDF"
            }

            fn signature(&self) -> &Signature {
                &self.signature
            }

            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
                Ok(DataType::Utf8)
            }

            fn invoke_with_args(
                &self,
                _args: ScalarFunctionArgs,
            ) -> Result<ColumnarValue> {
                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
            }
        }

        // Wraps the test UDF with a one-argument Float32 signature of the
        // requested volatility.
        let udf_with = |volatility: Volatility| {
            Arc::new(ScalarUDF::from(TestScalarUDF {
                signature: Signature::uniform(1, vec![DataType::Float32], volatility),
            }))
        };

        let stable_udf = udf_with(Volatility::Stable);
        assert_ne!(stable_udf.signature().volatility, Volatility::Volatile);

        let volatile_udf = udf_with(Volatility::Volatile);
        assert_eq!(volatile_udf.signature().volatility, Volatility::Volatile);
    }

    #[test]
    fn test_display_wildcard() {
        // Formats an unqualified wildcard that carries the given options.
        let render =
            |options: WildcardOptions| wildcard_with_options(options).to_string();

        assert_eq!(wildcard().to_string(), "*");
        assert_eq!(qualified_wildcard("t1").to_string(), "t1.*");

        let ilike = IlikeSelectItem {
            pattern: "c1".to_string(),
        };
        assert_eq!(
            render(wildcard_options(Some(ilike), None, None, None, None)),
            "* ILIKE 'c1'"
        );

        let exclude =
            ExcludeSelectItem::Multiple(vec![Ident::from("c1"), Ident::from("c2")]);
        assert_eq!(
            render(wildcard_options(None, Some(exclude), None, None, None)),
            "* EXCLUDE (c1, c2)"
        );

        let except = ExceptSelectItem {
            first_element: Ident::from("c1"),
            additional_elements: vec![Ident::from("c2")],
        };
        assert_eq!(
            render(wildcard_options(None, None, Some(except), None, None)),
            "* EXCEPT (c1, c2)"
        );

        let replace = PlannedReplaceSelectItem {
            items: vec![ReplaceSelectElement {
                expr: ast::Expr::Identifier(Ident::from("c1")),
                column_name: Ident::from("a1"),
                as_keyword: false,
            }],
            planned_expressions: vec![],
        };
        assert_eq!(
            render(wildcard_options(None, None, None, Some(replace), None)),
            "* REPLACE (c1 a1)"
        );

        let rename = RenameSelectItem::Multiple(vec![IdentWithAlias {
            ident: Ident::from("c1"),
            alias: Ident::from("a1"),
        }]);
        assert_eq!(
            render(wildcard_options(None, None, None, None, Some(rename))),
            "* RENAME (c1 AS a1)"
        );
    }

    #[test]
    fn test_schema_display_alias_with_relation() {
        let aliased = lit(1).alias_qualified(Some("table_name"), "column_name");
        assert_eq!(
            SchemaDisplay(&aliased).to_string(),
            "table_name.column_name"
        );
    }

    #[test]
    fn test_schema_display_alias_without_relation() {
        let aliased = lit(1).alias_qualified(None::<&str>, "column_name");
        assert_eq!(SchemaDisplay(&aliased).to_string(), "column_name");
    }

    #[test]
    fn test_size_of_expr() {
        // Expr is used so widely in DataFusion that its size must stay as
        // small as possible.
        //
        // If this test fails after changing `Expr`, try `Box`ing the new
        // fields to make `Expr` smaller.
        // See https://github.com/apache/datafusion/issues/16199 for details
        let measured = [
            ("Expr", size_of::<Expr>(), 112),
            ("ScalarValue", size_of::<ScalarValue>(), 64),
            ("DataType", size_of::<DataType>(), 24), // 3 ptrs
            ("Vec<Expr>", size_of::<Vec<Expr>>(), 24),
            ("Arc<Expr>", size_of::<Arc<Expr>>(), 8),
        ];

        for (type_name, actual, expected) in measured {
            assert_eq!(actual, expected, "unexpected size of {type_name}");
        }
    }

    #[test]
    fn test_accept_exprs() {
        // test helpers
        #[derive(Debug, PartialEq, Eq, Hash)]
        struct TestUDF {}

        impl ScalarUDFImpl for TestUDF {
            fn as_any(&self) -> &dyn Any {
                unimplemented!()
            }

            fn name(&self) -> &str {
                unimplemented!()
            }

            fn signature(&self) -> &Signature {
                unimplemented!()
            }

            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
                unimplemented!()
            }

            fn invoke_with_args(
                &self,
                _args: ScalarFunctionArgs,
            ) -> Result<ColumnarValue> {
                unimplemented!()
            }
        }

        // The function under test: it only has to type-check for each kind of slice.
        fn accept_exprs<E: AsRef<Expr>>(_: &[E]) {}

        let make_expr = || -> Expr { lit(1) };

        // Owned expressions
        let owned_exprs = vec![make_expr(), make_expr()];
        accept_exprs(&owned_exprs);

        // Expressions borrowed from inside an expression tree
        let udf_call = Expr::ScalarFunction(ScalarFunction {
            func: Arc::new(ScalarUDF::new_from_impl(TestUDF {})),
            args: vec![make_expr(), make_expr()],
        });
        let Expr::ScalarFunction(scalar) = &udf_call else {
            unreachable!()
        };
        accept_exprs(&scalar.args);

        // References gathered from the tree and the owned vector, without cloning
        let collected_refs: Vec<&Expr> =
            scalar.args.iter().chain(&owned_exprs).collect();
        accept_exprs(&collected_refs);
    }
}