Skip to main content

datafusion_expr/
expr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Logical Expressions: [`Expr`]
19
20use std::cmp::Ordering;
21use std::collections::HashSet;
22use std::fmt::{self, Display, Formatter, Write};
23use std::hash::{Hash, Hasher};
24use std::mem;
25use std::sync::Arc;
26
27use crate::expr_fn::binary_expr;
28use crate::function::WindowFunctionSimplification;
29use crate::higher_order_function::{HigherOrderUDF, resolve_lambda_variables};
30use crate::logical_plan::Subquery;
31use crate::type_coercion::functions::value_fields_with_higher_order_udf;
32use crate::{AggregateUDF, LambdaParametersProgress, ValueOrLambda, Volatility};
33use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
34
35use arrow::datatypes::{DataType, Field, FieldRef};
36use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
37use datafusion_common::datatype::DataTypeExt;
38use datafusion_common::metadata::format_type_and_metadata;
39use datafusion_common::tree_node::{
40    Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
41};
42use datafusion_common::{
43    Column, DFSchema, ExprSchema, HashMap, Result, ScalarValue, Spans, TableReference,
44    plan_err,
45};
46use datafusion_expr_common::placement::ExpressionPlacement;
47use datafusion_functions_window_common::field::WindowUDFFieldArgs;
48#[cfg(feature = "sql")]
49pub use sqlparser::ast::{
50    ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, RenameSelectItem,
51    ReplaceSelectElement,
52};
53// Use shims for sqlparser types when the sql feature is disabled.
54#[cfg(not(feature = "sql"))]
55pub use crate::sql::{
56    ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, RenameSelectItem,
57    ReplaceSelectElement,
58};
59
60// Moved in 51.0.0 to datafusion_common
61pub use datafusion_common::metadata::FieldMetadata;
62use datafusion_common::metadata::ScalarAndMetadata;
63
/// SQL null-treatment option (`IGNORE NULLS` / `RESPECT NULLS`).
///
/// This mirrors `sqlparser::ast::NullTreatment`, but a separate definition is
/// needed so the type is still available when the `sql` feature is disabled.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub enum NullTreatment {
    /// Displayed as `IGNORE NULLS`
    IgnoreNulls,
    /// Displayed as `RESPECT NULLS`
    RespectNulls,
}

/// Renders the option as its SQL keyword pair.
impl Display for NullTreatment {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            Self::IgnoreNulls => "IGNORE NULLS",
            Self::RespectNulls => "RESPECT NULLS",
        };
        f.write_str(keyword)
    }
}
80
/// Converts the sqlparser null-treatment option into the local
/// [`NullTreatment`] (only available with the `sql` feature).
#[cfg(feature = "sql")]
impl From<sqlparser::ast::NullTreatment> for NullTreatment {
    fn from(value: sqlparser::ast::NullTreatment) -> Self {
        use sqlparser::ast::NullTreatment as SqlNullTreatment;

        match value {
            SqlNullTreatment::RespectNulls => NullTreatment::RespectNulls,
            SqlNullTreatment::IgnoreNulls => NullTreatment::IgnoreNulls,
        }
    }
}
90
91/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
92///
93/// For example the expression `A + 1` will be represented as
94///
95///```text
96///  BinaryExpr {
97///    left: Expr::Column("A"),
98///    op: Operator::Plus,
99///    right: Expr::Literal(ScalarValue::Int32(Some(1)), None)
100/// }
101/// ```
102///
103/// # Creating Expressions
104///
105/// `Expr`s can be created directly, but it is often easier and less verbose to
106/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
108///
109/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
110///
111/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
112///
113/// # Printing Expressions
114///
115/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
116/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
117///
118/// If you need  SQL to pass to other systems, consider using [`Unparser`].
119///
120/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
121///
122/// # Schema Access
123///
124/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
125/// of an `Expr`.
126///
127/// # Visiting and Rewriting `Expr`s
128///
129/// The `Expr` struct implements the [`TreeNode`] trait for walking and
130/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
131/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
132/// the examples below and [`TreeNode`] for more information.
133///
134/// # Examples: Creating and Using `Expr`s
135///
136/// ## Column References and Literals
137///
138/// [`Expr::Column`] refer to the values of columns and are often created with
139/// the [`col`] function. For example to create an expression `c1` referring to
140/// column named "c1":
141///
142/// [`col`]: crate::expr_fn::col
143///
144/// ```
145/// # use datafusion_common::Column;
146/// # use datafusion_expr::{lit, col, Expr};
147/// let expr = col("c1");
148/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
149/// ```
150///
151/// [`Expr::Literal`] refer to literal, or constant, values. These are created
152/// with the [`lit`] function. For example to create an expression `42`:
153///
154/// [`lit`]: crate::lit
155///
156/// ```
157/// # use datafusion_common::{Column, ScalarValue};
158/// # use datafusion_expr::{lit, col, Expr};
159/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
160/// let expr = lit(42i64);
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
163/// // To make a (typed) NULL:
164/// let expr = Expr::Literal(ScalarValue::Int64(None), None);
165/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
166/// let expr = lit(ScalarValue::Null);
167/// ```
168///
169/// ## Binary Expressions
170///
171/// Exprs implement traits that allow easy to understand construction of more
172/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
173/// "c2" together
174///
175/// ```
176/// # use datafusion_expr::{lit, col, Operator, Expr};
177/// // Use the `+` operator to add two columns together
178/// let expr = col("c1") + col("c2");
179/// assert!(matches!(expr, Expr::BinaryExpr { .. }));
180/// if let Expr::BinaryExpr(binary_expr) = expr {
181///     assert_eq!(*binary_expr.left, col("c1"));
182///     assert_eq!(*binary_expr.right, col("c2"));
183///     assert_eq!(binary_expr.op, Operator::Plus);
184/// }
185/// ```
186///
/// The expression `c1 = 42` compares the value in column "c1" to the
188/// literal value `42`:
189///
190/// ```
191/// # use datafusion_common::ScalarValue;
192/// # use datafusion_expr::{lit, col, Operator, Expr};
193/// let expr = col("c1").eq(lit(42_i32));
194/// assert!(matches!(expr, Expr::BinaryExpr { .. }));
195/// if let Expr::BinaryExpr(binary_expr) = expr {
196///     assert_eq!(*binary_expr.left, col("c1"));
197///     let scalar = ScalarValue::Int32(Some(42));
198///     assert_eq!(*binary_expr.right, Expr::Literal(scalar, None));
199///     assert_eq!(binary_expr.op, Operator::Eq);
200/// }
201/// ```
202///
203/// Here is how to implement the equivalent of `SELECT *` to select all
204/// [`Expr::Column`] from a [`DFSchema`]'s columns:
205///
206/// ```
207/// # use arrow::datatypes::{DataType, Field, Schema};
208/// # use datafusion_common::{DFSchema, Column};
209/// # use datafusion_expr::Expr;
210/// // Create a schema c1(int, c2 float)
211/// let arrow_schema = Schema::new(vec![
212///     Field::new("c1", DataType::Int32, false),
213///     Field::new("c2", DataType::Float64, false),
214/// ]);
/// // DFSchema is an Arrow schema with an optional relation name
216/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap();
217///
218/// // Form Vec<Expr> with an expression for each column in the schema
219/// let exprs: Vec<_> = df_schema.iter().map(Expr::from).collect();
220///
221/// assert_eq!(
222///     exprs,
223///     vec![
224///         Expr::from(Column::from_qualified_name("t1.c1")),
225///         Expr::from(Column::from_qualified_name("t1.c2")),
226///     ]
227/// );
228/// ```
229///
230/// # Examples: Displaying `Exprs`
231///
232/// There are three ways to print an `Expr` depending on the usecase.
233///
234/// ## Use `Debug` trait
235///
236/// Following Rust conventions, the `Debug` implementation prints out the
237/// internal structure of the expression, which is useful for debugging.
238///
239/// ```
240/// # use datafusion_expr::{lit, col};
241/// let expr = col("c1") + lit(42);
242/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42), None) })");
243/// ```
244///
245/// ## Use the `Display` trait  (detailed expression)
246///
247/// The `Display` implementation prints out the expression in a SQL-like form,
248/// but has additional details such as the data type of literals. This is useful
249/// for understanding the expression in more detail and is used for the low level
250/// [`ExplainFormat::Indent`] explain plan format.
251///
252/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
253///
254/// ```
255/// # use datafusion_expr::{lit, col};
256/// let expr = col("c1") + lit(42);
257/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
258/// ```
259///
260/// ## Use [`Self::human_display`] (human readable)
261///
262/// [`Self::human_display`]  prints out the expression in a SQL-like form, optimized
263/// for human consumption by end users. It is used for the
264/// [`ExplainFormat::Tree`] explain plan format.
265///
266/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
267///
268///```
269/// # use datafusion_expr::{lit, col};
270/// let expr = col("c1") + lit(42);
271/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
272/// ```
273///
274/// # Examples: Visiting and Rewriting `Expr`s
275///
276/// Here is an example that finds all literals in an `Expr` tree:
277/// ```
278/// # use std::collections::{HashSet};
279/// use datafusion_common::ScalarValue;
280/// # use datafusion_expr::{col, Expr, lit};
281/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
282/// // Expression a = 5 AND b = 6
283/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
/// // find all literals and collect them in a HashSet
285/// let mut scalars = HashSet::new();
286/// // apply recursively visits all nodes in the expression tree
287/// expr.apply(|e| {
288///     if let Expr::Literal(scalar, _) = e {
289///         scalars.insert(scalar);
290///     }
291///     // The return value controls whether to continue visiting the tree
292///     Ok(TreeNodeRecursion::Continue)
293/// })
294/// .unwrap();
295/// // All subtrees have been visited and literals found
296/// assert_eq!(scalars.len(), 2);
297/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
298/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
299/// ```
300///
/// Rewrite an expression, replacing references to column "a" in the
/// expression with the literal `42`:
303///
304///  ```
305/// # use datafusion_common::tree_node::{Transformed, TreeNode};
306/// # use datafusion_expr::{col, Expr, lit};
307/// // expression a = 5 AND b = 6
308/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
309/// // rewrite all references to column "a" to the literal 42
310/// let rewritten = expr.transform(|e| {
311///   if let Expr::Column(c) = &e {
312///     if &c.name == "a" {
313///       // return Transformed::yes to indicate the node was changed
314///       return Ok(Transformed::yes(lit(42)))
315///     }
316///   }
317///   // return Transformed::no to indicate the node was not changed
318///   Ok(Transformed::no(e))
319/// }).unwrap();
320/// // The expression has been rewritten
321/// assert!(rewritten.transformed);
322/// // to 42 = 5 AND b = 6
323/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
324/// ```
#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
pub enum Expr {
    /// An expression with a specific name.
    Alias(Alias),
    /// A named reference to a qualified field in a schema.
    Column(Column),
    /// A named reference to a variable in a registry.
    ScalarVariable(FieldRef, Vec<String>),
    /// A constant value along with associated [`FieldMetadata`].
    Literal(ScalarValue, Option<FieldMetadata>),
    /// A binary expression such as "age > 21"
    BinaryExpr(BinaryExpr),
    /// LIKE expression
    Like(Like),
    /// LIKE expression that uses regular expressions
    SimilarTo(Like),
    /// Negation of an expression. The expression's type must be a boolean to make sense.
    Not(Box<Expr>),
    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
    IsNotNull(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsNull(Box<Expr>),
    /// True if argument is true, false otherwise. This expression itself is never NULL.
    IsTrue(Box<Expr>),
    /// True if argument is false, false otherwise. This expression itself is never NULL.
    IsFalse(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsUnknown(Box<Expr>),
    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
    IsNotTrue(Box<Expr>),
    /// True if argument is TRUE or NULL, false otherwise. This expression itself is never NULL.
    IsNotFalse(Box<Expr>),
    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
    IsNotUnknown(Box<Expr>),
    /// Arithmetic negation of an expression. The operand must be of a signed numeric data type.
    Negative(Box<Expr>),
    /// Whether an expression is between a given range.
    Between(Between),
    /// A CASE expression (see docs on [`Case`])
    Case(Case),
    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    Cast(Cast),
    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    TryCast(TryCast),
    /// Call a scalar function with a set of arguments.
    ScalarFunction(ScalarFunction),
    /// Calls an aggregate function with arguments, and optional
    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
    ///
    /// See also [`ExprFunctionExt`] to set these fields.
    ///
    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
    AggregateFunction(AggregateFunction),
    /// Call a window function with a set of arguments.
    WindowFunction(Box<WindowFunction>),
    /// Returns whether the list contains the expr value.
    InList(InList),
    /// EXISTS subquery
    Exists(Exists),
    /// IN subquery
    InSubquery(InSubquery),
    /// Set comparison subquery (e.g. `= ANY`, `> ALL`)
    SetComparison(SetComparison),
    /// Scalar subquery
    ScalarSubquery(Subquery),
    /// Represents a reference to all available fields in a specific schema,
    /// with an optional (schema) qualifier.
    ///
    /// This expr has to be resolved to a list of columns before translating logical
    /// plan into physical plan.
    #[deprecated(
        since = "46.0.0",
        note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765"
    )]
    Wildcard {
        qualifier: Option<TableReference>,
        options: Box<WildcardOptions>,
    },
    /// List of grouping set expressions. Only valid in the context of an aggregate
    /// GROUP BY expression list
    GroupingSet(GroupingSet),
    /// A placeholder for parameters in a prepared statement
    /// (e.g. `$foo` or `$1`)
    Placeholder(Placeholder),
    /// A placeholder which holds a reference to a qualified field
    /// in the outer query, used for correlated subqueries.
    OuterReferenceColumn(FieldRef, Column),
    /// Unnest expression
    Unnest(Unnest),
    /// Call a higher order function with a set of arguments.
    ///
    /// For example, `array_transform([1,2,3], v -> v+1)` would be equivalent to:
    ///
    /// ```text
    /// HigherOrderFunction(array_transform)
    /// ├── args[0]: Literal([1,2,3])
    /// └── args[1]: Lambda
    ///     ├── params: ["v"]
    ///     └── body: BinaryExpr(+)
    ///         ├── LambdaVariable("v")
    ///         └── Literal(1)
    /// ```
    HigherOrderFunction(HigherOrderFunction),
    /// A lambda expression with a set of parameter names and a body
    Lambda(Lambda),
    /// A named reference to a lambda parameter
    LambdaVariable(LambdaVariable),
}
435
436/// Invoke a [`HigherOrderUDF`] with a set of arguments
437#[derive(Clone, Eq, PartialOrd, Debug)]
438pub struct HigherOrderFunction {
439    /// The function
440    pub func: Arc<HigherOrderUDF>,
441    /// List of expressions to feed to the functions as arguments
442    pub args: Vec<Expr>,
443}
444
445impl HigherOrderFunction {
446    /// Create a new `HigherOrderFunction` from a [`HigherOrderUDF`]
447    pub fn new(func: Arc<HigherOrderUDF>, args: Vec<Expr>) -> Self {
448        Self { func, args }
449    }
450
451    pub fn name(&self) -> &str {
452        self.func.name()
453    }
454
455    /// Invokes the inner function [`crate::HigherOrderUDFImpl::lambda_parameters`]
456    /// using the arguments of this invocation. This expression lambda
457    /// variables must be already resolved either by coming from the
458    /// default sql planner or by calling [Expr::resolve_lambda_variables]
459    /// or [LogicalPlan::resolve_lambda_variables]
460    ///
461    /// [LogicalPlan::resolve_lambda_variables]: crate::LogicalPlan::resolve_lambda_variables
462    pub fn lambda_parameters(
463        &self,
464        schema: &dyn ExprSchema,
465    ) -> Result<Vec<Vec<FieldRef>>> {
466        let args = self
467            .args
468            .iter()
469            .map(|e| match e {
470                Expr::Lambda(lambda) => {
471                    Ok(ValueOrLambda::Lambda(Some(lambda.body.to_field(schema)?.1)))
472                }
473                _ => Ok(ValueOrLambda::Value(e.to_field(schema)?.1)),
474            })
475            .collect::<Result<Vec<_>>>()?;
476
477        let coerced_fields =
478            value_fields_with_higher_order_udf(&args, self.func.as_ref())?;
479
480        match self.func.lambda_parameters(0, &coerced_fields)? {
481            LambdaParametersProgress::Partial(_) => plan_err!(
482                "{} lambda_parameters returned a partial result when the return type of all it's lambdas were provided",
483                self.name()
484            ),
485            LambdaParametersProgress::Complete(items) => Ok(items),
486        }
487    }
488}
489
490impl Hash for HigherOrderFunction {
491    fn hash<H: Hasher>(&self, state: &mut H) {
492        self.func.hash(state);
493        self.args.hash(state);
494    }
495}
496
497impl PartialEq for HigherOrderFunction {
498    fn eq(&self, other: &Self) -> bool {
499        self.func.as_ref() == other.func.as_ref() && self.args == other.args
500    }
501}
502
503/// A named reference to a lambda parameter which includes it's own [`FieldRef`],
504/// which is used to implement [`ExprSchemable`], for example. It is an option only to make
505/// easier for `expr_api` users to construct lambda variables, but any expression
506/// tree or [`LogicalPlan`] containing unresolved variables must be resolved before
507/// usage with either [`Expr::resolve_lambda_variables`] or
508/// [`LogicalPlan::resolve_lambda_variables`]. The default SQL planner produces
509/// already resolved variables and no further resolving is required.
510///
511/// After resolving, if any argument from the lambda function which this
512/// variables originates from have it's field changed (type, nullability,
513/// metadata, etc), the resolved variable may became outdated and must be
514/// resolved again.
515///
516/// [`LogicalPlan`]: crate::LogicalPlan
517/// [`LogicalPlan::resolve_lambda_variables`]: crate::LogicalPlan::resolve_lambda_variables
518#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
519pub struct LambdaVariable {
520    pub name: String,
521    pub field: Option<FieldRef>,
522    pub spans: Spans,
523}
524
525impl LambdaVariable {
526    /// Create a lambda variable from a name and an optional field.
527    /// If the field is none, the expression tree or LogicalPlan which
528    /// owns this variable must be resolved before usage with either
529    /// [`Expr::resolve_lambda_variables`] or [`LogicalPlan::resolve_lambda_variables`].
530    ///
531    /// [`LogicalPlan::resolve_lambda_variables`]: crate::LogicalPlan::resolve_lambda_variables
532    pub fn new(name: String, field: Option<FieldRef>) -> Self {
533        Self {
534            name,
535            field,
536            spans: Spans::new(),
537        }
538    }
539
540    pub fn spans_mut(&mut self) -> &mut Spans {
541        &mut self.spans
542    }
543}
544
545impl Default for Expr {
546    fn default() -> Self {
547        Expr::Literal(ScalarValue::Null, None)
548    }
549}
550
/// Lets an [`Expr`] be passed wherever an `AsRef<Expr>` is accepted.
impl AsRef<Expr> for Expr {
    /// Returns `self` unchanged.
    fn as_ref(&self) -> &Expr {
        self
    }
}
556
557/// Create an [`Expr`] from a [`Column`]
558impl From<Column> for Expr {
559    fn from(value: Column) -> Self {
560        Expr::Column(value)
561    }
562}
563
564/// Create an [`Expr`] from a [`WindowFunction`]
565impl From<WindowFunction> for Expr {
566    fn from(value: WindowFunction) -> Self {
567        Expr::WindowFunction(Box::new(value))
568    }
569}
570
571/// Create an [`Expr`] from an [`ScalarAndMetadata`]
572impl From<ScalarAndMetadata> for Expr {
573    fn from(value: ScalarAndMetadata) -> Self {
574        let (value, metadata) = value.into_inner();
575        Expr::Literal(value, metadata)
576    }
577}
578
579/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
580/// useful for creating [`Expr`] from a [`DFSchema`].
581///
582/// See example on [`Expr`]
583impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
584    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
585        Expr::from(Column::from(value))
586    }
587}
588
/// An [`Expr`] acts as a container holding exactly one element: itself.
/// Both methods therefore hand `self` straight to the callback.
impl<'a> TreeNodeContainer<'a, Self> for Expr {
    /// Calls `f` once with a reference to this expression and returns its result.
    fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
        &'a self,
        mut f: F,
    ) -> Result<TreeNodeRecursion> {
        f(self)
    }

    /// Calls `f` once with this expression by value and returns its result.
    fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>(
        self,
        mut f: F,
    ) -> Result<Transformed<Self>> {
        f(self)
    }
}
604
/// The metadata used in [`Field::metadata`].
///
/// This represents the metadata associated with an Arrow [`Field`]. The metadata consists of key-value pairs.
///
/// # Common Use Cases
///
/// Field metadata is commonly used to store:
/// - Default values for columns when data is missing
/// - Column descriptions or documentation
/// - Data lineage information
/// - Custom application-specific annotations
/// - Encoding hints or display formatting preferences
///
/// # Example: Storing Default Values
///
/// A practical example of using field metadata is storing default values for columns
/// that may be missing in the physical data but present in the logical schema.
/// See the [default_column_values.rs] example implementation.
///
/// [default_column_values.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/custom_data_source/default_column_values.rs
pub type SchemaFieldMetadata = std::collections::HashMap<String, String>;

/// Intersects multiple metadata instances for UNION operations.
///
/// Only metadata entries that are present, with identical values, in every
/// non-empty input survive in the result.
///
/// # Union Metadata Behavior
///
/// - Inputs with no metadata at all (e.g. from NULL literals or computed
///   expressions) are skipped, so they do not wipe out metadata carried by
///   other branches
/// - Only metadata keys present in ALL remaining union branches are kept
/// - For each kept key, the value must be identical across those branches
/// - If a key has different values across branches, it is excluded from the result
/// - If there are no inputs, or every input is empty, the result is empty
///
/// # Arguments
///
/// * `metadatas` - An iterator of `SchemaFieldMetadata` instances to intersect
///
/// # Returns
///
/// A new `SchemaFieldMetadata` containing only the intersected metadata
pub fn intersect_metadata_for_union<'a>(
    metadatas: impl IntoIterator<Item = &'a SchemaFieldMetadata>,
) -> SchemaFieldMetadata {
    // Empty inputs never take part in the intersection.
    let mut populated = metadatas.into_iter().filter(|m| !m.is_empty());

    // The first populated input seeds the result; without one there is
    // nothing to intersect.
    let Some(seed) = populated.next() else {
        return SchemaFieldMetadata::new();
    };

    let mut common = seed.clone();
    for branch in populated {
        // Drop every entry the current branch lacks or disagrees on.
        common.retain(|key, value| branch.get(key) == Some(&*value));
    }
    common
}
672
673/// UNNEST expression.
674#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
675pub struct Unnest {
676    pub expr: Box<Expr>,
677}
678
679impl Unnest {
680    /// Create a new Unnest expression.
681    pub fn new(expr: Expr) -> Self {
682        Self {
683            expr: Box::new(expr),
684        }
685    }
686
687    /// Create a new Unnest expression.
688    pub fn new_boxed(boxed: Box<Expr>) -> Self {
689        Self { expr: boxed }
690    }
691}
692
/// Alias expression
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Alias {
    /// The expression being aliased
    pub expr: Box<Expr>,
    /// Optional qualifier of the alias
    pub relation: Option<TableReference>,
    /// The alias name
    pub name: String,
    /// Optional metadata attached to the alias. It takes part in the derived
    /// `PartialEq`, but the manual `Hash` impl does not feed it to the hasher.
    pub metadata: Option<FieldMetadata>,
}
701
702impl Hash for Alias {
703    fn hash<H: Hasher>(&self, state: &mut H) {
704        self.expr.hash(state);
705        self.relation.hash(state);
706        self.name.hash(state);
707    }
708}
709
710impl PartialOrd for Alias {
711    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
712        let cmp = self.expr.partial_cmp(&other.expr);
713        let Some(Ordering::Equal) = cmp else {
714            return cmp;
715        };
716        let cmp = self.relation.partial_cmp(&other.relation);
717        let Some(Ordering::Equal) = cmp else {
718            return cmp;
719        };
720        self.name
721            .partial_cmp(&other.name)
722            // TODO (https://github.com/apache/datafusion/issues/17477) avoid recomparing all fields
723            .filter(|cmp| *cmp != Ordering::Equal || self == other)
724    }
725}
726
727impl Alias {
728    /// Create an alias with an optional schema/field qualifier.
729    pub fn new(
730        expr: Expr,
731        relation: Option<impl Into<TableReference>>,
732        name: impl Into<String>,
733    ) -> Self {
734        Self {
735            expr: Box::new(expr),
736            relation: relation.map(|r| r.into()),
737            name: name.into(),
738            metadata: None,
739        }
740    }
741
742    pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self {
743        self.metadata = metadata;
744        self
745    }
746
747    #[doc(hidden)]
748    pub fn with_expr(mut self, expr: Expr) -> Self {
749        self.expr = Box::new(expr);
750        self
751    }
752
753    #[doc(hidden)]
754    pub fn try_map_expr(self, f: impl FnOnce(Expr) -> Result<Expr>) -> Result<Expr> {
755        let Alias {
756            expr,
757            relation,
758            name,
759            metadata,
760        } = self;
761        Ok(Expr::Alias(Alias {
762            expr: Box::new(f(*expr)?),
763            relation,
764            name,
765            metadata,
766        }))
767    }
768}
769
/// Binary expression for [`Expr::BinaryExpr`]
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct BinaryExpr {
    /// Left-hand side of the expression
    pub left: Box<Expr>,
    /// The operator applied to both sides (for example `Operator::Plus` or
    /// `Operator::Eq`)
    pub op: Operator,
    /// Right-hand side of the expression
    pub right: Box<Expr>,
}

impl BinaryExpr {
    /// Create a new binary expression from already boxed operands
    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
        Self { left, op, right }
    }
}
787
788impl Display for BinaryExpr {
789    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
790        // Put parentheses around child binary expressions so that we can see the difference
791        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
792        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
793        // equivalent and the parentheses are not necessary.
794
795        fn write_child(
796            f: &mut Formatter<'_>,
797            expr: &Expr,
798            precedence: u8,
799        ) -> fmt::Result {
800            match expr {
801                Expr::BinaryExpr(child) => {
802                    let p = child.op.precedence();
803                    if p == 0 || p < precedence {
804                        write!(f, "({child})")?;
805                    } else {
806                        write!(f, "{child}")?;
807                    }
808                }
809                _ => write!(f, "{expr}")?,
810            }
811            Ok(())
812        }
813
814        let precedence = self.op.precedence();
815        write_child(f, self.left.as_ref(), precedence)?;
816        write!(f, " {} ", self.op)?;
817        write_child(f, self.right.as_ref(), precedence)
818    }
819}
820
/// CASE expression
///
/// The CASE expression is similar to a series of nested if/else and there are two forms that
/// can be used. The first form consists of a series of boolean "when" expressions with
/// corresponding "then" expressions, and an optional "else" expression.
///
/// ```text
/// CASE WHEN condition THEN result
///      [WHEN ...]
///      [ELSE result]
/// END
/// ```
///
/// The second form uses a base expression and then a series of "when" clauses that match on a
/// literal value.
///
/// ```text
/// CASE expression
///     WHEN value THEN result
///     [WHEN ...]
///     [ELSE result]
/// END
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
pub struct Case {
    /// Optional base expression that can be compared to literal values in the "when" expressions
    /// (`None` for the first form shown above)
    pub expr: Option<Box<Expr>>,
    /// One or more when/then expressions, stored as `(when, then)` pairs
    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
    /// Optional "else" expression
    pub else_expr: Option<Box<Expr>>,
}

impl Case {
    /// Create a new Case expression from its base expression, `(when, then)`
    /// pairs and "else" expression. The arguments are stored as given.
    pub fn new(
        expr: Option<Box<Expr>>,
        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
        else_expr: Option<Box<Expr>>,
    ) -> Self {
        Self {
            expr,
            when_then_expr,
            else_expr,
        }
    }
}
868
869/// LIKE expression
870#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
871pub struct Like {
872    pub negated: bool,
873    pub expr: Box<Expr>,
874    pub pattern: Box<Expr>,
875    pub escape_char: Option<char>,
876    /// Whether to ignore case on comparing
877    pub case_insensitive: bool,
878}
879
880impl Like {
881    /// Create a new Like expression
882    pub fn new(
883        negated: bool,
884        expr: Box<Expr>,
885        pattern: Box<Expr>,
886        escape_char: Option<char>,
887        case_insensitive: bool,
888    ) -> Self {
889        Self {
890            negated,
891            expr,
892            pattern,
893            escape_char,
894            case_insensitive,
895        }
896    }
897}
898
899/// BETWEEN expression
900#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
901pub struct Between {
902    /// The value to compare
903    pub expr: Box<Expr>,
904    /// Whether the expression is negated
905    pub negated: bool,
906    /// The low end of the range
907    pub low: Box<Expr>,
908    /// The high end of the range
909    pub high: Box<Expr>,
910}
911
912impl Between {
913    /// Create a new Between expression
914    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
915        Self {
916            expr,
917            negated,
918            low,
919            high,
920        }
921    }
922}
923
924/// Invoke a [`ScalarUDF`] with a set of arguments
925///
926/// [`ScalarUDF`]: crate::ScalarUDF
927#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
928pub struct ScalarFunction {
929    /// The function
930    pub func: Arc<crate::ScalarUDF>,
931    /// List of expressions to feed to the functions as arguments
932    pub args: Vec<Expr>,
933}
934
935impl ScalarFunction {
936    // return the Function's name
937    pub fn name(&self) -> &str {
938        self.func.name()
939    }
940}
941
942impl ScalarFunction {
943    /// Create a new `ScalarFunction` from a [`ScalarUDF`]
944    ///
945    /// [`ScalarUDF`]: crate::ScalarUDF
946    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
947        Self { func: udf, args }
948    }
949}
950
/// Access a sub field of a nested type, such as `Field` or `List`
///
/// Each variant carries the key(s) used for the access; the container being
/// accessed is not part of this type.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
    /// Named field, for example `struct["name"]`
    NamedStructField { name: ScalarValue },
    /// Single list index, for example: `list[i]`
    ListIndex { key: Box<Expr> },
    /// List range with a stride, for example `list[i:j:k]`
    /// (presumably `i`, `j` and `k` correspond to `start`, `stop` and
    /// `stride` — confirm against the planner that builds this variant)
    ListRange {
        start: Box<Expr>,
        stop: Box<Expr>,
        stride: Box<Expr>,
    },
}
965
966/// Cast expression
967#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
968pub struct Cast {
969    /// The expression being cast
970    pub expr: Box<Expr>,
971    /// The `DataType` the expression will yield
972    pub field: FieldRef,
973}
974
975impl Cast {
976    /// Create a new Cast expression
977    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
978        Self {
979            expr,
980            field: data_type.into_nullable_field_ref(),
981        }
982    }
983
984    pub fn new_from_field(expr: Box<Expr>, field: FieldRef) -> Self {
985        Self { expr, field }
986    }
987}
988
989/// TryCast Expression
990#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
991pub struct TryCast {
992    /// The expression being cast
993    pub expr: Box<Expr>,
994    /// The `DataType` the expression will yield
995    pub field: FieldRef,
996}
997
998impl TryCast {
999    /// Create a new TryCast expression
1000    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
1001        Self {
1002            expr,
1003            field: data_type.into_nullable_field_ref(),
1004        }
1005    }
1006
1007    pub fn new_from_field(expr: Box<Expr>, field: FieldRef) -> Self {
1008        Self { expr, field }
1009    }
1010}
1011
1012/// SORT expression
1013#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1014pub struct Sort {
1015    /// The expression to sort on
1016    pub expr: Expr,
1017    /// The direction of the sort
1018    pub asc: bool,
1019    /// Whether to put Nulls before all other data values
1020    pub nulls_first: bool,
1021}
1022
1023impl Sort {
1024    /// Create a new Sort expression
1025    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
1026        Self {
1027            expr,
1028            asc,
1029            nulls_first,
1030        }
1031    }
1032
1033    /// Create a new Sort expression with the opposite sort direction
1034    pub fn reverse(&self) -> Self {
1035        Self {
1036            expr: self.expr.clone(),
1037            asc: !self.asc,
1038            nulls_first: !self.nulls_first,
1039        }
1040    }
1041
1042    /// Replaces the Sort expressions with `expr`
1043    pub fn with_expr(&self, expr: Expr) -> Self {
1044        Self {
1045            expr,
1046            asc: self.asc,
1047            nulls_first: self.nulls_first,
1048        }
1049    }
1050}
1051
1052impl Display for Sort {
1053    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1054        write!(f, "{}", self.expr)?;
1055        if self.asc {
1056            write!(f, " ASC")?;
1057        } else {
1058            write!(f, " DESC")?;
1059        }
1060        if self.nulls_first {
1061            write!(f, " NULLS FIRST")?;
1062        } else {
1063            write!(f, " NULLS LAST")?;
1064        }
1065        Ok(())
1066    }
1067}
1068
1069impl<'a> TreeNodeContainer<'a, Expr> for Sort {
1070    fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>(
1071        &'a self,
1072        f: F,
1073    ) -> Result<TreeNodeRecursion> {
1074        self.expr.apply_elements(f)
1075    }
1076
1077    fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
1078        self,
1079        f: F,
1080    ) -> Result<Transformed<Self>> {
1081        self.expr
1082            .map_elements(f)?
1083            .map_data(|expr| Ok(Self { expr, ..self }))
1084    }
1085}
1086
1087/// Aggregate function
1088///
1089/// See also  [`ExprFunctionExt`] to set these fields on `Expr`
1090///
1091/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
1092#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1093pub struct AggregateFunction {
1094    /// Name of the function
1095    pub func: Arc<AggregateUDF>,
1096    pub params: AggregateFunctionParams,
1097}
1098
1099#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1100pub struct AggregateFunctionParams {
1101    pub args: Vec<Expr>,
1102    /// Whether this is a DISTINCT aggregation or not
1103    pub distinct: bool,
1104    /// Optional filter
1105    pub filter: Option<Box<Expr>>,
1106    /// Optional ordering
1107    pub order_by: Vec<Sort>,
1108    pub null_treatment: Option<NullTreatment>,
1109}
1110
1111impl AggregateFunction {
1112    /// Create a new AggregateFunction expression with a user-defined function (UDF)
1113    pub fn new_udf(
1114        func: Arc<AggregateUDF>,
1115        args: Vec<Expr>,
1116        distinct: bool,
1117        filter: Option<Box<Expr>>,
1118        order_by: Vec<Sort>,
1119        null_treatment: Option<NullTreatment>,
1120    ) -> Self {
1121        Self {
1122            func,
1123            params: AggregateFunctionParams {
1124                args,
1125                distinct,
1126                filter,
1127                order_by,
1128                null_treatment,
1129            },
1130        }
1131    }
1132}
1133
1134/// A function used as a SQL window function
1135///
1136/// In SQL, you can use:
1137/// - Actual window functions ([`WindowUDF`])
1138/// - Normal aggregate functions ([`AggregateUDF`])
1139#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
1140pub enum WindowFunctionDefinition {
1141    /// A user defined aggregate function
1142    AggregateUDF(Arc<AggregateUDF>),
1143    /// A user defined window function
1144    WindowUDF(Arc<WindowUDF>),
1145}
1146
1147impl WindowFunctionDefinition {
1148    /// Returns the datatype of the window function
1149    pub fn return_field(
1150        &self,
1151        input_expr_fields: &[FieldRef],
1152        display_name: &str,
1153    ) -> Result<FieldRef> {
1154        match self {
1155            WindowFunctionDefinition::AggregateUDF(fun) => {
1156                fun.return_field(input_expr_fields)
1157            }
1158            WindowFunctionDefinition::WindowUDF(fun) => {
1159                fun.field(WindowUDFFieldArgs::new(input_expr_fields, display_name))
1160            }
1161        }
1162    }
1163
1164    /// The signatures supported by the function `fun`.
1165    pub fn signature(&self) -> Signature {
1166        match self {
1167            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
1168            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
1169        }
1170    }
1171
1172    /// Function's name for display
1173    pub fn name(&self) -> &str {
1174        match self {
1175            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
1176            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
1177        }
1178    }
1179
1180    /// Returns this window function's simplification hook, if any.
1181    ///
1182    /// See [`WindowFunctionSimplification`] for more information
1183    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1184        match self {
1185            WindowFunctionDefinition::AggregateUDF(_) => None,
1186            WindowFunctionDefinition::WindowUDF(udwf) => udwf.simplify(),
1187        }
1188    }
1189}
1190
1191impl Display for WindowFunctionDefinition {
1192    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1193        match self {
1194            WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f),
1195            WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f),
1196        }
1197    }
1198}
1199
1200impl From<Arc<AggregateUDF>> for WindowFunctionDefinition {
1201    fn from(value: Arc<AggregateUDF>) -> Self {
1202        Self::AggregateUDF(value)
1203    }
1204}
1205
1206impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
1207    fn from(value: Arc<WindowUDF>) -> Self {
1208        Self::WindowUDF(value)
1209    }
1210}
1211
1212/// Window function
1213///
1214/// Holds the actual function to call [`WindowFunction`] as well as its
1215/// arguments (`args`) and the contents of the `OVER` clause:
1216///
1217/// 1. `PARTITION BY`
1218/// 2. `ORDER BY`
1219/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
1220///
1221/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
1222///
1223/// [`ExprFunctionExt`]: crate::ExprFunctionExt
1224#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1225pub struct WindowFunction {
1226    /// Name of the function
1227    pub fun: WindowFunctionDefinition,
1228    pub params: WindowFunctionParams,
1229}
1230
1231#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1232pub struct WindowFunctionParams {
1233    /// List of expressions to feed to the functions as arguments
1234    pub args: Vec<Expr>,
1235    /// List of partition by expressions
1236    pub partition_by: Vec<Expr>,
1237    /// List of order by expressions
1238    pub order_by: Vec<Sort>,
1239    /// Window frame
1240    pub window_frame: WindowFrame,
1241    /// Optional filter expression (FILTER (WHERE ...))
1242    pub filter: Option<Box<Expr>>,
1243    /// Specifies how NULL value is treated: ignore or respect
1244    pub null_treatment: Option<NullTreatment>,
1245    /// Distinct flag
1246    pub distinct: bool,
1247}
1248
1249impl WindowFunction {
1250    /// Create a new Window expression with the specified argument an
1251    /// empty `OVER` clause
1252    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
1253        Self {
1254            fun: fun.into(),
1255            params: WindowFunctionParams {
1256                args,
1257                partition_by: Vec::default(),
1258                order_by: Vec::default(),
1259                window_frame: WindowFrame::new(None),
1260                filter: None,
1261                null_treatment: None,
1262                distinct: false,
1263            },
1264        }
1265    }
1266
1267    /// Returns this window function's simplification hook, if any.
1268    ///
1269    /// See [`WindowFunctionSimplification`] for more information
1270    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1271        self.fun.simplify()
1272    }
1273}
1274
1275/// EXISTS expression
1276#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1277pub struct Exists {
1278    /// Subquery that will produce a single column of data
1279    pub subquery: Subquery,
1280    /// Whether the expression is negated
1281    pub negated: bool,
1282}
1283
1284impl Exists {
1285    // Create a new Exists expression.
1286    pub fn new(subquery: Subquery, negated: bool) -> Self {
1287        Self { subquery, negated }
1288    }
1289}
1290
/// Whether the set comparison uses `ANY`/`SOME` or `ALL`
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum SetQuantifier {
    /// `ANY` (or `SOME`)
    Any,
    /// `ALL`
    All,
}

impl Display for SetQuantifier {
    /// Writes the SQL keyword of the quantifier: `ANY` or `ALL`
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            Self::Any => "ANY",
            Self::All => "ALL",
        };
        f.write_str(keyword)
    }
}
1308
1309/// Set comparison subquery (e.g. `= ANY`, `> ALL`)
1310#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1311pub struct SetComparison {
1312    /// The expression to compare
1313    pub expr: Box<Expr>,
1314    /// Subquery that will produce a single column of data to compare against
1315    pub subquery: Subquery,
1316    /// Comparison operator (e.g. `=`, `>`, `<`)
1317    pub op: Operator,
1318    /// Quantifier (`ANY`/`ALL`)
1319    pub quantifier: SetQuantifier,
1320}
1321
1322impl SetComparison {
1323    /// Create a new set comparison expression
1324    pub fn new(
1325        expr: Box<Expr>,
1326        subquery: Subquery,
1327        op: Operator,
1328        quantifier: SetQuantifier,
1329    ) -> Self {
1330        Self {
1331            expr,
1332            subquery,
1333            op,
1334            quantifier,
1335        }
1336    }
1337}
1338
1339/// InList expression
1340#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1341pub struct InList {
1342    /// The expression to compare
1343    pub expr: Box<Expr>,
1344    /// The list of values to compare against
1345    pub list: Vec<Expr>,
1346    /// Whether the expression is negated
1347    pub negated: bool,
1348}
1349
1350impl InList {
1351    /// Create a new InList expression
1352    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
1353        Self {
1354            expr,
1355            list,
1356            negated,
1357        }
1358    }
1359}
1360
1361/// IN subquery
1362#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1363pub struct InSubquery {
1364    /// The expression to compare
1365    pub expr: Box<Expr>,
1366    /// Subquery that will produce a single column of data to compare against
1367    pub subquery: Subquery,
1368    /// Whether the expression is negated
1369    pub negated: bool,
1370}
1371
1372impl InSubquery {
1373    /// Create a new InSubquery expression
1374    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
1375        Self {
1376            expr,
1377            subquery,
1378            negated,
1379        }
1380    }
1381}
1382
1383/// Placeholder, representing bind parameter values such as `$1` or `$name`.
1384///
1385/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
1386/// or can be specified directly using `PREPARE` statements.
1387#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1388pub struct Placeholder {
1389    /// The identifier of the parameter, including the leading `$` (e.g, `"$1"` or `"$foo"`)
1390    pub id: String,
1391    /// The type the parameter will be filled in with
1392    pub field: Option<FieldRef>,
1393}
1394
1395impl Placeholder {
1396    /// Create a new Placeholder expression
1397    #[deprecated(since = "51.0.0", note = "Use new_with_field instead")]
1398    pub fn new(id: String, data_type: Option<DataType>) -> Self {
1399        Self {
1400            id,
1401            field: data_type.map(|dt| Arc::new(Field::new("", dt, true))),
1402        }
1403    }
1404
1405    /// Create a new Placeholder expression from a Field
1406    pub fn new_with_field(id: String, field: Option<FieldRef>) -> Self {
1407        Self { id, field }
1408    }
1409}
1410
1411/// Grouping sets
1412///
1413/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
1414/// for Postgres definition.
1415/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
1416/// for Apache Spark definition.
1417#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1418pub enum GroupingSet {
1419    /// Rollup grouping sets
1420    Rollup(Vec<Expr>),
1421    /// Cube grouping sets
1422    Cube(Vec<Expr>),
1423    /// User-defined grouping sets
1424    GroupingSets(Vec<Vec<Expr>>),
1425}
1426
1427impl GroupingSet {
1428    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
1429    /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate
1430    /// the exprs in the underlying sets.
1431    pub fn distinct_expr(&self) -> Vec<&Expr> {
1432        match self {
1433            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
1434                exprs.iter().collect()
1435            }
1436            GroupingSet::GroupingSets(groups) => {
1437                let mut exprs: Vec<&Expr> = vec![];
1438                for exp in groups.iter().flatten() {
1439                    if !exprs.contains(&exp) {
1440                        exprs.push(exp);
1441                    }
1442                }
1443                exprs
1444            }
1445        }
1446    }
1447}
1448
1449/// A Lambda expression with a set of parameters names and a body
1450#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1451pub struct Lambda {
1452    /// The parameters names
1453    pub params: Vec<String>,
1454    /// The body expression
1455    pub body: Box<Expr>,
1456}
1457
1458impl Lambda {
1459    /// Create a new lambda expression
1460    pub fn new(params: Vec<String>, body: Expr) -> Self {
1461        Self {
1462            params,
1463            body: Box::new(body),
1464        }
1465    }
1466}
1467
/// Renders every element of `slice` with its [`Display`] implementation and
/// joins the results with `", "`.
///
/// Returns an empty string for an empty slice.
///
/// # Panics
///
/// Panics if an element's `Display` implementation returns an error.
pub fn display_comma_separated<T>(slice: &[T]) -> String
where
    T: Display,
{
    // Imported locally so this function does not rely on file-level imports.
    use std::fmt::Write as _;

    let mut joined = String::new();
    for (index, item) in slice.iter().enumerate() {
        if index > 0 {
            joined.push_str(", ");
        }
        // Format straight into the output buffer rather than allocating a
        // temporary `String` per element.
        write!(joined, "{item}").expect("a Display implementation returned an error");
    }
    joined
}
1475
1476/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`.
1477#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
1478pub struct WildcardOptions {
1479    /// `[ILIKE...]`.
1480    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1481    pub ilike: Option<IlikeSelectItem>,
1482    /// `[EXCLUDE...]`.
1483    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1484    pub exclude: Option<ExcludeSelectItem>,
1485    /// `[EXCEPT...]`.
1486    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
1487    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
1488    pub except: Option<ExceptSelectItem>,
1489    /// `[REPLACE]`
1490    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
1491    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
1492    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1493    pub replace: Option<PlannedReplaceSelectItem>,
1494    /// `[RENAME ...]`.
1495    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1496    pub rename: Option<RenameSelectItem>,
1497}
1498
1499impl WildcardOptions {
1500    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1501        WildcardOptions {
1502            ilike: self.ilike,
1503            exclude: self.exclude,
1504            except: self.except,
1505            replace: Some(replace),
1506            rename: self.rename,
1507        }
1508    }
1509}
1510
1511impl Display for WildcardOptions {
1512    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1513        if let Some(ilike) = &self.ilike {
1514            write!(f, " {ilike}")?;
1515        }
1516        if let Some(exclude) = &self.exclude {
1517            write!(f, " {exclude}")?;
1518        }
1519        if let Some(except) = &self.except {
1520            write!(f, " {except}")?;
1521        }
1522        if let Some(replace) = &self.replace {
1523            write!(f, " {replace}")?;
1524        }
1525        if let Some(rename) = &self.rename {
1526            write!(f, " {rename}")?;
1527        }
1528        Ok(())
1529    }
1530}
1531
1532/// The planned expressions for `REPLACE`
1533#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
1534pub struct PlannedReplaceSelectItem {
1535    /// The original ast nodes
1536    pub items: Vec<ReplaceSelectElement>,
1537    /// The expression planned from the ast nodes. They will be used when expanding the wildcard.
1538    pub planned_expressions: Vec<Expr>,
1539}
1540
1541impl Display for PlannedReplaceSelectItem {
1542    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1543        write!(f, "REPLACE")?;
1544        write!(f, " ({})", display_comma_separated(&self.items))?;
1545        Ok(())
1546    }
1547}
1548
1549impl PlannedReplaceSelectItem {
1550    pub fn items(&self) -> &[ReplaceSelectElement] {
1551        &self.items
1552    }
1553
1554    pub fn expressions(&self) -> &[Expr] {
1555        &self.planned_expressions
1556    }
1557}
1558
1559impl Expr {
    /// The name of the column (field) that this `Expr` will produce.
    ///
    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
    /// [`Schema`] will have a field with this name.
    ///
    /// Note that the resulting string is subtly different from the `Display`
    /// representation for certain `Expr`. Some differences:
    ///
    /// 1. [`Expr::Alias`], which shows only the alias itself
    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
    ///
    /// The name is not computed here: the returned value borrows `self` and
    /// produces the name when it is formatted.
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo").eq(lit(42));
    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
    ///
    /// let expr = col("foo").alias("bar").eq(lit(11));
    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
    /// ```
    ///
    /// [`Schema`]: arrow::datatypes::Schema
    pub fn schema_name(&self) -> impl Display + '_ {
        SchemaDisplay(self)
    }
1585
    /// Human readable display formatting for this expression.
    ///
    /// This function is primarily used in printing the explain tree output,
    /// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
    /// show how expressions are used in physical and logical plans. See
    /// [`Expr`] for other ways to format expressions.
    ///
    /// Note this format is intended for human consumption rather than SQL for
    /// other systems. If you need SQL to pass to other systems, consider using
    /// [`Unparser`].
    ///
    /// The returned value borrows `self` and produces the text when it is
    /// formatted.
    ///
    /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo") + lit(42);
    /// // For EXPLAIN output:
    /// // "foo + 42"
    /// println!("{}", expr.human_display());
    /// ```
    pub fn human_display(&self) -> impl Display + '_ {
        SqlDisplay(self)
    }
1610
1611    /// Returns the qualifier and the schema name of this expression.
1612    ///
1613    /// Used when the expression forms the output field of a certain plan.
1614    /// The result is the field's qualifier and field name in the plan's
1615    /// output schema. We can use this qualified name to reference the field.
1616    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1617        match self {
1618            Expr::Column(Column {
1619                relation,
1620                name,
1621                spans: _,
1622            }) => (relation.clone(), name.clone()),
1623            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1624            _ => (None, self.schema_name().to_string()),
1625        }
1626    }
1627
1628    /// Returns placement information for this expression.
1629    ///
1630    /// This is used by optimizers to make decisions about expression placement,
1631    /// such as whether to push expressions down through projections.
1632    pub fn placement(&self) -> ExpressionPlacement {
1633        match self {
1634            Expr::Column(_) => ExpressionPlacement::Column,
1635            Expr::Literal(_, _) => ExpressionPlacement::Literal,
1636            Expr::Alias(inner) => inner.expr.placement(),
1637            Expr::ScalarFunction(func) => {
1638                let arg_placements: Vec<_> =
1639                    func.args.iter().map(|arg| arg.placement()).collect();
1640                func.func.placement(&arg_placements)
1641            }
1642            _ => ExpressionPlacement::KeepInPlace,
1643        }
1644    }
1645
    /// Return String representation of the variant represented by `self`
    /// Useful for non-rust based bindings
    ///
    /// The returned string equals the variant's identifier except for
    /// [`Expr::OuterReferenceColumn`], which is reported as `"Outer"`, and
    /// [`Expr::SimilarTo`], which is reported as `"RLike"`.
    pub fn variant_name(&self) -> &str {
        match self {
            Expr::AggregateFunction { .. } => "AggregateFunction",
            Expr::Alias(..) => "Alias",
            Expr::Between { .. } => "Between",
            Expr::BinaryExpr { .. } => "BinaryExpr",
            Expr::Case { .. } => "Case",
            Expr::Cast { .. } => "Cast",
            Expr::Column(..) => "Column",
            // NOTE: shortened name, not "OuterReferenceColumn"
            Expr::OuterReferenceColumn(_, _) => "Outer",
            Expr::Exists { .. } => "Exists",
            Expr::GroupingSet(..) => "GroupingSet",
            Expr::InList { .. } => "InList",
            Expr::InSubquery(..) => "InSubquery",
            Expr::SetComparison(..) => "SetComparison",
            Expr::IsNotNull(..) => "IsNotNull",
            Expr::IsNull(..) => "IsNull",
            Expr::Like { .. } => "Like",
            // NOTE: reported as "RLike", not "SimilarTo"
            Expr::SimilarTo { .. } => "RLike",
            Expr::IsTrue(..) => "IsTrue",
            Expr::IsFalse(..) => "IsFalse",
            Expr::IsUnknown(..) => "IsUnknown",
            Expr::IsNotTrue(..) => "IsNotTrue",
            Expr::IsNotFalse(..) => "IsNotFalse",
            Expr::IsNotUnknown(..) => "IsNotUnknown",
            Expr::Literal(..) => "Literal",
            Expr::Negative(..) => "Negative",
            Expr::Not(..) => "Not",
            Expr::Placeholder(_) => "Placeholder",
            Expr::ScalarFunction(..) => "ScalarFunction",
            Expr::ScalarSubquery { .. } => "ScalarSubquery",
            Expr::ScalarVariable(..) => "ScalarVariable",
            Expr::TryCast { .. } => "TryCast",
            Expr::WindowFunction { .. } => "WindowFunction",
            // The `Wildcard` variant is deprecated but must still be matched.
            #[expect(deprecated)]
            Expr::Wildcard { .. } => "Wildcard",
            Expr::Unnest { .. } => "Unnest",
            Expr::HigherOrderFunction { .. } => "HigherOrderFunction",
            Expr::Lambda { .. } => "Lambda",
            Expr::LambdaVariable { .. } => "LambdaVariable",
        }
    }
1690
    /// Return the binary expression `self = other` ([`Operator::Eq`])
    ///
    /// This consumes both operands and builds a new expression; it does not
    /// test the two expressions for equality.
    pub fn eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Eq, other)
    }
1695
    /// Return the binary expression `self != other` ([`Operator::NotEq`])
    pub fn not_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::NotEq, other)
    }
1700
    /// Return the binary expression `self > other` ([`Operator::Gt`])
    pub fn gt(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Gt, other)
    }
1705
    /// Return the binary expression `self >= other` ([`Operator::GtEq`])
    pub fn gt_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::GtEq, other)
    }
1710
    /// Return the binary expression `self < other` ([`Operator::Lt`])
    pub fn lt(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Lt, other)
    }
1715
    /// Return the binary expression `self <= other` ([`Operator::LtEq`])
    pub fn lt_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::LtEq, other)
    }
1720
    /// Return the binary expression `self AND other` ([`Operator::And`])
    pub fn and(self, other: Expr) -> Expr {
        binary_expr(self, Operator::And, other)
    }
1725
    /// Return the binary expression `self OR other` ([`Operator::Or`])
    pub fn or(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Or, other)
    }
1730
1731    /// Return `self LIKE other`
1732    pub fn like(self, other: Expr) -> Expr {
1733        Expr::Like(Like::new(
1734            false,
1735            Box::new(self),
1736            Box::new(other),
1737            None,
1738            false,
1739        ))
1740    }
1741
1742    /// Return `self NOT LIKE other`
1743    pub fn not_like(self, other: Expr) -> Expr {
1744        Expr::Like(Like::new(
1745            true,
1746            Box::new(self),
1747            Box::new(other),
1748            None,
1749            false,
1750        ))
1751    }
1752
1753    /// Return `self ILIKE other`
1754    pub fn ilike(self, other: Expr) -> Expr {
1755        Expr::Like(Like::new(
1756            false,
1757            Box::new(self),
1758            Box::new(other),
1759            None,
1760            true,
1761        ))
1762    }
1763
1764    /// Return `self NOT ILIKE other`
1765    pub fn not_ilike(self, other: Expr) -> Expr {
1766        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1767    }
1768
    /// Return the name to use for the specific Expr
    ///
    /// The name is the string form of [`Expr::schema_name`]. This
    /// implementation always returns `Ok`.
    pub fn name_for_alias(&self) -> Result<String> {
        Ok(self.schema_name().to_string())
    }
1773
1774    /// Ensure `expr` has the name as `original_name` by adding an
1775    /// alias if necessary.
1776    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1777        let new_name = self.name_for_alias()?;
1778        if new_name == original_name {
1779            return Ok(self);
1780        }
1781
1782        Ok(self.alias(original_name))
1783    }
1784
1785    /// Return `self AS name` alias expression
1786    pub fn alias(self, name: impl Into<String>) -> Expr {
1787        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1788    }
1789
1790    /// Return `self AS name` alias expression with metadata
1791    ///
1792    /// The metadata will be attached to the Arrow Schema field when the expression
1793    /// is converted to a field via `Expr.to_field()`.
1794    ///
1795    /// # Example
1796    /// ```
1797    /// # use datafusion_expr::col;
1798    /// # use std::collections::HashMap;
1799    /// # use datafusion_common::metadata::FieldMetadata;
1800    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1801    /// let metadata = FieldMetadata::from(metadata);
1802    /// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
1803    /// ```
1804    pub fn alias_with_metadata(
1805        self,
1806        name: impl Into<String>,
1807        metadata: Option<FieldMetadata>,
1808    ) -> Expr {
1809        Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
1810    }
1811
1812    /// Return `self AS name` alias expression with a specific qualifier
1813    pub fn alias_qualified(
1814        self,
1815        relation: Option<impl Into<TableReference>>,
1816        name: impl Into<String>,
1817    ) -> Expr {
1818        Expr::Alias(Alias::new(self, relation, name.into()))
1819    }
1820
1821    /// Return `self AS name` alias expression with a specific qualifier and metadata
1822    ///
1823    /// The metadata will be attached to the Arrow Schema field when the expression
1824    /// is converted to a field via `Expr.to_field()`.
1825    ///
1826    /// # Example
1827    /// ```
1828    /// # use datafusion_expr::col;
1829    /// # use std::collections::HashMap;
1830    /// # use datafusion_common::metadata::FieldMetadata;
1831    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1832    /// let metadata = FieldMetadata::from(metadata);
1833    /// let expr =
1834    ///     col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
1835    /// ```
1836    pub fn alias_qualified_with_metadata(
1837        self,
1838        relation: Option<impl Into<TableReference>>,
1839        name: impl Into<String>,
1840        metadata: Option<FieldMetadata>,
1841    ) -> Expr {
1842        Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
1843    }
1844
1845    /// Remove an alias from an expression if one exists.
1846    ///
1847    /// If the expression is not an alias, the expression is returned unchanged.
1848    /// This method does not remove aliases from nested expressions.
1849    ///
1850    /// # Example
1851    /// ```
1852    /// # use datafusion_expr::col;
1853    /// // `foo as "bar"` is unaliased to `foo`
1854    /// let expr = col("foo").alias("bar");
1855    /// assert_eq!(expr.unalias(), col("foo"));
1856    ///
1857    /// // `foo as "bar" + baz` is not unaliased
1858    /// let expr = col("foo").alias("bar") + col("baz");
1859    /// assert_eq!(expr.clone().unalias(), expr);
1860    ///
1861    /// // `foo as "bar" as "baz" is unaliased to foo as "bar"
1862    /// let expr = col("foo").alias("bar").alias("baz");
1863    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1864    /// ```
1865    pub fn unalias(self) -> Expr {
1866        match self {
1867            Expr::Alias(alias) => *alias.expr,
1868            _ => self,
1869        }
1870    }
1871
1872    /// Recursively removed potentially multiple aliases from an expression.
1873    ///
1874    /// This method removes nested aliases and returns [`Transformed`]
1875    /// to signal if the expression was changed.
1876    ///
1877    /// # Example
1878    /// ```
1879    /// # use datafusion_expr::col;
1880    /// // `foo as "bar"` is unaliased to `foo`
1881    /// let expr = col("foo").alias("bar");
1882    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1883    ///
1884    /// // `foo as "bar" + baz` is  unaliased
1885    /// let expr = col("foo").alias("bar") + col("baz");
1886    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1887    ///
1888    /// // `foo as "bar" as "baz" is unalaised to foo
1889    /// let expr = col("foo").alias("bar").alias("baz");
1890    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1891    /// ```
1892    pub fn unalias_nested(self) -> Transformed<Expr> {
1893        self.transform_down_up(
1894            |expr| {
1895                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1896                let recursion = if matches!(
1897                    expr,
1898                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1899                ) {
1900                    // Subqueries could contain aliases so don't recurse into those
1901                    TreeNodeRecursion::Jump
1902                } else {
1903                    TreeNodeRecursion::Continue
1904                };
1905                Ok(Transformed::new(expr, false, recursion))
1906            },
1907            |expr| {
1908                // f_up: unalias on up so we can remove nested aliases like
1909                // `(x as foo) as bar`
1910                if let Expr::Alias(alias) = expr {
1911                    match alias
1912                        .metadata
1913                        .as_ref()
1914                        .map(|h| h.is_empty())
1915                        .unwrap_or(true)
1916                    {
1917                        true => Ok(Transformed::yes(*alias.expr)),
1918                        false => Ok(Transformed::no(Expr::Alias(alias))),
1919                    }
1920                } else {
1921                    Ok(Transformed::no(expr))
1922                }
1923            },
1924        )
1925        // Unreachable code: internal closure doesn't return err
1926        .unwrap()
1927    }
1928
1929    /// Return `self IN <list>` if `negated` is false, otherwise
1930    /// return `self NOT IN <list>`.a
1931    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
1932        Expr::InList(InList::new(Box::new(self), list, negated))
1933    }
1934
1935    /// Return `IsNull(Box(self))
1936    pub fn is_null(self) -> Expr {
1937        Expr::IsNull(Box::new(self))
1938    }
1939
1940    /// Return `IsNotNull(Box(self))
1941    pub fn is_not_null(self) -> Expr {
1942        Expr::IsNotNull(Box::new(self))
1943    }
1944
1945    /// Create a sort configuration from an existing expression.
1946    ///
1947    /// ```
1948    /// # use datafusion_expr::col;
1949    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
1950    /// ```
1951    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
1952        Sort::new(self, asc, nulls_first)
1953    }
1954
1955    /// Return `IsTrue(Box(self))`
1956    pub fn is_true(self) -> Expr {
1957        Expr::IsTrue(Box::new(self))
1958    }
1959
1960    /// Return `IsNotTrue(Box(self))`
1961    pub fn is_not_true(self) -> Expr {
1962        Expr::IsNotTrue(Box::new(self))
1963    }
1964
1965    /// Return `IsFalse(Box(self))`
1966    pub fn is_false(self) -> Expr {
1967        Expr::IsFalse(Box::new(self))
1968    }
1969
1970    /// Return `IsNotFalse(Box(self))`
1971    pub fn is_not_false(self) -> Expr {
1972        Expr::IsNotFalse(Box::new(self))
1973    }
1974
1975    /// Return `IsUnknown(Box(self))`
1976    pub fn is_unknown(self) -> Expr {
1977        Expr::IsUnknown(Box::new(self))
1978    }
1979
1980    /// Return `IsNotUnknown(Box(self))`
1981    pub fn is_not_unknown(self) -> Expr {
1982        Expr::IsNotUnknown(Box::new(self))
1983    }
1984
1985    /// return `self BETWEEN low AND high`
1986    pub fn between(self, low: Expr, high: Expr) -> Expr {
1987        Expr::Between(Between::new(
1988            Box::new(self),
1989            false,
1990            Box::new(low),
1991            Box::new(high),
1992        ))
1993    }
1994
1995    /// Return `self NOT BETWEEN low AND high`
1996    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1997        Expr::Between(Between::new(
1998            Box::new(self),
1999            true,
2000            Box::new(low),
2001            Box::new(high),
2002        ))
2003    }
2004    /// Return a reference to the inner `Column` if any
2005    ///
2006    /// returns `None` if the expression is not a `Column`
2007    ///
2008    /// Note: None may be returned for expressions that are not `Column` but
2009    /// are convertible to `Column` such as `Cast` expressions.
2010    ///
2011    /// Example
2012    /// ```
2013    /// # use datafusion_common::Column;
2014    /// use datafusion_expr::{col, Expr};
2015    /// let expr = col("foo");
2016    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
2017    ///
2018    /// let expr = col("foo").alias("bar");
2019    /// assert_eq!(expr.try_as_col(), None);
2020    /// ```
2021    pub fn try_as_col(&self) -> Option<&Column> {
2022        if let Expr::Column(it) = self {
2023            Some(it)
2024        } else {
2025            None
2026        }
2027    }
2028
2029    /// Returns the inner `Column` if any. This is a specialized version of
2030    /// [`Self::try_as_col`] that take Cast expressions into account when the
2031    /// expression is as on condition for joins.
2032    ///
2033    /// Called this method when you are sure that the expression is a `Column`
2034    /// or a `Cast` expression that wraps a `Column`.
2035    pub fn get_as_join_column(&self) -> Option<&Column> {
2036        match self {
2037            Expr::Column(c) => Some(c),
2038            Expr::Cast(Cast { expr, .. }) => match &**expr {
2039                Expr::Column(c) => Some(c),
2040                _ => None,
2041            },
2042            _ => None,
2043        }
2044    }
2045
2046    /// Return all references to columns in this expression.
2047    ///
2048    /// # Example
2049    /// ```
2050    /// # use std::collections::HashSet;
2051    /// # use datafusion_common::Column;
2052    /// # use datafusion_expr::col;
2053    /// // For an expression `a + (b * a)`
2054    /// let expr = col("a") + (col("b") * col("a"));
2055    /// let refs = expr.column_refs();
2056    /// // refs contains "a" and "b"
2057    /// assert_eq!(refs.len(), 2);
2058    /// assert!(refs.contains(&Column::new_unqualified("a")));
2059    /// assert!(refs.contains(&Column::new_unqualified("b")));
2060    /// ```
2061    pub fn column_refs(&self) -> HashSet<&Column> {
2062        let mut using_columns = HashSet::new();
2063        self.add_column_refs(&mut using_columns);
2064        using_columns
2065    }
2066
2067    /// Adds references to all columns in this expression to the set
2068    ///
2069    /// See [`Self::column_refs`] for details
2070    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
2071        self.apply(|expr| {
2072            if let Expr::Column(col) = expr {
2073                set.insert(col);
2074            }
2075            Ok(TreeNodeRecursion::Continue)
2076        })
2077        .expect("traversal is infallible");
2078    }
2079
2080    /// Return all references to columns and their occurrence counts in the expression.
2081    ///
2082    /// # Example
2083    /// ```
2084    /// # use std::collections::HashMap;
2085    /// # use datafusion_common::Column;
2086    /// # use datafusion_expr::col;
2087    /// // For an expression `a + (b * a)`
2088    /// let expr = col("a") + (col("b") * col("a"));
2089    /// let mut refs = expr.column_refs_counts();
2090    /// // refs contains "a" and "b"
2091    /// assert_eq!(refs.len(), 2);
2092    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
2093    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
2094    /// ```
2095    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
2096        let mut map = HashMap::new();
2097        self.add_column_ref_counts(&mut map);
2098        map
2099    }
2100
2101    /// Adds references to all columns and their occurrence counts in the expression to
2102    /// the map.
2103    ///
2104    /// See [`Self::column_refs_counts`] for details
2105    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
2106        self.apply(|expr| {
2107            if let Expr::Column(col) = expr {
2108                *map.entry(col).or_default() += 1;
2109            }
2110            Ok(TreeNodeRecursion::Continue)
2111        })
2112        .expect("traversal is infallible");
2113    }
2114
2115    /// Returns true if there are any column references in this Expr
2116    pub fn any_column_refs(&self) -> bool {
2117        self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
2118            .expect("exists closure is infallible")
2119    }
2120
2121    /// Return true if the expression contains out reference(correlated) expressions.
2122    pub fn contains_outer(&self) -> bool {
2123        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
2124            .expect("exists closure is infallible")
2125    }
2126
2127    /// Returns true if the expression contains a scalar subquery.
2128    pub fn contains_scalar_subquery(&self) -> bool {
2129        self.exists(|expr| Ok(matches!(expr, Expr::ScalarSubquery(_))))
2130            .expect("exists closure is infallible")
2131    }
2132
2133    /// Returns true if the expression node is volatile, i.e. whether it can return
2134    /// different results when evaluated multiple times with the same input.
2135    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
2136    /// - `rand()` returns `true`,
2137    /// - `a + rand()` returns `false`
2138    pub fn is_volatile_node(&self) -> bool {
2139        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
2140    }
2141
2142    /// Returns true if the expression is volatile, i.e. whether it can return different
2143    /// results when evaluated multiple times with the same input.
2144    ///
2145    /// For example the function call `RANDOM()` is volatile as each call will
2146    /// return a different value.
2147    ///
2148    /// See [`Volatility`] for more information.
2149    pub fn is_volatile(&self) -> bool {
2150        self.exists(|expr| Ok(expr.is_volatile_node()))
2151            .expect("exists closure is infallible")
2152    }
2153
2154    /// Recursively find all [`Expr::Placeholder`] expressions, and
2155    /// to infer their [`DataType`] from the context of their use.
2156    ///
2157    /// For example, given an expression like `<int32> = $0` will infer `$0` to
2158    /// have type `int32`.
2159    ///
2160    /// Returns transformed expression and flag that is true if expression contains
2161    /// at least one placeholder.
2162    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
2163        let mut has_placeholder = false;
2164        self.transform(|mut expr| {
2165            match &mut expr {
2166                // Default to assuming the arguments are the same type
2167                Expr::BinaryExpr(BinaryExpr { left, op: _, right }) => {
2168                    rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
2169                    rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
2170                }
2171                Expr::Between(Between {
2172                    expr,
2173                    negated: _,
2174                    low,
2175                    high,
2176                }) => {
2177                    rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
2178                    rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
2179                }
2180                Expr::InList(InList {
2181                    expr,
2182                    list,
2183                    negated: _,
2184                }) => {
2185                    for item in list.iter_mut() {
2186                        rewrite_placeholder(item, expr.as_ref(), schema)?;
2187                    }
2188                }
2189                Expr::Like(Like { expr, pattern, .. })
2190                | Expr::SimilarTo(Like { expr, pattern, .. }) => {
2191                    rewrite_placeholder(pattern.as_mut(), expr.as_ref(), schema)?;
2192                }
2193                Expr::Placeholder(_) => {
2194                    has_placeholder = true;
2195                }
2196                _ => {}
2197            }
2198            Ok(Transformed::yes(expr))
2199        })
2200        .data()
2201        .map(|data| (data, has_placeholder))
2202    }
2203
2204    /// Returns true if some of this `exprs` subexpressions may not be evaluated
2205    /// and thus any side effects (like divide by zero) may not be encountered
2206    pub fn short_circuits(&self) -> bool {
2207        match self {
2208            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
2209            Expr::HigherOrderFunction(HigherOrderFunction { func, .. }) => {
2210                func.short_circuits()
2211            }
2212            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
2213                matches!(op, Operator::And | Operator::Or)
2214            }
2215            Expr::Case { .. } => true,
2216            // Use explicit pattern match instead of a default
2217            // implementation, so that in the future if someone adds
2218            // new Expr types, they will check here as well
2219            // TODO: remove the next line after `Expr::Wildcard` is removed
2220            #[expect(deprecated)]
2221            Expr::AggregateFunction(..)
2222            | Expr::Alias(..)
2223            | Expr::Between(..)
2224            | Expr::Cast(..)
2225            | Expr::Column(..)
2226            | Expr::Exists(..)
2227            | Expr::GroupingSet(..)
2228            | Expr::InList(..)
2229            | Expr::InSubquery(..)
2230            | Expr::SetComparison(..)
2231            | Expr::IsFalse(..)
2232            | Expr::IsNotFalse(..)
2233            | Expr::IsNotNull(..)
2234            | Expr::IsNotTrue(..)
2235            | Expr::IsNotUnknown(..)
2236            | Expr::IsNull(..)
2237            | Expr::IsTrue(..)
2238            | Expr::IsUnknown(..)
2239            | Expr::Like(..)
2240            | Expr::ScalarSubquery(..)
2241            | Expr::ScalarVariable(_, _)
2242            | Expr::SimilarTo(..)
2243            | Expr::Not(..)
2244            | Expr::Negative(..)
2245            | Expr::OuterReferenceColumn(_, _)
2246            | Expr::TryCast(..)
2247            | Expr::Unnest(..)
2248            | Expr::Wildcard { .. }
2249            | Expr::WindowFunction(..)
2250            | Expr::Literal(..)
2251            | Expr::Placeholder(..)
2252            | Expr::Lambda(..)
2253            | Expr::LambdaVariable(..) => false,
2254        }
2255    }
2256
2257    /// Returns a reference to the set of locations in the SQL query where this
2258    /// expression appears, if known. [`None`] is returned if the expression
2259    /// type doesn't support tracking locations yet.
2260    pub fn spans(&self) -> Option<&Spans> {
2261        match self {
2262            Expr::Column(col) => Some(&col.spans),
2263            _ => None,
2264        }
2265    }
2266
2267    /// Check if the Expr is literal and get the literal value if it is.
2268    pub fn as_literal(&self) -> Option<&ScalarValue> {
2269        if let Expr::Literal(lit, _) = self {
2270            Some(lit)
2271        } else {
2272            None
2273        }
2274    }
2275
2276    /// Return a `Expr` with all [`LambdaVariable`] resolved only if all of them
2277    /// are contained in the subtree of the [`HigherOrderFunction`] it originates from,
2278    /// otherwise returns an error
2279    pub fn resolve_lambda_variables(
2280        self,
2281        schema: &DFSchema,
2282    ) -> Result<Transformed<Expr>> {
2283        resolve_lambda_variables(self, schema, &mut HashMap::new())
2284    }
2285}
2286
2287impl Normalizeable for Expr {
2288    fn can_normalize(&self) -> bool {
2289        #[expect(clippy::match_like_matches_macro)]
2290        match self {
2291            Expr::BinaryExpr(BinaryExpr {
2292                op:
2293                    _op @ (Operator::Plus
2294                    | Operator::Multiply
2295                    | Operator::BitwiseAnd
2296                    | Operator::BitwiseOr
2297                    | Operator::BitwiseXor
2298                    | Operator::Eq
2299                    | Operator::NotEq),
2300                ..
2301            }) => true,
2302            _ => false,
2303        }
2304    }
2305}
2306
2307impl NormalizeEq for Expr {
2308    fn normalize_eq(&self, other: &Self) -> bool {
2309        match (self, other) {
2310            (
2311                Expr::BinaryExpr(BinaryExpr {
2312                    left: self_left,
2313                    op: self_op,
2314                    right: self_right,
2315                }),
2316                Expr::BinaryExpr(BinaryExpr {
2317                    left: other_left,
2318                    op: other_op,
2319                    right: other_right,
2320                }),
2321            ) => {
2322                if self_op != other_op {
2323                    return false;
2324                }
2325
2326                if matches!(
2327                    self_op,
2328                    Operator::Plus
2329                        | Operator::Multiply
2330                        | Operator::BitwiseAnd
2331                        | Operator::BitwiseOr
2332                        | Operator::BitwiseXor
2333                        | Operator::Eq
2334                        | Operator::NotEq
2335                ) {
2336                    (self_left.normalize_eq(other_left)
2337                        && self_right.normalize_eq(other_right))
2338                        || (self_left.normalize_eq(other_right)
2339                            && self_right.normalize_eq(other_left))
2340                } else {
2341                    self_left.normalize_eq(other_left)
2342                        && self_right.normalize_eq(other_right)
2343                }
2344            }
2345            (
2346                Expr::Alias(Alias {
2347                    expr: self_expr,
2348                    relation: self_relation,
2349                    name: self_name,
2350                    ..
2351                }),
2352                Expr::Alias(Alias {
2353                    expr: other_expr,
2354                    relation: other_relation,
2355                    name: other_name,
2356                    ..
2357                }),
2358            ) => {
2359                self_name == other_name
2360                    && self_relation == other_relation
2361                    && self_expr.normalize_eq(other_expr)
2362            }
2363            (
2364                Expr::Like(Like {
2365                    negated: self_negated,
2366                    expr: self_expr,
2367                    pattern: self_pattern,
2368                    escape_char: self_escape_char,
2369                    case_insensitive: self_case_insensitive,
2370                }),
2371                Expr::Like(Like {
2372                    negated: other_negated,
2373                    expr: other_expr,
2374                    pattern: other_pattern,
2375                    escape_char: other_escape_char,
2376                    case_insensitive: other_case_insensitive,
2377                }),
2378            )
2379            | (
2380                Expr::SimilarTo(Like {
2381                    negated: self_negated,
2382                    expr: self_expr,
2383                    pattern: self_pattern,
2384                    escape_char: self_escape_char,
2385                    case_insensitive: self_case_insensitive,
2386                }),
2387                Expr::SimilarTo(Like {
2388                    negated: other_negated,
2389                    expr: other_expr,
2390                    pattern: other_pattern,
2391                    escape_char: other_escape_char,
2392                    case_insensitive: other_case_insensitive,
2393                }),
2394            ) => {
2395                self_negated == other_negated
2396                    && self_escape_char == other_escape_char
2397                    && self_case_insensitive == other_case_insensitive
2398                    && self_expr.normalize_eq(other_expr)
2399                    && self_pattern.normalize_eq(other_pattern)
2400            }
2401            (Expr::Not(self_expr), Expr::Not(other_expr))
2402            | (Expr::IsNull(self_expr), Expr::IsNull(other_expr))
2403            | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr))
2404            | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr))
2405            | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr))
2406            | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr))
2407            | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr))
2408            | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr))
2409            | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr))
2410            | (Expr::Negative(self_expr), Expr::Negative(other_expr))
2411            | (
2412                Expr::Unnest(Unnest { expr: self_expr }),
2413                Expr::Unnest(Unnest { expr: other_expr }),
2414            ) => self_expr.normalize_eq(other_expr),
2415            (
2416                Expr::Between(Between {
2417                    expr: self_expr,
2418                    negated: self_negated,
2419                    low: self_low,
2420                    high: self_high,
2421                }),
2422                Expr::Between(Between {
2423                    expr: other_expr,
2424                    negated: other_negated,
2425                    low: other_low,
2426                    high: other_high,
2427                }),
2428            ) => {
2429                self_negated == other_negated
2430                    && self_expr.normalize_eq(other_expr)
2431                    && self_low.normalize_eq(other_low)
2432                    && self_high.normalize_eq(other_high)
2433            }
2434            (
2435                Expr::Cast(Cast {
2436                    expr: self_expr,
2437                    field: self_field,
2438                }),
2439                Expr::Cast(Cast {
2440                    expr: other_expr,
2441                    field: other_field,
2442                }),
2443            )
2444            | (
2445                Expr::TryCast(TryCast {
2446                    expr: self_expr,
2447                    field: self_field,
2448                }),
2449                Expr::TryCast(TryCast {
2450                    expr: other_expr,
2451                    field: other_field,
2452                }),
2453            ) => self_field == other_field && self_expr.normalize_eq(other_expr),
2454            (
2455                Expr::ScalarFunction(ScalarFunction {
2456                    func: self_func,
2457                    args: self_args,
2458                }),
2459                Expr::ScalarFunction(ScalarFunction {
2460                    func: other_func,
2461                    args: other_args,
2462                }),
2463            ) => {
2464                self_func.name() == other_func.name()
2465                    && self_args.len() == other_args.len()
2466                    && self_args
2467                        .iter()
2468                        .zip(other_args.iter())
2469                        .all(|(a, b)| a.normalize_eq(b))
2470            }
2471            (
2472                Expr::AggregateFunction(AggregateFunction {
2473                    func: self_func,
2474                    params:
2475                        AggregateFunctionParams {
2476                            args: self_args,
2477                            distinct: self_distinct,
2478                            filter: self_filter,
2479                            order_by: self_order_by,
2480                            null_treatment: self_null_treatment,
2481                        },
2482                }),
2483                Expr::AggregateFunction(AggregateFunction {
2484                    func: other_func,
2485                    params:
2486                        AggregateFunctionParams {
2487                            args: other_args,
2488                            distinct: other_distinct,
2489                            filter: other_filter,
2490                            order_by: other_order_by,
2491                            null_treatment: other_null_treatment,
2492                        },
2493                }),
2494            ) => {
2495                self_func.name() == other_func.name()
2496                    && self_distinct == other_distinct
2497                    && self_null_treatment == other_null_treatment
2498                    && self_args.len() == other_args.len()
2499                    && self_args
2500                        .iter()
2501                        .zip(other_args.iter())
2502                        .all(|(a, b)| a.normalize_eq(b))
2503                    && match (self_filter, other_filter) {
2504                        (Some(self_filter), Some(other_filter)) => {
2505                            self_filter.normalize_eq(other_filter)
2506                        }
2507                        (None, None) => true,
2508                        _ => false,
2509                    }
2510                    && self_order_by
2511                        .iter()
2512                        .zip(other_order_by.iter())
2513                        .all(|(a, b)| {
2514                            a.asc == b.asc
2515                                && a.nulls_first == b.nulls_first
2516                                && a.expr.normalize_eq(&b.expr)
2517                        })
2518                    && self_order_by.len() == other_order_by.len()
2519            }
2520            (Expr::WindowFunction(left), Expr::WindowFunction(other)) => {
2521                let WindowFunction {
2522                    fun: self_fun,
2523                    params:
2524                        WindowFunctionParams {
2525                            args: self_args,
2526                            window_frame: self_window_frame,
2527                            partition_by: self_partition_by,
2528                            order_by: self_order_by,
2529                            filter: self_filter,
2530                            null_treatment: self_null_treatment,
2531                            distinct: self_distinct,
2532                        },
2533                } = left.as_ref();
2534                let WindowFunction {
2535                    fun: other_fun,
2536                    params:
2537                        WindowFunctionParams {
2538                            args: other_args,
2539                            window_frame: other_window_frame,
2540                            partition_by: other_partition_by,
2541                            order_by: other_order_by,
2542                            filter: other_filter,
2543                            null_treatment: other_null_treatment,
2544                            distinct: other_distinct,
2545                        },
2546                } = other.as_ref();
2547
2548                self_fun.name() == other_fun.name()
2549                    && self_window_frame == other_window_frame
2550                    && match (self_filter, other_filter) {
2551                        (Some(a), Some(b)) => a.normalize_eq(b),
2552                        (None, None) => true,
2553                        _ => false,
2554                    }
2555                    && self_null_treatment == other_null_treatment
2556                    && self_args.len() == other_args.len()
2557                    && self_args
2558                        .iter()
2559                        .zip(other_args.iter())
2560                        .all(|(a, b)| a.normalize_eq(b))
2561                    && self_partition_by
2562                        .iter()
2563                        .zip(other_partition_by.iter())
2564                        .all(|(a, b)| a.normalize_eq(b))
2565                    && self_order_by
2566                        .iter()
2567                        .zip(other_order_by.iter())
2568                        .all(|(a, b)| {
2569                            a.asc == b.asc
2570                                && a.nulls_first == b.nulls_first
2571                                && a.expr.normalize_eq(&b.expr)
2572                        })
2573                    && self_distinct == other_distinct
2574            }
2575            (
2576                Expr::Exists(Exists {
2577                    subquery: self_subquery,
2578                    negated: self_negated,
2579                }),
2580                Expr::Exists(Exists {
2581                    subquery: other_subquery,
2582                    negated: other_negated,
2583                }),
2584            ) => {
2585                self_negated == other_negated
2586                    && self_subquery.normalize_eq(other_subquery)
2587            }
2588            (
2589                Expr::InSubquery(InSubquery {
2590                    expr: self_expr,
2591                    subquery: self_subquery,
2592                    negated: self_negated,
2593                }),
2594                Expr::InSubquery(InSubquery {
2595                    expr: other_expr,
2596                    subquery: other_subquery,
2597                    negated: other_negated,
2598                }),
2599            ) => {
2600                self_negated == other_negated
2601                    && self_expr.normalize_eq(other_expr)
2602                    && self_subquery.normalize_eq(other_subquery)
2603            }
2604            (
2605                Expr::ScalarSubquery(self_subquery),
2606                Expr::ScalarSubquery(other_subquery),
2607            ) => self_subquery.normalize_eq(other_subquery),
2608            (
2609                Expr::GroupingSet(GroupingSet::Rollup(self_exprs)),
2610                Expr::GroupingSet(GroupingSet::Rollup(other_exprs)),
2611            )
2612            | (
2613                Expr::GroupingSet(GroupingSet::Cube(self_exprs)),
2614                Expr::GroupingSet(GroupingSet::Cube(other_exprs)),
2615            ) => {
2616                self_exprs.len() == other_exprs.len()
2617                    && self_exprs
2618                        .iter()
2619                        .zip(other_exprs.iter())
2620                        .all(|(a, b)| a.normalize_eq(b))
2621            }
2622            (
2623                Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)),
2624                Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)),
2625            ) => {
2626                self_exprs.len() == other_exprs.len()
2627                    && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| {
2628                        a.len() == b.len()
2629                            && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y))
2630                    })
2631            }
2632            (
2633                Expr::InList(InList {
2634                    expr: self_expr,
2635                    list: self_list,
2636                    negated: self_negated,
2637                }),
2638                Expr::InList(InList {
2639                    expr: other_expr,
2640                    list: other_list,
2641                    negated: other_negated,
2642                }),
2643            ) => {
2644                // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)`
2645                self_negated == other_negated
2646                    && self_expr.normalize_eq(other_expr)
2647                    && self_list.len() == other_list.len()
2648                    && self_list
2649                        .iter()
2650                        .zip(other_list.iter())
2651                        .all(|(a, b)| a.normalize_eq(b))
2652            }
2653            (
2654                Expr::Case(Case {
2655                    expr: self_expr,
2656                    when_then_expr: self_when_then_expr,
2657                    else_expr: self_else_expr,
2658                }),
2659                Expr::Case(Case {
2660                    expr: other_expr,
2661                    when_then_expr: other_when_then_expr,
2662                    else_expr: other_else_expr,
2663                }),
2664            ) => {
2665                // TODO: normalize_eq for when_then_expr
2666                // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END`
2667                self_when_then_expr.len() == other_when_then_expr.len()
2668                    && self_when_then_expr
2669                        .iter()
2670                        .zip(other_when_then_expr.iter())
2671                        .all(|((self_when, self_then), (other_when, other_then))| {
2672                            self_when.normalize_eq(other_when)
2673                                && self_then.normalize_eq(other_then)
2674                        })
2675                    && match (self_expr, other_expr) {
2676                        (Some(self_expr), Some(other_expr)) => {
2677                            self_expr.normalize_eq(other_expr)
2678                        }
2679                        (None, None) => true,
2680                        (_, _) => false,
2681                    }
2682                    && match (self_else_expr, other_else_expr) {
2683                        (Some(self_else_expr), Some(other_else_expr)) => {
2684                            self_else_expr.normalize_eq(other_else_expr)
2685                        }
2686                        (None, None) => true,
2687                        (_, _) => false,
2688                    }
2689            }
2690            (_, _) => self == other,
2691        }
2692    }
2693}
2694
2695impl HashNode for Expr {
2696    /// As it is pretty easy to forget changing this method when `Expr` changes the
2697    /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes
2698    /// compile time.
2699    fn hash_node<H: Hasher>(&self, state: &mut H) {
2700        mem::discriminant(self).hash(state);
2701        match self {
2702            Expr::Alias(Alias {
2703                expr: _expr,
2704                relation,
2705                name,
2706                ..
2707            }) => {
2708                relation.hash(state);
2709                name.hash(state);
2710            }
2711            Expr::Column(column) => {
2712                column.hash(state);
2713            }
2714            Expr::ScalarVariable(field, name) => {
2715                field.hash(state);
2716                name.hash(state);
2717            }
2718            Expr::Literal(scalar_value, _) => {
2719                scalar_value.hash(state);
2720            }
2721            Expr::BinaryExpr(BinaryExpr {
2722                left: _left,
2723                op,
2724                right: _right,
2725            }) => {
2726                op.hash(state);
2727            }
2728            Expr::Like(Like {
2729                negated,
2730                expr: _expr,
2731                pattern: _pattern,
2732                escape_char,
2733                case_insensitive,
2734            })
2735            | Expr::SimilarTo(Like {
2736                negated,
2737                expr: _expr,
2738                pattern: _pattern,
2739                escape_char,
2740                case_insensitive,
2741            }) => {
2742                negated.hash(state);
2743                escape_char.hash(state);
2744                case_insensitive.hash(state);
2745            }
2746            Expr::Not(_expr)
2747            | Expr::IsNotNull(_expr)
2748            | Expr::IsNull(_expr)
2749            | Expr::IsTrue(_expr)
2750            | Expr::IsFalse(_expr)
2751            | Expr::IsUnknown(_expr)
2752            | Expr::IsNotTrue(_expr)
2753            | Expr::IsNotFalse(_expr)
2754            | Expr::IsNotUnknown(_expr)
2755            | Expr::Negative(_expr) => {}
2756            Expr::Between(Between {
2757                expr: _expr,
2758                negated,
2759                low: _low,
2760                high: _high,
2761            }) => {
2762                negated.hash(state);
2763            }
2764            Expr::Case(Case {
2765                expr: _expr,
2766                when_then_expr: _when_then_expr,
2767                else_expr: _else_expr,
2768            }) => {}
2769            Expr::Cast(Cast { expr: _expr, field })
2770            | Expr::TryCast(TryCast { expr: _expr, field }) => {
2771                field.hash(state);
2772            }
2773            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
2774                func.hash(state);
2775            }
2776            Expr::AggregateFunction(AggregateFunction {
2777                func,
2778                params:
2779                    AggregateFunctionParams {
2780                        args: _args,
2781                        distinct,
2782                        filter: _,
2783                        order_by: _,
2784                        null_treatment,
2785                    },
2786            }) => {
2787                func.hash(state);
2788                distinct.hash(state);
2789                null_treatment.hash(state);
2790            }
2791            Expr::WindowFunction(window_fun) => {
2792                let WindowFunction {
2793                    fun,
2794                    params:
2795                        WindowFunctionParams {
2796                            args: _args,
2797                            partition_by: _,
2798                            order_by: _,
2799                            window_frame,
2800                            filter,
2801                            null_treatment,
2802                            distinct,
2803                        },
2804                } = window_fun.as_ref();
2805                fun.hash(state);
2806                window_frame.hash(state);
2807                filter.hash(state);
2808                null_treatment.hash(state);
2809                distinct.hash(state);
2810            }
2811            Expr::InList(InList {
2812                expr: _expr,
2813                list: _list,
2814                negated,
2815            }) => {
2816                negated.hash(state);
2817            }
2818            Expr::Exists(Exists { subquery, negated }) => {
2819                subquery.hash(state);
2820                negated.hash(state);
2821            }
2822            Expr::InSubquery(InSubquery {
2823                expr: _expr,
2824                subquery,
2825                negated,
2826            }) => {
2827                subquery.hash(state);
2828                negated.hash(state);
2829            }
2830            Expr::SetComparison(SetComparison {
2831                expr: _,
2832                subquery,
2833                op,
2834                quantifier,
2835            }) => {
2836                subquery.hash(state);
2837                op.hash(state);
2838                quantifier.hash(state);
2839            }
2840            Expr::ScalarSubquery(subquery) => {
2841                subquery.hash(state);
2842            }
2843            #[expect(deprecated)]
2844            Expr::Wildcard { qualifier, options } => {
2845                qualifier.hash(state);
2846                options.hash(state);
2847            }
2848            Expr::GroupingSet(grouping_set) => {
2849                mem::discriminant(grouping_set).hash(state);
2850                match grouping_set {
2851                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
2852                    GroupingSet::GroupingSets(_exprs) => {}
2853                }
2854            }
2855            Expr::Placeholder(place_holder) => {
2856                place_holder.hash(state);
2857            }
2858            Expr::OuterReferenceColumn(field, column) => {
2859                field.hash(state);
2860                column.hash(state);
2861            }
2862            Expr::Unnest(Unnest { expr: _expr }) => {}
2863            Expr::HigherOrderFunction(HigherOrderFunction { func, args: _args }) => {
2864                func.hash(state);
2865            }
2866            Expr::Lambda(Lambda { params, body: _ }) => {
2867                params.hash(state);
2868            }
2869            Expr::LambdaVariable(LambdaVariable {
2870                name,
2871                field,
2872                spans: _,
2873            }) => {
2874                name.hash(state);
2875                field.hash(state);
2876            }
2877        };
2878    }
2879}
2880
2881// Modifies expr to match the DataType, metadata, and nullability of other if it is
2882// a placeholder with previously unspecified type information (i.e., most placeholders)
2883fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
2884    if let Expr::Placeholder(Placeholder { id: _, field }) = expr
2885        && field.is_none()
2886    {
2887        let other_field = other.to_field(schema);
2888        match other_field {
2889            Err(e) => {
2890                Err(e.context(format!(
2891                    "Can not find type of {other} needed to infer type of {expr}"
2892                )))?;
2893            }
2894            Ok((_, other_field)) => {
2895                // We can't infer the nullability of the future parameter that might
2896                // be bound, so ensure this is set to true
2897                *field = Some(other_field.as_ref().clone().with_nullable(true).into());
2898            }
2899        }
2900    };
2901    Ok(())
2902}
2903
/// Formats every element of `$ARRAY` with its `Display` implementation and
/// joins the results with `", "`.
///
/// `$ARRAY` must provide an `iter()` method; an empty collection yields an
/// empty string.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> = $ARRAY.iter().map(|item| item.to_string()).collect();
        rendered.join(", ")
    }};
}
2914
2915struct SchemaDisplay<'a>(&'a Expr);
2916impl Display for SchemaDisplay<'_> {
2917    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2918        match self.0 {
2919            // The same as Display
2920            // TODO: remove the next line after `Expr::Wildcard` is removed
2921            #[expect(deprecated)]
2922            Expr::Column(_)
2923            | Expr::Literal(_, _)
2924            | Expr::ScalarVariable(..)
2925            | Expr::OuterReferenceColumn(..)
2926            | Expr::Placeholder(_)
2927            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
2928            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2929                match func.schema_name(params) {
2930                    Ok(name) => {
2931                        write!(f, "{name}")
2932                    }
2933                    Err(e) => {
2934                        write!(f, "got error from schema_name {e}")
2935                    }
2936                }
2937            }
2938            // Expr is not shown since it is aliased
2939            Expr::Alias(Alias {
2940                name,
2941                relation: Some(relation),
2942                ..
2943            }) => write!(f, "{relation}.{name}"),
2944            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2945            Expr::Between(Between {
2946                expr,
2947                negated,
2948                low,
2949                high,
2950            }) => {
2951                if *negated {
2952                    write!(
2953                        f,
2954                        "{} NOT BETWEEN {} AND {}",
2955                        SchemaDisplay(expr),
2956                        SchemaDisplay(low),
2957                        SchemaDisplay(high),
2958                    )
2959                } else {
2960                    write!(
2961                        f,
2962                        "{} BETWEEN {} AND {}",
2963                        SchemaDisplay(expr),
2964                        SchemaDisplay(low),
2965                        SchemaDisplay(high),
2966                    )
2967                }
2968            }
2969            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2970                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
2971            }
2972            Expr::Case(Case {
2973                expr,
2974                when_then_expr,
2975                else_expr,
2976            }) => {
2977                write!(f, "CASE ")?;
2978
2979                if let Some(e) = expr {
2980                    write!(f, "{} ", SchemaDisplay(e))?;
2981                }
2982
2983                for (when, then) in when_then_expr {
2984                    write!(
2985                        f,
2986                        "WHEN {} THEN {} ",
2987                        SchemaDisplay(when),
2988                        SchemaDisplay(then),
2989                    )?;
2990                }
2991
2992                if let Some(e) = else_expr {
2993                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
2994                }
2995
2996                write!(f, "END")
2997            }
2998            // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
2999            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
3000                write!(f, "{}", SchemaDisplay(expr))
3001            }
3002            Expr::InList(InList {
3003                expr,
3004                list,
3005                negated,
3006            }) => {
3007                let inlist_name = schema_name_from_exprs(list)?;
3008
3009                if *negated {
3010                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
3011                } else {
3012                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
3013                }
3014            }
3015            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
3016            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
3017            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
3018                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
3019            }
3020            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
3021                write!(f, "GROUPING SETS (")?;
3022                for exprs in lists_of_exprs.iter() {
3023                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
3024                }
3025                write!(f, ")")
3026            }
3027            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
3028                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
3029            }
3030            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
3031            Expr::IsNotNull(expr) => {
3032                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
3033            }
3034            Expr::IsUnknown(expr) => {
3035                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
3036            }
3037            Expr::IsNotUnknown(expr) => {
3038                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
3039            }
3040            Expr::InSubquery(InSubquery { negated: true, .. }) => {
3041                write!(f, "NOT IN")
3042            }
3043            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
3044            Expr::SetComparison(SetComparison {
3045                expr,
3046                op,
3047                quantifier,
3048                ..
3049            }) => write!(f, "{} {op} {quantifier}", SchemaDisplay(expr.as_ref())),
3050            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
3051            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
3052            Expr::IsNotTrue(expr) => {
3053                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
3054            }
3055            Expr::IsNotFalse(expr) => {
3056                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
3057            }
3058            Expr::Like(Like {
3059                negated,
3060                expr,
3061                pattern,
3062                escape_char,
3063                case_insensitive,
3064            }) => {
3065                write!(
3066                    f,
3067                    "{} {}{} {}",
3068                    SchemaDisplay(expr),
3069                    if *negated { "NOT " } else { "" },
3070                    if *case_insensitive { "ILIKE" } else { "LIKE" },
3071                    SchemaDisplay(pattern),
3072                )?;
3073
3074                if let Some(char) = escape_char {
3075                    write!(f, " CHAR '{char}'")?;
3076                }
3077
3078                Ok(())
3079            }
3080            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
3081            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
3082            Expr::Unnest(Unnest { expr }) => {
3083                write!(f, "UNNEST({})", SchemaDisplay(expr))
3084            }
3085            Expr::ScalarFunction(ScalarFunction { func, args }) => {
3086                match func.schema_name(args) {
3087                    Ok(name) => {
3088                        write!(f, "{name}")
3089                    }
3090                    Err(e) => {
3091                        write!(f, "got error from schema_name {e}")
3092                    }
3093                }
3094            }
3095            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
3096                write!(f, "{}", subquery.schema().field(0).name())
3097            }
3098            Expr::SimilarTo(Like {
3099                negated,
3100                expr,
3101                pattern,
3102                escape_char,
3103                ..
3104            }) => {
3105                write!(
3106                    f,
3107                    "{} {} {}",
3108                    SchemaDisplay(expr),
3109                    if *negated {
3110                        "NOT SIMILAR TO"
3111                    } else {
3112                        "SIMILAR TO"
3113                    },
3114                    SchemaDisplay(pattern),
3115                )?;
3116                if let Some(char) = escape_char {
3117                    write!(f, " CHAR '{char}'")?;
3118                }
3119
3120                Ok(())
3121            }
3122            Expr::WindowFunction(window_fun) => {
3123                let WindowFunction { fun, params } = window_fun.as_ref();
3124                match fun {
3125                    WindowFunctionDefinition::AggregateUDF(fun) => {
3126                        match fun.window_function_schema_name(params) {
3127                            Ok(name) => {
3128                                write!(f, "{name}")
3129                            }
3130                            Err(e) => {
3131                                write!(
3132                                    f,
3133                                    "got error from window_function_schema_name {e}"
3134                                )
3135                            }
3136                        }
3137                    }
3138                    _ => {
3139                        let WindowFunctionParams {
3140                            args,
3141                            partition_by,
3142                            order_by,
3143                            window_frame,
3144                            filter,
3145                            null_treatment,
3146                            distinct,
3147                        } = params;
3148
3149                        // Write function name and open parenthesis
3150                        write!(f, "{fun}(")?;
3151
3152                        // If DISTINCT, emit the keyword
3153                        if *distinct {
3154                            write!(f, "DISTINCT ")?;
3155                        }
3156
3157                        // Write the comma‑separated argument list
3158                        write!(
3159                            f,
3160                            "{}",
3161                            schema_name_from_exprs_comma_separated_without_space(args)?
3162                        )?;
3163
3164                        // **Close the argument parenthesis**
3165                        write!(f, ")")?;
3166
3167                        if let Some(null_treatment) = null_treatment {
3168                            write!(f, " {null_treatment}")?;
3169                        }
3170
3171                        if let Some(filter) = filter {
3172                            write!(f, " FILTER (WHERE {filter})")?;
3173                        }
3174
3175                        if !partition_by.is_empty() {
3176                            write!(
3177                                f,
3178                                " PARTITION BY [{}]",
3179                                schema_name_from_exprs(partition_by)?
3180                            )?;
3181                        }
3182
3183                        if !order_by.is_empty() {
3184                            write!(
3185                                f,
3186                                " ORDER BY [{}]",
3187                                schema_name_from_sorts(order_by)?
3188                            )?;
3189                        };
3190
3191                        write!(f, " {window_frame}")
3192                    }
3193                }
3194            }
3195            Expr::HigherOrderFunction(HigherOrderFunction { func, args }) => {
3196                match func.schema_name(args) {
3197                    Ok(name) => {
3198                        write!(f, "{name}")
3199                    }
3200                    Err(e) => {
3201                        write!(f, "got error from schema_name {e}")
3202                    }
3203                }
3204            }
3205            Expr::Lambda(Lambda { params, body }) => {
3206                write!(
3207                    f,
3208                    "({}) -> {}",
3209                    display_comma_separated(params),
3210                    SchemaDisplay(body)
3211                )
3212            }
3213            Expr::LambdaVariable(c) => f.write_str(&c.name),
3214        }
3215    }
3216}
3217
3218/// A helper struct for displaying an `Expr` as an SQL-like string.
3219struct SqlDisplay<'a>(&'a Expr);
3220
3221impl Display for SqlDisplay<'_> {
3222    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
3223        match self.0 {
3224            Expr::Literal(scalar, _) => scalar.fmt(f),
3225            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
3226            Expr::Between(Between {
3227                expr,
3228                negated,
3229                low,
3230                high,
3231            }) => {
3232                if *negated {
3233                    write!(
3234                        f,
3235                        "{} NOT BETWEEN {} AND {}",
3236                        SqlDisplay(expr),
3237                        SqlDisplay(low),
3238                        SqlDisplay(high),
3239                    )
3240                } else {
3241                    write!(
3242                        f,
3243                        "{} BETWEEN {} AND {}",
3244                        SqlDisplay(expr),
3245                        SqlDisplay(low),
3246                        SqlDisplay(high),
3247                    )
3248                }
3249            }
3250            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
3251                write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
3252            }
3253            Expr::Case(Case {
3254                expr,
3255                when_then_expr,
3256                else_expr,
3257            }) => {
3258                write!(f, "CASE ")?;
3259
3260                if let Some(e) = expr {
3261                    write!(f, "{} ", SqlDisplay(e))?;
3262                }
3263
3264                for (when, then) in when_then_expr {
3265                    write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
3266                }
3267
3268                if let Some(e) = else_expr {
3269                    write!(f, "ELSE {} ", SqlDisplay(e))?;
3270                }
3271
3272                write!(f, "END")
3273            }
3274            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
3275                write!(f, "{}", SqlDisplay(expr))
3276            }
3277            Expr::InList(InList {
3278                expr,
3279                list,
3280                negated,
3281            }) => {
3282                write!(
3283                    f,
3284                    "{}{} IN {}",
3285                    SqlDisplay(expr),
3286                    if *negated { " NOT" } else { "" },
3287                    ExprListDisplay::comma_separated(list.as_slice())
3288                )
3289            }
3290            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
3291                write!(
3292                    f,
3293                    "ROLLUP ({})",
3294                    ExprListDisplay::comma_separated(exprs.as_slice())
3295                )
3296            }
3297            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
3298                write!(f, "GROUPING SETS (")?;
3299                for exprs in lists_of_exprs.iter() {
3300                    write!(
3301                        f,
3302                        "({})",
3303                        ExprListDisplay::comma_separated(exprs.as_slice())
3304                    )?;
3305                }
3306                write!(f, ")")
3307            }
3308            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
3309                write!(
3310                    f,
3311                    "ROLLUP ({})",
3312                    ExprListDisplay::comma_separated(exprs.as_slice())
3313                )
3314            }
3315            Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
3316            Expr::IsNotNull(expr) => {
3317                write!(f, "{} IS NOT NULL", SqlDisplay(expr))
3318            }
3319            Expr::IsUnknown(expr) => {
3320                write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
3321            }
3322            Expr::IsNotUnknown(expr) => {
3323                write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
3324            }
3325            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
3326            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
3327            Expr::IsNotTrue(expr) => {
3328                write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
3329            }
3330            Expr::IsNotFalse(expr) => {
3331                write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
3332            }
3333            Expr::Like(Like {
3334                negated,
3335                expr,
3336                pattern,
3337                escape_char,
3338                case_insensitive,
3339            }) => {
3340                write!(
3341                    f,
3342                    "{} {}{} {}",
3343                    SqlDisplay(expr),
3344                    if *negated { "NOT " } else { "" },
3345                    if *case_insensitive { "ILIKE" } else { "LIKE" },
3346                    SqlDisplay(pattern),
3347                )?;
3348
3349                if let Some(char) = escape_char {
3350                    write!(f, " CHAR '{char}'")?;
3351                }
3352
3353                Ok(())
3354            }
3355            Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
3356            Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
3357            Expr::Unnest(Unnest { expr }) => {
3358                write!(f, "UNNEST({})", SqlDisplay(expr))
3359            }
3360            Expr::SimilarTo(Like {
3361                negated,
3362                expr,
3363                pattern,
3364                escape_char,
3365                ..
3366            }) => {
3367                write!(
3368                    f,
3369                    "{} {} {}",
3370                    SqlDisplay(expr),
3371                    if *negated {
3372                        "NOT SIMILAR TO"
3373                    } else {
3374                        "SIMILAR TO"
3375                    },
3376                    SqlDisplay(pattern),
3377                )?;
3378                if let Some(char) = escape_char {
3379                    write!(f, " CHAR '{char}'")?;
3380                }
3381
3382                Ok(())
3383            }
3384            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3385                match func.human_display(params) {
3386                    Ok(name) => {
3387                        write!(f, "{name}")
3388                    }
3389                    Err(e) => {
3390                        write!(f, "got error from schema_name {e}")
3391                    }
3392                }
3393            }
3394            Expr::Lambda(Lambda { params, body }) => {
3395                write!(f, "({}) -> {}", params.join(", "), SchemaDisplay(body))
3396            }
3397            _ => write!(f, "{}", self.0),
3398        }
3399    }
3400}
3401
3402/// Get schema_name for Vector of expressions
3403///
3404/// Internal usage. Please call `schema_name_from_exprs` instead
3405// TODO: Use ", " to standardize the formatting of Vec<Expr>,
3406// <https://github.com/apache/datafusion/issues/10364>
3407pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
3408    exprs: &[Expr],
3409) -> Result<String, fmt::Error> {
3410    schema_name_from_exprs_inner(exprs, ",")
3411}
3412
3413/// Formats a list of `&Expr` with a custom separator using SQL display format
3414pub struct ExprListDisplay<'a> {
3415    exprs: &'a [Expr],
3416    sep: &'a str,
3417}
3418
3419impl<'a> ExprListDisplay<'a> {
3420    /// Create a new display struct with the given expressions and separator
3421    pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
3422        Self { exprs, sep }
3423    }
3424
3425    /// Create a new display struct with comma-space separator
3426    pub fn comma_separated(exprs: &'a [Expr]) -> Self {
3427        Self::new(exprs, ", ")
3428    }
3429}
3430
3431impl Display for ExprListDisplay<'_> {
3432    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3433        let mut first = true;
3434        for expr in self.exprs {
3435            if !first {
3436                write!(f, "{}", self.sep)?;
3437            }
3438            write!(f, "{}", SqlDisplay(expr))?;
3439            first = false;
3440        }
3441        Ok(())
3442    }
3443}
3444
3445/// Get schema_name for Vector of expressions
3446pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
3447    schema_name_from_exprs_inner(exprs, ", ")
3448}
3449
3450fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
3451    let mut s = String::new();
3452    for (i, e) in exprs.iter().enumerate() {
3453        if i > 0 {
3454            write!(&mut s, "{sep}")?;
3455        }
3456        write!(&mut s, "{}", SchemaDisplay(e))?;
3457    }
3458
3459    Ok(s)
3460}
3461
3462pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
3463    let mut s = String::new();
3464    for (i, e) in sorts.iter().enumerate() {
3465        if i > 0 {
3466            write!(&mut s, ", ")?;
3467        }
3468        let ordering = if e.asc { "ASC" } else { "DESC" };
3469        let nulls_ordering = if e.nulls_first {
3470            "NULLS FIRST"
3471        } else {
3472            "NULLS LAST"
3473        };
3474        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
3475    }
3476
3477    Ok(s)
3478}
3479
/// Prefix written in front of an outer reference column by `Display for Expr`,
/// which renders it as `outer_ref(<column>)`.
pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";
/// Prefix written in front of an unnest expression by `Display for Expr`,
/// which renders it as `UNNEST(<expr>)`.
pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
3482
3483/// Format expressions for display as part of a logical plan. In many cases, this will produce
3484/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
3485impl Display for Expr {
3486    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3487        match self {
3488            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
3489            Expr::Column(c) => write!(f, "{c}"),
3490            Expr::OuterReferenceColumn(_, c) => {
3491                write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
3492            }
3493            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
3494            Expr::Literal(v, metadata) => {
3495                match metadata.as_ref().map(|m| m.is_empty()).unwrap_or(true) {
3496                    false => write!(f, "{v:?} {:?}", metadata.as_ref().unwrap()),
3497                    true => write!(f, "{v:?}"),
3498                }
3499            }
3500            Expr::Case(case) => {
3501                write!(f, "CASE ")?;
3502                if let Some(e) = &case.expr {
3503                    write!(f, "{e} ")?;
3504                }
3505                for (w, t) in &case.when_then_expr {
3506                    write!(f, "WHEN {w} THEN {t} ")?;
3507                }
3508                if let Some(e) = &case.else_expr {
3509                    write!(f, "ELSE {e} ")?;
3510                }
3511                write!(f, "END")
3512            }
3513            Expr::Cast(Cast { expr, field }) => {
3514                let formatted =
3515                    format_type_and_metadata(field.data_type(), Some(field.metadata()));
3516                write!(f, "CAST({expr} AS {formatted})")
3517            }
3518            Expr::TryCast(TryCast { expr, field }) => {
3519                let formatted =
3520                    format_type_and_metadata(field.data_type(), Some(field.metadata()));
3521                write!(f, "TRY_CAST({expr} AS {formatted})")
3522            }
3523            Expr::Not(expr) => write!(f, "NOT {expr}"),
3524            Expr::Negative(expr) => write!(f, "(- {expr})"),
3525            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
3526            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
3527            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
3528            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
3529            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
3530            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
3531            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
3532            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
3533            Expr::Exists(Exists {
3534                subquery,
3535                negated: true,
3536            }) => write!(f, "NOT EXISTS ({subquery:?})"),
3537            Expr::Exists(Exists {
3538                subquery,
3539                negated: false,
3540            }) => write!(f, "EXISTS ({subquery:?})"),
3541            Expr::InSubquery(InSubquery {
3542                expr,
3543                subquery,
3544                negated: true,
3545            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
3546            Expr::InSubquery(InSubquery {
3547                expr,
3548                subquery,
3549                negated: false,
3550            }) => write!(f, "{expr} IN ({subquery:?})"),
3551            Expr::SetComparison(SetComparison {
3552                expr,
3553                subquery,
3554                op,
3555                quantifier,
3556            }) => write!(f, "{expr} {op} {quantifier} ({subquery:?})"),
3557            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
3558            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
3559            Expr::ScalarFunction(fun) => {
3560                fmt_function(f, fun.name(), false, &fun.args, true)
3561            }
3562            Expr::WindowFunction(window_fun) => {
3563                let WindowFunction { fun, params } = window_fun.as_ref();
3564                match fun {
3565                    WindowFunctionDefinition::AggregateUDF(fun) => {
3566                        match fun.window_function_display_name(params) {
3567                            Ok(name) => {
3568                                write!(f, "{name}")
3569                            }
3570                            Err(e) => {
3571                                write!(
3572                                    f,
3573                                    "got error from window_function_display_name {e}"
3574                                )
3575                            }
3576                        }
3577                    }
3578                    WindowFunctionDefinition::WindowUDF(fun) => {
3579                        let WindowFunctionParams {
3580                            args,
3581                            partition_by,
3582                            order_by,
3583                            window_frame,
3584                            filter,
3585                            null_treatment,
3586                            distinct,
3587                        } = params;
3588
3589                        fmt_function(f, &fun.to_string(), *distinct, args, true)?;
3590
3591                        if let Some(nt) = null_treatment {
3592                            write!(f, "{nt}")?;
3593                        }
3594
3595                        if let Some(fe) = filter {
3596                            write!(f, " FILTER (WHERE {fe})")?;
3597                        }
3598
3599                        if !partition_by.is_empty() {
3600                            write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
3601                        }
3602                        if !order_by.is_empty() {
3603                            write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
3604                        }
3605                        write!(
3606                            f,
3607                            " {} BETWEEN {} AND {}",
3608                            window_frame.units,
3609                            window_frame.start_bound,
3610                            window_frame.end_bound
3611                        )
3612                    }
3613                }
3614            }
3615            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3616                match func.display_name(params) {
3617                    Ok(name) => {
3618                        write!(f, "{name}")
3619                    }
3620                    Err(e) => {
3621                        write!(f, "got error from display_name {e}")
3622                    }
3623                }
3624            }
3625            Expr::Between(Between {
3626                expr,
3627                negated,
3628                low,
3629                high,
3630            }) => {
3631                if *negated {
3632                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
3633                } else {
3634                    write!(f, "{expr} BETWEEN {low} AND {high}")
3635                }
3636            }
3637            Expr::Like(Like {
3638                negated,
3639                expr,
3640                pattern,
3641                escape_char,
3642                case_insensitive,
3643            }) => {
3644                write!(f, "{expr}")?;
3645                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
3646                if *negated {
3647                    write!(f, " NOT")?;
3648                }
3649                if let Some(char) = escape_char {
3650                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
3651                } else {
3652                    write!(f, " {op_name} {pattern}")
3653                }
3654            }
3655            Expr::SimilarTo(Like {
3656                negated,
3657                expr,
3658                pattern,
3659                escape_char,
3660                case_insensitive: _,
3661            }) => {
3662                write!(f, "{expr}")?;
3663                if *negated {
3664                    write!(f, " NOT")?;
3665                }
3666                if let Some(char) = escape_char {
3667                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
3668                } else {
3669                    write!(f, " SIMILAR TO {pattern}")
3670                }
3671            }
3672            Expr::InList(InList {
3673                expr,
3674                list,
3675                negated,
3676            }) => {
3677                if *negated {
3678                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
3679                } else {
3680                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
3681                }
3682            }
3683            #[expect(deprecated)]
3684            Expr::Wildcard { qualifier, options } => match qualifier {
3685                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
3686                None => write!(f, "*{options}"),
3687            },
3688            Expr::GroupingSet(grouping_sets) => match grouping_sets {
3689                GroupingSet::Rollup(exprs) => {
3690                    // ROLLUP (c0, c1, c2)
3691                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
3692                }
3693                GroupingSet::Cube(exprs) => {
3694                    // CUBE (c0, c1, c2)
3695                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
3696                }
3697                GroupingSet::GroupingSets(lists_of_exprs) => {
3698                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
3699                    write!(
3700                        f,
3701                        "GROUPING SETS ({})",
3702                        lists_of_exprs
3703                            .iter()
3704                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
3705                            .collect::<Vec<String>>()
3706                            .join(", ")
3707                    )
3708                }
3709            },
3710            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
3711            Expr::Unnest(Unnest { expr }) => {
3712                write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
3713            }
3714            Expr::HigherOrderFunction(fun) => {
3715                fmt_function(f, fun.name(), false, &fun.args, true)
3716            }
3717            Expr::Lambda(Lambda { params, body }) => {
3718                write!(f, "({}) -> {body}", params.join(", "))
3719            }
3720            Expr::LambdaVariable(c) => f.write_str(&c.name),
3721        }
3722    }
3723}
3724
3725fn fmt_function(
3726    f: &mut Formatter,
3727    fun: &str,
3728    distinct: bool,
3729    args: &[Expr],
3730    display: bool,
3731) -> fmt::Result {
3732    let args: Vec<String> = match display {
3733        true => args.iter().map(|arg| format!("{arg}")).collect(),
3734        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
3735    };
3736
3737    let distinct_str = match distinct {
3738        true => "DISTINCT ",
3739        false => "",
3740    };
3741    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
3742}
3743
3744/// The name of the column (field) that this `Expr` will produce in the physical plan.
3745/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
3746pub fn physical_name(expr: &Expr) -> Result<String> {
3747    match expr {
3748        Expr::Column(col) => Ok(col.name.clone()),
3749        Expr::Alias(alias) => Ok(alias.name.clone()),
3750        _ => Ok(expr.schema_name().to_string()),
3751    }
3752}
3753
3754#[cfg(test)]
3755mod test {
3756    use crate::expr_fn::col;
3757    use crate::{
3758        ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, case,
3759        lit, placeholder, qualified_wildcard, wildcard, wildcard_with_options,
3760    };
3761    use arrow::datatypes::{Field, Schema};
3762    use sqlparser::ast;
3763    use sqlparser::ast::{Ident, IdentWithAlias};
3764
3765    #[test]
3766    fn infer_placeholder_in_clause() {
3767        // SELECT * FROM employees WHERE department_id IN ($1, $2, $3);
3768        let column = col("department_id");
3769        let param_placeholders = vec![
3770            Expr::Placeholder(Placeholder {
3771                id: "$1".to_string(),
3772                field: None,
3773            }),
3774            Expr::Placeholder(Placeholder {
3775                id: "$2".to_string(),
3776                field: None,
3777            }),
3778            Expr::Placeholder(Placeholder {
3779                id: "$3".to_string(),
3780                field: None,
3781            }),
3782        ];
3783        let in_list = Expr::InList(InList {
3784            expr: Box::new(column),
3785            list: param_placeholders,
3786            negated: false,
3787        });
3788
3789        let schema = Arc::new(Schema::new(vec![
3790            Field::new("name", DataType::Utf8, true),
3791            Field::new("department_id", DataType::Int32, true),
3792        ]));
3793        let df_schema = DFSchema::try_from(schema).unwrap();
3794
3795        let (inferred_expr, contains_placeholder) =
3796            in_list.infer_placeholder_types(&df_schema).unwrap();
3797
3798        assert!(contains_placeholder);
3799
3800        match inferred_expr {
3801            Expr::InList(in_list) => {
3802                for expr in in_list.list {
3803                    match expr {
3804                        Expr::Placeholder(placeholder) => {
3805                            assert_eq!(
3806                                placeholder.field.unwrap().data_type(),
3807                                &DataType::Int32,
3808                                "Placeholder {} should infer Int32",
3809                                placeholder.id
3810                            );
3811                        }
3812                        _ => panic!("Expected Placeholder expression"),
3813                    }
3814                }
3815            }
3816            _ => panic!("Expected InList expression"),
3817        }
3818    }
3819
3820    #[test]
3821    fn infer_placeholder_like_and_similar_to() {
3822        // name LIKE $1
3823        let schema =
3824            Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
3825        let df_schema = DFSchema::try_from(schema).unwrap();
3826
3827        let like = Like {
3828            expr: Box::new(col("name")),
3829            pattern: Box::new(Expr::Placeholder(Placeholder {
3830                id: "$1".to_string(),
3831                field: None,
3832            })),
3833            negated: false,
3834            case_insensitive: false,
3835            escape_char: None,
3836        };
3837
3838        let expr = Expr::Like(like.clone());
3839
3840        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3841        match inferred_expr {
3842            Expr::Like(like) => match *like.pattern {
3843                Expr::Placeholder(placeholder) => {
3844                    assert_eq!(placeholder.field.unwrap().data_type(), &DataType::Utf8);
3845                }
3846                _ => panic!("Expected Placeholder"),
3847            },
3848            _ => panic!("Expected Like"),
3849        }
3850
3851        // name SIMILAR TO $1
3852        let expr = Expr::SimilarTo(like);
3853
3854        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3855        match inferred_expr {
3856            Expr::SimilarTo(like) => match *like.pattern {
3857                Expr::Placeholder(placeholder) => {
3858                    assert_eq!(
3859                        placeholder.field.unwrap().data_type(),
3860                        &DataType::Utf8,
3861                        "Placeholder {} should infer Utf8",
3862                        placeholder.id
3863                    );
3864                }
3865                _ => panic!("Expected Placeholder expression"),
3866            },
3867            _ => panic!("Expected SimilarTo expression"),
3868        }
3869    }
3870
3871    #[test]
3872    fn infer_placeholder_with_metadata() {
3873        // name == $1, where name is a non-nullable string
3874        let schema = Arc::new(Schema::new(vec![
3875            Field::new("name", DataType::Utf8, false).with_metadata(
3876                [("some_key".to_string(), "some_value".to_string())].into(),
3877            ),
3878        ]));
3879        let df_schema = DFSchema::try_from(schema).unwrap();
3880
3881        let expr = binary_expr(col("name"), Operator::Eq, placeholder("$1"));
3882
3883        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3884        match inferred_expr {
3885            Expr::BinaryExpr(BinaryExpr { right, .. }) => match *right {
3886                Expr::Placeholder(placeholder) => {
3887                    assert_eq!(
3888                        placeholder.field.as_ref().unwrap().data_type(),
3889                        &DataType::Utf8
3890                    );
3891                    assert_eq!(
3892                        placeholder.field.as_ref().unwrap().metadata(),
3893                        df_schema.field(0).metadata()
3894                    );
3895                    // Inferred placeholder should still be nullable
3896                    assert!(placeholder.field.as_ref().unwrap().is_nullable());
3897                }
3898                _ => panic!("Expected Placeholder"),
3899            },
3900            _ => panic!("Expected BinaryExpr"),
3901        }
3902    }
3903
3904    #[test]
3905    fn format_case_when() -> Result<()> {
3906        let expr = case(col("a"))
3907            .when(lit(1), lit(true))
3908            .when(lit(0), lit(false))
3909            .otherwise(lit(ScalarValue::Null))?;
3910        let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";
3911        assert_eq!(expected, format!("{expr}"));
3912        Ok(())
3913    }
3914
3915    #[test]
3916    fn format_cast() -> Result<()> {
3917        let expr = Expr::Cast(Cast {
3918            expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)), None)),
3919            field: DataType::Utf8.into_nullable_field_ref(),
3920        });
3921        let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
3922        assert_eq!(expected_canonical, format!("{expr}"));
3923        // Note that CAST intentionally has a name that is different from its `Display`
3924        // representation. CAST does not change the name of expressions.
3925        assert_eq!("Float32(1.23)", expr.schema_name().to_string());
3926        Ok(())
3927    }
3928
3929    #[test]
3930    fn test_partial_ord() {
3931        // Test validates that partial ord is defined for Expr, not
3932        // intended to exhaustively test all possibilities
3933        let exp1 = col("a") + lit(1);
3934        let exp2 = col("a") + lit(2);
3935        let exp3 = !(col("a") + lit(2));
3936
3937        assert!(exp1 < exp2);
3938        assert!(exp3 > exp2);
3939        assert!(exp1 < exp3)
3940    }
3941
3942    #[test]
3943    fn test_collect_expr() -> Result<()> {
3944        // single column
3945        {
3946            let expr = &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
3947            let columns = expr.column_refs();
3948            assert_eq!(1, columns.len());
3949            assert!(columns.contains(&Column::from_name("a")));
3950        }
3951
3952        // multiple columns
3953        {
3954            let expr = col("a") + col("b") + lit(1);
3955            let columns = expr.column_refs();
3956            assert_eq!(2, columns.len());
3957            assert!(columns.contains(&Column::from_name("a")));
3958            assert!(columns.contains(&Column::from_name("b")));
3959        }
3960
3961        Ok(())
3962    }
3963
3964    #[test]
3965    fn test_logical_ops() {
3966        assert_eq!(
3967            format!("{}", lit(1u32).eq(lit(2u32))),
3968            "UInt32(1) = UInt32(2)"
3969        );
3970        assert_eq!(
3971            format!("{}", lit(1u32).not_eq(lit(2u32))),
3972            "UInt32(1) != UInt32(2)"
3973        );
3974        assert_eq!(
3975            format!("{}", lit(1u32).gt(lit(2u32))),
3976            "UInt32(1) > UInt32(2)"
3977        );
3978        assert_eq!(
3979            format!("{}", lit(1u32).gt_eq(lit(2u32))),
3980            "UInt32(1) >= UInt32(2)"
3981        );
3982        assert_eq!(
3983            format!("{}", lit(1u32).lt(lit(2u32))),
3984            "UInt32(1) < UInt32(2)"
3985        );
3986        assert_eq!(
3987            format!("{}", lit(1u32).lt_eq(lit(2u32))),
3988            "UInt32(1) <= UInt32(2)"
3989        );
3990        assert_eq!(
3991            format!("{}", lit(1u32).and(lit(2u32))),
3992            "UInt32(1) AND UInt32(2)"
3993        );
3994        assert_eq!(
3995            format!("{}", lit(1u32).or(lit(2u32))),
3996            "UInt32(1) OR UInt32(2)"
3997        );
3998    }
3999
4000    #[test]
4001    fn test_is_volatile_scalar_func() {
4002        // UDF
4003        #[derive(Debug, PartialEq, Eq, Hash)]
4004        struct TestScalarUDF {
4005            signature: Signature,
4006        }
4007        impl ScalarUDFImpl for TestScalarUDF {
4008            fn name(&self) -> &str {
4009                "TestScalarUDF"
4010            }
4011
4012            fn signature(&self) -> &Signature {
4013                &self.signature
4014            }
4015
4016            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4017                Ok(DataType::Utf8)
4018            }
4019
4020            fn invoke_with_args(
4021                &self,
4022                _args: ScalarFunctionArgs,
4023            ) -> Result<ColumnarValue> {
4024                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
4025            }
4026        }
4027        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
4028            signature: Signature::uniform(1, vec![DataType::Float32], Volatility::Stable),
4029        }));
4030        assert_ne!(udf.signature().volatility, Volatility::Volatile);
4031
4032        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
4033            signature: Signature::uniform(
4034                1,
4035                vec![DataType::Float32],
4036                Volatility::Volatile,
4037            ),
4038        }));
4039        assert_eq!(udf.signature().volatility, Volatility::Volatile);
4040    }
4041
4042    use super::*;
4043    use crate::logical_plan::{EmptyRelation, LogicalPlan};
4044
4045    #[test]
4046    fn test_display_wildcard() {
4047        assert_eq!(format!("{}", wildcard()), "*");
4048        assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*");
4049        assert_eq!(
4050            format!(
4051                "{}",
4052                wildcard_with_options(wildcard_options(
4053                    Some(IlikeSelectItem {
4054                        pattern: "c1".to_string()
4055                    }),
4056                    None,
4057                    None,
4058                    None,
4059                    None
4060                ))
4061            ),
4062            "* ILIKE 'c1'"
4063        );
4064        assert_eq!(
4065            format!(
4066                "{}",
4067                wildcard_with_options(wildcard_options(
4068                    None,
4069                    Some(ExcludeSelectItem::Multiple(vec![
4070                        Ident::from("c1").into(),
4071                        Ident::from("c2").into()
4072                    ])),
4073                    None,
4074                    None,
4075                    None
4076                ))
4077            ),
4078            "* EXCLUDE (c1, c2)"
4079        );
4080        assert_eq!(
4081            format!(
4082                "{}",
4083                wildcard_with_options(wildcard_options(
4084                    None,
4085                    None,
4086                    Some(ExceptSelectItem {
4087                        first_element: Ident::from("c1"),
4088                        additional_elements: vec![Ident::from("c2")]
4089                    }),
4090                    None,
4091                    None
4092                ))
4093            ),
4094            "* EXCEPT (c1, c2)"
4095        );
4096        assert_eq!(
4097            format!(
4098                "{}",
4099                wildcard_with_options(wildcard_options(
4100                    None,
4101                    None,
4102                    None,
4103                    Some(PlannedReplaceSelectItem {
4104                        items: vec![ReplaceSelectElement {
4105                            expr: ast::Expr::Identifier(Ident::from("c1")),
4106                            column_name: Ident::from("a1"),
4107                            as_keyword: false
4108                        }],
4109                        planned_expressions: vec![]
4110                    }),
4111                    None
4112                ))
4113            ),
4114            "* REPLACE (c1 a1)"
4115        );
4116        assert_eq!(
4117            format!(
4118                "{}",
4119                wildcard_with_options(wildcard_options(
4120                    None,
4121                    None,
4122                    None,
4123                    None,
4124                    Some(RenameSelectItem::Multiple(vec![IdentWithAlias {
4125                        ident: Ident::from("c1"),
4126                        alias: Ident::from("a1")
4127                    }]))
4128                ))
4129            ),
4130            "* RENAME (c1 AS a1)"
4131        )
4132    }
4133
4134    #[test]
4135    fn test_display_set_comparison() {
4136        let subquery = Subquery {
4137            subquery: Arc::new(LogicalPlan::EmptyRelation(EmptyRelation {
4138                produce_one_row: false,
4139                schema: Arc::new(DFSchema::empty()),
4140            })),
4141            outer_ref_columns: vec![],
4142            spans: Spans::new(),
4143        };
4144
4145        let expr = Expr::SetComparison(SetComparison::new(
4146            Box::new(Expr::Column(Column::from_name("a"))),
4147            subquery,
4148            Operator::Gt,
4149            SetQuantifier::Any,
4150        ));
4151
4152        assert_eq!(format!("{expr}"), "a > ANY (<subquery>)");
4153        assert_eq!(format!("{}", expr.human_display()), "a > ANY (<subquery>)");
4154    }
4155
/// A qualified alias is rendered by `SchemaDisplay` as `relation.name`.
#[test]
fn test_schema_display_alias_with_relation() {
    let aliased = lit(1).alias_qualified("table_name".into(), "column_name");
    let rendered = SchemaDisplay(&aliased).to_string();
    assert_eq!(rendered, "table_name.column_name");
}
4168
/// An alias without a relation is rendered by `SchemaDisplay` as the bare name.
#[test]
fn test_schema_display_alias_without_relation() {
    let aliased = lit(1).alias_qualified(None::<&str>, "column_name");
    let rendered = SchemaDisplay(&aliased).to_string();
    assert_eq!(rendered, "column_name");
}
4179
/// `unalias_nested` strips an alias that carries no metadata (or only empty
/// metadata), but leaves an alias with non-empty metadata untouched.
#[test]
fn test_unalias_nested_respects_user_metadata() {
    use std::collections::HashMap;

    let base_expr = col("id");

    // Builds `base_expr AS alias` with the given metadata attached to the alias.
    let alias_with_metadata = |metadata: Option<FieldMetadata>| -> Expr {
        match base_expr.clone().alias("alias") {
            Expr::Alias(alias) => Expr::Alias(alias.with_metadata(metadata)),
            _ => unreachable!(),
        }
    };

    // Plain alias, no metadata at all: the alias is removed.
    let plain_alias = base_expr.clone().alias("alias");
    assert_eq!(plain_alias.unalias_nested().data, base_expr);

    // Alias with present-but-empty metadata: the alias is still removed.
    let empty_metadata_alias = alias_with_metadata(Some(FieldMetadata::default()));
    assert_eq!(empty_metadata_alias.unalias_nested().data, base_expr);

    // Alias with non-empty metadata: the expression comes back unchanged.
    let user_metadata = FieldMetadata::from(HashMap::from([(
        "some_key".to_string(),
        "some_value".to_string(),
    )]));
    let user_alias = alias_with_metadata(Some(user_metadata));
    assert_eq!(user_alias.clone().unalias_nested().data, user_alias);
}
4209
4210    fn wildcard_options(
4211        opt_ilike: Option<IlikeSelectItem>,
4212        opt_exclude: Option<ExcludeSelectItem>,
4213        opt_except: Option<ExceptSelectItem>,
4214        opt_replace: Option<PlannedReplaceSelectItem>,
4215        opt_rename: Option<RenameSelectItem>,
4216    ) -> WildcardOptions {
4217        WildcardOptions {
4218            ilike: opt_ilike,
4219            exclude: opt_exclude,
4220            except: opt_except,
4221            replace: opt_replace,
4222            rename: opt_rename,
4223        }
4224    }
4225
/// Pins the in-memory size of `Expr` and a few closely related types.
#[test]
fn test_size_of_expr() {
    // `Expr` is used all over DataFusion, so keeping it small matters.
    //
    // If a change to `Expr` makes this test fail, try `Box`ing the new or
    // enlarged fields so that `Expr` shrinks back.
    // See https://github.com/apache/datafusion/issues/16199 for details
    let expr_size = size_of::<Expr>();
    let scalar_value_size = size_of::<ScalarValue>();
    let data_type_size = size_of::<DataType>();
    let expr_vec_size = size_of::<Vec<Expr>>();
    let expr_arc_size = size_of::<Arc<Expr>>();

    assert_eq!(expr_size, 112);
    assert_eq!(scalar_value_size, 64);
    assert_eq!(data_type_size, 24); // 3 ptrs
    assert_eq!(expr_vec_size, 24);
    assert_eq!(expr_arc_size, 8);
}
4240
/// Compile-time style check: a function generic over `AsRef<Expr>` can be
/// called with owned expressions, with expressions borrowed from inside an
/// expression tree, and with a vector of `&Expr` references.
#[test]
fn test_accept_exprs() {
    // Stub UDF whose only job is to let us build a `ScalarFunction` node;
    // every trait method is left unimplemented.
    #[derive(Debug, PartialEq, Eq, Hash)]
    struct TestUDF {}
    impl ScalarUDFImpl for TestUDF {
        fn name(&self) -> &str {
            unimplemented!()
        }

        fn signature(&self) -> &Signature {
            unimplemented!()
        }

        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
            unimplemented!()
        }

        fn invoke_with_args(
            &self,
            _args: ScalarFunctionArgs,
        ) -> Result<ColumnarValue> {
            unimplemented!()
        }
    }

    // The API shape under test: a slice of anything viewable as `&Expr`.
    fn accept_exprs<E>(_: &[E])
    where
        E: AsRef<Expr>,
    {
    }

    let make_expr = || -> Expr { lit(1) };

    // Owned expressions.
    let owned_exprs = vec![make_expr(), make_expr()];
    accept_exprs(&owned_exprs);

    // Expressions that live inside an expression tree.
    let udf = Expr::ScalarFunction(ScalarFunction {
        func: Arc::new(ScalarUDF::new_from_impl(TestUDF {})),
        args: vec![make_expr(), make_expr()],
    });
    let scalar = match &udf {
        Expr::ScalarFunction(scalar) => scalar,
        _ => unreachable!(),
    };
    accept_exprs(&scalar.args);

    // References gathered from the tree and from the owned vector, without cloning.
    let collected_refs: Vec<&Expr> =
        scalar.args.iter().chain(owned_exprs.iter()).collect();
    accept_exprs(&collected_refs);
}
4290
4291    mod intersect_metadata_tests {
4292        use super::super::intersect_metadata_for_union;
4293        use std::collections::HashMap;
4294
4295        #[test]
4296        fn all_branches_same_metadata() {
4297            let m1 = HashMap::from([("key".into(), "val".into())]);
4298            let m2 = HashMap::from([("key".into(), "val".into())]);
4299            let result = intersect_metadata_for_union([&m1, &m2]);
4300            assert_eq!(result, HashMap::from([("key".into(), "val".into())]));
4301        }
4302
4303        #[test]
4304        fn conflicting_metadata_dropped() {
4305            let m1 = HashMap::from([("key".into(), "a".into())]);
4306            let m2 = HashMap::from([("key".into(), "b".into())]);
4307            let result = intersect_metadata_for_union([&m1, &m2]);
4308            assert!(result.is_empty());
4309        }
4310
4311        #[test]
4312        fn empty_metadata_branch_skipped() {
4313            let m1 = HashMap::from([("key".into(), "val".into())]);
4314            let m2 = HashMap::new(); // e.g. NULL literal
4315            let result = intersect_metadata_for_union([&m1, &m2]);
4316            assert_eq!(result, HashMap::from([("key".into(), "val".into())]));
4317        }
4318
4319        #[test]
4320        fn empty_metadata_first_branch_skipped() {
4321            let m1 = HashMap::new();
4322            let m2 = HashMap::from([("key".into(), "val".into())]);
4323            let result = intersect_metadata_for_union([&m1, &m2]);
4324            assert_eq!(result, HashMap::from([("key".into(), "val".into())]));
4325        }
4326
4327        #[test]
4328        fn all_branches_empty_metadata() {
4329            let m1: HashMap<String, String> = HashMap::new();
4330            let m2: HashMap<String, String> = HashMap::new();
4331            let result = intersect_metadata_for_union([&m1, &m2]);
4332            assert!(result.is_empty());
4333        }
4334
4335        #[test]
4336        fn mixed_empty_and_conflicting() {
4337            let m1 = HashMap::from([("key".into(), "a".into())]);
4338            let m2 = HashMap::new();
4339            let m3 = HashMap::from([("key".into(), "b".into())]);
4340            let result = intersect_metadata_for_union([&m1, &m2, &m3]);
4341            // m2 is skipped; m1 and m3 conflict → dropped
4342            assert!(result.is_empty());
4343        }
4344
4345        #[test]
4346        fn no_inputs() {
4347            let result = intersect_metadata_for_union(std::iter::empty::<
4348                &HashMap<String, String>,
4349            >());
4350            assert!(result.is_empty());
4351        }
4352    }
4353}