datafusion_expr/
expr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Logical Expressions: [`Expr`]
19
20use std::cmp::Ordering;
21use std::collections::{BTreeMap, HashSet};
22use std::fmt::{self, Display, Formatter, Write};
23use std::hash::{Hash, Hasher};
24use std::mem;
25use std::sync::Arc;
26
27use crate::expr_fn::binary_expr;
28use crate::function::WindowFunctionSimplification;
29use crate::logical_plan::Subquery;
30use crate::{AggregateUDF, Volatility};
31use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
32
33use arrow::datatypes::{DataType, Field, FieldRef};
34use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
35use datafusion_common::tree_node::{
36    Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
37};
38use datafusion_common::{
39    Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference,
40};
41use datafusion_functions_window_common::field::WindowUDFFieldArgs;
42use sqlparser::ast::{
43    display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
44    NullTreatment, RenameSelectItem, ReplaceSelectElement,
45};
46
47/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
48///
49/// For example the expression `A + 1` will be represented as
50///
51///```text
52///  BinaryExpr {
53///    left: Expr::Column("A"),
54///    op: Operator::Plus,
55///    right: Expr::Literal(ScalarValue::Int32(Some(1)), None)
56/// }
57/// ```
58///
59/// # Creating Expressions
60///
61/// `Expr`s can be created directly, but it is often easier and less verbose to
62/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
64///
65/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
66///
67/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
68///
69/// # Printing Expressions
70///
/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
73///
74/// If you need  SQL to pass to other systems, consider using [`Unparser`].
75///
76/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
77///
78/// # Schema Access
79///
80/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
81/// of an `Expr`.
82///
83/// # Visiting and Rewriting `Expr`s
84///
/// The `Expr` enum implements the [`TreeNode`] trait for walking and
/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
/// the examples below and [`TreeNode`] for more information.
89///
90/// # Examples: Creating and Using `Expr`s
91///
92/// ## Column References and Literals
93///
94/// [`Expr::Column`] refer to the values of columns and are often created with
95/// the [`col`] function. For example to create an expression `c1` referring to
96/// column named "c1":
97///
98/// [`col`]: crate::expr_fn::col
99///
100/// ```
101/// # use datafusion_common::Column;
102/// # use datafusion_expr::{lit, col, Expr};
103/// let expr = col("c1");
104/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
105/// ```
106///
107/// [`Expr::Literal`] refer to literal, or constant, values. These are created
108/// with the [`lit`] function. For example to create an expression `42`:
109///
110/// [`lit`]: crate::lit
111///
112/// ```
113/// # use datafusion_common::{Column, ScalarValue};
114/// # use datafusion_expr::{lit, col, Expr};
115/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
116/// let expr = lit(42i64);
117/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
118/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
119/// // To make a (typed) NULL:
120/// let expr = Expr::Literal(ScalarValue::Int64(None), None);
121/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
122/// let expr = lit(ScalarValue::Null);
123/// ```
124///
125/// ## Binary Expressions
126///
127/// Exprs implement traits that allow easy to understand construction of more
128/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
129/// "c2" together
130///
131/// ```
132/// # use datafusion_expr::{lit, col, Operator, Expr};
133/// // Use the `+` operator to add two columns together
134/// let expr = col("c1") + col("c2");
135/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
136/// if let Expr::BinaryExpr(binary_expr) = expr {
137///   assert_eq!(*binary_expr.left, col("c1"));
138///   assert_eq!(*binary_expr.right, col("c2"));
139///   assert_eq!(binary_expr.op, Operator::Plus);
140/// }
141/// ```
142///
/// The expression `c1 = 42` compares the value in column "c1" to the
/// literal value `42`:
145///
146/// ```
147/// # use datafusion_common::ScalarValue;
148/// # use datafusion_expr::{lit, col, Operator, Expr};
149/// let expr = col("c1").eq(lit(42_i32));
150/// assert!(matches!(expr, Expr::BinaryExpr { .. } ));
151/// if let Expr::BinaryExpr(binary_expr) = expr {
152///   assert_eq!(*binary_expr.left, col("c1"));
153///   let scalar = ScalarValue::Int32(Some(42));
154///   assert_eq!(*binary_expr.right, Expr::Literal(scalar, None));
155///   assert_eq!(binary_expr.op, Operator::Eq);
156/// }
157/// ```
158///
159/// Here is how to implement the equivalent of `SELECT *` to select all
160/// [`Expr::Column`] from a [`DFSchema`]'s columns:
161///
162/// ```
163/// # use arrow::datatypes::{DataType, Field, Schema};
164/// # use datafusion_common::{DFSchema, Column};
165/// # use datafusion_expr::Expr;
166/// // Create a schema c1(int, c2 float)
167/// let arrow_schema = Schema::new(vec![
168///    Field::new("c1", DataType::Int32, false),
169///    Field::new("c2", DataType::Float64, false),
170/// ]);
/// // DFSchema is an Arrow schema with an optional relation name
172/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)
173///   .unwrap();
174///
175/// // Form Vec<Expr> with an expression for each column in the schema
176/// let exprs: Vec<_> = df_schema.iter()
177///   .map(Expr::from)
178///   .collect();
179///
180/// assert_eq!(exprs, vec![
181///   Expr::from(Column::from_qualified_name("t1.c1")),
182///   Expr::from(Column::from_qualified_name("t1.c2")),
183/// ]);
184/// ```
185///
186/// # Examples: Displaying `Exprs`
187///
188/// There are three ways to print an `Expr` depending on the usecase.
189///
190/// ## Use `Debug` trait
191///
192/// Following Rust conventions, the `Debug` implementation prints out the
193/// internal structure of the expression, which is useful for debugging.
194///
195/// ```
196/// # use datafusion_expr::{lit, col};
197/// let expr = col("c1") + lit(42);
198/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42), None) })");
199/// ```
200///
201/// ## Use the `Display` trait  (detailed expression)
202///
203/// The `Display` implementation prints out the expression in a SQL-like form,
204/// but has additional details such as the data type of literals. This is useful
205/// for understanding the expression in more detail and is used for the low level
206/// [`ExplainFormat::Indent`] explain plan format.
207///
208/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
209///
210/// ```
211/// # use datafusion_expr::{lit, col};
212/// let expr = col("c1") + lit(42);
213/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
214/// ```
215///
216/// ## Use [`Self::human_display`] (human readable)
217///
218/// [`Self::human_display`]  prints out the expression in a SQL-like form, optimized
219/// for human consumption by end users. It is used for the
220/// [`ExplainFormat::Tree`] explain plan format.
221///
222/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
223///
224///```
225/// # use datafusion_expr::{lit, col};
226/// let expr = col("c1") + lit(42);
227/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
228/// ```
229///
230/// # Examples: Visiting and Rewriting `Expr`s
231///
232/// Here is an example that finds all literals in an `Expr` tree:
233/// ```
234/// # use std::collections::{HashSet};
235/// use datafusion_common::ScalarValue;
236/// # use datafusion_expr::{col, Expr, lit};
237/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
238/// // Expression a = 5 AND b = 6
239/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
/// // collect all literals into a HashSet
241/// let mut scalars = HashSet::new();
242/// // apply recursively visits all nodes in the expression tree
243/// expr.apply(|e| {
244///    if let Expr::Literal(scalar, _) = e {
245///       scalars.insert(scalar);
246///    }
247///    // The return value controls whether to continue visiting the tree
248///    Ok(TreeNodeRecursion::Continue)
249/// }).unwrap();
250/// // All subtrees have been visited and literals found
251/// assert_eq!(scalars.len(), 2);
252/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
253/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
254/// ```
255///
/// Rewrite an expression, replacing references to column "a" with
/// the literal `42`:
258///
259///  ```
260/// # use datafusion_common::tree_node::{Transformed, TreeNode};
261/// # use datafusion_expr::{col, Expr, lit};
262/// // expression a = 5 AND b = 6
263/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
264/// // rewrite all references to column "a" to the literal 42
265/// let rewritten = expr.transform(|e| {
266///   if let Expr::Column(c) = &e {
267///     if &c.name == "a" {
268///       // return Transformed::yes to indicate the node was changed
269///       return Ok(Transformed::yes(lit(42)))
270///     }
271///   }
272///   // return Transformed::no to indicate the node was not changed
273///   Ok(Transformed::no(e))
274/// }).unwrap();
275/// // The expression has been rewritten
276/// assert!(rewritten.transformed);
277/// // to 42 = 5 AND b = 6
/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
/// ```
279#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
280pub enum Expr {
281    /// An expression with a specific name.
282    Alias(Alias),
283    /// A named reference to a qualified field in a schema.
284    Column(Column),
285    /// A named reference to a variable in a registry.
286    ScalarVariable(DataType, Vec<String>),
287    /// A constant value along with associated [`FieldMetadata`].
288    Literal(ScalarValue, Option<FieldMetadata>),
289    /// A binary expression such as "age > 21"
290    BinaryExpr(BinaryExpr),
291    /// LIKE expression
292    Like(Like),
293    /// LIKE expression that uses regular expressions
294    SimilarTo(Like),
295    /// Negation of an expression. The expression's type must be a boolean to make sense.
296    Not(Box<Expr>),
297    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
298    IsNotNull(Box<Expr>),
299    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
300    IsNull(Box<Expr>),
301    /// True if argument is true, false otherwise. This expression itself is never NULL.
302    IsTrue(Box<Expr>),
303    /// True if argument is  false, false otherwise. This expression itself is never NULL.
304    IsFalse(Box<Expr>),
305    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
306    IsUnknown(Box<Expr>),
307    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
308    IsNotTrue(Box<Expr>),
309    /// True if argument is TRUE OR NULL, false otherwise. This expression itself is never NULL.
310    IsNotFalse(Box<Expr>),
311    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
312    IsNotUnknown(Box<Expr>),
313    /// arithmetic negation of an expression, the operand must be of a signed numeric data type
314    Negative(Box<Expr>),
315    /// Whether an expression is between a given range.
316    Between(Between),
317    /// A CASE expression (see docs on [`Case`])
318    Case(Case),
319    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
320    /// This expression is guaranteed to have a fixed type.
321    Cast(Cast),
322    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
323    /// This expression is guaranteed to have a fixed type.
324    TryCast(TryCast),
325    /// Call a scalar function with a set of arguments.
326    ScalarFunction(ScalarFunction),
327    /// Calls an aggregate function with arguments, and optional
328    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
329    ///
330    /// See also [`ExprFunctionExt`] to set these fields.
331    ///
332    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
333    AggregateFunction(AggregateFunction),
334    /// Call a window function with a set of arguments.
335    WindowFunction(Box<WindowFunction>),
336    /// Returns whether the list contains the expr value.
337    InList(InList),
338    /// EXISTS subquery
339    Exists(Exists),
340    /// IN subquery
341    InSubquery(InSubquery),
342    /// Scalar subquery
343    ScalarSubquery(Subquery),
344    /// Represents a reference to all available fields in a specific schema,
345    /// with an optional (schema) qualifier.
346    ///
347    /// This expr has to be resolved to a list of columns before translating logical
348    /// plan into physical plan.
349    #[deprecated(
350        since = "46.0.0",
351        note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765"
352    )]
353    Wildcard {
354        qualifier: Option<TableReference>,
355        options: Box<WildcardOptions>,
356    },
357    /// List of grouping set expressions. Only valid in the context of an aggregate
358    /// GROUP BY expression list
359    GroupingSet(GroupingSet),
360    /// A place holder for parameters in a prepared statement
361    /// (e.g. `$foo` or `$1`)
362    Placeholder(Placeholder),
363    /// A placeholder which holds a reference to a qualified field
364    /// in the outer query, used for correlated sub queries.
365    OuterReferenceColumn(DataType, Column),
366    /// Unnest expression
367    Unnest(Unnest),
368}
369
370impl Default for Expr {
371    fn default() -> Self {
372        Expr::Literal(ScalarValue::Null, None)
373    }
374}
375
376/// Create an [`Expr`] from a [`Column`]
377impl From<Column> for Expr {
378    fn from(value: Column) -> Self {
379        Expr::Column(value)
380    }
381}
382
383/// Create an [`Expr`] from a [`WindowFunction`]
384impl From<WindowFunction> for Expr {
385    fn from(value: WindowFunction) -> Self {
386        Expr::WindowFunction(Box::new(value))
387    }
388}
389
390/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
391/// useful for creating [`Expr`] from a [`DFSchema`].
392///
393/// See example on [`Expr`]
394impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
395    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
396        Expr::from(Column::from(value))
397    }
398}
399
400impl<'a> TreeNodeContainer<'a, Self> for Expr {
401    fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
402        &'a self,
403        mut f: F,
404    ) -> Result<TreeNodeRecursion> {
405        f(self)
406    }
407
408    fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>(
409        self,
410        mut f: F,
411    ) -> Result<Transformed<Self>> {
412        f(self)
413    }
414}
415
/// Metadata attached to a literal expression.
///
/// Holds string key/value pairs mirroring the `metadata` field on [`Field`].
/// The pairs live behind an [`Arc`], so cloning does not copy the map.
///
/// # Example: Create [`FieldMetadata`] from a [`Field`]
/// ```
/// # use std::collections::HashMap;
/// # use datafusion_expr::expr::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true)
/// #  .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
/// // Create a new `FieldMetadata` instance from a `Field`
/// let metadata = FieldMetadata::new_from_field(&field);
/// // There is also a `From` impl:
/// let metadata = FieldMetadata::from(&field);
/// ```
///
/// # Example: Update a [`Field`] with [`FieldMetadata`]
/// ```
/// # use datafusion_expr::expr::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true);
/// # let metadata = FieldMetadata::new_from_field(&field);
/// // Add any metadata from `FieldMetadata` to `Field`
/// let updated_field = metadata.add_to_field(field);
/// ```
///
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
pub struct FieldMetadata {
    /// String keys mapped to string values.
    ///
    /// Stored in a `BTreeMap` rather than a `HashMap` because `HashMap` does
    /// not implement traits such as `Hash` and `PartialOrd`, which this type
    /// derives.
    inner: Arc<BTreeMap<String, String>>,
}
456
457impl Default for FieldMetadata {
458    fn default() -> Self {
459        Self::new_empty()
460    }
461}
462
463impl FieldMetadata {
464    /// Create a new empty metadata instance.
465    pub fn new_empty() -> Self {
466        Self {
467            inner: Arc::new(BTreeMap::new()),
468        }
469    }
470
471    /// Merges two optional `FieldMetadata` instances, overwriting any existing
472    /// keys in `m` with keys from `n` if present.
473    ///
474    /// This function is commonly used in alias operations, particularly for literals
475    /// with metadata. When creating an alias expression, the metadata from the original
476    /// expression (such as a literal) is combined with any metadata specified on the alias.
477    ///
478    /// # Arguments
479    ///
480    /// * `m` - The first metadata (typically from the original expression like a literal)
481    /// * `n` - The second metadata (typically from the alias definition)
482    ///
483    /// # Merge Strategy
484    ///
485    /// - If both metadata instances exist, they are merged with `n` taking precedence
486    /// - Keys from `n` will overwrite keys from `m` if they have the same name
487    /// - If only one metadata instance exists, it is returned unchanged
488    /// - If neither exists, `None` is returned
489    ///
490    /// # Example usage
491    /// ```rust
492    /// use datafusion_expr::expr::FieldMetadata;
493    /// use std::collections::BTreeMap;
494    ///
495    /// // Create metadata for a literal expression
496    /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
497    ///     ("source".to_string(), "constant".to_string()),
498    ///     ("type".to_string(), "int".to_string()),
499    /// ])));
500    ///
501    /// // Create metadata for an alias
502    /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
503    ///     ("description".to_string(), "answer".to_string()),
504    ///     ("source".to_string(), "user".to_string()), // This will override literal's "source"
505    /// ])));
506    ///
507    /// // Merge the metadata
508    /// let merged = FieldMetadata::merge_options(
509    ///     literal_metadata.as_ref(),
510    ///     alias_metadata.as_ref(),
511    /// );
512    ///
513    /// // Result contains: {"source": "user", "type": "int", "description": "answer"}
514    /// assert!(merged.is_some());
515    /// ```
516    pub fn merge_options(
517        m: Option<&FieldMetadata>,
518        n: Option<&FieldMetadata>,
519    ) -> Option<FieldMetadata> {
520        match (m, n) {
521            (Some(m), Some(n)) => {
522                let mut merged = m.clone();
523                merged.extend(n.clone());
524                Some(merged)
525            }
526            (Some(m), None) => Some(m.clone()),
527            (None, Some(n)) => Some(n.clone()),
528            (None, None) => None,
529        }
530    }
531
532    /// Create a new metadata instance from a `Field`'s metadata.
533    pub fn new_from_field(field: &Field) -> Self {
534        let inner = field
535            .metadata()
536            .iter()
537            .map(|(k, v)| (k.to_string(), v.to_string()))
538            .collect();
539        Self {
540            inner: Arc::new(inner),
541        }
542    }
543
544    /// Create a new metadata instance from a map of string keys to string values.
545    pub fn new(inner: BTreeMap<String, String>) -> Self {
546        Self {
547            inner: Arc::new(inner),
548        }
549    }
550
551    /// Get the inner metadata as a reference to a `BTreeMap`.
552    pub fn inner(&self) -> &BTreeMap<String, String> {
553        &self.inner
554    }
555
556    /// Return the inner metadata
557    pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
558        self.inner
559    }
560
561    /// Adds metadata from `other` into `self`, overwriting any existing keys.
562    pub fn extend(&mut self, other: Self) {
563        if other.is_empty() {
564            return;
565        }
566        let other = Arc::unwrap_or_clone(other.into_inner());
567        Arc::make_mut(&mut self.inner).extend(other);
568    }
569
570    /// Returns true if the metadata is empty.
571    pub fn is_empty(&self) -> bool {
572        self.inner.is_empty()
573    }
574
575    /// Returns the number of key-value pairs in the metadata.
576    pub fn len(&self) -> usize {
577        self.inner.len()
578    }
579
580    /// Convert this `FieldMetadata` into a `HashMap<String, String>`
581    pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
582        self.inner
583            .iter()
584            .map(|(k, v)| (k.to_string(), v.to_string()))
585            .collect()
586    }
587
588    /// Updates the metadata on the Field with this metadata, if it is not empty.
589    pub fn add_to_field(&self, field: Field) -> Field {
590        if self.inner.is_empty() {
591            return field;
592        }
593
594        field.with_metadata(self.to_hashmap())
595    }
596}
597
598impl From<&Field> for FieldMetadata {
599    fn from(field: &Field) -> Self {
600        Self::new_from_field(field)
601    }
602}
603
604impl From<BTreeMap<String, String>> for FieldMetadata {
605    fn from(inner: BTreeMap<String, String>) -> Self {
606        Self::new(inner)
607    }
608}
609
610impl From<std::collections::HashMap<String, String>> for FieldMetadata {
611    fn from(map: std::collections::HashMap<String, String>) -> Self {
612        Self::new(map.into_iter().collect())
613    }
614}
615
616/// From reference
617impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
618    fn from(map: &std::collections::HashMap<String, String>) -> Self {
619        let inner = map
620            .iter()
621            .map(|(k, v)| (k.to_string(), v.to_string()))
622            .collect();
623        Self::new(inner)
624    }
625}
626
627/// From hashbrown map
628impl From<HashMap<String, String>> for FieldMetadata {
629    fn from(map: HashMap<String, String>) -> Self {
630        let inner = map.into_iter().collect();
631        Self::new(inner)
632    }
633}
634
635impl From<&HashMap<String, String>> for FieldMetadata {
636    fn from(map: &HashMap<String, String>) -> Self {
637        let inner = map
638            .into_iter()
639            .map(|(k, v)| (k.to_string(), v.to_string()))
640            .collect();
641        Self::new(inner)
642    }
643}
644
/// The key/value metadata type used by [`Field::metadata`].
///
/// An Arrow [`Field`] carries metadata as string keys mapped to string values.
///
/// # Common Use Cases
///
/// Typical things kept in field metadata:
/// - Default values for columns when data is missing
/// - Column descriptions or documentation
/// - Data lineage information
/// - Custom application-specific annotations
/// - Encoding hints or display formatting preferences
///
/// # Example: Storing Default Values
///
/// One practical use is recording default values for columns that exist in
/// the logical schema but may be absent from the physical data.
/// See the [default_column_values.rs] example implementation.
///
/// [default_column_values.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/default_column_values.rs
pub type SchemaFieldMetadata = std::collections::HashMap<String, String>;
666
667/// Intersects multiple metadata instances for UNION operations.
668///
669/// This function implements the intersection strategy used by UNION operations,
670/// where only metadata keys that exist in ALL inputs with identical values
671/// are preserved in the result.
672///
673/// # Union Metadata Behavior
674///
675/// Union operations require consistent metadata across all branches:
676/// - Only metadata keys present in ALL union branches are kept
677/// - For each kept key, the value must be identical across all branches
678/// - If a key has different values across branches, it is excluded from the result
679/// - If any input has no metadata, the result will be empty
680///
681/// # Arguments
682///
683/// * `metadatas` - An iterator of `SchemaFieldMetadata` instances to intersect
684///
685/// # Returns
686///
687/// A new `SchemaFieldMetadata` containing only the intersected metadata
688pub fn intersect_metadata_for_union<'a>(
689    metadatas: impl IntoIterator<Item = &'a SchemaFieldMetadata>,
690) -> SchemaFieldMetadata {
691    let mut metadatas = metadatas.into_iter();
692    let Some(mut intersected) = metadatas.next().cloned() else {
693        return Default::default();
694    };
695
696    for metadata in metadatas {
697        // Only keep keys that exist in both with the same value
698        intersected.retain(|k, v| metadata.get(k) == Some(v));
699    }
700
701    intersected
702}
703
704/// UNNEST expression.
705#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
706pub struct Unnest {
707    pub expr: Box<Expr>,
708}
709
710impl Unnest {
711    /// Create a new Unnest expression.
712    pub fn new(expr: Expr) -> Self {
713        Self {
714            expr: Box::new(expr),
715        }
716    }
717
718    /// Create a new Unnest expression.
719    pub fn new_boxed(boxed: Box<Expr>) -> Self {
720        Self { expr: boxed }
721    }
722}
723
724/// Alias expression
725#[derive(Clone, PartialEq, Eq, Debug)]
726pub struct Alias {
727    pub expr: Box<Expr>,
728    pub relation: Option<TableReference>,
729    pub name: String,
730    pub metadata: Option<FieldMetadata>,
731}
732
733impl Hash for Alias {
734    fn hash<H: Hasher>(&self, state: &mut H) {
735        self.expr.hash(state);
736        self.relation.hash(state);
737        self.name.hash(state);
738    }
739}
740
741impl PartialOrd for Alias {
742    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
743        let cmp = self.expr.partial_cmp(&other.expr);
744        let Some(Ordering::Equal) = cmp else {
745            return cmp;
746        };
747        let cmp = self.relation.partial_cmp(&other.relation);
748        let Some(Ordering::Equal) = cmp else {
749            return cmp;
750        };
751        self.name.partial_cmp(&other.name)
752    }
753}
754
755impl Alias {
756    /// Create an alias with an optional schema/field qualifier.
757    pub fn new(
758        expr: Expr,
759        relation: Option<impl Into<TableReference>>,
760        name: impl Into<String>,
761    ) -> Self {
762        Self {
763            expr: Box::new(expr),
764            relation: relation.map(|r| r.into()),
765            name: name.into(),
766            metadata: None,
767        }
768    }
769
770    pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self {
771        self.metadata = metadata;
772        self
773    }
774}
775
776/// Binary expression
777#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
778pub struct BinaryExpr {
779    /// Left-hand side of the expression
780    pub left: Box<Expr>,
781    /// The comparison operator
782    pub op: Operator,
783    /// Right-hand side of the expression
784    pub right: Box<Expr>,
785}
786
787impl BinaryExpr {
788    /// Create a new binary expression
789    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
790        Self { left, op, right }
791    }
792}
793
794impl Display for BinaryExpr {
795    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
796        // Put parentheses around child binary expressions so that we can see the difference
797        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
798        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
799        // equivalent and the parentheses are not necessary.
800
801        fn write_child(
802            f: &mut Formatter<'_>,
803            expr: &Expr,
804            precedence: u8,
805        ) -> fmt::Result {
806            match expr {
807                Expr::BinaryExpr(child) => {
808                    let p = child.op.precedence();
809                    if p == 0 || p < precedence {
810                        write!(f, "({child})")?;
811                    } else {
812                        write!(f, "{child}")?;
813                    }
814                }
815                _ => write!(f, "{expr}")?,
816            }
817            Ok(())
818        }
819
820        let precedence = self.op.precedence();
821        write_child(f, self.left.as_ref(), precedence)?;
822        write!(f, " {} ", self.op)?;
823        write_child(f, self.right.as_ref(), precedence)
824    }
825}
826
827/// CASE expression
828///
829/// The CASE expression is similar to a series of nested if/else and there are two forms that
830/// can be used. The first form consists of a series of boolean "when" expressions with
831/// corresponding "then" expressions, and an optional "else" expression.
832///
833/// ```text
834/// CASE WHEN condition THEN result
835///      [WHEN ...]
836///      [ELSE result]
837/// END
838/// ```
839///
840/// The second form uses a base expression and then a series of "when" clauses that match on a
841/// literal value.
842///
843/// ```text
844/// CASE expression
845///     WHEN value THEN result
846///     [WHEN ...]
847///     [ELSE result]
848/// END
849/// ```
850#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
851pub struct Case {
852    /// Optional base expression that can be compared to literal values in the "when" expressions
853    pub expr: Option<Box<Expr>>,
854    /// One or more when/then expressions
855    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
856    /// Optional "else" expression
857    pub else_expr: Option<Box<Expr>>,
858}
859
860impl Case {
861    /// Create a new Case expression
862    pub fn new(
863        expr: Option<Box<Expr>>,
864        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
865        else_expr: Option<Box<Expr>>,
866    ) -> Self {
867        Self {
868            expr,
869            when_then_expr,
870            else_expr,
871        }
872    }
873}
874
875/// LIKE expression
876#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
877pub struct Like {
878    pub negated: bool,
879    pub expr: Box<Expr>,
880    pub pattern: Box<Expr>,
881    pub escape_char: Option<char>,
882    /// Whether to ignore case on comparing
883    pub case_insensitive: bool,
884}
885
886impl Like {
887    /// Create a new Like expression
888    pub fn new(
889        negated: bool,
890        expr: Box<Expr>,
891        pattern: Box<Expr>,
892        escape_char: Option<char>,
893        case_insensitive: bool,
894    ) -> Self {
895        Self {
896            negated,
897            expr,
898            pattern,
899            escape_char,
900            case_insensitive,
901        }
902    }
903}
904
905/// BETWEEN expression
906#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
907pub struct Between {
908    /// The value to compare
909    pub expr: Box<Expr>,
910    /// Whether the expression is negated
911    pub negated: bool,
912    /// The low end of the range
913    pub low: Box<Expr>,
914    /// The high end of the range
915    pub high: Box<Expr>,
916}
917
918impl Between {
919    /// Create a new Between expression
920    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
921        Self {
922            expr,
923            negated,
924            low,
925            high,
926        }
927    }
928}
929
930/// Invoke a [`ScalarUDF`] with a set of arguments
931///
932/// [`ScalarUDF`]: crate::ScalarUDF
933#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
934pub struct ScalarFunction {
935    /// The function
936    pub func: Arc<crate::ScalarUDF>,
937    /// List of expressions to feed to the functions as arguments
938    pub args: Vec<Expr>,
939}
940
941impl ScalarFunction {
942    // return the Function's name
943    pub fn name(&self) -> &str {
944        self.func.name()
945    }
946}
947
948impl ScalarFunction {
949    /// Create a new `ScalarFunction` from a [`ScalarUDF`]
950    ///
951    /// [`ScalarUDF`]: crate::ScalarUDF
952    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
953        Self { func: udf, args }
954    }
955}
956
/// Describes how a sub field of a nested value is accessed: by struct field
/// name, by a single list index, or by a list range.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
    /// Named field, for example `struct["name"]`; the field name is carried as a
    /// [`ScalarValue`]
    NamedStructField { name: ScalarValue },
    /// Single list index, for example: `list[i]`
    ListIndex { key: Box<Expr> },
    /// List range with a stride, for example `list[i:j:k]`
    ListRange {
        /// Expression for `i` in `list[i:j:k]`
        start: Box<Expr>,
        /// Expression for `j` in `list[i:j:k]`
        stop: Box<Expr>,
        /// Expression for `k` in `list[i:j:k]`
        stride: Box<Expr>,
    },
}
971
972/// Cast expression
973#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
974pub struct Cast {
975    /// The expression being cast
976    pub expr: Box<Expr>,
977    /// The `DataType` the expression will yield
978    pub data_type: DataType,
979}
980
981impl Cast {
982    /// Create a new Cast expression
983    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
984        Self { expr, data_type }
985    }
986}
987
988/// TryCast Expression
989#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
990pub struct TryCast {
991    /// The expression being cast
992    pub expr: Box<Expr>,
993    /// The `DataType` the expression will yield
994    pub data_type: DataType,
995}
996
997impl TryCast {
998    /// Create a new TryCast expression
999    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
1000        Self { expr, data_type }
1001    }
1002}
1003
1004/// SORT expression
1005#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1006pub struct Sort {
1007    /// The expression to sort on
1008    pub expr: Expr,
1009    /// The direction of the sort
1010    pub asc: bool,
1011    /// Whether to put Nulls before all other data values
1012    pub nulls_first: bool,
1013}
1014
1015impl Sort {
1016    /// Create a new Sort expression
1017    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
1018        Self {
1019            expr,
1020            asc,
1021            nulls_first,
1022        }
1023    }
1024
1025    /// Create a new Sort expression with the opposite sort direction
1026    pub fn reverse(&self) -> Self {
1027        Self {
1028            expr: self.expr.clone(),
1029            asc: !self.asc,
1030            nulls_first: !self.nulls_first,
1031        }
1032    }
1033
1034    /// Replaces the Sort expressions with `expr`
1035    pub fn with_expr(&self, expr: Expr) -> Self {
1036        Self {
1037            expr,
1038            asc: self.asc,
1039            nulls_first: self.nulls_first,
1040        }
1041    }
1042}
1043
1044impl Display for Sort {
1045    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1046        write!(f, "{}", self.expr)?;
1047        if self.asc {
1048            write!(f, " ASC")?;
1049        } else {
1050            write!(f, " DESC")?;
1051        }
1052        if self.nulls_first {
1053            write!(f, " NULLS FIRST")?;
1054        } else {
1055            write!(f, " NULLS LAST")?;
1056        }
1057        Ok(())
1058    }
1059}
1060
1061impl<'a> TreeNodeContainer<'a, Expr> for Sort {
1062    fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>(
1063        &'a self,
1064        f: F,
1065    ) -> Result<TreeNodeRecursion> {
1066        self.expr.apply_elements(f)
1067    }
1068
1069    fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
1070        self,
1071        f: F,
1072    ) -> Result<Transformed<Self>> {
1073        self.expr
1074            .map_elements(f)?
1075            .map_data(|expr| Ok(Self { expr, ..self }))
1076    }
1077}
1078
1079/// Aggregate function
1080///
1081/// See also  [`ExprFunctionExt`] to set these fields on `Expr`
1082///
1083/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
1084#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1085pub struct AggregateFunction {
1086    /// Name of the function
1087    pub func: Arc<AggregateUDF>,
1088    pub params: AggregateFunctionParams,
1089}
1090
1091#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1092pub struct AggregateFunctionParams {
1093    pub args: Vec<Expr>,
1094    /// Whether this is a DISTINCT aggregation or not
1095    pub distinct: bool,
1096    /// Optional filter
1097    pub filter: Option<Box<Expr>>,
1098    /// Optional ordering
1099    pub order_by: Vec<Sort>,
1100    pub null_treatment: Option<NullTreatment>,
1101}
1102
1103impl AggregateFunction {
1104    /// Create a new AggregateFunction expression with a user-defined function (UDF)
1105    pub fn new_udf(
1106        func: Arc<AggregateUDF>,
1107        args: Vec<Expr>,
1108        distinct: bool,
1109        filter: Option<Box<Expr>>,
1110        order_by: Vec<Sort>,
1111        null_treatment: Option<NullTreatment>,
1112    ) -> Self {
1113        Self {
1114            func,
1115            params: AggregateFunctionParams {
1116                args,
1117                distinct,
1118                filter,
1119                order_by,
1120                null_treatment,
1121            },
1122        }
1123    }
1124}
1125
/// A function used as a SQL window function
///
/// In SQL, you can use:
/// - Actual window functions ([`WindowUDF`])
/// - Normal aggregate functions ([`AggregateUDF`])
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum WindowFunctionDefinition {
    /// A user defined aggregate function
    AggregateUDF(Arc<AggregateUDF>),
    /// A user defined window function
    WindowUDF(Arc<WindowUDF>),
}
1138
1139impl WindowFunctionDefinition {
1140    /// Returns the datatype of the window function
1141    pub fn return_field(
1142        &self,
1143        input_expr_fields: &[FieldRef],
1144        display_name: &str,
1145    ) -> Result<FieldRef> {
1146        match self {
1147            WindowFunctionDefinition::AggregateUDF(fun) => {
1148                fun.return_field(input_expr_fields)
1149            }
1150            WindowFunctionDefinition::WindowUDF(fun) => {
1151                fun.field(WindowUDFFieldArgs::new(input_expr_fields, display_name))
1152            }
1153        }
1154    }
1155
1156    /// The signatures supported by the function `fun`.
1157    pub fn signature(&self) -> Signature {
1158        match self {
1159            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
1160            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
1161        }
1162    }
1163
1164    /// Function's name for display
1165    pub fn name(&self) -> &str {
1166        match self {
1167            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
1168            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
1169        }
1170    }
1171
1172    /// Return the the inner window simplification function, if any
1173    ///
1174    /// See [`WindowFunctionSimplification`] for more information
1175    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1176        match self {
1177            WindowFunctionDefinition::AggregateUDF(_) => None,
1178            WindowFunctionDefinition::WindowUDF(udwf) => udwf.simplify(),
1179        }
1180    }
1181}
1182
1183impl Display for WindowFunctionDefinition {
1184    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1185        match self {
1186            WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f),
1187            WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f),
1188        }
1189    }
1190}
1191
1192impl From<Arc<AggregateUDF>> for WindowFunctionDefinition {
1193    fn from(value: Arc<AggregateUDF>) -> Self {
1194        Self::AggregateUDF(value)
1195    }
1196}
1197
1198impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
1199    fn from(value: Arc<WindowUDF>) -> Self {
1200        Self::WindowUDF(value)
1201    }
1202}
1203
1204/// Window function
1205///
1206/// Holds the actual function to call [`WindowFunction`] as well as its
1207/// arguments (`args`) and the contents of the `OVER` clause:
1208///
1209/// 1. `PARTITION BY`
1210/// 2. `ORDER BY`
1211/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
1212///
1213/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
1214///
1215/// [`ExprFunctionExt`]: crate::ExprFunctionExt
1216#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1217pub struct WindowFunction {
1218    /// Name of the function
1219    pub fun: WindowFunctionDefinition,
1220    pub params: WindowFunctionParams,
1221}
1222
1223#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1224pub struct WindowFunctionParams {
1225    /// List of expressions to feed to the functions as arguments
1226    pub args: Vec<Expr>,
1227    /// List of partition by expressions
1228    pub partition_by: Vec<Expr>,
1229    /// List of order by expressions
1230    pub order_by: Vec<Sort>,
1231    /// Window frame
1232    pub window_frame: WindowFrame,
1233    /// Optional filter expression (FILTER (WHERE ...))
1234    pub filter: Option<Box<Expr>>,
1235    /// Specifies how NULL value is treated: ignore or respect
1236    pub null_treatment: Option<NullTreatment>,
1237    /// Distinct flag
1238    pub distinct: bool,
1239}
1240
1241impl WindowFunction {
1242    /// Create a new Window expression with the specified argument an
1243    /// empty `OVER` clause
1244    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
1245        Self {
1246            fun: fun.into(),
1247            params: WindowFunctionParams {
1248                args,
1249                partition_by: Vec::default(),
1250                order_by: Vec::default(),
1251                window_frame: WindowFrame::new(None),
1252                filter: None,
1253                null_treatment: None,
1254                distinct: false,
1255            },
1256        }
1257    }
1258
1259    /// Return the the inner window simplification function, if any
1260    ///
1261    /// See [`WindowFunctionSimplification`] for more information
1262    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1263        self.fun.simplify()
1264    }
1265}
1266
1267/// EXISTS expression
1268#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1269pub struct Exists {
1270    /// Subquery that will produce a single column of data
1271    pub subquery: Subquery,
1272    /// Whether the expression is negated
1273    pub negated: bool,
1274}
1275
1276impl Exists {
1277    // Create a new Exists expression.
1278    pub fn new(subquery: Subquery, negated: bool) -> Self {
1279        Self { subquery, negated }
1280    }
1281}
1282
1283/// InList expression
1284#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1285pub struct InList {
1286    /// The expression to compare
1287    pub expr: Box<Expr>,
1288    /// The list of values to compare against
1289    pub list: Vec<Expr>,
1290    /// Whether the expression is negated
1291    pub negated: bool,
1292}
1293
1294impl InList {
1295    /// Create a new InList expression
1296    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
1297        Self {
1298            expr,
1299            list,
1300            negated,
1301        }
1302    }
1303}
1304
1305/// IN subquery
1306#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1307pub struct InSubquery {
1308    /// The expression to compare
1309    pub expr: Box<Expr>,
1310    /// Subquery that will produce a single column of data to compare against
1311    pub subquery: Subquery,
1312    /// Whether the expression is negated
1313    pub negated: bool,
1314}
1315
1316impl InSubquery {
1317    /// Create a new InSubquery expression
1318    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
1319        Self {
1320            expr,
1321            subquery,
1322            negated,
1323        }
1324    }
1325}
1326
1327/// Placeholder, representing bind parameter values such as `$1` or `$name`.
1328///
1329/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
1330/// or can be specified directly using `PREPARE` statements.
1331#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1332pub struct Placeholder {
1333    /// The identifier of the parameter, including the leading `$` (e.g, `"$1"` or `"$foo"`)
1334    pub id: String,
1335    /// The type the parameter will be filled in with
1336    pub data_type: Option<DataType>,
1337}
1338
1339impl Placeholder {
1340    /// Create a new Placeholder expression
1341    pub fn new(id: String, data_type: Option<DataType>) -> Self {
1342        Self { id, data_type }
1343    }
1344}
1345
1346/// Grouping sets
1347///
1348/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
1349/// for Postgres definition.
1350/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
1351/// for Apache Spark definition.
1352#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1353pub enum GroupingSet {
1354    /// Rollup grouping sets
1355    Rollup(Vec<Expr>),
1356    /// Cube grouping sets
1357    Cube(Vec<Expr>),
1358    /// User-defined grouping sets
1359    GroupingSets(Vec<Vec<Expr>>),
1360}
1361
1362impl GroupingSet {
1363    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
1364    /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate
1365    /// the exprs in the underlying sets.
1366    pub fn distinct_expr(&self) -> Vec<&Expr> {
1367        match self {
1368            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
1369                exprs.iter().collect()
1370            }
1371            GroupingSet::GroupingSets(groups) => {
1372                let mut exprs: Vec<&Expr> = vec![];
1373                for exp in groups.iter().flatten() {
1374                    if !exprs.contains(&exp) {
1375                        exprs.push(exp);
1376                    }
1377                }
1378                exprs
1379            }
1380        }
1381    }
1382}
1383
1384/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`.
1385#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
1386pub struct WildcardOptions {
1387    /// `[ILIKE...]`.
1388    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1389    pub ilike: Option<IlikeSelectItem>,
1390    /// `[EXCLUDE...]`.
1391    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1392    pub exclude: Option<ExcludeSelectItem>,
1393    /// `[EXCEPT...]`.
1394    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
1395    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
1396    pub except: Option<ExceptSelectItem>,
1397    /// `[REPLACE]`
1398    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
1399    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
1400    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1401    pub replace: Option<PlannedReplaceSelectItem>,
1402    /// `[RENAME ...]`.
1403    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1404    pub rename: Option<RenameSelectItem>,
1405}
1406
1407impl WildcardOptions {
1408    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1409        WildcardOptions {
1410            ilike: self.ilike,
1411            exclude: self.exclude,
1412            except: self.except,
1413            replace: Some(replace),
1414            rename: self.rename,
1415        }
1416    }
1417}
1418
1419impl Display for WildcardOptions {
1420    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1421        if let Some(ilike) = &self.ilike {
1422            write!(f, " {ilike}")?;
1423        }
1424        if let Some(exclude) = &self.exclude {
1425            write!(f, " {exclude}")?;
1426        }
1427        if let Some(except) = &self.except {
1428            write!(f, " {except}")?;
1429        }
1430        if let Some(replace) = &self.replace {
1431            write!(f, " {replace}")?;
1432        }
1433        if let Some(rename) = &self.rename {
1434            write!(f, " {rename}")?;
1435        }
1436        Ok(())
1437    }
1438}
1439
1440/// The planned expressions for `REPLACE`
1441#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
1442pub struct PlannedReplaceSelectItem {
1443    /// The original ast nodes
1444    pub items: Vec<ReplaceSelectElement>,
1445    /// The expression planned from the ast nodes. They will be used when expanding the wildcard.
1446    pub planned_expressions: Vec<Expr>,
1447}
1448
1449impl Display for PlannedReplaceSelectItem {
1450    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1451        write!(f, "REPLACE")?;
1452        write!(f, " ({})", display_comma_separated(&self.items))?;
1453        Ok(())
1454    }
1455}
1456
1457impl PlannedReplaceSelectItem {
1458    pub fn items(&self) -> &[ReplaceSelectElement] {
1459        &self.items
1460    }
1461
1462    pub fn expressions(&self) -> &[Expr] {
1463        &self.planned_expressions
1464    }
1465}
1466
1467impl Expr {
    /// The name of the column (field) that this `Expr` will produce.
    ///
    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
    /// [`Schema`] will have a field with this name.
    ///
    /// Note that the resulting string is subtly different from the `Display`
    /// representation for certain `Expr`. Some differences:
    ///
    /// 1. [`Expr::Alias`], which shows only the alias itself
    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
    ///
    /// The returned value borrows `self` and is rendered lazily through its
    /// `Display` implementation; call `.to_string()` to obtain an owned name.
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo").eq(lit(42));
    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
    ///
    /// let expr = col("foo").alias("bar").eq(lit(11));
    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
    /// ```
    ///
    /// [`Schema`]: arrow::datatypes::Schema
    pub fn schema_name(&self) -> impl Display + '_ {
        SchemaDisplay(self)
    }
1493
    /// Human readable display formatting for this expression.
    ///
    /// This function is primarily used in printing the explain tree output,
    /// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
    /// show how expressions are used in physical and logical plans. See
    /// [`Expr`] for other ways to format expressions.
    ///
    /// Note this format is intended for human consumption rather than SQL for
    /// other systems. If you need SQL to pass to other systems, consider using
    /// [`Unparser`].
    ///
    /// The returned value borrows `self` and is rendered through its `Display`
    /// implementation.
    ///
    /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo") + lit(42);
    /// // For EXPLAIN output:
    /// // "foo + 42"
    /// println!("{}", expr.human_display());
    /// ```
    pub fn human_display(&self) -> impl Display + '_ {
        SqlDisplay(self)
    }
1518
1519    /// Returns the qualifier and the schema name of this expression.
1520    ///
1521    /// Used when the expression forms the output field of a certain plan.
1522    /// The result is the field's qualifier and field name in the plan's
1523    /// output schema. We can use this qualified name to reference the field.
1524    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1525        match self {
1526            Expr::Column(Column {
1527                relation,
1528                name,
1529                spans: _,
1530            }) => (relation.clone(), name.clone()),
1531            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1532            _ => (None, self.schema_name().to_string()),
1533        }
1534    }
1535
    /// Return a string naming the variant represented by `self`.
    /// Useful for non-rust based bindings.
    ///
    /// Most names match the Rust variant identifier, with two exceptions:
    /// [`Expr::OuterReferenceColumn`] is reported as `"Outer"` and
    /// [`Expr::SimilarTo`] as `"RLike"`.
    pub fn variant_name(&self) -> &str {
        match self {
            Expr::AggregateFunction { .. } => "AggregateFunction",
            Expr::Alias(..) => "Alias",
            Expr::Between { .. } => "Between",
            Expr::BinaryExpr { .. } => "BinaryExpr",
            Expr::Case { .. } => "Case",
            Expr::Cast { .. } => "Cast",
            Expr::Column(..) => "Column",
            Expr::OuterReferenceColumn(_, _) => "Outer",
            Expr::Exists { .. } => "Exists",
            Expr::GroupingSet(..) => "GroupingSet",
            Expr::InList { .. } => "InList",
            Expr::InSubquery(..) => "InSubquery",
            Expr::IsNotNull(..) => "IsNotNull",
            Expr::IsNull(..) => "IsNull",
            Expr::Like { .. } => "Like",
            Expr::SimilarTo { .. } => "RLike",
            Expr::IsTrue(..) => "IsTrue",
            Expr::IsFalse(..) => "IsFalse",
            Expr::IsUnknown(..) => "IsUnknown",
            Expr::IsNotTrue(..) => "IsNotTrue",
            Expr::IsNotFalse(..) => "IsNotFalse",
            Expr::IsNotUnknown(..) => "IsNotUnknown",
            Expr::Literal(..) => "Literal",
            Expr::Negative(..) => "Negative",
            Expr::Not(..) => "Not",
            Expr::Placeholder(_) => "Placeholder",
            Expr::ScalarFunction(..) => "ScalarFunction",
            Expr::ScalarSubquery { .. } => "ScalarSubquery",
            Expr::ScalarVariable(..) => "ScalarVariable",
            Expr::TryCast { .. } => "TryCast",
            Expr::WindowFunction { .. } => "WindowFunction",
            // The variant is deprecated but must still be matched for exhaustiveness.
            #[expect(deprecated)]
            Expr::Wildcard { .. } => "Wildcard",
            Expr::Unnest { .. } => "Unnest",
        }
    }
1576
    /// Return `self == other`
    ///
    /// Consumes both operands and combines them with `Operator::Eq` via
    /// `binary_expr`.
    pub fn eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Eq, other)
    }
1581
    /// Return `self != other`
    ///
    /// Consumes both operands and combines them with `Operator::NotEq` via
    /// `binary_expr`.
    pub fn not_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::NotEq, other)
    }
1586
    /// Return `self > other`
    ///
    /// Consumes both operands and combines them with `Operator::Gt` via
    /// `binary_expr`.
    pub fn gt(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Gt, other)
    }
1591
    /// Return `self >= other`
    ///
    /// Consumes both operands and combines them with `Operator::GtEq` via
    /// `binary_expr`.
    pub fn gt_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::GtEq, other)
    }
1596
    /// Return `self < other`
    ///
    /// Consumes both operands and combines them with `Operator::Lt` via
    /// `binary_expr`.
    pub fn lt(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Lt, other)
    }
1601
    /// Return `self <= other`
    ///
    /// Consumes both operands and combines them with `Operator::LtEq` via
    /// `binary_expr`.
    pub fn lt_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::LtEq, other)
    }
1606
    /// Return `self AND other` (logical conjunction, written `self && other` in Rust)
    ///
    /// Consumes both operands and combines them with `Operator::And` via
    /// `binary_expr`.
    pub fn and(self, other: Expr) -> Expr {
        binary_expr(self, Operator::And, other)
    }
1611
    /// Return `self OR other` (logical disjunction, written `self || other` in Rust)
    ///
    /// Consumes both operands and combines them with `Operator::Or` via
    /// `binary_expr`.
    pub fn or(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Or, other)
    }
1616
1617    /// Return `self LIKE other`
1618    pub fn like(self, other: Expr) -> Expr {
1619        Expr::Like(Like::new(
1620            false,
1621            Box::new(self),
1622            Box::new(other),
1623            None,
1624            false,
1625        ))
1626    }
1627
1628    /// Return `self NOT LIKE other`
1629    pub fn not_like(self, other: Expr) -> Expr {
1630        Expr::Like(Like::new(
1631            true,
1632            Box::new(self),
1633            Box::new(other),
1634            None,
1635            false,
1636        ))
1637    }
1638
1639    /// Return `self ILIKE other`
1640    pub fn ilike(self, other: Expr) -> Expr {
1641        Expr::Like(Like::new(
1642            false,
1643            Box::new(self),
1644            Box::new(other),
1645            None,
1646            true,
1647        ))
1648    }
1649
1650    /// Return `self NOT ILIKE other`
1651    pub fn not_ilike(self, other: Expr) -> Expr {
1652        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1653    }
1654
1655    /// Return the name to use for the specific Expr
1656    pub fn name_for_alias(&self) -> Result<String> {
1657        Ok(self.schema_name().to_string())
1658    }
1659
1660    /// Ensure `expr` has the name as `original_name` by adding an
1661    /// alias if necessary.
1662    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1663        let new_name = self.name_for_alias()?;
1664        if new_name == original_name {
1665            return Ok(self);
1666        }
1667
1668        Ok(self.alias(original_name))
1669    }
1670
1671    /// Return `self AS name` alias expression
1672    pub fn alias(self, name: impl Into<String>) -> Expr {
1673        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1674    }
1675
1676    /// Return `self AS name` alias expression with metadata
1677    ///
1678    /// The metadata will be attached to the Arrow Schema field when the expression
1679    /// is converted to a field via `Expr.to_field()`.
1680    ///
1681    /// # Example
1682    /// ```
1683    /// # use datafusion_expr::col;
1684    /// # use std::collections::HashMap;
1685    /// # use datafusion_expr::expr::FieldMetadata;
1686    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1687    /// let metadata = FieldMetadata::from(metadata);
1688    /// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
1689    /// ```
1690    ///
1691    pub fn alias_with_metadata(
1692        self,
1693        name: impl Into<String>,
1694        metadata: Option<FieldMetadata>,
1695    ) -> Expr {
1696        Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
1697    }
1698
1699    /// Return `self AS name` alias expression with a specific qualifier
1700    pub fn alias_qualified(
1701        self,
1702        relation: Option<impl Into<TableReference>>,
1703        name: impl Into<String>,
1704    ) -> Expr {
1705        Expr::Alias(Alias::new(self, relation, name.into()))
1706    }
1707
1708    /// Return `self AS name` alias expression with a specific qualifier and metadata
1709    ///
1710    /// The metadata will be attached to the Arrow Schema field when the expression
1711    /// is converted to a field via `Expr.to_field()`.
1712    ///
1713    /// # Example
1714    /// ```
1715    /// # use datafusion_expr::col;
1716    /// # use std::collections::HashMap;
1717    /// # use datafusion_expr::expr::FieldMetadata;
1718    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1719    /// let metadata = FieldMetadata::from(metadata);
1720    /// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
1721    /// ```
1722    ///
1723    pub fn alias_qualified_with_metadata(
1724        self,
1725        relation: Option<impl Into<TableReference>>,
1726        name: impl Into<String>,
1727        metadata: Option<FieldMetadata>,
1728    ) -> Expr {
1729        Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
1730    }
1731
1732    /// Remove an alias from an expression if one exists.
1733    ///
1734    /// If the expression is not an alias, the expression is returned unchanged.
1735    /// This method does not remove aliases from nested expressions.
1736    ///
1737    /// # Example
1738    /// ```
1739    /// # use datafusion_expr::col;
1740    /// // `foo as "bar"` is unaliased to `foo`
1741    /// let expr = col("foo").alias("bar");
1742    /// assert_eq!(expr.unalias(), col("foo"));
1743    ///
1744    /// // `foo as "bar" + baz` is not unaliased
1745    /// let expr = col("foo").alias("bar") + col("baz");
1746    /// assert_eq!(expr.clone().unalias(), expr);
1747    ///
1748    /// // `foo as "bar" as "baz" is unaliased to foo as "bar"
1749    /// let expr = col("foo").alias("bar").alias("baz");
1750    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1751    /// ```
1752    pub fn unalias(self) -> Expr {
1753        match self {
1754            Expr::Alias(alias) => *alias.expr,
1755            _ => self,
1756        }
1757    }
1758
1759    /// Recursively removed potentially multiple aliases from an expression.
1760    ///
1761    /// This method removes nested aliases and returns [`Transformed`]
1762    /// to signal if the expression was changed.
1763    ///
1764    /// # Example
1765    /// ```
1766    /// # use datafusion_expr::col;
1767    /// // `foo as "bar"` is unaliased to `foo`
1768    /// let expr = col("foo").alias("bar");
1769    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1770    ///
1771    /// // `foo as "bar" + baz` is  unaliased
1772    /// let expr = col("foo").alias("bar") + col("baz");
1773    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1774    ///
1775    /// // `foo as "bar" as "baz" is unalaised to foo
1776    /// let expr = col("foo").alias("bar").alias("baz");
1777    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1778    /// ```
1779    pub fn unalias_nested(self) -> Transformed<Expr> {
1780        self.transform_down_up(
1781            |expr| {
1782                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1783                let recursion = if matches!(
1784                    expr,
1785                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1786                ) {
1787                    // Subqueries could contain aliases so don't recurse into those
1788                    TreeNodeRecursion::Jump
1789                } else {
1790                    TreeNodeRecursion::Continue
1791                };
1792                Ok(Transformed::new(expr, false, recursion))
1793            },
1794            |expr| {
1795                // f_up: unalias on up so we can remove nested aliases like
1796                // `(x as foo) as bar`
1797                if let Expr::Alias(alias) = expr {
1798                    match alias
1799                        .metadata
1800                        .as_ref()
1801                        .map(|h| h.is_empty())
1802                        .unwrap_or(true)
1803                    {
1804                        true => Ok(Transformed::yes(*alias.expr)),
1805                        false => Ok(Transformed::no(Expr::Alias(alias))),
1806                    }
1807                } else {
1808                    Ok(Transformed::no(expr))
1809                }
1810            },
1811        )
1812        // Unreachable code: internal closure doesn't return err
1813        .unwrap()
1814    }
1815
1816    /// Return `self IN <list>` if `negated` is false, otherwise
1817    /// return `self NOT IN <list>`.a
1818    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
1819        Expr::InList(InList::new(Box::new(self), list, negated))
1820    }
1821
1822    /// Return `IsNull(Box(self))
1823    pub fn is_null(self) -> Expr {
1824        Expr::IsNull(Box::new(self))
1825    }
1826
1827    /// Return `IsNotNull(Box(self))
1828    pub fn is_not_null(self) -> Expr {
1829        Expr::IsNotNull(Box::new(self))
1830    }
1831
1832    /// Create a sort configuration from an existing expression.
1833    ///
1834    /// ```
1835    /// # use datafusion_expr::col;
1836    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
1837    /// ```
1838    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
1839        Sort::new(self, asc, nulls_first)
1840    }
1841
1842    /// Return `IsTrue(Box(self))`
1843    pub fn is_true(self) -> Expr {
1844        Expr::IsTrue(Box::new(self))
1845    }
1846
1847    /// Return `IsNotTrue(Box(self))`
1848    pub fn is_not_true(self) -> Expr {
1849        Expr::IsNotTrue(Box::new(self))
1850    }
1851
1852    /// Return `IsFalse(Box(self))`
1853    pub fn is_false(self) -> Expr {
1854        Expr::IsFalse(Box::new(self))
1855    }
1856
1857    /// Return `IsNotFalse(Box(self))`
1858    pub fn is_not_false(self) -> Expr {
1859        Expr::IsNotFalse(Box::new(self))
1860    }
1861
1862    /// Return `IsUnknown(Box(self))`
1863    pub fn is_unknown(self) -> Expr {
1864        Expr::IsUnknown(Box::new(self))
1865    }
1866
1867    /// Return `IsNotUnknown(Box(self))`
1868    pub fn is_not_unknown(self) -> Expr {
1869        Expr::IsNotUnknown(Box::new(self))
1870    }
1871
1872    /// return `self BETWEEN low AND high`
1873    pub fn between(self, low: Expr, high: Expr) -> Expr {
1874        Expr::Between(Between::new(
1875            Box::new(self),
1876            false,
1877            Box::new(low),
1878            Box::new(high),
1879        ))
1880    }
1881
1882    /// Return `self NOT BETWEEN low AND high`
1883    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1884        Expr::Between(Between::new(
1885            Box::new(self),
1886            true,
1887            Box::new(low),
1888            Box::new(high),
1889        ))
1890    }
1891    /// Return a reference to the inner `Column` if any
1892    ///
1893    /// returns `None` if the expression is not a `Column`
1894    ///
1895    /// Note: None may be returned for expressions that are not `Column` but
1896    /// are convertible to `Column` such as `Cast` expressions.
1897    ///
1898    /// Example
1899    /// ```
1900    /// # use datafusion_common::Column;
1901    /// use datafusion_expr::{col, Expr};
1902    /// let expr = col("foo");
1903    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
1904    ///
1905    /// let expr = col("foo").alias("bar");
1906    /// assert_eq!(expr.try_as_col(), None);
1907    /// ```
1908    pub fn try_as_col(&self) -> Option<&Column> {
1909        if let Expr::Column(it) = self {
1910            Some(it)
1911        } else {
1912            None
1913        }
1914    }
1915
1916    /// Returns the inner `Column` if any. This is a specialized version of
1917    /// [`Self::try_as_col`] that take Cast expressions into account when the
1918    /// expression is as on condition for joins.
1919    ///
1920    /// Called this method when you are sure that the expression is a `Column`
1921    /// or a `Cast` expression that wraps a `Column`.
1922    pub fn get_as_join_column(&self) -> Option<&Column> {
1923        match self {
1924            Expr::Column(c) => Some(c),
1925            Expr::Cast(Cast { expr, .. }) => match &**expr {
1926                Expr::Column(c) => Some(c),
1927                _ => None,
1928            },
1929            _ => None,
1930        }
1931    }
1932
1933    /// Return all references to columns in this expression.
1934    ///
1935    /// # Example
1936    /// ```
1937    /// # use std::collections::HashSet;
1938    /// # use datafusion_common::Column;
1939    /// # use datafusion_expr::col;
1940    /// // For an expression `a + (b * a)`
1941    /// let expr = col("a") + (col("b") * col("a"));
1942    /// let refs = expr.column_refs();
1943    /// // refs contains "a" and "b"
1944    /// assert_eq!(refs.len(), 2);
1945    /// assert!(refs.contains(&Column::new_unqualified("a")));
1946    /// assert!(refs.contains(&Column::new_unqualified("b")));
1947    /// ```
1948    pub fn column_refs(&self) -> HashSet<&Column> {
1949        let mut using_columns = HashSet::new();
1950        self.add_column_refs(&mut using_columns);
1951        using_columns
1952    }
1953
1954    /// Adds references to all columns in this expression to the set
1955    ///
1956    /// See [`Self::column_refs`] for details
1957    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
1958        self.apply(|expr| {
1959            if let Expr::Column(col) = expr {
1960                set.insert(col);
1961            }
1962            Ok(TreeNodeRecursion::Continue)
1963        })
1964        .expect("traversal is infallible");
1965    }
1966
1967    /// Return all references to columns and their occurrence counts in the expression.
1968    ///
1969    /// # Example
1970    /// ```
1971    /// # use std::collections::HashMap;
1972    /// # use datafusion_common::Column;
1973    /// # use datafusion_expr::col;
1974    /// // For an expression `a + (b * a)`
1975    /// let expr = col("a") + (col("b") * col("a"));
1976    /// let mut refs = expr.column_refs_counts();
1977    /// // refs contains "a" and "b"
1978    /// assert_eq!(refs.len(), 2);
1979    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
1980    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
1981    /// ```
1982    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
1983        let mut map = HashMap::new();
1984        self.add_column_ref_counts(&mut map);
1985        map
1986    }
1987
1988    /// Adds references to all columns and their occurrence counts in the expression to
1989    /// the map.
1990    ///
1991    /// See [`Self::column_refs_counts`] for details
1992    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
1993        self.apply(|expr| {
1994            if let Expr::Column(col) = expr {
1995                *map.entry(col).or_default() += 1;
1996            }
1997            Ok(TreeNodeRecursion::Continue)
1998        })
1999        .expect("traversal is infallible");
2000    }
2001
2002    /// Returns true if there are any column references in this Expr
2003    pub fn any_column_refs(&self) -> bool {
2004        self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
2005            .expect("exists closure is infallible")
2006    }
2007
2008    /// Return true if the expression contains out reference(correlated) expressions.
2009    pub fn contains_outer(&self) -> bool {
2010        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
2011            .expect("exists closure is infallible")
2012    }
2013
2014    /// Returns true if the expression node is volatile, i.e. whether it can return
2015    /// different results when evaluated multiple times with the same input.
2016    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
2017    /// - `rand()` returns `true`,
2018    /// - `a + rand()` returns `false`
2019    pub fn is_volatile_node(&self) -> bool {
2020        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
2021    }
2022
2023    /// Returns true if the expression is volatile, i.e. whether it can return different
2024    /// results when evaluated multiple times with the same input.
2025    ///
2026    /// For example the function call `RANDOM()` is volatile as each call will
2027    /// return a different value.
2028    ///
2029    /// See [`Volatility`] for more information.
2030    pub fn is_volatile(&self) -> bool {
2031        self.exists(|expr| Ok(expr.is_volatile_node()))
2032            .expect("exists closure is infallible")
2033    }
2034
2035    /// Recursively find all [`Expr::Placeholder`] expressions, and
2036    /// to infer their [`DataType`] from the context of their use.
2037    ///
2038    /// For example, given an expression like `<int32> = $0` will infer `$0` to
2039    /// have type `int32`.
2040    ///
2041    /// Returns transformed expression and flag that is true if expression contains
2042    /// at least one placeholder.
2043    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
2044        let mut has_placeholder = false;
2045        self.transform(|mut expr| {
2046            match &mut expr {
2047                // Default to assuming the arguments are the same type
2048                Expr::BinaryExpr(BinaryExpr { left, op: _, right }) => {
2049                    rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
2050                    rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
2051                }
2052                Expr::Between(Between {
2053                    expr,
2054                    negated: _,
2055                    low,
2056                    high,
2057                }) => {
2058                    rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
2059                    rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
2060                }
2061                Expr::InList(InList {
2062                    expr,
2063                    list,
2064                    negated: _,
2065                }) => {
2066                    for item in list.iter_mut() {
2067                        rewrite_placeholder(item, expr.as_ref(), schema)?;
2068                    }
2069                }
2070                Expr::Like(Like { expr, pattern, .. })
2071                | Expr::SimilarTo(Like { expr, pattern, .. }) => {
2072                    rewrite_placeholder(pattern.as_mut(), expr.as_ref(), schema)?;
2073                }
2074                Expr::Placeholder(_) => {
2075                    has_placeholder = true;
2076                }
2077                _ => {}
2078            }
2079            Ok(Transformed::yes(expr))
2080        })
2081        .data()
2082        .map(|data| (data, has_placeholder))
2083    }
2084
2085    /// Returns true if some of this `exprs` subexpressions may not be evaluated
2086    /// and thus any side effects (like divide by zero) may not be encountered
2087    pub fn short_circuits(&self) -> bool {
2088        match self {
2089            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
2090            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
2091                matches!(op, Operator::And | Operator::Or)
2092            }
2093            Expr::Case { .. } => true,
2094            // Use explicit pattern match instead of a default
2095            // implementation, so that in the future if someone adds
2096            // new Expr types, they will check here as well
2097            // TODO: remove the next line after `Expr::Wildcard` is removed
2098            #[expect(deprecated)]
2099            Expr::AggregateFunction(..)
2100            | Expr::Alias(..)
2101            | Expr::Between(..)
2102            | Expr::Cast(..)
2103            | Expr::Column(..)
2104            | Expr::Exists(..)
2105            | Expr::GroupingSet(..)
2106            | Expr::InList(..)
2107            | Expr::InSubquery(..)
2108            | Expr::IsFalse(..)
2109            | Expr::IsNotFalse(..)
2110            | Expr::IsNotNull(..)
2111            | Expr::IsNotTrue(..)
2112            | Expr::IsNotUnknown(..)
2113            | Expr::IsNull(..)
2114            | Expr::IsTrue(..)
2115            | Expr::IsUnknown(..)
2116            | Expr::Like(..)
2117            | Expr::ScalarSubquery(..)
2118            | Expr::ScalarVariable(_, _)
2119            | Expr::SimilarTo(..)
2120            | Expr::Not(..)
2121            | Expr::Negative(..)
2122            | Expr::OuterReferenceColumn(_, _)
2123            | Expr::TryCast(..)
2124            | Expr::Unnest(..)
2125            | Expr::Wildcard { .. }
2126            | Expr::WindowFunction(..)
2127            | Expr::Literal(..)
2128            | Expr::Placeholder(..) => false,
2129        }
2130    }
2131
2132    /// Returns a reference to the set of locations in the SQL query where this
2133    /// expression appears, if known. [`None`] is returned if the expression
2134    /// type doesn't support tracking locations yet.
2135    pub fn spans(&self) -> Option<&Spans> {
2136        match self {
2137            Expr::Column(col) => Some(&col.spans),
2138            _ => None,
2139        }
2140    }
2141
2142    /// Check if the Expr is literal and get the literal value if it is.
2143    pub fn as_literal(&self) -> Option<&ScalarValue> {
2144        if let Expr::Literal(lit, _) = self {
2145            Some(lit)
2146        } else {
2147            None
2148        }
2149    }
2150}
2151
2152impl Normalizeable for Expr {
2153    fn can_normalize(&self) -> bool {
2154        #[allow(clippy::match_like_matches_macro)]
2155        match self {
2156            Expr::BinaryExpr(BinaryExpr {
2157                op:
2158                    _op @ (Operator::Plus
2159                    | Operator::Multiply
2160                    | Operator::BitwiseAnd
2161                    | Operator::BitwiseOr
2162                    | Operator::BitwiseXor
2163                    | Operator::Eq
2164                    | Operator::NotEq),
2165                ..
2166            }) => true,
2167            _ => false,
2168        }
2169    }
2170}
2171
2172impl NormalizeEq for Expr {
2173    fn normalize_eq(&self, other: &Self) -> bool {
2174        match (self, other) {
2175            (
2176                Expr::BinaryExpr(BinaryExpr {
2177                    left: self_left,
2178                    op: self_op,
2179                    right: self_right,
2180                }),
2181                Expr::BinaryExpr(BinaryExpr {
2182                    left: other_left,
2183                    op: other_op,
2184                    right: other_right,
2185                }),
2186            ) => {
2187                if self_op != other_op {
2188                    return false;
2189                }
2190
2191                if matches!(
2192                    self_op,
2193                    Operator::Plus
2194                        | Operator::Multiply
2195                        | Operator::BitwiseAnd
2196                        | Operator::BitwiseOr
2197                        | Operator::BitwiseXor
2198                        | Operator::Eq
2199                        | Operator::NotEq
2200                ) {
2201                    (self_left.normalize_eq(other_left)
2202                        && self_right.normalize_eq(other_right))
2203                        || (self_left.normalize_eq(other_right)
2204                            && self_right.normalize_eq(other_left))
2205                } else {
2206                    self_left.normalize_eq(other_left)
2207                        && self_right.normalize_eq(other_right)
2208                }
2209            }
2210            (
2211                Expr::Alias(Alias {
2212                    expr: self_expr,
2213                    relation: self_relation,
2214                    name: self_name,
2215                    ..
2216                }),
2217                Expr::Alias(Alias {
2218                    expr: other_expr,
2219                    relation: other_relation,
2220                    name: other_name,
2221                    ..
2222                }),
2223            ) => {
2224                self_name == other_name
2225                    && self_relation == other_relation
2226                    && self_expr.normalize_eq(other_expr)
2227            }
2228            (
2229                Expr::Like(Like {
2230                    negated: self_negated,
2231                    expr: self_expr,
2232                    pattern: self_pattern,
2233                    escape_char: self_escape_char,
2234                    case_insensitive: self_case_insensitive,
2235                }),
2236                Expr::Like(Like {
2237                    negated: other_negated,
2238                    expr: other_expr,
2239                    pattern: other_pattern,
2240                    escape_char: other_escape_char,
2241                    case_insensitive: other_case_insensitive,
2242                }),
2243            )
2244            | (
2245                Expr::SimilarTo(Like {
2246                    negated: self_negated,
2247                    expr: self_expr,
2248                    pattern: self_pattern,
2249                    escape_char: self_escape_char,
2250                    case_insensitive: self_case_insensitive,
2251                }),
2252                Expr::SimilarTo(Like {
2253                    negated: other_negated,
2254                    expr: other_expr,
2255                    pattern: other_pattern,
2256                    escape_char: other_escape_char,
2257                    case_insensitive: other_case_insensitive,
2258                }),
2259            ) => {
2260                self_negated == other_negated
2261                    && self_escape_char == other_escape_char
2262                    && self_case_insensitive == other_case_insensitive
2263                    && self_expr.normalize_eq(other_expr)
2264                    && self_pattern.normalize_eq(other_pattern)
2265            }
2266            (Expr::Not(self_expr), Expr::Not(other_expr))
2267            | (Expr::IsNull(self_expr), Expr::IsNull(other_expr))
2268            | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr))
2269            | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr))
2270            | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr))
2271            | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr))
2272            | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr))
2273            | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr))
2274            | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr))
2275            | (Expr::Negative(self_expr), Expr::Negative(other_expr))
2276            | (
2277                Expr::Unnest(Unnest { expr: self_expr }),
2278                Expr::Unnest(Unnest { expr: other_expr }),
2279            ) => self_expr.normalize_eq(other_expr),
2280            (
2281                Expr::Between(Between {
2282                    expr: self_expr,
2283                    negated: self_negated,
2284                    low: self_low,
2285                    high: self_high,
2286                }),
2287                Expr::Between(Between {
2288                    expr: other_expr,
2289                    negated: other_negated,
2290                    low: other_low,
2291                    high: other_high,
2292                }),
2293            ) => {
2294                self_negated == other_negated
2295                    && self_expr.normalize_eq(other_expr)
2296                    && self_low.normalize_eq(other_low)
2297                    && self_high.normalize_eq(other_high)
2298            }
2299            (
2300                Expr::Cast(Cast {
2301                    expr: self_expr,
2302                    data_type: self_data_type,
2303                }),
2304                Expr::Cast(Cast {
2305                    expr: other_expr,
2306                    data_type: other_data_type,
2307                }),
2308            )
2309            | (
2310                Expr::TryCast(TryCast {
2311                    expr: self_expr,
2312                    data_type: self_data_type,
2313                }),
2314                Expr::TryCast(TryCast {
2315                    expr: other_expr,
2316                    data_type: other_data_type,
2317                }),
2318            ) => self_data_type == other_data_type && self_expr.normalize_eq(other_expr),
2319            (
2320                Expr::ScalarFunction(ScalarFunction {
2321                    func: self_func,
2322                    args: self_args,
2323                }),
2324                Expr::ScalarFunction(ScalarFunction {
2325                    func: other_func,
2326                    args: other_args,
2327                }),
2328            ) => {
2329                self_func.name() == other_func.name()
2330                    && self_args.len() == other_args.len()
2331                    && self_args
2332                        .iter()
2333                        .zip(other_args.iter())
2334                        .all(|(a, b)| a.normalize_eq(b))
2335            }
2336            (
2337                Expr::AggregateFunction(AggregateFunction {
2338                    func: self_func,
2339                    params:
2340                        AggregateFunctionParams {
2341                            args: self_args,
2342                            distinct: self_distinct,
2343                            filter: self_filter,
2344                            order_by: self_order_by,
2345                            null_treatment: self_null_treatment,
2346                        },
2347                }),
2348                Expr::AggregateFunction(AggregateFunction {
2349                    func: other_func,
2350                    params:
2351                        AggregateFunctionParams {
2352                            args: other_args,
2353                            distinct: other_distinct,
2354                            filter: other_filter,
2355                            order_by: other_order_by,
2356                            null_treatment: other_null_treatment,
2357                        },
2358                }),
2359            ) => {
2360                self_func.name() == other_func.name()
2361                    && self_distinct == other_distinct
2362                    && self_null_treatment == other_null_treatment
2363                    && self_args.len() == other_args.len()
2364                    && self_args
2365                        .iter()
2366                        .zip(other_args.iter())
2367                        .all(|(a, b)| a.normalize_eq(b))
2368                    && match (self_filter, other_filter) {
2369                        (Some(self_filter), Some(other_filter)) => {
2370                            self_filter.normalize_eq(other_filter)
2371                        }
2372                        (None, None) => true,
2373                        _ => false,
2374                    }
2375                    && self_order_by
2376                        .iter()
2377                        .zip(other_order_by.iter())
2378                        .all(|(a, b)| {
2379                            a.asc == b.asc
2380                                && a.nulls_first == b.nulls_first
2381                                && a.expr.normalize_eq(&b.expr)
2382                        })
2383                    && self_order_by.len() == other_order_by.len()
2384            }
2385            (Expr::WindowFunction(left), Expr::WindowFunction(other)) => {
2386                let WindowFunction {
2387                    fun: self_fun,
2388                    params:
2389                        WindowFunctionParams {
2390                            args: self_args,
2391                            window_frame: self_window_frame,
2392                            partition_by: self_partition_by,
2393                            order_by: self_order_by,
2394                            filter: self_filter,
2395                            null_treatment: self_null_treatment,
2396                            distinct: self_distinct,
2397                        },
2398                } = left.as_ref();
2399                let WindowFunction {
2400                    fun: other_fun,
2401                    params:
2402                        WindowFunctionParams {
2403                            args: other_args,
2404                            window_frame: other_window_frame,
2405                            partition_by: other_partition_by,
2406                            order_by: other_order_by,
2407                            filter: other_filter,
2408                            null_treatment: other_null_treatment,
2409                            distinct: other_distinct,
2410                        },
2411                } = other.as_ref();
2412
2413                self_fun.name() == other_fun.name()
2414                    && self_window_frame == other_window_frame
2415                    && match (self_filter, other_filter) {
2416                        (Some(a), Some(b)) => a.normalize_eq(b),
2417                        (None, None) => true,
2418                        _ => false,
2419                    }
2420                    && self_null_treatment == other_null_treatment
2421                    && self_args.len() == other_args.len()
2422                    && self_args
2423                        .iter()
2424                        .zip(other_args.iter())
2425                        .all(|(a, b)| a.normalize_eq(b))
2426                    && self_partition_by
2427                        .iter()
2428                        .zip(other_partition_by.iter())
2429                        .all(|(a, b)| a.normalize_eq(b))
2430                    && self_order_by
2431                        .iter()
2432                        .zip(other_order_by.iter())
2433                        .all(|(a, b)| {
2434                            a.asc == b.asc
2435                                && a.nulls_first == b.nulls_first
2436                                && a.expr.normalize_eq(&b.expr)
2437                        })
2438                    && self_distinct == other_distinct
2439            }
2440            (
2441                Expr::Exists(Exists {
2442                    subquery: self_subquery,
2443                    negated: self_negated,
2444                }),
2445                Expr::Exists(Exists {
2446                    subquery: other_subquery,
2447                    negated: other_negated,
2448                }),
2449            ) => {
2450                self_negated == other_negated
2451                    && self_subquery.normalize_eq(other_subquery)
2452            }
2453            (
2454                Expr::InSubquery(InSubquery {
2455                    expr: self_expr,
2456                    subquery: self_subquery,
2457                    negated: self_negated,
2458                }),
2459                Expr::InSubquery(InSubquery {
2460                    expr: other_expr,
2461                    subquery: other_subquery,
2462                    negated: other_negated,
2463                }),
2464            ) => {
2465                self_negated == other_negated
2466                    && self_expr.normalize_eq(other_expr)
2467                    && self_subquery.normalize_eq(other_subquery)
2468            }
2469            (
2470                Expr::ScalarSubquery(self_subquery),
2471                Expr::ScalarSubquery(other_subquery),
2472            ) => self_subquery.normalize_eq(other_subquery),
2473            (
2474                Expr::GroupingSet(GroupingSet::Rollup(self_exprs)),
2475                Expr::GroupingSet(GroupingSet::Rollup(other_exprs)),
2476            )
2477            | (
2478                Expr::GroupingSet(GroupingSet::Cube(self_exprs)),
2479                Expr::GroupingSet(GroupingSet::Cube(other_exprs)),
2480            ) => {
2481                self_exprs.len() == other_exprs.len()
2482                    && self_exprs
2483                        .iter()
2484                        .zip(other_exprs.iter())
2485                        .all(|(a, b)| a.normalize_eq(b))
2486            }
2487            (
2488                Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)),
2489                Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)),
2490            ) => {
2491                self_exprs.len() == other_exprs.len()
2492                    && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| {
2493                        a.len() == b.len()
2494                            && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y))
2495                    })
2496            }
2497            (
2498                Expr::InList(InList {
2499                    expr: self_expr,
2500                    list: self_list,
2501                    negated: self_negated,
2502                }),
2503                Expr::InList(InList {
2504                    expr: other_expr,
2505                    list: other_list,
2506                    negated: other_negated,
2507                }),
2508            ) => {
2509                // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)`
2510                self_negated == other_negated
2511                    && self_expr.normalize_eq(other_expr)
2512                    && self_list.len() == other_list.len()
2513                    && self_list
2514                        .iter()
2515                        .zip(other_list.iter())
2516                        .all(|(a, b)| a.normalize_eq(b))
2517            }
2518            (
2519                Expr::Case(Case {
2520                    expr: self_expr,
2521                    when_then_expr: self_when_then_expr,
2522                    else_expr: self_else_expr,
2523                }),
2524                Expr::Case(Case {
2525                    expr: other_expr,
2526                    when_then_expr: other_when_then_expr,
2527                    else_expr: other_else_expr,
2528                }),
2529            ) => {
2530                // TODO: normalize_eq for when_then_expr
2531                // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END`
2532                self_when_then_expr.len() == other_when_then_expr.len()
2533                    && self_when_then_expr
2534                        .iter()
2535                        .zip(other_when_then_expr.iter())
2536                        .all(|((self_when, self_then), (other_when, other_then))| {
2537                            self_when.normalize_eq(other_when)
2538                                && self_then.normalize_eq(other_then)
2539                        })
2540                    && match (self_expr, other_expr) {
2541                        (Some(self_expr), Some(other_expr)) => {
2542                            self_expr.normalize_eq(other_expr)
2543                        }
2544                        (None, None) => true,
2545                        (_, _) => false,
2546                    }
2547                    && match (self_else_expr, other_else_expr) {
2548                        (Some(self_else_expr), Some(other_else_expr)) => {
2549                            self_else_expr.normalize_eq(other_else_expr)
2550                        }
2551                        (None, None) => true,
2552                        (_, _) => false,
2553                    }
2554            }
2555            (_, _) => self == other,
2556        }
2557    }
2558}
2559
impl HashNode for Expr {
    /// Feeds the data stored directly on this node into `state`.
    ///
    /// The variant discriminant is hashed first, followed by the variant's
    /// non-expression fields. Child expressions are bound to `_`-prefixed names
    /// (or ignored) and are not hashed here; presumably the caller hashes them
    /// while walking the tree — confirm against the `HashNode` trait docs.
    ///
    /// As it is pretty easy to forget changing this method when `Expr` changes, the
    /// implementation mostly avoids wildcard patterns (`..`, `_`) so that such
    /// changes are caught at compile time. A few arms below still use `..` / `_`
    /// for fields that do not take part in the hash.
    fn hash_node<H: Hasher>(&self, state: &mut H) {
        // Mix in which variant this is before any of its fields.
        mem::discriminant(self).hash(state);
        match self {
            // Only the alias' qualifier and name are hashed; remaining fields are
            // skipped via `..`.
            Expr::Alias(Alias {
                expr: _expr,
                relation,
                name,
                ..
            }) => {
                relation.hash(state);
                name.hash(state);
            }
            Expr::Column(column) => {
                column.hash(state);
            }
            Expr::ScalarVariable(data_type, name) => {
                data_type.hash(state);
                name.hash(state);
            }
            // The second field of the literal is ignored by the hash.
            Expr::Literal(scalar_value, _) => {
                scalar_value.hash(state);
            }
            // Only the operator is hashed; both operands are child expressions.
            Expr::BinaryExpr(BinaryExpr {
                left: _left,
                op,
                right: _right,
            }) => {
                op.hash(state);
            }
            // `Like` and `SimilarTo` share a payload type; the variant discriminant
            // hashed above is what tells them apart.
            Expr::Like(Like {
                negated,
                expr: _expr,
                pattern: _pattern,
                escape_char,
                case_insensitive,
            })
            | Expr::SimilarTo(Like {
                negated,
                expr: _expr,
                pattern: _pattern,
                escape_char,
                case_insensitive,
            }) => {
                negated.hash(state);
                escape_char.hash(state);
                case_insensitive.hash(state);
            }
            // Unary wrappers carry nothing besides their child, so the discriminant
            // is the whole contribution.
            Expr::Not(_expr)
            | Expr::IsNotNull(_expr)
            | Expr::IsNull(_expr)
            | Expr::IsTrue(_expr)
            | Expr::IsFalse(_expr)
            | Expr::IsUnknown(_expr)
            | Expr::IsNotTrue(_expr)
            | Expr::IsNotFalse(_expr)
            | Expr::IsNotUnknown(_expr)
            | Expr::Negative(_expr) => {}
            Expr::Between(Between {
                expr: _expr,
                negated,
                low: _low,
                high: _high,
            }) => {
                negated.hash(state);
            }
            // Every field of `Case` is a child expression; nothing extra to hash.
            Expr::Case(Case {
                expr: _expr,
                when_then_expr: _when_then_expr,
                else_expr: _else_expr,
            }) => {}
            // The target type is hashed; `Cast` and `TryCast` differ only by the
            // discriminant hashed above.
            Expr::Cast(Cast {
                expr: _expr,
                data_type,
            })
            | Expr::TryCast(TryCast {
                expr: _expr,
                data_type,
            }) => {
                data_type.hash(state);
            }
            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
                func.hash(state);
            }
            // `args`, `filter` and `order_by` are left out of the hash.
            Expr::AggregateFunction(AggregateFunction {
                func,
                params:
                    AggregateFunctionParams {
                        args: _args,
                        distinct,
                        filter: _,
                        order_by: _,
                        null_treatment,
                    },
            }) => {
                func.hash(state);
                distinct.hash(state);
                null_treatment.hash(state);
            }
            Expr::WindowFunction(window_fun) => {
                let WindowFunction {
                    fun,
                    params:
                        WindowFunctionParams {
                            args: _args,
                            partition_by: _,
                            order_by: _,
                            window_frame,
                            filter,
                            null_treatment,
                            distinct,
                        },
                } = window_fun.as_ref();
                fun.hash(state);
                window_frame.hash(state);
                // NOTE(review): `filter` is hashed here, whereas the aggregate arm
                // above skips its `filter` — confirm the asymmetry is intended.
                filter.hash(state);
                null_treatment.hash(state);
                distinct.hash(state);
            }
            Expr::InList(InList {
                expr: _expr,
                list: _list,
                negated,
            }) => {
                negated.hash(state);
            }
            Expr::Exists(Exists { subquery, negated }) => {
                subquery.hash(state);
                negated.hash(state);
            }
            Expr::InSubquery(InSubquery {
                expr: _expr,
                subquery,
                negated,
            }) => {
                subquery.hash(state);
                negated.hash(state);
            }
            Expr::ScalarSubquery(subquery) => {
                subquery.hash(state);
            }
            #[expect(deprecated)]
            Expr::Wildcard { qualifier, options } => {
                qualifier.hash(state);
                options.hash(state);
            }
            // Only the kind of grouping set is hashed; the contained expressions
            // are not. The inner match exists to stay exhaustive over the kinds.
            Expr::GroupingSet(grouping_set) => {
                mem::discriminant(grouping_set).hash(state);
                match grouping_set {
                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
                    GroupingSet::GroupingSets(_exprs) => {}
                }
            }
            Expr::Placeholder(place_holder) => {
                place_holder.hash(state);
            }
            Expr::OuterReferenceColumn(data_type, column) => {
                data_type.hash(state);
                column.hash(state);
            }
            Expr::Unnest(Unnest { expr: _expr }) => {}
        };
    }
}
2727
2728// Modifies expr if it is a placeholder with datatype of right
2729fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
2730    if let Expr::Placeholder(Placeholder { id: _, data_type }) = expr {
2731        if data_type.is_none() {
2732            let other_dt = other.get_type(schema);
2733            match other_dt {
2734                Err(e) => {
2735                    Err(e.context(format!(
2736                        "Can not find type of {other} needed to infer type of {expr}"
2737                    )))?;
2738                }
2739                Ok(dt) => {
2740                    *data_type = Some(dt);
2741                }
2742            }
2743        };
2744    }
2745    Ok(())
2746}
2747
/// Renders every element yielded by `$ARRAY.iter()` with its `Display`
/// implementation and joins the pieces with `", "`.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> =
            $ARRAY.iter().map(|item| format!("{item}")).collect();
        rendered.join(", ")
    }};
}
2758
2759struct SchemaDisplay<'a>(&'a Expr);
2760impl Display for SchemaDisplay<'_> {
2761    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2762        match self.0 {
2763            // The same as Display
2764            // TODO: remove the next line after `Expr::Wildcard` is removed
2765            #[expect(deprecated)]
2766            Expr::Column(_)
2767            | Expr::Literal(_, _)
2768            | Expr::ScalarVariable(..)
2769            | Expr::OuterReferenceColumn(..)
2770            | Expr::Placeholder(_)
2771            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
2772            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2773                match func.schema_name(params) {
2774                    Ok(name) => {
2775                        write!(f, "{name}")
2776                    }
2777                    Err(e) => {
2778                        write!(f, "got error from schema_name {e}")
2779                    }
2780                }
2781            }
2782            // Expr is not shown since it is aliased
2783            Expr::Alias(Alias {
2784                name,
2785                relation: Some(relation),
2786                ..
2787            }) => write!(f, "{relation}.{name}"),
2788            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2789            Expr::Between(Between {
2790                expr,
2791                negated,
2792                low,
2793                high,
2794            }) => {
2795                if *negated {
2796                    write!(
2797                        f,
2798                        "{} NOT BETWEEN {} AND {}",
2799                        SchemaDisplay(expr),
2800                        SchemaDisplay(low),
2801                        SchemaDisplay(high),
2802                    )
2803                } else {
2804                    write!(
2805                        f,
2806                        "{} BETWEEN {} AND {}",
2807                        SchemaDisplay(expr),
2808                        SchemaDisplay(low),
2809                        SchemaDisplay(high),
2810                    )
2811                }
2812            }
2813            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2814                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
2815            }
2816            Expr::Case(Case {
2817                expr,
2818                when_then_expr,
2819                else_expr,
2820            }) => {
2821                write!(f, "CASE ")?;
2822
2823                if let Some(e) = expr {
2824                    write!(f, "{} ", SchemaDisplay(e))?;
2825                }
2826
2827                for (when, then) in when_then_expr {
2828                    write!(
2829                        f,
2830                        "WHEN {} THEN {} ",
2831                        SchemaDisplay(when),
2832                        SchemaDisplay(then),
2833                    )?;
2834                }
2835
2836                if let Some(e) = else_expr {
2837                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
2838                }
2839
2840                write!(f, "END")
2841            }
2842            // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
2843            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2844                write!(f, "{}", SchemaDisplay(expr))
2845            }
2846            Expr::InList(InList {
2847                expr,
2848                list,
2849                negated,
2850            }) => {
2851                let inlist_name = schema_name_from_exprs(list)?;
2852
2853                if *negated {
2854                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
2855                } else {
2856                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
2857                }
2858            }
2859            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
2860            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
2861            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2862                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2863            }
2864            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2865                write!(f, "GROUPING SETS (")?;
2866                for exprs in lists_of_exprs.iter() {
2867                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
2868                }
2869                write!(f, ")")
2870            }
2871            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2872                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2873            }
2874            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
2875            Expr::IsNotNull(expr) => {
2876                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
2877            }
2878            Expr::IsUnknown(expr) => {
2879                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
2880            }
2881            Expr::IsNotUnknown(expr) => {
2882                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
2883            }
2884            Expr::InSubquery(InSubquery { negated: true, .. }) => {
2885                write!(f, "NOT IN")
2886            }
2887            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
2888            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
2889            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
2890            Expr::IsNotTrue(expr) => {
2891                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
2892            }
2893            Expr::IsNotFalse(expr) => {
2894                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
2895            }
2896            Expr::Like(Like {
2897                negated,
2898                expr,
2899                pattern,
2900                escape_char,
2901                case_insensitive,
2902            }) => {
2903                write!(
2904                    f,
2905                    "{} {}{} {}",
2906                    SchemaDisplay(expr),
2907                    if *negated { "NOT " } else { "" },
2908                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2909                    SchemaDisplay(pattern),
2910                )?;
2911
2912                if let Some(char) = escape_char {
2913                    write!(f, " CHAR '{char}'")?;
2914                }
2915
2916                Ok(())
2917            }
2918            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
2919            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
2920            Expr::Unnest(Unnest { expr }) => {
2921                write!(f, "UNNEST({})", SchemaDisplay(expr))
2922            }
2923            Expr::ScalarFunction(ScalarFunction { func, args }) => {
2924                match func.schema_name(args) {
2925                    Ok(name) => {
2926                        write!(f, "{name}")
2927                    }
2928                    Err(e) => {
2929                        write!(f, "got error from schema_name {e}")
2930                    }
2931                }
2932            }
2933            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
2934                write!(f, "{}", subquery.schema().field(0).name())
2935            }
2936            Expr::SimilarTo(Like {
2937                negated,
2938                expr,
2939                pattern,
2940                escape_char,
2941                ..
2942            }) => {
2943                write!(
2944                    f,
2945                    "{} {} {}",
2946                    SchemaDisplay(expr),
2947                    if *negated {
2948                        "NOT SIMILAR TO"
2949                    } else {
2950                        "SIMILAR TO"
2951                    },
2952                    SchemaDisplay(pattern),
2953                )?;
2954                if let Some(char) = escape_char {
2955                    write!(f, " CHAR '{char}'")?;
2956                }
2957
2958                Ok(())
2959            }
2960            Expr::WindowFunction(window_fun) => {
2961                let WindowFunction { fun, params } = window_fun.as_ref();
2962                match fun {
2963                    WindowFunctionDefinition::AggregateUDF(fun) => {
2964                        match fun.window_function_schema_name(params) {
2965                            Ok(name) => {
2966                                write!(f, "{name}")
2967                            }
2968                            Err(e) => {
2969                                write!(
2970                                    f,
2971                                    "got error from window_function_schema_name {e}"
2972                                )
2973                            }
2974                        }
2975                    }
2976                    _ => {
2977                        let WindowFunctionParams {
2978                            args,
2979                            partition_by,
2980                            order_by,
2981                            window_frame,
2982                            filter,
2983                            null_treatment,
2984                            distinct,
2985                        } = params;
2986
2987                        // Write function name and open parenthesis
2988                        write!(f, "{fun}(")?;
2989
2990                        // If DISTINCT, emit the keyword
2991                        if *distinct {
2992                            write!(f, "DISTINCT ")?;
2993                        }
2994
2995                        // Write the comma‑separated argument list
2996                        write!(
2997                            f,
2998                            "{}",
2999                            schema_name_from_exprs_comma_separated_without_space(args)?
3000                        )?;
3001
3002                        // **Close the argument parenthesis**
3003                        write!(f, ")")?;
3004
3005                        if let Some(null_treatment) = null_treatment {
3006                            write!(f, " {null_treatment}")?;
3007                        }
3008
3009                        if let Some(filter) = filter {
3010                            write!(f, " FILTER (WHERE {filter})")?;
3011                        }
3012
3013                        if !partition_by.is_empty() {
3014                            write!(
3015                                f,
3016                                " PARTITION BY [{}]",
3017                                schema_name_from_exprs(partition_by)?
3018                            )?;
3019                        }
3020
3021                        if !order_by.is_empty() {
3022                            write!(
3023                                f,
3024                                " ORDER BY [{}]",
3025                                schema_name_from_sorts(order_by)?
3026                            )?;
3027                        };
3028
3029                        write!(f, " {window_frame}")
3030                    }
3031                }
3032            }
3033        }
3034    }
3035}
3036
3037/// A helper struct for displaying an `Expr` as an SQL-like string.
3038struct SqlDisplay<'a>(&'a Expr);
3039
3040impl Display for SqlDisplay<'_> {
3041    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
3042        match self.0 {
3043            Expr::Literal(scalar, _) => scalar.fmt(f),
3044            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
3045            Expr::Between(Between {
3046                expr,
3047                negated,
3048                low,
3049                high,
3050            }) => {
3051                if *negated {
3052                    write!(
3053                        f,
3054                        "{} NOT BETWEEN {} AND {}",
3055                        SqlDisplay(expr),
3056                        SqlDisplay(low),
3057                        SqlDisplay(high),
3058                    )
3059                } else {
3060                    write!(
3061                        f,
3062                        "{} BETWEEN {} AND {}",
3063                        SqlDisplay(expr),
3064                        SqlDisplay(low),
3065                        SqlDisplay(high),
3066                    )
3067                }
3068            }
3069            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
3070                write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
3071            }
3072            Expr::Case(Case {
3073                expr,
3074                when_then_expr,
3075                else_expr,
3076            }) => {
3077                write!(f, "CASE ")?;
3078
3079                if let Some(e) = expr {
3080                    write!(f, "{} ", SqlDisplay(e))?;
3081                }
3082
3083                for (when, then) in when_then_expr {
3084                    write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
3085                }
3086
3087                if let Some(e) = else_expr {
3088                    write!(f, "ELSE {} ", SqlDisplay(e))?;
3089                }
3090
3091                write!(f, "END")
3092            }
3093            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
3094                write!(f, "{}", SqlDisplay(expr))
3095            }
3096            Expr::InList(InList {
3097                expr,
3098                list,
3099                negated,
3100            }) => {
3101                write!(
3102                    f,
3103                    "{}{} IN {}",
3104                    SqlDisplay(expr),
3105                    if *negated { " NOT" } else { "" },
3106                    ExprListDisplay::comma_separated(list.as_slice())
3107                )
3108            }
3109            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
3110                write!(
3111                    f,
3112                    "ROLLUP ({})",
3113                    ExprListDisplay::comma_separated(exprs.as_slice())
3114                )
3115            }
3116            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
3117                write!(f, "GROUPING SETS (")?;
3118                for exprs in lists_of_exprs.iter() {
3119                    write!(
3120                        f,
3121                        "({})",
3122                        ExprListDisplay::comma_separated(exprs.as_slice())
3123                    )?;
3124                }
3125                write!(f, ")")
3126            }
3127            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
3128                write!(
3129                    f,
3130                    "ROLLUP ({})",
3131                    ExprListDisplay::comma_separated(exprs.as_slice())
3132                )
3133            }
3134            Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
3135            Expr::IsNotNull(expr) => {
3136                write!(f, "{} IS NOT NULL", SqlDisplay(expr))
3137            }
3138            Expr::IsUnknown(expr) => {
3139                write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
3140            }
3141            Expr::IsNotUnknown(expr) => {
3142                write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
3143            }
3144            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
3145            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
3146            Expr::IsNotTrue(expr) => {
3147                write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
3148            }
3149            Expr::IsNotFalse(expr) => {
3150                write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
3151            }
3152            Expr::Like(Like {
3153                negated,
3154                expr,
3155                pattern,
3156                escape_char,
3157                case_insensitive,
3158            }) => {
3159                write!(
3160                    f,
3161                    "{} {}{} {}",
3162                    SqlDisplay(expr),
3163                    if *negated { "NOT " } else { "" },
3164                    if *case_insensitive { "ILIKE" } else { "LIKE" },
3165                    SqlDisplay(pattern),
3166                )?;
3167
3168                if let Some(char) = escape_char {
3169                    write!(f, " CHAR '{char}'")?;
3170                }
3171
3172                Ok(())
3173            }
3174            Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
3175            Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
3176            Expr::Unnest(Unnest { expr }) => {
3177                write!(f, "UNNEST({})", SqlDisplay(expr))
3178            }
3179            Expr::SimilarTo(Like {
3180                negated,
3181                expr,
3182                pattern,
3183                escape_char,
3184                ..
3185            }) => {
3186                write!(
3187                    f,
3188                    "{} {} {}",
3189                    SqlDisplay(expr),
3190                    if *negated {
3191                        "NOT SIMILAR TO"
3192                    } else {
3193                        "SIMILAR TO"
3194                    },
3195                    SqlDisplay(pattern),
3196                )?;
3197                if let Some(char) = escape_char {
3198                    write!(f, " CHAR '{char}'")?;
3199                }
3200
3201                Ok(())
3202            }
3203            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3204                match func.human_display(params) {
3205                    Ok(name) => {
3206                        write!(f, "{name}")
3207                    }
3208                    Err(e) => {
3209                        write!(f, "got error from schema_name {e}")
3210                    }
3211                }
3212            }
3213            _ => write!(f, "{}", self.0),
3214        }
3215    }
3216}
3217
/// Get the schema name for a slice of expressions, joined with `","`
/// (a comma with no following space).
///
/// Internal usage. Please call `schema_name_from_exprs` instead, which joins
/// with `", "`.
///
/// Returns the joined string, or the `fmt::Error` reported by
/// `schema_name_from_exprs_inner`.
// TODO: Use ", " to standardize the formatting of Vec<Expr>,
// <https://github.com/apache/datafusion/issues/10364>
pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
    exprs: &[Expr],
) -> Result<String, fmt::Error> {
    schema_name_from_exprs_inner(exprs, ",")
}
3228
3229/// Formats a list of `&Expr` with a custom separator using SQL display format
3230pub struct ExprListDisplay<'a> {
3231    exprs: &'a [Expr],
3232    sep: &'a str,
3233}
3234
3235impl<'a> ExprListDisplay<'a> {
3236    /// Create a new display struct with the given expressions and separator
3237    pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
3238        Self { exprs, sep }
3239    }
3240
3241    /// Create a new display struct with comma-space separator
3242    pub fn comma_separated(exprs: &'a [Expr]) -> Self {
3243        Self::new(exprs, ", ")
3244    }
3245}
3246
3247impl Display for ExprListDisplay<'_> {
3248    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3249        let mut first = true;
3250        for expr in self.exprs {
3251            if !first {
3252                write!(f, "{}", self.sep)?;
3253            }
3254            write!(f, "{}", SqlDisplay(expr))?;
3255            first = false;
3256        }
3257        Ok(())
3258    }
3259}
3260
/// Get the schema name for a slice of expressions, joined with `", "`.
///
/// Returns the joined string, or the `fmt::Error` reported by
/// `schema_name_from_exprs_inner`.
pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
    schema_name_from_exprs_inner(exprs, ", ")
}
3265
3266fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
3267    let mut s = String::new();
3268    for (i, e) in exprs.iter().enumerate() {
3269        if i > 0 {
3270            write!(&mut s, "{sep}")?;
3271        }
3272        write!(&mut s, "{}", SchemaDisplay(e))?;
3273    }
3274
3275    Ok(s)
3276}
3277
3278pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
3279    let mut s = String::new();
3280    for (i, e) in sorts.iter().enumerate() {
3281        if i > 0 {
3282            write!(&mut s, ", ")?;
3283        }
3284        let ordering = if e.asc { "ASC" } else { "DESC" };
3285        let nulls_ordering = if e.nulls_first {
3286            "NULLS FIRST"
3287        } else {
3288            "NULLS LAST"
3289        };
3290        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
3291    }
3292
3293    Ok(s)
3294}
3295
/// Prefix written before an outer-reference column when an `Expr` is displayed,
/// producing `outer_ref(<column>)`.
pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";
/// Prefix associated with unnest columns.
// NOTE(review): its use sites are outside this part of the file — confirm how the
// prefix is combined with the column name.
pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
3298
3299/// Format expressions for display as part of a logical plan. In many cases, this will produce
3300/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
3301impl Display for Expr {
3302    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3303        match self {
3304            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
3305            Expr::Column(c) => write!(f, "{c}"),
3306            Expr::OuterReferenceColumn(_, c) => {
3307                write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
3308            }
3309            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
3310            Expr::Literal(v, metadata) => {
3311                match metadata.as_ref().map(|m| m.is_empty()).unwrap_or(true) {
3312                    false => write!(f, "{v:?} {:?}", metadata.as_ref().unwrap()),
3313                    true => write!(f, "{v:?}"),
3314                }
3315            }
3316            Expr::Case(case) => {
3317                write!(f, "CASE ")?;
3318                if let Some(e) = &case.expr {
3319                    write!(f, "{e} ")?;
3320                }
3321                for (w, t) in &case.when_then_expr {
3322                    write!(f, "WHEN {w} THEN {t} ")?;
3323                }
3324                if let Some(e) = &case.else_expr {
3325                    write!(f, "ELSE {e} ")?;
3326                }
3327                write!(f, "END")
3328            }
3329            Expr::Cast(Cast { expr, data_type }) => {
3330                write!(f, "CAST({expr} AS {data_type:?})")
3331            }
3332            Expr::TryCast(TryCast { expr, data_type }) => {
3333                write!(f, "TRY_CAST({expr} AS {data_type:?})")
3334            }
3335            Expr::Not(expr) => write!(f, "NOT {expr}"),
3336            Expr::Negative(expr) => write!(f, "(- {expr})"),
3337            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
3338            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
3339            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
3340            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
3341            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
3342            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
3343            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
3344            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
3345            Expr::Exists(Exists {
3346                subquery,
3347                negated: true,
3348            }) => write!(f, "NOT EXISTS ({subquery:?})"),
3349            Expr::Exists(Exists {
3350                subquery,
3351                negated: false,
3352            }) => write!(f, "EXISTS ({subquery:?})"),
3353            Expr::InSubquery(InSubquery {
3354                expr,
3355                subquery,
3356                negated: true,
3357            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
3358            Expr::InSubquery(InSubquery {
3359                expr,
3360                subquery,
3361                negated: false,
3362            }) => write!(f, "{expr} IN ({subquery:?})"),
3363            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
3364            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
3365            Expr::ScalarFunction(fun) => {
3366                fmt_function(f, fun.name(), false, &fun.args, true)
3367            }
3368            Expr::WindowFunction(window_fun) => {
3369                let WindowFunction { fun, params } = window_fun.as_ref();
3370                match fun {
3371                    WindowFunctionDefinition::AggregateUDF(fun) => {
3372                        match fun.window_function_display_name(params) {
3373                            Ok(name) => {
3374                                write!(f, "{name}")
3375                            }
3376                            Err(e) => {
3377                                write!(
3378                                    f,
3379                                    "got error from window_function_display_name {e}"
3380                                )
3381                            }
3382                        }
3383                    }
3384                    WindowFunctionDefinition::WindowUDF(fun) => {
3385                        let WindowFunctionParams {
3386                            args,
3387                            partition_by,
3388                            order_by,
3389                            window_frame,
3390                            filter,
3391                            null_treatment,
3392                            distinct,
3393                        } = params;
3394
3395                        fmt_function(f, &fun.to_string(), *distinct, args, true)?;
3396
3397                        if let Some(nt) = null_treatment {
3398                            write!(f, "{nt}")?;
3399                        }
3400
3401                        if let Some(fe) = filter {
3402                            write!(f, " FILTER (WHERE {fe})")?;
3403                        }
3404
3405                        if !partition_by.is_empty() {
3406                            write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
3407                        }
3408                        if !order_by.is_empty() {
3409                            write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
3410                        }
3411                        write!(
3412                            f,
3413                            " {} BETWEEN {} AND {}",
3414                            window_frame.units,
3415                            window_frame.start_bound,
3416                            window_frame.end_bound
3417                        )
3418                    }
3419                }
3420            }
3421            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3422                match func.display_name(params) {
3423                    Ok(name) => {
3424                        write!(f, "{name}")
3425                    }
3426                    Err(e) => {
3427                        write!(f, "got error from display_name {e}")
3428                    }
3429                }
3430            }
3431            Expr::Between(Between {
3432                expr,
3433                negated,
3434                low,
3435                high,
3436            }) => {
3437                if *negated {
3438                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
3439                } else {
3440                    write!(f, "{expr} BETWEEN {low} AND {high}")
3441                }
3442            }
3443            Expr::Like(Like {
3444                negated,
3445                expr,
3446                pattern,
3447                escape_char,
3448                case_insensitive,
3449            }) => {
3450                write!(f, "{expr}")?;
3451                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
3452                if *negated {
3453                    write!(f, " NOT")?;
3454                }
3455                if let Some(char) = escape_char {
3456                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
3457                } else {
3458                    write!(f, " {op_name} {pattern}")
3459                }
3460            }
3461            Expr::SimilarTo(Like {
3462                negated,
3463                expr,
3464                pattern,
3465                escape_char,
3466                case_insensitive: _,
3467            }) => {
3468                write!(f, "{expr}")?;
3469                if *negated {
3470                    write!(f, " NOT")?;
3471                }
3472                if let Some(char) = escape_char {
3473                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
3474                } else {
3475                    write!(f, " SIMILAR TO {pattern}")
3476                }
3477            }
3478            Expr::InList(InList {
3479                expr,
3480                list,
3481                negated,
3482            }) => {
3483                if *negated {
3484                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
3485                } else {
3486                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
3487                }
3488            }
3489            #[expect(deprecated)]
3490            Expr::Wildcard { qualifier, options } => match qualifier {
3491                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
3492                None => write!(f, "*{options}"),
3493            },
3494            Expr::GroupingSet(grouping_sets) => match grouping_sets {
3495                GroupingSet::Rollup(exprs) => {
3496                    // ROLLUP (c0, c1, c2)
3497                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
3498                }
3499                GroupingSet::Cube(exprs) => {
3500                    // CUBE (c0, c1, c2)
3501                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
3502                }
3503                GroupingSet::GroupingSets(lists_of_exprs) => {
3504                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
3505                    write!(
3506                        f,
3507                        "GROUPING SETS ({})",
3508                        lists_of_exprs
3509                            .iter()
3510                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
3511                            .collect::<Vec<String>>()
3512                            .join(", ")
3513                    )
3514                }
3515            },
3516            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
3517            Expr::Unnest(Unnest { expr }) => {
3518                write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
3519            }
3520        }
3521    }
3522}
3523
3524fn fmt_function(
3525    f: &mut Formatter,
3526    fun: &str,
3527    distinct: bool,
3528    args: &[Expr],
3529    display: bool,
3530) -> fmt::Result {
3531    let args: Vec<String> = match display {
3532        true => args.iter().map(|arg| format!("{arg}")).collect(),
3533        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
3534    };
3535
3536    let distinct_str = match distinct {
3537        true => "DISTINCT ",
3538        false => "",
3539    };
3540    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
3541}
3542
3543/// The name of the column (field) that this `Expr` will produce in the physical plan.
3544/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
3545pub fn physical_name(expr: &Expr) -> Result<String> {
3546    match expr {
3547        Expr::Column(col) => Ok(col.name.clone()),
3548        Expr::Alias(alias) => Ok(alias.name.clone()),
3549        _ => Ok(expr.schema_name().to_string()),
3550    }
3551}
3552
3553#[cfg(test)]
3554mod test {
3555    use crate::expr_fn::col;
3556    use crate::{
3557        case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
3558        ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
3559    };
3560    use arrow::datatypes::{Field, Schema};
3561    use sqlparser::ast;
3562    use sqlparser::ast::{Ident, IdentWithAlias};
3563    use std::any::Any;
3564
3565    #[test]
3566    fn infer_placeholder_in_clause() {
3567        // SELECT * FROM employees WHERE department_id IN ($1, $2, $3);
3568        let column = col("department_id");
3569        let param_placeholders = vec![
3570            Expr::Placeholder(Placeholder {
3571                id: "$1".to_string(),
3572                data_type: None,
3573            }),
3574            Expr::Placeholder(Placeholder {
3575                id: "$2".to_string(),
3576                data_type: None,
3577            }),
3578            Expr::Placeholder(Placeholder {
3579                id: "$3".to_string(),
3580                data_type: None,
3581            }),
3582        ];
3583        let in_list = Expr::InList(InList {
3584            expr: Box::new(column),
3585            list: param_placeholders,
3586            negated: false,
3587        });
3588
3589        let schema = Arc::new(Schema::new(vec![
3590            Field::new("name", DataType::Utf8, true),
3591            Field::new("department_id", DataType::Int32, true),
3592        ]));
3593        let df_schema = DFSchema::try_from(schema).unwrap();
3594
3595        let (inferred_expr, contains_placeholder) =
3596            in_list.infer_placeholder_types(&df_schema).unwrap();
3597
3598        assert!(contains_placeholder);
3599
3600        match inferred_expr {
3601            Expr::InList(in_list) => {
3602                for expr in in_list.list {
3603                    match expr {
3604                        Expr::Placeholder(placeholder) => {
3605                            assert_eq!(
3606                                placeholder.data_type,
3607                                Some(DataType::Int32),
3608                                "Placeholder {} should infer Int32",
3609                                placeholder.id
3610                            );
3611                        }
3612                        _ => panic!("Expected Placeholder expression"),
3613                    }
3614                }
3615            }
3616            _ => panic!("Expected InList expression"),
3617        }
3618    }
3619
3620    #[test]
3621    fn infer_placeholder_like_and_similar_to() {
3622        // name LIKE $1
3623        let schema =
3624            Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
3625        let df_schema = DFSchema::try_from(schema).unwrap();
3626
3627        let like = Like {
3628            expr: Box::new(col("name")),
3629            pattern: Box::new(Expr::Placeholder(Placeholder {
3630                id: "$1".to_string(),
3631                data_type: None,
3632            })),
3633            negated: false,
3634            case_insensitive: false,
3635            escape_char: None,
3636        };
3637
3638        let expr = Expr::Like(like.clone());
3639
3640        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3641        match inferred_expr {
3642            Expr::Like(like) => match *like.pattern {
3643                Expr::Placeholder(placeholder) => {
3644                    assert_eq!(placeholder.data_type, Some(DataType::Utf8));
3645                }
3646                _ => panic!("Expected Placeholder"),
3647            },
3648            _ => panic!("Expected Like"),
3649        }
3650
3651        // name SIMILAR TO $1
3652        let expr = Expr::SimilarTo(like);
3653
3654        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3655        match inferred_expr {
3656            Expr::SimilarTo(like) => match *like.pattern {
3657                Expr::Placeholder(placeholder) => {
3658                    assert_eq!(
3659                        placeholder.data_type,
3660                        Some(DataType::Utf8),
3661                        "Placeholder {} should infer Utf8",
3662                        placeholder.id
3663                    );
3664                }
3665                _ => panic!("Expected Placeholder expression"),
3666            },
3667            _ => panic!("Expected SimilarTo expression"),
3668        }
3669    }
3670
3671    #[test]
3672    fn format_case_when() -> Result<()> {
3673        let expr = case(col("a"))
3674            .when(lit(1), lit(true))
3675            .when(lit(0), lit(false))
3676            .otherwise(lit(ScalarValue::Null))?;
3677        let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";
3678        assert_eq!(expected, format!("{expr}"));
3679        Ok(())
3680    }
3681
3682    #[test]
3683    fn format_cast() -> Result<()> {
3684        let expr = Expr::Cast(Cast {
3685            expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)), None)),
3686            data_type: DataType::Utf8,
3687        });
3688        let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
3689        assert_eq!(expected_canonical, format!("{expr}"));
3690        // Note that CAST intentionally has a name that is different from its `Display`
3691        // representation. CAST does not change the name of expressions.
3692        assert_eq!("Float32(1.23)", expr.schema_name().to_string());
3693        Ok(())
3694    }
3695
3696    #[test]
3697    fn test_partial_ord() {
3698        // Test validates that partial ord is defined for Expr, not
3699        // intended to exhaustively test all possibilities
3700        let exp1 = col("a") + lit(1);
3701        let exp2 = col("a") + lit(2);
3702        let exp3 = !(col("a") + lit(2));
3703
3704        assert!(exp1 < exp2);
3705        assert!(exp3 > exp2);
3706        assert!(exp1 < exp3)
3707    }
3708
3709    #[test]
3710    fn test_collect_expr() -> Result<()> {
3711        // single column
3712        {
3713            let expr = &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
3714            let columns = expr.column_refs();
3715            assert_eq!(1, columns.len());
3716            assert!(columns.contains(&Column::from_name("a")));
3717        }
3718
3719        // multiple columns
3720        {
3721            let expr = col("a") + col("b") + lit(1);
3722            let columns = expr.column_refs();
3723            assert_eq!(2, columns.len());
3724            assert!(columns.contains(&Column::from_name("a")));
3725            assert!(columns.contains(&Column::from_name("b")));
3726        }
3727
3728        Ok(())
3729    }
3730
3731    #[test]
3732    fn test_logical_ops() {
3733        assert_eq!(
3734            format!("{}", lit(1u32).eq(lit(2u32))),
3735            "UInt32(1) = UInt32(2)"
3736        );
3737        assert_eq!(
3738            format!("{}", lit(1u32).not_eq(lit(2u32))),
3739            "UInt32(1) != UInt32(2)"
3740        );
3741        assert_eq!(
3742            format!("{}", lit(1u32).gt(lit(2u32))),
3743            "UInt32(1) > UInt32(2)"
3744        );
3745        assert_eq!(
3746            format!("{}", lit(1u32).gt_eq(lit(2u32))),
3747            "UInt32(1) >= UInt32(2)"
3748        );
3749        assert_eq!(
3750            format!("{}", lit(1u32).lt(lit(2u32))),
3751            "UInt32(1) < UInt32(2)"
3752        );
3753        assert_eq!(
3754            format!("{}", lit(1u32).lt_eq(lit(2u32))),
3755            "UInt32(1) <= UInt32(2)"
3756        );
3757        assert_eq!(
3758            format!("{}", lit(1u32).and(lit(2u32))),
3759            "UInt32(1) AND UInt32(2)"
3760        );
3761        assert_eq!(
3762            format!("{}", lit(1u32).or(lit(2u32))),
3763            "UInt32(1) OR UInt32(2)"
3764        );
3765    }
3766
3767    #[test]
3768    fn test_is_volatile_scalar_func() {
3769        // UDF
3770        #[derive(Debug, PartialEq, Eq, Hash)]
3771        struct TestScalarUDF {
3772            signature: Signature,
3773        }
3774        impl ScalarUDFImpl for TestScalarUDF {
3775            fn as_any(&self) -> &dyn Any {
3776                self
3777            }
3778            fn name(&self) -> &str {
3779                "TestScalarUDF"
3780            }
3781
3782            fn signature(&self) -> &Signature {
3783                &self.signature
3784            }
3785
3786            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
3787                Ok(DataType::Utf8)
3788            }
3789
3790            fn invoke_with_args(
3791                &self,
3792                _args: ScalarFunctionArgs,
3793            ) -> Result<ColumnarValue> {
3794                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
3795            }
3796        }
3797        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3798            signature: Signature::uniform(1, vec![DataType::Float32], Volatility::Stable),
3799        }));
3800        assert_ne!(udf.signature().volatility, Volatility::Volatile);
3801
3802        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3803            signature: Signature::uniform(
3804                1,
3805                vec![DataType::Float32],
3806                Volatility::Volatile,
3807            ),
3808        }));
3809        assert_eq!(udf.signature().volatility, Volatility::Volatile);
3810    }
3811
3812    use super::*;
3813
3814    #[test]
3815    fn test_display_wildcard() {
3816        assert_eq!(format!("{}", wildcard()), "*");
3817        assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*");
3818        assert_eq!(
3819            format!(
3820                "{}",
3821                wildcard_with_options(wildcard_options(
3822                    Some(IlikeSelectItem {
3823                        pattern: "c1".to_string()
3824                    }),
3825                    None,
3826                    None,
3827                    None,
3828                    None
3829                ))
3830            ),
3831            "* ILIKE 'c1'"
3832        );
3833        assert_eq!(
3834            format!(
3835                "{}",
3836                wildcard_with_options(wildcard_options(
3837                    None,
3838                    Some(ExcludeSelectItem::Multiple(vec![
3839                        Ident::from("c1"),
3840                        Ident::from("c2")
3841                    ])),
3842                    None,
3843                    None,
3844                    None
3845                ))
3846            ),
3847            "* EXCLUDE (c1, c2)"
3848        );
3849        assert_eq!(
3850            format!(
3851                "{}",
3852                wildcard_with_options(wildcard_options(
3853                    None,
3854                    None,
3855                    Some(ExceptSelectItem {
3856                        first_element: Ident::from("c1"),
3857                        additional_elements: vec![Ident::from("c2")]
3858                    }),
3859                    None,
3860                    None
3861                ))
3862            ),
3863            "* EXCEPT (c1, c2)"
3864        );
3865        assert_eq!(
3866            format!(
3867                "{}",
3868                wildcard_with_options(wildcard_options(
3869                    None,
3870                    None,
3871                    None,
3872                    Some(PlannedReplaceSelectItem {
3873                        items: vec![ReplaceSelectElement {
3874                            expr: ast::Expr::Identifier(Ident::from("c1")),
3875                            column_name: Ident::from("a1"),
3876                            as_keyword: false
3877                        }],
3878                        planned_expressions: vec![]
3879                    }),
3880                    None
3881                ))
3882            ),
3883            "* REPLACE (c1 a1)"
3884        );
3885        assert_eq!(
3886            format!(
3887                "{}",
3888                wildcard_with_options(wildcard_options(
3889                    None,
3890                    None,
3891                    None,
3892                    None,
3893                    Some(RenameSelectItem::Multiple(vec![IdentWithAlias {
3894                        ident: Ident::from("c1"),
3895                        alias: Ident::from("a1")
3896                    }]))
3897                ))
3898            ),
3899            "* RENAME (c1 AS a1)"
3900        )
3901    }
3902
3903    #[test]
3904    fn test_schema_display_alias_with_relation() {
3905        assert_eq!(
3906            format!(
3907                "{}",
3908                SchemaDisplay(
3909                    &lit(1).alias_qualified("table_name".into(), "column_name")
3910                )
3911            ),
3912            "table_name.column_name"
3913        );
3914    }
3915
3916    #[test]
3917    fn test_schema_display_alias_without_relation() {
3918        assert_eq!(
3919            format!(
3920                "{}",
3921                SchemaDisplay(&lit(1).alias_qualified(None::<&str>, "column_name"))
3922            ),
3923            "column_name"
3924        );
3925    }
3926
3927    fn wildcard_options(
3928        opt_ilike: Option<IlikeSelectItem>,
3929        opt_exclude: Option<ExcludeSelectItem>,
3930        opt_except: Option<ExceptSelectItem>,
3931        opt_replace: Option<PlannedReplaceSelectItem>,
3932        opt_rename: Option<RenameSelectItem>,
3933    ) -> WildcardOptions {
3934        WildcardOptions {
3935            ilike: opt_ilike,
3936            exclude: opt_exclude,
3937            except: opt_except,
3938            replace: opt_replace,
3939            rename: opt_rename,
3940        }
3941    }
3942
3943    #[test]
3944    fn test_size_of_expr() {
3945        // because Expr is such a widely used struct in DataFusion
3946        // it is important to keep its size as small as possible
3947        //
3948        // If this test fails when you change `Expr`, please try
3949        // `Box`ing the fields to make `Expr` smaller
3950        // See https://github.com/apache/datafusion/issues/16199 for details
3951        assert_eq!(size_of::<Expr>(), 128);
3952        assert_eq!(size_of::<ScalarValue>(), 64);
3953        assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
3954        assert_eq!(size_of::<Vec<Expr>>(), 24);
3955        assert_eq!(size_of::<Arc<Expr>>(), 8);
3956    }
3957}