datafusion_expr/
expr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Logical Expressions: [`Expr`]
19
20use std::cmp::Ordering;
21use std::collections::{BTreeMap, HashSet};
22use std::fmt::{self, Display, Formatter, Write};
23use std::hash::{Hash, Hasher};
24use std::mem;
25use std::sync::Arc;
26
27use crate::expr_fn::binary_expr;
28use crate::function::WindowFunctionSimplification;
29use crate::logical_plan::Subquery;
30use crate::Volatility;
31use crate::{udaf, ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
32
33use arrow::datatypes::{DataType, FieldRef};
34use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
35use datafusion_common::tree_node::{
36    Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
37};
38use datafusion_common::{
39    Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference,
40};
41use datafusion_functions_window_common::field::WindowUDFFieldArgs;
42use sqlparser::ast::{
43    display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
44    NullTreatment, RenameSelectItem, ReplaceSelectElement,
45};
46
47/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
48///
49/// For example the expression `A + 1` will be represented as
50///
51///```text
52///  BinaryExpr {
53///    left: Expr::Column("A"),
54///    op: Operator::Plus,
55///    right: Expr::Literal(ScalarValue::Int32(Some(1)), None)
56/// }
57/// ```
58///
59/// # Creating Expressions
60///
61/// `Expr`s can be created directly, but it is often easier and less verbose to
62/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
64///
65/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
66///
67/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
68///
69/// # Printing Expressions
70///
/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
72/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
73///
/// If you need SQL to pass to other systems, consider using [`Unparser`].
75///
76/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
77///
78/// # Schema Access
79///
80/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
81/// of an `Expr`.
82///
83/// # Visiting and Rewriting `Expr`s
84///
85/// The `Expr` struct implements the [`TreeNode`] trait for walking and
86/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
87/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
88/// the examples below and [`TreeNode`] for more information.
89///
90/// # Examples: Creating and Using `Expr`s
91///
92/// ## Column References and Literals
93///
94/// [`Expr::Column`] refer to the values of columns and are often created with
95/// the [`col`] function. For example to create an expression `c1` referring to
96/// column named "c1":
97///
98/// [`col`]: crate::expr_fn::col
99///
100/// ```
101/// # use datafusion_common::Column;
102/// # use datafusion_expr::{lit, col, Expr};
103/// let expr = col("c1");
104/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
105/// ```
106///
107/// [`Expr::Literal`] refer to literal, or constant, values. These are created
108/// with the [`lit`] function. For example to create an expression `42`:
109///
110/// [`lit`]: crate::lit
111///
112/// ```
113/// # use datafusion_common::{Column, ScalarValue};
114/// # use datafusion_expr::{lit, col, Expr};
115/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
116/// let expr = lit(42i64);
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
119/// // To make a (typed) NULL:
120/// let expr = Expr::Literal(ScalarValue::Int64(None), None);
121/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
122/// let expr = lit(ScalarValue::Null);
123/// ```
124///
125/// ## Binary Expressions
126///
127/// Exprs implement traits that allow easy to understand construction of more
128/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
129/// "c2" together
130///
131/// ```
132/// # use datafusion_expr::{lit, col, Operator, Expr};
133/// // Use the `+` operator to add two columns together
134/// let expr = col("c1") + col("c2");
135/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
136/// if let Expr::BinaryExpr(binary_expr) = expr {
137///   assert_eq!(*binary_expr.left, col("c1"));
138///   assert_eq!(*binary_expr.right, col("c2"));
139///   assert_eq!(binary_expr.op, Operator::Plus);
140/// }
141/// ```
142///
/// The expression `c1 = 42` compares the value in column "c1" to the
144/// literal value `42`:
145///
146/// ```
147/// # use datafusion_common::ScalarValue;
148/// # use datafusion_expr::{lit, col, Operator, Expr};
149/// let expr = col("c1").eq(lit(42_i32));
150/// assert!(matches!(expr, Expr::BinaryExpr { .. } ));
151/// if let Expr::BinaryExpr(binary_expr) = expr {
152///   assert_eq!(*binary_expr.left, col("c1"));
153///   let scalar = ScalarValue::Int32(Some(42));
154///   assert_eq!(*binary_expr.right, Expr::Literal(scalar, None));
155///   assert_eq!(binary_expr.op, Operator::Eq);
156/// }
157/// ```
158///
159/// Here is how to implement the equivalent of `SELECT *` to select all
160/// [`Expr::Column`] from a [`DFSchema`]'s columns:
161///
162/// ```
163/// # use arrow::datatypes::{DataType, Field, Schema};
164/// # use datafusion_common::{DFSchema, Column};
165/// # use datafusion_expr::Expr;
166/// // Create a schema c1(int, c2 float)
167/// let arrow_schema = Schema::new(vec![
168///    Field::new("c1", DataType::Int32, false),
169///    Field::new("c2", DataType::Float64, false),
170/// ]);
/// // DFSchema is an Arrow schema with optional relation name
172/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)
173///   .unwrap();
174///
175/// // Form Vec<Expr> with an expression for each column in the schema
176/// let exprs: Vec<_> = df_schema.iter()
177///   .map(Expr::from)
178///   .collect();
179///
180/// assert_eq!(exprs, vec![
181///   Expr::from(Column::from_qualified_name("t1.c1")),
182///   Expr::from(Column::from_qualified_name("t1.c2")),
183/// ]);
184/// ```
185///
186/// # Examples: Displaying `Exprs`
187///
/// There are three ways to print an `Expr` depending on the use case.
189///
190/// ## Use `Debug` trait
191///
192/// Following Rust conventions, the `Debug` implementation prints out the
193/// internal structure of the expression, which is useful for debugging.
194///
195/// ```
196/// # use datafusion_expr::{lit, col};
197/// let expr = col("c1") + lit(42);
198/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42), None) })");
199/// ```
200///
201/// ## Use the `Display` trait  (detailed expression)
202///
203/// The `Display` implementation prints out the expression in a SQL-like form,
204/// but has additional details such as the data type of literals. This is useful
205/// for understanding the expression in more detail and is used for the low level
206/// [`ExplainFormat::Indent`] explain plan format.
207///
208/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
209///
210/// ```
211/// # use datafusion_expr::{lit, col};
212/// let expr = col("c1") + lit(42);
213/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
214/// ```
215///
216/// ## Use [`Self::human_display`] (human readable)
217///
218/// [`Self::human_display`]  prints out the expression in a SQL-like form, optimized
219/// for human consumption by end users. It is used for the
220/// [`ExplainFormat::Tree`] explain plan format.
221///
222/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
223///
224///```
225/// # use datafusion_expr::{lit, col};
226/// let expr = col("c1") + lit(42);
227/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
228/// ```
229///
230/// # Examples: Visiting and Rewriting `Expr`s
231///
232/// Here is an example that finds all literals in an `Expr` tree:
233/// ```
234/// # use std::collections::{HashSet};
235/// use datafusion_common::ScalarValue;
236/// # use datafusion_expr::{col, Expr, lit};
237/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
238/// // Expression a = 5 AND b = 6
239/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
/// // find all literals in a HashSet
241/// let mut scalars = HashSet::new();
242/// // apply recursively visits all nodes in the expression tree
243/// expr.apply(|e| {
244///    if let Expr::Literal(scalar, _) = e {
245///       scalars.insert(scalar);
246///    }
247///    // The return value controls whether to continue visiting the tree
248///    Ok(TreeNodeRecursion::Continue)
249/// }).unwrap();
250/// // All subtrees have been visited and literals found
251/// assert_eq!(scalars.len(), 2);
252/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
253/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
254/// ```
255///
/// Rewrite an expression, replacing references to column "a" in an
/// expression with the literal `42`:
258///
259///  ```
260/// # use datafusion_common::tree_node::{Transformed, TreeNode};
261/// # use datafusion_expr::{col, Expr, lit};
262/// // expression a = 5 AND b = 6
263/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
264/// // rewrite all references to column "a" to the literal 42
265/// let rewritten = expr.transform(|e| {
266///   if let Expr::Column(c) = &e {
267///     if &c.name == "a" {
268///       // return Transformed::yes to indicate the node was changed
269///       return Ok(Transformed::yes(lit(42)))
270///     }
271///   }
272///   // return Transformed::no to indicate the node was not changed
273///   Ok(Transformed::no(e))
274/// }).unwrap();
275/// // The expression has been rewritten
276/// assert!(rewritten.transformed);
277/// // to 42 = 5 AND b = 6
/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
/// ```
// NOTE: `PartialOrd` and `Hash` are derived, so the declaration order of the
// variants below is part of the type's observable behavior.
#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
pub enum Expr {
    /// An expression with a specific name.
    Alias(Alias),
    /// A named reference to a qualified field in a schema.
    Column(Column),
    /// A named reference to a variable in a registry.
    ScalarVariable(DataType, Vec<String>),
    /// A constant value along with associated metadata
    Literal(ScalarValue, Option<BTreeMap<String, String>>),
    /// A binary expression such as "age > 21"
    BinaryExpr(BinaryExpr),
    /// LIKE expression
    Like(Like),
    /// LIKE expression that uses regular expressions
    SimilarTo(Like),
    /// Negation of an expression. The expression's type must be a boolean to make sense.
    Not(Box<Expr>),
    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
    IsNotNull(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsNull(Box<Expr>),
    /// True if argument is true, false otherwise. This expression itself is never NULL.
    IsTrue(Box<Expr>),
    /// True if argument is false, false otherwise. This expression itself is never NULL.
    IsFalse(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsUnknown(Box<Expr>),
    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
    IsNotTrue(Box<Expr>),
    /// True if argument is TRUE or NULL, false otherwise. This expression itself is never NULL.
    IsNotFalse(Box<Expr>),
    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
    IsNotUnknown(Box<Expr>),
    /// Arithmetic negation of an expression. The operand must be of a signed numeric data type.
    Negative(Box<Expr>),
    /// Whether an expression is between a given range.
    Between(Between),
    /// A CASE expression (see docs on [`Case`])
    Case(Case),
    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    Cast(Cast),
    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    TryCast(TryCast),
    /// Call a scalar function with a set of arguments.
    ScalarFunction(ScalarFunction),
    /// Calls an aggregate function with arguments, and optional
    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
    ///
    /// See also [`ExprFunctionExt`] to set these fields.
    ///
    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
    AggregateFunction(AggregateFunction),
    /// Call a window function with a set of arguments.
    ///
    /// The payload is boxed; see `From<WindowFunction> for Expr` for a
    /// convenient way to build this variant.
    WindowFunction(Box<WindowFunction>),
    /// Returns whether the list contains the expr value.
    InList(InList),
    /// EXISTS subquery
    Exists(Exists),
    /// IN subquery
    InSubquery(InSubquery),
    /// Scalar subquery
    ScalarSubquery(Subquery),
    /// Represents a reference to all available fields in a specific schema,
    /// with an optional (schema) qualifier.
    ///
    /// This expr has to be resolved to a list of columns before translating logical
    /// plan into physical plan.
    #[deprecated(
        since = "46.0.0",
        note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765"
    )]
    Wildcard {
        /// Optional qualifier restricting which schema the wildcard refers to.
        qualifier: Option<TableReference>,
        /// Additional wildcard options (boxed).
        options: Box<WildcardOptions>,
    },
    /// List of grouping set expressions. Only valid in the context of an aggregate
    /// GROUP BY expression list
    GroupingSet(GroupingSet),
    /// A placeholder for parameters in a prepared statement
    /// (e.g. `$foo` or `$1`)
    Placeholder(Placeholder),
    /// A placeholder which holds a reference to a qualified field
    /// in the outer query, used for correlated subqueries.
    OuterReferenceColumn(DataType, Column),
    /// Unnest expression
    Unnest(Unnest),
}
369
370impl Default for Expr {
371    fn default() -> Self {
372        Expr::Literal(ScalarValue::Null, None)
373    }
374}
375
376/// Create an [`Expr`] from a [`Column`]
377impl From<Column> for Expr {
378    fn from(value: Column) -> Self {
379        Expr::Column(value)
380    }
381}
382
383/// Create an [`Expr`] from a [`WindowFunction`]
384impl From<WindowFunction> for Expr {
385    fn from(value: WindowFunction) -> Self {
386        Expr::WindowFunction(Box::new(value))
387    }
388}
389
390/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
391/// useful for creating [`Expr`] from a [`DFSchema`].
392///
393/// See example on [`Expr`]
394impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
395    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
396        Expr::from(Column::from(value))
397    }
398}
399
400impl<'a> TreeNodeContainer<'a, Self> for Expr {
401    fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
402        &'a self,
403        mut f: F,
404    ) -> Result<TreeNodeRecursion> {
405        f(self)
406    }
407
408    fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>(
409        self,
410        mut f: F,
411    ) -> Result<Transformed<Self>> {
412        f(self)
413    }
414}
415
416/// UNNEST expression.
417#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
418pub struct Unnest {
419    pub expr: Box<Expr>,
420}
421
422impl Unnest {
423    /// Create a new Unnest expression.
424    pub fn new(expr: Expr) -> Self {
425        Self {
426            expr: Box::new(expr),
427        }
428    }
429
430    /// Create a new Unnest expression.
431    pub fn new_boxed(boxed: Box<Expr>) -> Self {
432        Self { expr: boxed }
433    }
434}
435
/// Alias expression
///
/// `PartialEq`/`Eq` are derived and therefore compare every field,
/// including `metadata`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Alias {
    /// The expression being given a name.
    pub expr: Box<Expr>,
    /// Optional qualifier for the alias.
    pub relation: Option<TableReference>,
    /// The name assigned to `expr`.
    pub name: String,
    /// Optional key/value metadata attached to the alias.
    pub metadata: Option<std::collections::HashMap<String, String>>,
}
444
445impl Hash for Alias {
446    fn hash<H: Hasher>(&self, state: &mut H) {
447        self.expr.hash(state);
448        self.relation.hash(state);
449        self.name.hash(state);
450    }
451}
452
453impl PartialOrd for Alias {
454    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
455        let cmp = self.expr.partial_cmp(&other.expr);
456        let Some(Ordering::Equal) = cmp else {
457            return cmp;
458        };
459        let cmp = self.relation.partial_cmp(&other.relation);
460        let Some(Ordering::Equal) = cmp else {
461            return cmp;
462        };
463        self.name.partial_cmp(&other.name)
464    }
465}
466
467impl Alias {
468    /// Create an alias with an optional schema/field qualifier.
469    pub fn new(
470        expr: Expr,
471        relation: Option<impl Into<TableReference>>,
472        name: impl Into<String>,
473    ) -> Self {
474        Self {
475            expr: Box::new(expr),
476            relation: relation.map(|r| r.into()),
477            name: name.into(),
478            metadata: None,
479        }
480    }
481
482    pub fn with_metadata(
483        mut self,
484        metadata: Option<std::collections::HashMap<String, String>>,
485    ) -> Self {
486        self.metadata = metadata;
487        self
488    }
489}
490
491/// Binary expression
492#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
493pub struct BinaryExpr {
494    /// Left-hand side of the expression
495    pub left: Box<Expr>,
496    /// The comparison operator
497    pub op: Operator,
498    /// Right-hand side of the expression
499    pub right: Box<Expr>,
500}
501
502impl BinaryExpr {
503    /// Create a new binary expression
504    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
505        Self { left, op, right }
506    }
507}
508
509impl Display for BinaryExpr {
510    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
511        // Put parentheses around child binary expressions so that we can see the difference
512        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
513        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
514        // equivalent and the parentheses are not necessary.
515
516        fn write_child(
517            f: &mut Formatter<'_>,
518            expr: &Expr,
519            precedence: u8,
520        ) -> fmt::Result {
521            match expr {
522                Expr::BinaryExpr(child) => {
523                    let p = child.op.precedence();
524                    if p == 0 || p < precedence {
525                        write!(f, "({child})")?;
526                    } else {
527                        write!(f, "{child}")?;
528                    }
529                }
530                _ => write!(f, "{expr}")?,
531            }
532            Ok(())
533        }
534
535        let precedence = self.op.precedence();
536        write_child(f, self.left.as_ref(), precedence)?;
537        write!(f, " {} ", self.op)?;
538        write_child(f, self.right.as_ref(), precedence)
539    }
540}
541
542/// CASE expression
543///
544/// The CASE expression is similar to a series of nested if/else and there are two forms that
545/// can be used. The first form consists of a series of boolean "when" expressions with
546/// corresponding "then" expressions, and an optional "else" expression.
547///
548/// ```text
549/// CASE WHEN condition THEN result
550///      [WHEN ...]
551///      [ELSE result]
552/// END
553/// ```
554///
555/// The second form uses a base expression and then a series of "when" clauses that match on a
556/// literal value.
557///
558/// ```text
559/// CASE expression
560///     WHEN value THEN result
561///     [WHEN ...]
562///     [ELSE result]
563/// END
564/// ```
565#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
566pub struct Case {
567    /// Optional base expression that can be compared to literal values in the "when" expressions
568    pub expr: Option<Box<Expr>>,
569    /// One or more when/then expressions
570    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
571    /// Optional "else" expression
572    pub else_expr: Option<Box<Expr>>,
573}
574
575impl Case {
576    /// Create a new Case expression
577    pub fn new(
578        expr: Option<Box<Expr>>,
579        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
580        else_expr: Option<Box<Expr>>,
581    ) -> Self {
582        Self {
583            expr,
584            when_then_expr,
585            else_expr,
586        }
587    }
588}
589
590/// LIKE expression
591#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
592pub struct Like {
593    pub negated: bool,
594    pub expr: Box<Expr>,
595    pub pattern: Box<Expr>,
596    pub escape_char: Option<char>,
597    /// Whether to ignore case on comparing
598    pub case_insensitive: bool,
599}
600
601impl Like {
602    /// Create a new Like expression
603    pub fn new(
604        negated: bool,
605        expr: Box<Expr>,
606        pattern: Box<Expr>,
607        escape_char: Option<char>,
608        case_insensitive: bool,
609    ) -> Self {
610        Self {
611            negated,
612            expr,
613            pattern,
614            escape_char,
615            case_insensitive,
616        }
617    }
618}
619
620/// BETWEEN expression
621#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
622pub struct Between {
623    /// The value to compare
624    pub expr: Box<Expr>,
625    /// Whether the expression is negated
626    pub negated: bool,
627    /// The low end of the range
628    pub low: Box<Expr>,
629    /// The high end of the range
630    pub high: Box<Expr>,
631}
632
633impl Between {
634    /// Create a new Between expression
635    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
636        Self {
637            expr,
638            negated,
639            low,
640            high,
641        }
642    }
643}
644
645/// Invoke a [`ScalarUDF`] with a set of arguments
646///
647/// [`ScalarUDF`]: crate::ScalarUDF
648#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
649pub struct ScalarFunction {
650    /// The function
651    pub func: Arc<crate::ScalarUDF>,
652    /// List of expressions to feed to the functions as arguments
653    pub args: Vec<Expr>,
654}
655
656impl ScalarFunction {
657    // return the Function's name
658    pub fn name(&self) -> &str {
659        self.func.name()
660    }
661}
662
663impl ScalarFunction {
664    /// Create a new `ScalarFunction` from a [`ScalarUDF`]
665    ///
666    /// [`ScalarUDF`]: crate::ScalarUDF
667    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
668        Self { func: udf, args }
669    }
670}
671
/// Access a sub field of a nested type, such as a struct or a list
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
    /// Named field, for example `struct["name"]`
    NamedStructField { name: ScalarValue },
    /// Single list index, for example: `list[i]`
    ListIndex { key: Box<Expr> },
    /// List range with a stride, for example `list[i:j:k]`
    ListRange {
        /// Start of the range (`i` in `list[i:j:k]`)
        start: Box<Expr>,
        /// End of the range (`j` in `list[i:j:k]`)
        stop: Box<Expr>,
        /// Step between elements (`k` in `list[i:j:k]`)
        stride: Box<Expr>,
    },
}
686
687/// Cast expression
688#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
689pub struct Cast {
690    /// The expression being cast
691    pub expr: Box<Expr>,
692    /// The `DataType` the expression will yield
693    pub data_type: DataType,
694}
695
696impl Cast {
697    /// Create a new Cast expression
698    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
699        Self { expr, data_type }
700    }
701}
702
703/// TryCast Expression
704#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
705pub struct TryCast {
706    /// The expression being cast
707    pub expr: Box<Expr>,
708    /// The `DataType` the expression will yield
709    pub data_type: DataType,
710}
711
712impl TryCast {
713    /// Create a new TryCast expression
714    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
715        Self { expr, data_type }
716    }
717}
718
719/// SORT expression
720#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
721pub struct Sort {
722    /// The expression to sort on
723    pub expr: Expr,
724    /// The direction of the sort
725    pub asc: bool,
726    /// Whether to put Nulls before all other data values
727    pub nulls_first: bool,
728}
729
730impl Sort {
731    /// Create a new Sort expression
732    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
733        Self {
734            expr,
735            asc,
736            nulls_first,
737        }
738    }
739
740    /// Create a new Sort expression with the opposite sort direction
741    pub fn reverse(&self) -> Self {
742        Self {
743            expr: self.expr.clone(),
744            asc: !self.asc,
745            nulls_first: !self.nulls_first,
746        }
747    }
748
749    /// Replaces the Sort expressions with `expr`
750    pub fn with_expr(&self, expr: Expr) -> Self {
751        Self {
752            expr,
753            asc: self.asc,
754            nulls_first: self.nulls_first,
755        }
756    }
757}
758
759impl Display for Sort {
760    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
761        write!(f, "{}", self.expr)?;
762        if self.asc {
763            write!(f, " ASC")?;
764        } else {
765            write!(f, " DESC")?;
766        }
767        if self.nulls_first {
768            write!(f, " NULLS FIRST")?;
769        } else {
770            write!(f, " NULLS LAST")?;
771        }
772        Ok(())
773    }
774}
775
776impl<'a> TreeNodeContainer<'a, Expr> for Sort {
777    fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>(
778        &'a self,
779        f: F,
780    ) -> Result<TreeNodeRecursion> {
781        self.expr.apply_elements(f)
782    }
783
784    fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
785        self,
786        f: F,
787    ) -> Result<Transformed<Self>> {
788        self.expr
789            .map_elements(f)?
790            .map_data(|expr| Ok(Self { expr, ..self }))
791    }
792}
793
/// Aggregate function
///
/// See also  [`ExprFunctionExt`] to set these fields on `Expr`
///
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunction {
    /// The aggregate function to call
    pub func: Arc<crate::AggregateUDF>,
    /// Arguments and modifiers of the call (see [`AggregateFunctionParams`])
    pub params: AggregateFunctionParams,
}
805
/// Arguments and modifiers of an [`AggregateFunction`] call.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunctionParams {
    /// List of expressions passed to the function as arguments
    pub args: Vec<Expr>,
    /// Whether this is a DISTINCT aggregation or not
    pub distinct: bool,
    /// Optional filter
    pub filter: Option<Box<Expr>>,
    /// Optional ordering
    pub order_by: Option<Vec<Sort>>,
    /// Optional `NULL TREATMENT` setting
    pub null_treatment: Option<NullTreatment>,
}
817
818impl AggregateFunction {
819    /// Create a new AggregateFunction expression with a user-defined function (UDF)
820    pub fn new_udf(
821        func: Arc<crate::AggregateUDF>,
822        args: Vec<Expr>,
823        distinct: bool,
824        filter: Option<Box<Expr>>,
825        order_by: Option<Vec<Sort>>,
826        null_treatment: Option<NullTreatment>,
827    ) -> Self {
828        Self {
829            func,
830            params: AggregateFunctionParams {
831                args,
832                distinct,
833                filter,
834                order_by,
835                null_treatment,
836            },
837        }
838    }
839}
840
/// A function used as a SQL window function
///
/// In SQL, you can use:
/// - Actual window functions ([`WindowUDF`])
/// - Normal aggregate functions ([`AggregateUDF`])
///
/// [`AggregateUDF`]: crate::AggregateUDF
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum WindowFunctionDefinition {
    /// A user defined aggregate function
    AggregateUDF(Arc<crate::AggregateUDF>),
    /// A user defined window function
    WindowUDF(Arc<WindowUDF>),
}
853
854impl WindowFunctionDefinition {
855    /// Returns the datatype of the window function
856    pub fn return_field(
857        &self,
858        input_expr_fields: &[FieldRef],
859        _input_expr_nullable: &[bool],
860        display_name: &str,
861    ) -> Result<FieldRef> {
862        match self {
863            WindowFunctionDefinition::AggregateUDF(fun) => {
864                fun.return_field(input_expr_fields)
865            }
866            WindowFunctionDefinition::WindowUDF(fun) => {
867                fun.field(WindowUDFFieldArgs::new(input_expr_fields, display_name))
868            }
869        }
870    }
871
872    /// The signatures supported by the function `fun`.
873    pub fn signature(&self) -> Signature {
874        match self {
875            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
876            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
877        }
878    }
879
880    /// Function's name for display
881    pub fn name(&self) -> &str {
882        match self {
883            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
884            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
885        }
886    }
887
888    /// Return the the inner window simplification function, if any
889    ///
890    /// See [`WindowFunctionSimplification`] for more information
891    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
892        match self {
893            WindowFunctionDefinition::AggregateUDF(_) => None,
894            WindowFunctionDefinition::WindowUDF(udwf) => udwf.simplify(),
895        }
896    }
897}
898
899impl Display for WindowFunctionDefinition {
900    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
901        match self {
902            WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f),
903            WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f),
904        }
905    }
906}
907
908impl From<Arc<crate::AggregateUDF>> for WindowFunctionDefinition {
909    fn from(value: Arc<crate::AggregateUDF>) -> Self {
910        Self::AggregateUDF(value)
911    }
912}
913
914impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
915    fn from(value: Arc<WindowUDF>) -> Self {
916        Self::WindowUDF(value)
917    }
918}
919
/// Window function
///
/// Pairs the function to call (a [`WindowFunctionDefinition`]) with its
/// [`WindowFunctionParams`]: the arguments (`args`) and the contents of the
/// `OVER` clause:
///
/// 1. `PARTITION BY`
/// 2. `ORDER BY`
/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
///
/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
///
/// [`ExprFunctionExt`]: crate::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunction {
    /// The function to evaluate: an aggregate UDF or a window UDF
    pub fun: WindowFunctionDefinition,
    /// Function arguments and the contents of the `OVER` clause
    pub params: WindowFunctionParams,
}
938
/// Arguments and `OVER` clause contents of a [`WindowFunction`]
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunctionParams {
    /// List of expressions to feed to the function as arguments
    pub args: Vec<Expr>,
    /// List of `PARTITION BY` expressions
    pub partition_by: Vec<Expr>,
    /// List of `ORDER BY` expressions
    pub order_by: Vec<Sort>,
    /// Window frame
    pub window_frame: WindowFrame,
    /// Specifies how NULL values are treated (ignore or respect), if stated
    pub null_treatment: Option<NullTreatment>,
}
952
953impl WindowFunction {
954    /// Create a new Window expression with the specified argument an
955    /// empty `OVER` clause
956    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
957        Self {
958            fun: fun.into(),
959            params: WindowFunctionParams {
960                args,
961                partition_by: Vec::default(),
962                order_by: Vec::default(),
963                window_frame: WindowFrame::new(None),
964                null_treatment: None,
965            },
966        }
967    }
968
969    /// Return the the inner window simplification function, if any
970    ///
971    /// See [`WindowFunctionSimplification`] for more information
972    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
973        self.fun.simplify()
974    }
975}
976
977/// EXISTS expression
978#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
979pub struct Exists {
980    /// Subquery that will produce a single column of data
981    pub subquery: Subquery,
982    /// Whether the expression is negated
983    pub negated: bool,
984}
985
986impl Exists {
987    // Create a new Exists expression.
988    pub fn new(subquery: Subquery, negated: bool) -> Self {
989        Self { subquery, negated }
990    }
991}
992
993/// User Defined Aggregate Function
994///
995/// See [`udaf::AggregateUDF`] for more information.
996#[derive(Clone, PartialEq, Eq, Hash, Debug)]
997pub struct AggregateUDF {
998    /// The function
999    pub fun: Arc<udaf::AggregateUDF>,
1000    /// List of expressions to feed to the functions as arguments
1001    pub args: Vec<Expr>,
1002    /// Optional filter
1003    pub filter: Option<Box<Expr>>,
1004    /// Optional ORDER BY applied prior to aggregating
1005    pub order_by: Option<Vec<Expr>>,
1006}
1007
1008impl AggregateUDF {
1009    /// Create a new AggregateUDF expression
1010    pub fn new(
1011        fun: Arc<udaf::AggregateUDF>,
1012        args: Vec<Expr>,
1013        filter: Option<Box<Expr>>,
1014        order_by: Option<Vec<Expr>>,
1015    ) -> Self {
1016        Self {
1017            fun,
1018            args,
1019            filter,
1020            order_by,
1021        }
1022    }
1023}
1024
1025/// InList expression
1026#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1027pub struct InList {
1028    /// The expression to compare
1029    pub expr: Box<Expr>,
1030    /// The list of values to compare against
1031    pub list: Vec<Expr>,
1032    /// Whether the expression is negated
1033    pub negated: bool,
1034}
1035
1036impl InList {
1037    /// Create a new InList expression
1038    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
1039        Self {
1040            expr,
1041            list,
1042            negated,
1043        }
1044    }
1045}
1046
1047/// IN subquery
1048#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1049pub struct InSubquery {
1050    /// The expression to compare
1051    pub expr: Box<Expr>,
1052    /// Subquery that will produce a single column of data to compare against
1053    pub subquery: Subquery,
1054    /// Whether the expression is negated
1055    pub negated: bool,
1056}
1057
1058impl InSubquery {
1059    /// Create a new InSubquery expression
1060    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
1061        Self {
1062            expr,
1063            subquery,
1064            negated,
1065        }
1066    }
1067}
1068
1069/// Placeholder, representing bind parameter values such as `$1` or `$name`.
1070///
1071/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
1072/// or can be specified directly using `PREPARE` statements.
1073#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1074pub struct Placeholder {
1075    /// The identifier of the parameter, including the leading `$` (e.g, `"$1"` or `"$foo"`)
1076    pub id: String,
1077    /// The type the parameter will be filled in with
1078    pub data_type: Option<DataType>,
1079}
1080
1081impl Placeholder {
1082    /// Create a new Placeholder expression
1083    pub fn new(id: String, data_type: Option<DataType>) -> Self {
1084        Self { id, data_type }
1085    }
1086}
1087
/// Grouping sets
///
/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
/// for Postgres definition.
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
/// for Apache Spark definition.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum GroupingSet {
    /// `ROLLUP` over the listed expressions
    Rollup(Vec<Expr>),
    /// `CUBE` over the listed expressions
    Cube(Vec<Expr>),
    /// User-defined grouping sets: one inner list of expressions per set
    GroupingSets(Vec<Vec<Expr>>),
}
1103
1104impl GroupingSet {
1105    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
1106    /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate
1107    /// the exprs in the underlying sets.
1108    pub fn distinct_expr(&self) -> Vec<&Expr> {
1109        match self {
1110            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
1111                exprs.iter().collect()
1112            }
1113            GroupingSet::GroupingSets(groups) => {
1114                let mut exprs: Vec<&Expr> = vec![];
1115                for exp in groups.iter().flatten() {
1116                    if !exprs.contains(&exp) {
1117                        exprs.push(exp);
1118                    }
1119                }
1120                exprs
1121            }
1122        }
1123    }
1124}
1125
/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and BigQuery `EXCEPT`.
///
/// Every option is optional; the `Default` value has none of them set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct WildcardOptions {
    /// `[ILIKE...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub ilike: Option<IlikeSelectItem>,
    /// `[EXCLUDE...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub exclude: Option<ExcludeSelectItem>,
    /// `[EXCEPT...]`.
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
    pub except: Option<ExceptSelectItem>,
    /// `[REPLACE...]`, stored together with its planned expressions.
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub replace: Option<PlannedReplaceSelectItem>,
    /// `[RENAME ...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub rename: Option<RenameSelectItem>,
}
1148
1149impl WildcardOptions {
1150    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1151        WildcardOptions {
1152            ilike: self.ilike,
1153            exclude: self.exclude,
1154            except: self.except,
1155            replace: Some(replace),
1156            rename: self.rename,
1157        }
1158    }
1159}
1160
1161impl Display for WildcardOptions {
1162    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1163        if let Some(ilike) = &self.ilike {
1164            write!(f, " {ilike}")?;
1165        }
1166        if let Some(exclude) = &self.exclude {
1167            write!(f, " {exclude}")?;
1168        }
1169        if let Some(except) = &self.except {
1170            write!(f, " {except}")?;
1171        }
1172        if let Some(replace) = &self.replace {
1173            write!(f, " {replace}")?;
1174        }
1175        if let Some(rename) = &self.rename {
1176            write!(f, " {rename}")?;
1177        }
1178        Ok(())
1179    }
1180}
1181
/// The planned expressions for `REPLACE`
///
/// Keeps the original AST elements (used for display) alongside the
/// expressions planned from them.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct PlannedReplaceSelectItem {
    /// The original AST nodes
    pub items: Vec<ReplaceSelectElement>,
    /// The expressions planned from the AST nodes. They will be used when expanding the wildcard.
    pub planned_expressions: Vec<Expr>,
}
1190
1191impl Display for PlannedReplaceSelectItem {
1192    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1193        write!(f, "REPLACE")?;
1194        write!(f, " ({})", display_comma_separated(&self.items))?;
1195        Ok(())
1196    }
1197}
1198
1199impl PlannedReplaceSelectItem {
1200    pub fn items(&self) -> &[ReplaceSelectElement] {
1201        &self.items
1202    }
1203
1204    pub fn expressions(&self) -> &[Expr] {
1205        &self.planned_expressions
1206    }
1207}
1208
1209impl Expr {
    /// The name of the column (field) that this `Expr` will produce.
    ///
    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
    /// [`Schema`] will have a field with this name.
    ///
    /// Note that the resulting string is subtly different from the `Display`
    /// representation for certain `Expr`. Some differences:
    ///
    /// 1. [`Expr::Alias`], which shows only the alias itself
    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
    ///
    /// The returned value borrows `self` and is formatted lazily; call
    /// `to_string()` on it to obtain an owned `String`.
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo").eq(lit(42));
    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
    ///
    /// let expr = col("foo").alias("bar").eq(lit(11));
    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
    /// ```
    ///
    /// [`Schema`]: arrow::datatypes::Schema
    pub fn schema_name(&self) -> impl Display + '_ {
        SchemaDisplay(self)
    }
1235
    /// Human readable display formatting for this expression.
    ///
    /// This function is primarily used in printing the explain tree output,
    /// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
    /// show how expressions are used in physical and logical plans. See the
    /// [`Expr`] documentation for other ways to format expressions.
    ///
    /// Note this format is intended for human consumption rather than SQL for
    /// other systems. If you need SQL to pass to other systems, consider using
    /// [`Unparser`].
    ///
    /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo") + lit(42);
    /// // For EXPLAIN output:
    /// // "foo + 42"
    /// println!("{}", expr.human_display());
    /// ```
    pub fn human_display(&self) -> impl Display + '_ {
        SqlDisplay(self)
    }
1260
1261    /// Returns the qualifier and the schema name of this expression.
1262    ///
1263    /// Used when the expression forms the output field of a certain plan.
1264    /// The result is the field's qualifier and field name in the plan's
1265    /// output schema. We can use this qualified name to reference the field.
1266    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1267        match self {
1268            Expr::Column(Column {
1269                relation,
1270                name,
1271                spans: _,
1272            }) => (relation.clone(), name.clone()),
1273            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1274            _ => (None, self.schema_name().to_string()),
1275        }
1276    }
1277
1278    /// Returns a full and complete string representation of this expression.
1279    #[deprecated(since = "42.0.0", note = "use format! instead")]
1280    pub fn canonical_name(&self) -> String {
1281        format!("{self}")
1282    }
1283
    /// Returns the name of the `Expr` variant that `self` is, as a string.
    ///
    /// Useful for non-Rust based bindings. The returned name usually matches
    /// the variant identifier, with two exceptions:
    /// [`Expr::OuterReferenceColumn`] is reported as `"Outer"` and
    /// [`Expr::SimilarTo`] as `"RLike"`.
    pub fn variant_name(&self) -> &str {
        match self {
            Expr::AggregateFunction { .. } => "AggregateFunction",
            Expr::Alias(..) => "Alias",
            Expr::Between { .. } => "Between",
            Expr::BinaryExpr { .. } => "BinaryExpr",
            Expr::Case { .. } => "Case",
            Expr::Cast { .. } => "Cast",
            Expr::Column(..) => "Column",
            // Reported under a shortened name, not the variant identifier
            Expr::OuterReferenceColumn(_, _) => "Outer",
            Expr::Exists { .. } => "Exists",
            Expr::GroupingSet(..) => "GroupingSet",
            Expr::InList { .. } => "InList",
            Expr::InSubquery(..) => "InSubquery",
            Expr::IsNotNull(..) => "IsNotNull",
            Expr::IsNull(..) => "IsNull",
            Expr::Like { .. } => "Like",
            // Reported as "RLike", not as the variant identifier
            Expr::SimilarTo { .. } => "RLike",
            Expr::IsTrue(..) => "IsTrue",
            Expr::IsFalse(..) => "IsFalse",
            Expr::IsUnknown(..) => "IsUnknown",
            Expr::IsNotTrue(..) => "IsNotTrue",
            Expr::IsNotFalse(..) => "IsNotFalse",
            Expr::IsNotUnknown(..) => "IsNotUnknown",
            Expr::Literal(..) => "Literal",
            Expr::Negative(..) => "Negative",
            Expr::Not(..) => "Not",
            Expr::Placeholder(_) => "Placeholder",
            Expr::ScalarFunction(..) => "ScalarFunction",
            Expr::ScalarSubquery { .. } => "ScalarSubquery",
            Expr::ScalarVariable(..) => "ScalarVariable",
            Expr::TryCast { .. } => "TryCast",
            Expr::WindowFunction { .. } => "WindowFunction",
            // The variant is deprecated but must still be matched here
            #[expect(deprecated)]
            Expr::Wildcard { .. } => "Wildcard",
            Expr::Unnest { .. } => "Unnest",
        }
    }
1324
    /// Return the binary expression `self = other`, built with [`Operator::Eq`]
    pub fn eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Eq, other)
    }
1329
    /// Return the binary expression `self != other`, built with [`Operator::NotEq`]
    pub fn not_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::NotEq, other)
    }
1334
    /// Return the binary expression `self > other`, built with [`Operator::Gt`]
    pub fn gt(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Gt, other)
    }
1339
    /// Return the binary expression `self >= other`, built with [`Operator::GtEq`]
    pub fn gt_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::GtEq, other)
    }
1344
    /// Return the binary expression `self < other`, built with [`Operator::Lt`]
    pub fn lt(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Lt, other)
    }
1349
    /// Return the binary expression `self <= other`, built with [`Operator::LtEq`]
    pub fn lt_eq(self, other: Expr) -> Expr {
        binary_expr(self, Operator::LtEq, other)
    }
1354
    /// Return the logical conjunction `self AND other`, built with [`Operator::And`]
    pub fn and(self, other: Expr) -> Expr {
        binary_expr(self, Operator::And, other)
    }
1359
    /// Return the logical disjunction `self OR other`, built with [`Operator::Or`]
    pub fn or(self, other: Expr) -> Expr {
        binary_expr(self, Operator::Or, other)
    }
1364
1365    /// Return `self LIKE other`
1366    pub fn like(self, other: Expr) -> Expr {
1367        Expr::Like(Like::new(
1368            false,
1369            Box::new(self),
1370            Box::new(other),
1371            None,
1372            false,
1373        ))
1374    }
1375
1376    /// Return `self NOT LIKE other`
1377    pub fn not_like(self, other: Expr) -> Expr {
1378        Expr::Like(Like::new(
1379            true,
1380            Box::new(self),
1381            Box::new(other),
1382            None,
1383            false,
1384        ))
1385    }
1386
1387    /// Return `self ILIKE other`
1388    pub fn ilike(self, other: Expr) -> Expr {
1389        Expr::Like(Like::new(
1390            false,
1391            Box::new(self),
1392            Box::new(other),
1393            None,
1394            true,
1395        ))
1396    }
1397
1398    /// Return `self NOT ILIKE other`
1399    pub fn not_ilike(self, other: Expr) -> Expr {
1400        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1401    }
1402
    /// Return the name to use for the specific Expr
    ///
    /// This is the string form of [`Self::schema_name`]. The current
    /// implementation always returns `Ok`.
    pub fn name_for_alias(&self) -> Result<String> {
        Ok(self.schema_name().to_string())
    }
1407
1408    /// Ensure `expr` has the name as `original_name` by adding an
1409    /// alias if necessary.
1410    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1411        let new_name = self.name_for_alias()?;
1412        if new_name == original_name {
1413            return Ok(self);
1414        }
1415
1416        Ok(self.alias(original_name))
1417    }
1418
    /// Return `self AS name` alias expression, without a table qualifier
    pub fn alias(self, name: impl Into<String>) -> Expr {
        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
    }
1423
    /// Return `self AS name` alias expression with metadata, without a table
    /// qualifier
    ///
    /// The metadata will be attached to the Arrow Schema field when the expression
    /// is converted to a field via `Expr.to_field()`.
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::col;
    /// use std::collections::HashMap;
    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
    /// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
    /// ```
    pub fn alias_with_metadata(
        self,
        name: impl Into<String>,
        metadata: Option<std::collections::HashMap<String, String>>,
    ) -> Expr {
        Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
    }
1444
    /// Return `self AS name` alias expression with a specific qualifier
    ///
    /// `relation` is the optional table qualifier stored on the alias.
    pub fn alias_qualified(
        self,
        relation: Option<impl Into<TableReference>>,
        name: impl Into<String>,
    ) -> Expr {
        Expr::Alias(Alias::new(self, relation, name.into()))
    }
1453
    /// Return `self AS name` alias expression with a specific qualifier and metadata
    ///
    /// `relation` is the optional table qualifier stored on the alias.
    /// The metadata will be attached to the Arrow Schema field when the expression
    /// is converted to a field via `Expr.to_field()`.
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::col;
    /// use std::collections::HashMap;
    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
    /// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
    /// ```
    pub fn alias_qualified_with_metadata(
        self,
        relation: Option<impl Into<TableReference>>,
        name: impl Into<String>,
        metadata: Option<std::collections::HashMap<String, String>>,
    ) -> Expr {
        Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
    }
1475
1476    /// Remove an alias from an expression if one exists.
1477    ///
1478    /// If the expression is not an alias, the expression is returned unchanged.
1479    /// This method does not remove aliases from nested expressions.
1480    ///
1481    /// # Example
1482    /// ```
1483    /// # use datafusion_expr::col;
1484    /// // `foo as "bar"` is unaliased to `foo`
1485    /// let expr = col("foo").alias("bar");
1486    /// assert_eq!(expr.unalias(), col("foo"));
1487    ///
1488    /// // `foo as "bar" + baz` is not unaliased
1489    /// let expr = col("foo").alias("bar") + col("baz");
1490    /// assert_eq!(expr.clone().unalias(), expr);
1491    ///
1492    /// // `foo as "bar" as "baz" is unaliased to foo as "bar"
1493    /// let expr = col("foo").alias("bar").alias("baz");
1494    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1495    /// ```
1496    pub fn unalias(self) -> Expr {
1497        match self {
1498            Expr::Alias(alias) => *alias.expr,
1499            _ => self,
1500        }
1501    }
1502
1503    /// Recursively removed potentially multiple aliases from an expression.
1504    ///
1505    /// This method removes nested aliases and returns [`Transformed`]
1506    /// to signal if the expression was changed.
1507    ///
1508    /// # Example
1509    /// ```
1510    /// # use datafusion_expr::col;
1511    /// // `foo as "bar"` is unaliased to `foo`
1512    /// let expr = col("foo").alias("bar");
1513    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1514    ///
1515    /// // `foo as "bar" + baz` is  unaliased
1516    /// let expr = col("foo").alias("bar") + col("baz");
1517    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1518    ///
1519    /// // `foo as "bar" as "baz" is unalaised to foo
1520    /// let expr = col("foo").alias("bar").alias("baz");
1521    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1522    /// ```
1523    pub fn unalias_nested(self) -> Transformed<Expr> {
1524        self.transform_down_up(
1525            |expr| {
1526                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1527                let recursion = if matches!(
1528                    expr,
1529                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1530                ) {
1531                    // Subqueries could contain aliases so don't recurse into those
1532                    TreeNodeRecursion::Jump
1533                } else {
1534                    TreeNodeRecursion::Continue
1535                };
1536                Ok(Transformed::new(expr, false, recursion))
1537            },
1538            |expr| {
1539                // f_up: unalias on up so we can remove nested aliases like
1540                // `(x as foo) as bar`
1541                if let Expr::Alias(alias) = expr {
1542                    match alias
1543                        .metadata
1544                        .as_ref()
1545                        .map(|h| h.is_empty())
1546                        .unwrap_or(true)
1547                    {
1548                        true => Ok(Transformed::yes(*alias.expr)),
1549                        false => Ok(Transformed::no(Expr::Alias(alias))),
1550                    }
1551                } else {
1552                    Ok(Transformed::no(expr))
1553                }
1554            },
1555        )
1556        // Unreachable code: internal closure doesn't return err
1557        .unwrap()
1558    }
1559
    /// Return `self IN <list>` if `negated` is false, otherwise
    /// return `self NOT IN <list>`.
    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
        Expr::InList(InList::new(Box::new(self), list, negated))
    }
1565
    /// Return `self IS NULL`, i.e. `IsNull(Box(self))`
    pub fn is_null(self) -> Expr {
        Expr::IsNull(Box::new(self))
    }
1570
    /// Return `self IS NOT NULL`, i.e. `IsNotNull(Box(self))`
    pub fn is_not_null(self) -> Expr {
        Expr::IsNotNull(Box::new(self))
    }
1575
    /// Create a sort configuration from an existing expression.
    ///
    /// `asc` and `nulls_first` are passed to `Sort::new` unchanged.
    ///
    /// ```
    /// # use datafusion_expr::col;
    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
    /// ```
    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
        Sort::new(self, asc, nulls_first)
    }
1585
    /// Return `self IS TRUE`, i.e. `IsTrue(Box(self))`
    pub fn is_true(self) -> Expr {
        Expr::IsTrue(Box::new(self))
    }
1590
    /// Return `self IS NOT TRUE`, i.e. `IsNotTrue(Box(self))`
    pub fn is_not_true(self) -> Expr {
        Expr::IsNotTrue(Box::new(self))
    }
1595
    /// Return `self IS FALSE`, i.e. `IsFalse(Box(self))`
    pub fn is_false(self) -> Expr {
        Expr::IsFalse(Box::new(self))
    }
1600
    /// Return `self IS NOT FALSE`, i.e. `IsNotFalse(Box(self))`
    pub fn is_not_false(self) -> Expr {
        Expr::IsNotFalse(Box::new(self))
    }
1605
    /// Return `self IS UNKNOWN`, i.e. `IsUnknown(Box(self))`
    pub fn is_unknown(self) -> Expr {
        Expr::IsUnknown(Box::new(self))
    }
1610
    /// Return `self IS NOT UNKNOWN`, i.e. `IsNotUnknown(Box(self))`
    pub fn is_not_unknown(self) -> Expr {
        Expr::IsNotUnknown(Box::new(self))
    }
1615
1616    /// return `self BETWEEN low AND high`
1617    pub fn between(self, low: Expr, high: Expr) -> Expr {
1618        Expr::Between(Between::new(
1619            Box::new(self),
1620            false,
1621            Box::new(low),
1622            Box::new(high),
1623        ))
1624    }
1625
1626    /// Return `self NOT BETWEEN low AND high`
1627    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1628        Expr::Between(Between::new(
1629            Box::new(self),
1630            true,
1631            Box::new(low),
1632            Box::new(high),
1633        ))
1634    }
1635    /// Return a reference to the inner `Column` if any
1636    ///
1637    /// returns `None` if the expression is not a `Column`
1638    ///
1639    /// Note: None may be returned for expressions that are not `Column` but
1640    /// are convertible to `Column` such as `Cast` expressions.
1641    ///
1642    /// Example
1643    /// ```
1644    /// # use datafusion_common::Column;
1645    /// use datafusion_expr::{col, Expr};
1646    /// let expr = col("foo");
1647    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
1648    ///
1649    /// let expr = col("foo").alias("bar");
1650    /// assert_eq!(expr.try_as_col(), None);
1651    /// ```
1652    pub fn try_as_col(&self) -> Option<&Column> {
1653        if let Expr::Column(it) = self {
1654            Some(it)
1655        } else {
1656            None
1657        }
1658    }
1659
1660    /// Returns the inner `Column` if any. This is a specialized version of
1661    /// [`Self::try_as_col`] that take Cast expressions into account when the
1662    /// expression is as on condition for joins.
1663    ///
1664    /// Called this method when you are sure that the expression is a `Column`
1665    /// or a `Cast` expression that wraps a `Column`.
1666    pub fn get_as_join_column(&self) -> Option<&Column> {
1667        match self {
1668            Expr::Column(c) => Some(c),
1669            Expr::Cast(Cast { expr, .. }) => match &**expr {
1670                Expr::Column(c) => Some(c),
1671                _ => None,
1672            },
1673            _ => None,
1674        }
1675    }
1676
1677    /// Return all references to columns in this expression.
1678    ///
1679    /// # Example
1680    /// ```
1681    /// # use std::collections::HashSet;
1682    /// # use datafusion_common::Column;
1683    /// # use datafusion_expr::col;
1684    /// // For an expression `a + (b * a)`
1685    /// let expr = col("a") + (col("b") * col("a"));
1686    /// let refs = expr.column_refs();
1687    /// // refs contains "a" and "b"
1688    /// assert_eq!(refs.len(), 2);
1689    /// assert!(refs.contains(&Column::new_unqualified("a")));
1690    /// assert!(refs.contains(&Column::new_unqualified("b")));
1691    /// ```
1692    pub fn column_refs(&self) -> HashSet<&Column> {
1693        let mut using_columns = HashSet::new();
1694        self.add_column_refs(&mut using_columns);
1695        using_columns
1696    }
1697
1698    /// Adds references to all columns in this expression to the set
1699    ///
1700    /// See [`Self::column_refs`] for details
1701    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
1702        self.apply(|expr| {
1703            if let Expr::Column(col) = expr {
1704                set.insert(col);
1705            }
1706            Ok(TreeNodeRecursion::Continue)
1707        })
1708        .expect("traversal is infallible");
1709    }
1710
1711    /// Return all references to columns and their occurrence counts in the expression.
1712    ///
1713    /// # Example
1714    /// ```
1715    /// # use std::collections::HashMap;
1716    /// # use datafusion_common::Column;
1717    /// # use datafusion_expr::col;
1718    /// // For an expression `a + (b * a)`
1719    /// let expr = col("a") + (col("b") * col("a"));
1720    /// let mut refs = expr.column_refs_counts();
1721    /// // refs contains "a" and "b"
1722    /// assert_eq!(refs.len(), 2);
1723    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
1724    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
1725    /// ```
1726    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
1727        let mut map = HashMap::new();
1728        self.add_column_ref_counts(&mut map);
1729        map
1730    }
1731
1732    /// Adds references to all columns and their occurrence counts in the expression to
1733    /// the map.
1734    ///
1735    /// See [`Self::column_refs_counts`] for details
1736    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
1737        self.apply(|expr| {
1738            if let Expr::Column(col) = expr {
1739                *map.entry(col).or_default() += 1;
1740            }
1741            Ok(TreeNodeRecursion::Continue)
1742        })
1743        .expect("traversal is infallible");
1744    }
1745
1746    /// Returns true if there are any column references in this Expr
1747    pub fn any_column_refs(&self) -> bool {
1748        self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
1749            .expect("exists closure is infallible")
1750    }
1751
1752    /// Return true if the expression contains out reference(correlated) expressions.
1753    pub fn contains_outer(&self) -> bool {
1754        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
1755            .expect("exists closure is infallible")
1756    }
1757
1758    /// Returns true if the expression node is volatile, i.e. whether it can return
1759    /// different results when evaluated multiple times with the same input.
1760    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
1761    /// - `rand()` returns `true`,
1762    /// - `a + rand()` returns `false`
1763    pub fn is_volatile_node(&self) -> bool {
1764        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
1765    }
1766
1767    /// Returns true if the expression is volatile, i.e. whether it can return different
1768    /// results when evaluated multiple times with the same input.
1769    ///
1770    /// For example the function call `RANDOM()` is volatile as each call will
1771    /// return a different value.
1772    ///
1773    /// See [`Volatility`] for more information.
1774    pub fn is_volatile(&self) -> bool {
1775        self.exists(|expr| Ok(expr.is_volatile_node()))
1776            .expect("exists closure is infallible")
1777    }
1778
1779    /// Recursively find all [`Expr::Placeholder`] expressions, and
1780    /// to infer their [`DataType`] from the context of their use.
1781    ///
1782    /// For example, given an expression like `<int32> = $0` will infer `$0` to
1783    /// have type `int32`.
1784    ///
1785    /// Returns transformed expression and flag that is true if expression contains
1786    /// at least one placeholder.
1787    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
1788        let mut has_placeholder = false;
1789        self.transform(|mut expr| {
1790            match &mut expr {
1791                // Default to assuming the arguments are the same type
1792                Expr::BinaryExpr(BinaryExpr { left, op: _, right }) => {
1793                    rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
1794                    rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
1795                }
1796                Expr::Between(Between {
1797                    expr,
1798                    negated: _,
1799                    low,
1800                    high,
1801                }) => {
1802                    rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
1803                    rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
1804                }
1805                Expr::InList(InList {
1806                    expr,
1807                    list,
1808                    negated: _,
1809                }) => {
1810                    for item in list.iter_mut() {
1811                        rewrite_placeholder(item, expr.as_ref(), schema)?;
1812                    }
1813                }
1814                Expr::Like(Like { expr, pattern, .. })
1815                | Expr::SimilarTo(Like { expr, pattern, .. }) => {
1816                    rewrite_placeholder(pattern.as_mut(), expr.as_ref(), schema)?;
1817                }
1818                Expr::Placeholder(_) => {
1819                    has_placeholder = true;
1820                }
1821                _ => {}
1822            }
1823            Ok(Transformed::yes(expr))
1824        })
1825        .data()
1826        .map(|data| (data, has_placeholder))
1827    }
1828
1829    /// Returns true if some of this `exprs` subexpressions may not be evaluated
1830    /// and thus any side effects (like divide by zero) may not be encountered
1831    pub fn short_circuits(&self) -> bool {
1832        match self {
1833            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
1834            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
1835                matches!(op, Operator::And | Operator::Or)
1836            }
1837            Expr::Case { .. } => true,
1838            // Use explicit pattern match instead of a default
1839            // implementation, so that in the future if someone adds
1840            // new Expr types, they will check here as well
1841            // TODO: remove the next line after `Expr::Wildcard` is removed
1842            #[expect(deprecated)]
1843            Expr::AggregateFunction(..)
1844            | Expr::Alias(..)
1845            | Expr::Between(..)
1846            | Expr::Cast(..)
1847            | Expr::Column(..)
1848            | Expr::Exists(..)
1849            | Expr::GroupingSet(..)
1850            | Expr::InList(..)
1851            | Expr::InSubquery(..)
1852            | Expr::IsFalse(..)
1853            | Expr::IsNotFalse(..)
1854            | Expr::IsNotNull(..)
1855            | Expr::IsNotTrue(..)
1856            | Expr::IsNotUnknown(..)
1857            | Expr::IsNull(..)
1858            | Expr::IsTrue(..)
1859            | Expr::IsUnknown(..)
1860            | Expr::Like(..)
1861            | Expr::ScalarSubquery(..)
1862            | Expr::ScalarVariable(_, _)
1863            | Expr::SimilarTo(..)
1864            | Expr::Not(..)
1865            | Expr::Negative(..)
1866            | Expr::OuterReferenceColumn(_, _)
1867            | Expr::TryCast(..)
1868            | Expr::Unnest(..)
1869            | Expr::Wildcard { .. }
1870            | Expr::WindowFunction(..)
1871            | Expr::Literal(..)
1872            | Expr::Placeholder(..) => false,
1873        }
1874    }
1875
1876    /// Returns a reference to the set of locations in the SQL query where this
1877    /// expression appears, if known. [`None`] is returned if the expression
1878    /// type doesn't support tracking locations yet.
1879    pub fn spans(&self) -> Option<&Spans> {
1880        match self {
1881            Expr::Column(col) => Some(&col.spans),
1882            _ => None,
1883        }
1884    }
1885}
1886
1887impl Normalizeable for Expr {
1888    fn can_normalize(&self) -> bool {
1889        #[allow(clippy::match_like_matches_macro)]
1890        match self {
1891            Expr::BinaryExpr(BinaryExpr {
1892                op:
1893                    _op @ (Operator::Plus
1894                    | Operator::Multiply
1895                    | Operator::BitwiseAnd
1896                    | Operator::BitwiseOr
1897                    | Operator::BitwiseXor
1898                    | Operator::Eq
1899                    | Operator::NotEq),
1900                ..
1901            }) => true,
1902            _ => false,
1903        }
1904    }
1905}
1906
1907impl NormalizeEq for Expr {
1908    fn normalize_eq(&self, other: &Self) -> bool {
1909        match (self, other) {
1910            (
1911                Expr::BinaryExpr(BinaryExpr {
1912                    left: self_left,
1913                    op: self_op,
1914                    right: self_right,
1915                }),
1916                Expr::BinaryExpr(BinaryExpr {
1917                    left: other_left,
1918                    op: other_op,
1919                    right: other_right,
1920                }),
1921            ) => {
1922                if self_op != other_op {
1923                    return false;
1924                }
1925
1926                if matches!(
1927                    self_op,
1928                    Operator::Plus
1929                        | Operator::Multiply
1930                        | Operator::BitwiseAnd
1931                        | Operator::BitwiseOr
1932                        | Operator::BitwiseXor
1933                        | Operator::Eq
1934                        | Operator::NotEq
1935                ) {
1936                    (self_left.normalize_eq(other_left)
1937                        && self_right.normalize_eq(other_right))
1938                        || (self_left.normalize_eq(other_right)
1939                            && self_right.normalize_eq(other_left))
1940                } else {
1941                    self_left.normalize_eq(other_left)
1942                        && self_right.normalize_eq(other_right)
1943                }
1944            }
1945            (
1946                Expr::Alias(Alias {
1947                    expr: self_expr,
1948                    relation: self_relation,
1949                    name: self_name,
1950                    ..
1951                }),
1952                Expr::Alias(Alias {
1953                    expr: other_expr,
1954                    relation: other_relation,
1955                    name: other_name,
1956                    ..
1957                }),
1958            ) => {
1959                self_name == other_name
1960                    && self_relation == other_relation
1961                    && self_expr.normalize_eq(other_expr)
1962            }
1963            (
1964                Expr::Like(Like {
1965                    negated: self_negated,
1966                    expr: self_expr,
1967                    pattern: self_pattern,
1968                    escape_char: self_escape_char,
1969                    case_insensitive: self_case_insensitive,
1970                }),
1971                Expr::Like(Like {
1972                    negated: other_negated,
1973                    expr: other_expr,
1974                    pattern: other_pattern,
1975                    escape_char: other_escape_char,
1976                    case_insensitive: other_case_insensitive,
1977                }),
1978            )
1979            | (
1980                Expr::SimilarTo(Like {
1981                    negated: self_negated,
1982                    expr: self_expr,
1983                    pattern: self_pattern,
1984                    escape_char: self_escape_char,
1985                    case_insensitive: self_case_insensitive,
1986                }),
1987                Expr::SimilarTo(Like {
1988                    negated: other_negated,
1989                    expr: other_expr,
1990                    pattern: other_pattern,
1991                    escape_char: other_escape_char,
1992                    case_insensitive: other_case_insensitive,
1993                }),
1994            ) => {
1995                self_negated == other_negated
1996                    && self_escape_char == other_escape_char
1997                    && self_case_insensitive == other_case_insensitive
1998                    && self_expr.normalize_eq(other_expr)
1999                    && self_pattern.normalize_eq(other_pattern)
2000            }
2001            (Expr::Not(self_expr), Expr::Not(other_expr))
2002            | (Expr::IsNull(self_expr), Expr::IsNull(other_expr))
2003            | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr))
2004            | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr))
2005            | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr))
2006            | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr))
2007            | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr))
2008            | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr))
2009            | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr))
2010            | (Expr::Negative(self_expr), Expr::Negative(other_expr))
2011            | (
2012                Expr::Unnest(Unnest { expr: self_expr }),
2013                Expr::Unnest(Unnest { expr: other_expr }),
2014            ) => self_expr.normalize_eq(other_expr),
2015            (
2016                Expr::Between(Between {
2017                    expr: self_expr,
2018                    negated: self_negated,
2019                    low: self_low,
2020                    high: self_high,
2021                }),
2022                Expr::Between(Between {
2023                    expr: other_expr,
2024                    negated: other_negated,
2025                    low: other_low,
2026                    high: other_high,
2027                }),
2028            ) => {
2029                self_negated == other_negated
2030                    && self_expr.normalize_eq(other_expr)
2031                    && self_low.normalize_eq(other_low)
2032                    && self_high.normalize_eq(other_high)
2033            }
2034            (
2035                Expr::Cast(Cast {
2036                    expr: self_expr,
2037                    data_type: self_data_type,
2038                }),
2039                Expr::Cast(Cast {
2040                    expr: other_expr,
2041                    data_type: other_data_type,
2042                }),
2043            )
2044            | (
2045                Expr::TryCast(TryCast {
2046                    expr: self_expr,
2047                    data_type: self_data_type,
2048                }),
2049                Expr::TryCast(TryCast {
2050                    expr: other_expr,
2051                    data_type: other_data_type,
2052                }),
2053            ) => self_data_type == other_data_type && self_expr.normalize_eq(other_expr),
2054            (
2055                Expr::ScalarFunction(ScalarFunction {
2056                    func: self_func,
2057                    args: self_args,
2058                }),
2059                Expr::ScalarFunction(ScalarFunction {
2060                    func: other_func,
2061                    args: other_args,
2062                }),
2063            ) => {
2064                self_func.name() == other_func.name()
2065                    && self_args.len() == other_args.len()
2066                    && self_args
2067                        .iter()
2068                        .zip(other_args.iter())
2069                        .all(|(a, b)| a.normalize_eq(b))
2070            }
2071            (
2072                Expr::AggregateFunction(AggregateFunction {
2073                    func: self_func,
2074                    params:
2075                        AggregateFunctionParams {
2076                            args: self_args,
2077                            distinct: self_distinct,
2078                            filter: self_filter,
2079                            order_by: self_order_by,
2080                            null_treatment: self_null_treatment,
2081                        },
2082                }),
2083                Expr::AggregateFunction(AggregateFunction {
2084                    func: other_func,
2085                    params:
2086                        AggregateFunctionParams {
2087                            args: other_args,
2088                            distinct: other_distinct,
2089                            filter: other_filter,
2090                            order_by: other_order_by,
2091                            null_treatment: other_null_treatment,
2092                        },
2093                }),
2094            ) => {
2095                self_func.name() == other_func.name()
2096                    && self_distinct == other_distinct
2097                    && self_null_treatment == other_null_treatment
2098                    && self_args.len() == other_args.len()
2099                    && self_args
2100                        .iter()
2101                        .zip(other_args.iter())
2102                        .all(|(a, b)| a.normalize_eq(b))
2103                    && match (self_filter, other_filter) {
2104                        (Some(self_filter), Some(other_filter)) => {
2105                            self_filter.normalize_eq(other_filter)
2106                        }
2107                        (None, None) => true,
2108                        _ => false,
2109                    }
2110                    && match (self_order_by, other_order_by) {
2111                        (Some(self_order_by), Some(other_order_by)) => self_order_by
2112                            .iter()
2113                            .zip(other_order_by.iter())
2114                            .all(|(a, b)| {
2115                                a.asc == b.asc
2116                                    && a.nulls_first == b.nulls_first
2117                                    && a.expr.normalize_eq(&b.expr)
2118                            }),
2119                        (None, None) => true,
2120                        _ => false,
2121                    }
2122            }
2123            (Expr::WindowFunction(left), Expr::WindowFunction(other)) => {
2124                let WindowFunction {
2125                    fun: self_fun,
2126                    params:
2127                        WindowFunctionParams {
2128                            args: self_args,
2129                            window_frame: self_window_frame,
2130                            partition_by: self_partition_by,
2131                            order_by: self_order_by,
2132                            null_treatment: self_null_treatment,
2133                        },
2134                } = left.as_ref();
2135                let WindowFunction {
2136                    fun: other_fun,
2137                    params:
2138                        WindowFunctionParams {
2139                            args: other_args,
2140                            window_frame: other_window_frame,
2141                            partition_by: other_partition_by,
2142                            order_by: other_order_by,
2143                            null_treatment: other_null_treatment,
2144                        },
2145                } = other.as_ref();
2146
2147                self_fun.name() == other_fun.name()
2148                    && self_window_frame == other_window_frame
2149                    && self_null_treatment == other_null_treatment
2150                    && self_args.len() == other_args.len()
2151                    && self_args
2152                        .iter()
2153                        .zip(other_args.iter())
2154                        .all(|(a, b)| a.normalize_eq(b))
2155                    && self_partition_by
2156                        .iter()
2157                        .zip(other_partition_by.iter())
2158                        .all(|(a, b)| a.normalize_eq(b))
2159                    && self_order_by
2160                        .iter()
2161                        .zip(other_order_by.iter())
2162                        .all(|(a, b)| {
2163                            a.asc == b.asc
2164                                && a.nulls_first == b.nulls_first
2165                                && a.expr.normalize_eq(&b.expr)
2166                        })
2167            }
2168            (
2169                Expr::Exists(Exists {
2170                    subquery: self_subquery,
2171                    negated: self_negated,
2172                }),
2173                Expr::Exists(Exists {
2174                    subquery: other_subquery,
2175                    negated: other_negated,
2176                }),
2177            ) => {
2178                self_negated == other_negated
2179                    && self_subquery.normalize_eq(other_subquery)
2180            }
2181            (
2182                Expr::InSubquery(InSubquery {
2183                    expr: self_expr,
2184                    subquery: self_subquery,
2185                    negated: self_negated,
2186                }),
2187                Expr::InSubquery(InSubquery {
2188                    expr: other_expr,
2189                    subquery: other_subquery,
2190                    negated: other_negated,
2191                }),
2192            ) => {
2193                self_negated == other_negated
2194                    && self_expr.normalize_eq(other_expr)
2195                    && self_subquery.normalize_eq(other_subquery)
2196            }
2197            (
2198                Expr::ScalarSubquery(self_subquery),
2199                Expr::ScalarSubquery(other_subquery),
2200            ) => self_subquery.normalize_eq(other_subquery),
2201            (
2202                Expr::GroupingSet(GroupingSet::Rollup(self_exprs)),
2203                Expr::GroupingSet(GroupingSet::Rollup(other_exprs)),
2204            )
2205            | (
2206                Expr::GroupingSet(GroupingSet::Cube(self_exprs)),
2207                Expr::GroupingSet(GroupingSet::Cube(other_exprs)),
2208            ) => {
2209                self_exprs.len() == other_exprs.len()
2210                    && self_exprs
2211                        .iter()
2212                        .zip(other_exprs.iter())
2213                        .all(|(a, b)| a.normalize_eq(b))
2214            }
2215            (
2216                Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)),
2217                Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)),
2218            ) => {
2219                self_exprs.len() == other_exprs.len()
2220                    && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| {
2221                        a.len() == b.len()
2222                            && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y))
2223                    })
2224            }
2225            (
2226                Expr::InList(InList {
2227                    expr: self_expr,
2228                    list: self_list,
2229                    negated: self_negated,
2230                }),
2231                Expr::InList(InList {
2232                    expr: other_expr,
2233                    list: other_list,
2234                    negated: other_negated,
2235                }),
2236            ) => {
2237                // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)`
2238                self_negated == other_negated
2239                    && self_expr.normalize_eq(other_expr)
2240                    && self_list.len() == other_list.len()
2241                    && self_list
2242                        .iter()
2243                        .zip(other_list.iter())
2244                        .all(|(a, b)| a.normalize_eq(b))
2245            }
2246            (
2247                Expr::Case(Case {
2248                    expr: self_expr,
2249                    when_then_expr: self_when_then_expr,
2250                    else_expr: self_else_expr,
2251                }),
2252                Expr::Case(Case {
2253                    expr: other_expr,
2254                    when_then_expr: other_when_then_expr,
2255                    else_expr: other_else_expr,
2256                }),
2257            ) => {
2258                // TODO: normalize_eq for when_then_expr
2259                // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END`
2260                self_when_then_expr.len() == other_when_then_expr.len()
2261                    && self_when_then_expr
2262                        .iter()
2263                        .zip(other_when_then_expr.iter())
2264                        .all(|((self_when, self_then), (other_when, other_then))| {
2265                            self_when.normalize_eq(other_when)
2266                                && self_then.normalize_eq(other_then)
2267                        })
2268                    && match (self_expr, other_expr) {
2269                        (Some(self_expr), Some(other_expr)) => {
2270                            self_expr.normalize_eq(other_expr)
2271                        }
2272                        (None, None) => true,
2273                        (_, _) => false,
2274                    }
2275                    && match (self_else_expr, other_else_expr) {
2276                        (Some(self_else_expr), Some(other_else_expr)) => {
2277                            self_else_expr.normalize_eq(other_else_expr)
2278                        }
2279                        (None, None) => true,
2280                        (_, _) => false,
2281                    }
2282            }
2283            (_, _) => self == other,
2284        }
2285    }
2286}
2287
2288impl HashNode for Expr {
2289    /// As it is pretty easy to forget changing this method when `Expr` changes the
2290    /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes
2291    /// compile time.
2292    fn hash_node<H: Hasher>(&self, state: &mut H) {
2293        mem::discriminant(self).hash(state);
2294        match self {
2295            Expr::Alias(Alias {
2296                expr: _expr,
2297                relation,
2298                name,
2299                ..
2300            }) => {
2301                relation.hash(state);
2302                name.hash(state);
2303            }
2304            Expr::Column(column) => {
2305                column.hash(state);
2306            }
2307            Expr::ScalarVariable(data_type, name) => {
2308                data_type.hash(state);
2309                name.hash(state);
2310            }
2311            Expr::Literal(scalar_value, _) => {
2312                scalar_value.hash(state);
2313            }
2314            Expr::BinaryExpr(BinaryExpr {
2315                left: _left,
2316                op,
2317                right: _right,
2318            }) => {
2319                op.hash(state);
2320            }
2321            Expr::Like(Like {
2322                negated,
2323                expr: _expr,
2324                pattern: _pattern,
2325                escape_char,
2326                case_insensitive,
2327            })
2328            | Expr::SimilarTo(Like {
2329                negated,
2330                expr: _expr,
2331                pattern: _pattern,
2332                escape_char,
2333                case_insensitive,
2334            }) => {
2335                negated.hash(state);
2336                escape_char.hash(state);
2337                case_insensitive.hash(state);
2338            }
2339            Expr::Not(_expr)
2340            | Expr::IsNotNull(_expr)
2341            | Expr::IsNull(_expr)
2342            | Expr::IsTrue(_expr)
2343            | Expr::IsFalse(_expr)
2344            | Expr::IsUnknown(_expr)
2345            | Expr::IsNotTrue(_expr)
2346            | Expr::IsNotFalse(_expr)
2347            | Expr::IsNotUnknown(_expr)
2348            | Expr::Negative(_expr) => {}
2349            Expr::Between(Between {
2350                expr: _expr,
2351                negated,
2352                low: _low,
2353                high: _high,
2354            }) => {
2355                negated.hash(state);
2356            }
2357            Expr::Case(Case {
2358                expr: _expr,
2359                when_then_expr: _when_then_expr,
2360                else_expr: _else_expr,
2361            }) => {}
2362            Expr::Cast(Cast {
2363                expr: _expr,
2364                data_type,
2365            })
2366            | Expr::TryCast(TryCast {
2367                expr: _expr,
2368                data_type,
2369            }) => {
2370                data_type.hash(state);
2371            }
2372            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
2373                func.hash(state);
2374            }
2375            Expr::AggregateFunction(AggregateFunction {
2376                func,
2377                params:
2378                    AggregateFunctionParams {
2379                        args: _args,
2380                        distinct,
2381                        filter: _,
2382                        order_by: _,
2383                        null_treatment,
2384                    },
2385            }) => {
2386                func.hash(state);
2387                distinct.hash(state);
2388                null_treatment.hash(state);
2389            }
2390            Expr::WindowFunction(window_fun) => {
2391                let WindowFunction {
2392                    fun,
2393                    params:
2394                        WindowFunctionParams {
2395                            args: _args,
2396                            partition_by: _,
2397                            order_by: _,
2398                            window_frame,
2399                            null_treatment,
2400                        },
2401                } = window_fun.as_ref();
2402                fun.hash(state);
2403                window_frame.hash(state);
2404                null_treatment.hash(state);
2405            }
2406            Expr::InList(InList {
2407                expr: _expr,
2408                list: _list,
2409                negated,
2410            }) => {
2411                negated.hash(state);
2412            }
2413            Expr::Exists(Exists { subquery, negated }) => {
2414                subquery.hash(state);
2415                negated.hash(state);
2416            }
2417            Expr::InSubquery(InSubquery {
2418                expr: _expr,
2419                subquery,
2420                negated,
2421            }) => {
2422                subquery.hash(state);
2423                negated.hash(state);
2424            }
2425            Expr::ScalarSubquery(subquery) => {
2426                subquery.hash(state);
2427            }
2428            #[expect(deprecated)]
2429            Expr::Wildcard { qualifier, options } => {
2430                qualifier.hash(state);
2431                options.hash(state);
2432            }
2433            Expr::GroupingSet(grouping_set) => {
2434                mem::discriminant(grouping_set).hash(state);
2435                match grouping_set {
2436                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
2437                    GroupingSet::GroupingSets(_exprs) => {}
2438                }
2439            }
2440            Expr::Placeholder(place_holder) => {
2441                place_holder.hash(state);
2442            }
2443            Expr::OuterReferenceColumn(data_type, column) => {
2444                data_type.hash(state);
2445                column.hash(state);
2446            }
2447            Expr::Unnest(Unnest { expr: _expr }) => {}
2448        };
2449    }
2450}
2451
2452// Modifies expr if it is a placeholder with datatype of right
2453fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
2454    if let Expr::Placeholder(Placeholder { id: _, data_type }) = expr {
2455        if data_type.is_none() {
2456            let other_dt = other.get_type(schema);
2457            match other_dt {
2458                Err(e) => {
2459                    Err(e.context(format!(
2460                        "Can not find type of {other} needed to infer type of {expr}"
2461                    )))?;
2462                }
2463                Ok(dt) => {
2464                    *data_type = Some(dt);
2465                }
2466            }
2467        };
2468    }
2469    Ok(())
2470}
2471
/// Renders every element of `$ARRAY` (anything with an `iter()` method yielding
/// `Display` items) with its `Display` implementation and joins the results
/// with `", "`. Evaluates to a `String`; an empty input gives an empty string.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> = $ARRAY
            .iter()
            .map(|item| ::std::string::ToString::to_string(item))
            .collect();
        rendered.join(", ")
    }};
}
2482
2483struct SchemaDisplay<'a>(&'a Expr);
2484impl Display for SchemaDisplay<'_> {
2485    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2486        match self.0 {
2487            // The same as Display
2488            // TODO: remove the next line after `Expr::Wildcard` is removed
2489            #[expect(deprecated)]
2490            Expr::Column(_)
2491            | Expr::Literal(_, _)
2492            | Expr::ScalarVariable(..)
2493            | Expr::OuterReferenceColumn(..)
2494            | Expr::Placeholder(_)
2495            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
2496            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2497                match func.schema_name(params) {
2498                    Ok(name) => {
2499                        write!(f, "{name}")
2500                    }
2501                    Err(e) => {
2502                        write!(f, "got error from schema_name {e}")
2503                    }
2504                }
2505            }
2506            // Expr is not shown since it is aliased
2507            Expr::Alias(Alias {
2508                name,
2509                relation: Some(relation),
2510                ..
2511            }) => write!(f, "{relation}.{name}"),
2512            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2513            Expr::Between(Between {
2514                expr,
2515                negated,
2516                low,
2517                high,
2518            }) => {
2519                if *negated {
2520                    write!(
2521                        f,
2522                        "{} NOT BETWEEN {} AND {}",
2523                        SchemaDisplay(expr),
2524                        SchemaDisplay(low),
2525                        SchemaDisplay(high),
2526                    )
2527                } else {
2528                    write!(
2529                        f,
2530                        "{} BETWEEN {} AND {}",
2531                        SchemaDisplay(expr),
2532                        SchemaDisplay(low),
2533                        SchemaDisplay(high),
2534                    )
2535                }
2536            }
2537            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2538                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
2539            }
2540            Expr::Case(Case {
2541                expr,
2542                when_then_expr,
2543                else_expr,
2544            }) => {
2545                write!(f, "CASE ")?;
2546
2547                if let Some(e) = expr {
2548                    write!(f, "{} ", SchemaDisplay(e))?;
2549                }
2550
2551                for (when, then) in when_then_expr {
2552                    write!(
2553                        f,
2554                        "WHEN {} THEN {} ",
2555                        SchemaDisplay(when),
2556                        SchemaDisplay(then),
2557                    )?;
2558                }
2559
2560                if let Some(e) = else_expr {
2561                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
2562                }
2563
2564                write!(f, "END")
2565            }
2566            // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
2567            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2568                write!(f, "{}", SchemaDisplay(expr))
2569            }
2570            Expr::InList(InList {
2571                expr,
2572                list,
2573                negated,
2574            }) => {
2575                let inlist_name = schema_name_from_exprs(list)?;
2576
2577                if *negated {
2578                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
2579                } else {
2580                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
2581                }
2582            }
2583            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
2584            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
2585            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2586                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2587            }
2588            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2589                write!(f, "GROUPING SETS (")?;
2590                for exprs in lists_of_exprs.iter() {
2591                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
2592                }
2593                write!(f, ")")
2594            }
2595            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2596                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2597            }
2598            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
2599            Expr::IsNotNull(expr) => {
2600                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
2601            }
2602            Expr::IsUnknown(expr) => {
2603                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
2604            }
2605            Expr::IsNotUnknown(expr) => {
2606                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
2607            }
2608            Expr::InSubquery(InSubquery { negated: true, .. }) => {
2609                write!(f, "NOT IN")
2610            }
2611            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
2612            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
2613            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
2614            Expr::IsNotTrue(expr) => {
2615                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
2616            }
2617            Expr::IsNotFalse(expr) => {
2618                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
2619            }
2620            Expr::Like(Like {
2621                negated,
2622                expr,
2623                pattern,
2624                escape_char,
2625                case_insensitive,
2626            }) => {
2627                write!(
2628                    f,
2629                    "{} {}{} {}",
2630                    SchemaDisplay(expr),
2631                    if *negated { "NOT " } else { "" },
2632                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2633                    SchemaDisplay(pattern),
2634                )?;
2635
2636                if let Some(char) = escape_char {
2637                    write!(f, " CHAR '{char}'")?;
2638                }
2639
2640                Ok(())
2641            }
2642            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
2643            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
2644            Expr::Unnest(Unnest { expr }) => {
2645                write!(f, "UNNEST({})", SchemaDisplay(expr))
2646            }
2647            Expr::ScalarFunction(ScalarFunction { func, args }) => {
2648                match func.schema_name(args) {
2649                    Ok(name) => {
2650                        write!(f, "{name}")
2651                    }
2652                    Err(e) => {
2653                        write!(f, "got error from schema_name {e}")
2654                    }
2655                }
2656            }
2657            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
2658                write!(f, "{}", subquery.schema().field(0).name())
2659            }
2660            Expr::SimilarTo(Like {
2661                negated,
2662                expr,
2663                pattern,
2664                escape_char,
2665                ..
2666            }) => {
2667                write!(
2668                    f,
2669                    "{} {} {}",
2670                    SchemaDisplay(expr),
2671                    if *negated {
2672                        "NOT SIMILAR TO"
2673                    } else {
2674                        "SIMILAR TO"
2675                    },
2676                    SchemaDisplay(pattern),
2677                )?;
2678                if let Some(char) = escape_char {
2679                    write!(f, " CHAR '{char}'")?;
2680                }
2681
2682                Ok(())
2683            }
2684            Expr::WindowFunction(window_fun) => {
2685                let WindowFunction { fun, params } = window_fun.as_ref();
2686                match fun {
2687                    WindowFunctionDefinition::AggregateUDF(fun) => {
2688                        match fun.window_function_schema_name(params) {
2689                            Ok(name) => {
2690                                write!(f, "{name}")
2691                            }
2692                            Err(e) => {
2693                                write!(
2694                                    f,
2695                                    "got error from window_function_schema_name {e}"
2696                                )
2697                            }
2698                        }
2699                    }
2700                    _ => {
2701                        let WindowFunctionParams {
2702                            args,
2703                            partition_by,
2704                            order_by,
2705                            window_frame,
2706                            null_treatment,
2707                        } = params;
2708
2709                        write!(
2710                            f,
2711                            "{}({})",
2712                            fun,
2713                            schema_name_from_exprs_comma_separated_without_space(args)?
2714                        )?;
2715
2716                        if let Some(null_treatment) = null_treatment {
2717                            write!(f, " {null_treatment}")?;
2718                        }
2719
2720                        if !partition_by.is_empty() {
2721                            write!(
2722                                f,
2723                                " PARTITION BY [{}]",
2724                                schema_name_from_exprs(partition_by)?
2725                            )?;
2726                        }
2727
2728                        if !order_by.is_empty() {
2729                            write!(
2730                                f,
2731                                " ORDER BY [{}]",
2732                                schema_name_from_sorts(order_by)?
2733                            )?;
2734                        };
2735
2736                        write!(f, " {window_frame}")
2737                    }
2738                }
2739            }
2740        }
2741    }
2742}
2743
2744/// A helper struct for displaying an `Expr` as an SQL-like string.
2745struct SqlDisplay<'a>(&'a Expr);
2746
2747impl Display for SqlDisplay<'_> {
2748    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2749        match self.0 {
2750            Expr::Literal(scalar, _) => scalar.fmt(f),
2751            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2752            Expr::Between(Between {
2753                expr,
2754                negated,
2755                low,
2756                high,
2757            }) => {
2758                if *negated {
2759                    write!(
2760                        f,
2761                        "{} NOT BETWEEN {} AND {}",
2762                        SqlDisplay(expr),
2763                        SqlDisplay(low),
2764                        SqlDisplay(high),
2765                    )
2766                } else {
2767                    write!(
2768                        f,
2769                        "{} BETWEEN {} AND {}",
2770                        SqlDisplay(expr),
2771                        SqlDisplay(low),
2772                        SqlDisplay(high),
2773                    )
2774                }
2775            }
2776            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2777                write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
2778            }
2779            Expr::Case(Case {
2780                expr,
2781                when_then_expr,
2782                else_expr,
2783            }) => {
2784                write!(f, "CASE ")?;
2785
2786                if let Some(e) = expr {
2787                    write!(f, "{} ", SqlDisplay(e))?;
2788                }
2789
2790                for (when, then) in when_then_expr {
2791                    write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
2792                }
2793
2794                if let Some(e) = else_expr {
2795                    write!(f, "ELSE {} ", SqlDisplay(e))?;
2796                }
2797
2798                write!(f, "END")
2799            }
2800            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2801                write!(f, "{}", SqlDisplay(expr))
2802            }
2803            Expr::InList(InList {
2804                expr,
2805                list,
2806                negated,
2807            }) => {
2808                write!(
2809                    f,
2810                    "{}{} IN {}",
2811                    SqlDisplay(expr),
2812                    if *negated { " NOT" } else { "" },
2813                    ExprListDisplay::comma_separated(list.as_slice())
2814                )
2815            }
2816            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2817                write!(
2818                    f,
2819                    "ROLLUP ({})",
2820                    ExprListDisplay::comma_separated(exprs.as_slice())
2821                )
2822            }
2823            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2824                write!(f, "GROUPING SETS (")?;
2825                for exprs in lists_of_exprs.iter() {
2826                    write!(
2827                        f,
2828                        "({})",
2829                        ExprListDisplay::comma_separated(exprs.as_slice())
2830                    )?;
2831                }
2832                write!(f, ")")
2833            }
2834            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2835                write!(
2836                    f,
2837                    "ROLLUP ({})",
2838                    ExprListDisplay::comma_separated(exprs.as_slice())
2839                )
2840            }
2841            Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
2842            Expr::IsNotNull(expr) => {
2843                write!(f, "{} IS NOT NULL", SqlDisplay(expr))
2844            }
2845            Expr::IsUnknown(expr) => {
2846                write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
2847            }
2848            Expr::IsNotUnknown(expr) => {
2849                write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
2850            }
2851            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
2852            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
2853            Expr::IsNotTrue(expr) => {
2854                write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
2855            }
2856            Expr::IsNotFalse(expr) => {
2857                write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
2858            }
2859            Expr::Like(Like {
2860                negated,
2861                expr,
2862                pattern,
2863                escape_char,
2864                case_insensitive,
2865            }) => {
2866                write!(
2867                    f,
2868                    "{} {}{} {}",
2869                    SqlDisplay(expr),
2870                    if *negated { "NOT " } else { "" },
2871                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2872                    SqlDisplay(pattern),
2873                )?;
2874
2875                if let Some(char) = escape_char {
2876                    write!(f, " CHAR '{char}'")?;
2877                }
2878
2879                Ok(())
2880            }
2881            Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
2882            Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
2883            Expr::Unnest(Unnest { expr }) => {
2884                write!(f, "UNNEST({})", SqlDisplay(expr))
2885            }
2886            Expr::SimilarTo(Like {
2887                negated,
2888                expr,
2889                pattern,
2890                escape_char,
2891                ..
2892            }) => {
2893                write!(
2894                    f,
2895                    "{} {} {}",
2896                    SqlDisplay(expr),
2897                    if *negated {
2898                        "NOT SIMILAR TO"
2899                    } else {
2900                        "SIMILAR TO"
2901                    },
2902                    SqlDisplay(pattern),
2903                )?;
2904                if let Some(char) = escape_char {
2905                    write!(f, " CHAR '{char}'")?;
2906                }
2907
2908                Ok(())
2909            }
2910            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2911                match func.human_display(params) {
2912                    Ok(name) => {
2913                        write!(f, "{name}")
2914                    }
2915                    Err(e) => {
2916                        write!(f, "got error from schema_name {e}")
2917                    }
2918                }
2919            }
2920            _ => write!(f, "{}", self.0),
2921        }
2922    }
2923}
2924
2925/// Get schema_name for Vector of expressions
2926///
2927/// Internal usage. Please call `schema_name_from_exprs` instead
2928// TODO: Use ", " to standardize the formatting of Vec<Expr>,
2929// <https://github.com/apache/datafusion/issues/10364>
2930pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
2931    exprs: &[Expr],
2932) -> Result<String, fmt::Error> {
2933    schema_name_from_exprs_inner(exprs, ",")
2934}
2935
2936/// Formats a list of `&Expr` with a custom separator using SQL display format
2937pub struct ExprListDisplay<'a> {
2938    exprs: &'a [Expr],
2939    sep: &'a str,
2940}
2941
2942impl<'a> ExprListDisplay<'a> {
2943    /// Create a new display struct with the given expressions and separator
2944    pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
2945        Self { exprs, sep }
2946    }
2947
2948    /// Create a new display struct with comma-space separator
2949    pub fn comma_separated(exprs: &'a [Expr]) -> Self {
2950        Self::new(exprs, ", ")
2951    }
2952}
2953
2954impl Display for ExprListDisplay<'_> {
2955    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
2956        let mut first = true;
2957        for expr in self.exprs {
2958            if !first {
2959                write!(f, "{}", self.sep)?;
2960            }
2961            write!(f, "{}", SqlDisplay(expr))?;
2962            first = false;
2963        }
2964        Ok(())
2965    }
2966}
2967
2968/// Get schema_name for Vector of expressions
2969pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
2970    schema_name_from_exprs_inner(exprs, ", ")
2971}
2972
2973fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
2974    let mut s = String::new();
2975    for (i, e) in exprs.iter().enumerate() {
2976        if i > 0 {
2977            write!(&mut s, "{sep}")?;
2978        }
2979        write!(&mut s, "{}", SchemaDisplay(e))?;
2980    }
2981
2982    Ok(s)
2983}
2984
2985pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
2986    let mut s = String::new();
2987    for (i, e) in sorts.iter().enumerate() {
2988        if i > 0 {
2989            write!(&mut s, ", ")?;
2990        }
2991        let ordering = if e.asc { "ASC" } else { "DESC" };
2992        let nulls_ordering = if e.nulls_first {
2993            "NULLS FIRST"
2994        } else {
2995            "NULLS LAST"
2996        };
2997        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
2998    }
2999
3000    Ok(s)
3001}
3002
/// Function-style prefix written by `Display for Expr` around an
/// `Expr::OuterReferenceColumn`, e.g. `outer_ref(col)`.
pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";
/// Function-style prefix written by `Display for Expr` around an
/// `Expr::Unnest`, e.g. `UNNEST(expr)`.
pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
3005
3006/// Format expressions for display as part of a logical plan. In many cases, this will produce
3007/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
3008impl Display for Expr {
3009    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3010        match self {
3011            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
3012            Expr::Column(c) => write!(f, "{c}"),
3013            Expr::OuterReferenceColumn(_, c) => {
3014                write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
3015            }
3016            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
3017            Expr::Literal(v, metadata) => {
3018                match metadata.as_ref().map(|m| m.is_empty()).unwrap_or(true) {
3019                    false => write!(f, "{v:?} {:?}", metadata.as_ref().unwrap()),
3020                    true => write!(f, "{v:?}"),
3021                }
3022            }
3023            Expr::Case(case) => {
3024                write!(f, "CASE ")?;
3025                if let Some(e) = &case.expr {
3026                    write!(f, "{e} ")?;
3027                }
3028                for (w, t) in &case.when_then_expr {
3029                    write!(f, "WHEN {w} THEN {t} ")?;
3030                }
3031                if let Some(e) = &case.else_expr {
3032                    write!(f, "ELSE {e} ")?;
3033                }
3034                write!(f, "END")
3035            }
3036            Expr::Cast(Cast { expr, data_type }) => {
3037                write!(f, "CAST({expr} AS {data_type:?})")
3038            }
3039            Expr::TryCast(TryCast { expr, data_type }) => {
3040                write!(f, "TRY_CAST({expr} AS {data_type:?})")
3041            }
3042            Expr::Not(expr) => write!(f, "NOT {expr}"),
3043            Expr::Negative(expr) => write!(f, "(- {expr})"),
3044            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
3045            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
3046            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
3047            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
3048            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
3049            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
3050            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
3051            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
3052            Expr::Exists(Exists {
3053                subquery,
3054                negated: true,
3055            }) => write!(f, "NOT EXISTS ({subquery:?})"),
3056            Expr::Exists(Exists {
3057                subquery,
3058                negated: false,
3059            }) => write!(f, "EXISTS ({subquery:?})"),
3060            Expr::InSubquery(InSubquery {
3061                expr,
3062                subquery,
3063                negated: true,
3064            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
3065            Expr::InSubquery(InSubquery {
3066                expr,
3067                subquery,
3068                negated: false,
3069            }) => write!(f, "{expr} IN ({subquery:?})"),
3070            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
3071            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
3072            Expr::ScalarFunction(fun) => {
3073                fmt_function(f, fun.name(), false, &fun.args, true)
3074            }
3075            // TODO: use udf's display_name, need to fix the separator issue, <https://github.com/apache/datafusion/issues/10364>
3076            // Expr::ScalarFunction(ScalarFunction { func, args }) => {
3077            //     write!(f, "{}", func.display_name(args).unwrap())
3078            // }
3079            Expr::WindowFunction(window_fun) => {
3080                let WindowFunction { fun, params } = window_fun.as_ref();
3081                match fun {
3082                    WindowFunctionDefinition::AggregateUDF(fun) => {
3083                        match fun.window_function_display_name(params) {
3084                            Ok(name) => {
3085                                write!(f, "{name}")
3086                            }
3087                            Err(e) => {
3088                                write!(
3089                                    f,
3090                                    "got error from window_function_display_name {e}"
3091                                )
3092                            }
3093                        }
3094                    }
3095                    WindowFunctionDefinition::WindowUDF(fun) => {
3096                        let WindowFunctionParams {
3097                            args,
3098                            partition_by,
3099                            order_by,
3100                            window_frame,
3101                            null_treatment,
3102                        } = params;
3103
3104                        fmt_function(f, &fun.to_string(), false, args, true)?;
3105
3106                        if let Some(nt) = null_treatment {
3107                            write!(f, "{nt}")?;
3108                        }
3109
3110                        if !partition_by.is_empty() {
3111                            write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
3112                        }
3113                        if !order_by.is_empty() {
3114                            write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
3115                        }
3116                        write!(
3117                            f,
3118                            " {} BETWEEN {} AND {}",
3119                            window_frame.units,
3120                            window_frame.start_bound,
3121                            window_frame.end_bound
3122                        )
3123                    }
3124                }
3125            }
3126            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3127                match func.display_name(params) {
3128                    Ok(name) => {
3129                        write!(f, "{name}")
3130                    }
3131                    Err(e) => {
3132                        write!(f, "got error from display_name {e}")
3133                    }
3134                }
3135            }
3136            Expr::Between(Between {
3137                expr,
3138                negated,
3139                low,
3140                high,
3141            }) => {
3142                if *negated {
3143                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
3144                } else {
3145                    write!(f, "{expr} BETWEEN {low} AND {high}")
3146                }
3147            }
3148            Expr::Like(Like {
3149                negated,
3150                expr,
3151                pattern,
3152                escape_char,
3153                case_insensitive,
3154            }) => {
3155                write!(f, "{expr}")?;
3156                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
3157                if *negated {
3158                    write!(f, " NOT")?;
3159                }
3160                if let Some(char) = escape_char {
3161                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
3162                } else {
3163                    write!(f, " {op_name} {pattern}")
3164                }
3165            }
3166            Expr::SimilarTo(Like {
3167                negated,
3168                expr,
3169                pattern,
3170                escape_char,
3171                case_insensitive: _,
3172            }) => {
3173                write!(f, "{expr}")?;
3174                if *negated {
3175                    write!(f, " NOT")?;
3176                }
3177                if let Some(char) = escape_char {
3178                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
3179                } else {
3180                    write!(f, " SIMILAR TO {pattern}")
3181                }
3182            }
3183            Expr::InList(InList {
3184                expr,
3185                list,
3186                negated,
3187            }) => {
3188                if *negated {
3189                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
3190                } else {
3191                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
3192                }
3193            }
3194            #[expect(deprecated)]
3195            Expr::Wildcard { qualifier, options } => match qualifier {
3196                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
3197                None => write!(f, "*{options}"),
3198            },
3199            Expr::GroupingSet(grouping_sets) => match grouping_sets {
3200                GroupingSet::Rollup(exprs) => {
3201                    // ROLLUP (c0, c1, c2)
3202                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
3203                }
3204                GroupingSet::Cube(exprs) => {
3205                    // CUBE (c0, c1, c2)
3206                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
3207                }
3208                GroupingSet::GroupingSets(lists_of_exprs) => {
3209                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
3210                    write!(
3211                        f,
3212                        "GROUPING SETS ({})",
3213                        lists_of_exprs
3214                            .iter()
3215                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
3216                            .collect::<Vec<String>>()
3217                            .join(", ")
3218                    )
3219                }
3220            },
3221            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
3222            Expr::Unnest(Unnest { expr }) => {
3223                write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
3224            }
3225        }
3226    }
3227}
3228
3229fn fmt_function(
3230    f: &mut Formatter,
3231    fun: &str,
3232    distinct: bool,
3233    args: &[Expr],
3234    display: bool,
3235) -> fmt::Result {
3236    let args: Vec<String> = match display {
3237        true => args.iter().map(|arg| format!("{arg}")).collect(),
3238        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
3239    };
3240
3241    let distinct_str = match distinct {
3242        true => "DISTINCT ",
3243        false => "",
3244    };
3245    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
3246}
3247
3248/// The name of the column (field) that this `Expr` will produce in the physical plan.
3249/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
3250pub fn physical_name(expr: &Expr) -> Result<String> {
3251    match expr {
3252        Expr::Column(col) => Ok(col.name.clone()),
3253        Expr::Alias(alias) => Ok(alias.name.clone()),
3254        _ => Ok(expr.schema_name().to_string()),
3255    }
3256}
3257
3258#[cfg(test)]
3259mod test {
3260    use crate::expr_fn::col;
3261    use crate::{
3262        case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
3263        ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
3264    };
3265    use arrow::datatypes::{Field, Schema};
3266    use sqlparser::ast;
3267    use sqlparser::ast::{Ident, IdentWithAlias};
3268    use std::any::Any;
3269
3270    #[test]
3271    fn infer_placeholder_in_clause() {
3272        // SELECT * FROM employees WHERE department_id IN ($1, $2, $3);
3273        let column = col("department_id");
3274        let param_placeholders = vec![
3275            Expr::Placeholder(Placeholder {
3276                id: "$1".to_string(),
3277                data_type: None,
3278            }),
3279            Expr::Placeholder(Placeholder {
3280                id: "$2".to_string(),
3281                data_type: None,
3282            }),
3283            Expr::Placeholder(Placeholder {
3284                id: "$3".to_string(),
3285                data_type: None,
3286            }),
3287        ];
3288        let in_list = Expr::InList(InList {
3289            expr: Box::new(column),
3290            list: param_placeholders,
3291            negated: false,
3292        });
3293
3294        let schema = Arc::new(Schema::new(vec![
3295            Field::new("name", DataType::Utf8, true),
3296            Field::new("department_id", DataType::Int32, true),
3297        ]));
3298        let df_schema = DFSchema::try_from(schema).unwrap();
3299
3300        let (inferred_expr, contains_placeholder) =
3301            in_list.infer_placeholder_types(&df_schema).unwrap();
3302
3303        assert!(contains_placeholder);
3304
3305        match inferred_expr {
3306            Expr::InList(in_list) => {
3307                for expr in in_list.list {
3308                    match expr {
3309                        Expr::Placeholder(placeholder) => {
3310                            assert_eq!(
3311                                placeholder.data_type,
3312                                Some(DataType::Int32),
3313                                "Placeholder {} should infer Int32",
3314                                placeholder.id
3315                            );
3316                        }
3317                        _ => panic!("Expected Placeholder expression"),
3318                    }
3319                }
3320            }
3321            _ => panic!("Expected InList expression"),
3322        }
3323    }
3324
3325    #[test]
3326    fn infer_placeholder_like_and_similar_to() {
3327        // name LIKE $1
3328        let schema =
3329            Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
3330        let df_schema = DFSchema::try_from(schema).unwrap();
3331
3332        let like = Like {
3333            expr: Box::new(col("name")),
3334            pattern: Box::new(Expr::Placeholder(Placeholder {
3335                id: "$1".to_string(),
3336                data_type: None,
3337            })),
3338            negated: false,
3339            case_insensitive: false,
3340            escape_char: None,
3341        };
3342
3343        let expr = Expr::Like(like.clone());
3344
3345        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3346        match inferred_expr {
3347            Expr::Like(like) => match *like.pattern {
3348                Expr::Placeholder(placeholder) => {
3349                    assert_eq!(placeholder.data_type, Some(DataType::Utf8));
3350                }
3351                _ => panic!("Expected Placeholder"),
3352            },
3353            _ => panic!("Expected Like"),
3354        }
3355
3356        // name SIMILAR TO $1
3357        let expr = Expr::SimilarTo(like);
3358
3359        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3360        match inferred_expr {
3361            Expr::SimilarTo(like) => match *like.pattern {
3362                Expr::Placeholder(placeholder) => {
3363                    assert_eq!(
3364                        placeholder.data_type,
3365                        Some(DataType::Utf8),
3366                        "Placeholder {} should infer Utf8",
3367                        placeholder.id
3368                    );
3369                }
3370                _ => panic!("Expected Placeholder expression"),
3371            },
3372            _ => panic!("Expected SimilarTo expression"),
3373        }
3374    }
3375
3376    #[test]
3377    #[allow(deprecated)]
3378    fn format_case_when() -> Result<()> {
3379        let expr = case(col("a"))
3380            .when(lit(1), lit(true))
3381            .when(lit(0), lit(false))
3382            .otherwise(lit(ScalarValue::Null))?;
3383        let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";
3384        assert_eq!(expected, expr.canonical_name());
3385        assert_eq!(expected, format!("{expr}"));
3386        Ok(())
3387    }
3388
3389    #[test]
3390    #[allow(deprecated)]
3391    fn format_cast() -> Result<()> {
3392        let expr = Expr::Cast(Cast {
3393            expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)), None)),
3394            data_type: DataType::Utf8,
3395        });
3396        let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
3397        assert_eq!(expected_canonical, expr.canonical_name());
3398        assert_eq!(expected_canonical, format!("{expr}"));
3399        // Note that CAST intentionally has a name that is different from its `Display`
3400        // representation. CAST does not change the name of expressions.
3401        assert_eq!("Float32(1.23)", expr.schema_name().to_string());
3402        Ok(())
3403    }
3404
3405    #[test]
3406    fn test_partial_ord() {
3407        // Test validates that partial ord is defined for Expr, not
3408        // intended to exhaustively test all possibilities
3409        let exp1 = col("a") + lit(1);
3410        let exp2 = col("a") + lit(2);
3411        let exp3 = !(col("a") + lit(2));
3412
3413        assert!(exp1 < exp2);
3414        assert!(exp3 > exp2);
3415        assert!(exp1 < exp3)
3416    }
3417
3418    #[test]
3419    fn test_collect_expr() -> Result<()> {
3420        // single column
3421        {
3422            let expr = &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
3423            let columns = expr.column_refs();
3424            assert_eq!(1, columns.len());
3425            assert!(columns.contains(&Column::from_name("a")));
3426        }
3427
3428        // multiple columns
3429        {
3430            let expr = col("a") + col("b") + lit(1);
3431            let columns = expr.column_refs();
3432            assert_eq!(2, columns.len());
3433            assert!(columns.contains(&Column::from_name("a")));
3434            assert!(columns.contains(&Column::from_name("b")));
3435        }
3436
3437        Ok(())
3438    }
3439
3440    #[test]
3441    fn test_logical_ops() {
3442        assert_eq!(
3443            format!("{}", lit(1u32).eq(lit(2u32))),
3444            "UInt32(1) = UInt32(2)"
3445        );
3446        assert_eq!(
3447            format!("{}", lit(1u32).not_eq(lit(2u32))),
3448            "UInt32(1) != UInt32(2)"
3449        );
3450        assert_eq!(
3451            format!("{}", lit(1u32).gt(lit(2u32))),
3452            "UInt32(1) > UInt32(2)"
3453        );
3454        assert_eq!(
3455            format!("{}", lit(1u32).gt_eq(lit(2u32))),
3456            "UInt32(1) >= UInt32(2)"
3457        );
3458        assert_eq!(
3459            format!("{}", lit(1u32).lt(lit(2u32))),
3460            "UInt32(1) < UInt32(2)"
3461        );
3462        assert_eq!(
3463            format!("{}", lit(1u32).lt_eq(lit(2u32))),
3464            "UInt32(1) <= UInt32(2)"
3465        );
3466        assert_eq!(
3467            format!("{}", lit(1u32).and(lit(2u32))),
3468            "UInt32(1) AND UInt32(2)"
3469        );
3470        assert_eq!(
3471            format!("{}", lit(1u32).or(lit(2u32))),
3472            "UInt32(1) OR UInt32(2)"
3473        );
3474    }
3475
3476    #[test]
3477    fn test_is_volatile_scalar_func() {
3478        // UDF
3479        #[derive(Debug)]
3480        struct TestScalarUDF {
3481            signature: Signature,
3482        }
3483        impl ScalarUDFImpl for TestScalarUDF {
3484            fn as_any(&self) -> &dyn Any {
3485                self
3486            }
3487            fn name(&self) -> &str {
3488                "TestScalarUDF"
3489            }
3490
3491            fn signature(&self) -> &Signature {
3492                &self.signature
3493            }
3494
3495            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
3496                Ok(DataType::Utf8)
3497            }
3498
3499            fn invoke_with_args(
3500                &self,
3501                _args: ScalarFunctionArgs,
3502            ) -> Result<ColumnarValue> {
3503                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
3504            }
3505        }
3506        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3507            signature: Signature::uniform(1, vec![DataType::Float32], Volatility::Stable),
3508        }));
3509        assert_ne!(udf.signature().volatility, Volatility::Volatile);
3510
3511        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3512            signature: Signature::uniform(
3513                1,
3514                vec![DataType::Float32],
3515                Volatility::Volatile,
3516            ),
3517        }));
3518        assert_eq!(udf.signature().volatility, Volatility::Volatile);
3519    }
3520
3521    use super::*;
3522
3523    #[test]
3524    fn test_display_wildcard() {
3525        assert_eq!(format!("{}", wildcard()), "*");
3526        assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*");
3527        assert_eq!(
3528            format!(
3529                "{}",
3530                wildcard_with_options(wildcard_options(
3531                    Some(IlikeSelectItem {
3532                        pattern: "c1".to_string()
3533                    }),
3534                    None,
3535                    None,
3536                    None,
3537                    None
3538                ))
3539            ),
3540            "* ILIKE 'c1'"
3541        );
3542        assert_eq!(
3543            format!(
3544                "{}",
3545                wildcard_with_options(wildcard_options(
3546                    None,
3547                    Some(ExcludeSelectItem::Multiple(vec![
3548                        Ident::from("c1"),
3549                        Ident::from("c2")
3550                    ])),
3551                    None,
3552                    None,
3553                    None
3554                ))
3555            ),
3556            "* EXCLUDE (c1, c2)"
3557        );
3558        assert_eq!(
3559            format!(
3560                "{}",
3561                wildcard_with_options(wildcard_options(
3562                    None,
3563                    None,
3564                    Some(ExceptSelectItem {
3565                        first_element: Ident::from("c1"),
3566                        additional_elements: vec![Ident::from("c2")]
3567                    }),
3568                    None,
3569                    None
3570                ))
3571            ),
3572            "* EXCEPT (c1, c2)"
3573        );
3574        assert_eq!(
3575            format!(
3576                "{}",
3577                wildcard_with_options(wildcard_options(
3578                    None,
3579                    None,
3580                    None,
3581                    Some(PlannedReplaceSelectItem {
3582                        items: vec![ReplaceSelectElement {
3583                            expr: ast::Expr::Identifier(Ident::from("c1")),
3584                            column_name: Ident::from("a1"),
3585                            as_keyword: false
3586                        }],
3587                        planned_expressions: vec![]
3588                    }),
3589                    None
3590                ))
3591            ),
3592            "* REPLACE (c1 a1)"
3593        );
3594        assert_eq!(
3595            format!(
3596                "{}",
3597                wildcard_with_options(wildcard_options(
3598                    None,
3599                    None,
3600                    None,
3601                    None,
3602                    Some(RenameSelectItem::Multiple(vec![IdentWithAlias {
3603                        ident: Ident::from("c1"),
3604                        alias: Ident::from("a1")
3605                    }]))
3606                ))
3607            ),
3608            "* RENAME (c1 AS a1)"
3609        )
3610    }
3611
3612    #[test]
3613    fn test_schema_display_alias_with_relation() {
3614        assert_eq!(
3615            format!(
3616                "{}",
3617                SchemaDisplay(
3618                    &lit(1).alias_qualified("table_name".into(), "column_name")
3619                )
3620            ),
3621            "table_name.column_name"
3622        );
3623    }
3624
3625    #[test]
3626    fn test_schema_display_alias_without_relation() {
3627        assert_eq!(
3628            format!(
3629                "{}",
3630                SchemaDisplay(&lit(1).alias_qualified(None::<&str>, "column_name"))
3631            ),
3632            "column_name"
3633        );
3634    }
3635
3636    fn wildcard_options(
3637        opt_ilike: Option<IlikeSelectItem>,
3638        opt_exclude: Option<ExcludeSelectItem>,
3639        opt_except: Option<ExceptSelectItem>,
3640        opt_replace: Option<PlannedReplaceSelectItem>,
3641        opt_rename: Option<RenameSelectItem>,
3642    ) -> WildcardOptions {
3643        WildcardOptions {
3644            ilike: opt_ilike,
3645            exclude: opt_exclude,
3646            except: opt_except,
3647            replace: opt_replace,
3648            rename: opt_rename,
3649        }
3650    }
3651
3652    #[test]
3653    fn test_size_of_expr() {
3654        // because Expr is such a widely used struct in DataFusion
3655        // it is important to keep its size as small as possible
3656        //
3657        // If this test fails when you change `Expr`, please try
3658        // `Box`ing the fields to make `Expr` smaller
3659        // See https://github.com/apache/datafusion/issues/16199 for details
3660        assert_eq!(size_of::<Expr>(), 144);
3661        assert_eq!(size_of::<ScalarValue>(), 64);
3662        assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
3663        assert_eq!(size_of::<Vec<Expr>>(), 24);
3664        assert_eq!(size_of::<Arc<Expr>>(), 8);
3665    }
3666}