datafusion_expr/
expr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Logical Expressions: [`Expr`]
19
20use std::cmp::Ordering;
21use std::collections::{BTreeMap, HashSet};
22use std::fmt::{self, Display, Formatter, Write};
23use std::hash::{Hash, Hasher};
24use std::mem;
25use std::sync::Arc;
26
27use crate::expr_fn::binary_expr;
28use crate::function::WindowFunctionSimplification;
29use crate::logical_plan::Subquery;
30use crate::{AggregateUDF, Volatility};
31use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
32
33use arrow::datatypes::{DataType, Field, FieldRef};
34use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
35use datafusion_common::tree_node::{
36    Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
37};
38use datafusion_common::{
39    Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference,
40};
41use datafusion_functions_window_common::field::WindowUDFFieldArgs;
42use sqlparser::ast::{
43    display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
44    NullTreatment, RenameSelectItem, ReplaceSelectElement,
45};
46
47/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
48///
49/// For example the expression `A + 1` will be represented as
50///
51///```text
52///  BinaryExpr {
53///    left: Expr::Column("A"),
54///    op: Operator::Plus,
55///    right: Expr::Literal(ScalarValue::Int32(Some(1)), None)
56/// }
57/// ```
58///
59/// # Creating Expressions
60///
61/// `Expr`s can be created directly, but it is often easier and less verbose to
62/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
64///
65/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
66///
67/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
68///
69/// # Printing Expressions
70///
/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
72/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
73///
/// If you need SQL to pass to other systems, consider using [`Unparser`].
75///
76/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
77///
78/// # Schema Access
79///
80/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
81/// of an `Expr`.
82///
83/// # Visiting and Rewriting `Expr`s
84///
85/// The `Expr` struct implements the [`TreeNode`] trait for walking and
86/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
87/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
88/// the examples below and [`TreeNode`] for more information.
89///
90/// # Examples: Creating and Using `Expr`s
91///
92/// ## Column References and Literals
93///
94/// [`Expr::Column`] refer to the values of columns and are often created with
95/// the [`col`] function. For example to create an expression `c1` referring to
96/// column named "c1":
97///
98/// [`col`]: crate::expr_fn::col
99///
100/// ```
101/// # use datafusion_common::Column;
102/// # use datafusion_expr::{lit, col, Expr};
103/// let expr = col("c1");
104/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
105/// ```
106///
107/// [`Expr::Literal`] refer to literal, or constant, values. These are created
108/// with the [`lit`] function. For example to create an expression `42`:
109///
110/// [`lit`]: crate::lit
111///
112/// ```
113/// # use datafusion_common::{Column, ScalarValue};
114/// # use datafusion_expr::{lit, col, Expr};
115/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
116/// let expr = lit(42i64);
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
119/// // To make a (typed) NULL:
120/// let expr = Expr::Literal(ScalarValue::Int64(None), None);
121/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
122/// let expr = lit(ScalarValue::Null);
123/// ```
124///
125/// ## Binary Expressions
126///
127/// Exprs implement traits that allow easy to understand construction of more
128/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
129/// "c2" together
130///
131/// ```
132/// # use datafusion_expr::{lit, col, Operator, Expr};
133/// // Use the `+` operator to add two columns together
134/// let expr = col("c1") + col("c2");
135/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
136/// if let Expr::BinaryExpr(binary_expr) = expr {
137///   assert_eq!(*binary_expr.left, col("c1"));
138///   assert_eq!(*binary_expr.right, col("c2"));
139///   assert_eq!(binary_expr.op, Operator::Plus);
140/// }
141/// ```
142///
/// The expression `c1 = 42` compares the value in column "c1" to the
144/// literal value `42`:
145///
146/// ```
147/// # use datafusion_common::ScalarValue;
148/// # use datafusion_expr::{lit, col, Operator, Expr};
149/// let expr = col("c1").eq(lit(42_i32));
150/// assert!(matches!(expr, Expr::BinaryExpr { .. } ));
151/// if let Expr::BinaryExpr(binary_expr) = expr {
152///   assert_eq!(*binary_expr.left, col("c1"));
153///   let scalar = ScalarValue::Int32(Some(42));
154///   assert_eq!(*binary_expr.right, Expr::Literal(scalar, None));
155///   assert_eq!(binary_expr.op, Operator::Eq);
156/// }
157/// ```
158///
159/// Here is how to implement the equivalent of `SELECT *` to select all
160/// [`Expr::Column`] from a [`DFSchema`]'s columns:
161///
162/// ```
163/// # use arrow::datatypes::{DataType, Field, Schema};
164/// # use datafusion_common::{DFSchema, Column};
165/// # use datafusion_expr::Expr;
166/// // Create a schema c1(int, c2 float)
167/// let arrow_schema = Schema::new(vec![
168///    Field::new("c1", DataType::Int32, false),
169///    Field::new("c2", DataType::Float64, false),
170/// ]);
/// // DFSchema is an Arrow schema with an optional relation name
172/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)
173///   .unwrap();
174///
175/// // Form Vec<Expr> with an expression for each column in the schema
176/// let exprs: Vec<_> = df_schema.iter()
177///   .map(Expr::from)
178///   .collect();
179///
180/// assert_eq!(exprs, vec![
181///   Expr::from(Column::from_qualified_name("t1.c1")),
182///   Expr::from(Column::from_qualified_name("t1.c2")),
183/// ]);
184/// ```
185///
186/// # Examples: Displaying `Exprs`
187///
188/// There are three ways to print an `Expr` depending on the usecase.
189///
190/// ## Use `Debug` trait
191///
192/// Following Rust conventions, the `Debug` implementation prints out the
193/// internal structure of the expression, which is useful for debugging.
194///
195/// ```
196/// # use datafusion_expr::{lit, col};
197/// let expr = col("c1") + lit(42);
198/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42), None) })");
199/// ```
200///
201/// ## Use the `Display` trait  (detailed expression)
202///
203/// The `Display` implementation prints out the expression in a SQL-like form,
204/// but has additional details such as the data type of literals. This is useful
205/// for understanding the expression in more detail and is used for the low level
206/// [`ExplainFormat::Indent`] explain plan format.
207///
208/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
209///
210/// ```
211/// # use datafusion_expr::{lit, col};
212/// let expr = col("c1") + lit(42);
213/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
214/// ```
215///
216/// ## Use [`Self::human_display`] (human readable)
217///
218/// [`Self::human_display`]  prints out the expression in a SQL-like form, optimized
219/// for human consumption by end users. It is used for the
220/// [`ExplainFormat::Tree`] explain plan format.
221///
222/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
223///
224///```
225/// # use datafusion_expr::{lit, col};
226/// let expr = col("c1") + lit(42);
227/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
228/// ```
229///
230/// # Examples: Visiting and Rewriting `Expr`s
231///
232/// Here is an example that finds all literals in an `Expr` tree:
233/// ```
234/// # use std::collections::{HashSet};
235/// use datafusion_common::ScalarValue;
236/// # use datafusion_expr::{col, Expr, lit};
237/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
238/// // Expression a = 5 AND b = 6
239/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
/// // find all literals and collect them in a HashSet
241/// let mut scalars = HashSet::new();
242/// // apply recursively visits all nodes in the expression tree
243/// expr.apply(|e| {
244///    if let Expr::Literal(scalar, _) = e {
245///       scalars.insert(scalar);
246///    }
247///    // The return value controls whether to continue visiting the tree
248///    Ok(TreeNodeRecursion::Continue)
249/// }).unwrap();
250/// // All subtrees have been visited and literals found
251/// assert_eq!(scalars.len(), 2);
252/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
253/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
254/// ```
255///
/// Rewrite an expression, replacing references to column "a" with
/// the literal `42`:
258///
259///  ```
260/// # use datafusion_common::tree_node::{Transformed, TreeNode};
261/// # use datafusion_expr::{col, Expr, lit};
262/// // expression a = 5 AND b = 6
263/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
264/// // rewrite all references to column "a" to the literal 42
265/// let rewritten = expr.transform(|e| {
266///   if let Expr::Column(c) = &e {
267///     if &c.name == "a" {
268///       // return Transformed::yes to indicate the node was changed
269///       return Ok(Transformed::yes(lit(42)))
270///     }
271///   }
272///   // return Transformed::no to indicate the node was not changed
273///   Ok(Transformed::no(e))
274/// }).unwrap();
275/// // The expression has been rewritten
276/// assert!(rewritten.transformed);
277/// // to 42 = 5 AND b = 6
/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
/// ```
#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
pub enum Expr {
    /// An expression with a specific name.
    Alias(Alias),
    /// A named reference to a qualified field in a schema.
    Column(Column),
    /// A named reference to a variable in a registry.
    ScalarVariable(DataType, Vec<String>),
    /// A constant value along with associated [`FieldMetadata`].
    Literal(ScalarValue, Option<FieldMetadata>),
    /// A binary expression such as "age > 21"
    BinaryExpr(BinaryExpr),
    /// LIKE expression
    Like(Like),
    /// LIKE expression that uses regular expressions
    SimilarTo(Like),
    /// Negation of an expression. The expression's type must be a boolean to make sense.
    Not(Box<Expr>),
    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
    IsNotNull(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsNull(Box<Expr>),
    /// True if argument is true, false otherwise. This expression itself is never NULL.
    IsTrue(Box<Expr>),
    /// True if argument is false, false otherwise. This expression itself is never NULL.
    IsFalse(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsUnknown(Box<Expr>),
    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
    IsNotTrue(Box<Expr>),
    /// True if argument is TRUE or NULL, false otherwise. This expression itself is never NULL.
    IsNotFalse(Box<Expr>),
    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
    IsNotUnknown(Box<Expr>),
    /// Arithmetic negation of an expression, the operand must be of a signed numeric data type
    Negative(Box<Expr>),
    /// Whether an expression is between a given range.
    Between(Between),
    /// A CASE expression (see docs on [`Case`])
    Case(Case),
    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    Cast(Cast),
    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    TryCast(TryCast),
    /// Call a scalar function with a set of arguments.
    ScalarFunction(ScalarFunction),
    /// Calls an aggregate function with arguments, and optional
    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
    ///
    /// See also [`ExprFunctionExt`] to set these fields.
    ///
    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
    AggregateFunction(AggregateFunction),
    /// Call a window function with a set of arguments.
    ///
    /// The payload is boxed; see the `From<WindowFunction>` impl for a
    /// convenient way to build this variant.
    WindowFunction(Box<WindowFunction>),
    /// Returns whether the list contains the expr value.
    InList(InList),
    /// EXISTS subquery
    Exists(Exists),
    /// IN subquery
    InSubquery(InSubquery),
    /// Scalar subquery
    ScalarSubquery(Subquery),
    /// Represents a reference to all available fields in a specific schema,
    /// with an optional (schema) qualifier.
    ///
    /// This expr has to be resolved to a list of columns before translating logical
    /// plan into physical plan.
    #[deprecated(
        since = "46.0.0",
        note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765"
    )]
    Wildcard {
        /// Optional (schema) qualifier of the wildcard
        qualifier: Option<TableReference>,
        /// Additional options attached to the wildcard (see [`WildcardOptions`])
        options: Box<WildcardOptions>,
    },
    /// List of grouping set expressions. Only valid in the context of an aggregate
    /// GROUP BY expression list
    GroupingSet(GroupingSet),
    /// A placeholder for parameters in a prepared statement
    /// (e.g. `$foo` or `$1`)
    Placeholder(Placeholder),
    /// A placeholder which holds a reference to a qualified field
    /// in the outer query, used for correlated subqueries.
    OuterReferenceColumn(DataType, Column),
    /// Unnest expression
    Unnest(Unnest),
}
369
370impl Default for Expr {
371    fn default() -> Self {
372        Expr::Literal(ScalarValue::Null, None)
373    }
374}
375
376/// Create an [`Expr`] from a [`Column`]
377impl From<Column> for Expr {
378    fn from(value: Column) -> Self {
379        Expr::Column(value)
380    }
381}
382
383/// Create an [`Expr`] from a [`WindowFunction`]
384impl From<WindowFunction> for Expr {
385    fn from(value: WindowFunction) -> Self {
386        Expr::WindowFunction(Box::new(value))
387    }
388}
389
390/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
391/// useful for creating [`Expr`] from a [`DFSchema`].
392///
393/// See example on [`Expr`]
394impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
395    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
396        Expr::from(Column::from(value))
397    }
398}
399
400impl<'a> TreeNodeContainer<'a, Self> for Expr {
401    fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
402        &'a self,
403        mut f: F,
404    ) -> Result<TreeNodeRecursion> {
405        f(self)
406    }
407
408    fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>(
409        self,
410        mut f: F,
411    ) -> Result<Transformed<Self>> {
412        f(self)
413    }
414}
415
/// Literal metadata
///
/// Stores metadata associated with a literal expression
/// and is designed to be fast to `clone`.
///
/// This structure is used to store metadata associated with a literal expression, and it
/// corresponds to the `metadata` field on [`Field`].
///
/// # Example: Create [`FieldMetadata`] from a [`Field`]
/// ```
/// # use std::collections::HashMap;
/// # use datafusion_expr::expr::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true)
/// #  .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
/// // Create a new `FieldMetadata` instance from a `Field`
/// let metadata = FieldMetadata::new_from_field(&field);
/// // There is also a `From` impl:
/// let metadata = FieldMetadata::from(&field);
/// ```
///
/// # Example: Update a [`Field`] with [`FieldMetadata`]
/// ```
/// # use datafusion_expr::expr::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true);
/// # let metadata = FieldMetadata::new_from_field(&field);
/// // Add any metadata from `FieldMetadata` to `Field`
/// let updated_field = metadata.add_to_field(field);
/// ```
///
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct FieldMetadata {
    /// The inner metadata of a literal expression, which is a map of string
    /// keys to string values.
    ///
    /// Note this is not a `HashMap` because `HashMap` does not provide
    /// implementations for traits like `Hash` and `PartialOrd`, which are
    /// derived for this struct.
    ///
    /// The map is held behind an `Arc`, so cloning a `FieldMetadata` does not
    /// copy the entries.
    inner: Arc<BTreeMap<String, String>>,
}
456
457impl Default for FieldMetadata {
458    fn default() -> Self {
459        Self::new_empty()
460    }
461}
462
463impl FieldMetadata {
464    /// Create a new empty metadata instance.
465    pub fn new_empty() -> Self {
466        Self {
467            inner: Arc::new(BTreeMap::new()),
468        }
469    }
470
471    /// Merges two optional `FieldMetadata` instances, overwriting any existing
472    /// keys in `m` with keys from `n` if present
473    pub fn merge_options(
474        m: Option<&FieldMetadata>,
475        n: Option<&FieldMetadata>,
476    ) -> Option<FieldMetadata> {
477        match (m, n) {
478            (Some(m), Some(n)) => {
479                let mut merged = m.clone();
480                merged.extend(n.clone());
481                Some(merged)
482            }
483            (Some(m), None) => Some(m.clone()),
484            (None, Some(n)) => Some(n.clone()),
485            (None, None) => None,
486        }
487    }
488
489    /// Create a new metadata instance from a `Field`'s metadata.
490    pub fn new_from_field(field: &Field) -> Self {
491        let inner = field
492            .metadata()
493            .iter()
494            .map(|(k, v)| (k.to_string(), v.to_string()))
495            .collect();
496        Self {
497            inner: Arc::new(inner),
498        }
499    }
500
501    /// Create a new metadata instance from a map of string keys to string values.
502    pub fn new(inner: BTreeMap<String, String>) -> Self {
503        Self {
504            inner: Arc::new(inner),
505        }
506    }
507
508    /// Get the inner metadata as a reference to a `BTreeMap`.
509    pub fn inner(&self) -> &BTreeMap<String, String> {
510        &self.inner
511    }
512
513    /// Return the inner metadata
514    pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
515        self.inner
516    }
517
518    /// Adds metadata from `other` into `self`, overwriting any existing keys.
519    pub fn extend(&mut self, other: Self) {
520        if other.is_empty() {
521            return;
522        }
523        let other = Arc::unwrap_or_clone(other.into_inner());
524        Arc::make_mut(&mut self.inner).extend(other);
525    }
526
527    /// Returns true if the metadata is empty.
528    pub fn is_empty(&self) -> bool {
529        self.inner.is_empty()
530    }
531
532    /// Returns the number of key-value pairs in the metadata.
533    pub fn len(&self) -> usize {
534        self.inner.len()
535    }
536
537    /// Convert this `FieldMetadata` into a `HashMap<String, String>`
538    pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
539        self.inner
540            .iter()
541            .map(|(k, v)| (k.to_string(), v.to_string()))
542            .collect()
543    }
544
545    /// Updates the metadata on the Field with this metadata, if it is not empty.
546    pub fn add_to_field(&self, field: Field) -> Field {
547        if self.inner.is_empty() {
548            return field;
549        }
550
551        field.with_metadata(self.to_hashmap())
552    }
553}
554
555impl From<&Field> for FieldMetadata {
556    fn from(field: &Field) -> Self {
557        Self::new_from_field(field)
558    }
559}
560
561impl From<BTreeMap<String, String>> for FieldMetadata {
562    fn from(inner: BTreeMap<String, String>) -> Self {
563        Self::new(inner)
564    }
565}
566
567impl From<std::collections::HashMap<String, String>> for FieldMetadata {
568    fn from(map: std::collections::HashMap<String, String>) -> Self {
569        Self::new(map.into_iter().collect())
570    }
571}
572
573/// From reference
574impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
575    fn from(map: &std::collections::HashMap<String, String>) -> Self {
576        let inner = map
577            .iter()
578            .map(|(k, v)| (k.to_string(), v.to_string()))
579            .collect();
580        Self::new(inner)
581    }
582}
583
584/// From hashbrown map
585impl From<HashMap<String, String>> for FieldMetadata {
586    fn from(map: HashMap<String, String>) -> Self {
587        let inner = map.into_iter().collect();
588        Self::new(inner)
589    }
590}
591
592impl From<&HashMap<String, String>> for FieldMetadata {
593    fn from(map: &HashMap<String, String>) -> Self {
594        let inner = map
595            .into_iter()
596            .map(|(k, v)| (k.to_string(), v.to_string()))
597            .collect();
598        Self::new(inner)
599    }
600}
601
602/// UNNEST expression.
603#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
604pub struct Unnest {
605    pub expr: Box<Expr>,
606}
607
608impl Unnest {
609    /// Create a new Unnest expression.
610    pub fn new(expr: Expr) -> Self {
611        Self {
612            expr: Box::new(expr),
613        }
614    }
615
616    /// Create a new Unnest expression.
617    pub fn new_boxed(boxed: Box<Expr>) -> Self {
618        Self { expr: boxed }
619    }
620}
621
/// Alias expression
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Alias {
    /// The expression being given a name
    pub expr: Box<Expr>,
    /// Optional qualifier for the alias
    pub relation: Option<TableReference>,
    /// The name given to `expr`
    pub name: String,
    /// Optional metadata attached to the alias (see [`FieldMetadata`])
    pub metadata: Option<FieldMetadata>,
}
630
631impl Hash for Alias {
632    fn hash<H: Hasher>(&self, state: &mut H) {
633        self.expr.hash(state);
634        self.relation.hash(state);
635        self.name.hash(state);
636    }
637}
638
639impl PartialOrd for Alias {
640    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
641        let cmp = self.expr.partial_cmp(&other.expr);
642        let Some(Ordering::Equal) = cmp else {
643            return cmp;
644        };
645        let cmp = self.relation.partial_cmp(&other.relation);
646        let Some(Ordering::Equal) = cmp else {
647            return cmp;
648        };
649        self.name.partial_cmp(&other.name)
650    }
651}
652
653impl Alias {
654    /// Create an alias with an optional schema/field qualifier.
655    pub fn new(
656        expr: Expr,
657        relation: Option<impl Into<TableReference>>,
658        name: impl Into<String>,
659    ) -> Self {
660        Self {
661            expr: Box::new(expr),
662            relation: relation.map(|r| r.into()),
663            name: name.into(),
664            metadata: None,
665        }
666    }
667
668    pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self {
669        self.metadata = metadata;
670        self
671    }
672}
673
/// Binary expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct BinaryExpr {
    /// Left-hand side of the expression
    pub left: Box<Expr>,
    /// The operator applied to the two sides (any [`Operator`], not only
    /// comparisons)
    pub op: Operator,
    /// Right-hand side of the expression
    pub right: Box<Expr>,
}

impl BinaryExpr {
    /// Create a new binary expression from its already boxed operands
    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
        Self { left, op, right }
    }
}
691
692impl Display for BinaryExpr {
693    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
694        // Put parentheses around child binary expressions so that we can see the difference
695        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
696        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
697        // equivalent and the parentheses are not necessary.
698
699        fn write_child(
700            f: &mut Formatter<'_>,
701            expr: &Expr,
702            precedence: u8,
703        ) -> fmt::Result {
704            match expr {
705                Expr::BinaryExpr(child) => {
706                    let p = child.op.precedence();
707                    if p == 0 || p < precedence {
708                        write!(f, "({child})")?;
709                    } else {
710                        write!(f, "{child}")?;
711                    }
712                }
713                _ => write!(f, "{expr}")?,
714            }
715            Ok(())
716        }
717
718        let precedence = self.op.precedence();
719        write_child(f, self.left.as_ref(), precedence)?;
720        write!(f, " {} ", self.op)?;
721        write_child(f, self.right.as_ref(), precedence)
722    }
723}
724
/// CASE expression
///
/// The CASE expression is similar to a series of nested if/else and there are two forms that
/// can be used. The first form consists of a series of boolean "when" expressions with
/// corresponding "then" expressions, and an optional "else" expression.
///
/// ```text
/// CASE WHEN condition THEN result
///      [WHEN ...]
///      [ELSE result]
/// END
/// ```
///
/// The second form uses a base expression and then a series of "when" clauses that match on a
/// literal value.
///
/// ```text
/// CASE expression
///     WHEN value THEN result
///     [WHEN ...]
///     [ELSE result]
/// END
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
pub struct Case {
    /// Optional base expression that can be compared to literal values in the "when" expressions
    pub expr: Option<Box<Expr>>,
    /// One or more when/then expressions, each stored as a `(when, then)` pair
    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
    /// Optional "else" expression
    pub else_expr: Option<Box<Expr>>,
}

impl Case {
    /// Create a new Case expression
    ///
    /// The arguments are stored as given; no validation is performed here.
    pub fn new(
        expr: Option<Box<Expr>>,
        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
        else_expr: Option<Box<Expr>>,
    ) -> Self {
        Self {
            expr,
            when_then_expr,
            else_expr,
        }
    }
}
772
/// LIKE expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Like {
    /// Whether the expression is negated
    pub negated: bool,
    /// The expression to match
    pub expr: Box<Expr>,
    /// The pattern to match `expr` against
    pub pattern: Box<Expr>,
    /// Optional escape character for the pattern
    pub escape_char: Option<char>,
    /// Whether to ignore case on comparing
    pub case_insensitive: bool,
}

impl Like {
    /// Create a new Like expression
    ///
    /// The arguments are stored as given; no validation is performed here.
    pub fn new(
        negated: bool,
        expr: Box<Expr>,
        pattern: Box<Expr>,
        escape_char: Option<char>,
        case_insensitive: bool,
    ) -> Self {
        Self {
            negated,
            expr,
            pattern,
            escape_char,
            case_insensitive,
        }
    }
}
802
/// BETWEEN expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Between {
    /// The value to compare
    pub expr: Box<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
    /// The low end of the range
    pub low: Box<Expr>,
    /// The high end of the range
    pub high: Box<Expr>,
}

impl Between {
    /// Create a new Between expression
    ///
    /// The arguments are stored as given; no validation is performed here.
    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
        Self {
            expr,
            negated,
            low,
            high,
        }
    }
}
827
828/// Invoke a [`ScalarUDF`] with a set of arguments
829///
830/// [`ScalarUDF`]: crate::ScalarUDF
831#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
832pub struct ScalarFunction {
833    /// The function
834    pub func: Arc<crate::ScalarUDF>,
835    /// List of expressions to feed to the functions as arguments
836    pub args: Vec<Expr>,
837}
838
839impl ScalarFunction {
840    // return the Function's name
841    pub fn name(&self) -> &str {
842        self.func.name()
843    }
844}
845
846impl ScalarFunction {
847    /// Create a new `ScalarFunction` from a [`ScalarUDF`]
848    ///
849    /// [`ScalarUDF`]: crate::ScalarUDF
850    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
851        Self { func: udf, args }
852    }
853}
854
/// Access a sub field of a nested type, such as `Field` or `List`
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
    /// Named field, for example `struct["name"]`
    NamedStructField {
        /// The field name, held as a [`ScalarValue`]
        name: ScalarValue,
    },
    /// Single list index, for example: `list[i]`
    ListIndex {
        /// The index expression (`i` in the example above)
        key: Box<Expr>,
    },
    /// List stride, for example `list[i:j:k]`
    ListRange {
        /// Start of the range (`i` in the example above)
        start: Box<Expr>,
        /// End of the range (`j` in the example above)
        stop: Box<Expr>,
        /// Step between elements (`k` in the example above)
        stride: Box<Expr>,
    },
}
869
/// Cast expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Cast {
    /// The expression being cast
    pub expr: Box<Expr>,
    /// The `DataType` the expression will yield
    pub data_type: DataType,
}

impl Cast {
    /// Create a new Cast expression that casts `expr` to `data_type`
    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
        Self { expr, data_type }
    }
}
885
/// TryCast Expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct TryCast {
    /// The expression being cast
    pub expr: Box<Expr>,
    /// The `DataType` the expression will yield
    pub data_type: DataType,
}

impl TryCast {
    /// Create a new TryCast expression that casts `expr` to `data_type`
    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
        Self { expr, data_type }
    }
}
901
/// SORT expression: an expression to order by, together with the sort
/// direction and the placement of nulls.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Sort {
    /// The expression to sort on
    pub expr: Expr,
    /// The direction of the sort: `true` for ascending, `false` for descending
    pub asc: bool,
    /// Whether to put Nulls before all other data values
    pub nulls_first: bool,
}
912
913impl Sort {
914    /// Create a new Sort expression
915    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
916        Self {
917            expr,
918            asc,
919            nulls_first,
920        }
921    }
922
923    /// Create a new Sort expression with the opposite sort direction
924    pub fn reverse(&self) -> Self {
925        Self {
926            expr: self.expr.clone(),
927            asc: !self.asc,
928            nulls_first: !self.nulls_first,
929        }
930    }
931
932    /// Replaces the Sort expressions with `expr`
933    pub fn with_expr(&self, expr: Expr) -> Self {
934        Self {
935            expr,
936            asc: self.asc,
937            nulls_first: self.nulls_first,
938        }
939    }
940}
941
942impl Display for Sort {
943    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
944        write!(f, "{}", self.expr)?;
945        if self.asc {
946            write!(f, " ASC")?;
947        } else {
948            write!(f, " DESC")?;
949        }
950        if self.nulls_first {
951            write!(f, " NULLS FIRST")?;
952        } else {
953            write!(f, " NULLS LAST")?;
954        }
955        Ok(())
956    }
957}
958
959impl<'a> TreeNodeContainer<'a, Expr> for Sort {
960    fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>(
961        &'a self,
962        f: F,
963    ) -> Result<TreeNodeRecursion> {
964        self.expr.apply_elements(f)
965    }
966
967    fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
968        self,
969        f: F,
970    ) -> Result<Transformed<Self>> {
971        self.expr
972            .map_elements(f)?
973            .map_data(|expr| Ok(Self { expr, ..self }))
974    }
975}
976
/// Aggregate function
///
/// See also [`ExprFunctionExt`] to set these fields on `Expr`
///
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunction {
    /// The user-defined aggregate function (UDAF) to call
    pub func: Arc<AggregateUDF>,
    /// Arguments and modifiers (`DISTINCT`, filter, ordering, null treatment)
    pub params: AggregateFunctionParams,
}
988
/// Arguments and modifiers of an [`AggregateFunction`].
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunctionParams {
    /// List of expressions passed to the function as arguments
    pub args: Vec<Expr>,
    /// Whether this is a DISTINCT aggregation or not
    pub distinct: bool,
    /// Optional filter
    pub filter: Option<Box<Expr>>,
    /// Optional ordering (empty when no ordering is specified)
    pub order_by: Vec<Sort>,
    /// Optional null treatment, as parsed by `sqlparser` ([`NullTreatment`])
    pub null_treatment: Option<NullTreatment>,
}
1000
1001impl AggregateFunction {
1002    /// Create a new AggregateFunction expression with a user-defined function (UDF)
1003    pub fn new_udf(
1004        func: Arc<AggregateUDF>,
1005        args: Vec<Expr>,
1006        distinct: bool,
1007        filter: Option<Box<Expr>>,
1008        order_by: Vec<Sort>,
1009        null_treatment: Option<NullTreatment>,
1010    ) -> Self {
1011        Self {
1012            func,
1013            params: AggregateFunctionParams {
1014                args,
1015                distinct,
1016                filter,
1017                order_by,
1018                null_treatment,
1019            },
1020        }
1021    }
1022}
1023
/// A function used as a SQL window function
///
/// In SQL, you can use:
/// - Actual window functions ([`WindowUDF`])
/// - Normal aggregate functions ([`AggregateUDF`])
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum WindowFunctionDefinition {
    /// A user defined aggregate function
    AggregateUDF(Arc<AggregateUDF>),
    /// A user defined window function
    WindowUDF(Arc<WindowUDF>),
}
1036
1037impl WindowFunctionDefinition {
1038    /// Returns the datatype of the window function
1039    pub fn return_field(
1040        &self,
1041        input_expr_fields: &[FieldRef],
1042        _input_expr_nullable: &[bool],
1043        display_name: &str,
1044    ) -> Result<FieldRef> {
1045        match self {
1046            WindowFunctionDefinition::AggregateUDF(fun) => {
1047                fun.return_field(input_expr_fields)
1048            }
1049            WindowFunctionDefinition::WindowUDF(fun) => {
1050                fun.field(WindowUDFFieldArgs::new(input_expr_fields, display_name))
1051            }
1052        }
1053    }
1054
1055    /// The signatures supported by the function `fun`.
1056    pub fn signature(&self) -> Signature {
1057        match self {
1058            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
1059            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
1060        }
1061    }
1062
1063    /// Function's name for display
1064    pub fn name(&self) -> &str {
1065        match self {
1066            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
1067            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
1068        }
1069    }
1070
1071    /// Return the the inner window simplification function, if any
1072    ///
1073    /// See [`WindowFunctionSimplification`] for more information
1074    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1075        match self {
1076            WindowFunctionDefinition::AggregateUDF(_) => None,
1077            WindowFunctionDefinition::WindowUDF(udwf) => udwf.simplify(),
1078        }
1079    }
1080}
1081
1082impl Display for WindowFunctionDefinition {
1083    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1084        match self {
1085            WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f),
1086            WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f),
1087        }
1088    }
1089}
1090
1091impl From<Arc<AggregateUDF>> for WindowFunctionDefinition {
1092    fn from(value: Arc<AggregateUDF>) -> Self {
1093        Self::AggregateUDF(value)
1094    }
1095}
1096
1097impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
1098    fn from(value: Arc<WindowUDF>) -> Self {
1099        Self::WindowUDF(value)
1100    }
1101}
1102
/// Window function
///
/// Holds the actual function to call (a [`WindowFunctionDefinition`]) as
/// well as its arguments (`args`) and the contents of the `OVER` clause:
///
/// 1. `PARTITION BY`
/// 2. `ORDER BY`
/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
///
/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
///
/// [`ExprFunctionExt`]: crate::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunction {
    /// The function to call
    pub fun: WindowFunctionDefinition,
    /// Arguments and the contents of the `OVER` clause
    pub params: WindowFunctionParams,
}
1121
/// Arguments and `OVER` clause contents of a [`WindowFunction`].
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunctionParams {
    /// List of expressions to feed to the functions as arguments
    pub args: Vec<Expr>,
    /// List of partition by expressions
    pub partition_by: Vec<Expr>,
    /// List of order by expressions
    pub order_by: Vec<Sort>,
    /// Window frame
    pub window_frame: WindowFrame,
    /// Specifies how NULL value is treated: ignore or respect
    pub null_treatment: Option<NullTreatment>,
}
1135
1136impl WindowFunction {
1137    /// Create a new Window expression with the specified argument an
1138    /// empty `OVER` clause
1139    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
1140        Self {
1141            fun: fun.into(),
1142            params: WindowFunctionParams {
1143                args,
1144                partition_by: Vec::default(),
1145                order_by: Vec::default(),
1146                window_frame: WindowFrame::new(None),
1147                null_treatment: None,
1148            },
1149        }
1150    }
1151
1152    /// Return the the inner window simplification function, if any
1153    ///
1154    /// See [`WindowFunctionSimplification`] for more information
1155    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1156        self.fun.simplify()
1157    }
1158}
1159
/// EXISTS expression: a subquery plus a flag telling whether the test is
/// negated (`NOT EXISTS`).
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Exists {
    /// Subquery that will produce a single column of data
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
1168
1169impl Exists {
1170    // Create a new Exists expression.
1171    pub fn new(subquery: Subquery, negated: bool) -> Self {
1172        Self { subquery, negated }
1173    }
1174}
1175
/// InList expression: tests an expression against a list of values
/// (`expr IN (list)`), optionally negated.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InList {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// The list of values to compare against
    pub list: Vec<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
}
1186
1187impl InList {
1188    /// Create a new InList expression
1189    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
1190        Self {
1191            expr,
1192            list,
1193            negated,
1194        }
1195    }
1196}
1197
/// IN subquery: tests an expression against the output of a subquery,
/// optionally negated.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InSubquery {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// Subquery that will produce a single column of data to compare against
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
1208
1209impl InSubquery {
1210    /// Create a new InSubquery expression
1211    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
1212        Self {
1213            expr,
1214            subquery,
1215            negated,
1216        }
1217    }
1218}
1219
/// Placeholder, representing bind parameter values such as `$1` or `$name`.
///
/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
/// or can be specified directly using `PREPARE` statements.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Placeholder {
    /// The identifier of the parameter, including the leading `$` (e.g., `"$1"` or `"$foo"`)
    pub id: String,
    /// The type the parameter will be filled in with, or `None` when the
    /// type has not been set
    pub data_type: Option<DataType>,
}
1231
1232impl Placeholder {
1233    /// Create a new Placeholder expression
1234    pub fn new(id: String, data_type: Option<DataType>) -> Self {
1235        Self { id, data_type }
1236    }
1237}
1238
/// Grouping sets
///
/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
/// for Postgres definition.
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
/// for Apache Spark definition.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum GroupingSet {
    /// Rollup grouping sets (`ROLLUP`), given as a flat list of expressions
    Rollup(Vec<Expr>),
    /// Cube grouping sets (`CUBE`), given as a flat list of expressions
    Cube(Vec<Expr>),
    /// User-defined grouping sets: one inner list of expressions per set
    GroupingSets(Vec<Vec<Expr>>),
}
1254
1255impl GroupingSet {
1256    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
1257    /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate
1258    /// the exprs in the underlying sets.
1259    pub fn distinct_expr(&self) -> Vec<&Expr> {
1260        match self {
1261            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
1262                exprs.iter().collect()
1263            }
1264            GroupingSet::GroupingSets(groups) => {
1265                let mut exprs: Vec<&Expr> = vec![];
1266                for exp in groups.iter().flatten() {
1267                    if !exprs.contains(&exp) {
1268                        exprs.push(exp);
1269                    }
1270                }
1271                exprs
1272            }
1273        }
1274    }
1275}
1276
/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and BigQuery `EXCEPT`.
///
/// Every option is optional; the `Default` value has none of them set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct WildcardOptions {
    /// `[ILIKE...]`.
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub ilike: Option<IlikeSelectItem>,
    /// `[EXCLUDE...]`.
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub exclude: Option<ExcludeSelectItem>,
    /// `[EXCEPT...]`.
    /// BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
    /// Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
    pub except: Option<ExceptSelectItem>,
    /// `[REPLACE]`
    /// BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
    /// Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub replace: Option<PlannedReplaceSelectItem>,
    /// `[RENAME ...]`.
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub rename: Option<RenameSelectItem>,
}
1299
1300impl WildcardOptions {
1301    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1302        WildcardOptions {
1303            ilike: self.ilike,
1304            exclude: self.exclude,
1305            except: self.except,
1306            replace: Some(replace),
1307            rename: self.rename,
1308        }
1309    }
1310}
1311
1312impl Display for WildcardOptions {
1313    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1314        if let Some(ilike) = &self.ilike {
1315            write!(f, " {ilike}")?;
1316        }
1317        if let Some(exclude) = &self.exclude {
1318            write!(f, " {exclude}")?;
1319        }
1320        if let Some(except) = &self.except {
1321            write!(f, " {except}")?;
1322        }
1323        if let Some(replace) = &self.replace {
1324            write!(f, " {replace}")?;
1325        }
1326        if let Some(rename) = &self.rename {
1327            write!(f, " {rename}")?;
1328        }
1329        Ok(())
1330    }
1331}
1332
/// The planned expressions for `REPLACE`
///
/// Keeps the original AST elements (used for display) alongside the
/// expressions planned from them.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct PlannedReplaceSelectItem {
    /// The original ast nodes
    pub items: Vec<ReplaceSelectElement>,
    /// The expressions planned from the ast nodes. They will be used when expanding the wildcard.
    pub planned_expressions: Vec<Expr>,
}
1341
1342impl Display for PlannedReplaceSelectItem {
1343    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1344        write!(f, "REPLACE")?;
1345        write!(f, " ({})", display_comma_separated(&self.items))?;
1346        Ok(())
1347    }
1348}
1349
1350impl PlannedReplaceSelectItem {
1351    pub fn items(&self) -> &[ReplaceSelectElement] {
1352        &self.items
1353    }
1354
1355    pub fn expressions(&self) -> &[Expr] {
1356        &self.planned_expressions
1357    }
1358}
1359
1360impl Expr {
    /// The name of the column (field) that this `Expr` will produce.
    ///
    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
    /// [`Schema`] will have a field with this name.
    ///
    /// Note that the resulting string is subtly different from the `Display`
    /// representation for certain `Expr`. Some differences:
    ///
    /// 1. [`Expr::Alias`], which shows only the alias itself
    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
    ///
    /// The returned value borrows `self` and renders lazily; call
    /// `to_string()` on it to obtain an owned `String`.
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo").eq(lit(42));
    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
    ///
    /// let expr = col("foo").alias("bar").eq(lit(11));
    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
    /// ```
    ///
    /// [`Schema`]: arrow::datatypes::Schema
    pub fn schema_name(&self) -> impl Display + '_ {
        SchemaDisplay(self)
    }
1386
    /// Human readable display formatting for this expression.
    ///
    /// This function is primarily used in printing the explain tree output
    /// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
    /// show how expressions are used in physical and logical plans. See
    /// [`Expr`] for other ways to format expressions.
    ///
    /// Note this format is intended for human consumption rather than SQL for
    /// other systems. If you need SQL to pass to other systems, consider using
    /// [`Unparser`].
    ///
    /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo") + lit(42);
    /// // For EXPLAIN output:
    /// // "foo + 42"
    /// println!("{}", expr.human_display());
    /// ```
    pub fn human_display(&self) -> impl Display + '_ {
        SqlDisplay(self)
    }
1411
1412    /// Returns the qualifier and the schema name of this expression.
1413    ///
1414    /// Used when the expression forms the output field of a certain plan.
1415    /// The result is the field's qualifier and field name in the plan's
1416    /// output schema. We can use this qualified name to reference the field.
1417    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1418        match self {
1419            Expr::Column(Column {
1420                relation,
1421                name,
1422                spans: _,
1423            }) => (relation.clone(), name.clone()),
1424            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1425            _ => (None, self.schema_name().to_string()),
1426        }
1427    }
1428
1429    /// Returns a full and complete string representation of this expression.
1430    #[deprecated(since = "42.0.0", note = "use format! instead")]
1431    pub fn canonical_name(&self) -> String {
1432        format!("{self}")
1433    }
1434
1435    /// Return String representation of the variant represented by `self`
1436    /// Useful for non-rust based bindings
1437    pub fn variant_name(&self) -> &str {
1438        match self {
1439            Expr::AggregateFunction { .. } => "AggregateFunction",
1440            Expr::Alias(..) => "Alias",
1441            Expr::Between { .. } => "Between",
1442            Expr::BinaryExpr { .. } => "BinaryExpr",
1443            Expr::Case { .. } => "Case",
1444            Expr::Cast { .. } => "Cast",
1445            Expr::Column(..) => "Column",
1446            Expr::OuterReferenceColumn(_, _) => "Outer",
1447            Expr::Exists { .. } => "Exists",
1448            Expr::GroupingSet(..) => "GroupingSet",
1449            Expr::InList { .. } => "InList",
1450            Expr::InSubquery(..) => "InSubquery",
1451            Expr::IsNotNull(..) => "IsNotNull",
1452            Expr::IsNull(..) => "IsNull",
1453            Expr::Like { .. } => "Like",
1454            Expr::SimilarTo { .. } => "RLike",
1455            Expr::IsTrue(..) => "IsTrue",
1456            Expr::IsFalse(..) => "IsFalse",
1457            Expr::IsUnknown(..) => "IsUnknown",
1458            Expr::IsNotTrue(..) => "IsNotTrue",
1459            Expr::IsNotFalse(..) => "IsNotFalse",
1460            Expr::IsNotUnknown(..) => "IsNotUnknown",
1461            Expr::Literal(..) => "Literal",
1462            Expr::Negative(..) => "Negative",
1463            Expr::Not(..) => "Not",
1464            Expr::Placeholder(_) => "Placeholder",
1465            Expr::ScalarFunction(..) => "ScalarFunction",
1466            Expr::ScalarSubquery { .. } => "ScalarSubquery",
1467            Expr::ScalarVariable(..) => "ScalarVariable",
1468            Expr::TryCast { .. } => "TryCast",
1469            Expr::WindowFunction { .. } => "WindowFunction",
1470            #[expect(deprecated)]
1471            Expr::Wildcard { .. } => "Wildcard",
1472            Expr::Unnest { .. } => "Unnest",
1473        }
1474    }
1475
1476    /// Return `self == other`
1477    pub fn eq(self, other: Expr) -> Expr {
1478        binary_expr(self, Operator::Eq, other)
1479    }
1480
1481    /// Return `self != other`
1482    pub fn not_eq(self, other: Expr) -> Expr {
1483        binary_expr(self, Operator::NotEq, other)
1484    }
1485
1486    /// Return `self > other`
1487    pub fn gt(self, other: Expr) -> Expr {
1488        binary_expr(self, Operator::Gt, other)
1489    }
1490
1491    /// Return `self >= other`
1492    pub fn gt_eq(self, other: Expr) -> Expr {
1493        binary_expr(self, Operator::GtEq, other)
1494    }
1495
1496    /// Return `self < other`
1497    pub fn lt(self, other: Expr) -> Expr {
1498        binary_expr(self, Operator::Lt, other)
1499    }
1500
1501    /// Return `self <= other`
1502    pub fn lt_eq(self, other: Expr) -> Expr {
1503        binary_expr(self, Operator::LtEq, other)
1504    }
1505
1506    /// Return `self && other`
1507    pub fn and(self, other: Expr) -> Expr {
1508        binary_expr(self, Operator::And, other)
1509    }
1510
1511    /// Return `self || other`
1512    pub fn or(self, other: Expr) -> Expr {
1513        binary_expr(self, Operator::Or, other)
1514    }
1515
1516    /// Return `self LIKE other`
1517    pub fn like(self, other: Expr) -> Expr {
1518        Expr::Like(Like::new(
1519            false,
1520            Box::new(self),
1521            Box::new(other),
1522            None,
1523            false,
1524        ))
1525    }
1526
1527    /// Return `self NOT LIKE other`
1528    pub fn not_like(self, other: Expr) -> Expr {
1529        Expr::Like(Like::new(
1530            true,
1531            Box::new(self),
1532            Box::new(other),
1533            None,
1534            false,
1535        ))
1536    }
1537
1538    /// Return `self ILIKE other`
1539    pub fn ilike(self, other: Expr) -> Expr {
1540        Expr::Like(Like::new(
1541            false,
1542            Box::new(self),
1543            Box::new(other),
1544            None,
1545            true,
1546        ))
1547    }
1548
1549    /// Return `self NOT ILIKE other`
1550    pub fn not_ilike(self, other: Expr) -> Expr {
1551        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1552    }
1553
1554    /// Return the name to use for the specific Expr
1555    pub fn name_for_alias(&self) -> Result<String> {
1556        Ok(self.schema_name().to_string())
1557    }
1558
1559    /// Ensure `expr` has the name as `original_name` by adding an
1560    /// alias if necessary.
1561    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1562        let new_name = self.name_for_alias()?;
1563        if new_name == original_name {
1564            return Ok(self);
1565        }
1566
1567        Ok(self.alias(original_name))
1568    }
1569
1570    /// Return `self AS name` alias expression
1571    pub fn alias(self, name: impl Into<String>) -> Expr {
1572        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1573    }
1574
1575    /// Return `self AS name` alias expression with metadata
1576    ///
1577    /// The metadata will be attached to the Arrow Schema field when the expression
1578    /// is converted to a field via `Expr.to_field()`.
1579    ///
1580    /// # Example
1581    /// ```
1582    /// # use datafusion_expr::col;
1583    /// # use std::collections::HashMap;
1584    /// # use datafusion_expr::expr::FieldMetadata;
1585    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1586    /// let metadata = FieldMetadata::from(metadata);
1587    /// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
1588    /// ```
1589    ///
1590    pub fn alias_with_metadata(
1591        self,
1592        name: impl Into<String>,
1593        metadata: Option<FieldMetadata>,
1594    ) -> Expr {
1595        Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
1596    }
1597
1598    /// Return `self AS name` alias expression with a specific qualifier
1599    pub fn alias_qualified(
1600        self,
1601        relation: Option<impl Into<TableReference>>,
1602        name: impl Into<String>,
1603    ) -> Expr {
1604        Expr::Alias(Alias::new(self, relation, name.into()))
1605    }
1606
1607    /// Return `self AS name` alias expression with a specific qualifier and metadata
1608    ///
1609    /// The metadata will be attached to the Arrow Schema field when the expression
1610    /// is converted to a field via `Expr.to_field()`.
1611    ///
1612    /// # Example
1613    /// ```
1614    /// # use datafusion_expr::col;
1615    /// # use std::collections::HashMap;
1616    /// # use datafusion_expr::expr::FieldMetadata;
1617    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1618    /// let metadata = FieldMetadata::from(metadata);
1619    /// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
1620    /// ```
1621    ///
1622    pub fn alias_qualified_with_metadata(
1623        self,
1624        relation: Option<impl Into<TableReference>>,
1625        name: impl Into<String>,
1626        metadata: Option<FieldMetadata>,
1627    ) -> Expr {
1628        Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
1629    }
1630
1631    /// Remove an alias from an expression if one exists.
1632    ///
1633    /// If the expression is not an alias, the expression is returned unchanged.
1634    /// This method does not remove aliases from nested expressions.
1635    ///
1636    /// # Example
1637    /// ```
1638    /// # use datafusion_expr::col;
1639    /// // `foo as "bar"` is unaliased to `foo`
1640    /// let expr = col("foo").alias("bar");
1641    /// assert_eq!(expr.unalias(), col("foo"));
1642    ///
1643    /// // `foo as "bar" + baz` is not unaliased
1644    /// let expr = col("foo").alias("bar") + col("baz");
1645    /// assert_eq!(expr.clone().unalias(), expr);
1646    ///
1647    /// // `foo as "bar" as "baz" is unaliased to foo as "bar"
1648    /// let expr = col("foo").alias("bar").alias("baz");
1649    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1650    /// ```
1651    pub fn unalias(self) -> Expr {
1652        match self {
1653            Expr::Alias(alias) => *alias.expr,
1654            _ => self,
1655        }
1656    }
1657
1658    /// Recursively removed potentially multiple aliases from an expression.
1659    ///
1660    /// This method removes nested aliases and returns [`Transformed`]
1661    /// to signal if the expression was changed.
1662    ///
1663    /// # Example
1664    /// ```
1665    /// # use datafusion_expr::col;
1666    /// // `foo as "bar"` is unaliased to `foo`
1667    /// let expr = col("foo").alias("bar");
1668    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1669    ///
1670    /// // `foo as "bar" + baz` is  unaliased
1671    /// let expr = col("foo").alias("bar") + col("baz");
1672    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1673    ///
1674    /// // `foo as "bar" as "baz" is unalaised to foo
1675    /// let expr = col("foo").alias("bar").alias("baz");
1676    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1677    /// ```
1678    pub fn unalias_nested(self) -> Transformed<Expr> {
1679        self.transform_down_up(
1680            |expr| {
1681                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1682                let recursion = if matches!(
1683                    expr,
1684                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1685                ) {
1686                    // Subqueries could contain aliases so don't recurse into those
1687                    TreeNodeRecursion::Jump
1688                } else {
1689                    TreeNodeRecursion::Continue
1690                };
1691                Ok(Transformed::new(expr, false, recursion))
1692            },
1693            |expr| {
1694                // f_up: unalias on up so we can remove nested aliases like
1695                // `(x as foo) as bar`
1696                if let Expr::Alias(alias) = expr {
1697                    match alias
1698                        .metadata
1699                        .as_ref()
1700                        .map(|h| h.is_empty())
1701                        .unwrap_or(true)
1702                    {
1703                        true => Ok(Transformed::yes(*alias.expr)),
1704                        false => Ok(Transformed::no(Expr::Alias(alias))),
1705                    }
1706                } else {
1707                    Ok(Transformed::no(expr))
1708                }
1709            },
1710        )
1711        // Unreachable code: internal closure doesn't return err
1712        .unwrap()
1713    }
1714
1715    /// Return `self IN <list>` if `negated` is false, otherwise
1716    /// return `self NOT IN <list>`.a
1717    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
1718        Expr::InList(InList::new(Box::new(self), list, negated))
1719    }
1720
1721    /// Return `IsNull(Box(self))
1722    pub fn is_null(self) -> Expr {
1723        Expr::IsNull(Box::new(self))
1724    }
1725
1726    /// Return `IsNotNull(Box(self))
1727    pub fn is_not_null(self) -> Expr {
1728        Expr::IsNotNull(Box::new(self))
1729    }
1730
1731    /// Create a sort configuration from an existing expression.
1732    ///
1733    /// ```
1734    /// # use datafusion_expr::col;
1735    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
1736    /// ```
1737    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
1738        Sort::new(self, asc, nulls_first)
1739    }
1740
1741    /// Return `IsTrue(Box(self))`
1742    pub fn is_true(self) -> Expr {
1743        Expr::IsTrue(Box::new(self))
1744    }
1745
1746    /// Return `IsNotTrue(Box(self))`
1747    pub fn is_not_true(self) -> Expr {
1748        Expr::IsNotTrue(Box::new(self))
1749    }
1750
1751    /// Return `IsFalse(Box(self))`
1752    pub fn is_false(self) -> Expr {
1753        Expr::IsFalse(Box::new(self))
1754    }
1755
1756    /// Return `IsNotFalse(Box(self))`
1757    pub fn is_not_false(self) -> Expr {
1758        Expr::IsNotFalse(Box::new(self))
1759    }
1760
1761    /// Return `IsUnknown(Box(self))`
1762    pub fn is_unknown(self) -> Expr {
1763        Expr::IsUnknown(Box::new(self))
1764    }
1765
1766    /// Return `IsNotUnknown(Box(self))`
1767    pub fn is_not_unknown(self) -> Expr {
1768        Expr::IsNotUnknown(Box::new(self))
1769    }
1770
1771    /// return `self BETWEEN low AND high`
1772    pub fn between(self, low: Expr, high: Expr) -> Expr {
1773        Expr::Between(Between::new(
1774            Box::new(self),
1775            false,
1776            Box::new(low),
1777            Box::new(high),
1778        ))
1779    }
1780
1781    /// Return `self NOT BETWEEN low AND high`
1782    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1783        Expr::Between(Between::new(
1784            Box::new(self),
1785            true,
1786            Box::new(low),
1787            Box::new(high),
1788        ))
1789    }
1790    /// Return a reference to the inner `Column` if any
1791    ///
1792    /// returns `None` if the expression is not a `Column`
1793    ///
1794    /// Note: None may be returned for expressions that are not `Column` but
1795    /// are convertible to `Column` such as `Cast` expressions.
1796    ///
1797    /// Example
1798    /// ```
1799    /// # use datafusion_common::Column;
1800    /// use datafusion_expr::{col, Expr};
1801    /// let expr = col("foo");
1802    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
1803    ///
1804    /// let expr = col("foo").alias("bar");
1805    /// assert_eq!(expr.try_as_col(), None);
1806    /// ```
1807    pub fn try_as_col(&self) -> Option<&Column> {
1808        if let Expr::Column(it) = self {
1809            Some(it)
1810        } else {
1811            None
1812        }
1813    }
1814
1815    /// Returns the inner `Column` if any. This is a specialized version of
1816    /// [`Self::try_as_col`] that take Cast expressions into account when the
1817    /// expression is as on condition for joins.
1818    ///
1819    /// Called this method when you are sure that the expression is a `Column`
1820    /// or a `Cast` expression that wraps a `Column`.
1821    pub fn get_as_join_column(&self) -> Option<&Column> {
1822        match self {
1823            Expr::Column(c) => Some(c),
1824            Expr::Cast(Cast { expr, .. }) => match &**expr {
1825                Expr::Column(c) => Some(c),
1826                _ => None,
1827            },
1828            _ => None,
1829        }
1830    }
1831
1832    /// Return all references to columns in this expression.
1833    ///
1834    /// # Example
1835    /// ```
1836    /// # use std::collections::HashSet;
1837    /// # use datafusion_common::Column;
1838    /// # use datafusion_expr::col;
1839    /// // For an expression `a + (b * a)`
1840    /// let expr = col("a") + (col("b") * col("a"));
1841    /// let refs = expr.column_refs();
1842    /// // refs contains "a" and "b"
1843    /// assert_eq!(refs.len(), 2);
1844    /// assert!(refs.contains(&Column::new_unqualified("a")));
1845    /// assert!(refs.contains(&Column::new_unqualified("b")));
1846    /// ```
1847    pub fn column_refs(&self) -> HashSet<&Column> {
1848        let mut using_columns = HashSet::new();
1849        self.add_column_refs(&mut using_columns);
1850        using_columns
1851    }
1852
1853    /// Adds references to all columns in this expression to the set
1854    ///
1855    /// See [`Self::column_refs`] for details
1856    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
1857        self.apply(|expr| {
1858            if let Expr::Column(col) = expr {
1859                set.insert(col);
1860            }
1861            Ok(TreeNodeRecursion::Continue)
1862        })
1863        .expect("traversal is infallible");
1864    }
1865
1866    /// Return all references to columns and their occurrence counts in the expression.
1867    ///
1868    /// # Example
1869    /// ```
1870    /// # use std::collections::HashMap;
1871    /// # use datafusion_common::Column;
1872    /// # use datafusion_expr::col;
1873    /// // For an expression `a + (b * a)`
1874    /// let expr = col("a") + (col("b") * col("a"));
1875    /// let mut refs = expr.column_refs_counts();
1876    /// // refs contains "a" and "b"
1877    /// assert_eq!(refs.len(), 2);
1878    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
1879    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
1880    /// ```
1881    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
1882        let mut map = HashMap::new();
1883        self.add_column_ref_counts(&mut map);
1884        map
1885    }
1886
1887    /// Adds references to all columns and their occurrence counts in the expression to
1888    /// the map.
1889    ///
1890    /// See [`Self::column_refs_counts`] for details
1891    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
1892        self.apply(|expr| {
1893            if let Expr::Column(col) = expr {
1894                *map.entry(col).or_default() += 1;
1895            }
1896            Ok(TreeNodeRecursion::Continue)
1897        })
1898        .expect("traversal is infallible");
1899    }
1900
1901    /// Returns true if there are any column references in this Expr
1902    pub fn any_column_refs(&self) -> bool {
1903        self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
1904            .expect("exists closure is infallible")
1905    }
1906
1907    /// Return true if the expression contains out reference(correlated) expressions.
1908    pub fn contains_outer(&self) -> bool {
1909        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
1910            .expect("exists closure is infallible")
1911    }
1912
1913    /// Returns true if the expression node is volatile, i.e. whether it can return
1914    /// different results when evaluated multiple times with the same input.
1915    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
1916    /// - `rand()` returns `true`,
1917    /// - `a + rand()` returns `false`
1918    pub fn is_volatile_node(&self) -> bool {
1919        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
1920    }
1921
1922    /// Returns true if the expression is volatile, i.e. whether it can return different
1923    /// results when evaluated multiple times with the same input.
1924    ///
1925    /// For example the function call `RANDOM()` is volatile as each call will
1926    /// return a different value.
1927    ///
1928    /// See [`Volatility`] for more information.
1929    pub fn is_volatile(&self) -> bool {
1930        self.exists(|expr| Ok(expr.is_volatile_node()))
1931            .expect("exists closure is infallible")
1932    }
1933
1934    /// Recursively find all [`Expr::Placeholder`] expressions, and
1935    /// to infer their [`DataType`] from the context of their use.
1936    ///
1937    /// For example, given an expression like `<int32> = $0` will infer `$0` to
1938    /// have type `int32`.
1939    ///
1940    /// Returns transformed expression and flag that is true if expression contains
1941    /// at least one placeholder.
1942    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
1943        let mut has_placeholder = false;
1944        self.transform(|mut expr| {
1945            match &mut expr {
1946                // Default to assuming the arguments are the same type
1947                Expr::BinaryExpr(BinaryExpr { left, op: _, right }) => {
1948                    rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
1949                    rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
1950                }
1951                Expr::Between(Between {
1952                    expr,
1953                    negated: _,
1954                    low,
1955                    high,
1956                }) => {
1957                    rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
1958                    rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
1959                }
1960                Expr::InList(InList {
1961                    expr,
1962                    list,
1963                    negated: _,
1964                }) => {
1965                    for item in list.iter_mut() {
1966                        rewrite_placeholder(item, expr.as_ref(), schema)?;
1967                    }
1968                }
1969                Expr::Like(Like { expr, pattern, .. })
1970                | Expr::SimilarTo(Like { expr, pattern, .. }) => {
1971                    rewrite_placeholder(pattern.as_mut(), expr.as_ref(), schema)?;
1972                }
1973                Expr::Placeholder(_) => {
1974                    has_placeholder = true;
1975                }
1976                _ => {}
1977            }
1978            Ok(Transformed::yes(expr))
1979        })
1980        .data()
1981        .map(|data| (data, has_placeholder))
1982    }
1983
1984    /// Returns true if some of this `exprs` subexpressions may not be evaluated
1985    /// and thus any side effects (like divide by zero) may not be encountered
1986    pub fn short_circuits(&self) -> bool {
1987        match self {
1988            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
1989            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
1990                matches!(op, Operator::And | Operator::Or)
1991            }
1992            Expr::Case { .. } => true,
1993            // Use explicit pattern match instead of a default
1994            // implementation, so that in the future if someone adds
1995            // new Expr types, they will check here as well
1996            // TODO: remove the next line after `Expr::Wildcard` is removed
1997            #[expect(deprecated)]
1998            Expr::AggregateFunction(..)
1999            | Expr::Alias(..)
2000            | Expr::Between(..)
2001            | Expr::Cast(..)
2002            | Expr::Column(..)
2003            | Expr::Exists(..)
2004            | Expr::GroupingSet(..)
2005            | Expr::InList(..)
2006            | Expr::InSubquery(..)
2007            | Expr::IsFalse(..)
2008            | Expr::IsNotFalse(..)
2009            | Expr::IsNotNull(..)
2010            | Expr::IsNotTrue(..)
2011            | Expr::IsNotUnknown(..)
2012            | Expr::IsNull(..)
2013            | Expr::IsTrue(..)
2014            | Expr::IsUnknown(..)
2015            | Expr::Like(..)
2016            | Expr::ScalarSubquery(..)
2017            | Expr::ScalarVariable(_, _)
2018            | Expr::SimilarTo(..)
2019            | Expr::Not(..)
2020            | Expr::Negative(..)
2021            | Expr::OuterReferenceColumn(_, _)
2022            | Expr::TryCast(..)
2023            | Expr::Unnest(..)
2024            | Expr::Wildcard { .. }
2025            | Expr::WindowFunction(..)
2026            | Expr::Literal(..)
2027            | Expr::Placeholder(..) => false,
2028        }
2029    }
2030
2031    /// Returns a reference to the set of locations in the SQL query where this
2032    /// expression appears, if known. [`None`] is returned if the expression
2033    /// type doesn't support tracking locations yet.
2034    pub fn spans(&self) -> Option<&Spans> {
2035        match self {
2036            Expr::Column(col) => Some(&col.spans),
2037            _ => None,
2038        }
2039    }
2040
2041    /// Check if the Expr is literal and get the literal value if it is.
2042    pub fn as_literal(&self) -> Option<&ScalarValue> {
2043        if let Expr::Literal(lit, _) = self {
2044            Some(lit)
2045        } else {
2046            None
2047        }
2048    }
2049}
2050
2051impl Normalizeable for Expr {
2052    fn can_normalize(&self) -> bool {
2053        #[allow(clippy::match_like_matches_macro)]
2054        match self {
2055            Expr::BinaryExpr(BinaryExpr {
2056                op:
2057                    _op @ (Operator::Plus
2058                    | Operator::Multiply
2059                    | Operator::BitwiseAnd
2060                    | Operator::BitwiseOr
2061                    | Operator::BitwiseXor
2062                    | Operator::Eq
2063                    | Operator::NotEq),
2064                ..
2065            }) => true,
2066            _ => false,
2067        }
2068    }
2069}
2070
2071impl NormalizeEq for Expr {
2072    fn normalize_eq(&self, other: &Self) -> bool {
2073        match (self, other) {
2074            (
2075                Expr::BinaryExpr(BinaryExpr {
2076                    left: self_left,
2077                    op: self_op,
2078                    right: self_right,
2079                }),
2080                Expr::BinaryExpr(BinaryExpr {
2081                    left: other_left,
2082                    op: other_op,
2083                    right: other_right,
2084                }),
2085            ) => {
2086                if self_op != other_op {
2087                    return false;
2088                }
2089
2090                if matches!(
2091                    self_op,
2092                    Operator::Plus
2093                        | Operator::Multiply
2094                        | Operator::BitwiseAnd
2095                        | Operator::BitwiseOr
2096                        | Operator::BitwiseXor
2097                        | Operator::Eq
2098                        | Operator::NotEq
2099                ) {
2100                    (self_left.normalize_eq(other_left)
2101                        && self_right.normalize_eq(other_right))
2102                        || (self_left.normalize_eq(other_right)
2103                            && self_right.normalize_eq(other_left))
2104                } else {
2105                    self_left.normalize_eq(other_left)
2106                        && self_right.normalize_eq(other_right)
2107                }
2108            }
2109            (
2110                Expr::Alias(Alias {
2111                    expr: self_expr,
2112                    relation: self_relation,
2113                    name: self_name,
2114                    ..
2115                }),
2116                Expr::Alias(Alias {
2117                    expr: other_expr,
2118                    relation: other_relation,
2119                    name: other_name,
2120                    ..
2121                }),
2122            ) => {
2123                self_name == other_name
2124                    && self_relation == other_relation
2125                    && self_expr.normalize_eq(other_expr)
2126            }
2127            (
2128                Expr::Like(Like {
2129                    negated: self_negated,
2130                    expr: self_expr,
2131                    pattern: self_pattern,
2132                    escape_char: self_escape_char,
2133                    case_insensitive: self_case_insensitive,
2134                }),
2135                Expr::Like(Like {
2136                    negated: other_negated,
2137                    expr: other_expr,
2138                    pattern: other_pattern,
2139                    escape_char: other_escape_char,
2140                    case_insensitive: other_case_insensitive,
2141                }),
2142            )
2143            | (
2144                Expr::SimilarTo(Like {
2145                    negated: self_negated,
2146                    expr: self_expr,
2147                    pattern: self_pattern,
2148                    escape_char: self_escape_char,
2149                    case_insensitive: self_case_insensitive,
2150                }),
2151                Expr::SimilarTo(Like {
2152                    negated: other_negated,
2153                    expr: other_expr,
2154                    pattern: other_pattern,
2155                    escape_char: other_escape_char,
2156                    case_insensitive: other_case_insensitive,
2157                }),
2158            ) => {
2159                self_negated == other_negated
2160                    && self_escape_char == other_escape_char
2161                    && self_case_insensitive == other_case_insensitive
2162                    && self_expr.normalize_eq(other_expr)
2163                    && self_pattern.normalize_eq(other_pattern)
2164            }
2165            (Expr::Not(self_expr), Expr::Not(other_expr))
2166            | (Expr::IsNull(self_expr), Expr::IsNull(other_expr))
2167            | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr))
2168            | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr))
2169            | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr))
2170            | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr))
2171            | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr))
2172            | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr))
2173            | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr))
2174            | (Expr::Negative(self_expr), Expr::Negative(other_expr))
2175            | (
2176                Expr::Unnest(Unnest { expr: self_expr }),
2177                Expr::Unnest(Unnest { expr: other_expr }),
2178            ) => self_expr.normalize_eq(other_expr),
2179            (
2180                Expr::Between(Between {
2181                    expr: self_expr,
2182                    negated: self_negated,
2183                    low: self_low,
2184                    high: self_high,
2185                }),
2186                Expr::Between(Between {
2187                    expr: other_expr,
2188                    negated: other_negated,
2189                    low: other_low,
2190                    high: other_high,
2191                }),
2192            ) => {
2193                self_negated == other_negated
2194                    && self_expr.normalize_eq(other_expr)
2195                    && self_low.normalize_eq(other_low)
2196                    && self_high.normalize_eq(other_high)
2197            }
2198            (
2199                Expr::Cast(Cast {
2200                    expr: self_expr,
2201                    data_type: self_data_type,
2202                }),
2203                Expr::Cast(Cast {
2204                    expr: other_expr,
2205                    data_type: other_data_type,
2206                }),
2207            )
2208            | (
2209                Expr::TryCast(TryCast {
2210                    expr: self_expr,
2211                    data_type: self_data_type,
2212                }),
2213                Expr::TryCast(TryCast {
2214                    expr: other_expr,
2215                    data_type: other_data_type,
2216                }),
2217            ) => self_data_type == other_data_type && self_expr.normalize_eq(other_expr),
2218            (
2219                Expr::ScalarFunction(ScalarFunction {
2220                    func: self_func,
2221                    args: self_args,
2222                }),
2223                Expr::ScalarFunction(ScalarFunction {
2224                    func: other_func,
2225                    args: other_args,
2226                }),
2227            ) => {
2228                self_func.name() == other_func.name()
2229                    && self_args.len() == other_args.len()
2230                    && self_args
2231                        .iter()
2232                        .zip(other_args.iter())
2233                        .all(|(a, b)| a.normalize_eq(b))
2234            }
2235            (
2236                Expr::AggregateFunction(AggregateFunction {
2237                    func: self_func,
2238                    params:
2239                        AggregateFunctionParams {
2240                            args: self_args,
2241                            distinct: self_distinct,
2242                            filter: self_filter,
2243                            order_by: self_order_by,
2244                            null_treatment: self_null_treatment,
2245                        },
2246                }),
2247                Expr::AggregateFunction(AggregateFunction {
2248                    func: other_func,
2249                    params:
2250                        AggregateFunctionParams {
2251                            args: other_args,
2252                            distinct: other_distinct,
2253                            filter: other_filter,
2254                            order_by: other_order_by,
2255                            null_treatment: other_null_treatment,
2256                        },
2257                }),
2258            ) => {
2259                self_func.name() == other_func.name()
2260                    && self_distinct == other_distinct
2261                    && self_null_treatment == other_null_treatment
2262                    && self_args.len() == other_args.len()
2263                    && self_args
2264                        .iter()
2265                        .zip(other_args.iter())
2266                        .all(|(a, b)| a.normalize_eq(b))
2267                    && match (self_filter, other_filter) {
2268                        (Some(self_filter), Some(other_filter)) => {
2269                            self_filter.normalize_eq(other_filter)
2270                        }
2271                        (None, None) => true,
2272                        _ => false,
2273                    }
2274                    && self_order_by
2275                        .iter()
2276                        .zip(other_order_by.iter())
2277                        .all(|(a, b)| {
2278                            a.asc == b.asc
2279                                && a.nulls_first == b.nulls_first
2280                                && a.expr.normalize_eq(&b.expr)
2281                        })
2282                    && self_order_by.len() == other_order_by.len()
2283            }
2284            (Expr::WindowFunction(left), Expr::WindowFunction(other)) => {
2285                let WindowFunction {
2286                    fun: self_fun,
2287                    params:
2288                        WindowFunctionParams {
2289                            args: self_args,
2290                            window_frame: self_window_frame,
2291                            partition_by: self_partition_by,
2292                            order_by: self_order_by,
2293                            null_treatment: self_null_treatment,
2294                        },
2295                } = left.as_ref();
2296                let WindowFunction {
2297                    fun: other_fun,
2298                    params:
2299                        WindowFunctionParams {
2300                            args: other_args,
2301                            window_frame: other_window_frame,
2302                            partition_by: other_partition_by,
2303                            order_by: other_order_by,
2304                            null_treatment: other_null_treatment,
2305                        },
2306                } = other.as_ref();
2307
2308                self_fun.name() == other_fun.name()
2309                    && self_window_frame == other_window_frame
2310                    && self_null_treatment == other_null_treatment
2311                    && self_args.len() == other_args.len()
2312                    && self_args
2313                        .iter()
2314                        .zip(other_args.iter())
2315                        .all(|(a, b)| a.normalize_eq(b))
2316                    && self_partition_by
2317                        .iter()
2318                        .zip(other_partition_by.iter())
2319                        .all(|(a, b)| a.normalize_eq(b))
2320                    && self_order_by
2321                        .iter()
2322                        .zip(other_order_by.iter())
2323                        .all(|(a, b)| {
2324                            a.asc == b.asc
2325                                && a.nulls_first == b.nulls_first
2326                                && a.expr.normalize_eq(&b.expr)
2327                        })
2328            }
2329            (
2330                Expr::Exists(Exists {
2331                    subquery: self_subquery,
2332                    negated: self_negated,
2333                }),
2334                Expr::Exists(Exists {
2335                    subquery: other_subquery,
2336                    negated: other_negated,
2337                }),
2338            ) => {
2339                self_negated == other_negated
2340                    && self_subquery.normalize_eq(other_subquery)
2341            }
2342            (
2343                Expr::InSubquery(InSubquery {
2344                    expr: self_expr,
2345                    subquery: self_subquery,
2346                    negated: self_negated,
2347                }),
2348                Expr::InSubquery(InSubquery {
2349                    expr: other_expr,
2350                    subquery: other_subquery,
2351                    negated: other_negated,
2352                }),
2353            ) => {
2354                self_negated == other_negated
2355                    && self_expr.normalize_eq(other_expr)
2356                    && self_subquery.normalize_eq(other_subquery)
2357            }
2358            (
2359                Expr::ScalarSubquery(self_subquery),
2360                Expr::ScalarSubquery(other_subquery),
2361            ) => self_subquery.normalize_eq(other_subquery),
2362            (
2363                Expr::GroupingSet(GroupingSet::Rollup(self_exprs)),
2364                Expr::GroupingSet(GroupingSet::Rollup(other_exprs)),
2365            )
2366            | (
2367                Expr::GroupingSet(GroupingSet::Cube(self_exprs)),
2368                Expr::GroupingSet(GroupingSet::Cube(other_exprs)),
2369            ) => {
2370                self_exprs.len() == other_exprs.len()
2371                    && self_exprs
2372                        .iter()
2373                        .zip(other_exprs.iter())
2374                        .all(|(a, b)| a.normalize_eq(b))
2375            }
2376            (
2377                Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)),
2378                Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)),
2379            ) => {
2380                self_exprs.len() == other_exprs.len()
2381                    && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| {
2382                        a.len() == b.len()
2383                            && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y))
2384                    })
2385            }
2386            (
2387                Expr::InList(InList {
2388                    expr: self_expr,
2389                    list: self_list,
2390                    negated: self_negated,
2391                }),
2392                Expr::InList(InList {
2393                    expr: other_expr,
2394                    list: other_list,
2395                    negated: other_negated,
2396                }),
2397            ) => {
2398                // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)`
2399                self_negated == other_negated
2400                    && self_expr.normalize_eq(other_expr)
2401                    && self_list.len() == other_list.len()
2402                    && self_list
2403                        .iter()
2404                        .zip(other_list.iter())
2405                        .all(|(a, b)| a.normalize_eq(b))
2406            }
2407            (
2408                Expr::Case(Case {
2409                    expr: self_expr,
2410                    when_then_expr: self_when_then_expr,
2411                    else_expr: self_else_expr,
2412                }),
2413                Expr::Case(Case {
2414                    expr: other_expr,
2415                    when_then_expr: other_when_then_expr,
2416                    else_expr: other_else_expr,
2417                }),
2418            ) => {
2419                // TODO: normalize_eq for when_then_expr
2420                // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END`
2421                self_when_then_expr.len() == other_when_then_expr.len()
2422                    && self_when_then_expr
2423                        .iter()
2424                        .zip(other_when_then_expr.iter())
2425                        .all(|((self_when, self_then), (other_when, other_then))| {
2426                            self_when.normalize_eq(other_when)
2427                                && self_then.normalize_eq(other_then)
2428                        })
2429                    && match (self_expr, other_expr) {
2430                        (Some(self_expr), Some(other_expr)) => {
2431                            self_expr.normalize_eq(other_expr)
2432                        }
2433                        (None, None) => true,
2434                        (_, _) => false,
2435                    }
2436                    && match (self_else_expr, other_else_expr) {
2437                        (Some(self_else_expr), Some(other_else_expr)) => {
2438                            self_else_expr.normalize_eq(other_else_expr)
2439                        }
2440                        (None, None) => true,
2441                        (_, _) => false,
2442                    }
2443            }
2444            (_, _) => self == other,
2445        }
2446    }
2447}
2448
2449impl HashNode for Expr {
2450    /// As it is pretty easy to forget changing this method when `Expr` changes the
2451    /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes
2452    /// compile time.
2453    fn hash_node<H: Hasher>(&self, state: &mut H) {
2454        mem::discriminant(self).hash(state);
2455        match self {
2456            Expr::Alias(Alias {
2457                expr: _expr,
2458                relation,
2459                name,
2460                ..
2461            }) => {
2462                relation.hash(state);
2463                name.hash(state);
2464            }
2465            Expr::Column(column) => {
2466                column.hash(state);
2467            }
2468            Expr::ScalarVariable(data_type, name) => {
2469                data_type.hash(state);
2470                name.hash(state);
2471            }
2472            Expr::Literal(scalar_value, _) => {
2473                scalar_value.hash(state);
2474            }
2475            Expr::BinaryExpr(BinaryExpr {
2476                left: _left,
2477                op,
2478                right: _right,
2479            }) => {
2480                op.hash(state);
2481            }
2482            Expr::Like(Like {
2483                negated,
2484                expr: _expr,
2485                pattern: _pattern,
2486                escape_char,
2487                case_insensitive,
2488            })
2489            | Expr::SimilarTo(Like {
2490                negated,
2491                expr: _expr,
2492                pattern: _pattern,
2493                escape_char,
2494                case_insensitive,
2495            }) => {
2496                negated.hash(state);
2497                escape_char.hash(state);
2498                case_insensitive.hash(state);
2499            }
2500            Expr::Not(_expr)
2501            | Expr::IsNotNull(_expr)
2502            | Expr::IsNull(_expr)
2503            | Expr::IsTrue(_expr)
2504            | Expr::IsFalse(_expr)
2505            | Expr::IsUnknown(_expr)
2506            | Expr::IsNotTrue(_expr)
2507            | Expr::IsNotFalse(_expr)
2508            | Expr::IsNotUnknown(_expr)
2509            | Expr::Negative(_expr) => {}
2510            Expr::Between(Between {
2511                expr: _expr,
2512                negated,
2513                low: _low,
2514                high: _high,
2515            }) => {
2516                negated.hash(state);
2517            }
2518            Expr::Case(Case {
2519                expr: _expr,
2520                when_then_expr: _when_then_expr,
2521                else_expr: _else_expr,
2522            }) => {}
2523            Expr::Cast(Cast {
2524                expr: _expr,
2525                data_type,
2526            })
2527            | Expr::TryCast(TryCast {
2528                expr: _expr,
2529                data_type,
2530            }) => {
2531                data_type.hash(state);
2532            }
2533            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
2534                func.hash(state);
2535            }
2536            Expr::AggregateFunction(AggregateFunction {
2537                func,
2538                params:
2539                    AggregateFunctionParams {
2540                        args: _args,
2541                        distinct,
2542                        filter: _,
2543                        order_by: _,
2544                        null_treatment,
2545                    },
2546            }) => {
2547                func.hash(state);
2548                distinct.hash(state);
2549                null_treatment.hash(state);
2550            }
2551            Expr::WindowFunction(window_fun) => {
2552                let WindowFunction {
2553                    fun,
2554                    params:
2555                        WindowFunctionParams {
2556                            args: _args,
2557                            partition_by: _,
2558                            order_by: _,
2559                            window_frame,
2560                            null_treatment,
2561                        },
2562                } = window_fun.as_ref();
2563                fun.hash(state);
2564                window_frame.hash(state);
2565                null_treatment.hash(state);
2566            }
2567            Expr::InList(InList {
2568                expr: _expr,
2569                list: _list,
2570                negated,
2571            }) => {
2572                negated.hash(state);
2573            }
2574            Expr::Exists(Exists { subquery, negated }) => {
2575                subquery.hash(state);
2576                negated.hash(state);
2577            }
2578            Expr::InSubquery(InSubquery {
2579                expr: _expr,
2580                subquery,
2581                negated,
2582            }) => {
2583                subquery.hash(state);
2584                negated.hash(state);
2585            }
2586            Expr::ScalarSubquery(subquery) => {
2587                subquery.hash(state);
2588            }
2589            #[expect(deprecated)]
2590            Expr::Wildcard { qualifier, options } => {
2591                qualifier.hash(state);
2592                options.hash(state);
2593            }
2594            Expr::GroupingSet(grouping_set) => {
2595                mem::discriminant(grouping_set).hash(state);
2596                match grouping_set {
2597                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
2598                    GroupingSet::GroupingSets(_exprs) => {}
2599                }
2600            }
2601            Expr::Placeholder(place_holder) => {
2602                place_holder.hash(state);
2603            }
2604            Expr::OuterReferenceColumn(data_type, column) => {
2605                data_type.hash(state);
2606                column.hash(state);
2607            }
2608            Expr::Unnest(Unnest { expr: _expr }) => {}
2609        };
2610    }
2611}
2612
2613// Modifies expr if it is a placeholder with datatype of right
2614fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
2615    if let Expr::Placeholder(Placeholder { id: _, data_type }) = expr {
2616        if data_type.is_none() {
2617            let other_dt = other.get_type(schema);
2618            match other_dt {
2619                Err(e) => {
2620                    Err(e.context(format!(
2621                        "Can not find type of {other} needed to infer type of {expr}"
2622                    )))?;
2623                }
2624                Ok(dt) => {
2625                    *data_type = Some(dt);
2626                }
2627            }
2628        };
2629    }
2630    Ok(())
2631}
2632
/// Renders every element of an iterable collection with its `Display`
/// implementation and joins the results with `", "`.
///
/// `$ARRAY` must expose an `iter()` method whose items implement `Display`.
/// An empty collection yields an empty string.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> =
            $ARRAY.iter().map(|item| item.to_string()).collect();
        rendered.join(", ")
    }};
}
2643
2644struct SchemaDisplay<'a>(&'a Expr);
2645impl Display for SchemaDisplay<'_> {
2646    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2647        match self.0 {
2648            // The same as Display
2649            // TODO: remove the next line after `Expr::Wildcard` is removed
2650            #[expect(deprecated)]
2651            Expr::Column(_)
2652            | Expr::Literal(_, _)
2653            | Expr::ScalarVariable(..)
2654            | Expr::OuterReferenceColumn(..)
2655            | Expr::Placeholder(_)
2656            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
2657            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2658                match func.schema_name(params) {
2659                    Ok(name) => {
2660                        write!(f, "{name}")
2661                    }
2662                    Err(e) => {
2663                        write!(f, "got error from schema_name {e}")
2664                    }
2665                }
2666            }
2667            // Expr is not shown since it is aliased
2668            Expr::Alias(Alias {
2669                name,
2670                relation: Some(relation),
2671                ..
2672            }) => write!(f, "{relation}.{name}"),
2673            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2674            Expr::Between(Between {
2675                expr,
2676                negated,
2677                low,
2678                high,
2679            }) => {
2680                if *negated {
2681                    write!(
2682                        f,
2683                        "{} NOT BETWEEN {} AND {}",
2684                        SchemaDisplay(expr),
2685                        SchemaDisplay(low),
2686                        SchemaDisplay(high),
2687                    )
2688                } else {
2689                    write!(
2690                        f,
2691                        "{} BETWEEN {} AND {}",
2692                        SchemaDisplay(expr),
2693                        SchemaDisplay(low),
2694                        SchemaDisplay(high),
2695                    )
2696                }
2697            }
2698            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2699                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
2700            }
2701            Expr::Case(Case {
2702                expr,
2703                when_then_expr,
2704                else_expr,
2705            }) => {
2706                write!(f, "CASE ")?;
2707
2708                if let Some(e) = expr {
2709                    write!(f, "{} ", SchemaDisplay(e))?;
2710                }
2711
2712                for (when, then) in when_then_expr {
2713                    write!(
2714                        f,
2715                        "WHEN {} THEN {} ",
2716                        SchemaDisplay(when),
2717                        SchemaDisplay(then),
2718                    )?;
2719                }
2720
2721                if let Some(e) = else_expr {
2722                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
2723                }
2724
2725                write!(f, "END")
2726            }
2727            // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
2728            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2729                write!(f, "{}", SchemaDisplay(expr))
2730            }
2731            Expr::InList(InList {
2732                expr,
2733                list,
2734                negated,
2735            }) => {
2736                let inlist_name = schema_name_from_exprs(list)?;
2737
2738                if *negated {
2739                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
2740                } else {
2741                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
2742                }
2743            }
2744            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
2745            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
2746            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2747                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2748            }
2749            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2750                write!(f, "GROUPING SETS (")?;
2751                for exprs in lists_of_exprs.iter() {
2752                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
2753                }
2754                write!(f, ")")
2755            }
2756            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2757                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2758            }
2759            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
2760            Expr::IsNotNull(expr) => {
2761                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
2762            }
2763            Expr::IsUnknown(expr) => {
2764                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
2765            }
2766            Expr::IsNotUnknown(expr) => {
2767                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
2768            }
2769            Expr::InSubquery(InSubquery { negated: true, .. }) => {
2770                write!(f, "NOT IN")
2771            }
2772            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
2773            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
2774            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
2775            Expr::IsNotTrue(expr) => {
2776                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
2777            }
2778            Expr::IsNotFalse(expr) => {
2779                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
2780            }
2781            Expr::Like(Like {
2782                negated,
2783                expr,
2784                pattern,
2785                escape_char,
2786                case_insensitive,
2787            }) => {
2788                write!(
2789                    f,
2790                    "{} {}{} {}",
2791                    SchemaDisplay(expr),
2792                    if *negated { "NOT " } else { "" },
2793                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2794                    SchemaDisplay(pattern),
2795                )?;
2796
2797                if let Some(char) = escape_char {
2798                    write!(f, " CHAR '{char}'")?;
2799                }
2800
2801                Ok(())
2802            }
2803            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
2804            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
2805            Expr::Unnest(Unnest { expr }) => {
2806                write!(f, "UNNEST({})", SchemaDisplay(expr))
2807            }
2808            Expr::ScalarFunction(ScalarFunction { func, args }) => {
2809                match func.schema_name(args) {
2810                    Ok(name) => {
2811                        write!(f, "{name}")
2812                    }
2813                    Err(e) => {
2814                        write!(f, "got error from schema_name {e}")
2815                    }
2816                }
2817            }
2818            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
2819                write!(f, "{}", subquery.schema().field(0).name())
2820            }
2821            Expr::SimilarTo(Like {
2822                negated,
2823                expr,
2824                pattern,
2825                escape_char,
2826                ..
2827            }) => {
2828                write!(
2829                    f,
2830                    "{} {} {}",
2831                    SchemaDisplay(expr),
2832                    if *negated {
2833                        "NOT SIMILAR TO"
2834                    } else {
2835                        "SIMILAR TO"
2836                    },
2837                    SchemaDisplay(pattern),
2838                )?;
2839                if let Some(char) = escape_char {
2840                    write!(f, " CHAR '{char}'")?;
2841                }
2842
2843                Ok(())
2844            }
2845            Expr::WindowFunction(window_fun) => {
2846                let WindowFunction { fun, params } = window_fun.as_ref();
2847                match fun {
2848                    WindowFunctionDefinition::AggregateUDF(fun) => {
2849                        match fun.window_function_schema_name(params) {
2850                            Ok(name) => {
2851                                write!(f, "{name}")
2852                            }
2853                            Err(e) => {
2854                                write!(
2855                                    f,
2856                                    "got error from window_function_schema_name {e}"
2857                                )
2858                            }
2859                        }
2860                    }
2861                    _ => {
2862                        let WindowFunctionParams {
2863                            args,
2864                            partition_by,
2865                            order_by,
2866                            window_frame,
2867                            null_treatment,
2868                        } = params;
2869
2870                        write!(
2871                            f,
2872                            "{}({})",
2873                            fun,
2874                            schema_name_from_exprs_comma_separated_without_space(args)?
2875                        )?;
2876
2877                        if let Some(null_treatment) = null_treatment {
2878                            write!(f, " {null_treatment}")?;
2879                        }
2880
2881                        if !partition_by.is_empty() {
2882                            write!(
2883                                f,
2884                                " PARTITION BY [{}]",
2885                                schema_name_from_exprs(partition_by)?
2886                            )?;
2887                        }
2888
2889                        if !order_by.is_empty() {
2890                            write!(
2891                                f,
2892                                " ORDER BY [{}]",
2893                                schema_name_from_sorts(order_by)?
2894                            )?;
2895                        };
2896
2897                        write!(f, " {window_frame}")
2898                    }
2899                }
2900            }
2901        }
2902    }
2903}
2904
2905/// A helper struct for displaying an `Expr` as an SQL-like string.
2906struct SqlDisplay<'a>(&'a Expr);
2907
2908impl Display for SqlDisplay<'_> {
2909    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2910        match self.0 {
2911            Expr::Literal(scalar, _) => scalar.fmt(f),
2912            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2913            Expr::Between(Between {
2914                expr,
2915                negated,
2916                low,
2917                high,
2918            }) => {
2919                if *negated {
2920                    write!(
2921                        f,
2922                        "{} NOT BETWEEN {} AND {}",
2923                        SqlDisplay(expr),
2924                        SqlDisplay(low),
2925                        SqlDisplay(high),
2926                    )
2927                } else {
2928                    write!(
2929                        f,
2930                        "{} BETWEEN {} AND {}",
2931                        SqlDisplay(expr),
2932                        SqlDisplay(low),
2933                        SqlDisplay(high),
2934                    )
2935                }
2936            }
2937            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2938                write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
2939            }
2940            Expr::Case(Case {
2941                expr,
2942                when_then_expr,
2943                else_expr,
2944            }) => {
2945                write!(f, "CASE ")?;
2946
2947                if let Some(e) = expr {
2948                    write!(f, "{} ", SqlDisplay(e))?;
2949                }
2950
2951                for (when, then) in when_then_expr {
2952                    write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
2953                }
2954
2955                if let Some(e) = else_expr {
2956                    write!(f, "ELSE {} ", SqlDisplay(e))?;
2957                }
2958
2959                write!(f, "END")
2960            }
2961            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2962                write!(f, "{}", SqlDisplay(expr))
2963            }
2964            Expr::InList(InList {
2965                expr,
2966                list,
2967                negated,
2968            }) => {
2969                write!(
2970                    f,
2971                    "{}{} IN {}",
2972                    SqlDisplay(expr),
2973                    if *negated { " NOT" } else { "" },
2974                    ExprListDisplay::comma_separated(list.as_slice())
2975                )
2976            }
2977            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2978                write!(
2979                    f,
2980                    "ROLLUP ({})",
2981                    ExprListDisplay::comma_separated(exprs.as_slice())
2982                )
2983            }
2984            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2985                write!(f, "GROUPING SETS (")?;
2986                for exprs in lists_of_exprs.iter() {
2987                    write!(
2988                        f,
2989                        "({})",
2990                        ExprListDisplay::comma_separated(exprs.as_slice())
2991                    )?;
2992                }
2993                write!(f, ")")
2994            }
2995            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2996                write!(
2997                    f,
2998                    "ROLLUP ({})",
2999                    ExprListDisplay::comma_separated(exprs.as_slice())
3000                )
3001            }
3002            Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
3003            Expr::IsNotNull(expr) => {
3004                write!(f, "{} IS NOT NULL", SqlDisplay(expr))
3005            }
3006            Expr::IsUnknown(expr) => {
3007                write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
3008            }
3009            Expr::IsNotUnknown(expr) => {
3010                write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
3011            }
3012            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
3013            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
3014            Expr::IsNotTrue(expr) => {
3015                write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
3016            }
3017            Expr::IsNotFalse(expr) => {
3018                write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
3019            }
3020            Expr::Like(Like {
3021                negated,
3022                expr,
3023                pattern,
3024                escape_char,
3025                case_insensitive,
3026            }) => {
3027                write!(
3028                    f,
3029                    "{} {}{} {}",
3030                    SqlDisplay(expr),
3031                    if *negated { "NOT " } else { "" },
3032                    if *case_insensitive { "ILIKE" } else { "LIKE" },
3033                    SqlDisplay(pattern),
3034                )?;
3035
3036                if let Some(char) = escape_char {
3037                    write!(f, " CHAR '{char}'")?;
3038                }
3039
3040                Ok(())
3041            }
3042            Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
3043            Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
3044            Expr::Unnest(Unnest { expr }) => {
3045                write!(f, "UNNEST({})", SqlDisplay(expr))
3046            }
3047            Expr::SimilarTo(Like {
3048                negated,
3049                expr,
3050                pattern,
3051                escape_char,
3052                ..
3053            }) => {
3054                write!(
3055                    f,
3056                    "{} {} {}",
3057                    SqlDisplay(expr),
3058                    if *negated {
3059                        "NOT SIMILAR TO"
3060                    } else {
3061                        "SIMILAR TO"
3062                    },
3063                    SqlDisplay(pattern),
3064                )?;
3065                if let Some(char) = escape_char {
3066                    write!(f, " CHAR '{char}'")?;
3067                }
3068
3069                Ok(())
3070            }
3071            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3072                match func.human_display(params) {
3073                    Ok(name) => {
3074                        write!(f, "{name}")
3075                    }
3076                    Err(e) => {
3077                        write!(f, "got error from schema_name {e}")
3078                    }
3079                }
3080            }
3081            _ => write!(f, "{}", self.0),
3082        }
3083    }
3084}
3085
3086/// Get schema_name for Vector of expressions
3087///
3088/// Internal usage. Please call `schema_name_from_exprs` instead
3089// TODO: Use ", " to standardize the formatting of Vec<Expr>,
3090// <https://github.com/apache/datafusion/issues/10364>
3091pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
3092    exprs: &[Expr],
3093) -> Result<String, fmt::Error> {
3094    schema_name_from_exprs_inner(exprs, ",")
3095}
3096
3097/// Formats a list of `&Expr` with a custom separator using SQL display format
3098pub struct ExprListDisplay<'a> {
3099    exprs: &'a [Expr],
3100    sep: &'a str,
3101}
3102
3103impl<'a> ExprListDisplay<'a> {
3104    /// Create a new display struct with the given expressions and separator
3105    pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
3106        Self { exprs, sep }
3107    }
3108
3109    /// Create a new display struct with comma-space separator
3110    pub fn comma_separated(exprs: &'a [Expr]) -> Self {
3111        Self::new(exprs, ", ")
3112    }
3113}
3114
3115impl Display for ExprListDisplay<'_> {
3116    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3117        let mut first = true;
3118        for expr in self.exprs {
3119            if !first {
3120                write!(f, "{}", self.sep)?;
3121            }
3122            write!(f, "{}", SqlDisplay(expr))?;
3123            first = false;
3124        }
3125        Ok(())
3126    }
3127}
3128
3129/// Get schema_name for Vector of expressions
3130pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
3131    schema_name_from_exprs_inner(exprs, ", ")
3132}
3133
3134fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
3135    let mut s = String::new();
3136    for (i, e) in exprs.iter().enumerate() {
3137        if i > 0 {
3138            write!(&mut s, "{sep}")?;
3139        }
3140        write!(&mut s, "{}", SchemaDisplay(e))?;
3141    }
3142
3143    Ok(s)
3144}
3145
3146pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
3147    let mut s = String::new();
3148    for (i, e) in sorts.iter().enumerate() {
3149        if i > 0 {
3150            write!(&mut s, ", ")?;
3151        }
3152        let ordering = if e.asc { "ASC" } else { "DESC" };
3153        let nulls_ordering = if e.nulls_first {
3154            "NULLS FIRST"
3155        } else {
3156            "NULLS LAST"
3157        };
3158        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
3159    }
3160
3161    Ok(s)
3162}
3163
3164pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";
3165pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
3166
3167/// Format expressions for display as part of a logical plan. In many cases, this will produce
3168/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
3169impl Display for Expr {
3170    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3171        match self {
3172            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
3173            Expr::Column(c) => write!(f, "{c}"),
3174            Expr::OuterReferenceColumn(_, c) => {
3175                write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
3176            }
3177            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
3178            Expr::Literal(v, metadata) => {
3179                match metadata.as_ref().map(|m| m.is_empty()).unwrap_or(true) {
3180                    false => write!(f, "{v:?} {:?}", metadata.as_ref().unwrap()),
3181                    true => write!(f, "{v:?}"),
3182                }
3183            }
3184            Expr::Case(case) => {
3185                write!(f, "CASE ")?;
3186                if let Some(e) = &case.expr {
3187                    write!(f, "{e} ")?;
3188                }
3189                for (w, t) in &case.when_then_expr {
3190                    write!(f, "WHEN {w} THEN {t} ")?;
3191                }
3192                if let Some(e) = &case.else_expr {
3193                    write!(f, "ELSE {e} ")?;
3194                }
3195                write!(f, "END")
3196            }
3197            Expr::Cast(Cast { expr, data_type }) => {
3198                write!(f, "CAST({expr} AS {data_type:?})")
3199            }
3200            Expr::TryCast(TryCast { expr, data_type }) => {
3201                write!(f, "TRY_CAST({expr} AS {data_type:?})")
3202            }
3203            Expr::Not(expr) => write!(f, "NOT {expr}"),
3204            Expr::Negative(expr) => write!(f, "(- {expr})"),
3205            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
3206            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
3207            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
3208            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
3209            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
3210            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
3211            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
3212            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
3213            Expr::Exists(Exists {
3214                subquery,
3215                negated: true,
3216            }) => write!(f, "NOT EXISTS ({subquery:?})"),
3217            Expr::Exists(Exists {
3218                subquery,
3219                negated: false,
3220            }) => write!(f, "EXISTS ({subquery:?})"),
3221            Expr::InSubquery(InSubquery {
3222                expr,
3223                subquery,
3224                negated: true,
3225            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
3226            Expr::InSubquery(InSubquery {
3227                expr,
3228                subquery,
3229                negated: false,
3230            }) => write!(f, "{expr} IN ({subquery:?})"),
3231            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
3232            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
3233            Expr::ScalarFunction(fun) => {
3234                fmt_function(f, fun.name(), false, &fun.args, true)
3235            }
3236            // TODO: use udf's display_name, need to fix the separator issue, <https://github.com/apache/datafusion/issues/10364>
3237            // Expr::ScalarFunction(ScalarFunction { func, args }) => {
3238            //     write!(f, "{}", func.display_name(args).unwrap())
3239            // }
3240            Expr::WindowFunction(window_fun) => {
3241                let WindowFunction { fun, params } = window_fun.as_ref();
3242                match fun {
3243                    WindowFunctionDefinition::AggregateUDF(fun) => {
3244                        match fun.window_function_display_name(params) {
3245                            Ok(name) => {
3246                                write!(f, "{name}")
3247                            }
3248                            Err(e) => {
3249                                write!(
3250                                    f,
3251                                    "got error from window_function_display_name {e}"
3252                                )
3253                            }
3254                        }
3255                    }
3256                    WindowFunctionDefinition::WindowUDF(fun) => {
3257                        let WindowFunctionParams {
3258                            args,
3259                            partition_by,
3260                            order_by,
3261                            window_frame,
3262                            null_treatment,
3263                        } = params;
3264
3265                        fmt_function(f, &fun.to_string(), false, args, true)?;
3266
3267                        if let Some(nt) = null_treatment {
3268                            write!(f, "{nt}")?;
3269                        }
3270
3271                        if !partition_by.is_empty() {
3272                            write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
3273                        }
3274                        if !order_by.is_empty() {
3275                            write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
3276                        }
3277                        write!(
3278                            f,
3279                            " {} BETWEEN {} AND {}",
3280                            window_frame.units,
3281                            window_frame.start_bound,
3282                            window_frame.end_bound
3283                        )
3284                    }
3285                }
3286            }
3287            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3288                match func.display_name(params) {
3289                    Ok(name) => {
3290                        write!(f, "{name}")
3291                    }
3292                    Err(e) => {
3293                        write!(f, "got error from display_name {e}")
3294                    }
3295                }
3296            }
3297            Expr::Between(Between {
3298                expr,
3299                negated,
3300                low,
3301                high,
3302            }) => {
3303                if *negated {
3304                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
3305                } else {
3306                    write!(f, "{expr} BETWEEN {low} AND {high}")
3307                }
3308            }
3309            Expr::Like(Like {
3310                negated,
3311                expr,
3312                pattern,
3313                escape_char,
3314                case_insensitive,
3315            }) => {
3316                write!(f, "{expr}")?;
3317                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
3318                if *negated {
3319                    write!(f, " NOT")?;
3320                }
3321                if let Some(char) = escape_char {
3322                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
3323                } else {
3324                    write!(f, " {op_name} {pattern}")
3325                }
3326            }
3327            Expr::SimilarTo(Like {
3328                negated,
3329                expr,
3330                pattern,
3331                escape_char,
3332                case_insensitive: _,
3333            }) => {
3334                write!(f, "{expr}")?;
3335                if *negated {
3336                    write!(f, " NOT")?;
3337                }
3338                if let Some(char) = escape_char {
3339                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
3340                } else {
3341                    write!(f, " SIMILAR TO {pattern}")
3342                }
3343            }
3344            Expr::InList(InList {
3345                expr,
3346                list,
3347                negated,
3348            }) => {
3349                if *negated {
3350                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
3351                } else {
3352                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
3353                }
3354            }
3355            #[expect(deprecated)]
3356            Expr::Wildcard { qualifier, options } => match qualifier {
3357                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
3358                None => write!(f, "*{options}"),
3359            },
3360            Expr::GroupingSet(grouping_sets) => match grouping_sets {
3361                GroupingSet::Rollup(exprs) => {
3362                    // ROLLUP (c0, c1, c2)
3363                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
3364                }
3365                GroupingSet::Cube(exprs) => {
3366                    // CUBE (c0, c1, c2)
3367                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
3368                }
3369                GroupingSet::GroupingSets(lists_of_exprs) => {
3370                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
3371                    write!(
3372                        f,
3373                        "GROUPING SETS ({})",
3374                        lists_of_exprs
3375                            .iter()
3376                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
3377                            .collect::<Vec<String>>()
3378                            .join(", ")
3379                    )
3380                }
3381            },
3382            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
3383            Expr::Unnest(Unnest { expr }) => {
3384                write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
3385            }
3386        }
3387    }
3388}
3389
3390fn fmt_function(
3391    f: &mut Formatter,
3392    fun: &str,
3393    distinct: bool,
3394    args: &[Expr],
3395    display: bool,
3396) -> fmt::Result {
3397    let args: Vec<String> = match display {
3398        true => args.iter().map(|arg| format!("{arg}")).collect(),
3399        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
3400    };
3401
3402    let distinct_str = match distinct {
3403        true => "DISTINCT ",
3404        false => "",
3405    };
3406    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
3407}
3408
3409/// The name of the column (field) that this `Expr` will produce in the physical plan.
3410/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
3411pub fn physical_name(expr: &Expr) -> Result<String> {
3412    match expr {
3413        Expr::Column(col) => Ok(col.name.clone()),
3414        Expr::Alias(alias) => Ok(alias.name.clone()),
3415        _ => Ok(expr.schema_name().to_string()),
3416    }
3417}
3418
3419#[cfg(test)]
3420mod test {
3421    use crate::expr_fn::col;
3422    use crate::{
3423        case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
3424        ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
3425    };
3426    use arrow::datatypes::{Field, Schema};
3427    use sqlparser::ast;
3428    use sqlparser::ast::{Ident, IdentWithAlias};
3429    use std::any::Any;
3430
3431    #[test]
3432    fn infer_placeholder_in_clause() {
3433        // SELECT * FROM employees WHERE department_id IN ($1, $2, $3);
3434        let column = col("department_id");
3435        let param_placeholders = vec![
3436            Expr::Placeholder(Placeholder {
3437                id: "$1".to_string(),
3438                data_type: None,
3439            }),
3440            Expr::Placeholder(Placeholder {
3441                id: "$2".to_string(),
3442                data_type: None,
3443            }),
3444            Expr::Placeholder(Placeholder {
3445                id: "$3".to_string(),
3446                data_type: None,
3447            }),
3448        ];
3449        let in_list = Expr::InList(InList {
3450            expr: Box::new(column),
3451            list: param_placeholders,
3452            negated: false,
3453        });
3454
3455        let schema = Arc::new(Schema::new(vec![
3456            Field::new("name", DataType::Utf8, true),
3457            Field::new("department_id", DataType::Int32, true),
3458        ]));
3459        let df_schema = DFSchema::try_from(schema).unwrap();
3460
3461        let (inferred_expr, contains_placeholder) =
3462            in_list.infer_placeholder_types(&df_schema).unwrap();
3463
3464        assert!(contains_placeholder);
3465
3466        match inferred_expr {
3467            Expr::InList(in_list) => {
3468                for expr in in_list.list {
3469                    match expr {
3470                        Expr::Placeholder(placeholder) => {
3471                            assert_eq!(
3472                                placeholder.data_type,
3473                                Some(DataType::Int32),
3474                                "Placeholder {} should infer Int32",
3475                                placeholder.id
3476                            );
3477                        }
3478                        _ => panic!("Expected Placeholder expression"),
3479                    }
3480                }
3481            }
3482            _ => panic!("Expected InList expression"),
3483        }
3484    }
3485
3486    #[test]
3487    fn infer_placeholder_like_and_similar_to() {
3488        // name LIKE $1
3489        let schema =
3490            Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
3491        let df_schema = DFSchema::try_from(schema).unwrap();
3492
3493        let like = Like {
3494            expr: Box::new(col("name")),
3495            pattern: Box::new(Expr::Placeholder(Placeholder {
3496                id: "$1".to_string(),
3497                data_type: None,
3498            })),
3499            negated: false,
3500            case_insensitive: false,
3501            escape_char: None,
3502        };
3503
3504        let expr = Expr::Like(like.clone());
3505
3506        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3507        match inferred_expr {
3508            Expr::Like(like) => match *like.pattern {
3509                Expr::Placeholder(placeholder) => {
3510                    assert_eq!(placeholder.data_type, Some(DataType::Utf8));
3511                }
3512                _ => panic!("Expected Placeholder"),
3513            },
3514            _ => panic!("Expected Like"),
3515        }
3516
3517        // name SIMILAR TO $1
3518        let expr = Expr::SimilarTo(like);
3519
3520        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3521        match inferred_expr {
3522            Expr::SimilarTo(like) => match *like.pattern {
3523                Expr::Placeholder(placeholder) => {
3524                    assert_eq!(
3525                        placeholder.data_type,
3526                        Some(DataType::Utf8),
3527                        "Placeholder {} should infer Utf8",
3528                        placeholder.id
3529                    );
3530                }
3531                _ => panic!("Expected Placeholder expression"),
3532            },
3533            _ => panic!("Expected SimilarTo expression"),
3534        }
3535    }
3536
3537    #[test]
3538    #[allow(deprecated)]
3539    fn format_case_when() -> Result<()> {
3540        let expr = case(col("a"))
3541            .when(lit(1), lit(true))
3542            .when(lit(0), lit(false))
3543            .otherwise(lit(ScalarValue::Null))?;
3544        let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";
3545        assert_eq!(expected, expr.canonical_name());
3546        assert_eq!(expected, format!("{expr}"));
3547        Ok(())
3548    }
3549
3550    #[test]
3551    #[allow(deprecated)]
3552    fn format_cast() -> Result<()> {
3553        let expr = Expr::Cast(Cast {
3554            expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)), None)),
3555            data_type: DataType::Utf8,
3556        });
3557        let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
3558        assert_eq!(expected_canonical, expr.canonical_name());
3559        assert_eq!(expected_canonical, format!("{expr}"));
3560        // Note that CAST intentionally has a name that is different from its `Display`
3561        // representation. CAST does not change the name of expressions.
3562        assert_eq!("Float32(1.23)", expr.schema_name().to_string());
3563        Ok(())
3564    }
3565
3566    #[test]
3567    fn test_partial_ord() {
3568        // Test validates that partial ord is defined for Expr, not
3569        // intended to exhaustively test all possibilities
3570        let exp1 = col("a") + lit(1);
3571        let exp2 = col("a") + lit(2);
3572        let exp3 = !(col("a") + lit(2));
3573
3574        assert!(exp1 < exp2);
3575        assert!(exp3 > exp2);
3576        assert!(exp1 < exp3)
3577    }
3578
3579    #[test]
3580    fn test_collect_expr() -> Result<()> {
3581        // single column
3582        {
3583            let expr = &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
3584            let columns = expr.column_refs();
3585            assert_eq!(1, columns.len());
3586            assert!(columns.contains(&Column::from_name("a")));
3587        }
3588
3589        // multiple columns
3590        {
3591            let expr = col("a") + col("b") + lit(1);
3592            let columns = expr.column_refs();
3593            assert_eq!(2, columns.len());
3594            assert!(columns.contains(&Column::from_name("a")));
3595            assert!(columns.contains(&Column::from_name("b")));
3596        }
3597
3598        Ok(())
3599    }
3600
3601    #[test]
3602    fn test_logical_ops() {
3603        assert_eq!(
3604            format!("{}", lit(1u32).eq(lit(2u32))),
3605            "UInt32(1) = UInt32(2)"
3606        );
3607        assert_eq!(
3608            format!("{}", lit(1u32).not_eq(lit(2u32))),
3609            "UInt32(1) != UInt32(2)"
3610        );
3611        assert_eq!(
3612            format!("{}", lit(1u32).gt(lit(2u32))),
3613            "UInt32(1) > UInt32(2)"
3614        );
3615        assert_eq!(
3616            format!("{}", lit(1u32).gt_eq(lit(2u32))),
3617            "UInt32(1) >= UInt32(2)"
3618        );
3619        assert_eq!(
3620            format!("{}", lit(1u32).lt(lit(2u32))),
3621            "UInt32(1) < UInt32(2)"
3622        );
3623        assert_eq!(
3624            format!("{}", lit(1u32).lt_eq(lit(2u32))),
3625            "UInt32(1) <= UInt32(2)"
3626        );
3627        assert_eq!(
3628            format!("{}", lit(1u32).and(lit(2u32))),
3629            "UInt32(1) AND UInt32(2)"
3630        );
3631        assert_eq!(
3632            format!("{}", lit(1u32).or(lit(2u32))),
3633            "UInt32(1) OR UInt32(2)"
3634        );
3635    }
3636
3637    #[test]
3638    fn test_is_volatile_scalar_func() {
3639        // UDF
3640        #[derive(Debug)]
3641        struct TestScalarUDF {
3642            signature: Signature,
3643        }
3644        impl ScalarUDFImpl for TestScalarUDF {
3645            fn as_any(&self) -> &dyn Any {
3646                self
3647            }
3648            fn name(&self) -> &str {
3649                "TestScalarUDF"
3650            }
3651
3652            fn signature(&self) -> &Signature {
3653                &self.signature
3654            }
3655
3656            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
3657                Ok(DataType::Utf8)
3658            }
3659
3660            fn invoke_with_args(
3661                &self,
3662                _args: ScalarFunctionArgs,
3663            ) -> Result<ColumnarValue> {
3664                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
3665            }
3666        }
3667        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3668            signature: Signature::uniform(1, vec![DataType::Float32], Volatility::Stable),
3669        }));
3670        assert_ne!(udf.signature().volatility, Volatility::Volatile);
3671
3672        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3673            signature: Signature::uniform(
3674                1,
3675                vec![DataType::Float32],
3676                Volatility::Volatile,
3677            ),
3678        }));
3679        assert_eq!(udf.signature().volatility, Volatility::Volatile);
3680    }
3681
3682    use super::*;
3683
3684    #[test]
3685    fn test_display_wildcard() {
3686        assert_eq!(format!("{}", wildcard()), "*");
3687        assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*");
3688        assert_eq!(
3689            format!(
3690                "{}",
3691                wildcard_with_options(wildcard_options(
3692                    Some(IlikeSelectItem {
3693                        pattern: "c1".to_string()
3694                    }),
3695                    None,
3696                    None,
3697                    None,
3698                    None
3699                ))
3700            ),
3701            "* ILIKE 'c1'"
3702        );
3703        assert_eq!(
3704            format!(
3705                "{}",
3706                wildcard_with_options(wildcard_options(
3707                    None,
3708                    Some(ExcludeSelectItem::Multiple(vec![
3709                        Ident::from("c1"),
3710                        Ident::from("c2")
3711                    ])),
3712                    None,
3713                    None,
3714                    None
3715                ))
3716            ),
3717            "* EXCLUDE (c1, c2)"
3718        );
3719        assert_eq!(
3720            format!(
3721                "{}",
3722                wildcard_with_options(wildcard_options(
3723                    None,
3724                    None,
3725                    Some(ExceptSelectItem {
3726                        first_element: Ident::from("c1"),
3727                        additional_elements: vec![Ident::from("c2")]
3728                    }),
3729                    None,
3730                    None
3731                ))
3732            ),
3733            "* EXCEPT (c1, c2)"
3734        );
3735        assert_eq!(
3736            format!(
3737                "{}",
3738                wildcard_with_options(wildcard_options(
3739                    None,
3740                    None,
3741                    None,
3742                    Some(PlannedReplaceSelectItem {
3743                        items: vec![ReplaceSelectElement {
3744                            expr: ast::Expr::Identifier(Ident::from("c1")),
3745                            column_name: Ident::from("a1"),
3746                            as_keyword: false
3747                        }],
3748                        planned_expressions: vec![]
3749                    }),
3750                    None
3751                ))
3752            ),
3753            "* REPLACE (c1 a1)"
3754        );
3755        assert_eq!(
3756            format!(
3757                "{}",
3758                wildcard_with_options(wildcard_options(
3759                    None,
3760                    None,
3761                    None,
3762                    None,
3763                    Some(RenameSelectItem::Multiple(vec![IdentWithAlias {
3764                        ident: Ident::from("c1"),
3765                        alias: Ident::from("a1")
3766                    }]))
3767                ))
3768            ),
3769            "* RENAME (c1 AS a1)"
3770        )
3771    }
3772
3773    #[test]
3774    fn test_schema_display_alias_with_relation() {
3775        assert_eq!(
3776            format!(
3777                "{}",
3778                SchemaDisplay(
3779                    &lit(1).alias_qualified("table_name".into(), "column_name")
3780                )
3781            ),
3782            "table_name.column_name"
3783        );
3784    }
3785
3786    #[test]
3787    fn test_schema_display_alias_without_relation() {
3788        assert_eq!(
3789            format!(
3790                "{}",
3791                SchemaDisplay(&lit(1).alias_qualified(None::<&str>, "column_name"))
3792            ),
3793            "column_name"
3794        );
3795    }
3796
3797    fn wildcard_options(
3798        opt_ilike: Option<IlikeSelectItem>,
3799        opt_exclude: Option<ExcludeSelectItem>,
3800        opt_except: Option<ExceptSelectItem>,
3801        opt_replace: Option<PlannedReplaceSelectItem>,
3802        opt_rename: Option<RenameSelectItem>,
3803    ) -> WildcardOptions {
3804        WildcardOptions {
3805            ilike: opt_ilike,
3806            exclude: opt_exclude,
3807            except: opt_except,
3808            replace: opt_replace,
3809            rename: opt_rename,
3810        }
3811    }
3812
3813    #[test]
3814    fn test_size_of_expr() {
3815        // because Expr is such a widely used struct in DataFusion
3816        // it is important to keep its size as small as possible
3817        //
3818        // If this test fails when you change `Expr`, please try
3819        // `Box`ing the fields to make `Expr` smaller
3820        // See https://github.com/apache/datafusion/issues/16199 for details
3821        assert_eq!(size_of::<Expr>(), 128);
3822        assert_eq!(size_of::<ScalarValue>(), 64);
3823        assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
3824        assert_eq!(size_of::<Vec<Expr>>(), 24);
3825        assert_eq!(size_of::<Arc<Expr>>(), 8);
3826    }
3827}