Skip to main content

datafusion_physical_expr/simplifier/
const_evaluator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Constant expression evaluation for the physical expression simplifier
19
20use std::sync::Arc;
21
22use arrow::array::new_null_array;
23use arrow::datatypes::{DataType, Field, Schema};
24use arrow::record_batch::RecordBatch;
25use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
26use datafusion_common::{Result, ScalarValue};
27use datafusion_expr_common::columnar_value::ColumnarValue;
28
29use crate::PhysicalExpr;
30use crate::expressions::{Column, Literal};
31
32/// Simplify expressions that consist only of literals by evaluating them.
33///
34/// This function checks if all children of the given expression are literals.
35/// If so, it evaluates the expression against a dummy RecordBatch and returns
36/// the result as a new Literal.
37///
38/// # Example transformations
39/// - `1 + 2` -> `3`
40/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal)
41/// - `'hello' || ' world'` -> `'hello world'`
42#[deprecated(
43    since = "53.0.0",
44    note = "This function will be removed in a future release in favor of a private implementation that depends on other implementation details. Please open an issue if you have a use case for keeping it."
45)]
46pub fn simplify_const_expr(
47    expr: Arc<dyn PhysicalExpr>,
48) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
49    let batch = create_dummy_batch()?;
50    // If expr is already a const literal or can't be evaluated into one.
51    if expr.as_any().is::<Literal>() || (!can_evaluate_as_constant(&expr)) {
52        return Ok(Transformed::no(expr));
53    }
54
55    // Evaluate the expression
56    match expr.evaluate(&batch) {
57        Ok(ColumnarValue::Scalar(scalar)) => {
58            Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
59        }
60        Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => {
61            // Some operations return an array even for scalar inputs
62            let scalar = ScalarValue::try_from_array(&arr, 0)?;
63            Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
64        }
65        Ok(_) => {
66            // Unexpected result - keep original expression
67            Ok(Transformed::no(expr))
68        }
69        Err(_) => {
70            // On error, keep original expression
71            // The expression might succeed at runtime due to short-circuit evaluation
72            // or other runtime conditions
73            Ok(Transformed::no(expr))
74        }
75    }
76}
77
78/// Simplify expressions whose immediate children are all literals.
79///
80/// This function only checks the direct children of the expression,
81/// not the entire subtree. It is designed to be used with bottom-up tree
82/// traversal, where children are simplified before parents.
83///
84/// # Example transformations
85/// - `1 + 2` -> `3`
86/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal, inner expr simplified first)
87/// - `'hello' || ' world'` -> `'hello world'`
88pub(crate) fn simplify_const_expr_immediate(
89    expr: Arc<dyn PhysicalExpr>,
90    batch: &RecordBatch,
91) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
92    // Already a literal - nothing to do
93    if expr.as_any().is::<Literal>() {
94        return Ok(Transformed::no(expr));
95    }
96
97    // Column references cannot be evaluated at plan time
98    if expr.as_any().is::<Column>() {
99        return Ok(Transformed::no(expr));
100    }
101
102    // Volatile nodes cannot be evaluated at plan time
103    if expr.is_volatile_node() {
104        return Ok(Transformed::no(expr));
105    }
106
107    // Since transform visits bottom-up, children have already been simplified.
108    // If all children are now Literals, this node can be const-evaluated.
109    // This is O(k) where k = number of children, instead of O(subtree).
110    let all_children_literal = expr
111        .children()
112        .iter()
113        .all(|child| child.as_any().is::<Literal>());
114
115    if !all_children_literal {
116        return Ok(Transformed::no(expr));
117    }
118
119    // Evaluate the expression
120    match expr.evaluate(batch) {
121        Ok(ColumnarValue::Scalar(scalar)) => {
122            Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
123        }
124        Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => {
125            // Some operations return an array even for scalar inputs
126            let scalar = ScalarValue::try_from_array(&arr, 0)?;
127            Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
128        }
129        Ok(_) => {
130            // Unexpected result - keep original expression
131            Ok(Transformed::no(expr))
132        }
133        Err(_) => {
134            // On error, keep original expression
135            // The expression might succeed at runtime due to short-circuit evaluation
136            // or other runtime conditions
137            Ok(Transformed::no(expr))
138        }
139    }
140}
141
142/// Create a 1-row dummy RecordBatch for evaluating constant expressions.
143///
144/// The batch is never actually accessed for data - it's just needed because
145/// the PhysicalExpr::evaluate API requires a RecordBatch. For expressions
146/// that only contain literals, the batch content is irrelevant.
147///
148/// This is the same approach used in the logical expression `ConstEvaluator`.
149pub(crate) fn create_dummy_batch() -> Result<RecordBatch> {
150    // RecordBatch requires at least one column
151    let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
152    let col = new_null_array(&DataType::Null, 1);
153    Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
154}
155
156fn can_evaluate_as_constant(expr: &Arc<dyn PhysicalExpr>) -> bool {
157    let mut can_evaluate = true;
158
159    expr.apply(|e| {
160        if e.as_any().is::<Column>() || e.is_volatile_node() {
161            can_evaluate = false;
162            Ok(TreeNodeRecursion::Stop)
163        } else {
164            Ok(TreeNodeRecursion::Continue)
165        }
166    })
167    .expect("apply should not fail");
168
169    can_evaluate
170}
171
172/// Check if this expression has any column references.
173#[deprecated(
174    since = "53.0.0",
175    note = "This function isn't used internally and is trivial to implement, therefore it will be removed in a future release."
176)]
177pub fn has_column_references(expr: &Arc<dyn PhysicalExpr>) -> bool {
178    let mut has_columns = false;
179    expr.apply(|expr| {
180        if expr.as_any().downcast_ref::<Column>().is_some() {
181            has_columns = true;
182            Ok(TreeNodeRecursion::Stop)
183        } else {
184            Ok(TreeNodeRecursion::Continue)
185        }
186    })
187    .expect("apply should not fail");
188    has_columns
189}