datafusion_physical_expr/simplifier/const_evaluator.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Constant expression evaluation for the physical expression simplifier
19
20use std::sync::Arc;
21
22use arrow::array::new_null_array;
23use arrow::datatypes::{DataType, Field, Schema};
24use arrow::record_batch::RecordBatch;
25use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
26use datafusion_common::{Result, ScalarValue, internal_datafusion_err};
27use datafusion_expr_common::columnar_value::ColumnarValue;
28
29use crate::PhysicalExpr;
30use crate::expressions::{Column, Literal};
31
32/// Simplify expressions that consist only of literals by evaluating them.
33///
34/// This function checks if all children of the given expression are literals.
35/// If so, it evaluates the expression against a dummy RecordBatch and returns
36/// the result as a new Literal.
37///
38/// # Example transformations
39/// - `1 + 2` -> `3`
40/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal)
41/// - `'hello' || ' world'` -> `'hello world'`
42#[deprecated(
43 since = "53.0.0",
44 note = "This function will be removed in a future release in favor of a private implementation that depends on other implementation details. Please open an issue if you have a use case for keeping it."
45)]
46pub fn simplify_const_expr(
47 expr: Arc<dyn PhysicalExpr>,
48) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
49 let batch = create_dummy_batch()?;
50 // If expr is already a const literal or can't be evaluated into one.
51 if expr.is::<Literal>() || (!can_evaluate_as_constant(&expr)) {
52 return Ok(Transformed::no(expr));
53 }
54
55 // Evaluate the expression
56 match expr.evaluate(batch) {
57 Ok(ColumnarValue::Scalar(scalar)) => {
58 Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
59 }
60 Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => {
61 // Some operations return an array even for scalar inputs
62 let scalar = ScalarValue::try_from_array(&arr, 0)?;
63 Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
64 }
65 Ok(_) => {
66 // Unexpected result - keep original expression
67 Ok(Transformed::no(expr))
68 }
69 Err(_) => {
70 // On error, keep original expression
71 // The expression might succeed at runtime due to short-circuit evaluation
72 // or other runtime conditions
73 Ok(Transformed::no(expr))
74 }
75 }
76}
77
78/// Simplify expressions whose immediate children are all literals.
79///
80/// This function only checks the direct children of the expression,
81/// not the entire subtree. It is designed to be used with bottom-up tree
82/// traversal, where children are simplified before parents.
83///
84/// # Example transformations
85/// - `1 + 2` -> `3`
86/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal, inner expr simplified first)
87/// - `'hello' || ' world'` -> `'hello world'`
88pub(crate) fn simplify_const_expr_immediate(
89 expr: Arc<dyn PhysicalExpr>,
90 batch: &RecordBatch,
91) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
92 // Already a literal - nothing to do
93 if expr.is::<Literal>() {
94 return Ok(Transformed::no(expr));
95 }
96
97 // Column references cannot be evaluated at plan time
98 if expr.is::<Column>() {
99 return Ok(Transformed::no(expr));
100 }
101
102 // Volatile nodes cannot be evaluated at plan time
103 if expr.is_volatile_node() {
104 return Ok(Transformed::no(expr));
105 }
106
107 // Since transform visits bottom-up, children have already been simplified.
108 // If all children are now Literals, this node can be const-evaluated.
109 // This is O(k) where k = number of children, instead of O(subtree).
110 //
111 // Leaf nodes (zero children) are rejected here. Const-folding is only
112 // sound for a node whose value is fully determined by its child literals;
113 // a leaf has no children, so there is nothing to derive constness from.
114 // The known leaves that are constant (`Literal`) or known-non-constant
115 // (`Column`, volatile) are handled by the dedicated checks above. Any
116 // other leaf is opaque to the simplifier and must be preserved as-is,
117 // otherwise `all` over an empty child list would vacuously hold and the
118 // node would be evaluated against the dummy batch, producing a value
119 // unrelated to its real runtime semantics.
120 let children = expr.children();
121 if children.is_empty() || !children.iter().all(|c| c.is::<Literal>()) {
122 return Ok(Transformed::no(expr));
123 }
124
125 // Evaluate the expression
126 match expr.evaluate(batch) {
127 Ok(ColumnarValue::Scalar(scalar)) => {
128 Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
129 }
130 Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => {
131 // Some operations return an array even for scalar inputs
132 let scalar = ScalarValue::try_from_array(&arr, 0)?;
133 Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
134 }
135 Ok(_) => {
136 // Unexpected result - keep original expression
137 Ok(Transformed::no(expr))
138 }
139 Err(_) => {
140 // On error, keep original expression
141 // The expression might succeed at runtime due to short-circuit evaluation
142 // or other runtime conditions
143 Ok(Transformed::no(expr))
144 }
145 }
146}
147
148/// Create a 1-row dummy RecordBatch for evaluating constant expressions.
149///
150/// The batch is never actually accessed for data - it's just needed because
151/// the PhysicalExpr::evaluate API requires a RecordBatch. For expressions
152/// that only contain literals, the batch content is irrelevant.
153///
154/// This is the same approach used in the logical expression `ConstEvaluator`.
155pub(crate) fn create_dummy_batch() -> Result<&'static RecordBatch> {
156 static DUMMY_BATCH: std::sync::OnceLock<Result<RecordBatch>> =
157 std::sync::OnceLock::new();
158 DUMMY_BATCH
159 .get_or_init(|| {
160 // RecordBatch requires at least one column
161 let dummy_schema =
162 Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
163 let col = new_null_array(&DataType::Null, 1);
164 Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
165 })
166 .as_ref()
167 .map_err(|e| {
168 internal_datafusion_err!(
169 "Failed to create dummy batch for constant expression evaluation: {e}"
170 )
171 })
172}
173
174fn can_evaluate_as_constant(expr: &Arc<dyn PhysicalExpr>) -> bool {
175 let mut can_evaluate = true;
176
177 expr.apply(|e| {
178 if e.is::<Column>() || e.is_volatile_node() {
179 can_evaluate = false;
180 Ok(TreeNodeRecursion::Stop)
181 } else {
182 Ok(TreeNodeRecursion::Continue)
183 }
184 })
185 .expect("apply should not fail");
186
187 can_evaluate
188}
189
190/// Check if this expression has any column references.
191#[deprecated(
192 since = "53.0.0",
193 note = "This function isn't used internally and is trivial to implement, therefore it will be removed in a future release."
194)]
195pub fn has_column_references(expr: &Arc<dyn PhysicalExpr>) -> bool {
196 let mut has_columns = false;
197 expr.apply(|expr| {
198 if expr.downcast_ref::<Column>().is_some() {
199 has_columns = true;
200 Ok(TreeNodeRecursion::Stop)
201 } else {
202 Ok(TreeNodeRecursion::Continue)
203 }
204 })
205 .expect("apply should not fail");
206 has_columns
207}