datafusion_physical_expr/simplifier/const_evaluator.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Constant expression evaluation for the physical expression simplifier
19
20use std::sync::Arc;
21
22use arrow::array::new_null_array;
23use arrow::datatypes::{DataType, Field, Schema};
24use arrow::record_batch::RecordBatch;
25use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
26use datafusion_common::{Result, ScalarValue};
27use datafusion_expr_common::columnar_value::ColumnarValue;
28use datafusion_physical_expr_common::physical_expr::is_volatile;
29
30use crate::PhysicalExpr;
31use crate::expressions::{Column, Literal};
32
33/// Simplify expressions that consist only of literals by evaluating them.
34///
35/// This function checks if all children of the given expression are literals.
36/// If so, it evaluates the expression against a dummy RecordBatch and returns
37/// the result as a new Literal.
38///
39/// # Example transformations
40/// - `1 + 2` -> `3`
41/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal)
42/// - `'hello' || ' world'` -> `'hello world'`
43pub fn simplify_const_expr(
44 expr: &Arc<dyn PhysicalExpr>,
45) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
46 if is_volatile(expr) || has_column_references(expr) {
47 return Ok(Transformed::no(Arc::clone(expr)));
48 }
49
50 // Create a 1-row dummy batch for evaluation
51 let batch = create_dummy_batch()?;
52
53 // Evaluate the expression
54 match expr.evaluate(&batch) {
55 Ok(ColumnarValue::Scalar(scalar)) => {
56 Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
57 }
58 Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => {
59 // Some operations return an array even for scalar inputs
60 let scalar = ScalarValue::try_from_array(&arr, 0)?;
61 Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
62 }
63 Ok(_) => {
64 // Unexpected result - keep original expression
65 Ok(Transformed::no(Arc::clone(expr)))
66 }
67 Err(_) => {
68 // On error, keep original expression
69 // The expression might succeed at runtime due to short-circuit evaluation
70 // or other runtime conditions
71 Ok(Transformed::no(Arc::clone(expr)))
72 }
73 }
74}
75
76/// Create a 1-row dummy RecordBatch for evaluating constant expressions.
77///
78/// The batch is never actually accessed for data - it's just needed because
79/// the PhysicalExpr::evaluate API requires a RecordBatch. For expressions
80/// that only contain literals, the batch content is irrelevant.
81///
82/// This is the same approach used in the logical expression `ConstEvaluator`.
83fn create_dummy_batch() -> Result<RecordBatch> {
84 // RecordBatch requires at least one column
85 let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
86 let col = new_null_array(&DataType::Null, 1);
87 Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
88}
89
90/// Check if this expression has any column references.
91pub fn has_column_references(expr: &Arc<dyn PhysicalExpr>) -> bool {
92 let mut has_columns = false;
93 expr.apply(|expr| {
94 if expr.as_any().downcast_ref::<Column>().is_some() {
95 has_columns = true;
96 Ok(TreeNodeRecursion::Stop)
97 } else {
98 Ok(TreeNodeRecursion::Continue)
99 }
100 })
101 .expect("apply should not fail");
102 has_columns
103}