Skip to main content

datafusion_expr/
simplify.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Structs to provide the information needed for expression simplification.
19
20use std::sync::Arc;
21
22use arrow::datatypes::DataType;
23use chrono::{DateTime, Utc};
24use datafusion_common::config::ConfigOptions;
25use datafusion_common::{DFSchema, DFSchemaRef, Result};
26
27use crate::{Expr, ExprSchemable};
28
29/// Provides simplification information based on schema, query execution time,
30/// and configuration options.
31///
32/// # Example
33/// See the `simplify_demo` in the [`expr_api` example]
34///
35/// [`expr_api` example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/expr_api.rs
36#[derive(Debug, Clone)]
37pub struct SimplifyContext {
38    schema: DFSchemaRef,
39    query_execution_start_time: Option<DateTime<Utc>>,
40    config_options: Arc<ConfigOptions>,
41}
42
43/// Builder for [`SimplifyContext`].
44#[derive(Debug, Default)]
45pub struct SimplifyContextBuilder {
46    schema: Option<DFSchemaRef>,
47    query_execution_start_time: Option<DateTime<Utc>>,
48    config_options: Option<Arc<ConfigOptions>>,
49}
50
51impl Default for SimplifyContext {
52    fn default() -> Self {
53        Self {
54            schema: Arc::new(DFSchema::empty()),
55            query_execution_start_time: None,
56            config_options: Arc::new(ConfigOptions::default()),
57        }
58    }
59}
60
61impl SimplifyContext {
62    /// Returns a builder for [`SimplifyContext`].
63    pub fn builder() -> SimplifyContextBuilder {
64        SimplifyContextBuilder::default()
65    }
66
67    #[deprecated(
68        since = "54.0.0",
69        note = "Use SimplifyContextBuilder if you intend to use non-default values."
70    )]
71    /// Set the [`ConfigOptions`] for this context
72    pub fn with_config_options(mut self, config_options: Arc<ConfigOptions>) -> Self {
73        self.config_options = config_options;
74        self
75    }
76
77    #[deprecated(
78        since = "54.0.0",
79        note = "Use SimplifyContextBuilder if you intend to use non-default values."
80    )]
81    /// Set the schema for this context
82    pub fn with_schema(mut self, schema: DFSchemaRef) -> Self {
83        self.schema = schema;
84        self
85    }
86
87    #[deprecated(
88        since = "54.0.0",
89        note = "Use SimplifyContextBuilder if you intend to use non-default values."
90    )]
91    /// Set the query execution start time
92    pub fn with_query_execution_start_time(
93        mut self,
94        query_execution_start_time: Option<DateTime<Utc>>,
95    ) -> Self {
96        self.query_execution_start_time = query_execution_start_time;
97        self
98    }
99
100    #[deprecated(
101        since = "54.0.0",
102        note = "Use SimplifyContextBuilder if you intend to use non-default values."
103    )]
104    /// Set the query execution start to the current time
105    pub fn with_current_time(mut self) -> Self {
106        self.query_execution_start_time = Some(Utc::now());
107        self
108    }
109
110    /// Returns the schema
111    pub fn schema(&self) -> &DFSchemaRef {
112        &self.schema
113    }
114
115    /// Returns true if this Expr has boolean type
116    pub fn is_boolean_type(&self, expr: &Expr) -> Result<bool> {
117        Ok(expr.get_type(&self.schema)? == DataType::Boolean)
118    }
119
120    /// Returns true if expr is nullable
121    pub fn nullable(&self, expr: &Expr) -> Result<bool> {
122        expr.nullable(self.schema.as_ref())
123    }
124
125    /// Returns data type of this expr needed for determining optimized int type of a value
126    pub fn get_data_type(&self, expr: &Expr) -> Result<DataType> {
127        expr.get_type(&self.schema)
128    }
129
130    /// Returns the time at which the query execution started.
131    /// If `None`, time-dependent functions like `now()` will not be simplified.
132    pub fn query_execution_start_time(&self) -> Option<DateTime<Utc>> {
133        self.query_execution_start_time
134    }
135
136    /// Returns the configuration options for the session.
137    pub fn config_options(&self) -> &Arc<ConfigOptions> {
138        &self.config_options
139    }
140}
141
142impl SimplifyContextBuilder {
143    /// Set the [`ConfigOptions`] for this context.
144    pub fn with_config_options(mut self, config_options: Arc<ConfigOptions>) -> Self {
145        self.config_options = Some(config_options);
146        self
147    }
148
149    /// Set the schema for this context.
150    pub fn with_schema(mut self, schema: DFSchemaRef) -> Self {
151        self.schema = Some(schema);
152        self
153    }
154
155    /// Set the query execution start time.
156    pub fn with_query_execution_start_time(
157        mut self,
158        query_execution_start_time: Option<DateTime<Utc>>,
159    ) -> Self {
160        self.query_execution_start_time = query_execution_start_time;
161        self
162    }
163
164    /// Set the query execution start to the current time.
165    pub fn with_current_time(mut self) -> Self {
166        self.query_execution_start_time = Some(Utc::now());
167        self
168    }
169
170    /// Build a [`SimplifyContext`], filling in any unspecified fields with defaults.
171    pub fn build(self) -> SimplifyContext {
172        SimplifyContext {
173            schema: self.schema.unwrap_or_else(|| Arc::new(DFSchema::empty())),
174            query_execution_start_time: self.query_execution_start_time,
175            config_options: self
176                .config_options
177                .unwrap_or_else(|| Arc::new(ConfigOptions::default())),
178        }
179    }
180}
181
182/// Was the expression simplified?
183#[derive(Debug)]
184pub enum ExprSimplifyResult {
185    /// The function call was simplified to an entirely new Expr
186    Simplified(Expr),
187    /// The function call could not be simplified, and the arguments
188    /// are return unmodified.
189    Original(Vec<Expr>),
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// A builder with no overrides must fall back to an empty schema, no
    /// start time, and default configuration options.
    #[test]
    fn simplify_context_builder_builds_default_context() {
        let context = SimplifyContext::builder().build();
        let default_options = ConfigOptions::default();

        assert_eq!(context.schema().as_ref(), &DFSchema::empty());
        assert_eq!(context.query_execution_start_time(), None);
        assert_eq!(
            context.config_options().optimizer.max_passes,
            default_options.optimizer.max_passes
        );
    }

    /// Values handed to the builder must reach the built context untouched.
    #[test]
    fn simplify_context_builder_uses_overrides() {
        let schema = Arc::new(DFSchema::empty());
        let config_options = Arc::new(ConfigOptions::default());
        let current_time = Utc::now();

        let context = SimplifyContext::builder()
            .with_schema(Arc::clone(&schema))
            .with_config_options(Arc::clone(&config_options))
            .with_query_execution_start_time(Some(current_time))
            .build();

        // Compare by pointer identity: the fallback schema is also empty, so
        // a value comparison could not tell the override from the default.
        assert!(Arc::ptr_eq(context.schema(), &schema));
        assert_eq!(context.query_execution_start_time(), Some(current_time));
        assert!(Arc::ptr_eq(context.config_options(), &config_options));
    }

    /// `with_current_time` must populate the query execution start time.
    #[test]
    fn simplify_context_builder_with_current_time_sets_start_time() {
        let context = SimplifyContext::builder().with_current_time().build();

        assert!(context.query_execution_start_time().is_some());
    }
}