Skip to main content

datafusion_expr_common/
placement.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Expression placement information for optimization decisions.
19
/// Classifies an expression by where in the query plan it is best evaluated.
///
/// Optimizers consult this classification when deciding how to position an
/// expression, for example whether it is worth pushing the expression down
/// through a projection.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ExpressionPlacement {
    /// The expression is a constant literal value.
    Literal,
    /// The expression is a plain reference to a column.
    Column,
    /// The expression is cheap to evaluate and may be moved down to the leaf
    /// nodes of the plan.
    ///
    /// Struct field access via `get_field` is one example. Moving such an
    /// expression closer to the leaves can shrink the data early while
    /// costing very little compute.
    ///
    /// See [`ExpressionPlacement::should_push_to_leaves`] for details.
    MoveTowardsLeafNodes,
    /// The expression is expensive and should remain at its current position
    /// in the plan.
    ///
    /// Complex scalar functions and UDFs are examples.
    KeepInPlace,
}
40
41impl ExpressionPlacement {
42    /// Returns true if the expression can be pushed down to leaf nodes
43    /// in the query plan.
44    ///
45    /// This returns true for:
46    /// - [`ExpressionPlacement::Column`]: Simple column references can be pushed down. They do no compute and do not increase or
47    ///   decrease the amount of data being processed.
48    ///   A projection that reduces the number of columns can eliminate unnecessary data early,
49    ///   but this method only considers one expression at a time, not a projection as a whole.
50    /// - [`ExpressionPlacement::MoveTowardsLeafNodes`]: Cheap expressions can be pushed down to leaves to take advantage of
51    ///   early computation and potential optimizations at the data source level.
52    ///   For example `struct_col['field']` is cheap to compute (just an Arc clone of the nested array for `'field'`)
53    ///   and thus can reduce data early in the plan at very low compute cost.
54    ///   It may even be possible to eliminate the expression entirely if the data source can project only the needed field
55    ///   (as e.g. Parquet can).
56    pub fn should_push_to_leaves(&self) -> bool {
57        matches!(
58            self,
59            ExpressionPlacement::Column | ExpressionPlacement::MoveTowardsLeafNodes
60        )
61    }
62}