// datafusion_expr_common/placement.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Expression placement information for optimization decisions.

/// Describes where an expression should be placed in the query plan for
/// optimal execution.
///
/// This is used by optimizers to make decisions about expression placement,
/// such as whether to push expressions down through projections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExpressionPlacement {
    /// A constant literal value.
    Literal,
    /// A simple column reference.
    Column,
    /// A cheap expression that can be pushed to leaf nodes in the plan.
    ///
    /// Examples include `get_field` for struct field access.
    /// Pushing these expressions down in the plan can reduce data early
    /// at low compute cost.
    /// See [`ExpressionPlacement::should_push_to_leaves`] for details.
    MoveTowardsLeafNodes,
    /// An expensive expression that should stay where it is in the plan.
    ///
    /// Examples include complex scalar functions or UDFs.
    KeepInPlace,
}

impl ExpressionPlacement {
    /// Returns `true` if the expression can be pushed down to leaf nodes
    /// in the query plan.
    ///
    /// This returns `true` for:
    /// - [`ExpressionPlacement::Column`]: Simple column references can be
    ///   pushed down. They do no compute and do not increase or decrease the
    ///   amount of data being processed. A projection that reduces the number
    ///   of columns can eliminate unnecessary data early, but this method only
    ///   considers one expression at a time, not a projection as a whole.
    /// - [`ExpressionPlacement::MoveTowardsLeafNodes`]: Cheap expressions can
    ///   be pushed down to leaves to take advantage of early computation and
    ///   potential optimizations at the data source level. For example
    ///   `struct_col['field']` is cheap to compute (just an Arc clone of the
    ///   nested array for `'field'`) and thus can reduce data early in the
    ///   plan at very low compute cost. It may even be possible to eliminate
    ///   the expression entirely if the data source can project only the
    ///   needed field (as e.g. Parquet can).
    ///
    /// It returns `false` for the remaining variants,
    /// [`ExpressionPlacement::Literal`] and
    /// [`ExpressionPlacement::KeepInPlace`].
    pub fn should_push_to_leaves(&self) -> bool {
        matches!(
            self,
            ExpressionPlacement::Column | ExpressionPlacement::MoveTowardsLeafNodes
        )
    }
}