pub struct ProjectionExprs { /* private fields */ }
Expand description
A collection of ProjectionExpr instances, representing a complete
projection operation.
Projection operations are used in query plans to select specific columns or compute new columns based on existing ones.
See ProjectionExprs::from_indices to select a subset of columns by
indices.
Implementations§
Source§impl ProjectionExprs
impl ProjectionExprs
pub fn new<I>(exprs: I) -> Self
where
I: IntoIterator<Item = ProjectionExpr>,
Sourcepub fn from_indices(indices: &[usize], schema: &Schema) -> Self
pub fn from_indices(indices: &[usize], schema: &Schema) -> Self
Creates a ProjectionExprs from a list of column indices.
This is a convenience method for creating simple column-only projections, where each projection expression is a reference to a column in the input schema.
§Behavior
- Ordering: the output projection preserves the exact order of indices provided in the input slice.
  For example, [2, 0, 1] will produce projections for columns 2, 0, then 1, in that order.
- Duplicates: duplicate indices are allowed and will create multiple projection expressions referencing the same source column.
  For example, [0, 0] creates 2 separate projections, both referencing column 0.
§Panics
Panics if any index in indices is out of bounds for the provided schema.
§Example
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_physical_expr::projection::ProjectionExprs;
use std::sync::Arc;
// Create a schema with three columns
let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Utf8, false),
Field::new("c", DataType::Float64, false),
]));
// Project columns at indices 2 and 0 (c and a) - ordering is preserved
let projection = ProjectionExprs::from_indices(&[2, 0], &schema);
// This creates: SELECT c@2 AS c, a@0 AS a
assert_eq!(projection.as_ref().len(), 2);
assert_eq!(projection.as_ref()[0].alias, "c");
assert_eq!(projection.as_ref()[1].alias, "a");
// Duplicate indices are allowed
let projection_with_dups = ProjectionExprs::from_indices(&[0, 0, 1], &schema);
assert_eq!(projection_with_dups.as_ref().len(), 3);
assert_eq!(projection_with_dups.as_ref()[0].alias, "a");
assert_eq!(projection_with_dups.as_ref()[1].alias, "a"); // duplicate
assert_eq!(projection_with_dups.as_ref()[2].alias, "b");
Source pub fn iter(&self) -> impl Iterator<Item = &ProjectionExpr>
pub fn iter(&self) -> impl Iterator<Item = &ProjectionExpr>
Returns an iterator over the projection expressions
Sourcepub fn projection_mapping(
&self,
input_schema: &SchemaRef,
) -> Result<ProjectionMapping>
pub fn projection_mapping( &self, input_schema: &SchemaRef, ) -> Result<ProjectionMapping>
Creates a ProjectionMapping from this projection
Sourcepub fn expr_iter(&self) -> impl Iterator<Item = Arc<dyn PhysicalExpr>> + '_
pub fn expr_iter(&self) -> impl Iterator<Item = Arc<dyn PhysicalExpr>> + '_
Iterate over a clone of the projection expressions.
Sourcepub fn try_map_exprs<F>(self, f: F) -> Result<Self>
pub fn try_map_exprs<F>(self, f: F) -> Result<Self>
Apply a fallible transformation to the PhysicalExpr of each projection.
This method transforms the expression in each ProjectionExpr while preserving
the alias. This is useful for rewriting expressions, such as when adapting
expressions to a different schema.
§Example
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_physical_expr::expressions::Column;
use datafusion_physical_expr::projection::ProjectionExprs;
use datafusion_physical_expr::PhysicalExpr;
// Create a schema and projection
let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
]));
let projection = ProjectionExprs::from_indices(&[0, 1], &schema);
// Transform each expression (this example just returns each one unchanged)
let transformed = projection.try_map_exprs(|expr| Ok(expr))?;
assert_eq!(transformed.as_ref().len(), 2);
Source pub fn try_merge(&self, other: &ProjectionExprs) -> Result<ProjectionExprs>
pub fn try_merge(&self, other: &ProjectionExprs) -> Result<ProjectionExprs>
Apply another projection on top of this projection, returning the combined projection.
For example, if this projection is SELECT c@2 AS x, b@1 AS y, a@0 AS z and the other projection is SELECT x@0 + 1 AS c1, y@1 + z@2 AS c2,
we return a projection equivalent to SELECT c@2 + 1 AS c1, b@1 + a@0 AS c2.
§Example
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::Operator;
use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal};
use datafusion_physical_expr::projection::{ProjectionExpr, ProjectionExprs};
use std::sync::Arc;
fn main() -> Result<()> {
// Example from the docstring:
// Base projection: SELECT c@2 AS x, b@1 AS y, a@0 AS z
let base = ProjectionExprs::new(vec![
ProjectionExpr {
expr: Arc::new(Column::new("c", 2)),
alias: "x".to_string(),
},
ProjectionExpr {
expr: Arc::new(Column::new("b", 1)),
alias: "y".to_string(),
},
ProjectionExpr {
expr: Arc::new(Column::new("a", 0)),
alias: "z".to_string(),
},
]);
// Top projection: SELECT x@0 + 1 AS c1, y@1 + z@2 AS c2
let top = ProjectionExprs::new(vec![
ProjectionExpr {
expr: Arc::new(BinaryExpr::new(
Arc::new(Column::new("x", 0)),
Operator::Plus,
Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
)),
alias: "c1".to_string(),
},
ProjectionExpr {
expr: Arc::new(BinaryExpr::new(
Arc::new(Column::new("y", 1)),
Operator::Plus,
Arc::new(Column::new("z", 2)),
)),
alias: "c2".to_string(),
},
]);
// Expected result: SELECT c@2 + 1 AS c1, b@1 + a@0 AS c2
let result = base.try_merge(&top)?;
assert_eq!(result.as_ref().len(), 2);
assert_eq!(result.as_ref()[0].alias, "c1");
assert_eq!(result.as_ref()[1].alias, "c2");
Ok(())
}
§Errors
This function returns an error if any expression in the other projection cannot be
applied on top of this projection.
Sourcepub fn column_indices(&self) -> Vec<usize>
pub fn column_indices(&self) -> Vec<usize>
Extract the column indices used in this projection.
For example, for a projection SELECT a AS x, b + 1 AS y, where a is at index 0 and b is at index 1,
this function would return [0, 1].
Repeated indices are returned only once, and the order is ascending.
Sourcepub fn ordered_column_indices(&self) -> Vec<usize>
👎Deprecated since 52.0.0: Use column_indices() instead. This method will be removed in 58.0.0 or 6 months after 52.0.0 is released, whichever comes first.
pub fn ordered_column_indices(&self) -> Vec<usize>
Extract the ordered column indices for a column-only projection.
This function assumes that all expressions in the projection are simple column references. It returns the column indices in the order they appear in the projection.
§Panics
Panics if any expression in the projection is not a simple column reference. This includes:
- Computed expressions (e.g., a + 1, CAST(a AS INT))
- Function calls (e.g., UPPER(name), SUM(amount))
- Literals (e.g., 42, 'hello')
- Complex nested expressions (e.g., CASE WHEN ... THEN ... END)
§Returns
A vector of column indices in projection order. Unlike column_indices(),
this function:
- Preserves the projection order (does not sort)
- Preserves duplicates (does not deduplicate)
§Example
For a projection SELECT c, a, c where a is at index 0 and c is at index 2,
this function would return [2, 0, 2].
Use column_indices() instead if the projection may contain
non-column expressions or if you need a deduplicated sorted list.
§Panics
Panics if any expression in the projection is not a simple column reference.
Sourcepub fn project_schema(&self, input_schema: &Schema) -> Result<Schema>
pub fn project_schema(&self, input_schema: &Schema) -> Result<Schema>
Project a schema according to this projection.
For example, for a projection SELECT a AS x, b + 1 AS y, where a is at index 0 and b is at index 1,
if the input schema is [a: Int32, b: Int32, c: Int32], the output schema would be [x: Int32, y: Int32].
Fields’ metadata are preserved from the input schema.
Sourcepub fn make_projector(&self, input_schema: &Schema) -> Result<Projector>
pub fn make_projector(&self, input_schema: &Schema) -> Result<Projector>
Create a new Projector from this projection and an input schema.
A Projector can be used to apply this projection to record batches.
§Errors
This function returns an error if the output schema cannot be constructed from the input schema with the given projection expressions. For example, if an expression only works with integer columns but the input schema has a string column at that index.
pub fn create_expression_metrics( &self, metrics: &ExecutionPlanMetricsSet, partition: usize, ) -> ExpressionEvaluatorMetrics
Sourcepub fn project_statistics(
&self,
stats: Statistics,
output_schema: &Schema,
) -> Result<Statistics>
pub fn project_statistics( &self, stats: Statistics, output_schema: &Schema, ) -> Result<Statistics>
Project statistics according to this projection.
For example, for a projection SELECT a AS x, b + 1 AS y, where a is at index 0 and b is at index 1,
if the input statistics has column statistics for columns a, b, and c, the output statistics would have column statistics for columns x and y.
§Example
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::stats::{ColumnStatistics, Precision, Statistics};
use datafusion_physical_expr::projection::ProjectionExprs;
use datafusion_common::Result;
use datafusion_common::ScalarValue;
use std::sync::Arc;
fn main() -> Result<()> {
// Input schema: a: Int32, b: Int32, c: Int32
let input_schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
Field::new("c", DataType::Int32, false),
]));
// Input statistics with column stats for a, b, c
let input_stats = Statistics {
num_rows: Precision::Exact(100),
total_byte_size: Precision::Exact(1200),
column_statistics: vec![
// Column a stats
ColumnStatistics::new_unknown()
.with_null_count(Precision::Exact(0))
.with_min_value(Precision::Exact(ScalarValue::Int32(Some(0))))
.with_max_value(Precision::Exact(ScalarValue::Int32(Some(100))))
.with_distinct_count(Precision::Exact(100)),
// Column b stats
ColumnStatistics::new_unknown()
.with_null_count(Precision::Exact(0))
.with_min_value(Precision::Exact(ScalarValue::Int32(Some(10))))
.with_max_value(Precision::Exact(ScalarValue::Int32(Some(60))))
.with_distinct_count(Precision::Exact(50)),
// Column c stats
ColumnStatistics::new_unknown()
.with_null_count(Precision::Exact(5))
.with_min_value(Precision::Exact(ScalarValue::Int32(Some(-10))))
.with_max_value(Precision::Exact(ScalarValue::Int32(Some(200))))
.with_distinct_count(Precision::Exact(25)),
],
};
// Create a projection that selects columns c and a (indices 2 and 0)
let projection = ProjectionExprs::from_indices(&[2, 0], &input_schema);
// Compute output schema
let output_schema = projection.project_schema(&input_schema)?;
// Project the statistics
let output_stats = projection.project_statistics(input_stats, &output_schema)?;
// The output should have 2 column statistics (for c and a, in that order)
assert_eq!(output_stats.column_statistics.len(), 2);
// First column in output is c (was at index 2)
assert_eq!(
output_stats.column_statistics[0].min_value,
Precision::Exact(ScalarValue::Int32(Some(-10)))
);
assert_eq!(
output_stats.column_statistics[0].null_count,
Precision::Exact(5)
);
// Second column in output is a (was at index 0)
assert_eq!(
output_stats.column_statistics[1].min_value,
Precision::Exact(ScalarValue::Int32(Some(0)))
);
assert_eq!(
output_stats.column_statistics[1].distinct_count,
Precision::Exact(100)
);
// Total byte size is recalculated based on projected columns
assert_eq!(
output_stats.total_byte_size,
Precision::Exact(800), // each Int32 column is 4 bytes * 100 rows * 2 columns
);
// Number of rows remains the same
assert_eq!(output_stats.num_rows, Precision::Exact(100));
Ok(())
}
Trait Implementations§
Source§impl AsRef<[ProjectionExpr]> for ProjectionExprs
impl AsRef<[ProjectionExpr]> for ProjectionExprs
Source§fn as_ref(&self) -> &[ProjectionExpr]
fn as_ref(&self) -> &[ProjectionExpr]
Source§impl Clone for ProjectionExprs
impl Clone for ProjectionExprs
Source§fn clone(&self) -> ProjectionExprs
fn clone(&self) -> ProjectionExprs
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for ProjectionExprs
impl Debug for ProjectionExprs
Source§impl Display for ProjectionExprs
impl Display for ProjectionExprs
Source§impl From<&[ProjectionExpr]> for ProjectionExprs
impl From<&[ProjectionExpr]> for ProjectionExprs
Source§fn from(value: &[ProjectionExpr]) -> Self
fn from(value: &[ProjectionExpr]) -> Self
Source§impl From<Vec<ProjectionExpr>> for ProjectionExprs
impl From<Vec<ProjectionExpr>> for ProjectionExprs
Source§fn from(value: Vec<ProjectionExpr>) -> Self
fn from(value: Vec<ProjectionExpr>) -> Self
Source§impl FromIterator<ProjectionExpr> for ProjectionExprs
impl FromIterator<ProjectionExpr> for ProjectionExprs
Source§fn from_iter<T: IntoIterator<Item = ProjectionExpr>>(exprs: T) -> Self
fn from_iter<T: IntoIterator<Item = ProjectionExpr>>(exprs: T) -> Self
Source§impl<'a> IntoIterator for &'a ProjectionExprs
impl<'a> IntoIterator for &'a ProjectionExprs
Source§impl IntoIterator for ProjectionExprs
impl IntoIterator for ProjectionExprs
Source§impl PartialEq for ProjectionExprs
impl PartialEq for ProjectionExprs
impl Eq for ProjectionExprs
impl StructuralPartialEq for ProjectionExprs
Auto Trait Implementations§
impl Freeze for ProjectionExprs
impl !RefUnwindSafe for ProjectionExprs
impl Send for ProjectionExprs
impl Sync for ProjectionExprs
impl Unpin for ProjectionExprs
impl !UnwindSafe for ProjectionExprs
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
Source§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to key and return true if they are equal.
Source§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more