pub struct ExprSimplifier<S> { /* private fields */ }
Expand description

This structure provides the API for expression simplification.

Provides simplification information based on DFSchema and ExecutionProps. This is the default implementation used by DataFusion

For example:

use arrow::datatypes::{Schema, Field, DataType};
use datafusion_expr::{col, lit};
use datafusion_common::{DataFusionError, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;

// Create the schema
let schema = Schema::new(vec![
    Field::new("i", DataType::Int64, false),
  ])
  .to_dfschema_ref().unwrap();

// Create the simplifier
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props)
   .with_schema(schema);
let simplifier = ExprSimplifier::new(context);

// Use the simplifier

// b < 2 or (1 > 3)
let expr = col("b").lt(lit(2)).or(lit(1).gt(lit(3)));

// b < 2
let simplified = simplifier.simplify(expr).unwrap();
assert_eq!(simplified, col("b").lt(lit(2)));

Implementations§

source§

impl<S> ExprSimplifier<S>
where S: SimplifyInfo,

source

pub fn new(info: S) -> ExprSimplifier<S>

Create a new ExprSimplifier with the given info such as an instance of SimplifyContext. See simplify for an example.

source

pub fn simplify(&self, expr: Expr) -> Result<Expr, DataFusionError>

Simplifies this Expr as much as possible, evaluating constants and applying algebraic simplifications.

The types of the expression must match what operators expect, or else an error may occur trying to evaluate. See coerce for a function to help.

§Example:

b > 2 OR b > 2

can be rewritten to

b > 2

use arrow::datatypes::DataType;
use datafusion_expr::{col, lit, Expr};
use datafusion_common::Result;
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::simplify::SimplifyInfo;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datafusion_common::DFSchema;
use std::sync::Arc;

/// Simple implementation that provides `Simplifier` the information it needs
/// See SimplifyContext for a structure that does this.
#[derive(Default)]
struct Info {
  execution_props: ExecutionProps,
};

impl SimplifyInfo for Info {
  fn is_boolean_type(&self, expr: &Expr) -> Result<bool> {
    Ok(false)
  }
  fn nullable(&self, expr: &Expr) -> Result<bool> {
    Ok(true)
  }
  fn execution_props(&self) -> &ExecutionProps {
    &self.execution_props
  }
  fn get_data_type(&self, expr: &Expr) -> Result<DataType> {
    Ok(DataType::Int32)
  }
}

// Create the simplifier
let simplifier = ExprSimplifier::new(Info::default());

// b > 2 (despite the variable name `b_lt_2`, the expression built here is `b > 2`)
let b_lt_2 = col("b").gt(lit(2));

// (b > 2) OR (b > 2)
let expr = b_lt_2.clone().or(b_lt_2.clone());

// (b > 2) OR (b > 2) --> (b > 2)
let expr = simplifier.simplify(expr).unwrap();
assert_eq!(expr, b_lt_2);
source

pub fn coerce( &self, expr: Expr, schema: Arc<DFSchema> ) -> Result<Expr, DataFusionError>

Apply type coercion to an Expr so that it can be evaluated as a PhysicalExpr.

See the type coercion module documentation for more details on type coercion.

source

pub fn with_guarantees( self, guarantees: Vec<(Expr, NullableInterval)> ) -> ExprSimplifier<S>

Input guarantees about the values of columns.

The guarantees can simplify expressions. For example, if a column x is guaranteed to be 3, then the expression x > 1 can be replaced by the literal true.

The guarantees are provided as a Vec<(Expr, NullableInterval)>, where the Expr is a column reference and the NullableInterval is an interval representing the known possible values of that column.

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_expr::{col, lit, Expr};
use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
use datafusion_common::{Result, ScalarValue, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;

let schema = Schema::new(vec![
  Field::new("x", DataType::Int64, false),
  Field::new("y", DataType::UInt32, false),
  Field::new("z", DataType::Int64, false),
  ])
  .to_dfschema_ref().unwrap();

// Create the simplifier
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props)
   .with_schema(schema);

// Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
let expr_x = col("x").gt_eq(lit(3_i64));
let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
let expr_z = col("z").gt(lit(5_i64));
let expr = expr_x.and(expr_y).and(expr_z.clone());

let guarantees = vec![
   // x ∈ [3, 5]
   (
       col("x"),
       NullableInterval::NotNull {
           values: Interval::make(Some(3_i64), Some(5_i64)).unwrap()
       }
   ),
   // y = 3
   (col("y"), NullableInterval::from(ScalarValue::UInt32(Some(3)))),
];
let simplifier = ExprSimplifier::new(context).with_guarantees(guarantees);
let output = simplifier.simplify(expr).unwrap();
// Expression becomes: true AND true AND (z > 5), which simplifies to
// z > 5.
assert_eq!(output, expr_z);
source

pub fn with_canonicalize(self, canonicalize: bool) -> ExprSimplifier<S>

Should the Canonicalizer be applied before simplification?

If true (the default), the expression will be rewritten to canonical form before simplification. This is useful to ensure that the simplifier can apply all possible simplifications.

Some expressions, such as those in some Joins, cannot be canonicalized without changing their meaning. In these cases, canonicalization should be disabled.

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_expr::{col, lit, Expr};
use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
use datafusion_common::{Result, ScalarValue, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;

let schema = Schema::new(vec![
  Field::new("a", DataType::Int64, false),
  Field::new("b", DataType::Int64, false),
  Field::new("c", DataType::Int64, false),
  ])
  .to_dfschema_ref().unwrap();

// Create the simplifier
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props)
   .with_schema(schema);
let simplifier = ExprSimplifier::new(context);

// Expression: a = c AND 1 = b
let expr = col("a").eq(col("c")).and(lit(1).eq(col("b")));

// With canonicalization, the expression is rewritten to canonical form
// (though it is no simpler in this case):
let canonical = simplifier.simplify(expr.clone()).unwrap();
// Expression has been rewritten to: (c = a AND b = 1)
assert_eq!(canonical, col("c").eq(col("a")).and(col("b").eq(lit(1))));

// If canonicalization is disabled, the expression is not changed
let non_canonicalized = simplifier
  .with_canonicalize(false)
  .simplify(expr.clone())
  .unwrap();

assert_eq!(non_canonicalized, expr);

Auto Trait Implementations§

§

impl<S> Freeze for ExprSimplifier<S>
where S: Freeze,

§

impl<S> !RefUnwindSafe for ExprSimplifier<S>

§

impl<S> Send for ExprSimplifier<S>
where S: Send,

§

impl<S> Sync for ExprSimplifier<S>
where S: Sync,

§

impl<S> Unpin for ExprSimplifier<S>
where S: Unpin,

§

impl<S> !UnwindSafe for ExprSimplifier<S>

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoEither for T

source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

impl<T> Same for T

§

type Output = T

Should always be Self
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V