Enum polars::prelude::Expr

[−]

pub enum Expr {
Show 24 variants
    Alias(Box<Expr, Global>, Arc<str>),
    Column(Arc<str>),
    Columns(Vec<String, Global>),
    DtypeColumn(Vec<DataType, Global>),
    Literal(LiteralValue),
    BinaryExpr {
        left: Box<Expr, Global>,
        op: Operator,
        right: Box<Expr, Global>,
    },
    Cast {
        expr: Box<Expr, Global>,
        data_type: DataType,
        strict: bool,
    },
    Sort {
        expr: Box<Expr, Global>,
        options: SortOptions,
    },
    Take {
        expr: Box<Expr, Global>,
        idx: Box<Expr, Global>,
    },
    SortBy {
        expr: Box<Expr, Global>,
        by: Vec<Expr, Global>,
        reverse: Vec<bool, Global>,
    },
    Agg(AggExpr),
    Ternary {
        predicate: Box<Expr, Global>,
        truthy: Box<Expr, Global>,
        falsy: Box<Expr, Global>,
    },
    Function {
        input: Vec<Expr, Global>,
        function: FunctionExpr,
        options: FunctionOptions,
    },
    Explode(Box<Expr, Global>),
    Filter {
        input: Box<Expr, Global>,
        by: Box<Expr, Global>,
    },
    Window {
        function: Box<Expr, Global>,
        partition_by: Vec<Expr, Global>,
        order_by: Option<Box<Expr, Global>>,
        options: WindowOptions,
    },
    Wildcard,
    Slice {
        input: Box<Expr, Global>,
        offset: Box<Expr, Global>,
        length: Box<Expr, Global>,
    },
    Exclude(Box<Expr, Global>, Vec<Excluded, Global>),
    KeepName(Box<Expr, Global>),
    Count,
    Nth(i64),
    RenameAlias {
        function: SpecialEq<Arc<dyn RenameAliasFn + 'static>>,
        expr: Box<Expr, Global>,
    },
    AnonymousFunction {
        input: Vec<Expr, Global>,
        function: SpecialEq<Arc<dyn SeriesUdf + 'static>>,
        output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
        options: FunctionOptions,
    },
}

Expand description

Queries consist of multiple expressions.

Variants

`Alias(Box<Expr, Global>, Arc<str>)`

`Column(Arc<str>)`

`Columns(Vec<String, Global>)`

`DtypeColumn(Vec<DataType, Global>)`

`BinaryExpr`

Fields

left: Box<Expr, Global>

op: Operator

right: Box<Expr, Global>

`Cast`

Fields

expr: Box<Expr, Global>

data_type: DataType

strict: bool

`Sort`

Fields

expr: Box<Expr, Global>

options: SortOptions

`Take`

Fields

expr: Box<Expr, Global>

idx: Box<Expr, Global>

`SortBy`

Fields

expr: Box<Expr, Global>

by: Vec<Expr, Global>

reverse: Vec<bool, Global>

`Ternary`

Fields

predicate: Box<Expr, Global>

truthy: Box<Expr, Global>

falsy: Box<Expr, Global>

A ternary operation if true then “foo” else “bar”

`Function`

Fields

input: Vec<Expr, Global>

function arguments

function: FunctionExpr

function to apply

options: FunctionOptions

`Explode(Box<Expr, Global>)`

`Filter`

Fields

input: Box<Expr, Global>

by: Box<Expr, Global>

`Window`

Fields

function: Box<Expr, Global>

Also has the input. i.e. avg(“foo”)

partition_by: Vec<Expr, Global>

order_by: Option<Box<Expr, Global>>

options: WindowOptions

See postgres window functions

`Wildcard`

`Slice`

Fields

input: Box<Expr, Global>

offset: Box<Expr, Global>

length is not yet known so we accept negative offsets

length: Box<Expr, Global>

`Exclude(Box<Expr, Global>, Vec<Excluded, Global>)`

Can be used in a select statement to exclude a column from selection

`KeepName(Box<Expr, Global>)`

Set root name as Alias

`Count`

Special case that does not need columns

`Nth(i64)`

Take the nth column in the DataFrame

`RenameAlias`

Fields

function: SpecialEq<Arc<dyn RenameAliasFn + 'static>>

expr: Box<Expr, Global>

`AnonymousFunction`

Fields

input: Vec<Expr, Global>

function arguments

function: SpecialEq<Arc<dyn SeriesUdf + 'static>>

function to apply

output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>

output dtype of the function

options: FunctionOptions

Implementations

impl Expr

pub fn to_dot(&self) -> Result<String, PolarsError>

Get a dot language representation of the Expression.

impl Expr

pub fn to_field(
 &self,
 schema: &Schema,
 ctxt: Context
) -> Result<Field, PolarsError>

Get Field result of the expression. The schema is the input data.

impl Expr

pub fn eq<E>(self, other: E) -> Expr
where
    E: Into<Expr>,

Compare Expr with other Expr on equality

pub fn neq<E>(self, other: E) -> Expr
where
    E: Into<Expr>,

Compare Expr with other Expr on non-equality

pub fn lt<E>(self, other: E) -> Expr
where
    E: Into<Expr>,

Check if Expr < Expr

pub fn gt<E>(self, other: E) -> Expr
where
    E: Into<Expr>,

Check if Expr > Expr

pub fn gt_eq<E>(self, other: E) -> Expr
where
    E: Into<Expr>,

Check if Expr >= Expr

pub fn lt_eq<E>(self, other: E) -> Expr
where
    E: Into<Expr>,

Check if Expr <= Expr

pub fn not(self) -> Expr

Negate Expr

pub fn alias(self, name: &str) -> Expr

Rename Column.

pub fn is_null(self) -> Expr

Run is_null operation on Expr.

pub fn is_not_null(self) -> Expr

Run is_not_null operation on Expr.

pub fn drop_nulls(self) -> Expr

Drop null values

pub fn drop_nans(self) -> Expr

Drop NaN values

pub fn min(self) -> Expr

Reduce groups to minimal value.

pub fn max(self) -> Expr

Reduce groups to maximum value.

pub fn nan_min(self) -> Expr

Reduce groups to minimal value.

pub fn nan_max(self) -> Expr

Reduce groups to maximum value.

pub fn mean(self) -> Expr

Reduce groups to the mean value.

pub fn median(self) -> Expr

Reduce groups to the median value.

pub fn sum(self) -> Expr

Reduce groups to the sum of all the values.

pub fn n_unique(self) -> Expr

Get the number of unique values in the groups.

pub fn first(self) -> Expr

Get the first value in the group.

pub fn last(self) -> Expr

Get the last value in the group.

pub fn list(self) -> Expr

Aggregate the group to a Series

pub fn quantile(self, quantile: f64, interpol: QuantileInterpolOptions) -> Expr

Compute the quantile per group.

pub fn agg_groups(self) -> Expr

Get the group indexes of the group by operation.

pub fn flatten(self) -> Expr

Alias for explode

pub fn explode(self) -> Expr

Explode the utf8/ list column

pub fn slice<E, F>(self, offset: E, length: F) -> Expr
where
    E: Into<Expr>,
    F: Into<Expr>,

Slice the Series. offset may be negative.

pub fn append<E>(self, other: E, upcast: bool) -> Expr
where
    E: Into<Expr>,

Append expressions. This is done by adding the chunks of other to this Series.

pub fn head(self, length: Option<usize>) -> Expr

Get the first n elements of the Expr result

pub fn tail(self, length: Option<usize>) -> Expr

Get the last n elements of the Expr result

pub fn unique(self) -> Expr

Get unique values of this expression.

pub fn unique_stable(self) -> Expr

Get unique values of this expression, while maintaining order. This requires more work than Expr::unique.

pub fn arg_unique(self) -> Expr

Get the first index of unique values of this expression.

pub fn arg_min(self) -> Expr

Get the index value that has the minimum value

pub fn arg_max(self) -> Expr

Get the index value that has the maximum value

pub fn arg_sort(self, sort_options: SortOptions) -> Expr

Get the index values that would sort this expression.

pub fn strict_cast(self, data_type: DataType) -> Expr

Cast expression to another data type. Fails with an error if the conversion overflows.

pub fn cast(self, data_type: DataType) -> Expr

Cast expression to another data type.

pub fn take<E>(self, idx: E) -> Expr
where
    E: Into<Expr>,

Take the values by idx.

pub fn sort(self, reverse: bool) -> Expr

Sort in increasing order. See the eager implementation.

pub fn sort_with(self, options: SortOptions) -> Expr

Sort with given options.

pub fn reverse(self) -> Expr

Reverse column

pub fn map<F>(
    self,
    function: F,
    output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr
where
    F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure once the logical plan gets executed.

This function is very similar to Expr::apply, but differs in how it handles aggregations.

map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power
apply should be used for operations that work on a group of data. e.g. sum, count, etc.

It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None given the output type of the input expr is used.

pub fn map_many<F>(
    self,
    function: F,
    arguments: &[Expr],
    output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr
where
    F: 'static + Fn(&mut [Series]) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure with many arguments once the logical plan gets executed.

See the Expr::map function for the differences between map and apply.

pub fn map_list<F>(
    self,
    function: F,
    output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr
where
    F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure once the logical plan gets executed.

This function is very similar to apply, but differs in how it handles aggregations.

map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power
apply should be used for operations that work on a group of data. e.g. sum, count, etc.
map_list should be used when the function expects a list aggregated series.

pub fn function_with_options<F>(
    self,
    function: F,
    output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
    options: FunctionOptions
) -> Expr
where
    F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

A function that cannot be expressed with map or apply and requires extra settings.

pub fn apply<F>(
    self,
    function: F,
    output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr
where
    F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure over the groups. This should only be used in a groupby aggregation.

It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None given the output type of the input expr is used.

The difference with map is that apply will create a separate Series per group.

map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power
apply should be used for operations that work on a group of data. e.g. sum, count, etc.

pub fn apply_many<F>(
    self,
    function: F,
    arguments: &[Expr],
    output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr
where
    F: 'static + Fn(&mut [Series]) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure over the groups with many arguments. This should only be used in a groupby aggregation.

See the Expr::apply function for the differences between map and apply.

pub fn apply_many_private(
    self,
    function_expr: FunctionExpr,
    arguments: &[Expr],
    auto_explode: bool,
    cast_to_supertypes: bool
) -> Expr

pub fn map_many_private(
    self,
    function_expr: FunctionExpr,
    arguments: &[Expr],
    cast_to_supertypes: bool
) -> Expr

pub fn is_finite(self) -> Expr

Get mask of finite values if dtype is Float

pub fn is_infinite(self) -> Expr

Get mask of infinite values if dtype is Float

pub fn is_nan(self) -> Expr

Get mask of NaN values if dtype is Float

pub fn is_not_nan(self) -> Expr

Get inverse mask of NaN values if dtype is Float

pub fn shift(self, periods: i64) -> Expr

Shift the values in the array by some period. See the eager implementation.

pub fn shift_and_fill<E>(self, periods: i64, fill_value: E) -> Expr
where
    E: Into<Expr>,

Shift the values in the array by some period and fill the resulting empty values.

pub fn cumsum(self, reverse: bool) -> Expr

Get an array with the cumulative sum computed at every element

pub fn cumprod(self, reverse: bool) -> Expr

Get an array with the cumulative product computed at every element

pub fn cummin(self, reverse: bool) -> Expr

Get an array with the cumulative min computed at every element

pub fn cummax(self, reverse: bool) -> Expr

Get an array with the cumulative max computed at every element

pub fn product(self) -> Expr

Get the product aggregation of an expression

pub fn backward_fill(self, limit: Option<u32>) -> Expr

Fill missing value with next non-null.

pub fn forward_fill(self, limit: Option<u32>) -> Expr

Fill missing value with previous non-null.

pub fn round(self, decimals: u32) -> Expr

Round underlying floating point array to given decimal numbers.

pub fn floor(self) -> Expr

Floor underlying floating point array to the highest integers smaller than or equal to the float value.

pub fn ceil(self) -> Expr

Ceil underlying floating point array to the lowest integers greater than or equal to the float value.

pub fn clip(self, min: AnyValue<'_>, max: AnyValue<'_>) -> Expr

Clip underlying values to a set boundary.

pub fn clip_max(self, max: AnyValue<'_>) -> Expr

Clip underlying values to a set boundary.

pub fn clip_min(self, min: AnyValue<'_>) -> Expr

Clip underlying values to a set boundary.

pub fn abs(self) -> Expr

Convert all values to their absolute/positive value.

pub fn over<E, IE>(self, partition_by: E) -> Expr
where
    E: AsRef<[IE]>,
    IE: Into<Expr> + Clone,

Apply window function over a subgroup. This is similar to a groupby + aggregation + self join. Or similar to window functions in Postgres.

Example

#[macro_use] extern crate polars_core;
use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example() -> PolarsResult<()> {
    let df = df! {
            "groups" => &[1, 1, 2, 2, 1, 2, 3, 3, 1],
            "values" => &[1, 2, 3, 4, 5, 6, 7, 8, 8]
        }?;

    let out = df
     .lazy()
     .select(&[
         col("groups"),
         sum("values").over([col("groups")]),
     ])
     .collect()?;
    dbg!(&out);
    Ok(())
}

Outputs:

╭────────┬────────╮
│ groups ┆ values │
│ ---    ┆ ---    │
│ i32    ┆ i32    │
╞════════╪════════╡
│ 1      ┆ 16     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1      ┆ 16     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2      ┆ 13     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2      ┆ 13     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ ...    ┆ ...    │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1      ┆ 16     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2      ┆ 13     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 3      ┆ 15     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 3      ┆ 15     │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1      ┆ 16     │
╰────────┴────────╯

pub fn fill_null<E>(self, fill_value: E) -> Expr
where
    E: Into<Expr>,

Replace the null values by a value.

pub fn fill_nan<E>(self, fill_value: E) -> Expr
where
    E: Into<Expr>,

Replace the floating point NaN values by a value.

pub fn count(self) -> Expr

Count the values of the Series, or get the counts of the group by operation.

pub fn std(self, ddof: u8) -> Expr

Standard deviation of the values of the Series

pub fn var(self, ddof: u8) -> Expr

Variance of the values of the Series

pub fn is_duplicated(self) -> Expr

Get a mask of duplicated values

pub fn is_unique(self) -> Expr

Get a mask of unique values

pub fn and<E>(self, expr: E) -> Expr
where
    E: Into<Expr>,

and operation

pub fn xor<E>(self, expr: E) -> Expr
where
    E: Into<Expr>,

pub fn or<E>(self, expr: E) -> Expr
where
    E: Into<Expr>,

or operation

pub fn pow<E>(self, exponent: E) -> Expr
where
    E: Into<Expr>,

Raise expression to the power exponent

pub fn filter<E>(self, predicate: E) -> Expr
where
    E: Into<Expr>,

Filter a single column. Should be used in aggregation context. If you want to filter on a DataFrame level, use LazyFrame::filter.

pub fn is_in<E>(self, other: E) -> Expr
where
    E: Into<Expr>,

Check if the values of the left expression are in the lists of the right expr.

pub fn sort_by<E, IE, R>(self, by: E, reverse: R) -> Expr
where
    E: AsRef<[IE]>,
    IE: Into<Expr> + Clone,
    R: AsRef<[bool]>,

Sort this column by the ordering of another column. Can also be used in a groupby context to sort the groups.

pub fn repeat_by<E>(self, by: E) -> Expr
where
    E: Into<Expr>,

Repeat the column n times, where n is determined by the values in by. This yields an Expr of dtype List

pub fn is_first(self) -> Expr

Get a mask of the first unique value.

pub fn mode(self) -> Expr

Compute the mode(s) of this column. This is the most occurring value.

pub fn keep_name(self) -> Expr

Keep the original root name

use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: LazyFrame) -> LazyFrame {
    df.select([
// even though the alias yields a different column name,
// `keep_name` will make sure that the original column name is used
        col("*").alias("foo").keep_name()
])
}

pub fn map_alias<F>(self, function: F) -> Expr
where
    F: 'static + Fn(&str) -> String + Send + Sync,

Define an alias by mapping a function over the original root column name.

pub fn suffix(self, suffix: &str) -> Expr

Add a suffix to the root column name.

pub fn prefix(self, prefix: &str) -> Expr

Add a prefix to the root column name.

pub fn exclude(self, columns: impl IntoVec<String>) -> Expr

Exclude a column from a wildcard/regex selection.

You may also use regexes in the exclude as long as they start with ^ and end with $.

Example

use polars_core::prelude::*;
use polars_lazy::prelude::*;

// Select all columns except foo.
fn example(df: DataFrame) -> LazyFrame {
      df.lazy()
        .select(&[
                col("*").exclude(&["foo"])
                ])
}

pub fn exclude_dtype<D>(self, dtypes: D) -> Expr
where
    D: AsRef<[DataType]>,

pub fn interpolate(self) -> Expr

pub fn rolling_min(self, options: RollingOptions) -> Expr

Apply a rolling min. See: [ChunkedArray::rolling_min]

pub fn rolling_max(self, options: RollingOptions) -> Expr

Apply a rolling max. See: [ChunkedArray::rolling_max]

pub fn rolling_mean(self, options: RollingOptions) -> Expr

Apply a rolling mean. See: [ChunkedArray::rolling_mean]

pub fn rolling_sum(self, options: RollingOptions) -> Expr

Apply a rolling sum. See: [ChunkedArray::rolling_sum]

pub fn rolling_median(self, options: RollingOptions) -> Expr

Apply a rolling median. See: [ChunkedArray::rolling_median]

pub fn rolling_quantile(
    self,
    quantile: f64,
    interpolation: QuantileInterpolOptions,
    options: RollingOptions
) -> Expr

Apply a rolling quantile. See: [ChunkedArray::rolling_quantile]

pub fn rolling_var(self, options: RollingOptions) -> Expr

Apply a rolling variance

pub fn rolling_std(self, options: RollingOptions) -> Expr

Apply a rolling std-dev

pub fn rolling_apply(
 self,
 f: Arc<dyn Fn(&Series) + Send + Sync + 'static>,
 output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
 options: RollingOptionsFixedWindow
) -> Expr

Apply a custom function over a rolling/ moving window of the array. This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.

pub fn rolling_apply_float<F>(self, window_size: usize, f: F) -> Expr
where
    F: 'static + FnMut(&mut ChunkedArray<Float64Type>) -> Option<f64> + Send + Sync + Copy,

Apply a custom function over a rolling/ moving window of the array. Prefer this over rolling_apply in case of floating point numbers as this is faster. This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.

pub fn rank(self, options: RankOptions) -> Expr

pub fn diff(self, n: usize, null_behavior: NullBehavior) -> Expr

pub fn upper_bound(self) -> Expr

Get the maximal value that can be held by this dtype.

pub fn lower_bound(self) -> Expr

Get the minimal value that can be held by this dtype.

pub fn reshape(self, dims: &[i64]) -> Expr

pub fn cumcount(self, reverse: bool) -> Expr

Cumulatively count values from 0 to len.

pub fn shuffle(self, seed: Option<u64>) -> Expr

pub fn sample_n(
 self,
 n: usize,
 with_replacement: bool,
 shuffle: bool,
 seed: Option<u64>
) -> Expr

pub fn sample_frac(
 self,
 frac: f64,
 with_replacement: bool,
 shuffle: bool,
 seed: Option<u64>
) -> Expr

pub fn any(self) -> Expr

Check if any boolean value is true

pub fn shrink_dtype(self) -> Expr

Shrink numeric columns to the minimal required datatype needed to fit the extrema of this Series. This can be used to reduce memory pressure.

pub fn all(self) -> Expr

Check if all boolean values are true

pub fn value_counts(self, multithreaded: bool, sorted: bool) -> Expr

Count all unique values and create a struct mapping value to count. Note that it is better to turn multithreaded off in the aggregation context.

pub fn null_count(self) -> Expr

Get the null count of the column/group

pub fn set_sorted(self, sorted: IsSorted) -> Expr

Set this Series as sorted so that downstream code can use fast paths for sorted arrays.

Warning

This can lead to incorrect results if this Series is not sorted!! Use with care!

pub fn str(self) -> StringNameSpace

pub fn dt(self) -> DateLikeNameSpace

pub fn arr(self) -> ListNameSpace

pub fn cat(self) -> CategoricalNameSpace

pub fn struct_(self) -> StructNameSpace

impl Expr

pub fn mutate(&mut self) -> ExprMut<'_>

Expr::mutate().apply(fn())

Trait Implementations

impl Add<Expr> for Expr

type Output = Expr

The resulting type after applying the + operator.

fn add(self, rhs: Expr) -> <Expr as Add<Expr>>::Output

Performs the + operation. Read more

impl AsRef<Expr> for AggExpr

fn as_ref(&self) -> &Expr

Converts this type into a shared reference of the (usually inferred) input type.

impl Clone for Expr

fn clone(&self) -> Expr

Returns a copy of the value. Read more

1.0.0 · source

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

impl Debug for Expr

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more

impl Default for Expr

fn default() -> Expr

Returns the “default value” for a type. Read more

impl Display for Expr

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more

impl Div<Expr> for Expr

type Output = Expr

The resulting type after applying the / operator.

fn div(self, rhs: Expr) -> <Expr as Div<Expr>>::Output

Performs the / operation. Read more

source

impl ExprEvalExtension for Expr

source