pub enum Expr {
Show 24 variants
Alias(Box<Expr, Global>, Arc<str>),
Column(Arc<str>),
Columns(Vec<String, Global>),
DtypeColumn(Vec<DataType, Global>),
Literal(LiteralValue),
BinaryExpr {
left: Box<Expr, Global>,
op: Operator,
right: Box<Expr, Global>,
},
Cast {
expr: Box<Expr, Global>,
data_type: DataType,
strict: bool,
},
Sort {
expr: Box<Expr, Global>,
options: SortOptions,
},
Take {
expr: Box<Expr, Global>,
idx: Box<Expr, Global>,
},
SortBy {
expr: Box<Expr, Global>,
by: Vec<Expr, Global>,
reverse: Vec<bool, Global>,
},
Agg(AggExpr),
Ternary {
predicate: Box<Expr, Global>,
truthy: Box<Expr, Global>,
falsy: Box<Expr, Global>,
},
Function {
input: Vec<Expr, Global>,
function: FunctionExpr,
options: FunctionOptions,
},
Explode(Box<Expr, Global>),
Filter {
input: Box<Expr, Global>,
by: Box<Expr, Global>,
},
Window {
function: Box<Expr, Global>,
partition_by: Vec<Expr, Global>,
order_by: Option<Box<Expr, Global>>,
options: WindowOptions,
},
Wildcard,
Slice {
input: Box<Expr, Global>,
offset: Box<Expr, Global>,
length: Box<Expr, Global>,
},
Exclude(Box<Expr, Global>, Vec<Excluded, Global>),
KeepName(Box<Expr, Global>),
Count,
Nth(i64),
RenameAlias {
function: SpecialEq<Arc<dyn RenameAliasFn + 'static>>,
expr: Box<Expr, Global>,
},
AnonymousFunction {
input: Vec<Expr, Global>,
function: SpecialEq<Arc<dyn SeriesUdf + 'static>>,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
options: FunctionOptions,
},
}
Expand description
Queries consist of multiple expressions.
Variants
Alias(Box<Expr, Global>, Arc<str>)
Column(Arc<str>)
Columns(Vec<String, Global>)
DtypeColumn(Vec<DataType, Global>)
Literal(LiteralValue)
BinaryExpr
Cast
Sort
Take
SortBy
Agg(AggExpr)
Ternary
A ternary operation if true then “foo” else “bar”
Function
Fields
function: FunctionExpr
function to apply
Explode(Box<Expr, Global>)
Filter
Window
Fields
See postgres window functions
Wildcard
Slice
Fields
Exclude(Box<Expr, Global>, Vec<Excluded, Global>)
Can be used in a select statement to exclude a column from selection
KeepName(Box<Expr, Global>)
Set root name as Alias
Count
Special case that does not need columns
Nth(i64)
Take the nth column in the DataFrame
RenameAlias
AnonymousFunction
Fields
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
output dtype of the function
Implementations
impl Expr
impl Expr
pub fn to_dot(&self) -> Result<String, PolarsError>
pub fn to_dot(&self) -> Result<String, PolarsError>
Get a dot language representation of the Expression.
impl Expr
impl Expr
pub fn to_field(
&self,
schema: &Schema,
ctxt: Context
) -> Result<Field, PolarsError>
pub fn to_field(
&self,
schema: &Schema,
ctxt: Context
) -> Result<Field, PolarsError>
Get Field result of the expression. The schema is the input data.
impl Expr
impl Expr
pub fn neq<E>(self, other: E) -> Exprwhere
E: Into<Expr>,
pub fn neq<E>(self, other: E) -> Exprwhere
E: Into<Expr>,
Compare `Expr` with other `Expr` on non-equality.
pub fn is_not_null(self) -> Expr
pub fn is_not_null(self) -> Expr
Run `is_not_null` operation on `Expr`.
pub fn drop_nulls(self) -> Expr
pub fn drop_nulls(self) -> Expr
Drop null values
pub fn quantile(self, quantile: f64, interpol: QuantileInterpolOptions) -> Expr
pub fn quantile(self, quantile: f64, interpol: QuantileInterpolOptions) -> Expr
Compute the quantile per group.
pub fn agg_groups(self) -> Expr
pub fn agg_groups(self) -> Expr
Get the group indexes of the group by operation.
pub fn slice<E, F>(self, offset: E, length: F) -> Exprwhere
E: Into<Expr>,
F: Into<Expr>,
pub fn slice<E, F>(self, offset: E, length: F) -> Exprwhere
E: Into<Expr>,
F: Into<Expr>,
Slice the `Series`. `offset` may be negative.
pub fn append<E>(self, other: E, upcast: bool) -> Exprwhere
E: Into<Expr>,
pub fn append<E>(self, other: E, upcast: bool) -> Exprwhere
E: Into<Expr>,
Append expressions. This is done by adding the chunks of `other` to this `Series`.
pub fn unique_stable(self) -> Expr
pub fn unique_stable(self) -> Expr
Get unique values of this expression, while maintaining order.
This requires more work than `Expr::unique`.
pub fn arg_unique(self) -> Expr
pub fn arg_unique(self) -> Expr
Get the first index of unique values of this expression.
pub fn arg_sort(self, sort_options: SortOptions) -> Expr
pub fn arg_sort(self, sort_options: SortOptions) -> Expr
Get the index values that would sort this expression.
pub fn strict_cast(self, data_type: DataType) -> Expr
pub fn strict_cast(self, data_type: DataType) -> Expr
Cast expression to another data type. Throws an error if the conversion overflows.
pub fn sort(self, reverse: bool) -> Expr
pub fn sort(self, reverse: bool) -> Expr
Sort in increasing order. See the eager implementation.
pub fn sort_with(self, options: SortOptions) -> Expr
pub fn sort_with(self, options: SortOptions) -> Expr
Sort with given options.
pub fn map<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
pub fn map<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
Apply a function/closure once the logical plan gets executed.
This function is very similar to `Expr::apply`, but differs in how it handles aggregations.
- `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
- `apply` should be used for operations that work on a group of data, e.g. `sum`, `count`, etc.
It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None is given, the output type of the input expr is used.
pub fn map_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(&mut [Series]) -> Result<Series, PolarsError> + Send + Sync,
pub fn map_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(&mut [Series]) -> Result<Series, PolarsError> + Send + Sync,
pub fn map_list<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
pub fn map_list<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
Apply a function/closure once the logical plan gets executed.
This function is very similar to `apply`, but differs in how it handles aggregations.
- `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
- `apply` should be used for operations that work on a group of data, e.g. `sum`, `count`, etc.
- `map_list` should be used when the function expects a list aggregated series.
pub fn function_with_options<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
options: FunctionOptions
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
pub fn function_with_options<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
options: FunctionOptions
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
A function that cannot be expressed with `map` or `apply` and requires extra settings.
pub fn apply<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
pub fn apply<F>(
self,
function: F,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,
Apply a function/closure over the groups. This should only be used in a groupby aggregation.
It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None is given, the output type of the input expr is used.
The difference with `map` is that `apply` will create a separate `Series` per group.
- `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
- `apply` should be used for operations that work on a group of data, e.g. `sum`, `count`, etc.
pub fn apply_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(&mut [Series]) -> Result<Series, PolarsError> + Send + Sync,
pub fn apply_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>
) -> Exprwhere
F: 'static + Fn(&mut [Series]) -> Result<Series, PolarsError> + Send + Sync,
Apply a function/closure over the groups with many arguments. This should only be used in a groupby aggregation.
See the `Expr::apply` function for the differences between `map` and `apply`.
pub fn apply_many_private(
self,
function_expr: FunctionExpr,
arguments: &[Expr],
auto_explode: bool,
cast_to_supertypes: bool
) -> Expr
pub fn map_many_private(
self,
function_expr: FunctionExpr,
arguments: &[Expr],
cast_to_supertypes: bool
) -> Expr
pub fn is_infinite(self) -> Expr
pub fn is_infinite(self) -> Expr
Get mask of infinite values if dtype is Float
pub fn is_not_nan(self) -> Expr
pub fn is_not_nan(self) -> Expr
Get inverse mask of NaN values if dtype is Float
pub fn shift(self, periods: i64) -> Expr
pub fn shift(self, periods: i64) -> Expr
Shift the values in the array by some period. See the eager implementation.
pub fn shift_and_fill<E>(self, periods: i64, fill_value: E) -> Exprwhere
E: Into<Expr>,
pub fn shift_and_fill<E>(self, periods: i64, fill_value: E) -> Exprwhere
E: Into<Expr>,
Shift the values in the array by some period and fill the resulting empty values.
pub fn cumsum(self, reverse: bool) -> Expr
pub fn cumsum(self, reverse: bool) -> Expr
Get an array with the cumulative sum computed at every element
pub fn cumprod(self, reverse: bool) -> Expr
pub fn cumprod(self, reverse: bool) -> Expr
Get an array with the cumulative product computed at every element
pub fn cummin(self, reverse: bool) -> Expr
pub fn cummin(self, reverse: bool) -> Expr
Get an array with the cumulative min computed at every element
pub fn cummax(self, reverse: bool) -> Expr
pub fn cummax(self, reverse: bool) -> Expr
Get an array with the cumulative max computed at every element
pub fn backward_fill(self, limit: Option<u32>) -> Expr
pub fn backward_fill(self, limit: Option<u32>) -> Expr
Fill missing value with next non-null.
pub fn forward_fill(self, limit: Option<u32>) -> Expr
pub fn forward_fill(self, limit: Option<u32>) -> Expr
Fill missing value with previous non-null.
pub fn round(self, decimals: u32) -> Expr
pub fn round(self, decimals: u32) -> Expr
Round underlying floating point array to given decimal numbers.
pub fn floor(self) -> Expr
pub fn floor(self) -> Expr
Floor underlying floating point array to the highest integers smaller or equal to the float value.
pub fn ceil(self) -> Expr
pub fn ceil(self) -> Expr
Ceil underlying floating point array to the lowest integers greater or equal to the float value.
pub fn clip(self, min: AnyValue<'_>, max: AnyValue<'_>) -> Expr
pub fn clip(self, min: AnyValue<'_>, max: AnyValue<'_>) -> Expr
Clip underlying values to a set boundary.
pub fn over<E, IE>(self, partition_by: E) -> Exprwhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
pub fn over<E, IE>(self, partition_by: E) -> Exprwhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
Apply window function over a subgroup. This is similar to a groupby + aggregation + self join. Or similar to window functions in Postgres.
Example
#[macro_use] extern crate polars_core;
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example() -> PolarsResult<()> {
let df = df! {
"groups" => &[1, 1, 2, 2, 1, 2, 3, 3, 1],
"values" => &[1, 2, 3, 4, 5, 6, 7, 8, 8]
}?;
let out = df
.lazy()
.select(&[
col("groups"),
sum("values").over([col("groups")]),
])
.collect()?;
dbg!(&out);
Ok(())
}
Outputs:
╭────────┬────────╮
│ groups ┆ values │
│ --- ┆ --- │
│ i32 ┆ i32 │
╞════════╪════════╡
│ 1 ┆ 16 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1 ┆ 16 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2 ┆ 13 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2 ┆ 13 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ ... ┆ ... │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1 ┆ 16 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2 ┆ 13 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 3 ┆ 15 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 3 ┆ 15 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1 ┆ 16 │
╰────────┴────────╯
pub fn fill_null<E>(self, fill_value: E) -> Exprwhere
E: Into<Expr>,
pub fn fill_null<E>(self, fill_value: E) -> Exprwhere
E: Into<Expr>,
Replace the null values by a value.
pub fn fill_nan<E>(self, fill_value: E) -> Exprwhere
E: Into<Expr>,
pub fn fill_nan<E>(self, fill_value: E) -> Exprwhere
E: Into<Expr>,
Replace the floating point `NaN` values by a value.
pub fn is_duplicated(self) -> Expr
pub fn is_duplicated(self) -> Expr
Get a mask of duplicated values
pub fn xor<E>(self, expr: E) -> Exprwhere
E: Into<Expr>,
pub fn filter<E>(self, predicate: E) -> Exprwhere
E: Into<Expr>,
pub fn filter<E>(self, predicate: E) -> Exprwhere
E: Into<Expr>,
Filter a single column. Should be used in aggregation context. If you want to filter on a DataFrame level, use `LazyFrame::filter`.
pub fn is_in<E>(self, other: E) -> Exprwhere
E: Into<Expr>,
pub fn is_in<E>(self, other: E) -> Exprwhere
E: Into<Expr>,
Check if the values of the left expression are in the lists of the right expr.
pub fn sort_by<E, IE, R>(self, by: E, reverse: R) -> Exprwhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
R: AsRef<[bool]>,
pub fn sort_by<E, IE, R>(self, by: E, reverse: R) -> Exprwhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
R: AsRef<[bool]>,
Sort this column by the ordering of another column. Can also be used in a groupby context to sort the groups.
pub fn repeat_by<E>(self, by: E) -> Exprwhere
E: Into<Expr>,
pub fn repeat_by<E>(self, by: E) -> Exprwhere
E: Into<Expr>,
Repeat the column `n` times, where `n` is determined by the values in `by`.
This yields an `Expr` of dtype `List`.
pub fn keep_name(self) -> Expr
pub fn keep_name(self) -> Expr
Keep the original root name
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: LazyFrame) -> LazyFrame {
df.select([
// even though the alias yields a different column name,
// `keep_name` will make sure that the original column name is used
col("*").alias("foo").keep_name()
])
}
pub fn map_alias<F>(self, function: F) -> Exprwhere
F: 'static + Fn(&str) -> String + Send + Sync,
pub fn map_alias<F>(self, function: F) -> Exprwhere
F: 'static + Fn(&str) -> String + Send + Sync,
Define an alias by mapping a function over the original root column name.
pub fn exclude(self, columns: impl IntoVec<String>) -> Expr
pub fn exclude(self, columns: impl IntoVec<String>) -> Expr
Exclude a column from a wildcard/regex selection.
You may also use regexes in the exclude as long as they start with `^` and end with `$`.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
// Select all columns except foo.
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[
col("*").exclude(&["foo"])
])
}
pub fn exclude_dtype<D>(self, dtypes: D) -> Exprwhere
D: AsRef<[DataType]>,
pub fn interpolate(self) -> Expr
pub fn rolling_min(self, options: RollingOptions) -> Expr
pub fn rolling_min(self, options: RollingOptions) -> Expr
Apply a rolling min. See: [`ChunkedArray::rolling_min`]
pub fn rolling_max(self, options: RollingOptions) -> Expr
pub fn rolling_max(self, options: RollingOptions) -> Expr
Apply a rolling max. See: [`ChunkedArray::rolling_max`]
pub fn rolling_mean(self, options: RollingOptions) -> Expr
pub fn rolling_mean(self, options: RollingOptions) -> Expr
Apply a rolling mean. See: [`ChunkedArray::rolling_mean`]
pub fn rolling_sum(self, options: RollingOptions) -> Expr
pub fn rolling_sum(self, options: RollingOptions) -> Expr
Apply a rolling sum. See: [`ChunkedArray::rolling_sum`]
pub fn rolling_median(self, options: RollingOptions) -> Expr
pub fn rolling_median(self, options: RollingOptions) -> Expr
Apply a rolling median. See: [`ChunkedArray::rolling_median`]
pub fn rolling_quantile(
self,
quantile: f64,
interpolation: QuantileInterpolOptions,
options: RollingOptions
) -> Expr
pub fn rolling_quantile(
self,
quantile: f64,
interpolation: QuantileInterpolOptions,
options: RollingOptions
) -> Expr
Apply a rolling quantile. See: [`ChunkedArray::rolling_quantile`]
pub fn rolling_var(self, options: RollingOptions) -> Expr
pub fn rolling_var(self, options: RollingOptions) -> Expr
Apply a rolling variance
pub fn rolling_std(self, options: RollingOptions) -> Expr
pub fn rolling_std(self, options: RollingOptions) -> Expr
Apply a rolling std-dev
pub fn rolling_apply(
self,
f: Arc<dyn Fn(&Series) + Send + Sync + 'static>,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
options: RollingOptionsFixedWindow
) -> Expr
pub fn rolling_apply(
self,
f: Arc<dyn Fn(&Series) + Send + Sync + 'static>,
output_type: SpecialEq<Arc<dyn FunctionOutputField + 'static>>,
options: RollingOptionsFixedWindow
) -> Expr
Apply a custom function over a rolling/moving window of the array. This has quite some dynamic dispatch, so prefer `rolling_min`, `rolling_max`, `rolling_mean`, `rolling_sum` over this.
pub fn rolling_apply_float<F>(self, window_size: usize, f: F) -> Exprwhere
F: 'static + FnMut(&mut ChunkedArray<Float64Type>) -> Option<f64> + Send + Sync + Copy,
pub fn rolling_apply_float<F>(self, window_size: usize, f: F) -> Exprwhere
F: 'static + FnMut(&mut ChunkedArray<Float64Type>) -> Option<f64> + Send + Sync + Copy,
Apply a custom function over a rolling/moving window of the array. Prefer this over `rolling_apply` in case of floating point numbers as this is faster. This has quite some dynamic dispatch, so prefer `rolling_min`, `rolling_max`, `rolling_mean`, `rolling_sum` over this.
pub fn rank(self, options: RankOptions) -> Expr
pub fn diff(self, n: usize, null_behavior: NullBehavior) -> Expr
pub fn upper_bound(self) -> Expr
pub fn upper_bound(self) -> Expr
Get the maximal value that can be held by this dtype.
pub fn lower_bound(self) -> Expr
pub fn lower_bound(self) -> Expr
Get the minimal value that can be held by this dtype.
pub fn reshape(self, dims: &[i64]) -> Expr
pub fn shuffle(self, seed: Option<u64>) -> Expr
pub fn sample_n(
self,
n: usize,
with_replacement: bool,
shuffle: bool,
seed: Option<u64>
) -> Expr
pub fn sample_frac(
self,
frac: f64,
with_replacement: bool,
shuffle: bool,
seed: Option<u64>
) -> Expr
pub fn shrink_dtype(self) -> Expr
pub fn shrink_dtype(self) -> Expr
Shrink numeric columns to the minimal required datatype
needed to fit the extrema of this `Series`.
This can be used to reduce memory pressure.
pub fn value_counts(self, multithreaded: bool, sorted: bool) -> Expr
pub fn value_counts(self, multithreaded: bool, sorted: bool) -> Expr
Count all unique values and create a struct mapping value to count. Note that it is better to turn multithreaded off in the aggregation context.
pub fn null_count(self) -> Expr
pub fn null_count(self) -> Expr
Get the null count of the column/group
pub fn set_sorted(self, sorted: IsSorted) -> Expr
pub fn set_sorted(self, sorted: IsSorted) -> Expr
Set this `Series` as `sorted` so that downstream code can use
fast paths for sorted arrays.
Warning
This can lead to incorrect results if this `Series` is not sorted!!
Use with care!
pub fn str(self) -> StringNameSpace
pub fn dt(self) -> DateLikeNameSpace
pub fn arr(self) -> ListNameSpace
pub fn cat(self) -> CategoricalNameSpace
pub fn struct_(self) -> StructNameSpace
Trait Implementations
sourceimpl ExprEvalExtension for Expr
impl ExprEvalExtension for Expr
impl<'a> IntoIterator for &'a Expr
impl<'a> IntoIterator for &'a Expr
impl Eq for Expr
impl StructuralPartialEq for Expr
Auto Trait Implementations
impl !RefUnwindSafe for Expr
impl Send for Expr
impl Sync for Expr
impl Unpin for Expr
impl !UnwindSafe for Expr
Blanket Implementations
sourceimpl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
sourceimpl<Q, K> Equivalent<K> for Qwhere
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
impl<Q, K> Equivalent<K> for Qwhere
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
sourcefn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to `key` and return `true` if they are equal.