pub struct LazyFrame {
pub logical_plan: LogicalPlan,
/* private fields */
}
Expand description
Lazy abstraction over an eager DataFrame.
In practice it is an abstraction over a logical plan. The methods of this struct incrementally
modify that logical plan until output is requested (via collect).
Fields
logical_plan: LogicalPlan
Implementations
sourceimpl LazyFrame
impl LazyFrame
sourcepub fn scan_ipc(
path: impl AsRef<Path>,
args: ScanArgsIpc
) -> Result<LazyFrame, PolarsError>
pub fn scan_ipc(
path: impl AsRef<Path>,
args: ScanArgsIpc
) -> Result<LazyFrame, PolarsError>
Create a LazyFrame directly from an IPC scan.
sourceimpl LazyFrame
impl LazyFrame
sourcepub fn scan_parquet_files<P>(
paths: Vec<P, Global>,
args: ScanArgsParquet
) -> Result<LazyFrame, PolarsError>where
P: AsRef<Path>,
👎Deprecated: please use concat_lf instead.
pub fn scan_parquet_files<P>(
paths: Vec<P, Global>,
args: ScanArgsParquet
) -> Result<LazyFrame, PolarsError>where
P: AsRef<Path>,
Deprecated: please use concat_lf instead.
Create a LazyFrame directly from a parquet scan.
sourcepub fn scan_parquet(
path: impl AsRef<Path>,
args: ScanArgsParquet
) -> Result<LazyFrame, PolarsError>
pub fn scan_parquet(
path: impl AsRef<Path>,
args: ScanArgsParquet
) -> Result<LazyFrame, PolarsError>
Create a LazyFrame directly from a parquet scan.
sourceimpl LazyFrame
impl LazyFrame
pub fn anonymous_scan(
function: Arc<dyn AnonymousScan + 'static>,
args: ScanArgsAnonymous
) -> Result<LazyFrame, PolarsError>
sourceimpl LazyFrame
impl LazyFrame
sourcepub fn schema(&self) -> Result<Arc<Schema>, PolarsError>
pub fn schema(&self) -> Result<Arc<Schema>, PolarsError>
Get a hold on the schema of the current LazyFrame computation.
sourcepub fn with_optimizations(self, opt_state: OptState) -> LazyFrame
pub fn with_optimizations(self, opt_state: OptState) -> LazyFrame
Set allowed optimizations
sourcepub fn without_optimizations(self) -> LazyFrame
pub fn without_optimizations(self) -> LazyFrame
Turn off all optimizations
sourcepub fn with_projection_pushdown(self, toggle: bool) -> LazyFrame
pub fn with_projection_pushdown(self, toggle: bool) -> LazyFrame
Toggle projection pushdown optimization.
sourcepub fn with_predicate_pushdown(self, toggle: bool) -> LazyFrame
pub fn with_predicate_pushdown(self, toggle: bool) -> LazyFrame
Toggle predicate pushdown optimization.
sourcepub fn with_type_coercion(self, toggle: bool) -> LazyFrame
pub fn with_type_coercion(self, toggle: bool) -> LazyFrame
Toggle type coercion optimization.
sourcepub fn with_simplify_expr(self, toggle: bool) -> LazyFrame
pub fn with_simplify_expr(self, toggle: bool) -> LazyFrame
Toggle expression simplification optimization on or off
sourcepub fn with_aggregate_pushdown(self, toggle: bool) -> LazyFrame
pub fn with_aggregate_pushdown(self, toggle: bool) -> LazyFrame
Toggle aggregate pushdown.
sourcepub fn with_slice_pushdown(self, toggle: bool) -> LazyFrame
pub fn with_slice_pushdown(self, toggle: bool) -> LazyFrame
Toggle slice pushdown optimization
sourcepub fn describe_plan(&self) -> String
pub fn describe_plan(&self) -> String
Describe the logical plan.
sourcepub fn describe_optimized_plan(&self) -> Result<String, PolarsError>
pub fn describe_optimized_plan(&self) -> Result<String, PolarsError>
Describe the optimized logical plan.
sourcepub fn sort(self, by_column: &str, options: SortOptions) -> LazyFrame
pub fn sort(self, by_column: &str, options: SortOptions) -> LazyFrame
Add a sort operation to the logical plan.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.sort("sepal.width", Default::default())
}
sourcepub fn sort_by_exprs<E, B>(
self,
by_exprs: E,
reverse: B,
nulls_last: bool
) -> LazyFramewhere
E: AsRef<[Expr]>,
B: AsRef<[bool]>,
pub fn sort_by_exprs<E, B>(
self,
by_exprs: E,
reverse: B,
nulls_last: bool
) -> LazyFramewhere
E: AsRef<[Expr]>,
B: AsRef<[bool]>,
Add a sort operation to the logical plan.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.sort_by_exprs(vec![col("sepal.width")], vec![false], false)
}
sourcepub fn reverse(self) -> LazyFrame
pub fn reverse(self) -> LazyFrame
Reverse the DataFrame
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.reverse()
}
sourcepub fn rename<I, J, T, S>(self, existing: I, new: J) -> LazyFramewhere
I: IntoIterator<Item = T>,
J: IntoIterator<Item = S>,
T: AsRef<str>,
S: AsRef<str>,
pub fn rename<I, J, T, S>(self, existing: I, new: J) -> LazyFramewhere
I: IntoIterator<Item = T>,
J: IntoIterator<Item = S>,
T: AsRef<str>,
S: AsRef<str>,
Rename columns in the DataFrame.
sourcepub fn drop_columns<I, T>(self, columns: I) -> LazyFramewhere
I: IntoIterator<Item = T>,
T: AsRef<str>,
pub fn drop_columns<I, T>(self, columns: I) -> LazyFramewhere
I: IntoIterator<Item = T>,
T: AsRef<str>,
Removes columns from the DataFrame. Note that it's better to only select the columns you need and let the projection pushdown optimize away the unneeded columns.
sourcepub fn shift(self, periods: i64) -> LazyFrame
pub fn shift(self, periods: i64) -> LazyFrame
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift operation.
sourcepub fn shift_and_fill<E>(self, periods: i64, fill_value: E) -> LazyFramewhere
E: Into<Expr>,
pub fn shift_and_fill<E>(self, periods: i64, fill_value: E) -> LazyFramewhere
E: Into<Expr>,
Shift the values by a given period and fill the parts that will be empty due to this operation
with the result of the fill_value expression.
See the method on Series for more info on the shift operation.
sourcepub fn fill_null<E>(self, fill_value: E) -> LazyFramewhere
E: Into<Expr>,
pub fn fill_null<E>(self, fill_value: E) -> LazyFramewhere
E: Into<Expr>,
Fill null values in the DataFrame.
sourcepub fn fill_nan<E>(self, fill_value: E) -> LazyFramewhere
E: Into<Expr>,
pub fn fill_nan<E>(self, fill_value: E) -> LazyFramewhere
E: Into<Expr>,
Fill NaN values in the DataFrame
sourcepub fn cache(self) -> LazyFrame
pub fn cache(self) -> LazyFrame
Caches the result into a new LazyFrame. This should be used to prevent computations running multiple times
sourcepub fn fetch(self, n_rows: usize) -> Result<DataFrame, PolarsError>
pub fn fetch(self, n_rows: usize) -> Result<DataFrame, PolarsError>
Fetch is like a collect operation, but it overwrites the number of rows read by every scan operation. This is a utility that helps debug a query on a smaller number of rows.
Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.
pub fn optimize(
self,
lp_arena: &mut Arena<ALogicalPlan>,
expr_arena: &mut Arena<AExpr>
) -> Result<Node, PolarsError>
sourcepub fn collect(self) -> Result<DataFrame, PolarsError>
pub fn collect(self) -> Result<DataFrame, PolarsError>
Execute all the lazy operations and collect them into a DataFrame. The query is optimized before execution.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.lazy()
.groupby([col("foo")])
.agg([col("bar").sum(), col("ham").mean().alias("avg_ham")])
.collect()
}
pub fn profile(self) -> Result<(DataFrame, DataFrame), PolarsError>
sourcepub fn filter(self, predicate: Expr) -> LazyFrame
pub fn filter(self, predicate: Expr) -> LazyFrame
Filter by some predicate expression.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.filter(col("sepal.width").is_not_null())
.select(&[col("sepal.width"), col("sepal.length")])
}
sourcepub fn select<E>(self, exprs: E) -> LazyFramewhere
E: AsRef<[Expr]>,
pub fn select<E>(self, exprs: E) -> LazyFramewhere
E: AsRef<[Expr]>,
Select (and rename) columns from the query.
Columns can be selected with col.
If you want to select all columns, use col("*").
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// This function selects column "foo" and column "bar".
/// Column "bar" is renamed to "ham".
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[col("foo"),
col("bar").alias("ham")])
}
/// This function selects all columns except "foo"
fn exclude_a_column(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[col("*").exclude(["foo"])])
}
sourcepub fn groupby<E, IE>(self, by: E) -> LazyGroupBywhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
pub fn groupby<E, IE>(self, by: E) -> LazyGroupBywhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
Group by and aggregate.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
use polars_arrow::prelude::QuantileInterpolOptions;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.groupby([col("date")])
.agg([
col("rain").min(),
col("rain").sum(),
col("rain").quantile(0.5, QuantileInterpolOptions::Nearest).alias("median_rain"),
])
}
sourcepub fn groupby_rolling<E>(
self,
by: E,
options: RollingGroupOptions
) -> LazyGroupBywhere
E: AsRef<[Expr]>,
pub fn groupby_rolling<E>(
self,
by: E,
options: RollingGroupOptions
) -> LazyGroupBywhere
E: AsRef<[Expr]>,
Create rolling groups based on a time column.
Also works for index values of type Int32 or Int64.
Different from groupby_dynamic, the windows are now determined by the
individual values and are not of constant intervals. For constant intervals use
groupby_dynamic.
sourcepub fn groupby_dynamic<E>(
self,
by: E,
options: DynamicGroupOptions
) -> LazyGroupBywhere
E: AsRef<[Expr]>,
pub fn groupby_dynamic<E>(
self,
by: E,
options: DynamicGroupOptions
) -> LazyGroupBywhere
E: AsRef<[Expr]>,
Group based on a time value (or index value of type Int32, Int64).
Time windows are calculated and rows are assigned to windows. Unlike a normal groupby, a row can be a member of multiple groups. The time/index window could be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.
A window is defined by:
- every: interval of the window
- period: length of the window
- offset: offset of the window
The by argument should be empty [] if you don’t want to combine this
with an ordinary groupby on these keys.
sourcepub fn groupby_stable<E, IE>(self, by: E) -> LazyGroupBywhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
pub fn groupby_stable<E, IE>(self, by: E) -> LazyGroupBywhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
Similar to groupby, but order of the DataFrame is maintained.
sourcepub fn left_join<E>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFramewhere
E: Into<Expr>,
pub fn left_join<E>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFramewhere
E: Into<Expr>,
Join query with other lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.left_join(other, col("foo"), col("bar"))
}
sourcepub fn outer_join<E>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFramewhere
E: Into<Expr>,
pub fn outer_join<E>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFramewhere
E: Into<Expr>,
Join query with other lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.outer_join(other, col("foo"), col("bar"))
}
sourcepub fn inner_join<E>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFramewhere
E: Into<Expr>,
pub fn inner_join<E>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFramewhere
E: Into<Expr>,
Join query with other lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.inner_join(other, col("foo"), col("bar").cast(DataType::Utf8))
}
sourcepub fn cross_join(self, other: LazyFrame) -> LazyFrame
pub fn cross_join(self, other: LazyFrame) -> LazyFrame
Creates the cartesian product from both frames, preserves the order of the left keys.
sourcepub fn join<E>(
self,
other: LazyFrame,
left_on: E,
right_on: E,
how: JoinType
) -> LazyFramewhere
E: AsRef<[Expr]>,
pub fn join<E>(
self,
other: LazyFrame,
left_on: E,
right_on: E,
how: JoinType
) -> LazyFramewhere
E: AsRef<[Expr]>,
Generic join function that can join on multiple columns.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.join(other, [col("foo"), col("bar")], [col("foo"), col("bar")], JoinType::Inner)
}
sourcepub fn join_builder(self) -> JoinBuilder
pub fn join_builder(self) -> JoinBuilder
Control more join options with the join builder.
sourcepub fn with_column(self, expr: Expr) -> LazyFrame
pub fn with_column(self, expr: Expr) -> LazyFrame
Add a column to a DataFrame
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_column(df: DataFrame) -> LazyFrame {
df.lazy()
.with_column(
when(col("sepal.length").lt(lit(5.0)))
.then(lit(10))
.otherwise(lit(1))
.alias("new_column_name"),
)
}
sourcepub fn with_columns<E>(self, exprs: E) -> LazyFramewhere
E: AsRef<[Expr]>,
pub fn with_columns<E>(self, exprs: E) -> LazyFramewhere
E: AsRef<[Expr]>,
Add multiple columns to a DataFrame.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_columns(df: DataFrame) -> LazyFrame {
df.lazy()
.with_columns(
vec![lit(10).alias("foo"), lit(100).alias("bar")]
)
}
pub fn with_context<C>(self, contexts: C) -> LazyFramewhere
C: AsRef<[LazyFrame]>,
sourcepub fn quantile(
self,
quantile: f64,
interpol: QuantileInterpolOptions
) -> LazyFrame
pub fn quantile(
self,
quantile: f64,
interpol: QuantileInterpolOptions
) -> LazyFrame
Aggregate all the columns as their quantile values.
sourcepub fn std(self, ddof: u8) -> LazyFrame
pub fn std(self, ddof: u8) -> LazyFrame
Aggregate all the columns as their standard deviation values.
sourcepub fn explode<E, IE>(self, columns: E) -> LazyFramewhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
pub fn explode<E, IE>(self, columns: E) -> LazyFramewhere
E: AsRef<[IE]>,
IE: Into<Expr> + Clone,
Apply explode operation. See eager explode.
sourcepub fn unique_stable(
self,
subset: Option<Vec<String, Global>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
pub fn unique_stable(
self,
subset: Option<Vec<String, Global>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
Keep unique rows and maintain order
sourcepub fn unique(
self,
subset: Option<Vec<String, Global>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
pub fn unique(
self,
subset: Option<Vec<String, Global>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
Keep unique rows, do not maintain order
sourcepub fn drop_nulls(self, subset: Option<Vec<Expr, Global>>) -> LazyFrame
pub fn drop_nulls(self, subset: Option<Vec<Expr, Global>>) -> LazyFrame
Drop null rows.
Equal to LazyFrame::filter(col("*").is_not_null())
sourcepub fn limit(self, n: u32) -> LazyFrame
pub fn limit(self, n: u32) -> LazyFrame
Limit the DataFrame to the first n rows. Note: if you don’t want the rows to be scanned,
use fetch.
sourcepub fn map<F>(
self,
function: F,
optimizations: Option<OptState>,
schema: Option<Arc<dyn UdfSchema + 'static>>,
name: Option<&'static str>
) -> LazyFramewhere
F: 'static + Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,
pub fn map<F>(
self,
function: F,
optimizations: Option<OptState>,
schema: Option<Arc<dyn UdfSchema + 'static>>,
name: Option<&'static str>
) -> LazyFramewhere
F: 'static + Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,
Apply a function/closure once the logical plan gets executed.
Warning
This can blow up in your face if the schema is changed due to the operation. The optimizer relies on a correct schema.
You can toggle certain optimizations off.