Struct polars_lazy::frame::LazyFrame
source · [−]pub struct LazyFrame {
pub logical_plan: LogicalPlan,
/* private fields */
}
Expand description
Lazy abstraction over an eager DataFrame.
It really is an abstraction over a logical plan. The methods of this struct will incrementally
modify a logical plan until output is requested (via collect)
Fields
logical_plan: LogicalPlan
Implementations
sourceimpl LazyFrame
impl LazyFrame
sourcepub fn scan_ipc(path: String, args: ScanArgsIpc) -> Result<Self>
Available on crate feature ipc
only.
pub fn scan_ipc(path: String, args: ScanArgsIpc) -> Result<Self>
ipc
only. Create a LazyFrame directly from an IPC scan.
sourceimpl LazyFrame
impl LazyFrame
sourcepub fn scan_parquet(path: String, args: ScanArgsParquet) -> Result<Self>
Available on crate feature parquet
only.
pub fn scan_parquet(path: String, args: ScanArgsParquet) -> Result<Self>
parquet
only. Create a LazyFrame directly from a parquet scan.
sourceimpl LazyFrame
impl LazyFrame
pub fn scan_from_python_function(schema: Schema, scan_fn: Vec<u8>) -> Self
sourceimpl LazyFrame
impl LazyFrame
pub fn anonymous_scan(
function: Arc<dyn AnonymousScan>,
args: ScanArgsAnonymous
) -> Result<Self>
sourceimpl LazyFrame
impl LazyFrame
sourcepub fn schema(&self) -> SchemaRef
pub fn schema(&self) -> SchemaRef
Get a handle to the schema of the current LazyFrame computation.
sourcepub fn with_optimizations(self, opt_state: OptState) -> Self
pub fn with_optimizations(self, opt_state: OptState) -> Self
Set allowed optimizations
sourcepub fn without_optimizations(self) -> Self
pub fn without_optimizations(self) -> Self
Turn off all optimizations
sourcepub fn with_projection_pushdown(self, toggle: bool) -> Self
pub fn with_projection_pushdown(self, toggle: bool) -> Self
Toggle projection pushdown optimization.
sourcepub fn with_predicate_pushdown(self, toggle: bool) -> Self
pub fn with_predicate_pushdown(self, toggle: bool) -> Self
Toggle predicate pushdown optimization.
sourcepub fn with_type_coercion(self, toggle: bool) -> Self
pub fn with_type_coercion(self, toggle: bool) -> Self
Toggle type coercion optimization.
sourcepub fn with_simplify_expr(self, toggle: bool) -> Self
pub fn with_simplify_expr(self, toggle: bool) -> Self
Toggle expression simplification optimization on or off
sourcepub fn with_aggregate_pushdown(self, toggle: bool) -> Self
pub fn with_aggregate_pushdown(self, toggle: bool) -> Self
Toggle aggregate pushdown.
sourcepub fn with_string_cache(self, toggle: bool) -> Self
pub fn with_string_cache(self, toggle: bool) -> Self
Toggle global string cache.
sourcepub fn with_slice_pushdown(self, toggle: bool) -> Self
pub fn with_slice_pushdown(self, toggle: bool) -> Self
Toggle slice pushdown optimization
sourcepub fn describe_plan(&self) -> String
pub fn describe_plan(&self) -> String
Describe the logical plan.
sourcepub fn describe_optimized_plan(&self) -> Result<String>
pub fn describe_optimized_plan(&self) -> Result<String>
Describe the optimized logical plan.
sourcepub fn sort(self, by_column: &str, options: SortOptions) -> Self
pub fn sort(self, by_column: &str, options: SortOptions) -> Self
Add a sort operation to the logical plan.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.sort("sepal.width", Default::default())
}
sourcepub fn sort_by_exprs<E: AsRef<[Expr]>>(
self,
by_exprs: E,
reverse: Vec<bool>
) -> Self
pub fn sort_by_exprs<E: AsRef<[Expr]>>(
self,
by_exprs: E,
reverse: Vec<bool>
) -> Self
Add a sort operation to the logical plan.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.sort_by_exprs(vec![col("sepal.width")], vec![false])
}
sourcepub fn reverse(self) -> Self
pub fn reverse(self) -> Self
Reverse the DataFrame
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.reverse()
}
sourcepub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self where
I: IntoIterator<Item = T>,
J: IntoIterator<Item = S>,
T: AsRef<str>,
S: AsRef<str>,
pub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self where
I: IntoIterator<Item = T>,
J: IntoIterator<Item = S>,
T: AsRef<str>,
S: AsRef<str>,
Rename columns in the DataFrame.
sourcepub fn drop_columns<I, T>(self, columns: I) -> Self where
I: IntoIterator<Item = T>,
T: AsRef<str>,
pub fn drop_columns<I, T>(self, columns: I) -> Self where
I: IntoIterator<Item = T>,
T: AsRef<str>,
Removes columns from the DataFrame. Note that it's better to only select the columns you need and let the projection pushdown optimize away the unneeded columns.
sourcepub fn shift(self, periods: i64) -> Self
pub fn shift(self, periods: i64) -> Self
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift
operation.
sourcepub fn shift_and_fill<E: Into<Expr>>(self, periods: i64, fill_value: E) -> Self
pub fn shift_and_fill<E: Into<Expr>>(self, periods: i64, fill_value: E) -> Self
Shift the values by a given period and fill the parts that will be empty due to this operation
with the result of the fill_value
expression.
See the method on Series for more info on the shift
operation.
sourcepub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
Fill null values in the DataFrame.
sourcepub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
Fill NaN values in the DataFrame
sourcepub fn cache(self) -> Self
pub fn cache(self) -> Self
Caches the result into a new LazyFrame. This should be used to prevent computations from running multiple times.
sourcepub fn fetch(self, n_rows: usize) -> Result<DataFrame>
pub fn fetch(self, n_rows: usize) -> Result<DataFrame>
Fetch is like a collect operation, but it overrides the number of rows read by every scan operation. This is a utility that helps debug a query on a smaller number of rows.
Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.
pub fn optimize(
self,
lp_arena: &mut Arena<ALogicalPlan>,
expr_arena: &mut Arena<AExpr>
) -> Result<Node>
sourcepub fn collect(self) -> Result<DataFrame>
pub fn collect(self) -> Result<DataFrame>
Execute all the lazy operations and collect them into a DataFrame. The query is optimized before execution.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> Result<DataFrame> {
df.lazy()
.groupby([col("foo")])
.agg([col("bar").sum(), col("ham").mean().alias("avg_ham")])
.collect()
}
sourcepub fn filter(self, predicate: Expr) -> Self
pub fn filter(self, predicate: Expr) -> Self
Filter by some predicate expression.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.filter(col("sepal.width").is_not_null())
.select(&[col("sepal.width"), col("sepal.length")])
}
sourcepub fn select<E: AsRef<[Expr]>>(self, exprs: E) -> Self
pub fn select<E: AsRef<[Expr]>>(self, exprs: E) -> Self
Select (and rename) columns from the query.
Columns can be selected with col.
If you want to select all columns, use col("*").
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// This function selects column "foo" and column "bar".
/// Column "bar" is renamed to "ham".
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[col("foo"),
col("bar").alias("ham")])
}
/// This function selects all columns except "foo"
fn exclude_a_column(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[col("*").exclude(["foo"])])
}
sourcepub fn groupby<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
pub fn groupby<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
Group by and aggregate.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
use polars_arrow::prelude::QuantileInterpolOptions;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.groupby([col("date")])
.agg([
col("rain").min(),
col("rain").sum(),
col("rain").quantile(0.5, QuantileInterpolOptions::Nearest).alias("median_rain"),
])
}
pub fn groupby_rolling<E: AsRef<[Expr]>>(
self,
by: E,
options: RollingGroupOptions
) -> LazyGroupBy
pub fn groupby_dynamic<E: AsRef<[Expr]>>(
self,
by: E,
options: DynamicGroupOptions
) -> LazyGroupBy
sourcepub fn groupby_stable<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
pub fn groupby_stable<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
Similar to groupby, but order of the DataFrame is maintained.
sourcepub fn left_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
pub fn left_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
Left join this query with another lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.left_join(other, col("foo"), col("bar"))
}
sourcepub fn outer_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
pub fn outer_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
Outer join this query with another lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.outer_join(other, col("foo"), col("bar"))
}
sourcepub fn inner_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
pub fn inner_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
Inner join this query with another lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.inner_join(other, col("foo"), col("bar").cast(DataType::Utf8))
}
sourcepub fn cross_join(self, other: LazyFrame) -> LazyFrame
pub fn cross_join(self, other: LazyFrame) -> LazyFrame
Creates the cartesian product from both frames, preserves the order of the left keys.
sourcepub fn join<E: AsRef<[Expr]>>(
self,
other: LazyFrame,
left_on: E,
right_on: E,
how: JoinType
) -> LazyFrame
pub fn join<E: AsRef<[Expr]>>(
self,
other: LazyFrame,
left_on: E,
right_on: E,
how: JoinType
) -> LazyFrame
Generic join function that can join on multiple columns.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.join(other, [col("foo"), col("bar")], [col("foo"), col("bar")], JoinType::Inner)
}
sourcepub fn join_builder(self) -> JoinBuilder
pub fn join_builder(self) -> JoinBuilder
Control more join options with the join builder.
sourcepub fn with_column(self, expr: Expr) -> LazyFrame
pub fn with_column(self, expr: Expr) -> LazyFrame
Add a column to a DataFrame
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_column(df: DataFrame) -> LazyFrame {
df.lazy()
.with_column(
when(col("sepal.length").lt(lit(5.0)))
.then(lit(10))
.otherwise(lit(1))
.alias("new_column_name"),
)
}
sourcepub fn with_columns<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
pub fn with_columns<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
Add multiple columns to a DataFrame.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_columns(df: DataFrame) -> LazyFrame {
df.lazy()
.with_columns(
vec![lit(10).alias("foo"), lit(100).alias("bar")]
)
}
sourcepub fn quantile(
self,
quantile: f64,
interpol: QuantileInterpolOptions
) -> LazyFrame
pub fn quantile(
self,
quantile: f64,
interpol: QuantileInterpolOptions
) -> LazyFrame
Aggregate all the columns as their quantile values.
sourcepub fn explode<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
columns: E
) -> LazyFrame
pub fn explode<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
columns: E
) -> LazyFrame
Apply explode operation. See eager explode.
sourcepub fn unique_stable(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
pub fn unique_stable(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
Keep unique rows and maintain order
sourcepub fn unique(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
pub fn unique(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
Keep unique rows, do not maintain order
sourcepub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame
pub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame
Drop null rows.
Equal to LazyFrame::filter(col("*").is_not_null())
sourcepub fn limit(self, n: IdxSize) -> LazyFrame
pub fn limit(self, n: IdxSize) -> LazyFrame
Limit the DataFrame to the first n rows. Note that if you don’t want the rows to be scanned,
use fetch.
sourcepub fn map<F>(
self,
function: F,
optimizations: Option<AllowedOptimizations>,
schema: Option<Schema>,
name: Option<&'static str>
) -> LazyFrame where
F: 'static + Fn(DataFrame) -> Result<DataFrame> + Send + Sync,
pub fn map<F>(
self,
function: F,
optimizations: Option<AllowedOptimizations>,
schema: Option<Schema>,
name: Option<&'static str>
) -> LazyFrame where
F: 'static + Fn(DataFrame) -> Result<DataFrame> + Send + Sync,
Apply a function/closure once the logical plan gets executed.
Warning
This can blow up in your face if the schema is changed due to the operation. The optimizer relies on a correct schema.
You can toggle certain optimizations off.
Trait Implementations
sourceimpl From<LogicalPlan> for LazyFrame
impl From<LogicalPlan> for LazyFrame
sourcefn from(plan: LogicalPlan) -> Self
fn from(plan: LogicalPlan) -> Self
Converts to this type from the input type.
Auto Trait Implementations
impl !RefUnwindSafe for LazyFrame
impl Send for LazyFrame
impl Sync for LazyFrame
impl Unpin for LazyFrame
impl !UnwindSafe for LazyFrame
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more