Struct polars::prelude::LazyFrame [−][src]
pub struct LazyFrame { /* fields omitted */ }
Lazy abstraction over an eager DataFrame.
It is really an abstraction over a logical plan. The methods of this struct incrementally
modify that logical plan until output is requested (via collect).
Implementations
impl LazyFrame
[src]
impl LazyFrame
[src]pub fn new_from_parquet(
path: String,
stop_after_n_rows: Option<usize>,
cache: bool
) -> LazyFrame
[src]
path: String,
stop_after_n_rows: Option<usize>,
cache: bool
) -> LazyFrame
Create a LazyFrame directly from a parquet scan.
pub fn to_dot(&self, optimized: bool) -> Result<String, PolarsError>
[src]
Get a dot language representation of the LogicalPlan.
pub fn with_projection_pushdown(self, toggle: bool) -> LazyFrame
[src]
Toggle projection pushdown optimization.
pub fn with_predicate_pushdown(self, toggle: bool) -> LazyFrame
[src]
Toggle predicate pushdown optimization.
pub fn with_type_coercion(self, toggle: bool) -> LazyFrame
[src]
Toggle type coercion optimization.
pub fn with_simplify_expr(self, toggle: bool) -> LazyFrame
[src]
Toggle expression simplification optimization on or off
pub fn with_aggregate_pushdown(self, toggle: bool) -> LazyFrame
[src]
Toggle aggregate pushdown.
pub fn with_string_cache(self, toggle: bool) -> LazyFrame
[src]
Toggle global string cache.
pub fn with_join_pruning(self, toggle: bool) -> LazyFrame
[src]
Toggle join pruning optimization
pub fn describe_plan(&self) -> String
[src]
Describe the logical plan.
pub fn describe_optimized_plan(&self) -> Result<String, PolarsError>
[src]
Describe the optimized logical plan.
pub fn sort(self, by_column: &str, reverse: bool) -> LazyFrame
[src]
Add a sort operation to the logical plan.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; /// Sort DataFrame by 'sepal.width' column fn example(df: DataFrame) -> LazyFrame { df.lazy() .sort("sepal.width", false) }
pub fn reverse(self) -> LazyFrame
[src]
Reverse the DataFrame
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn example(df: DataFrame) -> LazyFrame { df.lazy() .reverse() }
pub fn with_column_renamed(
self,
existing_name: &str,
new_name: &str
) -> LazyFrame
[src]
self,
existing_name: &str,
new_name: &str
) -> LazyFrame
Rename a column in the DataFrame
pub fn shift(self, periods: i64) -> LazyFrame
[src]
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift operation.
pub fn shift_and_fill(self, periods: i64, fill_value: Expr) -> LazyFrame
[src]
Shift the values by a given period and fill the parts that will be empty due to this operation
with the result of the fill_value expression.
See the method on Series for more info on the shift operation.
pub fn fill_none(self, fill_value: Expr) -> LazyFrame
[src]
Fill none values in the DataFrame
pub fn cache(self) -> LazyFrame
[src]
Caches the result into a new LazyFrame. This should be used to prevent computations from running multiple times.
pub fn fetch(self, n_rows: usize) -> Result<DataFrame, PolarsError>
[src]
Fetch is like a collect operation, but it overrides the number of rows read by every scan operation. This is a utility that helps debug a query on a smaller number of rows.
Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.
pub fn optimize(
self,
lp_arena: &mut Arena<ALogicalPlan>,
expr_arena: &mut Arena<AExpr>
) -> Result<Node, PolarsError>
[src]
self,
lp_arena: &mut Arena<ALogicalPlan>,
expr_arena: &mut Arena<AExpr>
) -> Result<Node, PolarsError>
pub fn collect(self) -> Result<DataFrame, PolarsError>
[src]
Execute all the lazy operations and collect them into a DataFrame. The query is optimized before execution.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn example(df: DataFrame) -> Result<DataFrame> { df.lazy() .groupby(vec![col("foo")]) .agg(vec!(col("bar").sum(), col("ham").mean().alias("avg_ham"))) .collect() }
pub fn filter(self, predicate: Expr) -> LazyFrame
[src]
Filter by some predicate expression.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn example(df: DataFrame) -> LazyFrame { df.lazy() .filter(col("sepal.width").is_not_null()) .select(&[col("sepal.width"), col("sepal.length")]) }
pub fn select<E>(self, exprs: E) -> LazyFrame where
E: AsRef<[Expr]>,
[src]
E: AsRef<[Expr]>,
Select (and rename) columns from the query.
Columns can be selected with col.
If you want to select all columns, use col("*").
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; /// This function selects column "foo" and column "bar". /// Column "bar" is renamed to "ham". fn example(df: DataFrame) -> LazyFrame { df.lazy() .select(&[col("foo"), col("bar").alias("ham")]) } /// This function selects all columns except "foo" fn exclude_a_column(df: DataFrame) -> LazyFrame { df.lazy() .select(&[col("*"), except("foo")]) }
pub fn groupby(self, by: Vec<Expr, Global>) -> LazyGroupBy
[src]
Group by and aggregate.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn example(df: DataFrame) -> LazyFrame { df.lazy() .groupby(vec![col("date")]) .agg(vec![ col("rain").min(), col("rain").sum(), col("rain").quantile(0.5).alias("median_rain"), ]) .sort("date", false) }
pub fn left_join(
self,
other: LazyFrame,
left_on: Expr,
right_on: Expr,
options: Option<JoinOptions>
) -> LazyFrame
[src]
self,
other: LazyFrame,
left_on: Expr,
right_on: Expr,
options: Option<JoinOptions>
) -> LazyFrame
Left join this query with another lazy query.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame { ldf .left_join(other, col("foo"), col("bar"), None) }
pub fn outer_join(
self,
other: LazyFrame,
left_on: Expr,
right_on: Expr,
options: Option<JoinOptions>
) -> LazyFrame
[src]
self,
other: LazyFrame,
left_on: Expr,
right_on: Expr,
options: Option<JoinOptions>
) -> LazyFrame
Outer join this query with another lazy query.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame { ldf .outer_join(other, col("foo"), col("bar"), None) }
pub fn inner_join(
self,
other: LazyFrame,
left_on: Expr,
right_on: Expr,
options: Option<JoinOptions>
) -> LazyFrame
[src]
self,
other: LazyFrame,
left_on: Expr,
right_on: Expr,
options: Option<JoinOptions>
) -> LazyFrame
Inner join this query with another lazy query.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame { ldf .inner_join(other, col("foo"), col("bar").cast(DataType::Utf8), None) }
pub fn join(
self,
other: LazyFrame,
left_on: Vec<Expr, Global>,
right_on: Vec<Expr, Global>,
options: Option<JoinOptions>,
how: JoinType
) -> LazyFrame
[src]
self,
other: LazyFrame,
left_on: Vec<Expr, Global>,
right_on: Vec<Expr, Global>,
options: Option<JoinOptions>,
how: JoinType
) -> LazyFrame
Generic join function that can join on multiple columns.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn example(ldf: LazyFrame, other: LazyFrame) -> LazyFrame { ldf .join(other, vec![col("foo"), col("bar")], vec![col("foo"), col("bar")], None, JoinType::Inner) }
pub fn with_column(self, expr: Expr) -> LazyFrame
[src]
Add a column to a DataFrame
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn add_column(df: DataFrame) -> LazyFrame { df.lazy() .with_column( when(col("sepal.length").lt(lit(5.0))) .then(lit(10)) .otherwise(lit(1)) .alias("new_column_name"), ) }
pub fn with_columns(self, exprs: Vec<Expr, Global>) -> LazyFrame
[src]
Add multiple columns to a DataFrame.
Example
use polars_core::prelude::*; use polars_lazy::prelude::*; fn add_columns(df: DataFrame) -> LazyFrame { df.lazy() .with_columns( vec![lit(10).alias("foo"), lit(100).alias("bar")] ) }
pub fn max(self) -> LazyFrame
[src]
Aggregate all the columns as their maximum values.
pub fn min(self) -> LazyFrame
[src]
Aggregate all the columns as their minimum values.
pub fn sum(self) -> LazyFrame
[src]
Aggregate all the columns as their sum values.
pub fn mean(self) -> LazyFrame
[src]
Aggregate all the columns as their mean values.
pub fn median(self) -> LazyFrame
[src]
Aggregate all the columns as their median values.
pub fn quantile(self, quantile: f64) -> LazyFrame
[src]
Aggregate all the columns as their quantile values.
pub fn std(self) -> LazyFrame
[src]
Aggregate all the columns as their standard deviation values.
pub fn var(self) -> LazyFrame
[src]
Aggregate all the columns as their variance values.
pub fn explode(self, columns: &[Expr]) -> LazyFrame
[src]
Apply explode operation. See eager explode.
pub fn drop_duplicates(
self,
maintain_order: bool,
subset: Option<Vec<String, Global>>
) -> LazyFrame
[src]
self,
maintain_order: bool,
subset: Option<Vec<String, Global>>
) -> LazyFrame
Drop duplicate rows. See eager.
pub fn drop_nulls(self, subset: Option<Vec<Expr, Global>>) -> LazyFrame
[src]
Drop null rows.
Equal to LazyFrame::filter(col("*").is_not_null())
pub fn slice(self, offset: i64, len: usize) -> LazyFrame
[src]
Slice the DataFrame.
pub fn first(self) -> LazyFrame
[src]
Get the first row.
pub fn last(self) -> LazyFrame
[src]
Get the last row
pub fn tail(self, n: usize) -> LazyFrame
[src]
Get the last n rows.
pub fn melt(
self,
id_vars: Vec<String, Global>,
value_vars: Vec<String, Global>
) -> LazyFrame
[src]
self,
id_vars: Vec<String, Global>,
value_vars: Vec<String, Global>
) -> LazyFrame
Melt the DataFrame from wide to long format
pub fn limit(self, n: usize) -> LazyFrame
[src]
Limit the DataFrame to the first n rows. Note that if you don’t want the rows to be scanned,
use fetch.
pub fn map<F>(
self,
function: F,
optimizations: Option<OptState>,
schema: Option<Schema>
) -> LazyFrame where
F: DataFrameUdf + 'static,
[src]
self,
function: F,
optimizations: Option<OptState>,
schema: Option<Schema>
) -> LazyFrame where
F: DataFrameUdf + 'static,
Apply a function/closure once the logical plan gets executed.
Warning
This can blow up in your face if the schema is changed due to the operation. The optimizer relies on a correct schema.
You can toggle certain optimizations off.
Trait Implementations
impl From<LogicalPlan> for LazyFrame
[src]
impl From<LogicalPlan> for LazyFrame
[src]pub fn from(plan: LogicalPlan) -> LazyFrame
[src]
Auto Trait Implementations
impl !RefUnwindSafe for LazyFrame
impl !RefUnwindSafe for LazyFrame
impl !UnwindSafe for LazyFrame
impl !UnwindSafe for LazyFrame
Blanket Implementations
impl<T, U> Cast<U> for T where
U: FromCast<T>,
impl<T, U> Cast<U> for T where
U: FromCast<T>,
pub fn cast(self) -> U
impl<T> FromCast<T> for T
impl<T> FromCast<T> for T
pub fn from_cast(t: T) -> T
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
impl<V, T> VZip<V> for T where
V: MultiLane<T>,