Expand description
Polars Lazy cookbook
This page should serve a cookbook to quickly get you started with polars’ query engine. The lazy API allows you to create complex well performing queries on top of Polars eager.
Tree Of Contents
Start a lazy computation
use polars::prelude::*;
use polars::df;
let df = df![
"a" => [1, 2, 3],
"b" => [None, Some("a"), Some("b")]
]?;
// from an eager DataFrame
let lf: LazyFrame = df.lazy();
// scan a csv file lazily
let lf: LazyFrame = LazyCsvReader::new("some_path".into())
.has_header(true)
.finish();
// scan a parquet file lazily
let lf: LazyFrame = LazyFrame::scan_parquet("some_path".into(), None, true);
Filter
use polars::prelude::*;
use polars::df;
let df = df![
"a" => [1, 2, 3],
"b" => [None, Some("a"), Some("b")]
]?;
let filtered = df.lazy()
.filter(col("a").gt(lit(2)))
.collect()?;
// filtered:
// ╭─────┬─────╮
// │ a ┆ b │
// │ --- ┆ --- │
// │ i64 ┆ str │
// ╞═════╪═════╡
// │ 3 ┆ "c" │
// ╰─────┴─────╯
Sort
use polars::prelude::*;
use polars::df;
let df = df![
"a" => [1, 2, 3],
"b" => ["a", "a", "b"]
]?;
// sort this DataFrame by multiple columns
// ordering of the columns
let reverse = vec![true, false];
let sorted = df.lazy()
.sort_by_exprs(vec![col("b"), col("a")], reverse)
.collect()?;
// sorted:
// ╭─────┬─────╮
// │ a ┆ b │
// │ --- ┆ --- │
// │ i64 ┆ str │
// ╞═════╪═════╡
// │ 1 ┆ "a" │
// ├╌╌╌╌╌┼╌╌╌╌╌┤
// │ 2 ┆ "a" │
// ├╌╌╌╌╌┼╌╌╌╌╌┤
// │ 3 ┆ "b" │
// ╰─────┴─────╯
Groupby
This example is from the polars user guide.
use polars::prelude::*;
let df = LazyCsvReader::new("reddit.csv".into())
.has_header(true)
.with_delimiter(b',')
.finish()
.groupby([col("comment_karma")])
.agg([col("name").n_unique().alias("unique_names"), col("link_karma").max()])
// take only 100 rows.
.fetch(100)?;
Joins
use polars::prelude::*;
use polars::df;
let df_a = df![
"a" => [1, 2, 1, 1],
"b" => ["a", "b", "c", "c"],
"c" => [0, 1, 2, 3]
]?;
let df_b = df![
"foo" => [1, 1, 1],
"bar" => ["a", "c", "c"],
"ham" => ["let", "var", "const"]
]?;
let lf_a = df_a.clone().lazy();
let lf_b = df_b.clone().lazy();
let joined = lf_a.join(lf_b, vec![col("a")], vec![col("foo")], JoinType::Outer).collect()?;
// joined:
// ╭─────┬─────┬─────┬──────┬─────────╮
// │ b ┆ c ┆ a ┆ bar ┆ ham │
// │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
// │ str ┆ i64 ┆ i64 ┆ str ┆ str │
// ╞═════╪═════╪═════╪══════╪═════════╡
// │ "a" ┆ 0 ┆ 1 ┆ "a" ┆ "let" │
// ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
// │ "a" ┆ 0 ┆ 1 ┆ "c" ┆ "var" │
// ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
// │ "a" ┆ 0 ┆ 1 ┆ "c" ┆ "const" │
// ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
// │ "b" ┆ 1 ┆ 2 ┆ null ┆ null │
// ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
// │ "c" ┆ 2 ┆ 1 ┆ null ┆ null │
// ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
// │ "c" ┆ 3 ┆ 1 ┆ null ┆ null │
// ╰─────┴─────┴─────┴──────┴─────────╯
// other join syntax options
let inner = lf_a.inner_join(lf_b, col("a"), col("foo")).collect()?;
let left = lf_a.left_join(lf_b, col("a"), col("foo")).collect()?;
let outer = lf_a.outer_join(lf_b, col("a"), col("foo")).collect()?;
let joined_with_builder = lf_a.join_builder()
.with(lf_b)
.left_on(vec![col("a")])
.right_on(vec![col("foo")])
.how(JoinType::Inner)
.force_parallel(true)
.finish()
.collect()?;
Conditionally apply
If we want to create a new column based on some condition, we can use the .when()/.then()/.otherwise()
expressions.
when
- accepts a predicate epxressionthen
- expression to use whenpredicate == true
otherwise
- expression to use whenpredicate == false
use polars::prelude::*;
use polars::df;
let df = df![
"range" => [1, 2, 3, 4, 5, 6, 8, 9, 10],
"left" => (0..10).map(|_| Some("foo")).collect::<Vec<_>>(),
"right" => (0..10).map(|_| Some("bar")).collect::<Vec<_>>()
]?;
let new = df.lazy()
.with_column(when(col("range").gt_eq(lit(5)))
.then(col("left"))
.otherwise(col("right")).alias("foo_or_bar")
).collect()?;
// new:
// ╭───────┬───────┬───────┬────────────╮
// │ range ┆ left ┆ right ┆ foo_or_bar │
// │ --- ┆ --- ┆ --- ┆ --- │
// │ i64 ┆ str ┆ str ┆ str │
// ╞═══════╪═══════╪═══════╪════════════╡
// │ 0 ┆ "foo" ┆ "bar" ┆ "bar" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 1 ┆ "foo" ┆ "bar" ┆ "bar" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 2 ┆ "foo" ┆ "bar" ┆ "bar" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 3 ┆ "foo" ┆ "bar" ┆ "bar" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ ... ┆ ... ┆ ... ┆ ... │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 5 ┆ "foo" ┆ "bar" ┆ "foo" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 6 ┆ "foo" ┆ "bar" ┆ "foo" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 7 ┆ "foo" ┆ "bar" ┆ "foo" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 8 ┆ "foo" ┆ "bar" ┆ "foo" │
// ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
// │ 9 ┆ "foo" ┆ "bar" ┆ "foo" │
// ╰───────┴───────┴───────┴────────────╯