Module polars::docs::lazy [−][src]
Expand description
Polars Lazy cookbook
This page should serve a cookbook to quickly get you started with polars’ query engine. The lazy API allows you to create complex well performing queries on top of Polars eager.
Tree Of Contents
Start a lazy computation
use polars::prelude::*; use polars::df; let df = df![ "a" => [1, 2, 3], "b" => [None, Some("a"), Some("b")] ]?; // from an eager DataFrame let lf: LazyFrame = df.lazy(); // scan a csv file lazily let lf: LazyFrame = LazyCsvReader::new("some_path".into()) .has_header(true) .finish(); // scan a parquet file lazily let lf: LazyFrame = LazyFrame::new_from_parquet("some_path".into(), None, true);
Filter
use polars::prelude::*; use polars::df; let df = df![ "a" => [1, 2, 3], "b" => [None, Some("a"), Some("b")] ]?; let filtered = df.lazy() .filter(col("a").gt(lit(2))) .collect()?; // filtered: // ╭─────┬─────╮ // │ a ┆ b │ // │ --- ┆ --- │ // │ i64 ┆ str │ // ╞═════╪═════╡ // │ 3 ┆ "c" │ // ╰─────┴─────╯
Sort
use polars::prelude::*; use polars::df; let df = df![ "a" => [1, 2, 3], "b" => ["a", "a", "b"] ]?; // sort this DataFrame by multiple columns // ordering of the columns let reverse = vec![true, false]; let sorted = df.lazy() .sort_by_exprs(vec![col("b"), col("a")], reverse) .collect()?; // sorted: // ╭─────┬─────╮ // │ a ┆ b │ // │ --- ┆ --- │ // │ i64 ┆ str │ // ╞═════╪═════╡ // │ 1 ┆ "a" │ // ├╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2 ┆ "a" │ // ├╌╌╌╌╌┼╌╌╌╌╌┤ // │ 3 ┆ "b" │ // ╰─────┴─────╯
Groupby
This example is from the polars user guide.
use polars::prelude::*; let df = LazyCsvReader::new("reddit.csv".into()) .has_header(true) .with_delimiter(b',') .finish() .groupby(vec![col("comment_karma")]) .agg(vec![col("name").n_unique().alias("unique_names"), col("link_karma").max()]) // take only 100 rows. .fetch(100)?;
Joins
use polars::prelude::*; use polars::df; let df_a = df![ "a" => [1, 2, 1, 1], "b" => ["a", "b", "c", "c"], "c" => [0, 1, 2, 3] ]?; let df_b = df![ "foo" => [1, 1, 1], "bar" => ["a", "c", "c"], "ham" => ["let", "var", "const"] ]?; let lf_a = df_a.clone().lazy(); let lf_b = df_b.clone().lazy(); let joined = lf_a.join(lf_b, vec![col("a")], vec![col("foo")], JoinType::Outer).collect()?; // joined: // ╭─────┬─────┬─────┬──────┬─────────╮ // │ b ┆ c ┆ a ┆ bar ┆ ham │ // │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ // │ str ┆ i64 ┆ i64 ┆ str ┆ str │ // ╞═════╪═════╪═════╪══════╪═════════╡ // │ "a" ┆ 0 ┆ 1 ┆ "a" ┆ "let" │ // ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ "a" ┆ 0 ┆ 1 ┆ "c" ┆ "var" │ // ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ "a" ┆ 0 ┆ 1 ┆ "c" ┆ "const" │ // ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ "b" ┆ 1 ┆ 2 ┆ null ┆ null │ // ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ "c" ┆ 2 ┆ 1 ┆ null ┆ null │ // ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ "c" ┆ 3 ┆ 1 ┆ null ┆ null │ // ╰─────┴─────┴─────┴──────┴─────────╯ // other join syntax options let inner = lf_a.inner_join(lf_b, col("a"), col("foo")).collect()?; let left = lf_a.left_join(lf_b, col("a"), col("foo")).collect()?; let outer = lf_a.outer_join(lf_b, col("a"), col("foo")).collect()?; let joined_with_builder = lf_a.join_builder() .with(lf_b) .left_on(vec![col("a")]) .right_on(vec![col("foo")]) .how(JoinType::Inner) .force_parallel(true) .finish() .collect()?;
Conditionally apply
If we want to create a new column based on some condition, we can use the .when()/.then()/.otherwise()
expressions.
when
- accpets a predicate epxressionthen
- expression to use whenpredicate == true
otherwise
- expression to use whenpredicate == false
use polars::prelude::*; use polars::df; let df = df![ "range" => [1, 2, 3, 4, 5, 6, 8, 9, 10], "left" => (0..10).map(|_| Some("foo")).collect::<Vec<_>>(), "right" => (0..10).map(|_| Some("bar")).collect::<Vec<_>>() ]?; let new = df.lazy() .with_column(when(col("range").gt_eq(lit(5))) .then(col("left")) .otherwise(col("right")).alias("foo_or_bar") ).collect()?; // new: // ╭───────┬───────┬───────┬────────────╮ // │ range ┆ left ┆ right ┆ foo_or_bar │ // │ --- ┆ --- ┆ --- ┆ --- │ // │ i64 ┆ str ┆ str ┆ str │ // ╞═══════╪═══════╪═══════╪════════════╡ // │ 0 ┆ "foo" ┆ "bar" ┆ "bar" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 1 ┆ "foo" ┆ "bar" ┆ "bar" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 2 ┆ "foo" ┆ "bar" ┆ "bar" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 3 ┆ "foo" ┆ "bar" ┆ "bar" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ ... ┆ ... ┆ ... ┆ ... │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 5 ┆ "foo" ┆ "bar" ┆ "foo" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 6 ┆ "foo" ┆ "bar" ┆ "foo" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 7 ┆ "foo" ┆ "bar" ┆ "foo" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 8 ┆ "foo" ┆ "bar" ┆ "foo" │ // ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ // │ 9 ┆ "foo" ┆ "bar" ┆ "foo" │ // ╰───────┴───────┴───────┴────────────╯