Module polars::docs::eager [−][src]
Expand description
Polars Eager cookbook
This page should serve as a cookbook to quickly get you started with the most fundamental operations
executed on a ChunkedArray
, Series
or DataFrame
.
Table of Contents
- Creation of data structures
- Arithmetic
- Comparisons
- Apply functions/ closures
- Filter
- Sort
- Joins
- GroupBy
- Melt
- Explode
- IO
- Various
Creation of Data structures
ChunkedArray
use polars::prelude::*; // use iterators let ca: UInt32Chunked = (0..10).map(Some).collect(); // from slices let ca = UInt32Chunked::new_from_slice("foo", &[1, 2, 3]); // use builders let mut builder = PrimitiveChunkedBuilder::<UInt32Type>::new("foo", 10); for value in 0..10 { builder.append_value(value); } let ca = builder.finish();
Series
use polars::prelude::*; // use iterators let s: Series = (0..10).map(Some).collect(); // from slices let s = Series::new("foo", &[1, 2, 3]); // from a chunked-array let ca = UInt32Chunked::new_from_opt_slice("foo", &[Some(1), None, Some(3)]); let s = ca.into_series();
DataFrame
use polars::prelude::*; use polars::df; // use macro let df = df! [ "names" => ["a", "b", "c"], "values" => [1, 2, 3], "values_nulls" => [Some(1), None, Some(3)] ]?; // from a Vec<Series> let s1 = Series::new("names", &["a", "b", "c"]); let s2 = Series::new("values", &[Some(1), None, Some(3)]); let df = DataFrame::new(vec![s1, s2])?;
Arithmetic
Arithmetic can be done on both Series
and ChunkedArray
s. The most notable difference is that
a Series
coerces the data to match the underlying data types.
use polars::prelude::*; let s_int = Series::new("a", &[1, 2, 3]); let s_flt = Series::new("b", &[1.0, 2.0, 3.0]); let added = &s_int + &s_flt; let subtracted = &s_int - &s_flt; let multiplied = &s_int * &s_flt; let divided = &s_int / &s_flt; let moduloed = &s_int % &s_flt; // on chunked-arrays we first need to cast to same types let ca_int = s_int.i32()?; let ca_flt = s_flt.f32()?; &ca_int.cast::<Float32Type>()? * ca_flt; &ca_flt.cast::<Int32Type>()? * ca_int; // we can also do arithmetic with numeric values let multiplied = ca_int * 2.0; let multiplied = s_flt * 2.0; // or broadcast Series to match the operands type let added = &s_int * &Series::new("broadcast_me", &[10]);
Because Rust's Orphan Rule doesn’t allow us to implement left-hand side operations, we need to call such operations directly.
let series = Series::new("foo", [1, 2, 3]); // 1 / s let divide_one_by_s = 1.div(&series); // 1 - s let subtract_one_by_s = 1.sub(&series);
For ChunkedArray
s these left-hand side operations can be done with the apply
method.
let ca = UInt32Chunked::new_from_slice("foo", &[1, 2, 3]); // 1 / ca let divide_one_by_ca = ca.apply(|rhs| 1 / rhs);
Comparisons
Series
and ChunkedArray
s can be used in comparison operations to create boolean
masks/predicates.
use polars::prelude::*; let s = Series::new("a", &[1, 2, 3]); let ca = UInt32Chunked::new_from_opt_slice("b", &[Some(3), None, Some(1)]); // compare Series with numeric values // == s.eq(2); // != s.neq(2); // > s.gt(2); // >= s.gt_eq(2); // < s.lt(2); // <= s.lt_eq(2); // compare Series with Series // == s.eq(&s); // != s.neq(&s); // > s.gt(&s); // >= s.gt_eq(&s); // < s.lt(&s); // <= s.lt_eq(&s); // compare chunked-array with numeric values // == ca.eq(2); // != ca.neq(2); // > ca.gt(2); // >= ca.gt_eq(2); // < ca.lt(2); // <= ca.lt_eq(2); // compare chunked-array with chunked-array // == ca.eq(&ca); // != ca.neq(&ca); // > ca.gt(&ca); // >= ca.gt_eq(&ca); // < ca.lt(&ca); // <= ca.lt_eq(&ca); // use iterators let a: BooleanChunked = ca.into_iter() .map(|opt_value| { match opt_value { Some(value) => value < 10, None => false }}).collect();
Apply functions/ closures
See all possible apply methods here.
Series / ChunkedArrays
use polars::prelude::*; // apply a closure over all values let s = Series::new("foo", &[Some(1), Some(2), None]); s.i32()?.apply(|value| value * 20); // count string lengths let s = Series::new("foo", &["foo", "bar", "foobar"]); s.utf8()?.apply_cast_numeric::<_, UInt64Type>(|str_val| str_val.len() as u64);
DataFrame
use polars::prelude::*; use polars::df; let mut df = df![ "letters" => ["a", "b", "c", "d"], "numbers" => [1, 2, 3, 4] ]?; // coerce numbers to floats df.may_apply("number", |s: &Series| s.cast::<Float64Type>())?; // transform letters to uppercase letters df.may_apply("letters", |s: &Series| { Ok(s.utf8()?.to_uppercase()) });
Filter
use polars::prelude::*; // create a mask to filter out null values let mask = df.column("sepal.width")?.is_not_null(); // select column let s = df.column("sepal.length")?; // apply filter on a Series let filtered_series = s.filter(&mask); // apply the filter on a DataFrame let filtered_df = df.filter(&mask)?;
Sort
use polars::prelude::*; use polars::df; let df = df![ "a" => [1, 2, 3], "b" => ["a", "a", "b"] ]?; // sort this DataFrame by multiple columns // ordering of the columns let reverse = &[true, false]; // columns to sort by let by = &["b", "a"]; // do the sort operation let sorted = df.sort(by, reverse)?; // sorted: // ╭─────┬─────╮ // │ a ┆ b │ // │ --- ┆ --- │ // │ i64 ┆ str │ // ╞═════╪═════╡ // │ 1 ┆ "a" │ // ├╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2 ┆ "a" │ // ├╌╌╌╌╌┼╌╌╌╌╌┤ // │ 3 ┆ "b" │ // ╰─────┴─────╯
Joins
use polars::prelude::*; use polars::df; // Create first df. let temp = df!("days" => &[0, 1, 2, 3, 4], "temp" => &[22.1, 19.9, 7., 2., 3.], "other" => &[1, 2, 3, 4, 5] )?; // Create second df. let rain = df!("days" => &[1, 2], "rain" => &[0.1, 0.2], "other" => &[1, 2, 3, 4, 5] )?; // join on a single column temp.left_join(&rain, "days", "days"); temp.inner_join(&rain, "days", "days"); temp.outer_join(&rain, "days", "days"); // join on multiple columns temp.join(&rain, vec!["days", "other"], vec!["days", "other"], JoinType::Left);
GroupBy
Note that Polars lazy is a lot more powerful and more performant in groupby operations. In lazy, a myriad of aggregations can be combined from expressions.
See more in:
GroupBy
use polars::prelude::*; // groupby "groups" | sum "foo" let out = df.groupby("groups")? .select("foo") .sum();
Pivot
use polars::prelude::*; use polars::df; let df = df!("foo" => ["A", "A", "B", "B", "C"], "N" => [1, 2, 2, 4, 2], "bar" => ["k", "l", "m", "n", "0"] )?; // groupby "foo" | pivot "bar" column | aggregate "N" let pivoted = df.groupby("groups")? .pivot("bar", "N") .first(); // pivoted: // +-----+------+------+------+------+------+ // | foo | o | n | m | l | k | // | --- | --- | --- | --- | --- | --- | // | str | i32 | i32 | i32 | i32 | i32 | // +=====+======+======+======+======+======+ // | "A" | null | null | null | 2 | 1 | // +-----+------+------+------+------+------+ // | "B" | null | 4 | 2 | null | null | // +-----+------+------+------+------+------+ // | "C" | 2 | null | null | null | null | // +-----+------+------+------+------+------+!
Downsample
Downsample the DataFrame given some frequency/ downsample rule
use polars::prelude::*; use polars::df; use polars::frame::groupby::resample::SampleRule; // given an input dataframe // ╭─────────────────────┬─────╮ // │ ms ┆ i │ // │ --- ┆ --- │ // │ date64(ms) ┆ u8 │ // ╞═════════════════════╪═════╡ // │ 2000-01-01 00:00:00 ┆ 0 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:01:00 ┆ 1 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:02:00 ┆ 2 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:03:00 ┆ 3 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ ... ┆ ... │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:15:00 ┆ 15 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:16:00 ┆ 16 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:17:00 ┆ 17 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:18:00 ┆ 18 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ // │ 2000-01-01 00:19:00 ┆ 19 │ // ╰─────────────────────┴─────╯ let downsampled = df.downsample("datetime", SampleRule::Minute(5))? .first()? .sort("datetime", false)?; // downsampled: // ╭─────────────────────┬─────────╮ // │ ms ┆ i_first │ // │ --- ┆ --- │ // │ date64(ms) ┆ u8 │ // ╞═════════════════════╪═════════╡ // │ 2000-01-01 00:00:00 ┆ 0 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ 2000-01-01 00:05:00 ┆ 5 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ 2000-01-01 00:10:00 ┆ 10 │ // ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ // │ 2000-01-01 00:15:00 ┆ 15 │ // ╰─────────────────────┴─────────╯
Melt
use polars::prelude::*; use polars::df; let df = df!["A" => &["a", "b", "a"], "B" => &[1, 3, 5], "C" => &[10, 11, 12], "D" => &[2, 4, 6] ]?; let melted = df.melt(&["A", "B"], &["C", "D"]).unwrap(); // melted: // +-----+-----+----------+-------+ // | A | B | variable | value | // | --- | --- | --- | --- | // | str | i32 | str | i32 | // +=====+=====+==========+=======+ // | "a" | 1 | "C" | 10 | // +-----+-----+----------+-------+ // | "b" | 3 | "C" | 11 | // +-----+-----+----------+-------+ // | "a" | 5 | "C" | 12 | // +-----+-----+----------+-------+ // | "a" | 1 | "D" | 2 | // +-----+-----+----------+-------+ // | "b" | 3 | "D" | 4 | // +-----+-----+----------+-------+ // | "a" | 5 | "D" | 6 | // +-----+-----+----------+-------+
Explode
use polars::prelude::*; use polars::df; let s0 = Series::new("a", &[1i64, 2, 3]); let s1 = Series::new("b", &[1i64, 1, 1]); let s2 = Series::new("c", &[2i64, 2, 2]); // construct a new ListChunked for a slice of Series. let list = Series::new("foo", &[s0, s1, s2]); // construct a few more Series. let s0 = Series::new("B", [1, 2, 3]); let s1 = Series::new("C", [1, 1, 1]); let df = DataFrame::new(vec![list, s0, s1])?; let exploded = df.explode("foo")?; // exploded: // +-----+-----+-----+ // | foo | B | C | // | --- | --- | --- | // | i64 | i32 | i32 | // +=====+=====+=====+ // | 1 | 1 | 1 | // +-----+-----+-----+ // | 2 | 1 | 1 | // +-----+-----+-----+ // | 3 | 1 | 1 | // +-----+-----+-----+ // | 1 | 2 | 1 | // +-----+-----+-----+ // | 1 | 2 | 1 | // +-----+-----+-----+ // | 1 | 2 | 1 | // +-----+-----+-----+ // | 2 | 3 | 1 | // +-----+-----+-----+ // | 2 | 3 | 1 | // +-----+-----+-----+ // | 2 | 3 | 1 | // +-----+-----+-----+
IO
Read CSV
use polars::prelude::*; // read from path let df = CsvReader::from_path("iris_csv")? .infer_schema(None) .has_header(true) .finish()?;
Write CSV
use polars::prelude::*; use std::fs::File; // create a file let mut file = File::create("example.csv").expect("could not create file"); // write DataFrame to file CsvWriter::new(&mut file) .has_headers(true) .with_delimiter(b',') .finish(df);
Read IPC
use polars::prelude::*; use std::fs::File; // open file let file = File::open("file.ipc").expect("file not found"); // read to DataFrame let df = IpcReader::new(file) .finish()?;
Write IPC
use polars::prelude::*; use std::fs::File; // create a file let mut file = File::create("file.ipc").expect("could not create file"); // write DataFrame to file IpcWriter::new(&mut file) .finish(df)
Read Parquet
use polars::prelude::*; use std::fs::File; // open file let file = File::open("some_file.parquet").unwrap(); // read to DataFrame let df = ParquetReader::new(file).finish()?;
Write Parquet
use polars::prelude::*; use std::fs::File; // create a file let file = File::create("example.parquet").expect("could not create file"); ParquetWriter::new(file) .finish(df)
Various
Replace NaN with Missing.
The floating-point Not a Number value, NaN, is conceptually different
from missing data in Polars. In the snippet below we show how we can replace NaN
values with
missing values, by setting them to None
.
use polars::prelude::*; use polars::df; /// Replaces NaN with missing values. fn fill_nan_with_nulls() -> Result<DataFrame> { let nan = f64::NAN; let mut df = df! { "a" => [nan, 1.0, 2.0], "b" => [nan, 1.0, 2.0] } .unwrap(); for idx in 0..df.width() { df.may_apply_at_idx(idx, |series| { let mask = series.is_nan()?; let ca = series.f64()?; ca.set(&mask, None) })?; } Ok(df) }
Extracting data
To be able to extract data out of Series
, either by iterating over them or converting them
to other datatypes like a Vec<T>
, we first need to downcast them to a ChunkedArray<T>
. This
is needed because we don’t know the data type that is held by the Series
.
use polars::prelude::*; use polars::df; fn extract_data() -> Result<()> { let df = df! [ "a" => [None, Some(1.0f32), Some(2.0)], "str" => ["foo", "bar", "ham"] ]?; // first extract ChunkedArray to get the inner type. let ca = df.column("a").f32(); // Then convert to vec let to_vec: Vec<Option<f32>> = Vec::from(ca); // We can also do this with iterators let ca = df.column("str").utf8(); let to_vec: Vec<Option<&str>> = ca.into_iter().collect(); let to_vec_no_options: Vec<&str> = ca.into_no_null_iter().collect(); Ok(()) }