Struct polars_core::frame::DataFrame [−][src]
pub struct DataFrame { /* fields omitted */ }
Implementations
This is supported on crate feature ndarray only.
Create a 2D ndarray::Array from this DataFrame. This requires all columns in the
DataFrame to be non-null and numeric. They will be cast to the same data type
(if they aren’t already).
use polars_core::prelude::*;
let a = UInt32Chunked::new_from_slice("a", &[1, 2, 3]).into_series();
let b = Float64Chunked::new_from_slice("b", &[10., 8., 6.]).into_series();
let df = DataFrame::new(vec![a, b]).unwrap();
let ndarray = df.to_ndarray::<Float64Type>().unwrap();
println!("{:?}", ndarray);
Outputs:
[[1.0, 10.0],
[2.0, 8.0],
[3.0, 6.0]], shape=[3, 2], strides=[2, 1], layout=C (0x1), const ndim=2
This is supported on crate feature random only.
Sample n datapoints from this DataFrame.
This is supported on crate feature random only.
Sample a fraction between 0.0 and 1.0 of this DataFrame.
pub fn join_asof_by<'a, S, J>(
&self,
other: &DataFrame,
left_on: &str,
right_on: &str,
left_by: S,
right_by: S
) -> Result<DataFrame> where
S: Selection<'a, J>,
This is supported on crate feature asof_join
only.
pub fn join_asof_by<'a, S, J>(
&self,
other: &DataFrame,
left_on: &str,
right_on: &str,
left_by: S,
right_by: S
) -> Result<DataFrame> where
S: Selection<'a, J>,
This is similar to a left-join except that we match on the nearest key rather than on equal keys.
The keys must be sorted to perform an asof join. This is a special implementation of an asof join
that searches for the nearest keys within a subgroup set by `by`.
Creates the cartesian product from both frames, preserves the order of the left keys.
Explode DataFrame
to long format by exploding a column with Lists.
Example
use polars_core::prelude::*;
let s0 = Series::new("a", &[1i64, 2, 3]);
let s1 = Series::new("b", &[1i64, 1, 1]);
let s2 = Series::new("c", &[2i64, 2, 2]);
let list = Series::new("foo", &[s0, s1, s2]);
let s0 = Series::new("B", [1, 2, 3]);
let s1 = Series::new("C", [1, 1, 1]);
let df = DataFrame::new(vec![list, s0, s1]).unwrap();
let exploded = df.explode("foo").unwrap();
println!("{:?}", df);
println!("{:?}", exploded);
Outputs:
+-------------+-----+-----+
| foo | B | C |
| --- | --- | --- |
| list [i64] | i32 | i32 |
+=============+=====+=====+
| "[1, 2, 3]" | 1 | 1 |
+-------------+-----+-----+
| "[1, 1, 1]" | 2 | 1 |
+-------------+-----+-----+
| "[2, 2, 2]" | 3 | 1 |
+-------------+-----+-----+
+-----+-----+-----+
| foo | B | C |
| --- | --- | --- |
| i64 | i32 | i32 |
+=====+=====+=====+
| 1 | 1 | 1 |
+-----+-----+-----+
| 2 | 1 | 1 |
+-----+-----+-----+
| 3 | 1 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 1 | 2 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
| 2 | 3 | 1 |
+-----+-----+-----+
Unpivot a DataFrame
from wide to long format.
Example
Arguments
id_vars - String slice that represents the columns to use as id variables.
value_vars - String slice that represents the columns to use as value variables.
use polars_core::prelude::*;
let df = df!("A" => &["a", "b", "a"],
"B" => &[1, 3, 5],
"C" => &[10, 11, 12],
"D" => &[2, 4, 6]
)
.unwrap();
let melted = df.melt(&["A", "B"], &["C", "D"]).unwrap();
println!("{:?}", df);
println!("{:?}", melted);
Outputs:
+-----+-----+-----+-----+
| A | B | C | D |
| --- | --- | --- | --- |
| str | i32 | i32 | i32 |
+=====+=====+=====+=====+
| "a" | 1 | 10 | 2 |
+-----+-----+-----+-----+
| "b" | 3 | 11 | 4 |
+-----+-----+-----+-----+
| "a" | 5 | 12 | 6 |
+-----+-----+-----+-----+
+-----+-----+----------+-------+
| A | B | variable | value |
| --- | --- | --- | --- |
| str | i32 | str | i32 |
+=====+=====+==========+=======+
| "a" | 1 | "C" | 10 |
+-----+-----+----------+-------+
| "b" | 3 | "C" | 11 |
+-----+-----+----------+-------+
| "a" | 5 | "C" | 12 |
+-----+-----+----------+-------+
| "a" | 1 | "D" | 2 |
+-----+-----+----------+-------+
| "b" | 3 | "D" | 4 |
+-----+-----+----------+-------+
| "a" | 5 | "D" | 6 |
+-----+-----+----------+-------+
This is supported on crate features downsample and temporal only.
Downsample a temporal column by some frequency/rule.
Examples
Consider the following input DataFrame:
╭─────────────────────┬─────╮
│ ms ┆ i │
│ --- ┆ --- │
│ datetime(ms) ┆ u8 │
╞═════════════════════╪═════╡
│ 2000-01-01 00:00:00 ┆ 0 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:01:00 ┆ 1 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:02:00 ┆ 2 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:03:00 ┆ 3 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ ... ┆ ... │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:15:00 ┆ 15 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:16:00 ┆ 16 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:17:00 ┆ 17 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:18:00 ┆ 18 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:19:00 ┆ 19 │
╰─────────────────────┴─────╯
use polars_core::prelude::*;
use polars_core::frame::groupby::resample::SampleRule;
fn example(df: &DataFrame) -> Result<DataFrame> {
df.downsample("datetime", SampleRule::Minute(5))?
.first()?
.sort("datetime", false)
}
outputs:
╭─────────────────────┬─────────╮
│ ms ┆ i_first │
│ --- ┆ --- │
│ datetime(ms) ┆ u8 │
╞═════════════════════╪═════════╡
│ 2000-01-01 00:00:00 ┆ 0 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ 2000-01-01 00:05:00 ┆ 5 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ 2000-01-01 00:10:00 ┆ 10 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ 2000-01-01 00:15:00 ┆ 15 │
╰─────────────────────┴─────────╯
pub fn downsample_with_series(
&self,
key: &Series,
rule: SampleRule
) -> Result<GroupBy<'_, '_>>
This is supported on crate features downsample
and temporal
only.
pub fn downsample_with_series(
&self,
key: &Series,
rule: SampleRule
) -> Result<GroupBy<'_, '_>>
See downsample.
Group DataFrame using a Series column.
Example
use polars_core::prelude::*;
fn groupby_sum(df: &DataFrame) -> Result<DataFrame> {
df.groupby("column_name")?
.select("agg_column_name")
.sum()
}
Group DataFrame using a Series column. The groups are ordered by their smallest row index.
Generic join method. Can be used to join on multiple columns.
Example
use polars_core::df;
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Fruit" => &["Apple", "Banana", "Pear"],
"Phosphorus (mg/100g)" => &[11, 22, 12])?;
let df2: DataFrame = df!("Name" => &["Apple", "Banana", "Pear"],
"Potassium (mg/100g)" => &[107, 358, 115])?;
let df3: DataFrame = df1.join(&df2, "Fruit", "Name", JoinType::Inner, None)?;
assert_eq!(df3.shape(), (3, 3));
println!("{}", df3);
Ok(())
}
Output:
shape: (3, 3)
+--------+----------------------+---------------------+
| Fruit | Phosphorus (mg/100g) | Potassium (mg/100g) |
| --- | --- | --- |
| str | i32 | i32 |
+========+======================+=====================+
| Apple | 11 | 107 |
+--------+----------------------+---------------------+
| Banana | 22 | 358 |
+--------+----------------------+---------------------+
| Pear | 12 | 115 |
+--------+----------------------+---------------------+
Perform an inner join on two DataFrames.
Example
use polars_core::prelude::*;
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
left.inner_join(right, "join_column_left", "join_column_right")
}
Perform a left join on two DataFrames
Example
use polars_core::prelude::*;
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
left.left_join(right, "join_column_left", "join_column_right")
}
Perform an outer join on two DataFrames
Example
use polars_core::prelude::*;
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
left.outer_join(right, "join_column_left", "join_column_right")
}
This is supported on crate feature rows only.
Get a row from a DataFrame. Use of this is discouraged as it will likely be slow.
This is supported on crate feature rows only.
Amortize allocations by reusing a row. The caller is responsible for making sure that the row has at least the capacity for the number of columns in the DataFrame.
This is supported on crate feature rows only.
Amortize allocations by reusing a row. The caller is responsible for making sure that the row has at least the capacity for the number of columns in the DataFrame.
Safety
Does not do any bounds checking.
This is supported on crate feature rows only.
Create a new DataFrame from rows. This should only be used when you have row-wise data,
as this is a lot slower than creating the Series in a columnar fashion.
This is supported on crate feature rows only.
Create a new DataFrame from rows. This should only be used when you have row-wise data,
as this is a lot slower than creating the Series in a columnar fashion.
Create a DataFrame from a Vector of Series.
Example
use polars_core::prelude::*;
let s0 = Series::new("days", [0, 1, 2].as_ref());
let s1 = Series::new("temp", [22.1, 19.9, 7.].as_ref());
let df = DataFrame::new(vec![s0, s1]).unwrap();
Add a new column at index 0 that counts the rows.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Name" => &["James", "Mary", "John", "Patricia"])?;
assert_eq!(df1.shape(), (4, 1));
let df2: DataFrame = df1.with_row_count("Id")?;
assert_eq!(df2.shape(), (4, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (4, 2)
+-----+----------+
| Id | Name |
| --- | --- |
| u32 | str |
+=====+==========+
| 0 | James |
+-----+----------+
| 1 | Mary |
+-----+----------+
| 2 | John |
+-----+----------+
| 3 | Patricia |
+-----+----------+
Create a new DataFrame without checking the length or duplicate occurrence of the Series.
It is advised to use DataFrame::new in favor of this method.
Panic
It is the caller's responsibility to uphold the contract of all Series
having an equal length; if not, this may panic down the line.
Aggregate all chunks to contiguous memory.
Shrink the capacity of this DataFrame to fit its length.
Aggregate all the chunks in the DataFrame to a single chunk.
Aggregate all the chunks in the DataFrame to a single chunk in parallel. This may lead to more peak memory consumption.
Get the DataFrame schema.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
"Diameter (m)" => &[8.8e26, f64::INFINITY])?;
let f1: Field = Field::new("Thing", DataType::Utf8);
let f2: Field = Field::new("Diameter (m)", DataType::Float64);
let sc: Schema = Schema::new(vec![f1, f2]);
assert_eq!(df.schema(), sc);
Ok(())
}
Get a reference to the DataFrame columns.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
"Symbol" => &["A", "C", "G", "T"])?;
let columns: &Vec<Series> = df.get_columns();
assert_eq!(columns[0].name(), "Name");
assert_eq!(columns[1].name(), "Symbol");
Ok(())
}
Iterator over the columns as Series.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
let mut iterator = df.iter();
assert_eq!(iterator.next(), Some(&s1));
assert_eq!(iterator.next(), Some(&s2));
assert_eq!(iterator.next(), None);
Ok(())
}
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Language" => &["Rust", "Python"],
"Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
assert_eq!(df.get_column_names(), &["Language", "Designer"]);
Ok(())
}
Set the column names.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
df.set_column_names(&["Set"])?;
assert_eq!(df.get_column_names(), &["Set"]);
Ok(())
}
Get the data types of the columns in the DataFrame.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
"Fraction" => &[0.965, 0.035])?;
assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
Ok(())
}
Get a reference to the schema fields of the DataFrame.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
"Fraction" => &[0.708, 0.292])?;
let f1: Field = Field::new("Surface type", DataType::Utf8);
let f2: Field = Field::new("Fraction", DataType::Float64);
assert_eq!(earth.fields(), &[f1, f2]);
Ok(())
}
Get the (height, width) of the DataFrame.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
"2" => &[1, 2, 3, 4, 5])?;
assert_eq!(df0.shape(), (0 ,0));
assert_eq!(df1.shape(), (5, 1));
assert_eq!(df2.shape(), (5, 2));
Ok(())
}
Get the width of the DataFrame
which is the number of columns.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Series 1" => &[0; 0])?;
let df2: DataFrame = df!("Series 1" => &[0; 0],
"Series 2" => &[0; 0])?;
assert_eq!(df0.width(), 0);
assert_eq!(df1.width(), 1);
assert_eq!(df2.width(), 2);
Ok(())
}
Get the height of the DataFrame
which is the number of rows.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Currency" => &["€", "$"])?;
let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
assert_eq!(df0.height(), 0);
assert_eq!(df1.height(), 2);
assert_eq!(df2.height(), 5);
Ok(())
}
Check if DataFrame is empty
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df1: DataFrame = DataFrame::default();
assert!(df1.is_empty());
let df2: DataFrame = df!("First name" => &["Forever"],
"Last name" => &["Alone"])?;
assert!(!df2.is_empty());
Ok(())
}
Add multiple Series to a DataFrame. The added Series are required to have the same length.
Example
use polars_core::prelude::*;
fn stack(df: &mut DataFrame, columns: &[Series]) {
df.hstack_mut(columns);
}
Add multiple Series to a DataFrame. The added Series are required to have the same length.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
let s1: Series = Series::new("Proton", &[29, 47, 79]);
let s2: Series = Series::new("Electron", &[29, 47, 79]);
let df2: DataFrame = df1.hstack(&[s1, s2])?;
assert_eq!(df2.shape(), (3, 3));
println!("{}", df2);
Ok(())
}
Output:
shape: (3, 3)
+---------+--------+----------+
| Element | Proton | Electron |
| --- | --- | --- |
| str | i32 | i32 |
+=========+========+==========+
| Copper | 29 | 29 |
+---------+--------+----------+
| Silver | 47 | 47 |
+---------+--------+----------+
| Gold | 79 | 79 |
+---------+--------+----------+
Concatenate a DataFrame to this DataFrame and return the result as a newly allocated DataFrame.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
"Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
"Melting Point(K)" => &[2041.4, 1828.05])?;
let df3: DataFrame = df1.vstack(&df2)?;
assert_eq!(df3.shape(), (5, 2));
println!("{}", df3);
Ok(())
}
Output:
shape: (5, 2)
+-----------+-------------------+
| Element | Melting Point (K) |
| --- | --- |
| str | f64 |
+===========+===================+
| Copper | 1357.77 |
+-----------+-------------------+
| Silver | 1234.93 |
+-----------+-------------------+
| Gold | 1337.33 |
+-----------+-------------------+
| Platinum | 2041.4 |
+-----------+-------------------+
| Palladium | 1828.05 |
+-----------+-------------------+
Concatenate a DataFrame to this DataFrame
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
"Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
"Melting Point(K)" => &[2041.4, 1828.05])?;
df1.vstack_mut(&df2)?;
assert_eq!(df1.shape(), (5, 2));
println!("{}", df1);
Ok(())
}
Output:
shape: (5, 2)
+-----------+-------------------+
| Element | Melting Point (K) |
| --- | --- |
| str | f64 |
+===========+===================+
| Copper | 1357.77 |
+-----------+-------------------+
| Silver | 1234.93 |
+-----------+-------------------+
| Gold | 1337.33 |
+-----------+-------------------+
| Platinum | 2041.4 |
+-----------+-------------------+
| Palladium | 1828.05 |
+-----------+-------------------+
Remove a column by name and return the column removed.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
"IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
let s1: Result<Series> = df.drop_in_place("Average weight");
assert!(s1.is_err());
let s2: Series = df.drop_in_place("IUCN")?;
assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
Ok(())
}
Return a new DataFrame
where all null values are dropped.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df1: DataFrame = df!("Country" => &["Malta", "Liechtenstein", "North Korea"],
"Tax revenue (% GDP)" => &[Some(32.7), None, None])?;
assert_eq!(df1.shape(), (3, 2));
let df2: DataFrame = df1.drop_nulls(None)?;
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+---------+---------------------+
| Country | Tax revenue (% GDP) |
| --- | --- |
| str | f64 |
+=========+=====================+
| Malta | 32.7 |
+---------+---------------------+
Drop a column by name.
This is a pure method and will return a new DataFrame
instead of modifying
the current one in place.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
let df2: DataFrame = df1.drop("Ray type")?;
assert_eq!(df1, df2);
Ok(())
}
Insert a new column at a given index
Add a new column to this DataFrame
or replace an existing one.
Get a row in the DataFrame.
Beware: this is slow.
Example
use polars_core::prelude::*;
fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
df.get(idx)
}
Select a Series
by index.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
"Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
let s1: Option<&Series> = df.select_at_idx(0);
let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
assert_eq!(s1, Some(&s2));
Ok(())
}
Get column index of a series by name.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
"Health" => &[100, 200, 500],
"Mana" => &[250, 100, 0],
"Strength" => &[30, 150, 300])?;
assert_eq!(df.find_idx_by_name("Name"), Some(0));
assert_eq!(df.find_idx_by_name("Health"), Some(1));
assert_eq!(df.find_idx_by_name("Mana"), Some(2));
assert_eq!(df.find_idx_by_name("Strength"), Some(3));
assert_eq!(df.find_idx_by_name("Haste"), None);
Ok(())
}
Select a single column by name.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
assert_eq!(df.column("Password")?, &s1);
Ok(())
}
Select multiple columns by name.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
"Max weight (kg)" => &[16.0, 35.89])?;
let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
assert_eq!(&df[0], sv[0]);
assert_eq!(&df[1], sv[1]);
Ok(())
}
Select column(s) from this DataFrame and return a new DataFrame.
Examples
use polars_core::prelude::*;
fn example(df: &DataFrame, possible: &str) -> Result<DataFrame> {
match possible {
"by_str" => df.select("my-column"),
"by_tuple" => df.select(("col_1", "col_2")),
"by_vec" => df.select(vec!["col_a", "col_b"]),
_ => unimplemented!()
}
}
Select column(s) from this DataFrame and return them into a Vector.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
"Carbon" => &[1, 2, 3],
"Hydrogen" => &[4, 6, 8])?;
let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
assert_eq!(df["Carbon"], sv[0]);
assert_eq!(df["Hydrogen"], sv[1]);
Ok(())
}
Take DataFrame rows by a boolean mask.
Example
use polars_core::prelude::*;
fn example(df: &DataFrame) -> Result<DataFrame> {
let mask = df.column("sepal.width")?.is_not_null();
df.filter(&mask)
}
Take DataFrame values by indexes from an iterator.
Example
use polars_core::prelude::*;
fn example(df: &DataFrame) -> Result<DataFrame> {
let iterator = (0..9).into_iter();
df.take_iter(iterator)
}
Take DataFrame values by indexes from an iterator.
Safety
This doesn’t do any bounds checking but checks null validity.
Take DataFrame values by indexes from an iterator that may contain None values.
Safety
This doesn’t do any bounds checking. Out-of-bounds indexes may access uninitialized memory. Null validity is checked.
Take DataFrame rows by index values.
Example
use polars_core::prelude::*;
fn example(df: &DataFrame) -> Result<DataFrame> {
let idx = UInt32Chunked::new_from_slice("idx", &[0, 1, 9]);
df.take(&idx)
}
Rename a column in the DataFrame
Example
use polars_core::prelude::*;
fn example(df: &mut DataFrame) -> Result<&mut DataFrame> {
let original_name = "foo";
let new_name = "bar";
df.rename(original_name, new_name)
}
Sort DataFrame in place by a column.
Return a sorted clone of this DataFrame.
Example
use polars_core::prelude::*;
fn sort_example(df: &DataFrame, reverse: bool) -> Result<DataFrame> {
df.sort("a", reverse)
}
fn sort_by_multiple_columns_example(df: &DataFrame) -> Result<DataFrame> {
df.sort(&["a", "b"], vec![false, true])
}
Replace a column with a series.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let mut df: DataFrame = df!("Country" => &["United States", "China"],
"Area (km²)" => &[9_833_520, 9_596_961])?;
let s: Series = Series::new("Country", &["USA", "PRC"]);
assert!(df.replace("Nation", s.clone()).is_err());
assert!(df.replace("Country", s).is_ok());
Ok(())
}
pub fn replace_or_add<S: IntoSeries>(
&mut self,
column: &str,
new_col: S
) -> Result<&mut Self>
pub fn replace_or_add<S: IntoSeries>(
&mut self,
column: &str,
new_col: S
) -> Result<&mut Self>
Replace or update a column. The difference between this method and DataFrame::with_column
is that now the value of column: &str
determines the name of the column and not the name
of the Series
passed to this method.
Replace column at index idx
with a series.
Example
use polars_core::prelude::*;
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();
// Add 32 to get lowercase ascii values
df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
Apply a closure to a column. This is the recommended way to do in place modification.
Example
use polars_core::prelude::*;
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("names", &["Jean", "Claude", "van"]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();
fn str_to_len(str_val: &Series) -> Series {
str_val.utf8()
.unwrap()
.into_iter()
.map(|opt_name: Option<&str>| {
opt_name.map(|name: &str| name.len() as u32)
})
.collect::<UInt32Chunked>()
.into_series()
}
// Replace the names column by the length of the names.
df.apply("names", str_to_len);
Results in:
+--------+-------+
| foo | names |
| --- | --- |
| str | u32 |
+========+=======+
| "ham" | 4 |
+--------+-------+
| "spam" | 6 |
+--------+-------+
| "egg" | 3 |
+--------+-------+
pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
F: FnOnce(&Series) -> S,
S: IntoSeries,
pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
F: FnOnce(&Series) -> S,
S: IntoSeries,
Apply a closure to a column at index idx. This is the recommended way to do in-place
modification.
Example
use polars_core::prelude::*;
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();
// Add 32 to get lowercase ascii values
df.apply_at_idx(1, |s| s + 32);
Results in:
+--------+-------+
| foo | ascii |
| --- | --- |
| str | i32 |
+========+=======+
| "ham" | 102 |
+--------+-------+
| "spam" | 111 |
+--------+-------+
| "egg" | 111 |
+--------+-------+
pub fn may_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
F: FnOnce(&Series) -> Result<S>,
S: IntoSeries,
pub fn may_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
F: FnOnce(&Series) -> Result<S>,
S: IntoSeries,
Apply a closure that may fail to a column at index idx. This is the recommended way to do in-place
modification.
Example
This is the idiomatic way to replace some values in a column of a DataFrame
given a range of indexes.
let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();
let idx = vec![0, 1, 4];
df.may_apply("foo", |s| {
s.utf8()?
.set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
});
Results in:
+---------------------+--------+
| foo | values |
| --- | --- |
| str | i32 |
+=====================+========+
| "ham-is-modified" | 1 |
+---------------------+--------+
| "spam-is-modified" | 2 |
+---------------------+--------+
| "egg" | 3 |
+---------------------+--------+
| "bacon" | 4 |
+---------------------+--------+
| "quack-is-modified" | 5 |
+---------------------+--------+
Apply a closure that may fail to a column. This is the recommended way to do in place modification.
Example
This is the idiomatic way to replace some values in a column of a DataFrame
given a boolean mask.
let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();
// create a mask
let values = df.column("values").unwrap();
let mask = values.lt_eq(1) | values.gt_eq(5);
df.may_apply("foo", |s| {
s.utf8()?
.set(&mask, Some("not_within_bounds"))
});
Results in:
+---------------------+--------+
| foo | values |
| --- | --- |
| str | i32 |
+=====================+========+
| "not_within_bounds" | 1 |
+---------------------+--------+
| "spam" | 2 |
+---------------------+--------+
| "egg" | 3 |
+---------------------+--------+
| "bacon" | 4 |
+---------------------+--------+
| "not_within_bounds" | 5 |
+---------------------+--------+
Slice the DataFrame along the rows.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
"Color" => &["Green", "Red", "White", "White", "Red"])?;
let sl: DataFrame = df.slice(2, 3);
assert_eq!(sl.shape(), (3, 2));
println!("{}", sl);
Ok(())
}
Output:
shape: (3, 2)
+-------+-------+
| Fruit | Color |
| --- | --- |
| str | str |
+=======+=======+
| Grape | White |
+-------+-------+
| Fig | White |
+-------+-------+
| Fig | Red |
+-------+-------+
Get the head of the DataFrame
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let countries: DataFrame =
df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
"Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
"Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
"Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
assert_eq!(countries.shape(), (5, 4));
println!("{}", countries.head(Some(3)));
Ok(())
}
Output:
shape: (3, 4)
+--------------------+---------------+---------------+------------+
| Rank by GDP (2021) | Continent | Country | Capital |
| --- | --- | --- | --- |
| i32 | str | str | str |
+====================+===============+===============+============+
| 1 | North America | United States | Washington |
+--------------------+---------------+---------------+------------+
| 2 | Asia | China | Beijing |
+--------------------+---------------+---------------+------------+
| 3 | Asia | Japan | Tokyo |
+--------------------+---------------+---------------+------------+
Get the tail of the DataFrame
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let countries: DataFrame =
df!("Rank (2021)" => &[105, 106, 107, 108, 109],
"Apple Price (€/kg)" => &[0.76, 0.72, 0.70, 0.63],
"Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
assert_eq!(countries.shape(), (5, 3));
println!("{}", countries.tail(Some(2)));
Ok(())
}
Output:
shape: (2, 3)
+-------------+--------------------+---------+
| Rank (2021) | Apple Price (€/kg) | Country |
| --- | --- | --- |
| i32 | f64 | str |
+=============+====================+=========+
| 108 | 0.63 | Syria |
+-------------+--------------------+---------+
| 109 | 0.63 | Turkey |
+-------------+--------------------+---------+
Transform the underlying chunks in the DataFrame to Arrow RecordBatches
Iterator over the rows in this DataFrame as Arrow RecordBatches.
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift operation.
Replace None values with one of the following strategies:
- Forward fill (replace None with the previous value)
- Backward fill (replace None with the next value)
- Mean fill (replace None with the mean of the whole array)
- Min fill (replace None with the minimum of the whole array)
- Max fill (replace None with the maximum of the whole array)
See the method on Series for more info on the fill_null
operation.
Aggregate the columns to their maximum values.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
"Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.max();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| --- | --- |
| i32 | i32 |
+=========+=========+
| 6 | 5 |
+---------+---------+
Aggregate the columns to their standard deviation values.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
"Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.std();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+-------------------+--------------------+
| Die n°1 | Die n°2 |
| --- | --- |
| f64 | f64 |
+===================+====================+
| 2.280350850198276 | 1.0954451150103321 |
+-------------------+--------------------+
Aggregate the columns to their variance values.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
"Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.var();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| --- | --- |
| f64 | f64 |
+=========+=========+
| 5.2 | 1.2 |
+---------+---------+
Aggregate the columns to their minimum values.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
"Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.min();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| --- | --- |
| i32 | i32 |
+=========+=========+
| 1 | 2 |
+---------+---------+
Aggregate the columns to their sum values.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
"Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.sum();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| --- | --- |
| i32 | i32 |
+=========+=========+
| 16 | 16 |
+---------+---------+
Aggregate the columns to their mean values.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
"Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.mean();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| --- | --- |
| f64 | f64 |
+=========+=========+
| 3.2 | 3.2 |
+---------+---------+
Aggregate the columns to their median values.
Example
use polars_core::prelude::*;
fn example() -> Result<()> {
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
"Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.median();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);
Ok(())
}
Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| --- | --- |
| i32 | i32 |
+=========+=========+
| 3 | 3 |
+---------+---------+
Aggregate the columns to their quantile values.
This is supported on crate feature `zip_with` only.
Aggregate the columns horizontally to their min values.
This is supported on crate feature `zip_with` only.
Aggregate the columns horizontally to their max values.
Aggregate the columns horizontally to their sum values.
Aggregate the columns horizontally to their mean values.
Pipe different functions/ closure operations that work on a DataFrame together.
Pipe different functions/ closure operations that work on a DataFrame together.
Pipe different functions/ closure operations that work on a DataFrame together.
Create dummy variables.
Example
use polars_core::prelude::*;
let df = df! {
"id" => &[1, 2, 3, 1, 2, 3, 1, 1],
"type" => &["A", "B", "B", "B", "C", "C", "C", "B"],
"code" => &["X1", "X2", "X3", "X3", "X2", "X2", "X1", "X1"]
}.unwrap();
let dummies = df.to_dummies().unwrap();
dbg!(dummies);
Outputs:
+------+------+------+--------+--------+--------+---------+---------+---------+
| id_1 | id_3 | id_2 | type_A | type_B | type_C | code_X1 | code_X2 | code_X3 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| u8 | u8 | u8 | u8 | u8 | u8 | u8 | u8 | u8 |
+======+======+======+========+========+========+=========+=========+=========+
| 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
+------+------+------+--------+--------+--------+---------+---------+---------+
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
+------+------+------+--------+--------+--------+---------+---------+---------+
| 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
+------+------+------+--------+--------+--------+---------+---------+---------+
| 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
+------+------+------+--------+--------+--------+---------+---------+---------+
| 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
+------+------+------+--------+--------+--------+---------+---------+---------+
| 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
+------+------+------+--------+--------+--------+---------+---------+---------+
| 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
+------+------+------+--------+--------+--------+---------+---------+---------+
| 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
+------+------+------+--------+--------+--------+---------+---------+---------+
Drop duplicate rows from a DataFrame. This fails when there is a column of type List in the DataFrame.
Example
use polars_core::prelude::*;
fn example() -> Result<DataFrame> {
let df = df! {
"flt" => [1., 1., 2., 2., 3., 3.],
"int" => [1, 1, 2, 2, 3, 3, ],
"str" => ["a", "a", "b", "b", "c", "c"]
}?;
df.drop_duplicates(true, None)
}
Returns
+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1 | 1 | "a" |
+-----+-----+-----+
| 2 | 2 | "b" |
+-----+-----+-----+
| 3 | 3 | "c" |
+-----+-----+-----+
Get a mask of all the unique rows in the DataFrame.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
"ISIN" => &["US0378331005", "US5949181045"])?;
let ca: ChunkedArray<BooleanType> = df.is_unique()?;
assert!(ca.all_true());
Ok(())
}
Get a mask of all the duplicated rows in the DataFrame.
Example
use polars_core::prelude::*; // or "use polars::prelude::*"
fn example() -> Result<()> {
let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
"ISIN" => &["US02079K3059", "US02079K1079"])?;
let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
assert!(ca.all_false());
Ok(())
}
Create a new DataFrame that shows the null counts per column.
Hash and combine the row values
Check if DataFrames are equal. Note that `None == None` evaluates to false.
Check if all values in DataFrames are equal, where `None == None` evaluates to true.
Trait Implementations
Panics
Panics if Series have different lengths.
Conversion from Vec
type Error = PolarsError
type Error = PolarsError
The type returned in the event of a conversion error.
Performs the conversion.
Conversion from Vec
If batch-size is small it might be advisable to call rechunk to ensure predictable performance
type Error = PolarsError
type Error = PolarsError
The type returned in the event of a conversion error.
Auto Trait Implementations
impl !RefUnwindSafe for DataFrame
impl !UnwindSafe for DataFrame
Blanket Implementations
Mutably borrows from an owned value. Read more