[−][src]Struct polars_core::frame::group_by::GroupBy
Returned by a groupby operation on a DataFrame. This struct supports several aggregations.
Unless described otherwise, the examples in this struct are performed on the following DataFrame:
use polars_core::prelude::*; let dates = &[ "2020-08-21", "2020-08-21", "2020-08-22", "2020-08-23", "2020-08-22", ]; // date format let fmt = "%Y-%m-%d"; // create date series let s0 = Date32Chunked::parse_from_str_slice("date", dates, fmt) .into_series(); // create temperature series let s1 = Series::new("temp", [20, 10, 7, 9, 1].as_ref()); // create rain series let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01].as_ref()); // create a new DataFrame let df = DataFrame::new(vec![s0, s1, s2]).unwrap(); println!("{:?}", df);
Outputs:
+------------+------+------+
| date | temp | rain |
| --- | --- | --- |
| date32 | i32 | f64 |
+============+======+======+
| 2020-08-21 | 20 | 0.2 |
+------------+------+------+
| 2020-08-21 | 10 | 0.1 |
+------------+------+------+
| 2020-08-22 | 7 | 0.3 |
+------------+------+------+
| 2020-08-23 | 9 | 0.1 |
+------------+------+------+
| 2020-08-22 | 1 | 0.01 |
+------------+------+------+
Implementations
impl<'df, 'selection_str> GroupBy<'df, 'selection_str>
[src]
pub fn select<S, J>(mut self: Self, selection: S) -> Self where
S: Selection<'selection_str, J>,
[src]
S: Selection<'selection_str, J>,
Select the column(s) that should be aggregated. You can select a single column or a slice of columns.
Note that making a selection with this method is not required. If you skip it, all columns (except for the keys) will be selected for aggregation.
pub fn get_groups(&self) -> &Vec<(usize, Vec<usize>)>
[src]
Get the internal representation of the GroupBy operation.
The Vec returned contains:
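(first_idx, Vec<indexes>), i.e. one tuple per group, matching the (usize, Vec<usize>) element type of the returned Vec.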
pub fn keys(&self) -> Vec<Series>
[src]
pub fn mean(&self) -> Result<DataFrame>
[src]
Aggregate grouped series and compute the mean per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select(&["temp", "rain"]).mean() }
Returns:
+------------+-----------+-----------+
| date | temp_mean | rain_mean |
| --- | --- | --- |
| date32 | f64 | f64 |
+============+===========+===========+
| 2020-08-23 | 9 | 0.1 |
+------------+-----------+-----------+
| 2020-08-22 | 4 | 0.155 |
+------------+-----------+-----------+
| 2020-08-21 | 15 | 0.15 |
+------------+-----------+-----------+
pub fn sum(&self) -> Result<DataFrame>
[src]
Aggregate grouped series and compute the sum per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").sum() }
Returns:
+------------+----------+
| date | temp_sum |
| --- | --- |
| date32 | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 8 |
+------------+----------+
| 2020-08-21 | 30 |
+------------+----------+
pub fn min(&self) -> Result<DataFrame>
[src]
Aggregate grouped series and compute the minimum value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").min() }
Returns:
+------------+----------+
| date | temp_min |
| --- | --- |
| date32 | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 1 |
+------------+----------+
| 2020-08-21 | 10 |
+------------+----------+
pub fn max(&self) -> Result<DataFrame>
[src]
Aggregate grouped series and compute the maximum value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").max() }
Returns:
+------------+----------+
| date | temp_max |
| --- | --- |
| date32 | i32 |
+============+==========+
| 2020-08-23 | 9 |
+------------+----------+
| 2020-08-22 | 7 |
+------------+----------+
| 2020-08-21 | 20 |
+------------+----------+
pub fn first(&self) -> Result<DataFrame>
[src]
Aggregate grouped Series
and find the first value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").first() }
Returns:
+------------+------------+
| date | temp_first |
| --- | --- |
| date32 | i32 |
+============+============+
| 2020-08-23 | 9 |
+------------+------------+
| 2020-08-22 | 7 |
+------------+------------+
| 2020-08-21 | 20 |
+------------+------------+
pub fn last(&self) -> Result<DataFrame>
[src]
Aggregate grouped Series
and return the last value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").last() }
Returns:
+------------+------------+
| date | temp_last |
| --- | --- |
| date32 | i32 |
+============+============+
| 2020-08-23 | 9 |
+------------+------------+
| 2020-08-22 | 1 |
+------------+------------+
| 2020-08-21 | 10 |
+------------+------------+
pub fn n_unique(&self) -> Result<DataFrame>
[src]
Aggregate grouped Series
by counting the number of unique values.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").n_unique() }
Returns:
+------------+---------------+
| date | temp_n_unique |
| --- | --- |
| date32 | u32 |
+============+===============+
| 2020-08-23 | 1 |
+------------+---------------+
| 2020-08-22 | 2 |
+------------+---------------+
| 2020-08-21 | 2 |
+------------+---------------+
pub fn quantile(&self, quantile: f64) -> Result<DataFrame>
[src]
Aggregate grouped Series
and determine the quantile per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").quantile(0.2) }
pub fn median(&self) -> Result<DataFrame>
[src]
Aggregate grouped Series
and determine the median per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").median() }
pub fn var(&self) -> Result<DataFrame>
[src]
Aggregate grouped Series
and determine the variance per group.
pub fn std(&self) -> Result<DataFrame>
[src]
Aggregate grouped Series
and determine the standard deviation per group.
pub fn count(&self) -> Result<DataFrame>
[src]
Aggregate grouped series and compute the number of values per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.select("temp").count() }
Returns:
+------------+------------+
| date | temp_count |
| --- | --- |
| date32 | u32 |
+============+============+
| 2020-08-23 | 1 |
+------------+------------+
| 2020-08-22 | 2 |
+------------+------------+
| 2020-08-21 | 2 |
+------------+------------+
pub fn groups(&self) -> Result<DataFrame>
[src]
Get the groupby group indexes.
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.groups() }
Returns:
+--------------+------------+
| date | groups |
| --- | --- |
| date32(days) | list [u32] |
+==============+============+
| 2020-08-23 | "[3]" |
+--------------+------------+
| 2020-08-22 | "[2, 4]" |
+--------------+------------+
| 2020-08-21 | "[0, 1]" |
+--------------+------------+
pub fn agg<Column, S, Slice>(
&self,
column_to_agg: &[(Column, Slice)]
) -> Result<DataFrame> where
S: AsRef<str>,
S: AsRef<str>,
Slice: AsRef<[S]>,
Column: AsRef<str>,
[src]
&self,
column_to_agg: &[(Column, Slice)]
) -> Result<DataFrame> where
S: AsRef<str>,
S: AsRef<str>,
Slice: AsRef<[S]>,
Column: AsRef<str>,
Combine different aggregations on columns
Operations
- count
- first
- last
- sum
- min
- max
- mean
- median
Example
fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("date")?.agg(&[("temp", &["n_unique", "sum", "min"])]) }
Returns:
+--------------+---------------+----------+----------+
| date | temp_n_unique | temp_sum | temp_min |
| --- | --- | --- | --- |
| date32(days) | u32 | i32 | i32 |
+==============+===============+==========+==========+
| 2020-08-23 | 1 | 9 | 9 |
+--------------+---------------+----------+----------+
| 2020-08-22 | 2 | 8 | 1 |
+--------------+---------------+----------+----------+
| 2020-08-21 | 2 | 30 | 10 |
+--------------+---------------+----------+----------+
pub fn agg_list(&self) -> Result<DataFrame>
[src]
Aggregate the groups of the groupby operation into lists.
Example
fn example(df: DataFrame) -> Result<DataFrame> { // GroupBy and aggregate to Lists df.groupby("date")?.select("temp").agg_list() }
Returns:
+------------+------------------------+
| date | temp_agg_list |
| --- | --- |
| date32 | list [i32] |
+============+========================+
| 2020-08-23 | "[Some(9)]" |
+------------+------------------------+
| 2020-08-22 | "[Some(7), Some(1)]" |
+------------+------------------------+
| 2020-08-21 | "[Some(20), Some(10)]" |
+------------+------------------------+
pub fn apply<F>(&self, f: F) -> Result<DataFrame> where
F: Fn(DataFrame) -> Result<DataFrame> + Send + Sync,
[src]
F: Fn(DataFrame) -> Result<DataFrame> + Send + Sync,
Apply a closure over the groups as a new DataFrame.
pub fn pivot(
&mut self,
pivot_column: &'selection_str str,
values_column: &'selection_str str
) -> Pivot<'_, '_>
[src]
&mut self,
pivot_column: &'selection_str str,
values_column: &'selection_str str
) -> Pivot<'_, '_>
Pivot a column of the current DataFrame
and perform one of the following aggregations:
- first
- sum
- min
- max
- mean
- median
The pivot operation consists of a group by on one or multiple columns (these will be the new y-axis), a column that will be pivoted (this will be the new x-axis), and an aggregation.
Panics
If the values column is not a numerical type, the code will panic.
Example
use polars_core::prelude::*; let s0 = Series::new("foo", ["A", "A", "B", "B", "C"].as_ref()); let s1 = Series::new("N", [1, 2, 2, 4, 2].as_ref()); let s2 = Series::new("bar", ["k", "l", "m", "n", "o"].as_ref()); // create a new DataFrame let df = DataFrame::new(vec![s0, s1, s2]).unwrap(); fn example(df: DataFrame) -> Result<DataFrame> { df.groupby("foo")? .pivot("bar", "N") .first() }
Transforms:
+-----+-----+-----+
| foo | N | bar |
| --- | --- | --- |
| str | i32 | str |
+=====+=====+=====+
| "A" | 1 | "k" |
+-----+-----+-----+
| "A" | 2 | "l" |
+-----+-----+-----+
| "B" | 2 | "m" |
+-----+-----+-----+
| "B" | 4 | "n" |
+-----+-----+-----+
| "C" | 2 | "o" |
+-----+-----+-----+
Into:
+-----+------+------+------+------+------+
| foo | o | n | m | l | k |
| --- | --- | --- | --- | --- | --- |
| str | i32 | i32 | i32 | i32 | i32 |
+=====+======+======+======+======+======+
| "A" | null | null | null | 2 | 1 |
+-----+------+------+------+------+------+
| "B" | null | 4 | 2 | null | null |
+-----+------+------+------+------+------+
| "C" | 2 | null | null | null | null |
+-----+------+------+------+------+------+
Trait Implementations
impl<'df, 'selection_str> Clone for GroupBy<'df, 'selection_str>
[src]
fn clone(&self) -> GroupBy<'df, 'selection_str>
[src]
pub fn clone_from(&mut self, source: &Self)
1.0.0[src]
impl<'df, 'selection_str> Debug for GroupBy<'df, 'selection_str>
[src]
Auto Trait Implementations
impl<'df, 'selection_str> !RefUnwindSafe for GroupBy<'df, 'selection_str>
[src]
impl<'df, 'selection_str> Send for GroupBy<'df, 'selection_str>
[src]
impl<'df, 'selection_str> Sync for GroupBy<'df, 'selection_str>
[src]
impl<'df, 'selection_str> Unpin for GroupBy<'df, 'selection_str>
[src]
impl<'df, 'selection_str> !UnwindSafe for GroupBy<'df, 'selection_str>
[src]
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T
[src]
impl<T, U> Cast<U> for T where
U: FromCast<T>,
U: FromCast<T>,
pub fn cast(self) -> U
impl<T> From<T> for T
[src]
impl<T> FromCast<T> for T
pub fn from_cast(t: T) -> T
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> Pointable for T
pub const ALIGN: usize
type Init = T
The type for initializers.
pub unsafe fn init(init: <T as Pointable>::Init) -> usize
pub unsafe fn deref<'a>(ptr: usize) -> &'a T
pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T
pub unsafe fn drop(ptr: usize)
impl<T> ToOwned for T where
T: Clone,
[src]
T: Clone,
type Owned = T
The resulting type after obtaining ownership.
pub fn to_owned(&self) -> T
[src]
pub fn clone_into(&self, target: &mut T)
[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>
[src]
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
V: MultiLane<T>,