Struct polars::frame::groupby::GroupBy[][src]

pub struct GroupBy<'df, 'selection_str> { /* fields omitted */ }
Expand description

Returned by a groupby operation on a DataFrame. This struct supports several aggregations.

Unless described otherwise, the examples in this struct are performed on the following DataFrame:

use polars_core::prelude::*;

let dates = &[
"2020-08-21",
"2020-08-21",
"2020-08-22",
"2020-08-23",
"2020-08-22",
];
// date format
let fmt = "%Y-%m-%d";
// create date series
let s0 = Date32Chunked::parse_from_str_slice("date", dates, fmt)
        .into_series();
// create temperature series
let s1 = Series::new("temp", [20, 10, 7, 9, 1].as_ref());
// create rain series
let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01].as_ref());
// create a new DataFrame
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
println!("{:?}", df);

Outputs:

+------------+------+------+
| date       | temp | rain |
| ---        | ---  | ---  |
| date32     | i32  | f64  |
+============+======+======+
| 2020-08-21 | 20   | 0.2  |
+------------+------+------+
| 2020-08-21 | 10   | 0.1  |
+------------+------+------+
| 2020-08-22 | 7    | 0.3  |
+------------+------+------+
| 2020-08-23 | 9    | 0.1  |
+------------+------+------+
| 2020-08-22 | 1    | 0.01 |
+------------+------+------+

Implementations

impl<'df, 'selection_str> GroupBy<'df, 'selection_str>[src]

pub fn pivot(
    &mut self,
    pivot_column: &'selection_str str,
    values_column: &'selection_str str
) -> Pivot<'_, '_>
[src]

Pivot a column of the current DataFrame and perform one of the following aggregations:

  • first
  • sum
  • min
  • max
  • mean
  • median

The pivot operation consists of a group by on one or multiple columns (these will be the new y-axis), a column that will be pivoted (this will be the new x-axis), and an aggregation.

Panics

If the values column is not a numerical type, the code will panic.

Example

use polars_core::prelude::*;
use polars_core::df;

fn example() -> Result<DataFrame> {
    let df = df!("foo" => &["A", "A", "B", "B", "C"],
        "N" => &[1, 2, 2, 4, 2],
        "bar" => &["k", "l", "m", "n", "o"]
        ).unwrap();

    df.groupby("foo")?
    .pivot("bar", "N")
    .first()
}

Transforms:

+-----+-----+-----+
| foo | N   | bar |
| --- | --- | --- |
| str | i32 | str |
+=====+=====+=====+
| "A" | 1   | "k" |
+-----+-----+-----+
| "A" | 2   | "l" |
+-----+-----+-----+
| "B" | 2   | "m" |
+-----+-----+-----+
| "B" | 4   | "n" |
+-----+-----+-----+
| "C" | 2   | "o" |
+-----+-----+-----+

Into:

+-----+------+------+------+------+------+
| foo | o    | n    | m    | l    | k    |
| --- | ---  | ---  | ---  | ---  | ---  |
| str | i32  | i32  | i32  | i32  | i32  |
+=====+======+======+======+======+======+
| "A" | null | null | null | 2    | 1    |
+-----+------+------+------+------+------+
| "B" | null | 4    | 2    | null | null |
+-----+------+------+------+------+------+
| "C" | 2    | null | null | null | null |
+-----+------+------+------+------+------+

impl<'df, 'selection_str> GroupBy<'df, 'selection_str>[src]

pub fn new(
    df: &'df DataFrame,
    by: Vec<Series, Global>,
    groups: Vec<(u32, Vec<u32, Global>), Global>,
    selected_agg: Option<Vec<&'selection_str str, Global>>
) -> GroupBy<'df, 'selection_str>
[src]

pub fn select<S, J>(self, selection: S) -> GroupBy<'df, 'selection_str> where
    S: Selection<'selection_str, J>, 
[src]

Select the column(s) that should be aggregated. You can select a single column or a slice of columns.

Note that making a selection with this method is not required. If you skip it, all columns (except for the keys) will be selected for aggregation.

pub fn get_groups(&self) -> &Vec<(u32, Vec<u32, Global>), Global>[src]

Get the internal representation of the GroupBy operation. The returned Vec contains tuples of (first_idx, Vec), where the second value in each tuple is a vector with all matching indexes.

pub fn get_groups_mut(&mut self) -> &mut Vec<(u32, Vec<u32, Global>), Global>[src]

Get a mutable reference to the internal representation of the GroupBy operation. The returned Vec contains tuples of (first_idx, Vec), where the second value in each tuple is a vector with all matching indexes.

pub fn keys(&self) -> Vec<Series, Global>[src]

pub fn mean(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped series and compute the mean per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select(&["temp", "rain"]).mean()
}

Returns:

+------------+-----------+-----------+
| date       | temp_mean | rain_mean |
| ---        | ---       | ---       |
| date32     | f64       | f64       |
+============+===========+===========+
| 2020-08-23 | 9         | 0.1       |
+------------+-----------+-----------+
| 2020-08-22 | 4         | 0.155     |
+------------+-----------+-----------+
| 2020-08-21 | 15        | 0.15      |
+------------+-----------+-----------+

pub fn sum(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped series and compute the sum per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").sum()
}

Returns:

+------------+----------+
| date       | temp_sum |
| ---        | ---      |
| date32     | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 8        |
+------------+----------+
| 2020-08-21 | 30       |
+------------+----------+

pub fn min(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped series and compute the minimal value per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").min()
}

Returns:

+------------+----------+
| date       | temp_min |
| ---        | ---      |
| date32     | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 1        |
+------------+----------+
| 2020-08-21 | 10       |
+------------+----------+

pub fn max(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped series and compute the maximum value per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").max()
}

Returns:

+------------+----------+
| date       | temp_max |
| ---        | ---      |
| date32     | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 7        |
+------------+----------+
| 2020-08-21 | 20       |
+------------+----------+

pub fn first(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped Series and find the first value per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").first()
}

Returns:

+------------+------------+
| date       | temp_first |
| ---        | ---        |
| date32     | i32        |
+============+============+
| 2020-08-23 | 9          |
+------------+------------+
| 2020-08-22 | 7          |
+------------+------------+
| 2020-08-21 | 20         |
+------------+------------+

pub fn last(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped Series and return the last value per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").last()
}

Returns:

+------------+------------+
| date       | temp_last  |
| ---        | ---        |
| date32     | i32        |
+============+============+
| 2020-08-23 | 9          |
+------------+------------+
| 2020-08-22 | 1          |
+------------+------------+
| 2020-08-21 | 10         |
+------------+------------+

pub fn n_unique(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped Series by counting the number of unique values.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").n_unique()
}

Returns:

+------------+---------------+
| date       | temp_n_unique |
| ---        | ---           |
| date32     | u32           |
+============+===============+
| 2020-08-23 | 1             |
+------------+---------------+
| 2020-08-22 | 2             |
+------------+---------------+
| 2020-08-21 | 2             |
+------------+---------------+

pub fn quantile(&self, quantile: f64) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped Series and determine the quantile per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").quantile(0.2)
}

pub fn median(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped Series and determine the median per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").median()
}

pub fn var(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped Series and determine the variance per group.

pub fn std(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped Series and determine the standard deviation per group.

pub fn count(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate grouped series and compute the number of values per group.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.select("temp").count()
}

Returns:

+------------+------------+
| date       | temp_count |
| ---        | ---        |
| date32     | u32        |
+============+============+
| 2020-08-23 | 1          |
+------------+------------+
| 2020-08-22 | 2          |
+------------+------------+
| 2020-08-21 | 2          |
+------------+------------+

pub fn groups(&self) -> Result<DataFrame, PolarsError>[src]

Get the groupby group indexes.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.groups()
}

Returns:

+--------------+------------+
| date         | groups     |
| ---          | ---        |
| date32(days) | list [u32] |
+==============+============+
| 2020-08-23   | "[3]"      |
+--------------+------------+
| 2020-08-22   | "[2, 4]"   |
+--------------+------------+
| 2020-08-21   | "[0, 1]"   |
+--------------+------------+

pub fn agg<Column, S, Slice>(
    &self,
    column_to_agg: &[(Column, Slice)]
) -> Result<DataFrame, PolarsError> where
    Slice: AsRef<[S]>,
    Column: AsRef<str>,
    S: AsRef<str>, 
[src]

Combine different aggregations on columns

Operations

  • count
  • first
  • last
  • sum
  • min
  • max
  • mean
  • median
  • n_unique

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby("date")?.agg(&[("temp", &["n_unique", "sum", "min"])])
}

Returns:

+--------------+---------------+----------+----------+
| date         | temp_n_unique | temp_sum | temp_min |
| ---          | ---           | ---      | ---      |
| date32(days) | u32           | i32      | i32      |
+==============+===============+==========+==========+
| 2020-08-23   | 1             | 9        | 9        |
+--------------+---------------+----------+----------+
| 2020-08-22   | 2             | 8        | 1        |
+--------------+---------------+----------+----------+
| 2020-08-21   | 2             | 30       | 10       |
+--------------+---------------+----------+----------+

pub fn agg_list(&self) -> Result<DataFrame, PolarsError>[src]

Aggregate the groups of the groupby operation into lists.

Example

fn example(df: DataFrame) -> Result<DataFrame> {
    // GroupBy and aggregate to Lists
    df.groupby("date")?.select("temp").agg_list()
}

Returns:

+------------+------------------------+
| date       | temp_agg_list          |
| ---        | ---                    |
| date32     | list [i32]             |
+============+========================+
| 2020-08-23 | "[Some(9)]"            |
+------------+------------------------+
| 2020-08-22 | "[Some(7), Some(1)]"   |
+------------+------------------------+
| 2020-08-21 | "[Some(20), Some(10)]" |
+------------+------------------------+

pub fn apply<F>(&self, f: F) -> Result<DataFrame, PolarsError> where
    F: Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync
[src]

Apply a closure over the groups as a new DataFrame.

Trait Implementations

impl<'df, 'selection_str> Clone for GroupBy<'df, 'selection_str>[src]

pub fn clone(&self) -> GroupBy<'df, 'selection_str>[src]

Returns a copy of the value. Read more

fn clone_from(&mut self, source: &Self)1.0.0[src]

Performs copy-assignment from source. Read more

impl<'df, 'selection_str> Debug for GroupBy<'df, 'selection_str>[src]

pub fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>[src]

Formats the value using the given formatter. Read more

Auto Trait Implementations

impl<'df, 'selection_str> !RefUnwindSafe for GroupBy<'df, 'selection_str>

impl<'df, 'selection_str> Send for GroupBy<'df, 'selection_str>

impl<'df, 'selection_str> Sync for GroupBy<'df, 'selection_str>

impl<'df, 'selection_str> Unpin for GroupBy<'df, 'selection_str>

impl<'df, 'selection_str> !UnwindSafe for GroupBy<'df, 'selection_str>

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

pub fn type_id(&self) -> TypeId[src]

Gets the TypeId of self. Read more

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

pub fn borrow(&self) -> &T[src]

Immutably borrows from an owned value. Read more

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

pub fn borrow_mut(&mut self) -> &mut T[src]

Mutably borrows from an owned value. Read more

impl<T, U> Cast<U> for T where
    U: FromCast<T>, 

pub fn cast(self) -> U

Numeric cast from self to U.

impl<T> From<T> for T[src]

pub fn from(t: T) -> T[src]

Performs the conversion.

impl<T> FromCast<T> for T

pub fn from_cast(t: T) -> T

Numeric cast from T to Self.

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

pub fn into(self) -> U[src]

Performs the conversion.

impl<T> Pointable for T

pub const ALIGN: usize

The alignment of the pointer.

type Init = T

The type for initializers.

pub unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more

pub unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

pub unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

impl<T> ToOwned for T where
    T: Clone
[src]

type Owned = T

The resulting type after obtaining ownership.

pub fn to_owned(&self) -> T[src]

Creates owned data from borrowed data, usually by cloning. Read more

pub fn clone_into(&self, target: &mut T)[src]

🔬 This is a nightly-only experimental API. (toowned_clone_into)

recently added

Uses borrowed data to replace owned data, usually by cloning. Read more

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>[src]

Performs the conversion.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>[src]

Performs the conversion.

impl<V, T> VZip<V> for T where
    V: MultiLane<T>, 

pub fn vzip(self) -> V