Struct polars::frame::groupby::GroupBy

source · [−]

pub struct GroupBy<'df> {
    pub df: &'df DataFrame,
    /* private fields */
}

Expand description

Returned by a groupby operation on a DataFrame. This struct supports several aggregations.

Unless described otherwise, the examples in this struct are performed on the following DataFrame:

use polars_core::prelude::*;

let dates = &[
"2020-08-21",
"2020-08-21",
"2020-08-22",
"2020-08-23",
"2020-08-22",
];
// date format
let fmt = "%Y-%m-%d";
// create date series
let s0 = DateChunked::parse_from_str_slice("date", dates, fmt)
        .into_series();
// create temperature series
let s1 = Series::new("temp", [20, 10, 7, 9, 1]);
// create rain series
let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01]);
// create a new DataFrame
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
println!("{:?}", df);

Outputs:

+------------+------+------+
| date       | temp | rain |
| ---        | ---  | ---  |
| Date       | i32  | f64  |
+============+======+======+
| 2020-08-21 | 20   | 0.2  |
+------------+------+------+
| 2020-08-21 | 10   | 0.1  |
+------------+------+------+
| 2020-08-22 | 7    | 0.3  |
+------------+------+------+
| 2020-08-23 | 9    | 0.1  |
+------------+------+------+
| 2020-08-22 | 1    | 0.01 |
+------------+------+------+

Fields

df: &'df DataFrame

Implementations

source

impl<'df> GroupBy<'df>

source

pub fn new(
 df: &'df DataFrame,
 by: Vec<Series, Global>,
 groups: GroupsProxy,
 selected_agg: Option<Vec<String, Global>>
) -> GroupBy<'df>

source

pub fn select<I, S>(self, selection: I) -> GroupBy<'df> where
I: IntoIterator<Item = S>,
S: AsRef<str>,

Select the column(s) that should be aggregated. You can select a single column or a slice of columns.

Note that making a selection with this method is not required. If you skip it all columns (except for the keys) will be selected for aggregation.

source

pub fn get_groups(&self) -> &GroupsProxy

Get the internal representation of the GroupBy operation. The Vec returned contains tuples of the form (first_idx, Vec<indexes>), where the second value in the tuple is a vector with all matching indexes.

source

pub unsafe fn get_groups_mut(&mut self) -> &mut GroupsProxy

Get a mutable reference to the internal representation of the GroupBy operation. The Vec returned contains tuples of the form (first_idx, Vec<indexes>), where the second value in the tuple is a vector with all matching indexes.

Safety

Groups should always be in bounds of the DataFrame held by this GroupBy. If you mutate the groups, you must uphold that invariant.

source

pub fn take_groups(self) -> GroupsProxy

source

pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Series, Global> ⓘ Notable traits for Vec<u8, A>: `impl<A> Write for Vec<u8, A> where A: Allocator`

source

pub fn keys(&self) -> Vec<Series, Global> ⓘ Notable traits for Vec<u8, A>: `impl<A> Write for Vec<u8, A> where A: Allocator`

source

pub fn mean(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped series and compute the mean per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(&["temp", "rain"]).mean()
}

Returns:

+------------+-----------+-----------+
| date       | temp_mean | rain_mean |
| ---        | ---       | ---       |
| Date       | f64       | f64       |
+============+===========+===========+
| 2020-08-23 | 9         | 0.1       |
+------------+-----------+-----------+
| 2020-08-22 | 4         | 0.155     |
+------------+-----------+-----------+
| 2020-08-21 | 15        | 0.15      |
+------------+-----------+-----------+

source

pub fn sum(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped series and compute the sum per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).sum()
}

Returns:

+------------+----------+
| date       | temp_sum |
| ---        | ---      |
| Date       | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 8        |
+------------+----------+
| 2020-08-21 | 30       |
+------------+----------+

source

pub fn min(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped series and compute the minimal value per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).min()
}

Returns:

+------------+----------+
| date       | temp_min |
| ---        | ---      |
| Date       | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 1        |
+------------+----------+
| 2020-08-21 | 10       |
+------------+----------+

source

pub fn max(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped series and compute the maximum value per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).max()
}

Returns:

+------------+----------+
| date       | temp_max |
| ---        | ---      |
| Date       | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 7        |
+------------+----------+
| 2020-08-21 | 20       |
+------------+----------+

source

pub fn first(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped Series and find the first value per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).first()
}

Returns:

+------------+------------+
| date       | temp_first |
| ---        | ---        |
| Date       | i32        |
+============+============+
| 2020-08-23 | 9          |
+------------+------------+
| 2020-08-22 | 7          |
+------------+------------+
| 2020-08-21 | 20         |
+------------+------------+

source

pub fn last(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped Series and return the last value per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).last()
}

Returns:

+------------+------------+
| date       | temp_last  |
| ---        | ---        |
| Date       | i32        |
+============+============+
| 2020-08-23 | 9          |
+------------+------------+
| 2020-08-22 | 1          |
+------------+------------+
| 2020-08-21 | 10         |
+------------+------------+

source

pub fn n_unique(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped Series by counting the number of unique values.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).n_unique()
}

Returns:

+------------+---------------+
| date       | temp_n_unique |
| ---        | ---           |
| Date       | u32           |
+============+===============+
| 2020-08-23 | 1             |
+------------+---------------+
| 2020-08-22 | 2             |
+------------+---------------+
| 2020-08-21 | 2             |
+------------+---------------+

source

pub fn quantile(
 &self,
 quantile: f64,
 interpol: QuantileInterpolOptions
) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped Series and determine the quantile per group.

Example


fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).quantile(0.2, QuantileInterpolOptions::default())
}

source

pub fn median(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped Series and determine the median per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).median()
}

source

pub fn var(&self, ddof: u8) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped Series and determine the variance per group.

source

pub fn std(&self, ddof: u8) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate grouped Series and determine the standard deviation per group.

source

pub fn count(&self) -> Result<DataFrame, PolarsError>

Aggregate grouped series and compute the number of values per group.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).count()
}

Returns:

+------------+------------+
| date       | temp_count |
| ---        | ---        |
| Date       | u32        |
+============+============+
| 2020-08-23 | 1          |
+------------+------------+
| 2020-08-22 | 2          |
+------------+------------+
| 2020-08-21 | 2          |
+------------+------------+

source

pub fn groups(&self) -> Result<DataFrame, PolarsError>

Get the groupby group indexes.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["date"])?.groups()
}

Returns:

+--------------+------------+
| date         | groups     |
| ---          | ---        |
| Date(days)   | list [u32] |
+==============+============+
| 2020-08-23   | "[3]"      |
+--------------+------------+
| 2020-08-22   | "[2, 4]"   |
+--------------+------------+
| 2020-08-21   | "[0, 1]"   |
+--------------+------------+

source

pub fn agg_list(&self) -> Result<DataFrame, PolarsError>

👎Deprecated since 0.24.1: use polars.lazy aggregations

Aggregate the groups of the groupby operation into lists.

Example

fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    // GroupBy and aggregate to Lists
    df.groupby(["date"])?.select(["temp"]).agg_list()
}

Returns:

+------------+------------------------+
| date       | temp_agg_list          |
| ---        | ---                    |
| Date       | list [i32]             |
+============+========================+
| 2020-08-23 | "[Some(9)]"            |
+------------+------------------------+
| 2020-08-22 | "[Some(7), Some(1)]"   |
+------------+------------------------+
| 2020-08-21 | "[Some(20), Some(10)]" |
+------------+------------------------+

source

pub fn par_apply<F>(&self, f: F) -> Result<DataFrame, PolarsError> where
F: Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,

👎Deprecated since 0.24.1: use polars.lazy aggregations

Apply a closure over the groups as a new DataFrame in parallel.

source

pub fn apply<F>(&self, f: F) -> Result<DataFrame, PolarsError> where
F: Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,

Apply a closure over the groups as a new DataFrame.

Trait Implementations

source

impl<'df> Clone for GroupBy<'df>

source

fn clone(&self) -> GroupBy<'df>

Returns a copy of the value. Read more

1.0.0 · source

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

source

impl<'df> Debug for GroupBy<'df>

source

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more

Auto Trait Implementations

impl<'df> !RefUnwindSafe for GroupBy<'df>

impl<'df> Send for GroupBy<'df>

impl<'df> Sync for GroupBy<'df>

impl<'df> Unpin for GroupBy<'df>

impl<'df> !UnwindSafe for GroupBy<'df>

Blanket Implementations

source

impl<T> Any for T where
T: 'static + ?Sized,

source

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

source

impl<T> Borrow<T> for T where
T: ?Sized,

const: unstable · source

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

source

impl<T> BorrowMut<T> for T where
T: ?Sized,

const: unstable · source

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

source

impl<T> From<T> for T

fn from(t: T) -> T

Returns the argument unchanged.

source

impl<T, U> Into<U> for T where
U: From<T>,

const: unstable · source

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T> Pointable for T

const ALIGN: usize = mem::align_of::<T>()

The alignment of pointer.

type Init = T

The type for initializers.

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

source

impl<T> ToOwned for T where
T: Clone,

type Owned = T

The resulting type after obtaining ownership.

source

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

source

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

source

impl<T, U> TryFrom<U> for T where
U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

const: unstable · source

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

source

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

const: unstable · source

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct polars::frame::groupby::GroupBy

Fields

Implementations

impl<'df> GroupBy<'df>

pub fn new( df: &'df DataFrame, by: Vec<Series, Global>, groups: GroupsProxy, selected_agg: Option<Vec<String, Global>>) -> GroupBy<'df>

pub fn select<I, S>(self, selection: I) -> GroupBy<'df> where I: IntoIterator<Item = S>, S: AsRef<str>,

pub fn get_groups(&self) -> &GroupsProxy

pub unsafe fn get_groups_mut(&mut self) -> &mut GroupsProxy

pub fn take_groups(self) -> GroupsProxy

pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Series, Global> ⓘ Notable traits for Vec<u8, A>: `impl<A> Write for Vec<u8, A> where A: Allocator`

pub fn keys(&self) -> Vec<Series, Global> ⓘ Notable traits for Vec<u8, A>: `impl<A> Write for Vec<u8, A> where A: Allocator`

pub fn mean(&self) -> Result<DataFrame, PolarsError>

pub fn sum(&self) -> Result<DataFrame, PolarsError>

pub fn min(&self) -> Result<DataFrame, PolarsError>

pub fn max(&self) -> Result<DataFrame, PolarsError>

pub fn first(&self) -> Result<DataFrame, PolarsError>

pub fn last(&self) -> Result<DataFrame, PolarsError>

pub fn n_unique(&self) -> Result<DataFrame, PolarsError>

pub fn quantile( &self, quantile: f64, interpol: QuantileInterpolOptions) -> Result<DataFrame, PolarsError>

pub fn median(&self) -> Result<DataFrame, PolarsError>

pub fn var(&self, ddof: u8) -> Result<DataFrame, PolarsError>

pub fn std(&self, ddof: u8) -> Result<DataFrame, PolarsError>

pub fn count(&self) -> Result<DataFrame, PolarsError>

pub fn groups(&self) -> Result<DataFrame, PolarsError>

pub fn agg_list(&self) -> Result<DataFrame, PolarsError>

pub fn par_apply<F>(&self, f: F) -> Result<DataFrame, PolarsError> where F: Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,

pub fn apply<F>(&self, f: F) -> Result<DataFrame, PolarsError> where F: Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,

Trait Implementations

impl<'df> Clone for GroupBy<'df>

fn clone(&self) -> GroupBy<'df>

fn clone_from(&mut self, source: &Self)

impl<'df> Debug for GroupBy<'df>

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Auto Trait Implementations

impl<'df> !RefUnwindSafe for GroupBy<'df>

impl<'df> Send for GroupBy<'df>

impl<'df> Sync for GroupBy<'df>

impl<'df> Unpin for GroupBy<'df>

impl<'df> !UnwindSafe for GroupBy<'df>

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> DynClone for T where T: Clone,

fn __clone_box(&self, Private) -> *mut ()

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Pointable for T

const ALIGN: usize = mem::align_of::<T>()

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

pub fn new(
df: &'df DataFrame,
by: Vec<Series, Global>,
groups: GroupsProxy,
selected_agg: Option<Vec<String, Global>>
) -> GroupBy<'df>

pub fn select<I, S>(self, selection: I) -> GroupBy<'df> where
I: IntoIterator<Item = S>,
S: AsRef<str>,

pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Series, Global> ⓘ Notable traits for Vec<u8, A>: `impl<A> Write for Vec<u8, A> where A: Allocator`

pub fn keys(&self) -> Vec<Series, Global> ⓘ Notable traits for Vec<u8, A>: `impl<A> Write for Vec<u8, A> where A: Allocator`

pub fn quantile(
&self,
quantile: f64,
interpol: QuantileInterpolOptions
) -> Result<DataFrame, PolarsError>

pub fn par_apply<F>(&self, f: F) -> Result<DataFrame, PolarsError> where
F: Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,

pub fn apply<F>(&self, f: F) -> Result<DataFrame, PolarsError> where
F: Fn(DataFrame) -> Result<DataFrame, PolarsError> + Send + Sync,

impl<T> Any for T where
T: 'static + ?Sized,

impl<T> Borrow<T> for T where
T: ?Sized,

impl<T> BorrowMut<T> for T where
T: ?Sized,

impl<T> DynClone for T where
T: Clone,

impl<T, U> Into<U> for T where
U: From<T>,

impl<T> ToOwned for T where
T: Clone,

impl<T, U> TryFrom<U> for T where
U: Into<T>,

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,

impl<V, T> VZip<V> for T where
V: MultiLane<T>,