Skip to main content

GroupedData

Struct GroupedData 

Source
pub struct GroupedData { /* private fields */ }
Expand description

GroupedData represents a DataFrame grouped by certain columns, similar to PySpark’s GroupedData.

Implementations§

Source§

impl GroupedData

Source

pub fn count(&self) -> Result<DataFrame, PolarsError>

Count rows in each group

Source

pub fn sum(&self, column: &str) -> Result<DataFrame, PolarsError>

Sum a column in each group

Source

pub fn avg(&self, column: &str) -> Result<DataFrame, PolarsError>

Average (mean) of a column in each group

Source

pub fn min(&self, column: &str) -> Result<DataFrame, PolarsError>

Minimum value of a column in each group

Source

pub fn max(&self, column: &str) -> Result<DataFrame, PolarsError>

Maximum value of a column in each group

Source

pub fn first(&self, column: &str) -> Result<DataFrame, PolarsError>

First value of a column in each group (order not guaranteed unless explicitly sorted).

Source

pub fn last(&self, column: &str) -> Result<DataFrame, PolarsError>

Last value of a column in each group (order not guaranteed unless explicitly sorted).

Source

pub fn approx_count_distinct( &self, column: &str, ) -> Result<DataFrame, PolarsError>

Approximate count of distinct values in each group (PySpark approx_count_distinct). Implemented with n_unique, so the result is exact and matches count_distinct.

Source

pub fn any_value(&self, column: &str) -> Result<DataFrame, PolarsError>

Any value from the group (PySpark any_value). Uses the first value.

Source

pub fn bool_and(&self, column: &str) -> Result<DataFrame, PolarsError>

Boolean AND across group (PySpark bool_and / every).

Source

pub fn bool_or(&self, column: &str) -> Result<DataFrame, PolarsError>

Boolean OR across group (PySpark bool_or / some).

Source

pub fn product(&self, column: &str) -> Result<DataFrame, PolarsError>

Product of column values in each group (PySpark product).

Source

pub fn collect_list(&self, column: &str) -> Result<DataFrame, PolarsError>

Collect column values into list per group (PySpark collect_list).

Source

pub fn collect_set(&self, column: &str) -> Result<DataFrame, PolarsError>

Collect distinct column values into list per group (PySpark collect_set).

Source

pub fn count_if(&self, column: &str) -> Result<DataFrame, PolarsError>

Count rows where condition column is true (PySpark count_if).

Source

pub fn percentile(&self, column: &str, p: f64) -> Result<DataFrame, PolarsError>

Percentile of column (PySpark percentile). p in 0.0..=1.0.

Source

pub fn max_by( &self, value_col: &str, ord_col: &str, ) -> Result<DataFrame, PolarsError>

Value of value_col where ord_col is maximum (PySpark max_by).

Source

pub fn min_by( &self, value_col: &str, ord_col: &str, ) -> Result<DataFrame, PolarsError>

Value of value_col where ord_col is minimum (PySpark min_by).

Source

pub fn covar_pop( &self, col1: &str, col2: &str, ) -> Result<DataFrame, PolarsError>

Population covariance between two columns in each group (PySpark covar_pop).

Source

pub fn covar_samp( &self, col1: &str, col2: &str, ) -> Result<DataFrame, PolarsError>

Sample covariance between two columns in each group (PySpark covar_samp). ddof=1.

Source

pub fn corr(&self, col1: &str, col2: &str) -> Result<DataFrame, PolarsError>

Pearson correlation between two columns in each group (PySpark corr).

Source

pub fn regr_count( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression count of (y, x) pairs where both values are non-null (PySpark regr_count).

Source

pub fn regr_avgx( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression average of x (PySpark regr_avgx).

Source

pub fn regr_avgy( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression average of y (PySpark regr_avgy).

Source

pub fn regr_slope( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression slope (PySpark regr_slope).

Source

pub fn regr_intercept( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression intercept (PySpark regr_intercept).

Source

pub fn regr_r2( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression R-squared (PySpark regr_r2).

Source

pub fn regr_sxx( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression sum of squares of x: the sum of (x - avg_x)^2 (PySpark regr_sxx).

Source

pub fn regr_syy( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression sum of squares of y: the sum of (y - avg_y)^2 (PySpark regr_syy).

Source

pub fn regr_sxy( &self, y_col: &str, x_col: &str, ) -> Result<DataFrame, PolarsError>

Regression sum of cross products: the sum of (x - avg_x) * (y - avg_y) (PySpark regr_sxy).

Source

pub fn kurtosis(&self, column: &str) -> Result<DataFrame, PolarsError>

Kurtosis of a column in each group (PySpark kurtosis). Fisher definition, bias=true.

Source

pub fn skewness(&self, column: &str) -> Result<DataFrame, PolarsError>

Skewness of a column in each group (PySpark skewness). bias=true.

Source

pub fn agg(&self, aggregations: Vec<Expr>) -> Result<DataFrame, PolarsError>

Apply multiple aggregations at once (generic agg method)

Source

pub fn grouping_columns(&self) -> &[String]

Get grouping columns

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V