Struct SeriesGroupBy

Source

pub struct SeriesGroupBy<'a> { /* private fields */ }

Expand description

GroupBy for Series, grouping by values of another Series.

Created by Series::groupby(by). Supports standard aggregation methods that return a new Series indexed by group keys.

Implementations§

Source §

impl SeriesGroupBy<'_>

Source

pub fn keys(&self) -> Vec<IndexLabel>

Group labels in first-seen order.

Source

pub fn indices(&self) -> HashMap<IndexLabel, Vec<usize>>

Mapping from group labels to source row positions.

Source

pub fn groups(&self) -> HashMap<IndexLabel, Vec<usize>>

Alias for indices(), matching the shape of pandas’ groups property.

Source

pub fn grouper(&self) -> String

Name of the Series used as the grouper.

Source

pub fn level(&self) -> Option<String>

Grouping level descriptor.

Series::groupby(by) currently groups by a supplied Series rather than by an index level, so this mirrors pandas’ None level state.

Source

pub fn plot(&self) -> Result<PlotSpec, FrameError>

Return a backend-neutral pandas-style grouped plotting request.

Source

pub fn hist(&self) -> Result<HistogramSpec, FrameError>

Return a backend-neutral pandas-style grouped histogram request.

Source

pub fn ngroups(&self) -> usize

Number of groups.

Source

pub fn ndim(&self) -> usize

Grouped object dimensionality.

Source

pub fn dtype(&self) -> DType

DType of the grouped values.

Source

pub fn sum(&self) -> Result<Series, FrameError>

Sum of each group.

Source

pub fn mean(&self) -> Result<Series, FrameError>

Mean of each group.

Source

pub fn count(&self) -> Result<Series, FrameError>

Count of non-null values in each group.

Source

pub fn min(&self) -> Result<Series, FrameError>

Minimum of each group.

Source

pub fn max(&self) -> Result<Series, FrameError>

Maximum of each group.

Source

pub fn any(&self) -> Result<Series, FrameError>

Whether any non-missing value is truthy in each group.

Source

pub fn all(&self) -> Result<Series, FrameError>

Whether all non-missing values are truthy in each group.

Source

pub fn nunique(&self) -> Result<Series, FrameError>

Number of unique non-missing values in each group.

Source

pub fn unique(&self) -> Result<Series, FrameError>

Unique values in each group, preserving first-seen group and value order.

Source

pub fn ohlc(&self) -> Result<DataFrame, FrameError>

Open-high-low-close per group.

Source

pub fn quantile(&self, q: f64) -> Result<Series, FrameError>

Quantile of each group using linear interpolation.

Source

pub fn sem(&self) -> Result<Series, FrameError>

Standard error of the mean for each group.

Source

pub fn skew(&self) -> Result<Series, FrameError>

Skewness of each group.

Source

pub fn kurtosis(&self) -> Result<Series, FrameError>

Excess kurtosis (Fisher’s definition, bias=False) of each group.

Matches pd.Series.groupby(...).kurtosis(). Mirrors fp_types::nankurt, which requires at least 4 non-missing values per group: a group with fewer than 4 non-missing values, or whose sample standard deviation is zero, yields Null(NaN).

Source

pub fn kurt(&self) -> Result<Series, FrameError>

Alias for kurtosis() — pandas exposes both .kurt() and .kurtosis() on Series.groupby aggregations, and parity with that surface requires both spellings.

Source

pub fn idxmin(&self) -> Result<Series, FrameError>

Original index label of the minimum value in each group.

Source

pub fn idxmax(&self) -> Result<Series, FrameError>

Original index label of the maximum value in each group.

Source

pub fn is_monotonic_increasing(&self) -> Result<Series, FrameError>

Per-group monotonic-increasing predicate.

Source

pub fn is_monotonic_decreasing(&self) -> Result<Series, FrameError>

Per-group monotonic-decreasing predicate.

Source

pub fn rank( &self, method: &str, ascending: bool, na_option: &str, ) -> Result<Series, FrameError>

Rank values within each group.

Matches series.groupby(by).rank().

- method: “average” (the pandas default), “min”, “max”, “first”, or “dense”.
- ascending: rank direction; true (the pandas default) gives the smallest value rank 1.
- na_option: “keep” (NaN stays NaN), “top” (NaN gets the lowest ranks), or “bottom” (NaN gets the highest ranks).

Source

pub fn rank_with_pct( &self, method: &str, ascending: bool, na_option: &str, pct: bool, ) -> Result<Series, FrameError>

Rank values within each group and optionally scale them to percentile ranks.

Source

pub fn std(&self) -> Result<Series, FrameError>

Standard deviation of each group (ddof=1).

Source

pub fn var(&self) -> Result<Series, FrameError>

Variance of each group (ddof=1).

Source

pub fn median(&self) -> Result<Series, FrameError>

Median of each group.

Source

pub fn prod(&self) -> Result<Series, FrameError>

Product of each group.

Source

pub fn cumcount(&self) -> Result<Series, FrameError>

Assign within-group cumulative count (0-based).

Source

pub fn cumcount_with_ascending( &self, ascending: bool, ) -> Result<Series, FrameError>

Assign within-group cumulative count (0-based) with explicit direction.

Source

pub fn ngroup(&self) -> Result<Series, FrameError>

Assign ordinal group number to each source row.

Source

pub fn ngroup_with_ascending( &self, ascending: bool, ) -> Result<Series, FrameError>

Assign ordinal group number with explicit direction.

Source

pub fn cumsum(&self) -> Result<Series, FrameError>

GroupBy cumulative sum.

Source

pub fn cumprod(&self) -> Result<Series, FrameError>

GroupBy cumulative product.

Source

pub fn cummin(&self) -> Result<Series, FrameError>

GroupBy cumulative minimum.

Source

pub fn cummax(&self) -> Result<Series, FrameError>

GroupBy cumulative maximum.

Source

pub fn shift(&self, periods: i64) -> Result<Series, FrameError>

GroupBy shift within each group.

Examples found in repository

examples/bench_groupby_shift.rs (line 41)

19fn main() {
20    let args: Vec<String> = std::env::args().collect();
21    let n: usize = args.get(1).and_then(|s| s.parse().ok()).unwrap_or(1_000_000);
22    let iters: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(30);
23    let groups = 100usize;
24
25    let labels: Vec<IndexLabel> = (0..n as i64).map(IndexLabel::Int64).collect();
26    let keys: Vec<i64> = (0..n).map(|i| (i % groups) as i64).collect();
27    let vals: Vec<f64> = (0..n).map(|i| (i.wrapping_mul(37) % 9973) as f64 * 0.25).collect();
28    let value = Series::new(
29        "v".to_string(),
30        Index::new(labels.clone()),
31        Column::from_f64_values(vals),
32    )
33    .unwrap();
34    let key = Series::new(
35        "k".to_string(),
36        Index::new(labels),
37        Column::from_i64_values(keys),
38    )
39    .unwrap();
40
41    let out = value.groupby(&key).unwrap().shift(1).unwrap();
42    let mut chk: u64 = 0xcbf29ce484222325;
43    for v in out.values() {
44        let bits = match v {
45            Scalar::Float64(f) => f.to_bits(),
46            _ => 0xDEAD_BEEF_DEAD_BEEF,
47        };
48        chk = (chk ^ bits).wrapping_mul(0x100000001b3);
49    }
50
51    let mut sink = 0usize;
52    let start = Instant::now();
53    for _ in 0..iters {
54        let s = black_box(value.groupby(&key).unwrap().shift(1).unwrap());
55        sink ^= s.len();
56    }
57    let elapsed = start.elapsed();
58    println!(
59        "groupby_shift n={n} iters={iters}: {:.3} ms/iter (chk={chk:016x} sink={sink})",
60        elapsed.as_secs_f64() * 1000.0 / iters as f64
61    );
62}

Source

pub fn diff(&self, periods: usize) -> Result<Series, FrameError>

GroupBy difference within each group.

Source

pub fn pct_change(&self, periods: usize) -> Result<Series, FrameError>

GroupBy percentage change within each group.

Source

pub fn ffill(&self, limit: Option<usize>) -> Result<Series, FrameError>

Forward-fill missing values within each group.

Source

pub fn bfill(&self, limit: Option<usize>) -> Result<Series, FrameError>

Backward-fill missing values within each group.

Source

pub fn fillna(&self, value: &Scalar) -> Result<Series, FrameError>

Fill missing values in each group with a scalar value.

Source

pub fn rolling(&self, window: usize) -> SeriesGroupByRolling<'_, '_>

Grouped rolling window operations.

Matches the narrow pd.SeriesGroupBy.rolling(window) reduction shape. Results are mapped back onto the original flat Series index.

Source

pub fn expanding( &self, min_periods: Option<usize>, ) -> SeriesGroupByExpanding<'_, '_>

Grouped expanding window operations.

Matches the narrow pd.SeriesGroupBy.expanding(min_periods=...) reduction shape. Results are mapped back onto the original flat Series index.

Source

pub fn ewm( &self, span: Option<f64>, alpha: Option<f64>, ) -> SeriesGroupByEwm<'_, '_>

Grouped exponentially weighted moving window operations.

Matches the narrow pd.SeriesGroupBy.ewm(span=..., alpha=...) reduction shape. Results are mapped back onto the original flat Series index.

Source

pub fn resample(&self, freq: &str) -> SeriesGroupByResample<'_, '_>

Grouped resampling.

Matches the narrow pd.SeriesGroupBy.resample(freq) reduction shape. Until row MultiIndex support is implemented, each (group, bucket) pair is represented as a single flat "{group}, {bucket}" label.

Source

pub fn nlargest(&self, n: usize) -> Result<Series, FrameError>

Return up to n largest non-missing values from each group.

Examples found in repository

examples/bench_take_gather.rs (line 75)

27fn golden() -> String {
28    let mut out = String::new();
29
30    // Series::take across dtypes, negative + duplicate indices.
31    let s = s_i64(vec![10, 20, 30, 40, 50]);
32    let r = s.take(&[4, 0, -1, 2, -5, 2]).unwrap();
33    out.push_str(&format!("take_lbls={:?}\n", r.index().labels()));
34    out.push_str(&format!("take_vals={:?}\n", r.values()));
35    out.push_str(&format!("take_oob_err={}\n", s.take(&[99]).is_err()));
36
37    let f = s_scalars(vec![
38        Scalar::Float64(1.5),
39        Scalar::Float64(f64::NAN),
40        Scalar::Float64(-3.0),
41    ]);
42    out.push_str(&format!(
43        "take_f64={:?}\n",
44        f.take(&[2, 1, 0]).unwrap().values()
45    ));
46    let ni = s_scalars(vec![
47        Scalar::Int64(7),
48        Scalar::Null(NullKind::NaN),
49        Scalar::Int64(9),
50    ]);
51    out.push_str(&format!(
52        "take_ni={:?}\n",
53        ni.take(&[1, 2, 0]).unwrap().values()
54    ));
55    let u = s_scalars(
56        vec!["a", "b", "c"]
57            .into_iter()
58            .map(|x| Scalar::Utf8(x.into()))
59            .collect(),
60    );
61    out.push_str(&format!(
62        "take_utf8={:?}\n",
63        u.take(&[2, -3]).unwrap().values()
64    ));
65
66    // SeriesGroupBy gather paths (nlargest / head) route through take_positions.
67    let data = s_i64(vec![5, 1, 9, 3, 7, 2, 8]);
68    let keys = s_scalars(
69        vec!["a", "b", "a", "b", "a", "b", "a"]
70            .into_iter()
71            .map(|x| Scalar::Utf8(x.into()))
72            .collect(),
73    );
74    let gb = data.groupby(&keys).unwrap();
75    let nl = gb.nlargest(2).unwrap();
76    out.push_str(&format!("gb_nlargest_lbls={:?}\n", nl.index().labels()));
77    out.push_str(&format!("gb_nlargest_vals={:?}\n", nl.values()));
78    let hd = data.groupby(&keys).unwrap().head(1).unwrap();
79    out.push_str(&format!("gb_head_vals={:?}\n", hd.values()));
80    out
81}

Source

pub fn nsmallest(&self, n: usize) -> Result<Series, FrameError>

Return up to n smallest non-missing values from each group.

Source

pub fn value_counts(&self) -> Result<Series, FrameError>

Count non-missing values within each group.

Source

pub fn describe(&self) -> Result<DataFrame, FrameError>

Summary statistics per group.

Matches pd.SeriesGroupBy.describe() for numeric grouped values.

Source

pub fn corr(&self, other: &Series) -> Result<Series, FrameError>

Pairwise Pearson correlation with another Series per group.

Source

pub fn cov(&self, other: &Series) -> Result<Series, FrameError>

Pairwise sample covariance with another Series per group.

Source

pub fn transform(&self, func: &str) -> Result<Series, FrameError>

Broadcast a named per-group reduction back to the original Series shape.

Matches series.groupby(by).transform("mean") for supported reduction names. The output keeps the original index and length.

Source

pub fn apply<F>(&self, func: F) -> Result<Series, FrameError>
where F: Fn(&Series) -> Result<Series, FrameError>,

Apply a Series-returning function to each group and concatenate results.

Matches the Rust callback shape for series.groupby(by).apply(func). Returned rows are emitted in the existing SeriesGroupBy first-seen group order, with flat labels retaining both group key and returned index label until Series row MultiIndex metadata exists.

Source

pub fn apply_scalar<F>(&self, name: &str, func: F) -> Result<Series, FrameError>
where F: Fn(&Series) -> Result<Scalar, FrameError>,

Apply a scalar-returning function to each group.

This exposes pandas’ scalar SeriesGroupBy.apply result shape as a typed Rust API: one value per group, indexed by group keys.

Source

pub fn filter<F>(&self, func: F) -> Result<Series, FrameError>
where F: Fn(&Series) -> Result<bool, FrameError>,

Keep or discard whole groups with a caller-supplied predicate.

Matches series.groupby(by).filter(func): the predicate sees one per-group Series and the returned Series preserves the original row order and index labels for groups that pass.

Source

pub fn pipe<T, F>(&self, func: F) -> Result<T, FrameError>
where F: Fn(&Self) -> Result<T, FrameError>,

Pipe this grouped Series through a caller-provided function.

Source

pub fn head(&self, n: i64) -> Result<Series, FrameError>

Select first n rows per group, preserving original row order.

Matches pd.SeriesGroupBy.head(n), including pandas’ handling of negative n: head(-k) keeps all but the last k rows of each group. Verified against a live pandas 2.2.3 installation.

Examples found in repository

examples/bench_take_gather.rs (line 78)

27fn golden() -> String {
28    let mut out = String::new();
29
30    // Series::take across dtypes, negative + duplicate indices.
31    let s = s_i64(vec![10, 20, 30, 40, 50]);
32    let r = s.take(&[4, 0, -1, 2, -5, 2]).unwrap();
33    out.push_str(&format!("take_lbls={:?}\n", r.index().labels()));
34    out.push_str(&format!("take_vals={:?}\n", r.values()));
35    out.push_str(&format!("take_oob_err={}\n", s.take(&[99]).is_err()));
36
37    let f = s_scalars(vec![
38        Scalar::Float64(1.5),
39        Scalar::Float64(f64::NAN),
40        Scalar::Float64(-3.0),
41    ]);
42    out.push_str(&format!(
43        "take_f64={:?}\n",
44        f.take(&[2, 1, 0]).unwrap().values()
45    ));
46    let ni = s_scalars(vec![
47        Scalar::Int64(7),
48        Scalar::Null(NullKind::NaN),
49        Scalar::Int64(9),
50    ]);
51    out.push_str(&format!(
52        "take_ni={:?}\n",
53        ni.take(&[1, 2, 0]).unwrap().values()
54    ));
55    let u = s_scalars(
56        vec!["a", "b", "c"]
57            .into_iter()
58            .map(|x| Scalar::Utf8(x.into()))
59            .collect(),
60    );
61    out.push_str(&format!(
62        "take_utf8={:?}\n",
63        u.take(&[2, -3]).unwrap().values()
64    ));
65
66    // SeriesGroupBy gather paths (nlargest / head) route through take_positions.
67    let data = s_i64(vec![5, 1, 9, 3, 7, 2, 8]);
68    let keys = s_scalars(
69        vec!["a", "b", "a", "b", "a", "b", "a"]
70            .into_iter()
71            .map(|x| Scalar::Utf8(x.into()))
72            .collect(),
73    );
74    let gb = data.groupby(&keys).unwrap();
75    let nl = gb.nlargest(2).unwrap();
76    out.push_str(&format!("gb_nlargest_lbls={:?}\n", nl.index().labels()));
77    out.push_str(&format!("gb_nlargest_vals={:?}\n", nl.values()));
78    let hd = data.groupby(&keys).unwrap().head(1).unwrap();
79    out.push_str(&format!("gb_head_vals={:?}\n", hd.values()));
80    out
81}