pub struct SeriesGroupBy<'a> { /* private fields */ }
GroupBy for Series, grouping by values of another Series.
Created by Series::groupby(by). Supports standard aggregation
methods that return a new Series indexed by group keys.
Implementations§
Source§impl SeriesGroupBy<'_>
impl SeriesGroupBy<'_>
Sourcepub fn keys(&self) -> Vec<IndexLabel>
pub fn keys(&self) -> Vec<IndexLabel>
Group labels in first-seen order.
Sourcepub fn indices(&self) -> HashMap<IndexLabel, Vec<usize>>
pub fn indices(&self) -> HashMap<IndexLabel, Vec<usize>>
Mapping from group labels to source row positions.
Sourcepub fn groups(&self) -> HashMap<IndexLabel, Vec<usize>>
pub fn groups(&self) -> HashMap<IndexLabel, Vec<usize>>
Alias for indices, matching pandas’ groups property shape.
Sourcepub fn level(&self) -> Option<String>
pub fn level(&self) -> Option<String>
Grouping level descriptor.
Series::groupby(by) currently groups by a supplied Series rather than
by an index level, so this mirrors pandas’ None level state.
Sourcepub fn plot(&self) -> Result<PlotSpec, FrameError>
pub fn plot(&self) -> Result<PlotSpec, FrameError>
Return a backend-neutral pandas-style grouped plotting request.
Sourcepub fn hist(&self) -> Result<HistogramSpec, FrameError>
pub fn hist(&self) -> Result<HistogramSpec, FrameError>
Return a backend-neutral pandas-style grouped histogram request.
Sourcepub fn sum(&self) -> Result<Series, FrameError>
pub fn sum(&self) -> Result<Series, FrameError>
Sum of each group.
Sourcepub fn mean(&self) -> Result<Series, FrameError>
pub fn mean(&self) -> Result<Series, FrameError>
Mean of each group.
Sourcepub fn count(&self) -> Result<Series, FrameError>
pub fn count(&self) -> Result<Series, FrameError>
Count of non-null values in each group.
Sourcepub fn min(&self) -> Result<Series, FrameError>
pub fn min(&self) -> Result<Series, FrameError>
Minimum of each group.
Sourcepub fn max(&self) -> Result<Series, FrameError>
pub fn max(&self) -> Result<Series, FrameError>
Maximum of each group.
Sourcepub fn any(&self) -> Result<Series, FrameError>
pub fn any(&self) -> Result<Series, FrameError>
Whether any non-missing value is truthy in each group.
Sourcepub fn all(&self) -> Result<Series, FrameError>
pub fn all(&self) -> Result<Series, FrameError>
Whether all non-missing values are truthy in each group.
Sourcepub fn nunique(&self) -> Result<Series, FrameError>
pub fn nunique(&self) -> Result<Series, FrameError>
Number of unique non-missing values in each group.
Sourcepub fn unique(&self) -> Result<Series, FrameError>
pub fn unique(&self) -> Result<Series, FrameError>
Unique values in each group, preserving first-seen group and value order.
Sourcepub fn ohlc(&self) -> Result<DataFrame, FrameError>
pub fn ohlc(&self) -> Result<DataFrame, FrameError>
Open-high-low-close per group.
Sourcepub fn quantile(&self, q: f64) -> Result<Series, FrameError>
pub fn quantile(&self, q: f64) -> Result<Series, FrameError>
Quantile of each group using linear interpolation.
Sourcepub fn sem(&self) -> Result<Series, FrameError>
pub fn sem(&self) -> Result<Series, FrameError>
Standard error of the mean for each group.
Sourcepub fn skew(&self) -> Result<Series, FrameError>
pub fn skew(&self) -> Result<Series, FrameError>
Skewness of each group.
Sourcepub fn kurtosis(&self) -> Result<Series, FrameError>
pub fn kurtosis(&self) -> Result<Series, FrameError>
Excess kurtosis (Fisher’s definition, bias=False) of each group.
Matches pd.Series.groupby(...).kurtosis(). Mirrors fp_types::nankurt,
which requires at least 4 non-missing values per group; a group with
fewer values returns Null(NaN), as does a group whose sample standard
deviation is zero.
Sourcepub fn kurt(&self) -> Result<Series, FrameError>
pub fn kurt(&self) -> Result<Series, FrameError>
Alias for kurtosis() — pandas exposes both .kurt() and
.kurtosis() on Series.groupby aggregations, and parity with that
surface requires both spellings.
Sourcepub fn idxmin(&self) -> Result<Series, FrameError>
pub fn idxmin(&self) -> Result<Series, FrameError>
Original index label of the minimum value in each group.
Sourcepub fn idxmax(&self) -> Result<Series, FrameError>
pub fn idxmax(&self) -> Result<Series, FrameError>
Original index label of the maximum value in each group.
Sourcepub fn is_monotonic_increasing(&self) -> Result<Series, FrameError>
pub fn is_monotonic_increasing(&self) -> Result<Series, FrameError>
Per-group monotonic-increasing predicate.
Sourcepub fn is_monotonic_decreasing(&self) -> Result<Series, FrameError>
pub fn is_monotonic_decreasing(&self) -> Result<Series, FrameError>
Per-group monotonic-decreasing predicate.
Sourcepub fn rank(
&self,
method: &str,
ascending: bool,
na_option: &str,
) -> Result<Series, FrameError>
pub fn rank( &self, method: &str, ascending: bool, na_option: &str, ) -> Result<Series, FrameError>
Rank values within each group.
Matches series.groupby(by).rank().
- method: “average” (default), “min”, “max”, “first”, “dense”
- ascending: rank direction (default true = smallest gets rank 1)
- na_option: “keep” (NaN stays NaN), “top” (NaN gets lowest ranks), “bottom” (NaN gets highest ranks)
Sourcepub fn rank_with_pct(
&self,
method: &str,
ascending: bool,
na_option: &str,
pct: bool,
) -> Result<Series, FrameError>
pub fn rank_with_pct( &self, method: &str, ascending: bool, na_option: &str, pct: bool, ) -> Result<Series, FrameError>
Rank values within each group and optionally scale them to percentile ranks.
Sourcepub fn std(&self) -> Result<Series, FrameError>
pub fn std(&self) -> Result<Series, FrameError>
Standard deviation of each group (ddof=1).
Sourcepub fn var(&self) -> Result<Series, FrameError>
pub fn var(&self) -> Result<Series, FrameError>
Variance of each group (ddof=1).
Sourcepub fn median(&self) -> Result<Series, FrameError>
pub fn median(&self) -> Result<Series, FrameError>
Median of each group.
Sourcepub fn prod(&self) -> Result<Series, FrameError>
pub fn prod(&self) -> Result<Series, FrameError>
Product of each group.
Sourcepub fn cumcount(&self) -> Result<Series, FrameError>
pub fn cumcount(&self) -> Result<Series, FrameError>
Assign within-group cumulative count (0-based).
Sourcepub fn cumcount_with_ascending(
&self,
ascending: bool,
) -> Result<Series, FrameError>
pub fn cumcount_with_ascending( &self, ascending: bool, ) -> Result<Series, FrameError>
Assign within-group cumulative count (0-based) with explicit direction.
Sourcepub fn ngroup(&self) -> Result<Series, FrameError>
pub fn ngroup(&self) -> Result<Series, FrameError>
Assign ordinal group number to each source row.
Sourcepub fn ngroup_with_ascending(
&self,
ascending: bool,
) -> Result<Series, FrameError>
pub fn ngroup_with_ascending( &self, ascending: bool, ) -> Result<Series, FrameError>
Assign ordinal group number with explicit direction.
Sourcepub fn cumsum(&self) -> Result<Series, FrameError>
pub fn cumsum(&self) -> Result<Series, FrameError>
GroupBy cumulative sum.
Sourcepub fn cumprod(&self) -> Result<Series, FrameError>
pub fn cumprod(&self) -> Result<Series, FrameError>
GroupBy cumulative product.
Sourcepub fn cummin(&self) -> Result<Series, FrameError>
pub fn cummin(&self) -> Result<Series, FrameError>
GroupBy cumulative minimum.
Sourcepub fn cummax(&self) -> Result<Series, FrameError>
pub fn cummax(&self) -> Result<Series, FrameError>
GroupBy cumulative maximum.
Sourcepub fn shift(&self, periods: i64) -> Result<Series, FrameError>
pub fn shift(&self, periods: i64) -> Result<Series, FrameError>
GroupBy shift within each group.
Examples found in repository?
19fn main() {
20 let args: Vec<String> = std::env::args().collect();
21 let n: usize = args.get(1).and_then(|s| s.parse().ok()).unwrap_or(1_000_000);
22 let iters: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(30);
23 let groups = 100usize;
24
25 let labels: Vec<IndexLabel> = (0..n as i64).map(IndexLabel::Int64).collect();
26 let keys: Vec<i64> = (0..n).map(|i| (i % groups) as i64).collect();
27 let vals: Vec<f64> = (0..n).map(|i| (i.wrapping_mul(37) % 9973) as f64 * 0.25).collect();
28 let value = Series::new(
29 "v".to_string(),
30 Index::new(labels.clone()),
31 Column::from_f64_values(vals),
32 )
33 .unwrap();
34 let key = Series::new(
35 "k".to_string(),
36 Index::new(labels),
37 Column::from_i64_values(keys),
38 )
39 .unwrap();
40
41 let out = value.groupby(&key).unwrap().shift(1).unwrap();
42 let mut chk: u64 = 0xcbf29ce484222325;
43 for v in out.values() {
44 let bits = match v {
45 Scalar::Float64(f) => f.to_bits(),
46 _ => 0xDEAD_BEEF_DEAD_BEEF,
47 };
48 chk = (chk ^ bits).wrapping_mul(0x100000001b3);
49 }
50
51 let mut sink = 0usize;
52 let start = Instant::now();
53 for _ in 0..iters {
54 let s = black_box(value.groupby(&key).unwrap().shift(1).unwrap());
55 sink ^= s.len();
56 }
57 let elapsed = start.elapsed();
58 println!(
59 "groupby_shift n={n} iters={iters}: {:.3} ms/iter (chk={chk:016x} sink={sink})",
60 elapsed.as_secs_f64() * 1000.0 / iters as f64
61 );
62}
Sourcepub fn diff(&self, periods: usize) -> Result<Series, FrameError>
pub fn diff(&self, periods: usize) -> Result<Series, FrameError>
GroupBy difference within each group.
Sourcepub fn pct_change(&self, periods: usize) -> Result<Series, FrameError>
pub fn pct_change(&self, periods: usize) -> Result<Series, FrameError>
GroupBy percentage change within each group.
Sourcepub fn ffill(&self, limit: Option<usize>) -> Result<Series, FrameError>
pub fn ffill(&self, limit: Option<usize>) -> Result<Series, FrameError>
Forward-fill missing values within each group.
Sourcepub fn bfill(&self, limit: Option<usize>) -> Result<Series, FrameError>
pub fn bfill(&self, limit: Option<usize>) -> Result<Series, FrameError>
Backward-fill missing values within each group.
Sourcepub fn fillna(&self, value: &Scalar) -> Result<Series, FrameError>
pub fn fillna(&self, value: &Scalar) -> Result<Series, FrameError>
Fill missing values in each group with a scalar value.
Sourcepub fn rolling(&self, window: usize) -> SeriesGroupByRolling<'_, '_>
pub fn rolling(&self, window: usize) -> SeriesGroupByRolling<'_, '_>
Grouped rolling window operations.
Matches the narrow pd.SeriesGroupBy.rolling(window) reduction shape.
Results are mapped back onto the original flat Series index.
Sourcepub fn expanding(
&self,
min_periods: Option<usize>,
) -> SeriesGroupByExpanding<'_, '_>
pub fn expanding( &self, min_periods: Option<usize>, ) -> SeriesGroupByExpanding<'_, '_>
Grouped expanding window operations.
Matches the narrow pd.SeriesGroupBy.expanding(min_periods=...)
reduction shape. Results are mapped back onto the original flat
Series index.
Sourcepub fn ewm(
&self,
span: Option<f64>,
alpha: Option<f64>,
) -> SeriesGroupByEwm<'_, '_>
pub fn ewm( &self, span: Option<f64>, alpha: Option<f64>, ) -> SeriesGroupByEwm<'_, '_>
Grouped exponentially weighted moving window operations.
Matches the narrow pd.SeriesGroupBy.ewm(span=..., alpha=...)
reduction shape. Results are mapped back onto the original flat
Series index.
Sourcepub fn resample(&self, freq: &str) -> SeriesGroupByResample<'_, '_>
pub fn resample(&self, freq: &str) -> SeriesGroupByResample<'_, '_>
Grouped resampling.
Matches the narrow pd.SeriesGroupBy.resample(freq) reduction shape.
Until row MultiIndex support is implemented, group and bucket labels
are represented as flat "{group}, {bucket}" labels.
Sourcepub fn nlargest(&self, n: usize) -> Result<Series, FrameError>
pub fn nlargest(&self, n: usize) -> Result<Series, FrameError>
Return up to n largest non-missing values from each group.
Examples found in repository?
27fn golden() -> String {
28 let mut out = String::new();
29
30 // Series::take across dtypes, negative + duplicate indices.
31 let s = s_i64(vec![10, 20, 30, 40, 50]);
32 let r = s.take(&[4, 0, -1, 2, -5, 2]).unwrap();
33 out.push_str(&format!("take_lbls={:?}\n", r.index().labels()));
34 out.push_str(&format!("take_vals={:?}\n", r.values()));
35 out.push_str(&format!("take_oob_err={}\n", s.take(&[99]).is_err()));
36
37 let f = s_scalars(vec![
38 Scalar::Float64(1.5),
39 Scalar::Float64(f64::NAN),
40 Scalar::Float64(-3.0),
41 ]);
42 out.push_str(&format!(
43 "take_f64={:?}\n",
44 f.take(&[2, 1, 0]).unwrap().values()
45 ));
46 let ni = s_scalars(vec![
47 Scalar::Int64(7),
48 Scalar::Null(NullKind::NaN),
49 Scalar::Int64(9),
50 ]);
51 out.push_str(&format!(
52 "take_ni={:?}\n",
53 ni.take(&[1, 2, 0]).unwrap().values()
54 ));
55 let u = s_scalars(
56 vec!["a", "b", "c"]
57 .into_iter()
58 .map(|x| Scalar::Utf8(x.into()))
59 .collect(),
60 );
61 out.push_str(&format!(
62 "take_utf8={:?}\n",
63 u.take(&[2, -3]).unwrap().values()
64 ));
65
66 // SeriesGroupBy gather paths (nlargest / head) route through take_positions.
67 let data = s_i64(vec![5, 1, 9, 3, 7, 2, 8]);
68 let keys = s_scalars(
69 vec!["a", "b", "a", "b", "a", "b", "a"]
70 .into_iter()
71 .map(|x| Scalar::Utf8(x.into()))
72 .collect(),
73 );
74 let gb = data.groupby(&keys).unwrap();
75 let nl = gb.nlargest(2).unwrap();
76 out.push_str(&format!("gb_nlargest_lbls={:?}\n", nl.index().labels()));
77 out.push_str(&format!("gb_nlargest_vals={:?}\n", nl.values()));
78 let hd = data.groupby(&keys).unwrap().head(1).unwrap();
79 out.push_str(&format!("gb_head_vals={:?}\n", hd.values()));
80 out
81}
Sourcepub fn nsmallest(&self, n: usize) -> Result<Series, FrameError>
pub fn nsmallest(&self, n: usize) -> Result<Series, FrameError>
Return up to n smallest non-missing values from each group.
Sourcepub fn value_counts(&self) -> Result<Series, FrameError>
pub fn value_counts(&self) -> Result<Series, FrameError>
Count non-missing values within each group.
Sourcepub fn describe(&self) -> Result<DataFrame, FrameError>
pub fn describe(&self) -> Result<DataFrame, FrameError>
Summary statistics per group.
Matches pd.SeriesGroupBy.describe() for numeric grouped values.
Sourcepub fn corr(&self, other: &Series) -> Result<Series, FrameError>
pub fn corr(&self, other: &Series) -> Result<Series, FrameError>
Pairwise Pearson correlation with another Series per group.
Sourcepub fn cov(&self, other: &Series) -> Result<Series, FrameError>
pub fn cov(&self, other: &Series) -> Result<Series, FrameError>
Pairwise sample covariance with another Series per group.
Sourcepub fn transform(&self, func: &str) -> Result<Series, FrameError>
pub fn transform(&self, func: &str) -> Result<Series, FrameError>
Broadcast a named per-group reduction back to the original Series shape.
Matches series.groupby(by).transform("mean") for supported reduction
names. The output keeps the original index and length.
Sourcepub fn apply<F>(&self, func: F) -> Result<Series, FrameError>
pub fn apply<F>(&self, func: F) -> Result<Series, FrameError>
Apply a Series-returning function to each group and concatenate results.
Matches the Rust callback shape for series.groupby(by).apply(func).
Returned rows are emitted in the existing SeriesGroupBy first-seen group
order, with flat labels retaining both group key and returned index
label until Series row MultiIndex metadata exists.
Sourcepub fn apply_scalar<F>(&self, name: &str, func: F) -> Result<Series, FrameError>
pub fn apply_scalar<F>(&self, name: &str, func: F) -> Result<Series, FrameError>
Apply a scalar-returning function to each group.
This exposes pandas’ scalar SeriesGroupBy.apply result shape as a
typed Rust API: one value per group, indexed by group keys.
Sourcepub fn filter<F>(&self, func: F) -> Result<Series, FrameError>
pub fn filter<F>(&self, func: F) -> Result<Series, FrameError>
Keep or discard whole groups with a caller-supplied predicate.
Matches series.groupby(by).filter(func): the predicate sees one
per-group Series and the returned Series preserves the original row
order and index labels for groups that pass.
Sourcepub fn pipe<T, F>(&self, func: F) -> Result<T, FrameError>
pub fn pipe<T, F>(&self, func: F) -> Result<T, FrameError>
Pipe this grouped Series through a caller-provided function.
Sourcepub fn head(&self, n: i64) -> Result<Series, FrameError>
pub fn head(&self, n: i64) -> Result<Series, FrameError>
Select first n rows per group, preserving original row order.
Matches pd.SeriesGroupBy.head(n), including pandas’ negative n:
head(-k) keeps all but the last k rows of each group. Verified vs
live pandas 2.2.3.
Examples found in repository?
27fn golden() -> String {
28 let mut out = String::new();
29
30 // Series::take across dtypes, negative + duplicate indices.
31 let s = s_i64(vec![10, 20, 30, 40, 50]);
32 let r = s.take(&[4, 0, -1, 2, -5, 2]).unwrap();
33 out.push_str(&format!("take_lbls={:?}\n", r.index().labels()));
34 out.push_str(&format!("take_vals={:?}\n", r.values()));
35 out.push_str(&format!("take_oob_err={}\n", s.take(&[99]).is_err()));
36
37 let f = s_scalars(vec![
38 Scalar::Float64(1.5),
39 Scalar::Float64(f64::NAN),
40 Scalar::Float64(-3.0),
41 ]);
42 out.push_str(&format!(
43 "take_f64={:?}\n",
44 f.take(&[2, 1, 0]).unwrap().values()
45 ));
46 let ni = s_scalars(vec![
47 Scalar::Int64(7),
48 Scalar::Null(NullKind::NaN),
49 Scalar::Int64(9),
50 ]);
51 out.push_str(&format!(
52 "take_ni={:?}\n",
53 ni.take(&[1, 2, 0]).unwrap().values()
54 ));
55 let u = s_scalars(
56 vec!["a", "b", "c"]
57 .into_iter()
58 .map(|x| Scalar::Utf8(x.into()))
59 .collect(),
60 );
61 out.push_str(&format!(
62 "take_utf8={:?}\n",
63 u.take(&[2, -3]).unwrap().values()
64 ));
65
66 // SeriesGroupBy gather paths (nlargest / head) route through take_positions.
67 let data = s_i64(vec![5, 1, 9, 3, 7, 2, 8]);
68 let keys = s_scalars(
69 vec!["a", "b", "a", "b", "a", "b", "a"]
70 .into_iter()
71 .map(|x| Scalar::Utf8(x.into()))
72 .collect(),
73 );
74 let gb = data.groupby(&keys).unwrap();
75 let nl = gb.nlargest(2).unwrap();
76 out.push_str(&format!("gb_nlargest_lbls={:?}\n", nl.index().labels()));
77 out.push_str(&format!("gb_nlargest_vals={:?}\n", nl.values()));
78 let hd = data.groupby(&keys).unwrap().head(1).unwrap();
79 out.push_str(&format!("gb_head_vals={:?}\n", hd.values()));
80 out
81}
Sourcepub fn tail(&self, n: i64) -> Result<Series, FrameError>
pub fn tail(&self, n: i64) -> Result<Series, FrameError>
Select last n rows per group, preserving original row order.
Matches pd.SeriesGroupBy.tail(n), including pandas’ negative n:
tail(-k) drops the first k rows of each group. Verified vs live
pandas 2.2.3.
Sourcepub fn take(&self, indices: &[i64]) -> Result<Series, FrameError>
pub fn take(&self, indices: &[i64]) -> Result<Series, FrameError>
Return positional rows from each group.
Matches pd.SeriesGroupBy.take(indices), resolving negative positions
relative to each group’s length.
Sourcepub fn sample(
&self,
n: Option<usize>,
frac: Option<f64>,
replace: bool,
seed: Option<u64>,
) -> Result<Series, FrameError>
pub fn sample( &self, n: Option<usize>, frac: Option<f64>, replace: bool, seed: Option<u64>, ) -> Result<Series, FrameError>
Randomly sample rows from each group.
Matches pd.SeriesGroupBy.sample(n=..., frac=..., replace=...).
Sourcepub fn nth(&self, n: i64) -> Result<Series, FrameError>
pub fn nth(&self, n: i64) -> Result<Series, FrameError>
Select the nth row from each group. Negative n counts from the end.
Sourcepub fn get_group(&self, name: &str) -> Result<Series, FrameError>
pub fn get_group(&self, name: &str) -> Result<Series, FrameError>
Retrieve a single group by its display label.
Sourcepub fn first(&self) -> Result<Series, FrameError>
pub fn first(&self) -> Result<Series, FrameError>
First value of each group.
Sourcepub fn last(&self) -> Result<Series, FrameError>
pub fn last(&self) -> Result<Series, FrameError>
Last value of each group.
Sourcepub fn size(&self) -> Result<Series, FrameError>
pub fn size(&self) -> Result<Series, FrameError>
Number of elements in each group (including nulls).