Skip to main content

bench_take_gather/
bench_take_gather.rs

//! Bench + golden for `Series::take` and `SeriesGroupBy` gather — typed gather lever.
//!
//! Run: cargo run -p fp-frame --example bench_take_gather --release
//!
//! `Series::take` and the `SeriesGroupBy` `take_positions` helper (behind groupby
//! nlargest/nsmallest/head/tail/first/last) previously cloned a 32 B `Scalar` per row
//! and rebuilt the column via `Column::from_values`. Routing through the typed
//! `Column::take_positions` keeps the contiguous Int64/Float64 buffer. Output is
//! bit-identical (values, dtype, negative/dup indices, group order).
10
11use std::time::Instant;
12
13use fp_frame::Series;
14use fp_index::IndexLabel;
15use fp_types::{NullKind, Scalar};
16
17fn s_i64(vals: Vec<i64>) -> Series {
18    let idx: Vec<IndexLabel> = (0..vals.len() as i64).map(IndexLabel::Int64).collect();
19    Series::from_values("s", idx, vals.into_iter().map(Scalar::Int64).collect()).unwrap()
20}
21
22fn s_scalars(vals: Vec<Scalar>) -> Series {
23    let idx: Vec<IndexLabel> = (0..vals.len() as i64).map(IndexLabel::Int64).collect();
24    Series::from_values("s", idx, vals).unwrap()
25}
26
27fn golden() -> String {
28    let mut out = String::new();
29
30    // Series::take across dtypes, negative + duplicate indices.
31    let s = s_i64(vec![10, 20, 30, 40, 50]);
32    let r = s.take(&[4, 0, -1, 2, -5, 2]).unwrap();
33    out.push_str(&format!("take_lbls={:?}\n", r.index().labels()));
34    out.push_str(&format!("take_vals={:?}\n", r.values()));
35    out.push_str(&format!("take_oob_err={}\n", s.take(&[99]).is_err()));
36
37    let f = s_scalars(vec![
38        Scalar::Float64(1.5),
39        Scalar::Float64(f64::NAN),
40        Scalar::Float64(-3.0),
41    ]);
42    out.push_str(&format!(
43        "take_f64={:?}\n",
44        f.take(&[2, 1, 0]).unwrap().values()
45    ));
46    let ni = s_scalars(vec![
47        Scalar::Int64(7),
48        Scalar::Null(NullKind::NaN),
49        Scalar::Int64(9),
50    ]);
51    out.push_str(&format!(
52        "take_ni={:?}\n",
53        ni.take(&[1, 2, 0]).unwrap().values()
54    ));
55    let u = s_scalars(
56        vec!["a", "b", "c"]
57            .into_iter()
58            .map(|x| Scalar::Utf8(x.into()))
59            .collect(),
60    );
61    out.push_str(&format!(
62        "take_utf8={:?}\n",
63        u.take(&[2, -3]).unwrap().values()
64    ));
65
66    // SeriesGroupBy gather paths (nlargest / head) route through take_positions.
67    let data = s_i64(vec![5, 1, 9, 3, 7, 2, 8]);
68    let keys = s_scalars(
69        vec!["a", "b", "a", "b", "a", "b", "a"]
70            .into_iter()
71            .map(|x| Scalar::Utf8(x.into()))
72            .collect(),
73    );
74    let gb = data.groupby(&keys).unwrap();
75    let nl = gb.nlargest(2).unwrap();
76    out.push_str(&format!("gb_nlargest_lbls={:?}\n", nl.index().labels()));
77    out.push_str(&format!("gb_nlargest_vals={:?}\n", nl.values()));
78    let hd = data.groupby(&keys).unwrap().head(1).unwrap();
79    out.push_str(&format!("gb_head_vals={:?}\n", hd.values()));
80    out
81}
82
83fn main() {
84    let g = golden();
85    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
86
87    let n: usize = 1_000_000;
88    let s = s_i64((0..n as i64).map(|v| v * 2).collect());
89    let mut x: u64 = 0xfeed_face;
90    let idxs: Vec<i64> = (0..n)
91        .map(|_| {
92            x = x
93                .wrapping_mul(6364136223846793005)
94                .wrapping_add(1442695040888963407);
95            (x >> 16) as i64 % (n as i64)
96        })
97        .collect();
98
99    let _ = s.take(&idxs).unwrap(); // warmup
100
101    let t = Instant::now();
102    let r = s.take(&idxs).unwrap();
103    let d = t.elapsed();
104    assert_eq!(r.len(), n);
105
106    println!("TIMING n={n} take={:.3}ms", d.as_secs_f64() * 1e3);
107}