Skip to main content

bench_sort_index/
bench_sort_index.rs

1//! Bench + golden digest for Series/DataFrame::sort_index over an Int64 index.
2//!
3//! Run: cargo run -p fp-frame --example bench_sort_index --release
4//!
5//! sort_index used an O(n log n) comparison sort on IndexLabel. An all-Int64
6//! index sorts by raw i64, so Column's stable typed radix argsort (O(n)) is
7//! bit-identical — including stable tie order for duplicate labels.
8
9use std::{collections::BTreeMap, time::Instant};
10
11use fp_columnar::Column;
12use fp_frame::{DataFrame, Series};
13use fp_index::{Index, IndexLabel};
14use fp_types::Scalar;
15
16fn series(labels: Vec<i64>, vals: Vec<i64>) -> Series {
17    let idx: Vec<IndexLabel> = labels.into_iter().map(IndexLabel::Int64).collect();
18    let sc: Vec<Scalar> = vals.into_iter().map(Scalar::Int64).collect();
19    Series::from_values("s", idx, sc).unwrap()
20}
21
22fn frame(labels: Vec<i64>, vals: Vec<i64>) -> DataFrame {
23    let idx = Index::new(labels.into_iter().map(IndexLabel::Int64).collect());
24    let mut cols = BTreeMap::new();
25    cols.insert(
26        "v".to_string(),
27        Column::from_values(vals.into_iter().map(Scalar::Int64).collect()).unwrap(),
28    );
29    DataFrame::new_with_column_order(idx, cols, vec!["v".to_string()]).unwrap()
30}
31
32fn golden() -> String {
33    let mut out = String::new();
34    // Duplicate labels 1 and 3 with distinct values prove stable tie order.
35    let s = series(vec![3, 1, 2, 1, 3], vec![10, 20, 30, 40, 50]);
36    let asc = s.sort_index(true).unwrap();
37    out.push_str(&format!("s_asc_labels={:?}\n", asc.index().labels()));
38    out.push_str(&format!("s_asc_vals={:?}\n", asc.values()));
39    let desc = s.sort_index(false).unwrap();
40    out.push_str(&format!("s_desc_labels={:?}\n", desc.index().labels()));
41    out.push_str(&format!("s_desc_vals={:?}\n", desc.values()));
42
43    // Negative + zero labels.
44    let s2 = series(vec![0, -5, 7, -5, 0], vec![1, 2, 3, 4, 5]);
45    let a2 = s2.sort_index(true).unwrap();
46    out.push_str(&format!("s2_asc_labels={:?}\n", a2.index().labels()));
47    out.push_str(&format!("s2_asc_vals={:?}\n", a2.values()));
48
49    let df = frame(vec![3, 1, 2, 1, 3], vec![10, 20, 30, 40, 50]);
50    let dfa = df.sort_index(true).unwrap();
51    out.push_str(&format!("df_asc_labels={:?}\n", dfa.index().labels()));
52    out.push_str(&format!(
53        "df_asc_v={:?}\n",
54        dfa.columns().get("v").unwrap().values()
55    ));
56    let dfd = df.sort_index(false).unwrap();
57    out.push_str(&format!("df_desc_labels={:?}\n", dfd.index().labels()));
58    out.push_str(&format!(
59        "df_desc_v={:?}\n",
60        dfd.columns().get("v").unwrap().values()
61    ));
62    out
63}
64
65fn main() {
66    let g = golden();
67    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
68
69    let n: usize = 200_000;
70    // Shuffled (LCG) i64 index so it is unsorted and Int64-typed.
71    let mut x: u64 = 0x1234_5678;
72    let labels: Vec<i64> = (0..n)
73        .map(|_| {
74            x = x
75                .wrapping_mul(6364136223846793005)
76                .wrapping_add(1442695040888963407);
77            (x >> 16) as i64 % (n as i64)
78        })
79        .collect();
80    let vals: Vec<i64> = (0..n as i64).collect();
81    let s = series(labels, vals);
82
83    // warmup
84    let _ = s.sort_index(true).unwrap();
85
86    let t = Instant::now();
87    let r = s.sort_index(true).unwrap();
88    let d = t.elapsed();
89    assert_eq!(r.len(), n);
90
91    println!("TIMING n={n} sort_index={:.3}ms", d.as_secs_f64() * 1e3);
92}