Skip to main content

bench_corrwith/
bench_corrwith.rs

//! Bench + golden digest for DataFrame::corrwith_axis(other, axis=1).
//!
//! Run: cargo run -p fp-frame --example bench_corrwith --release
//!
//! Row-wise corrwith matched each self-row to other by scanning other's index
//! with `Index::position` — a linear scan for any non-ascending-Int64 index,
//! i.e. O(n·m). A first-occurrence label->row map built once makes it O(n).
//! Bit-identical: position_map_first records the FIRST occurrence, exactly
//! what Index::position returns; unmatched rows are still dropped.
10
11use std::{collections::BTreeMap, time::Instant};
12
13use fp_columnar::Column;
14use fp_frame::DataFrame;
15use fp_index::{Index, IndexLabel};
16use fp_types::Scalar;
17
18fn frame(labels: Vec<IndexLabel>, cols: Vec<(&str, Vec<f64>)>) -> DataFrame {
19    let order: Vec<String> = cols.iter().map(|(n, _)| (*n).to_string()).collect();
20    let mut map = BTreeMap::new();
21    for (n, vs) in cols {
22        map.insert(
23            n.to_string(),
24            Column::from_values(vs.into_iter().map(Scalar::Float64).collect()).unwrap(),
25        );
26    }
27    DataFrame::new_with_column_order(Index::new(labels), map, order).unwrap()
28}
29
30fn lbl(s: &str) -> IndexLabel {
31    IndexLabel::Utf8(s.to_string())
32}
33
34fn golden() -> String {
35    // self has rows r0,r1,r2,r3 ; other has r1 (dup, first wins), r0, r3, rX.
36    let s = frame(
37        vec![lbl("r0"), lbl("r1"), lbl("r2"), lbl("r3")],
38        vec![
39            ("a", vec![1.0, 2.0, 3.0, 4.0]),
40            ("b", vec![2.0, 1.0, 5.0, 4.0]),
41            ("c", vec![9.0, 8.0, 7.0, 1.0]),
42        ],
43    );
44    let o = frame(
45        vec![lbl("r1"), lbl("r0"), lbl("r3"), lbl("r1"), lbl("rX")],
46        vec![
47            ("a", vec![5.0, 1.5, 4.0, -9.0, 0.0]),
48            ("b", vec![6.0, 2.5, 3.0, -9.0, 0.0]),
49            ("c", vec![1.0, 0.5, 8.0, -9.0, 0.0]),
50        ],
51    );
52    let r = s.corrwith_axis(&o, 1).unwrap();
53    let mut out = String::new();
54    out.push_str(&format!("labels={:?}\n", r.index().labels()));
55    // Round values to 9 decimals for a stable digest.
56    let vals: Vec<String> = r
57        .column()
58        .values()
59        .iter()
60        .map(|v| match v {
61            Scalar::Float64(f) if f.is_nan() => "nan".to_string(),
62            Scalar::Float64(f) => format!("{:.9}", f),
63            other => format!("{other:?}"),
64        })
65        .collect();
66    out.push_str(&format!("values={:?}\n", vals));
67    out
68}
69
70fn main() {
71    let g = golden();
72    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
73
74    // Large non-ascending-Int64 (string) index so position() is linear.
75    let n: usize = 20_000;
76    let labels: Vec<IndexLabel> = (0..n).map(|i| lbl(&format!("k{i:08}"))).collect();
77    let mkcol = |mult: f64| (0..n).map(|i| (i as f64) * mult).collect::<Vec<f64>>();
78    let s = frame(
79        labels.clone(),
80        vec![
81            ("a", mkcol(1.0)),
82            ("b", mkcol(2.0)),
83            ("c", mkcol(0.5)),
84            ("d", mkcol(3.0)),
85        ],
86    );
87    // other: same labels, reversed order (so a sorted shortcut can't apply).
88    let mut rlabels = labels.clone();
89    rlabels.reverse();
90    let o = frame(
91        rlabels,
92        vec![
93            ("a", mkcol(1.1)),
94            ("b", mkcol(2.2)),
95            ("c", mkcol(0.7)),
96            ("d", mkcol(3.3)),
97        ],
98    );
99
100    // warmup
101    let _ = s.corrwith_axis(&o, 1).unwrap();
102
103    let t = Instant::now();
104    let r = s.corrwith_axis(&o, 1).unwrap();
105    let d = t.elapsed();
106    assert_eq!(r.len(), n);
107
108    println!("TIMING n={n} corrwith_axis1={:.3}ms", d.as_secs_f64() * 1e3);
109}