Skip to main content

bench_kendall/
bench_kendall.rs

1//! Bench + golden digest for Series::corr_kendall (Kendall tau-b) with ties.
2//!
3//! Run: cargo run -p fp-frame --example bench_kendall --release
4//!
5//! With ties the old path was an O(n²) all-pairs loop. Knight's algorithm
6//! computes the identical tau-b in O(n log n). The golden battery pins the
7//! exact f64 output on tied data so the fast path proves bit-identical.
8
9use std::time::Instant;
10
11use fp_frame::Series;
12use fp_index::IndexLabel;
13use fp_types::Scalar;
14
15fn s_from(vals: &[f64]) -> Series {
16    let idx: Vec<IndexLabel> = (0..vals.len() as i64).map(IndexLabel::Int64).collect();
17    let sc: Vec<Scalar> = vals.iter().map(|&v| Scalar::Float64(v)).collect();
18    Series::from_values("s", idx, sc).unwrap()
19}
20
21fn golden() -> String {
22    let mut out = String::new();
23    let cases: &[(&[f64], &[f64])] = &[
24        // ties in both x and y
25        (
26            &[1.0, 2.0, 2.0, 3.0, 3.0, 3.0],
27            &[1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
28        ),
29        // joint ties (identical (x,y) pairs)
30        (&[1.0, 1.0, 2.0, 2.0], &[5.0, 5.0, 9.0, 9.0]),
31        // discordant-heavy with ties
32        (&[1.0, 2.0, 3.0, 4.0, 4.0], &[5.0, 4.0, 3.0, 2.0, 2.0]),
33        // all x tied -> denom NaN
34        (&[7.0, 7.0, 7.0], &[1.0, 2.0, 3.0]),
35        // mixed integers as floats, larger
36        (
37            &[1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0],
38            &[2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0],
39        ),
40    ];
41    for (i, (x, y)) in cases.iter().enumerate() {
42        let a = s_from(x);
43        let b = s_from(y);
44        let tau = a.corr_kendall(&b).unwrap();
45        out.push_str(&format!("case{i}: {:.17e}\n", tau));
46    }
47    out
48}
49
50/// Verbatim copy of the original O(n²) pairwise tau-b loop, as an independent
51/// reference for the randomized cross-check.
52fn kendall_ref(x: &[f64], y: &[f64]) -> f64 {
53    let n = x.len();
54    if n < 2 {
55        return f64::NAN;
56    }
57    let (mut con, mut dis, mut tx, mut ty) = (0_i64, 0_i64, 0_i64, 0_i64);
58    for i in 0..n {
59        for j in (i + 1)..n {
60            let dx = x[i] - x[j];
61            let dy = y[i] - y[j];
62            if dx.abs() < f64::EPSILON && dy.abs() < f64::EPSILON {
63                tx += 1;
64                ty += 1;
65            } else if dx.abs() < f64::EPSILON {
66                tx += 1;
67            } else if dy.abs() < f64::EPSILON {
68                ty += 1;
69            } else if (dx > 0.0 && dy > 0.0) || (dx < 0.0 && dy < 0.0) {
70                con += 1;
71            } else {
72                dis += 1;
73            }
74        }
75    }
76    let n_pairs = (n * (n - 1)) as f64 / 2.0;
77    let denom = ((n_pairs - tx as f64) * (n_pairs - ty as f64)).sqrt();
78    if denom < f64::EPSILON {
79        f64::NAN
80    } else {
81        (con - dis) as f64 / denom
82    }
83}
84
85fn cross_check() -> (usize, usize) {
86    // LCG for deterministic pseudo-random integer-tied datasets.
87    let mut state: u64 = 0x2545F4914F6CDD1D;
88    let mut next = || {
89        state = state
90            .wrapping_mul(6364136223846793005)
91            .wrapping_add(1442695040888963407);
92        state
93    };
94    let (mut ok, mut bad) = (0usize, 0usize);
95    for _ in 0..4000 {
96        let n = (next() % 30) as usize + 2;
97        let range = next() % 6 + 1; // small range -> many ties
98        let xs: Vec<f64> = (0..n).map(|_| (next() % range) as f64).collect();
99        let ys: Vec<f64> = (0..n).map(|_| (next() % range) as f64).collect();
100        let got = s_from(&xs).corr_kendall(&s_from(&ys)).unwrap();
101        let want = kendall_ref(&xs, &ys);
102        // bit-for-bit (NaN compares equal by bits here since both produce the
103        // canonical NaN from the same sqrt path).
104        if got.to_bits() == want.to_bits() || (got.is_nan() && want.is_nan()) {
105            ok += 1;
106        } else {
107            bad += 1;
108            if bad <= 3 {
109                eprintln!("MISMATCH n={n} got={got:?} want={want:?}");
110            }
111        }
112    }
113    (ok, bad)
114}
115
116fn main() {
117    let g = golden();
118    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
119
120    let (ok, bad) = cross_check();
121    println!("CROSSCHECK ok={ok} bad={bad}");
122
123    // Large tied dataset: values in a small range force many ties, so the old
124    // path takes the O(n²) loop (no-tie fast path bails on ties).
125    let n: usize = 40_000;
126    let xs: Vec<f64> = (0..n).map(|i| ((i * 7) % 50) as f64).collect();
127    let ys: Vec<f64> = (0..n).map(|i| ((i * 13) % 40) as f64).collect();
128    let a = s_from(&xs);
129    let b = s_from(&ys);
130
131    let t = Instant::now();
132    let tau = a.corr_kendall(&b).unwrap();
133    let d = t.elapsed();
134
135    println!(
136        "TIMING n={n} corr_kendall={:.3}ms tau={:.6}",
137        d.as_secs_f64() * 1e3,
138        tau
139    );
140}