Skip to main content

bench_searchsorted/
bench_searchsorted.rs

1//! Bench + golden digest for Series::searchsorted_values (array form).
2//!
3//! Run: cargo run -p fp-frame --example bench_searchsorted --release
4//!
5//! searchsorted_values mapped each needle through `searchsorted`, which
6//! re-materialized the whole column AND linear-scanned it = O(m·n) work plus
7//! O(m·n) materialization. A sorted Series admits binary search: materialize
8//! once, then partition_point per needle = O(n + m·log n). Bit-identical on
9//! sorted input (the documented precondition); a sortedness guard falls back
10//! to the exact linear scan for unsorted input so behavior is unchanged.
11
12use std::time::Instant;
13
14use fp_frame::Series;
15use fp_index::IndexLabel;
16use fp_types::Scalar;
17
18fn s_i64(vals: Vec<i64>) -> Series {
19    let idx: Vec<IndexLabel> = (0..vals.len() as i64).map(IndexLabel::Int64).collect();
20    let sc: Vec<Scalar> = vals.into_iter().map(Scalar::Int64).collect();
21    Series::from_values("s", idx, sc).unwrap()
22}
23
24fn s_scalars(vals: Vec<Scalar>) -> Series {
25    let idx: Vec<IndexLabel> = (0..vals.len() as i64).map(IndexLabel::Int64).collect();
26    Series::from_values("s", idx, vals).unwrap()
27}
28
29fn golden() -> String {
30    let mut out = String::new();
31
32    // Sorted Int64, both sides, exact + between + below + above.
33    let s = s_i64(vec![10, 20, 20, 30, 40]);
34    let needles: Vec<Scalar> = vec![5, 10, 15, 20, 35, 40, 50]
35        .into_iter()
36        .map(Scalar::Int64)
37        .collect();
38    out.push_str(&format!(
39        "i64_left={:?}\n",
40        s.searchsorted_values(&needles, "left").unwrap()
41    ));
42    out.push_str(&format!(
43        "i64_right={:?}\n",
44        s.searchsorted_values(&needles, "right").unwrap()
45    ));
46
47    // Sorted Float64 with trailing missing (NaN sorts last).
48    let fs = s_scalars(vec![
49        Scalar::Float64(1.5),
50        Scalar::Float64(2.5),
51        Scalar::Float64(2.5),
52        Scalar::Float64(9.0),
53        Scalar::Float64(f64::NAN),
54    ]);
55    let fn_needles = vec![
56        Scalar::Float64(2.5),
57        Scalar::Float64(0.0),
58        Scalar::Float64(10.0),
59        Scalar::Float64(f64::NAN), // missing needle
60    ];
61    out.push_str(&format!(
62        "f64_left={:?}\n",
63        fs.searchsorted_values(&fn_needles, "left").unwrap()
64    ));
65    out.push_str(&format!(
66        "f64_right={:?}\n",
67        fs.searchsorted_values(&fn_needles, "right").unwrap()
68    ));
69
70    // Sorted Utf8.
71    let us = s_scalars(
72        vec!["apple", "banana", "banana", "cherry"]
73            .into_iter()
74            .map(|x| Scalar::Utf8(x.to_string()))
75            .collect(),
76    );
77    let u_needles: Vec<Scalar> = vec!["aardvark", "banana", "date"]
78        .into_iter()
79        .map(|x| Scalar::Utf8(x.to_string()))
80        .collect();
81    out.push_str(&format!(
82        "utf8_left={:?}\n",
83        us.searchsorted_values(&u_needles, "left").unwrap()
84    ));
85    out.push_str(&format!(
86        "utf8_right={:?}\n",
87        us.searchsorted_values(&u_needles, "right").unwrap()
88    ));
89
90    // UNSORTED input: must fall back to the exact linear-scan behavior.
91    let uns = s_i64(vec![30, 10, 40, 20, 20]);
92    let un_needles: Vec<Scalar> = vec![5, 20, 35, 40].into_iter().map(Scalar::Int64).collect();
93    out.push_str(&format!(
94        "unsorted_left={:?}\n",
95        uns.searchsorted_values(&un_needles, "left").unwrap()
96    ));
97    out.push_str(&format!(
98        "unsorted_right={:?}\n",
99        uns.searchsorted_values(&un_needles, "right").unwrap()
100    ));
101
102    // Single-value searchsorted unchanged.
103    out.push_str(&format!(
104        "single_left={}\n",
105        s.searchsorted(&Scalar::Int64(20), "left").unwrap()
106    ));
107    out.push_str(&format!(
108        "single_right={}\n",
109        s.searchsorted(&Scalar::Int64(20), "right").unwrap()
110    ));
111    out.push_str(&format!(
112        "single_missing_left={}\n",
113        fs.searchsorted(&Scalar::Float64(f64::NAN), "left").unwrap()
114    ));
115    out.push_str(&format!(
116        "single_missing_right={}\n",
117        fs.searchsorted(&Scalar::Float64(f64::NAN), "right")
118            .unwrap()
119    ));
120
121    out
122}
123
124fn main() {
125    let g = golden();
126    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
127
128    let n: usize = 40_000;
129    let m: usize = 40_000;
130    let s = s_i64((0..n as i64).map(|v| v * 2).collect());
131    let needles: Vec<Scalar> = (0..m as i64).map(|v| Scalar::Int64(v * 2 + 1)).collect();
132
133    // warmup
134    let _ = s.searchsorted_values(&needles, "left").unwrap();
135
136    let t = Instant::now();
137    let r = s.searchsorted_values(&needles, "left").unwrap();
138    let d = t.elapsed();
139    assert_eq!(r.len(), m);
140
141    println!(
142        "TIMING n={n} m={m} searchsorted_values={:.3}ms",
143        d.as_secs_f64() * 1e3
144    );
145}