Skip to main content

bench_startswith_any/
bench_startswith_any.rs

1//! Bench + golden digest for Series.str.startswith_any / endswith_any.
2//!
3//! Run: cargo run -p fp-frame --example bench_startswith_any --release
4//!
//! Previously, both methods ran an independent `s.starts_with(p)` /
//! `s.ends_with(p)` for EVERY pattern on EVERY string — O(|pats|·prefix) per
//! element, worst when the patterns share a long common prefix. They now use
//! one anchored literal-alternation regex (`\A(?:…)` / `(?:…)\z`) that matches
//! each string once: O(prefix) per element.
9
10use std::time::Instant;
11
12use fp_frame::Series;
13use fp_index::IndexLabel;
14use fp_types::Scalar;
15
16fn s_from(strings: Vec<&str>) -> Series {
17    let idx: Vec<IndexLabel> = (0..strings.len() as i64).map(IndexLabel::Int64).collect();
18    let vals: Vec<Scalar> = strings
19        .into_iter()
20        .map(|x| Scalar::Utf8(x.to_string()))
21        .collect();
22    Series::from_values("s", idx, vals).unwrap()
23}
24
25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["https://a.com", "ftp://x", "file.txt", "", "a.b+c"]);
28
29    let r = s.str().startswith_any(&["https://", "ftp://"]).unwrap();
30    out.push_str(&format!("sw_hit={:?}\n", r.values()));
31    // metacharacters stay literal
32    let r2 = s.str().startswith_any(&["a.b+c"]).unwrap();
33    out.push_str(&format!("sw_meta={:?}\n", r2.values()));
34    let r3 = s.str().endswith_any(&[".txt", ".com"]).unwrap();
35    out.push_str(&format!("ew_hit={:?}\n", r3.values()));
36    // empty pattern set => all false
37    let r4 = s.str().startswith_any(&[]).unwrap();
38    out.push_str(&format!("empty={:?}\n", r4.values()));
39    // empty-string pattern matches every non-null string
40    let r5 = s.str().endswith_any(&["", "zz"]).unwrap();
41    out.push_str(&format!("empty_pat={:?}\n", r5.values()));
42
43    // with_na variants (null fill).
44    let sn = Series::from_values(
45        "s",
46        vec![IndexLabel::Int64(0), IndexLabel::Int64(1)],
47        vec![
48            Scalar::Utf8("https://z".into()),
49            Scalar::Null(fp_types::NullKind::NaN),
50        ],
51    )
52    .unwrap();
53    let rn = sn
54        .str()
55        .startswith_any_with_na(&["https://"], Some(true))
56        .unwrap();
57    out.push_str(&format!("na_fill={:?}\n", rn.values()));
58    out
59}
60
61fn main() {
62    let g = golden();
63    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
64
65    // Worst case: many prefixes sharing a long common stem.
66    let pats_owned: Vec<String> = (0..200)
67        .map(|i| format!("https://cdn.example.com/path/{i:04}/"))
68        .collect();
69    let pats: Vec<&str> = pats_owned.iter().map(String::as_str).collect();
70    let one = "https://cdn.example.com/path/9999/asset/file/deep/name.bin";
71    let n = 40_000;
72    let s = s_from(vec![one; n]);
73
74    let _ = s.str().startswith_any(&pats).unwrap(); // warmup
75
76    let t = Instant::now();
77    let r = s.str().startswith_any(&pats).unwrap();
78    let d = t.elapsed();
79    assert_eq!(r.len(), n);
80
81    println!(
82        "TIMING n={n} npats={} startswith_any={:.3}ms",
83        pats.len(),
84        d.as_secs_f64() * 1e3
85    );
86}