Skip to main content

bench_contains_any/
bench_contains_any.rs

//! Bench + golden digest for `Series.str.contains_any(&[&str])`.
//!
//! Run: cargo run -p fp-frame --example bench_contains_any --release
//!
//! Previously, `contains_any` ran an independent `s.contains(p)` for EVERY
//! pattern on EVERY string — O(|pats|·L) per element. Compiling the literal
//! set into one regex alternation (Aho-Corasick under the hood) scans each
//! string once: O(L). The Boolean output is identical: true when the string
//! contains any pattern as a substring.
9
10use std::time::Instant;
11
12use fp_frame::Series;
13use fp_index::IndexLabel;
14use fp_types::Scalar;
15
16fn s_from(strings: Vec<&str>) -> Series {
17    let idx: Vec<IndexLabel> = (0..strings.len() as i64).map(IndexLabel::Int64).collect();
18    let vals: Vec<Scalar> = strings
19        .into_iter()
20        .map(|x| Scalar::Utf8(x.to_string()))
21        .collect();
22    Series::from_values("s", idx, vals).unwrap()
23}
24
25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["hello world", "foobar", "BAZ qux", "", "a.b+c"]);
28
29    let r = s.str().contains_any(&["world", "qux"]).unwrap();
30    out.push_str(&format!("hit={:?}\n", r.values()));
31
32    // Regex metacharacters must be treated as LITERALS.
33    let r2 = s.str().contains_any(&["a.b+c", "zz"]).unwrap();
34    out.push_str(&format!("literal_meta={:?}\n", r2.values()));
35
36    // Case sensitive (no match for lowercase 'baz').
37    let r3 = s.str().contains_any(&["baz"]).unwrap();
38    out.push_str(&format!("case={:?}\n", r3.values()));
39
40    // Empty pattern set => all false.
41    let r4 = s.str().contains_any(&[]).unwrap();
42    out.push_str(&format!("empty={:?}\n", r4.values()));
43
44    // Empty-string pattern matches every (non-null) string.
45    let r5 = s.str().contains_any(&["", "zz"]).unwrap();
46    out.push_str(&format!("empty_pat={:?}\n", r5.values()));
47    out
48}
49
50fn main() {
51    let g = golden();
52    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
53
54    // Many patterns, mostly-missing, over many medium strings (worst case:
55    // every pattern scanned per string in the naive path).
56    let pats_owned: Vec<String> = (0..200).map(|i| format!("needle{i:04}xyz")).collect();
57    let pats: Vec<&str> = pats_owned.iter().map(String::as_str).collect();
58    let base = "the quick brown fox jumps over the lazy dog ".repeat(4);
59    let n = 20_000;
60    let s = s_from(vec![base.as_str(); n]);
61
62    // warmup
63    let _ = s.str().contains_any(&pats).unwrap();
64
65    let t = Instant::now();
66    let r = s.str().contains_any(&pats).unwrap();
67    let d = t.elapsed();
68    assert_eq!(r.len(), n);
69
70    println!(
71        "TIMING n={n} npats={} contains_any={:.3}ms",
72        pats.len(),
73        d.as_secs_f64() * 1e3
74    );
75}