Skip to main content

bench_translate/
bench_translate.rs

1//! Bench + golden digest for Series.str.translate(from, to).
2//!
3//! Run: cargo run -p fp-frame --example bench_translate --release
4//!
5//! translate scanned the `from` table linearly (O(|from|)) for EVERY input
6//! char — O(total_chars · |from|). A char->replacement map built once makes it
7//! O(total_chars). First occurrence in `from` wins (same as the linear scan);
8//! a source char beyond `to`'s length is deleted; unmapped chars pass through.
9
10use std::time::Instant;
11
12use fp_frame::Series;
13use fp_index::IndexLabel;
14use fp_types::Scalar;
15
16fn s_from(strings: Vec<&str>) -> Series {
17    let idx: Vec<IndexLabel> = (0..strings.len() as i64).map(IndexLabel::Int64).collect();
18    let vals: Vec<Scalar> = strings
19        .into_iter()
20        .map(|x| Scalar::Utf8(x.to_string()))
21        .collect();
22    Series::from_values("s", idx, vals).unwrap()
23}
24
25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["hello world", "abcXYZ", "", "duplicate-d"]);
28
29    // basic 1:1 replacement
30    let r = s.str().translate("lo", "LO").unwrap();
31    out.push_str(&format!("basic={:?}\n", r.values()));
32
33    // `to` shorter than `from` => extra source chars are DELETED. 'o'->'0',
34    // but 'l' (index 1) has no target so every 'l' is removed.
35    let r2 = s.str().translate("ol", "0").unwrap();
36    out.push_str(&format!("delete={:?}\n", r2.values()));
37
38    // duplicate source char in `from`: first occurrence wins ('d'->'1', not '2')
39    let r3 = s.str().translate("dd", "12").unwrap();
40    out.push_str(&format!("dupsrc={:?}\n", r3.values()));
41
42    // empty table: identity
43    let r4 = s.str().translate("", "").unwrap();
44    out.push_str(&format!("empty={:?}\n", r4.values()));
45    out
46}
47
48fn main() {
49    let g = golden();
50    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
51
52    // Large translation table + many long strings.
53    let from: String = (0u32..2000).filter_map(char::from_u32).collect();
54    let to: String = (1000u32..3000).filter_map(char::from_u32).collect();
55    // Strings made of chars near the END of the `from` table: the linear scan
56    // must traverse ~all of `from` per char (the realistic O(|from|) cost).
57    let base: String = (1800u32..2000).filter_map(char::from_u32).collect();
58    let one = base.repeat(20); // ~4000 chars/string
59    let n = 2_000;
60    let s = s_from(vec![one.as_str(); n]);
61
62    // warmup
63    let _ = s.str().translate(&from, &to).unwrap();
64
65    let t = Instant::now();
66    let r = s.str().translate(&from, &to).unwrap();
67    let d = t.elapsed();
68    assert_eq!(r.len(), n);
69
70    println!(
71        "TIMING n={n} from_len={} translate={:.3}ms",
72        from.chars().count(),
73        d.as_secs_f64() * 1e3
74    );
75}