#![allow(missing_docs)]
use std::hint::black_box;
use disarm::api::fold_case;
use disarm::api::strip_accents;
use disarm::api::{slugify, OnUnknown, SlugConfig, Transliterate};
#[path = "../benchmarks/persona_corpus.rs"]
mod persona_corpus;
/// Writes the command-line synopsis to stderr and terminates the process
/// with exit status 2. This function never returns.
fn usage() -> ! {
    // One entry per line of help text, emitted in order.
    const HELP: [&str; 3] = [
        "usage: perf_workload <persona> <op> <iters>",
        " perf_workload --fingerprint",
        " see header comment for valid personas and ops",
    ];
    for line in HELP {
        eprintln!("{line}");
    }
    std::process::exit(2)
}
/// Prints a one-line JSON object to stdout that identifies this build.
///
/// Fields emitted, in order: `corpus_digest` (from
/// `persona_corpus::corpus_digest()`), `disarm_version` (the
/// `CARGO_PKG_VERSION` this binary was compiled with), `build_arch`,
/// `build_os`, `pointer_width_bits` and `build_profile`.
///
/// Values are interpolated as-is, without JSON escaping.
fn print_fingerprint() {
    // `debug_assertions` stands in for the build profile: on => "debug".
    let build_profile = if cfg!(debug_assertions) { "debug" } else { "release" };
    let corpus_digest = persona_corpus::corpus_digest();
    let disarm_version = env!("CARGO_PKG_VERSION");
    let build_arch = std::env::consts::ARCH;
    let build_os = std::env::consts::OS;
    let pointer_width_bits = usize::BITS;

    println!(
        "{{\"corpus_digest\":\"{corpus_digest}\",\
         \"disarm_version\":\"{disarm_version}\",\
         \"build_arch\":\"{build_arch}\",\
         \"build_os\":\"{build_os}\",\
         \"pointer_width_bits\":{pointer_width_bits},\
         \"build_profile\":\"{build_profile}\"}}"
    );
}
fn main() {
let args: Vec<String> = std::env::args().collect();
if args.len() == 2 && args[1] == "--fingerprint" {
print_fingerprint();
return;
}
if args.len() != 4 {
usage();
}
let persona = args[1].as_str();
let op = args[2].as_str();
let iters: u64 = args[3].parse().unwrap_or_else(|_| usage());
let Some(doc) = persona_corpus::doc(persona) else {
eprintln!("unknown persona: {persona}");
usage();
};
let lang: Option<&str> = match (op, persona) {
("lang", "cyrillic_doc") => Some("ru"),
("lang", "arabic_doc") => Some("ar"),
("lang", _) => {
eprintln!("op 'lang' supports cyrillic_doc and arabic_doc only");
usage();
}
_ => None,
};
let config = SlugConfig::default();
let mut checksum: u64 = 0;
for _ in 0..iters {
let n = match op {
"transliterate" | "lang" => {
let mut b = Transliterate::new().on_unknown(OnUnknown::Ignore);
if let Some(l) = lang {
b = b.lang(l);
}
b.run(black_box(&doc)).len()
}
"slugify" => slugify(black_box(&doc), &config).len(),
"fold_case" => fold_case(black_box(&doc)).len(),
"strip_accents" => strip_accents(black_box(&doc)).len(),
"strict_scan" => Transliterate::new()
.find_untranslatable(black_box(&doc))
.len(),
_ => usage(),
};
checksum = checksum.wrapping_add(n as u64);
}
println!(
"persona={persona} op={op} iters={iters} bytes_in={} checksum={checksum}",
doc.len()
);
}