use std::env;
use std::fs;
use std::time::Instant;
/// One record of the benchmark dataset, as produced by `load_cases`.
#[derive(Debug)]
struct Case {
    /// Label of the record; stored in the dataset as UTF-8 bytes.
    pattern: String,
    /// Count stored alongside the payload.
    /// NOTE(review): presumably the number of UTF-16 code units in
    /// `utf16le` — confirm against the dataset generator.
    units: u32,
    /// Payload bytes copied verbatim from the dataset.
    utf16le: Vec<u8>,
}
/// Decodes a little-endian `u32` from `bytes` at offset `*idx` and advances
/// `*idx` past the four bytes that were consumed.
///
/// # Panics
///
/// Panics if fewer than four bytes are available at `*idx`.
fn read_u32(bytes: &[u8], idx: &mut usize) -> u32 {
    let start = *idx;
    // Slicing first keeps the out-of-range panic ahead of any cursor update.
    let word = &bytes[start..start + 4];
    *idx = start + 4;
    u32::from_le_bytes([word[0], word[1], word[2], word[3]])
}
/// Borrows the next `len` bytes of `bytes` starting at `*idx` and advances
/// `*idx` past them.
///
/// # Panics
///
/// Panics if fewer than `len` bytes are available at `*idx`.
fn read_bytes<'a>(bytes: &'a [u8], idx: &mut usize, len: usize) -> &'a [u8] {
    let start = *idx;
    let end = start + len;
    // Take the slice before moving the cursor, so a failed read never advances it.
    let chunk = &bytes[start..end];
    *idx = end;
    chunk
}
/// Loads every benchmark case from the binary dataset at `path`.
///
/// Layout as parsed here (all integers are little-endian `u32`):
/// the magic bytes `UTFB`, a version (read but not checked), the number of
/// cases, and then for each case: pattern length, pattern bytes (UTF-8),
/// unit count, payload length, payload bytes.
///
/// # Errors
///
/// Returns an error when the file cannot be read, is shorter than the
/// 12-byte header, does not start with `UTFB`, ends before a field it
/// declares, or contains a pattern that is not valid UTF-8.
fn load_cases(path: &str) -> Result<Vec<Case>, Box<dyn std::error::Error>> {
    // Smallest encoded case: three `u32` fields with empty pattern and payload.
    const MIN_CASE_BYTES: usize = 12;

    // Fails unless `need` more bytes are available at `idx`, so the panicking
    // readers below are only ever called on ranges known to be in bounds.
    fn ensure_available(
        bytes: &[u8],
        idx: usize,
        need: usize,
        what: &str,
    ) -> Result<(), Box<dyn std::error::Error>> {
        // `idx` only advances after a successful check, so it never exceeds
        // `bytes.len()` and this subtraction cannot underflow.
        let remaining = bytes.len() - idx;
        if need > remaining {
            return Err(format!(
                "dataset truncated: {} needs {} bytes at offset {}, only {} left",
                what, need, idx, remaining
            )
            .into());
        }
        Ok(())
    }

    let bytes = fs::read(path)?;
    let mut idx = 0usize;
    // Header is magic + version + count = 12 bytes.
    if bytes.len() < 12 {
        return Err("dataset too small".into());
    }
    let magic = read_bytes(&bytes, &mut idx, 4);
    if magic != b"UTFB" {
        return Err("bad magic".into());
    }
    let _version = read_u32(&bytes, &mut idx);
    let count = read_u32(&bytes, &mut idx) as usize;

    // `count` comes from the file: never reserve more entries than the
    // remaining bytes could possibly encode.
    let plausible = (bytes.len() - idx) / MIN_CASE_BYTES;
    let mut cases = Vec::with_capacity(count.min(plausible));

    for _ in 0..count {
        ensure_available(&bytes, idx, 4, "pattern length")?;
        let pat_len = read_u32(&bytes, &mut idx) as usize;
        ensure_available(&bytes, idx, pat_len, "pattern")?;
        let pat = read_bytes(&bytes, &mut idx, pat_len);

        ensure_available(&bytes, idx, 8, "unit count and payload length")?;
        let units = read_u32(&bytes, &mut idx);
        let byte_len = read_u32(&bytes, &mut idx) as usize;
        ensure_available(&bytes, idx, byte_len, "payload")?;
        let data = read_bytes(&bytes, &mut idx, byte_len).to_vec();

        let pattern = String::from_utf8(pat.to_vec())?;
        cases.push(Case {
            pattern,
            units,
            utf16le: data,
        });
    }
    Ok(cases)
}
/// Times `utf16_simd::escape_json_utf16le` on every case of a dataset and
/// prints one CSV row per case to stdout.
///
/// Recognised options, each followed by a value: `--data <path>`,
/// `--target-bytes <n>`, `--min-iters <n>`, `--max-iters <n>`.
/// Other arguments are ignored. A missing or unparsable value leaves the
/// default in place and is reported on stderr.
///
/// # Errors
///
/// Propagates any error returned by `load_cases` for the chosen dataset.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Resolves the value of a numeric flag, keeping `current` (with a warning
    // on stderr) when the value is absent or is not a valid `usize`.
    fn numeric_flag(flag: &str, value: Option<String>, current: usize) -> usize {
        match value {
            Some(raw) => match raw.parse() {
                Ok(parsed) => parsed,
                Err(_) => {
                    eprintln!(
                        "warning: ignoring invalid value {:?} for {}; using {}",
                        raw, flag, current
                    );
                    current
                }
            },
            None => {
                eprintln!("warning: {} expects a value; using {}", flag, current);
                current
            }
        }
    }

    let mut data_path = String::from("perf/utf16_escape_data.bin");
    let mut target_bytes: usize = 256 * 1024 * 1024;
    let mut min_iters: usize = 100_000;
    let mut max_iters: usize = 20_000_000;

    // `while let` rather than `for`: each flag pulls its value from the same iterator.
    let mut args = env::args().skip(1);
    while let Some(arg) = args.next() {
        match arg.as_str() {
            "--data" => match args.next() {
                Some(p) => data_path = p,
                None => eprintln!("warning: --data expects a value; using {}", data_path),
            },
            "--target-bytes" => {
                target_bytes = numeric_flag("--target-bytes", args.next(), target_bytes);
            }
            "--min-iters" => {
                min_iters = numeric_flag("--min-iters", args.next(), min_iters);
            }
            "--max-iters" => {
                max_iters = numeric_flag("--max-iters", args.next(), max_iters);
            }
            // Anything else is deliberately ignored.
            _ => {}
        }
    }

    let cases = load_cases(&data_path)?;
    println!("pattern,length,units,bytes,iters,ns_per_iter,ns_per_unit,checksum");
    for case in cases {
        // A zero unit count would make the per-unit figure a division by zero.
        if case.units == 0 {
            continue;
        }

        // Aim for roughly `target_bytes` of input per case, bounded by the
        // configured limits (the upper bound wins if the two conflict).
        // At least one iteration always runs, so the per-iteration averages
        // below never divide by zero and print NaN/inf.
        let byte_len = case.utf16le.len().max(1);
        let iters = (target_bytes / byte_len)
            .max(min_iters)
            .min(max_iters)
            .max(1);

        let units = case.units as usize;
        let max_len = utf16_simd::max_escaped_len(units, false);
        let mut dst = vec![std::mem::MaybeUninit::uninit(); max_len];

        let start = Instant::now();
        // Accumulating the return values and printing the sum keeps each
        // call's result observably used.
        let mut checksum: usize = 0;
        for _ in 0..iters {
            let written = utf16_simd::escape_json_utf16le(&case.utf16le, units, &mut dst, false);
            checksum = checksum.wrapping_add(written);
        }
        let elapsed = start.elapsed();

        let ns = elapsed.as_nanos() as f64;
        let iters_f = iters as f64;
        let units_f = case.units as f64;
        let ns_per_iter = ns / iters_f;
        let ns_per_unit = ns / (iters_f * units_f);
        // NOTE(review): the `length` and `units` columns are both filled from
        // `case.units`; confirm whether `length` was meant to be something else.
        println!(
            "{},{},{},{},{},{:.3},{:.6},{}",
            case.pattern,
            case.units,
            case.units,
            case.utf16le.len(),
            iters,
            ns_per_iter,
            ns_per_unit,
            checksum
        );
    }
    Ok(())
}