evtx 0.11.2

A Fast (and safe) parser for the Windows XML Event Log (EVTX) format
Documentation
//! Microbench UTF-16 JSON escaping across variable lengths and patterns.
//!
//! Reads a shared binary dataset (generated by scripts/gen_utf16_escape_dataset.py),
//! benchmarks `utf16_simd::escape_json_utf16le`, and prints CSV rows.

use std::env;
use std::fs;
use std::time::Instant;

/// One benchmark input record as decoded from the dataset file by `load_cases`.
#[derive(Debug)]
struct Case {
    /// Pattern label stored in the record (validated as UTF-8 when loaded);
    /// printed as the first CSV column.
    pattern: String,
    /// Unit count stored in the record; forwarded to the escaper and used as
    /// the divisor for the per-unit timing.
    /// NOTE(review): presumably the number of UTF-16 code units in `utf16le` —
    /// confirm against the dataset generator.
    units: u32,
    /// Raw payload bytes of the record, handed to the escaper unchanged
    /// (UTF-16LE, going by the field name and the module docs).
    utf16le: Vec<u8>,
}

/// Decodes a little-endian `u32` from the four bytes starting at `*idx` and
/// advances the cursor past them.
///
/// # Panics
///
/// Panics if fewer than four bytes are available at `*idx`.
fn read_u32(bytes: &[u8], idx: &mut usize) -> u32 {
    let start = *idx;
    // Slice first so an out-of-range read panics before the cursor moves.
    let word = &bytes[start..start + 4];
    *idx = start + 4;
    u32::from_le_bytes([word[0], word[1], word[2], word[3]])
}

/// Borrows the `len` bytes starting at `*idx` and advances the cursor past
/// them.
///
/// # Panics
///
/// Panics if `*idx + len` runs past the end of `bytes`.
fn read_bytes<'a>(bytes: &'a [u8], idx: &mut usize, len: usize) -> &'a [u8] {
    let begin = *idx;
    let end = begin + len;
    // Take the slice before touching the cursor, so a failed read leaves it as is.
    let chunk = &bytes[begin..end];
    *idx = end;
    chunk
}

/// Loads every benchmark case from the binary dataset at `path`.
///
/// Layout as parsed here (all integers are little-endian `u32`):
///
/// * 4-byte magic `UTFB`, a version word (read but ignored), a record count;
/// * per record: pattern length, pattern bytes (must be valid UTF-8), unit
///   count, payload byte length, payload bytes.
///
/// # Errors
///
/// Returns an error if the file cannot be read, is shorter than the 12-byte
/// header, has the wrong magic, ends before all announced records have been
/// read, or contains a pattern that is not valid UTF-8. Truncated or corrupt
/// input is reported as an error rather than a panic.
fn load_cases(path: &str) -> Result<Vec<Case>, Box<dyn std::error::Error>> {
    // Smallest possible record: three u32 fields with empty pattern and payload.
    const MIN_RECORD_BYTES: usize = 12;

    /// Verifies that `need` more bytes are available at `idx`.
    fn ensure(bytes: &[u8], idx: usize, need: usize) -> Result<(), Box<dyn std::error::Error>> {
        match idx.checked_add(need) {
            Some(end) if end <= bytes.len() => Ok(()),
            _ => Err(format!(
                "dataset truncated at offset {}: need {} more bytes, file has {}",
                idx,
                need,
                bytes.len()
            )
            .into()),
        }
    }

    let bytes = fs::read(path)?;
    let mut idx = 0usize;
    if bytes.len() < 12 {
        return Err("dataset too small".into());
    }
    let magic = read_bytes(&bytes, &mut idx, 4);
    if magic != b"UTFB" {
        return Err("bad magic".into());
    }
    let _version = read_u32(&bytes, &mut idx);
    let count = read_u32(&bytes, &mut idx) as usize;

    // The count comes from the file; never reserve room for more records than
    // the remaining bytes could possibly hold.
    let max_records = (bytes.len() - idx) / MIN_RECORD_BYTES;
    let mut cases = Vec::with_capacity(count.min(max_records));

    for _ in 0..count {
        ensure(&bytes, idx, 4)?;
        let pat_len = read_u32(&bytes, &mut idx) as usize;
        ensure(&bytes, idx, pat_len)?;
        let pat = read_bytes(&bytes, &mut idx, pat_len);

        // `units` and `byte_len` are adjacent, so check both words at once.
        ensure(&bytes, idx, 8)?;
        let units = read_u32(&bytes, &mut idx);
        let byte_len = read_u32(&bytes, &mut idx) as usize;
        ensure(&bytes, idx, byte_len)?;
        let data = read_bytes(&bytes, &mut idx, byte_len).to_vec();

        let pattern = String::from_utf8(pat.to_vec())?;
        cases.push(Case {
            pattern,
            units,
            utf16le: data,
        });
    }
    Ok(cases)
}

/// Resolves the value of a numeric command-line flag.
///
/// Returns the parsed number, or `current` (after a warning on stderr) when
/// the value is missing or is not a valid `usize`.
fn parse_count_arg(flag: &str, value: Option<String>, current: usize) -> usize {
    match value {
        Some(raw) => match raw.parse() {
            Ok(parsed) => parsed,
            Err(_) => {
                eprintln!(
                    "warning: ignoring invalid value {:?} for {}; using {}",
                    raw, flag, current
                );
                current
            }
        },
        None => {
            eprintln!("warning: {} expects a value; using {}", flag, current);
            current
        }
    }
}

/// Runs the escaping microbenchmark over every case in the dataset and prints
/// one CSV row per case to stdout.
///
/// Recognized flags (each takes one value):
///
/// * `--data <path>` — dataset file (default `perf/utf16_escape_data.bin`);
/// * `--target-bytes <n>` — approximate number of input bytes to process per
///   case, used to derive the iteration count;
/// * `--min-iters <n>` / `--max-iters <n>` — bounds on the iteration count
///   (the upper bound wins if they conflict).
///
/// Malformed or unknown arguments produce a warning on stderr and are
/// otherwise ignored, so the CSV on stdout stays clean.
///
/// # Errors
///
/// Returns any error reported by `load_cases`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut data_path = String::from("perf/utf16_escape_data.bin");
    let mut target_bytes: usize = 256 * 1024 * 1024;
    let mut min_iters: usize = 100_000;
    let mut max_iters: usize = 20_000_000;

    // `while let` rather than `for`: flag handlers pull their value from the
    // same iterator.
    let mut args = env::args().skip(1);
    while let Some(arg) = args.next() {
        match arg.as_str() {
            "--data" => match args.next() {
                Some(p) => data_path = p,
                None => eprintln!("warning: --data expects a value; using {}", data_path),
            },
            "--target-bytes" => {
                target_bytes = parse_count_arg("--target-bytes", args.next(), target_bytes);
            }
            "--min-iters" => {
                min_iters = parse_count_arg("--min-iters", args.next(), min_iters);
            }
            "--max-iters" => {
                max_iters = parse_count_arg("--max-iters", args.next(), max_iters);
            }
            other => eprintln!("warning: ignoring unrecognized argument {:?}", other),
        }
    }

    let cases = load_cases(&data_path)?;
    println!("pattern,length,units,bytes,iters,ns_per_iter,ns_per_unit,checksum");

    for case in cases {
        // Zero-unit cases would divide by zero in the per-unit figure.
        if case.units == 0 {
            continue;
        }
        let units = case.units as usize;

        // Aim for roughly `target_bytes` of input per case, bounded by the
        // min/max settings (max applied last, so it wins on conflict). At
        // least one iteration always runs so the averages below are defined.
        let byte_len = case.utf16le.len().max(1);
        let iters = (target_bytes / byte_len)
            .max(min_iters)
            .min(max_iters)
            .max(1);

        let max_len = utf16_simd::max_escaped_len(units, false);
        let mut dst = vec![std::mem::MaybeUninit::uninit(); max_len];

        let start = Instant::now();
        let mut checksum: usize = 0;
        for _ in 0..iters {
            // `black_box` keeps the optimizer from treating the inputs as
            // loop-invariant or the result as unused.
            let written = utf16_simd::escape_json_utf16le(
                std::hint::black_box(&case.utf16le),
                std::hint::black_box(units),
                &mut dst,
                false,
            );
            checksum = checksum.wrapping_add(std::hint::black_box(written));
        }
        let elapsed = start.elapsed();
        // Keep the output buffer observable so its writes cannot be elided.
        std::hint::black_box(&dst);

        let ns = elapsed.as_nanos() as f64;
        let iters_f = iters as f64;
        let units_f = case.units as f64;
        let ns_per_iter = ns / iters_f;
        let ns_per_unit = ns / (iters_f * units_f);

        // The `length` and `units` columns both carry `case.units`.
        println!(
            "{},{},{},{},{},{:.3},{:.6},{}",
            case.pattern,
            case.units,
            case.units,
            case.utf16le.len(),
            iters,
            ns_per_iter,
            ns_per_unit,
            checksum
        );
    }

    Ok(())
}