// zipatch-rs 1.6.0

//! Serial-vs-threaded wall-clock benchmark for the two verify paths.
//!
//! The question this bench answers: would parallelising the verify paths via
//! rayon (or any thread fan-out) actually pay off? The verify paths are the
//! cleanest parallelisation candidates in the crate because they are pure-read,
//! per-target / per-file independent, and CPU-dominated (SHA1 / CRC32 over
//! file bytes).
//!
//! Two suites:
//!
//! - `HashVerifier` — post-apply SHA1 verification. Serial baseline uses the
//!   public [`HashVerifier::execute`] directly. The threaded variants partition
//!   the registered task list across `N` threads, each running its own
//!   [`HashVerifier`] over a slice via [`std::thread::scope`]. Identical
//!   hashing code, only the fan-out differs.
//! - `index::PlanVerifier` — plan-driven CRC32 region verification. Same shape:
//!   serial calls [`PlanVerifier::execute`] over the full plan; threaded variants
//!   split [`Plan::targets`] across `N` threads, each running its own
//!   [`PlanVerifier`] over a sub-plan (the plan is otherwise identical — same
//!   platform, same patches, same `fs_ops`).
//!
//! Setup runs once per `(suite, file/target count)` pair (files written into a
//! shared `tempdir`, kept alive for the whole sweep). The page cache will warm
//! up after the first iteration; both serial and parallel variants then see
//! the same cache, so the comparison isolates CPU-bound hashing throughput,
//! which is precisely the question.
//!
//! Run with `cargo bench --bench verify_parallel --all-features`.

use std::hint::black_box;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::thread;

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use sha1::{Digest, Sha1};
use tempfile::TempDir;
use zipatch_rs::Platform;
use zipatch_rs::index::{
    PartExpected, PartSource, PatchRef, PatchSourceKind, Plan, PlanVerifier, Region, Target,
    TargetPath,
};
use zipatch_rs::verify::{ExpectedHash, HashVerifier, Sha1Digest};

/// Size in bytes of every file written by either suite (4 MiB).
///
/// Chosen so hashing a file outweighs the cost of opening/stat-ing it, while
/// keeping the whole sweep short enough to run in a reasonable amount of time.
const FILE_SIZE: usize = 4 << 20;

/// Number of files (or plan targets) used for each sweep step.
///
/// A single file isolates the per-file overhead; the larger counts show how
/// the fan-out behaves once there is more work to distribute.
const FILE_COUNTS: &[usize] = &[1, 4, 16, 64];

/// Thread counts swept. 1 is included as a sanity check against the serial
/// baseline (should be ~equivalent — measures the thread-scope overhead).
///
/// The bench functions skip any thread count larger than the current file
/// count, so `1` runs for every entry of the file-count sweep.
const THREAD_COUNTS: &[usize] = &[1, 2, 4, 8];

/// Produces `size` bytes of deterministic filler for a benchmark file.
///
/// Byte `i` equals `seed` plus the low eight bits of `i`, with wrapping
/// addition, so the output is a repeating 256-byte ramp that starts at
/// `seed`. Different seeds therefore give different content (and different
/// hashes). The byte distribution is not expected to influence SHA1/CRC
/// throughput; distinct payloads merely guard against accidental dedup at a
/// lower layer.
fn make_payload(size: usize, seed: u8) -> Vec<u8> {
    (0..=u8::MAX)
        .cycle()
        .take(size)
        .map(|low_byte| seed.wrapping_add(low_byte))
        .collect()
}

/// Computes the SHA1 of `bytes` and wraps the 20-byte result in a
/// [`Sha1Digest`].
fn sha1_of(bytes: &[u8]) -> Sha1Digest {
    // One-shot digest: equivalent to `new` + `update` + `finalize`.
    let digest = Sha1::digest(bytes);
    let mut raw = [0u8; 20];
    raw.copy_from_slice(&digest);
    Sha1Digest::new(raw)
}

// ---------- HashVerifier suite ----------

/// Creates `count` files named `f{i}.bin` inside `dir`, each holding
/// [`FILE_SIZE`] bytes of [`make_payload`] output seeded with `i as u8`.
///
/// Each file is written and fsynced. Returns one `(path, expected hash)`
/// pair per file, where the expected hash is a whole-file SHA1 computed from
/// the in-memory payload.
///
/// Panics if any file cannot be created, written, or synced.
fn setup_hash_files(dir: &Path, count: usize) -> Vec<(PathBuf, ExpectedHash)> {
    (0..count)
        .map(|idx| {
            let file_path = dir.join(format!("f{idx}.bin"));
            let bytes = make_payload(FILE_SIZE, idx as u8);

            let mut file = std::fs::File::create(&file_path).expect("create");
            file.write_all(&bytes).expect("write");
            file.sync_all().expect("fsync");

            let expected = ExpectedHash::whole(sha1_of(&bytes));
            (file_path, expected)
        })
        .collect()
}

/// Serial baseline: registers every task on a single [`HashVerifier`] and
/// runs it on the calling thread.
///
/// Panics if verification returns an error; in debug builds it also asserts
/// that the resulting report is clean.
fn run_hash_serial(tasks: &[(PathBuf, ExpectedHash)]) {
    let report = tasks
        .iter()
        .fold(HashVerifier::new(), |verifier, (path, expected)| {
            verifier.expect(path.clone(), expected.clone())
        })
        .execute()
        .expect("verify");
    debug_assert!(report.is_clean());
    black_box(report);
}

/// Threaded variant: splits `tasks` into contiguous chunks of
/// `ceil(len / threads)` entries (at least one) and verifies each chunk on
/// its own scoped thread with its own [`HashVerifier`].
///
/// `threads` must be non-zero, since the chunk size is computed by dividing
/// by it. Panics if a worker panics or its verification returns an error; in
/// debug builds it also asserts that every report is clean.
fn run_hash_parallel(tasks: &[(PathBuf, ExpectedHash)], threads: usize) {
    let per_thread = tasks.len().div_ceil(threads).max(1);
    thread::scope(|scope| {
        // Collect eagerly so every worker is spawned before the first join.
        let workers: Vec<_> = tasks
            .chunks(per_thread)
            .map(|share| {
                scope.spawn(move || {
                    share
                        .iter()
                        .fold(HashVerifier::new(), |verifier, (path, expected)| {
                            verifier.expect(path.clone(), expected.clone())
                        })
                        .execute()
                        .expect("verify")
                })
            })
            .collect();

        for worker in workers {
            let report = worker.join().expect("thread");
            debug_assert!(report.is_clean());
            black_box(report);
        }
    });
}

/// Registers the `HashVerifier` benchmark group: for every entry of
/// [`FILE_COUNTS`] one `serial` benchmark plus one `parallel_{N}t` benchmark
/// per entry of [`THREAD_COUNTS`] that does not exceed the file count.
fn bench_hash_verifier(c: &mut Criterion) {
    let mut group = c.benchmark_group("HashVerifier");
    group.sample_size(10);

    // Every TempDir is parked here until the group is finished, so the files
    // of a given file count stay on disk for its whole thread sweep.
    let mut live_dirs: Vec<TempDir> = Vec::with_capacity(FILE_COUNTS.len());

    for &count in FILE_COUNTS {
        let dir = TempDir::new().expect("tempdir");
        let tasks = setup_hash_files(dir.path(), count);
        group.throughput(Throughput::Bytes((FILE_SIZE * count) as u64));

        let serial_id = BenchmarkId::new("serial", count);
        group.bench_with_input(serial_id, &tasks, |bencher, tasks| {
            bencher.iter(|| run_hash_serial(tasks));
        });

        // Thread counts above the file count have no work to share.
        let usable_threads = THREAD_COUNTS.iter().copied().filter(|&t| t <= count);
        for threads in usable_threads {
            let parallel_id = BenchmarkId::new(format!("parallel_{threads}t"), count);
            let input = (tasks.clone(), threads);
            group.bench_with_input(parallel_id, &input, |bencher, (tasks, threads)| {
                bencher.iter(|| run_hash_parallel(tasks, *threads));
            });
        }

        live_dirs.push(dir);
    }
    group.finish();
}

// ---------- index::PlanVerifier suite ----------

/// Writes `target_count` files under `install_root/data/` and returns a
/// [`Plan`] describing them.
///
/// Every file holds [`FILE_SIZE`] bytes of [`make_payload`] output and maps
/// to one `Generic` target with a single region spanning the whole file,
/// expected to match the payload's CRC32 ([`PartExpected::Crc32`]). The
/// whole-file region is meant to make the verifier read every byte of every
/// target, which is the CPU-bound CRC path the parallelism question is about.
///
/// Panics if a directory or file cannot be created, written, or synced.
fn setup_index_plan(install_root: &Path, target_count: usize) -> Plan {
    // FILE_SIZE is 4 MiB, so the narrowing cast is lossless.
    let region_len = FILE_SIZE as u32;

    let targets: Vec<_> = (0..target_count)
        .map(|idx| {
            let rel = format!("data/f{idx}.bin");
            let abs = install_root.join(&rel);
            std::fs::create_dir_all(abs.parent().expect("parent")).expect("mkdir");

            let bytes = make_payload(FILE_SIZE, idx as u8);
            let mut file = std::fs::File::create(&abs).expect("create");
            file.write_all(&bytes).expect("write");
            file.sync_all().expect("fsync");

            let expected = PartExpected::Crc32(crc32fast::hash(&bytes));
            let source = PartSource::Patch {
                patch_idx: zipatch_rs::newtypes::PatchIndex::new(0),
                offset: 0,
                kind: PatchSourceKind::Raw { len: region_len },
                decoded_skip: 0,
            };
            let region = Region::new(0, region_len, source, expected);

            Target::new(TargetPath::Generic(rel), FILE_SIZE as u64, vec![region])
        })
        .collect();

    let patches = vec![PatchRef::new("synthetic", None)];
    Plan::new(Platform::Win32, patches, targets, vec![])
}

/// Serial baseline: verifies the complete `plan` against `install_root` with
/// a single [`PlanVerifier`] on the calling thread.
///
/// Panics if verification returns an error; in debug builds it also asserts
/// that the result is clean.
fn run_index_serial(plan: &Plan, install_root: &Path) {
    let outcome = PlanVerifier::new(install_root).execute(plan);
    let manifest = outcome.expect("verify_plan");
    debug_assert!(manifest.is_clean());
    black_box(manifest);
}

/// Threaded variant: splits `plan.targets` into contiguous chunks of
/// `ceil(len / threads)` targets (at least one) and verifies each chunk on
/// its own scoped thread.
///
/// Each worker gets a sub-plan that copies the platform, patch list and
/// `fs_ops` of `plan` and carries only that worker's targets. The sub-plan is
/// built on the calling thread before the worker is spawned.
///
/// `threads` must be non-zero, since the chunk size is computed by dividing
/// by it. Panics if a worker panics or its verification returns an error; in
/// debug builds it also asserts that every result is clean.
fn run_index_parallel(plan: &Plan, install_root: &Path, threads: usize) {
    let per_thread = plan.targets.len().div_ceil(threads).max(1);
    thread::scope(|scope| {
        // Collect eagerly so every worker is spawned before the first join.
        let workers: Vec<_> = plan
            .targets
            .chunks(per_thread)
            .map(|share| {
                let sub_plan = Plan::new(
                    plan.platform,
                    plan.patches.clone(),
                    share.to_vec(),
                    plan.fs_ops.clone(),
                );
                let root = install_root.to_path_buf();
                scope.spawn(move || {
                    PlanVerifier::new(root)
                        .execute(&sub_plan)
                        .expect("verify_plan")
                })
            })
            .collect();

        for worker in workers {
            let manifest = worker.join().expect("thread");
            debug_assert!(manifest.is_clean());
            black_box(manifest);
        }
    });
}

/// Registers the `index::PlanVerifier` benchmark group: for every entry of
/// [`FILE_COUNTS`] one `serial` benchmark plus one `parallel_{N}t` benchmark
/// per entry of [`THREAD_COUNTS`] that does not exceed the target count.
fn bench_index_verifier(c: &mut Criterion) {
    let mut group = c.benchmark_group("index::PlanVerifier");
    group.sample_size(10);

    // Every TempDir is parked here until the group is finished, so the target
    // files stay on disk while their benchmarks run.
    let mut live_dirs: Vec<TempDir> = Vec::with_capacity(FILE_COUNTS.len());

    for &count in FILE_COUNTS {
        let dir = TempDir::new().expect("tempdir");
        let plan = setup_index_plan(dir.path(), count);
        group.throughput(Throughput::Bytes((FILE_SIZE * count) as u64));

        let install_root = dir.path().to_path_buf();

        let serial_id = BenchmarkId::new("serial", count);
        let serial_input = (plan.clone(), install_root.clone());
        group.bench_with_input(serial_id, &serial_input, |bencher, (plan, root)| {
            bencher.iter(|| run_index_serial(plan, root));
        });

        // Thread counts above the target count have no work to share.
        let usable_threads = THREAD_COUNTS.iter().copied().filter(|&t| t <= count);
        for threads in usable_threads {
            let parallel_id = BenchmarkId::new(format!("parallel_{threads}t"), count);
            let input = (plan.clone(), install_root.clone(), threads);
            group.bench_with_input(parallel_id, &input, |bencher, (plan, root, threads)| {
                bencher.iter(|| run_index_parallel(plan, root, *threads));
            });
        }

        live_dirs.push(dir);
    }
    group.finish();
}

// Bundle both suites into the `benches` group and let Criterion generate the
// benchmark binary's entry point from it.
criterion_group!(benches, bench_hash_verifier, bench_index_verifier);
criterion_main!(benches);