Skip to main content

zipatch_rs/verify/
mod.rs

1//! Post-apply integrity check for files produced by either the sequential
2//! [`apply_to`](crate::ZiPatchReader::apply_to) driver or the indexed
3//! [`IndexApplier::execute`](crate::index::IndexApplier::execute) driver.
4//!
5//! The per-chunk CRC32 the parser already enforces catches transit corruption
6//! of the patch stream itself, but it cannot detect silent corruption of the
7//! *resulting* `SqPack` files on disk. Square Enix's patch lists carry SHA1
8//! hashes for the post-apply `.index` / `.dat` files (whole-file or split into
9//! fixed-size blocks); [`HashVerifier`] reads those files back from disk and
10//! compares against caller-supplied expected hashes.
11//!
12//! This is a separate verification step the caller invokes **after**
13//! [`apply_to`](crate::ZiPatchReader::apply_to) or
14//! [`IndexApplier::execute`](crate::index::IndexApplier::execute) returns
15//! `Ok`. The library never bakes hash verification into the apply loop —
16//! parsing the SE patch list to build the expected-hash input is the
17//! consumer's responsibility (in practice, `gaveloc-patcher`).
18//!
19//! # Modes
20//!
21//! - **Whole-file** ([`ExpectedHash::Whole`]) — single hash over the entire
22//!   file. Cheap to express; an opaque single failure for multi-GiB files.
23//! - **Block-mode** ([`ExpectedHash::Blocks`]) — file is split into
24//!   fixed-size blocks (the SE patch list uses 50 MiB); one hash per block.
25//!   Pinpoints *which* block is bad, so a user-facing repair flow can
26//!   re-fetch a narrow range rather than the whole file.
27//!
28//! Both modes share a [`HashAlgorithm`] discriminant. Only SHA1 is supported
29//! today; the enum is `#[non_exhaustive]` so future algorithms can be added
30//! without a `SemVer` break.
31//!
32//! # Example
33//!
34//! ```no_run
35//! use zipatch_rs::verify::{ExpectedHash, HashAlgorithm, HashVerifier};
36//!
37//! let report = HashVerifier::new()
38//!     .expect(
39//!         "/opt/ffxiv/game/sqpack/ffxiv/000000.win32.index",
40//!         ExpectedHash::whole_sha1(vec![0u8; 20]),
41//!     )
42//!     .execute()
43//!     .unwrap();
44//!
45//! if !report.is_clean() {
46//!     for (path, outcome) in report.failures() {
47//!         eprintln!("{}: {outcome:?}", path.display());
48//!     }
49//! }
50//! # let _ = HashAlgorithm::Sha1;
51//! ```
52
53use crate::Result;
54use rayon::iter::{IntoParallelIterator, ParallelIterator};
55use sha1::{Digest, Sha1};
56use std::collections::BTreeMap;
57use std::fs::File;
58use std::io::Read;
59use std::path::{Path, PathBuf};
60use tracing::{debug, debug_span, info, info_span, trace, warn};
61
/// Size in bytes of the scratch buffer each file-hashing task reads into (64 KiB).
const READ_BUF_CAPACITY: usize = 1 << 16;
/// Length in bytes of a SHA-1 digest (160 bits).
const SHA1_DIGEST_LEN: usize = 160 / 8;
64
/// Identifies which digest function an [`ExpectedHash`] was computed with.
///
/// SHA-1 is the only variant for now, matching what FFXIV patch lists carry.
/// The enum is `#[non_exhaustive]` so that another algorithm (SHA-256, say)
/// can be introduced in a minor release without breaking downstream matches.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum HashAlgorithm {
    /// SHA-1 — the digest Square Enix's patch list carries.
    Sha1,
}
76
77impl HashAlgorithm {
78    /// Expected digest length in bytes.
79    #[must_use]
80    pub const fn digest_len(self) -> usize {
81        match self {
82            HashAlgorithm::Sha1 => SHA1_DIGEST_LEN,
83        }
84    }
85}
86
87/// Expected hash spec for a single file.
88///
89/// Either a single whole-file digest, or a fixed-block-size digest per block.
90/// Block-mode is what FFXIV patch lists actually carry for `.dat` files
91/// (50 MiB blocks), because it pinpoints *which* block is bad. Whole-file
92/// mode is the natural fit for small files (e.g. `.index` files), where a
93/// single mismatched bit is best surfaced as a single failure.
94///
95/// # Stability
96///
97/// `#[non_exhaustive]` — future hash-spec shapes may be added without a
98/// `SemVer` break.
99#[non_exhaustive]
100#[derive(Debug, Clone, PartialEq, Eq)]
101pub enum ExpectedHash {
102    /// Whole-file hash mode: a single `algorithm` digest over the full file.
103    Whole {
104        /// Hash algorithm used.
105        algorithm: HashAlgorithm,
106        /// Expected digest bytes. Length must equal `algorithm.digest_len()`.
107        hash: Vec<u8>,
108    },
109    /// Block-mode hash: file is split into `block_size`-byte chunks, each
110    /// hashed independently. The last block may be shorter than `block_size`.
111    Blocks {
112        /// Hash algorithm used.
113        algorithm: HashAlgorithm,
114        /// Block size in bytes. Must be non-zero.
115        block_size: u64,
116        /// One digest per block, in file order. Each digest's length must
117        /// equal `algorithm.digest_len()`.
118        hashes: Vec<Vec<u8>>,
119    },
120}
121
122impl ExpectedHash {
123    /// Construct a whole-file SHA1 spec from a 20-byte digest.
124    #[must_use]
125    pub fn whole_sha1(hash: Vec<u8>) -> Self {
126        ExpectedHash::Whole {
127            algorithm: HashAlgorithm::Sha1,
128            hash,
129        }
130    }
131
132    /// Construct a block-mode SHA1 spec.
133    #[must_use]
134    pub fn blocks_sha1(block_size: u64, hashes: Vec<Vec<u8>>) -> Self {
135        ExpectedHash::Blocks {
136            algorithm: HashAlgorithm::Sha1,
137            block_size,
138            hashes,
139        }
140    }
141
142    /// Hash algorithm in use.
143    #[must_use]
144    pub fn algorithm(&self) -> HashAlgorithm {
145        match self {
146            ExpectedHash::Whole { algorithm, .. } | ExpectedHash::Blocks { algorithm, .. } => {
147                *algorithm
148            }
149        }
150    }
151
152    fn validate(&self) -> Result<()> {
153        let want = self.algorithm().digest_len();
154        match self {
155            ExpectedHash::Whole { hash, .. } => {
156                if hash.len() != want {
157                    return Err(crate::ZiPatchError::InvalidField {
158                        context: "ExpectedHash::Whole digest has wrong length for algorithm",
159                    });
160                }
161            }
162            ExpectedHash::Blocks {
163                block_size, hashes, ..
164            } => {
165                if *block_size == 0 {
166                    return Err(crate::ZiPatchError::InvalidField {
167                        context: "ExpectedHash::Blocks block_size must be non-zero",
168                    });
169                }
170                for h in hashes {
171                    if h.len() != want {
172                        return Err(crate::ZiPatchError::InvalidField {
173                            context: "ExpectedHash::Blocks per-block digest has wrong length for algorithm",
174                        });
175                    }
176                }
177            }
178        }
179        Ok(())
180    }
181}
182
/// Result of checking one file during a [`HashVerifier::execute`] run.
///
/// Marked `#[non_exhaustive]` so outcomes can later be split further (for
/// example permission-denied versus generic I/O) without a `SemVer` break.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum FileVerifyOutcome {
    /// The file hashed to the expected value: the single digest matched in
    /// whole-file mode, or every block matched in block mode.
    Match,
    /// Whole-file mode only: the digest computed from disk differs from the
    /// expected one.
    WholeMismatch {
        /// Digest the caller expected.
        expected: Vec<u8>,
        /// Digest actually computed over the on-disk file.
        actual: Vec<u8>,
    },
    /// Block mode only: at least one block failed.
    ///
    /// `mismatched_blocks` lists, in ascending order, the zero-based indices
    /// of blocks whose digest did not match. `expected_block_count` is how
    /// many block digests the caller supplied, and `actual_block_count` is
    /// how many blocks the file splits into at `block_size` (that is,
    /// `ceil(size / block_size)`). When the two counts differ the file is
    /// shorter or longer than expected, and the index of every "missing" or
    /// "extra" block is included in `mismatched_blocks`.
    BlockMismatches {
        /// Ascending zero-based indices of the blocks that failed.
        mismatched_blocks: Vec<usize>,
        /// How many block digests the caller supplied.
        expected_block_count: usize,
        /// How many blocks the on-disk file splits into at `block_size`.
        actual_block_count: usize,
    },
    /// No file exists at the registered path.
    Missing,
    /// Opening or reading the file failed with an I/O error.
    ///
    /// `kind` is the [`std::io::ErrorKind`] for callers to branch on, e.g.
    /// [`std::io::ErrorKind::PermissionDenied`] to prompt for elevation.
    /// [`std::io::ErrorKind::NotFound`] on open never shows up here; it is
    /// reported as [`FileVerifyOutcome::Missing`]. `message` carries the
    /// error's `Display` text as a plain string so the report can remain
    /// `Clone + PartialEq`.
    IoError {
        /// Kind of the underlying `std::io::Error`.
        kind: std::io::ErrorKind,
        /// `Display` rendering of the underlying error.
        message: String,
    },
}
232
233/// Structured outcome of a [`HashVerifier::execute`] run.
234///
235/// One entry per file the caller registered via [`HashVerifier::expect`].
236/// Iteration order is by [`PathBuf`] ordering (the underlying `BTreeMap`).
237///
238/// `#[non_exhaustive]`: future per-run aggregate fields may be added.
239#[non_exhaustive]
240#[derive(Debug, Clone, PartialEq, Eq, Default)]
241pub struct HashVerifyReport {
242    /// Per-file outcome, keyed by the absolute path the caller registered.
243    pub files: BTreeMap<PathBuf, FileVerifyOutcome>,
244}
245
246impl HashVerifyReport {
247    /// `true` iff every registered file matched.
248    #[must_use]
249    pub fn is_clean(&self) -> bool {
250        self.files
251            .values()
252            .all(|o| matches!(o, FileVerifyOutcome::Match))
253    }
254
255    /// Iterate the failing files (everything that is not [`FileVerifyOutcome::Match`]).
256    pub fn failures(&self) -> impl Iterator<Item = (&Path, &FileVerifyOutcome)> {
257        self.files
258            .iter()
259            .filter(|(_, o)| !matches!(o, FileVerifyOutcome::Match))
260            .map(|(p, o)| (p.as_path(), o))
261    }
262
263    /// Count of failing files.
264    #[must_use]
265    pub fn failure_count(&self) -> usize {
266        self.failures().count()
267    }
268}
269
270/// Build up a set of `(path, expected_hash)` pairs, then [`Self::execute`] to
271/// hash the on-disk files and compare against the expected values.
272///
273/// The verifier never writes — it opens each registered file read-only, hashes
274/// it (whole-file or per-block), and produces a [`HashVerifyReport`]. Missing
275/// files and I/O errors during read are recorded as per-file outcomes rather
276/// than aborting the run — consumers want the full picture in a single pass.
277///
278/// # Error semantics
279///
280/// `execute` returns `Err` only for *programmer* errors detected up front
281/// (e.g. a zero `block_size`, or a digest whose length does not match its
282/// declared algorithm). Filesystem errors against the registered paths are
283/// captured per-file in [`FileVerifyOutcome::IoError`] / [`FileVerifyOutcome::Missing`].
284///
285/// # Security
286///
287/// Files are opened via [`std::fs::File::open`], which follows symbolic
288/// links on every platform `zipatch-rs` supports. The verifier itself never
289/// writes — the worst-case outcome of a hostile symlink pointed at a file
290/// outside the install root is an information-disclosure-via-hash: the
291/// target file's SHA1 would appear in the report's
292/// [`FileVerifyOutcome::WholeMismatch`] `actual` field.
293///
294/// If the caller derives registered paths from untrusted input (e.g. a
295/// patch-list response from a server that could be tampered with), it is
296/// **the caller's responsibility** to canonicalize the install root and
297/// reject paths that escape it before passing them to [`Self::expect`].
298/// `zipatch-rs` does not canonicalize or symlink-fence on the caller's
299/// behalf, because the appropriate root depends on the consumer's install
300/// layout.
301#[derive(Debug, Default)]
302pub struct HashVerifier {
303    tasks: Vec<(PathBuf, ExpectedHash)>,
304}
305
306impl HashVerifier {
307    /// Construct an empty verifier.
308    #[must_use]
309    pub fn new() -> Self {
310        Self::default()
311    }
312
313    /// Register `path` with `expected`.
314    ///
315    /// Registering the same path twice with **identical** [`ExpectedHash`]
316    /// values is a no-op (the second registration is silently absorbed at
317    /// [`Self::execute`] time). Registering the same path twice with
318    /// **different** [`ExpectedHash`] values is a programmer error and causes
319    /// [`Self::execute`] to return [`crate::ZiPatchError::InvalidField`].
320    /// The check fires at execute-time rather than here so the builder API
321    /// stays infallible.
322    #[must_use]
323    pub fn expect(mut self, path: impl Into<PathBuf>, expected: ExpectedHash) -> Self {
324        self.tasks.push((path.into(), expected));
325        self
326    }
327
328    /// Hash each registered file and compare against its expected hash.
329    ///
330    /// Returns a [`HashVerifyReport`] describing every file. The report is
331    /// always populated for every registered task — `is_clean()` distinguishes
332    /// a fully-passing run from a failing one. See the struct docs for the
333    /// error policy.
334    ///
335    /// # Errors
336    ///
337    /// Returns [`crate::ZiPatchError::InvalidField`] if any registered
338    /// [`ExpectedHash`] is malformed (wrong digest length, zero `block_size`).
339    /// Filesystem errors are *not* returned here — they appear as
340    /// [`FileVerifyOutcome::IoError`] / [`FileVerifyOutcome::Missing`] entries
341    /// in the report.
342    pub fn execute(self) -> Result<HashVerifyReport> {
343        let span = info_span!("verify_hashes", files = self.tasks.len());
344        let _enter = span.enter();
345        let started = std::time::Instant::now();
346
347        for (_, exp) in &self.tasks {
348            exp.validate()?;
349        }
350
351        let mut seen: BTreeMap<&Path, &ExpectedHash> = BTreeMap::new();
352        for (path, exp) in &self.tasks {
353            match seen.get(path.as_path()) {
354                Some(prev) if *prev == exp => {}
355                Some(_) => {
356                    return Err(crate::ZiPatchError::InvalidField {
357                        context: "HashVerifier: same path registered with conflicting ExpectedHash values",
358                    });
359                }
360                None => {
361                    seen.insert(path.as_path(), exp);
362                }
363            }
364        }
365
366        let mut report = HashVerifyReport::default();
367        let parent = &span;
368        let results: Vec<(PathBuf, FileVerifyOutcome, u64)> = self
369            .tasks
370            .into_par_iter()
371            .map(|(path, expected)| {
372                parent.in_scope(|| {
373                    let sub = debug_span!("verify_file", path = %path.display());
374                    let _e = sub.enter();
375                    let mut scratch = vec![0u8; READ_BUF_CAPACITY];
376                    let (outcome, bytes) = verify_one(&path, &expected, &mut scratch);
377                    match &outcome {
378                        FileVerifyOutcome::Match => {
379                            debug!(bytes_hashed = bytes, "verify_hashes: file match");
380                        }
381                        FileVerifyOutcome::Missing => {
382                            warn!("verify_hashes: file missing");
383                        }
384                        FileVerifyOutcome::IoError { kind, message } => {
385                            warn!(?kind, error = %message, "verify_hashes: io error during hash");
386                        }
387                        FileVerifyOutcome::WholeMismatch { .. } => {
388                            debug!(bytes_hashed = bytes, "verify_hashes: whole-file mismatch");
389                        }
390                        FileVerifyOutcome::BlockMismatches {
391                            mismatched_blocks, ..
392                        } => {
393                            debug!(
394                                bytes_hashed = bytes,
395                                bad_blocks = mismatched_blocks.len(),
396                                "verify_hashes: block-mode mismatches"
397                            );
398                        }
399                    }
400                    (path, outcome, bytes)
401                })
402            })
403            .collect();
404
405        let mut total_bytes: u64 = 0;
406        for (path, outcome, bytes) in results {
407            total_bytes += bytes;
408            report.files.insert(path, outcome);
409        }
410
411        let failures = report.failure_count();
412        info!(
413            files = report.files.len(),
414            failures,
415            bytes_hashed = total_bytes,
416            elapsed_ms = started.elapsed().as_millis() as u64,
417            "verify_hashes: run complete"
418        );
419        Ok(report)
420    }
421}
422
423fn verify_one(
424    path: &Path,
425    expected: &ExpectedHash,
426    scratch: &mut [u8],
427) -> (FileVerifyOutcome, u64) {
428    let mut file = match File::open(path) {
429        Ok(f) => f,
430        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
431            return (FileVerifyOutcome::Missing, 0);
432        }
433        Err(e) => {
434            return (
435                FileVerifyOutcome::IoError {
436                    kind: e.kind(),
437                    message: e.to_string(),
438                },
439                0,
440            );
441        }
442    };
443
444    match expected {
445        ExpectedHash::Whole { algorithm, hash } => match hash_whole(*algorithm, &mut file, scratch)
446        {
447            Ok((actual, n)) => {
448                if actual.as_slice() == hash.as_slice() {
449                    (FileVerifyOutcome::Match, n)
450                } else {
451                    (
452                        FileVerifyOutcome::WholeMismatch {
453                            expected: hash.clone(),
454                            actual,
455                        },
456                        n,
457                    )
458                }
459            }
460            Err(e) => (
461                FileVerifyOutcome::IoError {
462                    kind: e.kind(),
463                    message: e.to_string(),
464                },
465                0,
466            ),
467        },
468        ExpectedHash::Blocks {
469            algorithm,
470            block_size,
471            hashes,
472        } => hash_blocks(*algorithm, &mut file, *block_size, hashes, scratch),
473    }
474}
475
476fn hash_whole<R: Read>(
477    algo: HashAlgorithm,
478    reader: &mut R,
479    scratch: &mut [u8],
480) -> std::io::Result<(Vec<u8>, u64)> {
481    match algo {
482        HashAlgorithm::Sha1 => {
483            let mut hasher = Sha1::new();
484            let mut total: u64 = 0;
485            loop {
486                let n = reader.read(scratch)?;
487                if n == 0 {
488                    break;
489                }
490                hasher.update(&scratch[..n]);
491                total += n as u64;
492                trace!(chunk_bytes = n, "verify_hashes: whole-file chunk");
493            }
494            Ok((hasher.finalize().to_vec(), total))
495        }
496    }
497}
498
499fn hash_blocks<R: Read>(
500    algo: HashAlgorithm,
501    reader: &mut R,
502    block_size: u64,
503    expected: &[Vec<u8>],
504    scratch: &mut [u8],
505) -> (FileVerifyOutcome, u64) {
506    // Stream-hash one block at a time so memory stays O(scratch) regardless of
507    // file size.
508    let mut mismatched: Vec<usize> = Vec::new();
509    let mut block_idx: usize = 0;
510    let mut total_bytes: u64 = 0;
511    let mut hasher = block_hasher(algo);
512    let mut block_bytes_remaining: u64 = block_size;
513    let mut block_had_bytes = false;
514
515    loop {
516        // Cap reads so we never spill across a block boundary.
517        let want = block_bytes_remaining.min(scratch.len() as u64) as usize;
518        if want == 0 {
519            finish_and_compare(algo, &mut hasher, block_idx, expected, &mut mismatched);
520            block_idx += 1;
521            block_bytes_remaining = block_size;
522            block_had_bytes = false;
523            continue;
524        }
525        let n = match reader.read(&mut scratch[..want]) {
526            Ok(n) => n,
527            Err(e) => {
528                return (
529                    FileVerifyOutcome::IoError {
530                        kind: e.kind(),
531                        message: e.to_string(),
532                    },
533                    total_bytes,
534                );
535            }
536        };
537        if n == 0 {
538            if block_had_bytes {
539                // Trailing short block at EOF — finalize and compare.
540                finish_and_compare(algo, &mut hasher, block_idx, expected, &mut mismatched);
541                block_idx += 1;
542            }
543            break;
544        }
545        match &mut hasher {
546            BlockHasher::Sha1(h) => h.update(&scratch[..n]),
547        }
548        total_bytes += n as u64;
549        block_bytes_remaining -= n as u64;
550        block_had_bytes = true;
551        trace!(block_idx, chunk_bytes = n, "verify_hashes: block chunk");
552    }
553
554    // File ran out before we hit `expected.len()` blocks — flag each missing
555    // index as a mismatch. Conversely, if more blocks fit than the caller
556    // supplied, every excess block index past `expected.len()` has already
557    // been flagged inside `finish_and_compare`.
558    for missing in block_idx..expected.len() {
559        mismatched.push(missing);
560    }
561
562    let actual_block_count = block_idx;
563    let expected_block_count = expected.len();
564    let outcome = if mismatched.is_empty() && actual_block_count == expected_block_count {
565        FileVerifyOutcome::Match
566    } else {
567        mismatched.sort_unstable();
568        mismatched.dedup();
569        FileVerifyOutcome::BlockMismatches {
570            mismatched_blocks: mismatched,
571            expected_block_count,
572            actual_block_count,
573        }
574    };
575    (outcome, total_bytes)
576}
577
578enum BlockHasher {
579    Sha1(Sha1),
580}
581
582fn block_hasher(algo: HashAlgorithm) -> BlockHasher {
583    match algo {
584        HashAlgorithm::Sha1 => BlockHasher::Sha1(Sha1::new()),
585    }
586}
587
588fn finish_and_compare(
589    algo: HashAlgorithm,
590    hasher: &mut BlockHasher,
591    block_idx: usize,
592    expected: &[Vec<u8>],
593    mismatched: &mut Vec<usize>,
594) {
595    // Replace the in-progress hasher with a fresh one, taking ownership of the
596    // finished state so we can finalize it without disturbing the loop.
597    let finished = std::mem::replace(hasher, block_hasher(algo));
598    let digest: Vec<u8> = match finished {
599        BlockHasher::Sha1(h) => h.finalize().to_vec(),
600    };
601    match expected.get(block_idx) {
602        Some(want) if want.as_slice() == digest.as_slice() => {}
603        _ => mismatched.push(block_idx),
604    }
605}
606
607#[cfg(test)]
608mod tests {
609    use super::*;
610    use std::io::Write;
611
612    fn sha1_of(bytes: &[u8]) -> Vec<u8> {
613        let mut h = Sha1::new();
614        h.update(bytes);
615        h.finalize().to_vec()
616    }
617
618    fn write_tmp(bytes: &[u8]) -> (tempfile::TempDir, PathBuf) {
619        let dir = tempfile::tempdir().unwrap();
620        let path = dir.path().join("f.bin");
621        let mut f = File::create(&path).unwrap();
622        f.write_all(bytes).unwrap();
623        f.sync_all().unwrap();
624        (dir, path)
625    }
626
627    #[test]
628    fn report_is_clean_when_empty() {
629        let r = HashVerifyReport::default();
630        assert!(r.is_clean());
631        assert_eq!(r.failure_count(), 0);
632        assert_eq!(r.failures().count(), 0);
633    }
634
635    #[test]
636    fn whole_sha1_match() {
637        let payload = b"hello world".repeat(1000);
638        let (_d, path) = write_tmp(&payload);
639        let report = HashVerifier::new()
640            .expect(&path, ExpectedHash::whole_sha1(sha1_of(&payload)))
641            .execute()
642            .unwrap();
643        assert!(report.is_clean(), "got {report:?}");
644    }
645
646    #[test]
647    fn whole_sha1_mismatch() {
648        let (_d, path) = write_tmp(b"abc");
649        let bad = vec![0u8; 20];
650        let report = HashVerifier::new()
651            .expect(&path, ExpectedHash::whole_sha1(bad.clone()))
652            .execute()
653            .unwrap();
654        assert!(!report.is_clean());
655        match report.files.get(&path).unwrap() {
656            FileVerifyOutcome::WholeMismatch { expected, actual } => {
657                assert_eq!(expected, &bad);
658                assert_eq!(actual, &sha1_of(b"abc"));
659            }
660            other => panic!("expected WholeMismatch, got {other:?}"),
661        }
662    }
663
664    #[test]
665    fn block_mode_match() {
666        let block_size: u64 = 256;
667        let mut payload = Vec::new();
668        for i in 0..5u8 {
669            payload.extend(std::iter::repeat_n(i, block_size as usize));
670        }
671        // Add a short trailing block.
672        payload.extend_from_slice(&[0xAB; 17]);
673
674        let hashes: Vec<Vec<u8>> = payload.chunks(block_size as usize).map(sha1_of).collect();
675        let (_d, path) = write_tmp(&payload);
676
677        let report = HashVerifier::new()
678            .expect(&path, ExpectedHash::blocks_sha1(block_size, hashes.clone()))
679            .execute()
680            .unwrap();
681        assert!(report.is_clean(), "got {report:?}");
682        assert_eq!(hashes.len(), 6); // 5 full + 1 short
683    }
684
685    #[test]
686    fn block_mode_specific_block_mismatch() {
687        let block_size: u64 = 128;
688        let mut payload = vec![0u8; (block_size as usize) * 4];
689        // Corrupt block 2 by writing to the on-disk file *after* computing the
690        // expected hashes from the clean payload.
691        let clean = payload.clone();
692        payload[(block_size as usize) * 2 + 7] = 0xFF;
693
694        let expected: Vec<Vec<u8>> = clean.chunks(block_size as usize).map(sha1_of).collect();
695        let (_d, path) = write_tmp(&payload);
696
697        let report = HashVerifier::new()
698            .expect(&path, ExpectedHash::blocks_sha1(block_size, expected))
699            .execute()
700            .unwrap();
701        match report.files.get(&path).unwrap() {
702            FileVerifyOutcome::BlockMismatches {
703                mismatched_blocks,
704                expected_block_count,
705                actual_block_count,
706            } => {
707                assert_eq!(mismatched_blocks, &vec![2]);
708                assert_eq!(*expected_block_count, 4);
709                assert_eq!(*actual_block_count, 4);
710            }
711            other => panic!("expected BlockMismatches, got {other:?}"),
712        }
713    }
714
715    #[test]
716    fn missing_file_reported() {
717        let dir = tempfile::tempdir().unwrap();
718        let missing = dir.path().join("does-not-exist");
719        let report = HashVerifier::new()
720            .expect(&missing, ExpectedHash::whole_sha1(vec![0u8; 20]))
721            .execute()
722            .unwrap();
723        assert_eq!(
724            report.files.get(&missing).unwrap(),
725            &FileVerifyOutcome::Missing
726        );
727        assert!(!report.is_clean());
728    }
729
730    #[test]
731    fn block_mode_file_shorter_than_expected_flags_trailing_missing_blocks() {
732        let block_size: u64 = 64;
733        // On-disk file: 2 full blocks. Caller expects 4 blocks of hashes.
734        let payload = vec![0u8; (block_size as usize) * 2];
735        let expected: Vec<Vec<u8>> = payload
736            .chunks(block_size as usize)
737            .map(sha1_of)
738            .chain(std::iter::repeat_n(vec![0u8; 20], 2))
739            .collect();
740        assert_eq!(expected.len(), 4);
741        let (_d, path) = write_tmp(&payload);
742
743        let report = HashVerifier::new()
744            .expect(&path, ExpectedHash::blocks_sha1(block_size, expected))
745            .execute()
746            .unwrap();
747        match report.files.get(&path).unwrap() {
748            FileVerifyOutcome::BlockMismatches {
749                mismatched_blocks,
750                expected_block_count,
751                actual_block_count,
752            } => {
753                assert_eq!(*expected_block_count, 4);
754                assert_eq!(*actual_block_count, 2);
755                assert_eq!(mismatched_blocks, &vec![2, 3]);
756            }
757            other => panic!("expected BlockMismatches, got {other:?}"),
758        }
759    }
760
761    #[test]
762    fn block_mode_file_longer_than_expected_flags_extra_blocks() {
763        let block_size: u64 = 32;
764        let payload = vec![0u8; (block_size as usize) * 4];
765        // Caller supplies only 2 of 4 block hashes (matching the first two).
766        let expected: Vec<Vec<u8>> = payload
767            .chunks(block_size as usize)
768            .take(2)
769            .map(sha1_of)
770            .collect();
771        let (_d, path) = write_tmp(&payload);
772
773        let report = HashVerifier::new()
774            .expect(&path, ExpectedHash::blocks_sha1(block_size, expected))
775            .execute()
776            .unwrap();
777        match report.files.get(&path).unwrap() {
778            FileVerifyOutcome::BlockMismatches {
779                mismatched_blocks,
780                expected_block_count,
781                actual_block_count,
782            } => {
783                assert_eq!(*expected_block_count, 2);
784                assert_eq!(*actual_block_count, 4);
785                assert_eq!(mismatched_blocks, &vec![2, 3]);
786            }
787            other => panic!("expected BlockMismatches, got {other:?}"),
788        }
789    }
790
791    #[test]
792    fn empty_file_whole_mode_matches_sha1_of_empty() {
793        let (_d, path) = write_tmp(&[]);
794        let report = HashVerifier::new()
795            .expect(&path, ExpectedHash::whole_sha1(sha1_of(&[])))
796            .execute()
797            .unwrap();
798        assert!(report.is_clean());
799    }
800
801    #[test]
802    fn empty_file_block_mode_matches_zero_blocks() {
803        // Zero blocks expected; zero blocks present.
804        let (_d, path) = write_tmp(&[]);
805        let report = HashVerifier::new()
806            .expect(&path, ExpectedHash::blocks_sha1(1024, vec![]))
807            .execute()
808            .unwrap();
809        assert!(report.is_clean());
810    }
811
812    #[test]
813    fn zero_block_size_is_rejected_up_front() {
814        let dir = tempfile::tempdir().unwrap();
815        let path = dir.path().join("any");
816        let err = HashVerifier::new()
817            .expect(&path, ExpectedHash::blocks_sha1(0, vec![]))
818            .execute()
819            .unwrap_err();
820        assert!(
821            matches!(err, crate::ZiPatchError::InvalidField { context } if context.contains("block_size")),
822            "got {err:?}"
823        );
824    }
825
826    #[test]
827    fn whole_mode_wrong_digest_length_is_rejected_up_front() {
828        let (_d, path) = write_tmp(b"x");
829        let err = HashVerifier::new()
830            .expect(&path, ExpectedHash::whole_sha1(vec![0u8; 19]))
831            .execute()
832            .unwrap_err();
833        assert!(
834            matches!(err, crate::ZiPatchError::InvalidField { .. }),
835            "got {err:?}"
836        );
837    }
838
839    #[test]
840    fn block_mode_wrong_per_block_digest_length_is_rejected_up_front() {
841        let (_d, path) = write_tmp(b"y");
842        let bad = ExpectedHash::Blocks {
843            algorithm: HashAlgorithm::Sha1,
844            block_size: 16,
845            hashes: vec![vec![0u8; 19]],
846        };
847        let err = HashVerifier::new()
848            .expect(&path, bad)
849            .execute()
850            .unwrap_err();
851        assert!(matches!(err, crate::ZiPatchError::InvalidField { .. }));
852    }
853
854    #[test]
855    fn block_mode_block_size_exceeds_read_buf_capacity_match() {
856        // Each block is larger than READ_BUF_CAPACITY (64 KiB) so the inner
857        // read loop must iterate multiple times before `want == 0` triggers
858        // the finalize branch. Use 200 KiB blocks: 3 full + 1 short trailing.
859        let block_size: u64 = 200 * 1024;
860        let mut payload = Vec::with_capacity((block_size as usize) * 3 + 17);
861        for i in 0..3u8 {
862            payload.extend(std::iter::repeat_n(i.wrapping_mul(31), block_size as usize));
863        }
864        payload.extend_from_slice(&[0xCD; 17]);
865
866        let hashes: Vec<Vec<u8>> = payload.chunks(block_size as usize).map(sha1_of).collect();
867        assert_eq!(hashes.len(), 4);
868        let (_d, path) = write_tmp(&payload);
869
870        let report = HashVerifier::new()
871            .expect(&path, ExpectedHash::blocks_sha1(block_size, hashes))
872            .execute()
873            .unwrap();
874        assert!(report.is_clean(), "got {report:?}");
875    }
876
877    #[test]
878    fn block_mode_block_size_exceeds_read_buf_capacity_mismatch() {
879        // Same shape as the match test, but corrupt a byte deep inside block 1
880        // (past the first 64 KiB read) so the mismatch only surfaces if the
881        // multi-read accumulation inside a single block works.
882        let block_size: u64 = 200 * 1024;
883        let mut payload = Vec::with_capacity((block_size as usize) * 3);
884        for i in 0..3u8 {
885            payload.extend(std::iter::repeat_n(i.wrapping_mul(17), block_size as usize));
886        }
887        let clean = payload.clone();
888        // Corrupt block 1 at offset 150 KiB (well past the first 64 KiB read).
889        payload[(block_size as usize) + 150 * 1024] ^= 0xFF;
890
891        let expected: Vec<Vec<u8>> = clean.chunks(block_size as usize).map(sha1_of).collect();
892        let (_d, path) = write_tmp(&payload);
893
894        let report = HashVerifier::new()
895            .expect(&path, ExpectedHash::blocks_sha1(block_size, expected))
896            .execute()
897            .unwrap();
898        match report.files.get(&path).unwrap() {
899            FileVerifyOutcome::BlockMismatches {
900                mismatched_blocks,
901                expected_block_count,
902                actual_block_count,
903            } => {
904                assert_eq!(mismatched_blocks, &vec![1]);
905                assert_eq!(*expected_block_count, 3);
906                assert_eq!(*actual_block_count, 3);
907            }
908            other => panic!("expected BlockMismatches, got {other:?}"),
909        }
910    }
911
912    #[test]
913    fn block_mode_single_short_block_distinguishes_from_empty_file() {
914        // File shorter than `block_size` with exactly one expected hash. This
915        // exercises the trailing-short-block finalize path when the *only*
916        // block is short — distinct from the empty-file (zero blocks) path.
917        let block_size: u64 = 200 * 1024;
918        let payload = vec![0x7Eu8; 1000]; // far less than block_size
919        let hashes = vec![sha1_of(&payload)];
920        let (_d, path) = write_tmp(&payload);
921
922        let report = HashVerifier::new()
923            .expect(&path, ExpectedHash::blocks_sha1(block_size, hashes))
924            .execute()
925            .unwrap();
926        assert!(report.is_clean(), "got {report:?}");
927    }
928
929    #[cfg(target_family = "unix")]
930    #[test]
931    fn permission_denied_open_reports_io_error_with_kind() {
932        use std::os::unix::fs::PermissionsExt;
933
934        let (_d, path) = write_tmp(b"forbidden");
935        // Drop read permission. TempDir cleanup uses unlink (not
936        // open-for-read), so 0o000 on the file itself does not block cleanup.
937        std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o000)).unwrap();
938
939        // Skip when running as root — chmod 0o000 is bypassed by
940        // CAP_DAC_OVERRIDE, so root can still open the file. Probe via
941        // File::open: if the open succeeds against 0o000, the running user
942        // has the cap and the test would not exercise the IoError path.
943        if File::open(&path).is_ok() {
944            std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o644)).unwrap();
945            eprintln!("skipping: running with CAP_DAC_OVERRIDE, chmod 0o000 does not block open");
946            return;
947        }
948
949        let report = HashVerifier::new()
950            .expect(&path, ExpectedHash::whole_sha1(vec![0u8; 20]))
951            .execute()
952            .unwrap();
953
954        // Restore so the TempDir cleanup is robust regardless of platform quirks.
955        std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o644)).unwrap();
956
957        match report.files.get(&path).unwrap() {
958            FileVerifyOutcome::IoError { kind, message } => {
959                assert_eq!(*kind, std::io::ErrorKind::PermissionDenied, "got {kind:?}");
960                assert!(!message.is_empty(), "message should carry the error text");
961            }
962            other => panic!("expected IoError with PermissionDenied kind, got {other:?}"),
963        }
964    }
965
    // Note: the mid-read `Err` branch in `hash_blocks` (the second `IoError`
    // construction site) cannot be provoked deterministically through a real
    // `File`. It is exercised further down by
    // `hash_blocks_surfaces_mid_read_io_error_as_outcome`, which feeds
    // `hash_blocks` the `FailAfter` reader. The permission-denied test above
    // covers the open-time `IoError` construction shape (kind + message).
972
973    #[test]
974    fn duplicate_identical_registration_is_noop() {
975        let (_d, path) = write_tmp(b"abc");
976        let expected = ExpectedHash::whole_sha1(sha1_of(b"abc"));
977        let report = HashVerifier::new()
978            .expect(&path, expected.clone())
979            .expect(&path, expected)
980            .execute()
981            .unwrap();
982        assert!(report.is_clean(), "got {report:?}");
983        assert_eq!(report.files.len(), 1);
984    }
985
986    #[test]
987    fn duplicate_conflicting_registration_errors() {
988        let (_d, path) = write_tmp(b"abc");
989        let err = HashVerifier::new()
990            .expect(&path, ExpectedHash::whole_sha1(sha1_of(b"abc")))
991            .expect(&path, ExpectedHash::whole_sha1(vec![0u8; 20]))
992            .execute()
993            .unwrap_err();
994        assert!(
995            matches!(err, crate::ZiPatchError::InvalidField { context } if context.contains("conflicting")),
996            "got {err:?}"
997        );
998    }
999
1000    #[test]
1001    fn failures_iter_excludes_matches() {
1002        let (_d1, ok) = write_tmp(b"a");
1003        let (_d2, bad) = write_tmp(b"b");
1004        let report = HashVerifier::new()
1005            .expect(&ok, ExpectedHash::whole_sha1(sha1_of(b"a")))
1006            .expect(&bad, ExpectedHash::whole_sha1(vec![0u8; 20]))
1007            .execute()
1008            .unwrap();
1009        let fails: Vec<_> = report.failures().collect();
1010        assert_eq!(fails.len(), 1);
1011        assert_eq!(fails[0].0, bad.as_path());
1012    }
1013
1014    /// Reader that yields `n_ok` bytes of zeros, then fails on the next read
1015    /// with the given `ErrorKind`. Used to exercise the mid-read IO error
1016    /// branches in `hash_whole` and `hash_blocks`.
1017    struct FailAfter {
1018        remaining_ok: usize,
1019        kind: std::io::ErrorKind,
1020    }
1021
1022    impl Read for FailAfter {
1023        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1024            if self.remaining_ok == 0 {
1025                return Err(std::io::Error::new(self.kind, "injected"));
1026            }
1027            let n = self.remaining_ok.min(buf.len());
1028            buf[..n].fill(0);
1029            self.remaining_ok -= n;
1030            Ok(n)
1031        }
1032    }
1033
1034    #[test]
1035    fn hash_whole_propagates_mid_read_io_error() {
1036        let mut reader = FailAfter {
1037            remaining_ok: 32,
1038            kind: std::io::ErrorKind::Other,
1039        };
1040        let mut scratch = vec![0u8; 16];
1041        let err = hash_whole(HashAlgorithm::Sha1, &mut reader, &mut scratch).unwrap_err();
1042        assert_eq!(err.kind(), std::io::ErrorKind::Other);
1043    }
1044
1045    #[test]
1046    fn hash_blocks_surfaces_mid_read_io_error_as_outcome() {
1047        let mut reader = FailAfter {
1048            remaining_ok: 40,
1049            kind: std::io::ErrorKind::ConnectionAborted,
1050        };
1051        let mut scratch = vec![0u8; 16];
1052        let expected = vec![vec![0u8; 20]; 4];
1053        let (outcome, bytes) = hash_blocks(
1054            HashAlgorithm::Sha1,
1055            &mut reader,
1056            64,
1057            &expected,
1058            &mut scratch,
1059        );
1060        match outcome {
1061            FileVerifyOutcome::IoError { kind, .. } => {
1062                assert_eq!(kind, std::io::ErrorKind::ConnectionAborted);
1063            }
1064            other => panic!("expected IoError outcome, got {other:?}"),
1065        }
1066        assert_eq!(
1067            bytes, 40,
1068            "bytes hashed up to the failure should be reported"
1069        );
1070    }
1071
1072    // --- execute() with zero tasks ---
1073
1074    #[test]
1075    fn execute_with_no_tasks_returns_clean_empty_report() {
1076        let report = HashVerifier::new().execute().unwrap();
1077        assert!(report.is_clean());
1078        assert_eq!(report.files.len(), 0);
1079        assert_eq!(report.failure_count(), 0);
1080    }
1081
1082    // --- HashVerifyReport invariants ---
1083
1084    #[test]
1085    fn report_nonempty_all_match_is_clean() {
1086        let (_d1, p1) = write_tmp(b"one");
1087        let (_d2, p2) = write_tmp(b"two");
1088        let report = HashVerifier::new()
1089            .expect(&p1, ExpectedHash::whole_sha1(sha1_of(b"one")))
1090            .expect(&p2, ExpectedHash::whole_sha1(sha1_of(b"two")))
1091            .execute()
1092            .unwrap();
1093        assert_eq!(report.files.len(), 2);
1094        assert!(report.is_clean());
1095        assert_eq!(report.failure_count(), 0);
1096        assert_eq!(report.failures().count(), 0);
1097    }
1098
1099    #[test]
1100    fn failure_count_equals_failures_iter_count() {
1101        let (_d1, ok) = write_tmp(b"good");
1102        let (_d2, bad1) = write_tmp(b"bad1");
1103        let (_d3, bad2) = write_tmp(b"bad2");
1104        let report = HashVerifier::new()
1105            .expect(&ok, ExpectedHash::whole_sha1(sha1_of(b"good")))
1106            .expect(&bad1, ExpectedHash::whole_sha1(vec![0u8; 20]))
1107            .expect(&bad2, ExpectedHash::whole_sha1(vec![0u8; 20]))
1108            .execute()
1109            .unwrap();
1110        assert_eq!(report.failure_count(), report.failures().count());
1111        assert_eq!(report.failure_count(), 2);
1112    }
1113
1114    #[test]
1115    fn report_files_iteration_order_is_by_path() {
1116        // BTreeMap guarantees sorted-key iteration; verify the contract holds
1117        // by registering paths out of lexicographic order and checking order.
1118        let dir = tempfile::tempdir().unwrap();
1119        let pb = dir.path().join("b.bin");
1120        let pa = dir.path().join("a.bin");
1121        let pc = dir.path().join("c.bin");
1122        for p in [&pb, &pa, &pc] {
1123            let mut f = File::create(p).unwrap();
1124            f.write_all(b"x").unwrap();
1125        }
1126        let report = HashVerifier::new()
1127            .expect(&pb, ExpectedHash::whole_sha1(sha1_of(b"x")))
1128            .expect(&pa, ExpectedHash::whole_sha1(sha1_of(b"x")))
1129            .expect(&pc, ExpectedHash::whole_sha1(sha1_of(b"x")))
1130            .execute()
1131            .unwrap();
1132        let keys: Vec<&PathBuf> = report.files.keys().collect();
1133        assert_eq!(keys[0], &pa);
1134        assert_eq!(keys[1], &pb);
1135        assert_eq!(keys[2], &pc);
1136    }
1137
1138    // --- FileVerifyOutcome derive sanity ---
1139
1140    #[test]
1141    fn file_verify_outcome_clone_and_partialeq() {
1142        let outcomes = [
1143            FileVerifyOutcome::Match,
1144            FileVerifyOutcome::Missing,
1145            FileVerifyOutcome::WholeMismatch {
1146                expected: vec![0u8; 20],
1147                actual: vec![1u8; 20],
1148            },
1149            FileVerifyOutcome::BlockMismatches {
1150                mismatched_blocks: vec![0, 2],
1151                expected_block_count: 3,
1152                actual_block_count: 3,
1153            },
1154            FileVerifyOutcome::IoError {
1155                kind: std::io::ErrorKind::Other,
1156                message: "oops".to_string(),
1157            },
1158        ];
1159        for o in &outcomes {
1160            let cloned = o.clone();
1161            assert_eq!(o, &cloned, "Clone+PartialEq round-trip failed for {o:?}");
1162        }
1163        assert_ne!(
1164            FileVerifyOutcome::Match,
1165            FileVerifyOutcome::Missing,
1166            "distinct variants must not compare equal"
1167        );
1168    }
1169
1170    // --- ExpectedHash::validate paths ---
1171
1172    #[test]
1173    fn blocks_validate_valid_then_invalid_hash_surfaces_error() {
1174        let (_d, path) = write_tmp(b"z");
1175        let bad = ExpectedHash::Blocks {
1176            algorithm: HashAlgorithm::Sha1,
1177            block_size: 8,
1178            hashes: vec![
1179                vec![0u8; 20], // valid length
1180                vec![0u8; 5],  // invalid length — should surface the error
1181            ],
1182        };
1183        let err = HashVerifier::new()
1184            .expect(&path, bad)
1185            .execute()
1186            .unwrap_err();
1187        assert!(matches!(err, crate::ZiPatchError::InvalidField { .. }));
1188    }
1189
1190    // --- HashVerifier::expect builder semantics ---
1191
1192    #[test]
1193    fn many_chained_expects_all_evaluated() {
1194        let dir = tempfile::tempdir().unwrap();
1195        let n = 10usize;
1196        let mut builder = HashVerifier::new();
1197        let mut paths = Vec::with_capacity(n);
1198        for i in 0..n {
1199            let p = dir.path().join(format!("f{i}.bin"));
1200            let mut f = File::create(&p).unwrap();
1201            f.write_all(&[i as u8]).unwrap();
1202            builder = builder.expect(&p, ExpectedHash::whole_sha1(sha1_of(&[i as u8])));
1203            paths.push(p);
1204        }
1205        let report = builder.execute().unwrap();
1206        assert_eq!(report.files.len(), n);
1207        assert!(report.is_clean(), "got {report:?}");
1208    }
1209
1210    #[test]
1211    fn whole_then_blocks_registration_for_same_path_conflicts() {
1212        let (_d, path) = write_tmp(b"hi");
1213        let err = HashVerifier::new()
1214            .expect(&path, ExpectedHash::whole_sha1(sha1_of(b"hi")))
1215            .expect(&path, ExpectedHash::blocks_sha1(2, vec![sha1_of(b"hi")]))
1216            .execute()
1217            .unwrap_err();
1218        assert!(
1219            matches!(err, crate::ZiPatchError::InvalidField { context } if context.contains("conflicting")),
1220            "got {err:?}"
1221        );
1222    }
1223
1224    // --- Block boundary conditions ---
1225
1226    #[test]
1227    fn block_mode_exact_multiple_of_block_size_no_trailing() {
1228        let block_size: u64 = 64;
1229        let payload = vec![0xAAu8; (block_size as usize) * 3];
1230        let hashes: Vec<Vec<u8>> = payload.chunks(block_size as usize).map(sha1_of).collect();
1231        assert_eq!(hashes.len(), 3);
1232        let (_d, path) = write_tmp(&payload);
1233        let report = HashVerifier::new()
1234            .expect(&path, ExpectedHash::blocks_sha1(block_size, hashes))
1235            .execute()
1236            .unwrap();
1237        assert!(report.is_clean(), "got {report:?}");
1238    }
1239
1240    #[test]
1241    fn block_mode_n_blocks_plus_one_byte_trailing() {
1242        let block_size: u64 = 64;
1243        let mut payload = vec![0xBBu8; (block_size as usize) * 3];
1244        payload.push(0xCC);
1245        let hashes: Vec<Vec<u8>> = payload.chunks(block_size as usize).map(sha1_of).collect();
1246        assert_eq!(hashes.len(), 4);
1247        let (_d, path) = write_tmp(&payload);
1248        let report = HashVerifier::new()
1249            .expect(&path, ExpectedHash::blocks_sha1(block_size, hashes))
1250            .execute()
1251            .unwrap();
1252        assert!(report.is_clean(), "got {report:?}");
1253    }
1254
1255    #[test]
1256    fn block_mode_single_byte_file() {
1257        let (_d, path) = write_tmp(&[0x42]);
1258        let hashes = vec![sha1_of(&[0x42])];
1259        let report = HashVerifier::new()
1260            .expect(&path, ExpectedHash::blocks_sha1(1024, hashes))
1261            .execute()
1262            .unwrap();
1263        assert!(report.is_clean(), "got {report:?}");
1264    }
1265
1266    #[test]
1267    fn block_mode_block_size_one_each_byte_is_own_block() {
1268        let payload = b"abc";
1269        let hashes: Vec<Vec<u8>> = payload.iter().map(|b| sha1_of(&[*b])).collect();
1270        assert_eq!(hashes.len(), 3);
1271        let (_d, path) = write_tmp(payload);
1272        let report = HashVerifier::new()
1273            .expect(&path, ExpectedHash::blocks_sha1(1, hashes))
1274            .execute()
1275            .unwrap();
1276        assert!(report.is_clean(), "got {report:?}");
1277    }
1278
1279    // --- BlockHasher state isolation between blocks ---
1280
1281    #[test]
1282    fn block_hasher_state_does_not_bleed_between_identical_content_blocks() {
1283        // Both blocks contain the same bytes. Expected[0] matches; expected[1]
1284        // is deliberately wrong. If state bled, block 1's hash would equal
1285        // block 0's hash (which happens to equal expected[0]) — masking the
1286        // mismatch. A correct implementation resets the hasher between blocks,
1287        // so expected[1] != actual[1] and block 1 is flagged.
1288        let block_size: u64 = 32;
1289        let content = vec![0x5Au8; block_size as usize];
1290        let payload: Vec<u8> = content.iter().chain(content.iter()).copied().collect();
1291        let correct_hash = sha1_of(&content);
1292        let wrong_hash = vec![0u8; 20];
1293        assert_ne!(correct_hash, wrong_hash);
1294        let hashes = vec![correct_hash, wrong_hash];
1295        let (_d, path) = write_tmp(&payload);
1296        let report = HashVerifier::new()
1297            .expect(&path, ExpectedHash::blocks_sha1(block_size, hashes))
1298            .execute()
1299            .unwrap();
1300        match report.files.get(&path).unwrap() {
1301            FileVerifyOutcome::BlockMismatches {
1302                mismatched_blocks,
1303                expected_block_count,
1304                actual_block_count,
1305            } => {
1306                assert_eq!(mismatched_blocks, &vec![1]);
1307                assert_eq!(*expected_block_count, 2);
1308                assert_eq!(*actual_block_count, 2);
1309            }
1310            other => panic!("expected BlockMismatches for block 1 only, got {other:?}"),
1311        }
1312    }
1313
1314    // --- Path edge cases ---
1315
1316    #[test]
1317    fn path_with_spaces_and_utf8() {
1318        let dir = tempfile::tempdir().unwrap();
1319        let path = dir.path().join("file with spaces café.bin");
1320        let mut f = File::create(&path).unwrap();
1321        f.write_all(b"data").unwrap();
1322        f.sync_all().unwrap();
1323        let report = HashVerifier::new()
1324            .expect(&path, ExpectedHash::whole_sha1(sha1_of(b"data")))
1325            .execute()
1326            .unwrap();
1327        assert!(report.is_clean(), "got {report:?}");
1328    }
1329
1330    // --- Parallel fan-out determinism ---
1331
1332    // 32 files (above rayon's typical split threshold): half match, half don't.
1333    // Verifies that the parallel collector produces a sorted BTreeMap with the
1334    // right failure count, and that two runs on equivalent input are identical.
1335    #[test]
1336    fn parallel_fan_out_report_is_deterministic_and_sorted() {
1337        const N: usize = 32;
1338        let dir = tempfile::tempdir().unwrap();
1339        let mut builder = HashVerifier::new();
1340        let mut expected_failures = 0usize;
1341        let mut paths: Vec<PathBuf> = Vec::with_capacity(N);
1342        for i in 0..N {
1343            let p = dir.path().join(format!("file_{i:03}.bin"));
1344            let payload = vec![i as u8; 1024 * 1024];
1345            let mut f = File::create(&p).unwrap();
1346            f.write_all(&payload).unwrap();
1347            f.sync_all().unwrap();
1348            let hash = if i % 2 == 0 {
1349                sha1_of(&payload)
1350            } else {
1351                expected_failures += 1;
1352                vec![0u8; 20]
1353            };
1354            builder = builder.expect(&p, ExpectedHash::whole_sha1(hash));
1355            paths.push(p);
1356        }
1357
1358        let run1 = builder.execute().unwrap();
1359        assert_eq!(run1.files.len(), N);
1360        assert_eq!(run1.failure_count(), expected_failures);
1361
1362        let keys: Vec<&PathBuf> = run1.files.keys().collect();
1363        for w in keys.windows(2) {
1364            assert!(w[0] < w[1], "BTreeMap keys out of order: {w:?}");
1365        }
1366
1367        // Rebuild an equivalent verifier to check idempotence.
1368        let mut builder2 = HashVerifier::new();
1369        for (i, p) in paths.iter().enumerate() {
1370            let payload = vec![i as u8; 1024 * 1024];
1371            let hash = if i % 2 == 0 {
1372                sha1_of(&payload)
1373            } else {
1374                vec![0u8; 20]
1375            };
1376            builder2 = builder2.expect(p, ExpectedHash::whole_sha1(hash));
1377        }
1378        let run2 = builder2.execute().unwrap();
1379        assert_eq!(run1, run2, "two equivalent runs produced different reports");
1380    }
1381
1382    // Registers files in a shuffled (non-lexicographic) order and asserts that
1383    // the report's BTreeMap keys are still sorted, guarding against any future
1384    // replacement of BTreeMap with a hash-map in the merge loop.
1385    #[test]
1386    fn parallel_fan_out_shuffled_registration_order_report_sorted() {
1387        const N: usize = 32;
1388        let dir = tempfile::tempdir().unwrap();
1389        // Shuffled index sequence: reverse-order registration.
1390        let indices: Vec<usize> = (0..N).rev().collect();
1391        let mut builder = HashVerifier::new();
1392        let mut paths: Vec<PathBuf> = Vec::with_capacity(N);
1393        // Pre-create all files so paths vec is in 0..N order for comparison.
1394        for i in 0..N {
1395            let p = dir.path().join(format!("z_{i:03}.bin"));
1396            let mut f = File::create(&p).unwrap();
1397            f.write_all(&[i as u8]).unwrap();
1398            f.sync_all().unwrap();
1399            paths.push(p);
1400        }
1401        // Register in reverse order so the task list is not lexicographically sorted.
1402        for &i in &indices {
1403            let payload = [i as u8];
1404            builder = builder.expect(&paths[i], ExpectedHash::whole_sha1(sha1_of(&payload)));
1405        }
1406        let report = builder.execute().unwrap();
1407        assert_eq!(report.files.len(), N);
1408        assert!(report.is_clean(), "all files should match; got {report:?}");
1409        let keys: Vec<&PathBuf> = report.files.keys().collect();
1410        for w in keys.windows(2) {
1411            assert!(w[0] < w[1], "report keys not sorted: {w:?}");
1412        }
1413    }
1414}