// zipatch_rs/verify/mod.rs

//! Post-apply integrity check for files produced by either the sequential
//! [`apply_patch`](crate::ApplyConfig::apply_patch) driver or the indexed
//! [`IndexApplier::execute`](crate::index::IndexApplier::execute) driver.
//!
//! The per-chunk CRC32 the parser already enforces catches transit corruption
//! of the patch stream itself, but it cannot detect silent corruption of the
//! *resulting* `SqPack` files on disk. Square Enix's patch lists carry SHA1
//! hashes for the post-apply `.index` / `.dat` files (whole-file or split into
//! fixed-size blocks); [`HashVerifier`](crate::verify::HashVerifier) reads those files back from disk and
//! compares against caller-supplied expected hashes.
//!
//! This is a separate verification step the caller invokes **after**
//! [`apply_patch`](crate::ApplyConfig::apply_patch) or
//! [`IndexApplier::execute`](crate::index::IndexApplier::execute) returns
//! `Ok`. The library never bakes hash verification into the apply loop —
//! parsing the SE patch list to build the expected-hash input is the
//! consumer's responsibility (in practice, `gaveloc-patcher`).
//!
//! # Modes
//!
//! - **Whole-file** ([`ExpectedHash::Whole`](crate::verify::ExpectedHash::Whole)) — single hash over the entire
//!   file. Cheap to express; an opaque single failure for multi-GiB files.
//! - **Block-mode** ([`ExpectedHash::Blocks`](crate::verify::ExpectedHash::Blocks)) — file is split into
//!   fixed-size blocks (the SE patch list uses 50 MiB); one hash per block.
//!   Pinpoints *which* block is bad, so a user-facing repair flow can
//!   re-fetch a narrow range rather than the whole file.
//!
//! Only SHA-1 is supported — it is what Square Enix's patch list carries.
//! Should the format ever ship another algorithm, a new enum can wrap the
//! current shape as an additive change.
//!
//! # Example
//!
//! ```no_run
//! use zipatch_rs::verify::{ExpectedHash, HashVerifier, Sha1Digest};
//!
//! let report = HashVerifier::new()
//!     .expect(
//!         "/opt/ffxiv/game/sqpack/ffxiv/000000.win32.index",
//!         ExpectedHash::whole(Sha1Digest::new([0u8; 20])),
//!     )
//!     .execute()
//!     .unwrap();
//!
//! if !report.is_clean() {
//!     for (path, outcome) in report.failures() {
//!         eprintln!("{}: {outcome:?}", path.display());
//!     }
//! }
//! ```
51
52use crate::VerifyResult as Result;
53#[cfg(feature = "parallel-verify")]
54use rayon::iter::{IntoParallelIterator, ParallelIterator};
55use sha1::{Digest, Sha1};
56use std::collections::BTreeMap;
57use std::fmt;
58use std::fs::File;
59use std::io::Read;
60use std::path::{Path, PathBuf};
61use std::str::FromStr;
62use tracing::{debug, debug_span, info, info_span, trace, warn};
63
/// Size in bytes (64 KiB) of the scratch buffer each file is streamed through
/// while hashing.
const READ_BUF_CAPACITY: usize = 64 * 1024;
/// Length in bytes of a raw SHA-1 digest.
const SHA1_DIGEST_LEN: usize = 20;
66
67/// A 20-byte SHA-1 digest.
68///
69/// Used throughout [`crate::verify`] to carry both expected and computed
70/// hashes. The newtype guarantees the length-of-20 invariant by construction —
71/// no runtime length check, no `Vec<u8>` allocation per digest.
72///
73/// [`Display`](fmt::Display) renders as 40 lowercase hex characters;
74/// [`FromStr`] parses the same form (rejecting any input that is not exactly
75/// 40 hex characters).
76#[repr(transparent)]
77#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
78#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
79pub struct Sha1Digest([u8; SHA1_DIGEST_LEN]);
80
81impl Sha1Digest {
82    /// Construct from a 20-byte array.
83    #[must_use]
84    pub const fn new(bytes: [u8; SHA1_DIGEST_LEN]) -> Self {
85        Self(bytes)
86    }
87
88    /// Borrow the underlying 20-byte array.
89    #[must_use]
90    pub const fn as_bytes(&self) -> &[u8; SHA1_DIGEST_LEN] {
91        &self.0
92    }
93}
94
95impl From<[u8; SHA1_DIGEST_LEN]> for Sha1Digest {
96    fn from(bytes: [u8; SHA1_DIGEST_LEN]) -> Self {
97        Self(bytes)
98    }
99}
100
101impl From<Sha1Digest> for [u8; SHA1_DIGEST_LEN] {
102    fn from(d: Sha1Digest) -> Self {
103        d.0
104    }
105}
106
107impl fmt::Display for Sha1Digest {
108    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
109        for b in &self.0 {
110            write!(f, "{b:02x}")?;
111        }
112        Ok(())
113    }
114}
115
/// Error returned when [`Sha1Digest::from_str`] is given a malformed input.
///
/// Carries no payload: the only failure modes are a wrong length or a
/// non-hex character, and both are described by the same message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseSha1DigestError;

impl fmt::Display for ParseSha1DigestError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid SHA-1 digest: expected 40 lowercase or uppercase hex characters")
    }
}

// No underlying cause to expose, so the default `source()` (`None`) is used.
impl std::error::Error for ParseSha1DigestError {}
127
128impl FromStr for Sha1Digest {
129    type Err = ParseSha1DigestError;
130
131    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
132        if s.len() != SHA1_DIGEST_LEN * 2 {
133            return Err(ParseSha1DigestError);
134        }
135        let mut out = [0u8; SHA1_DIGEST_LEN];
136        let bytes = s.as_bytes();
137        for (i, slot) in out.iter_mut().enumerate() {
138            let hi = hex_nibble(bytes[i * 2])?;
139            let lo = hex_nibble(bytes[i * 2 + 1])?;
140            *slot = (hi << 4) | lo;
141        }
142        Ok(Self(out))
143    }
144}
145
146fn hex_nibble(b: u8) -> std::result::Result<u8, ParseSha1DigestError> {
147    match b {
148        b'0'..=b'9' => Ok(b - b'0'),
149        b'a'..=b'f' => Ok(b - b'a' + 10),
150        b'A'..=b'F' => Ok(b - b'A' + 10),
151        _ => Err(ParseSha1DigestError),
152    }
153}
154
155/// Expected hash spec for a single file.
156///
157/// Either a single whole-file digest, or a fixed-block-size digest per block.
158/// Block-mode is what FFXIV patch lists actually carry for `.dat` files
159/// (50 MiB blocks), because it pinpoints *which* block is bad. Whole-file
160/// mode is the natural fit for small files (e.g. `.index` files), where a
161/// single mismatched bit is best surfaced as a single failure.
162#[derive(Debug, Clone, PartialEq, Eq)]
163pub enum ExpectedHash {
164    /// Whole-file mode: a single SHA-1 digest over the entire file.
165    Whole {
166        /// Expected digest.
167        hash: Sha1Digest,
168    },
169    /// Block-mode: file is split into `block_size`-byte chunks, each hashed
170    /// independently. The last block may be shorter than `block_size`.
171    Blocks {
172        /// Block size in bytes. Must be non-zero.
173        block_size: u64,
174        /// One digest per block, in file order.
175        hashes: Vec<Sha1Digest>,
176    },
177}
178
179impl ExpectedHash {
180    /// Construct a whole-file SHA-1 spec.
181    #[must_use]
182    pub fn whole(hash: Sha1Digest) -> Self {
183        ExpectedHash::Whole { hash }
184    }
185
186    /// Construct a block-mode SHA-1 spec.
187    #[must_use]
188    pub fn blocks(block_size: u64, hashes: Vec<Sha1Digest>) -> Self {
189        ExpectedHash::Blocks { block_size, hashes }
190    }
191
192    fn validate(&self) -> Result<()> {
193        match self {
194            ExpectedHash::Whole { .. } => Ok(()),
195            ExpectedHash::Blocks { block_size, .. } => {
196                if *block_size == 0 {
197                    return Err(crate::VerifyError::InvalidField {
198                        context: "ExpectedHash::Blocks block_size must be non-zero",
199                    });
200                }
201                Ok(())
202            }
203        }
204    }
205}
206
207/// Per-file outcome of a [`HashVerifier::execute`] run.
208///
209/// `#[non_exhaustive]` so future outcome shapes (e.g. permission-denied
210/// split out from generic IO, or a partial-read outcome distinct from
211/// `IoError`) can be added without a `SemVer` break.
212#[non_exhaustive]
213#[derive(Debug, Clone, PartialEq, Eq)]
214pub enum FileVerifyOutcome {
215    /// File matched the expected hash (whole-file mode) or every block matched
216    /// (block-mode).
217    Match,
218    /// Whole-file mode: the computed digest did not equal the expected digest.
219    WholeMismatch {
220        /// Expected digest.
221        expected: Sha1Digest,
222        /// Digest computed over the on-disk file.
223        actual: Sha1Digest,
224    },
225    /// Block-mode: one or more blocks failed.
226    ///
227    /// `mismatched_blocks` holds the zero-based indices of blocks whose hash
228    /// did not match, in ascending order. `expected_block_count` is the number
229    /// of block hashes the caller supplied. `actual_block_count` is the number
230    /// of blocks the file would contain at `block_size` (i.e. `ceil(size /
231    /// block_size)`); a difference means the file is shorter or longer than
232    /// the caller's expectation and every "extra" or "missing" block index is
233    /// reported in `mismatched_blocks`.
234    BlockMismatches {
235        /// Zero-based indices of mismatched blocks, ascending.
236        mismatched_blocks: Vec<usize>,
237        /// Number of block hashes the caller supplied.
238        expected_block_count: usize,
239        /// Number of blocks the on-disk file would split into at `block_size`.
240        actual_block_count: usize,
241    },
242    /// The file does not exist on disk.
243    Missing,
244    /// An I/O error occurred while reading the file. `kind` is the
245    /// [`std::io::ErrorKind`] callers branch on (e.g. [`std::io::ErrorKind::PermissionDenied`]
246    /// to prompt for elevation, [`std::io::ErrorKind::NotFound`] is reported
247    /// as [`FileVerifyOutcome::Missing`] instead). `message` is the
248    /// `std::io::Error` `Display` rendering, preserved as a string so the
249    /// report stays `Clone + PartialEq` for downstream consumers.
250    IoError {
251        /// `std::io::ErrorKind` of the underlying error.
252        kind: std::io::ErrorKind,
253        /// Human-readable rendering of the error.
254        message: String,
255    },
256}
257
258/// Structured outcome of a [`HashVerifier::execute`] run.
259///
260/// One entry per file the caller registered via [`HashVerifier::expect`].
261/// Iteration order is by [`PathBuf`] ordering (the underlying `BTreeMap`).
262///
263/// `#[non_exhaustive]`: future per-run aggregate fields (totals, elapsed
264/// time, parallelism stats) may be added without a `SemVer` break.
265#[non_exhaustive]
266#[derive(Debug, Clone, PartialEq, Eq, Default)]
267pub struct HashVerifyReport {
268    /// Per-file outcome, keyed by the absolute path the caller registered.
269    pub files: BTreeMap<PathBuf, FileVerifyOutcome>,
270}
271
272impl HashVerifyReport {
273    /// `true` iff every registered file matched.
274    #[must_use]
275    pub fn is_clean(&self) -> bool {
276        self.files
277            .values()
278            .all(|o| matches!(o, FileVerifyOutcome::Match))
279    }
280
281    /// Iterate the failing files (everything that is not [`FileVerifyOutcome::Match`]).
282    pub fn failures(&self) -> impl Iterator<Item = (&Path, &FileVerifyOutcome)> {
283        self.files
284            .iter()
285            .filter(|(_, o)| !matches!(o, FileVerifyOutcome::Match))
286            .map(|(p, o)| (p.as_path(), o))
287    }
288
289    /// Count of failing files.
290    #[must_use]
291    pub fn failure_count(&self) -> usize {
292        self.failures().count()
293    }
294}
295
296/// Build up a set of `(path, expected_hash)` pairs, then [`Self::execute`] to
297/// hash the on-disk files and compare against the expected values.
298///
299/// The verifier never writes — it opens each registered file read-only, hashes
300/// it (whole-file or per-block), and produces a [`HashVerifyReport`]. Missing
301/// files and I/O errors during read are recorded as per-file outcomes rather
302/// than aborting the run — consumers want the full picture in a single pass.
303///
304/// # Error semantics
305///
306/// `execute` returns `Err` only for *programmer* errors detected up front
307/// (e.g. a zero `block_size`). Filesystem errors against the registered paths
308/// are captured per-file in [`FileVerifyOutcome::IoError`] /
309/// [`FileVerifyOutcome::Missing`].
310///
311/// # Security
312///
313/// Files are opened via [`std::fs::File::open`], which follows symbolic
314/// links on every platform `zipatch-rs` supports. The verifier itself never
315/// writes — the worst-case outcome of a hostile symlink pointed at a file
316/// outside the install root is an information-disclosure-via-hash: the
317/// target file's SHA1 would appear in the report's
318/// [`FileVerifyOutcome::WholeMismatch`] `actual` field.
319///
320/// If the caller derives registered paths from untrusted input (e.g. a
321/// patch-list response from a server that could be tampered with), it is
322/// **the caller's responsibility** to canonicalize the install root and
323/// reject paths that escape it before passing them to [`Self::expect`].
324/// `zipatch-rs` does not canonicalize or symlink-fence on the caller's
325/// behalf, because the appropriate root depends on the consumer's install
326/// layout.
327#[derive(Debug, Default)]
328pub struct HashVerifier {
329    tasks: Vec<(PathBuf, ExpectedHash)>,
330}
331
332impl HashVerifier {
333    /// Construct an empty verifier.
334    #[must_use]
335    pub fn new() -> Self {
336        Self::default()
337    }
338
339    /// Register `path` with `expected`.
340    ///
341    /// Registering the same path twice with **identical** [`ExpectedHash`]
342    /// values is a no-op (the second registration is silently absorbed at
343    /// [`Self::execute`] time). Registering the same path twice with
344    /// **different** [`ExpectedHash`] values is a programmer error and causes
345    /// [`Self::execute`] to return [`crate::VerifyError::InvalidField`].
346    /// The check fires at execute-time rather than here so the builder API
347    /// stays infallible.
348    #[must_use]
349    pub fn expect(mut self, path: impl Into<PathBuf>, expected: ExpectedHash) -> Self {
350        self.tasks.push((path.into(), expected));
351        self
352    }
353
354    /// Hash each registered file and compare against its expected hash.
355    ///
356    /// Returns a [`HashVerifyReport`] describing every file. The report is
357    /// always populated for every registered task — `is_clean()` distinguishes
358    /// a fully-passing run from a failing one. See the struct docs for the
359    /// error policy.
360    ///
361    /// # Errors
362    ///
363    /// Returns [`crate::VerifyError::InvalidField`] if any registered
364    /// [`ExpectedHash`] is malformed (zero `block_size`). Filesystem errors
365    /// are *not* returned here — they appear as
366    /// [`FileVerifyOutcome::IoError`] / [`FileVerifyOutcome::Missing`] entries
367    /// in the report.
368    pub fn execute(self) -> Result<HashVerifyReport> {
369        let span = info_span!(
370            crate::tracing_schema::span_names::VERIFY_HASHES,
371            files = self.tasks.len()
372        );
373        let _enter = span.enter();
374        let started = std::time::Instant::now();
375
376        for (_, exp) in &self.tasks {
377            exp.validate()?;
378        }
379
380        let mut seen: BTreeMap<&Path, &ExpectedHash> = BTreeMap::new();
381        for (path, exp) in &self.tasks {
382            match seen.get(path.as_path()) {
383                Some(prev) if *prev == exp => {}
384                Some(_) => {
385                    return Err(crate::VerifyError::InvalidField {
386                        context: "HashVerifier: same path registered with conflicting ExpectedHash values",
387                    });
388                }
389                None => {
390                    seen.insert(path.as_path(), exp);
391                }
392            }
393        }
394
395        let mut report = HashVerifyReport::default();
396        let parent = &span;
397        // The chain body is identical for both branches; only the iterator
398        // shape differs. Under `parallel-verify` the per-file work fans out
399        // across rayon's pool; without it the verifier runs serially in the
400        // calling thread and `rayon` is not a dependency.
401        #[cfg(feature = "parallel-verify")]
402        let task_iter = self.tasks.into_par_iter();
403        #[cfg(not(feature = "parallel-verify"))]
404        let task_iter = self.tasks.into_iter();
405        let results: Vec<(PathBuf, FileVerifyOutcome, u64)> = task_iter
406            .map(|(path, expected)| {
407                parent.in_scope(|| {
408                    let sub = debug_span!(
409                        crate::tracing_schema::span_names::VERIFY_FILE,
410                        path = %path.display()
411                    );
412                    let _e = sub.enter();
413                    let mut scratch = vec![0u8; READ_BUF_CAPACITY];
414                    let (outcome, bytes) = verify_one(&path, &expected, &mut scratch);
415                    match &outcome {
416                        FileVerifyOutcome::Match => {
417                            debug!(bytes_hashed = bytes, "verify_hashes: file match");
418                        }
419                        FileVerifyOutcome::Missing => {
420                            warn!("verify_hashes: file missing");
421                        }
422                        FileVerifyOutcome::IoError { kind, message } => {
423                            warn!(?kind, error = %message, "verify_hashes: io error during hash");
424                        }
425                        FileVerifyOutcome::WholeMismatch { .. } => {
426                            debug!(bytes_hashed = bytes, "verify_hashes: whole-file mismatch");
427                        }
428                        FileVerifyOutcome::BlockMismatches {
429                            mismatched_blocks, ..
430                        } => {
431                            debug!(
432                                bytes_hashed = bytes,
433                                bad_blocks = mismatched_blocks.len(),
434                                "verify_hashes: block-mode mismatches"
435                            );
436                        }
437                    }
438                    (path, outcome, bytes)
439                })
440            })
441            .collect();
442
443        let mut total_bytes: u64 = 0;
444        for (path, outcome, bytes) in results {
445            total_bytes += bytes;
446            report.files.insert(path, outcome);
447        }
448
449        let failures = report.failure_count();
450        info!(
451            files = report.files.len(),
452            failures,
453            bytes_hashed = total_bytes,
454            elapsed_ms = started.elapsed().as_millis() as u64,
455            "verify_hashes: run complete"
456        );
457        Ok(report)
458    }
459}
460
461fn verify_one(
462    path: &Path,
463    expected: &ExpectedHash,
464    scratch: &mut [u8],
465) -> (FileVerifyOutcome, u64) {
466    let mut file = match File::open(path) {
467        Ok(f) => f,
468        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
469            return (FileVerifyOutcome::Missing, 0);
470        }
471        Err(e) => {
472            return (
473                FileVerifyOutcome::IoError {
474                    kind: e.kind(),
475                    message: e.to_string(),
476                },
477                0,
478            );
479        }
480    };
481
482    match expected {
483        ExpectedHash::Whole { hash } => match hash_whole(&mut file, scratch) {
484            Ok((actual, n)) => {
485                if actual == *hash {
486                    (FileVerifyOutcome::Match, n)
487                } else {
488                    (
489                        FileVerifyOutcome::WholeMismatch {
490                            expected: *hash,
491                            actual,
492                        },
493                        n,
494                    )
495                }
496            }
497            Err(e) => (
498                FileVerifyOutcome::IoError {
499                    kind: e.kind(),
500                    message: e.to_string(),
501                },
502                0,
503            ),
504        },
505        ExpectedHash::Blocks { block_size, hashes } => {
506            hash_blocks(&mut file, *block_size, hashes, scratch)
507        }
508    }
509}
510
511fn finalize(hasher: Sha1) -> Sha1Digest {
512    let out = hasher.finalize();
513    let mut bytes = [0u8; SHA1_DIGEST_LEN];
514    bytes.copy_from_slice(&out);
515    Sha1Digest(bytes)
516}
517
518fn hash_whole<R: Read>(reader: &mut R, scratch: &mut [u8]) -> std::io::Result<(Sha1Digest, u64)> {
519    let mut hasher = Sha1::new();
520    let mut total: u64 = 0;
521    loop {
522        let n = reader.read(scratch)?;
523        if n == 0 {
524            break;
525        }
526        hasher.update(&scratch[..n]);
527        total += n as u64;
528        trace!(chunk_bytes = n, "verify_hashes: whole-file chunk");
529    }
530    Ok((finalize(hasher), total))
531}
532
533fn hash_blocks<R: Read>(
534    reader: &mut R,
535    block_size: u64,
536    expected: &[Sha1Digest],
537    scratch: &mut [u8],
538) -> (FileVerifyOutcome, u64) {
539    // Stream-hash one block at a time so memory stays O(scratch) regardless of
540    // file size.
541    let mut mismatched: Vec<usize> = Vec::new();
542    let mut block_idx: usize = 0;
543    let mut total_bytes: u64 = 0;
544    let mut hasher = Sha1::new();
545    let mut block_bytes_remaining: u64 = block_size;
546    let mut block_had_bytes = false;
547
548    loop {
549        // Cap reads so we never spill across a block boundary.
550        let want = block_bytes_remaining.min(scratch.len() as u64) as usize;
551        if want == 0 {
552            finish_and_compare(&mut hasher, block_idx, expected, &mut mismatched);
553            block_idx += 1;
554            block_bytes_remaining = block_size;
555            block_had_bytes = false;
556            continue;
557        }
558        let n = match reader.read(&mut scratch[..want]) {
559            Ok(n) => n,
560            Err(e) => {
561                return (
562                    FileVerifyOutcome::IoError {
563                        kind: e.kind(),
564                        message: e.to_string(),
565                    },
566                    total_bytes,
567                );
568            }
569        };
570        if n == 0 {
571            if block_had_bytes {
572                // Trailing short block at EOF — finalize and compare.
573                finish_and_compare(&mut hasher, block_idx, expected, &mut mismatched);
574                block_idx += 1;
575            }
576            break;
577        }
578        hasher.update(&scratch[..n]);
579        total_bytes += n as u64;
580        block_bytes_remaining -= n as u64;
581        block_had_bytes = true;
582        trace!(block_idx, chunk_bytes = n, "verify_hashes: block chunk");
583    }
584
585    // File ran out before we hit `expected.len()` blocks — flag each missing
586    // index as a mismatch. Conversely, if more blocks fit than the caller
587    // supplied, every excess block index past `expected.len()` has already
588    // been flagged inside `finish_and_compare`.
589    for missing in block_idx..expected.len() {
590        mismatched.push(missing);
591    }
592
593    let actual_block_count = block_idx;
594    let expected_block_count = expected.len();
595    let outcome = if mismatched.is_empty() && actual_block_count == expected_block_count {
596        FileVerifyOutcome::Match
597    } else {
598        mismatched.sort_unstable();
599        mismatched.dedup();
600        FileVerifyOutcome::BlockMismatches {
601            mismatched_blocks: mismatched,
602            expected_block_count,
603            actual_block_count,
604        }
605    };
606    (outcome, total_bytes)
607}
608
609fn finish_and_compare(
610    hasher: &mut Sha1,
611    block_idx: usize,
612    expected: &[Sha1Digest],
613    mismatched: &mut Vec<usize>,
614) {
615    // Replace the in-progress hasher with a fresh one, taking ownership of the
616    // finished state so we can finalize it without disturbing the loop.
617    let finished = std::mem::replace(hasher, Sha1::new());
618    let digest = finalize(finished);
619    match expected.get(block_idx) {
620        Some(want) if *want == digest => {}
621        _ => mismatched.push(block_idx),
622    }
623}
624
625#[cfg(test)]
626mod tests {
627    use super::*;
628    use std::io::Write;
629
630    fn sha1_of(bytes: &[u8]) -> Sha1Digest {
631        let mut h = Sha1::new();
632        h.update(bytes);
633        finalize(h)
634    }
635
636    fn write_tmp(bytes: &[u8]) -> (tempfile::TempDir, PathBuf) {
637        let dir = tempfile::tempdir().unwrap();
638        let path = dir.path().join("f.bin");
639        let mut f = File::create(&path).unwrap();
640        f.write_all(bytes).unwrap();
641        f.sync_all().unwrap();
642        (dir, path)
643    }
644
645    #[test]
646    fn report_is_clean_when_empty() {
647        let r = HashVerifyReport::default();
648        assert!(r.is_clean());
649        assert_eq!(r.failure_count(), 0);
650        assert_eq!(r.failures().count(), 0);
651    }
652
653    #[test]
654    fn whole_sha1_match() {
655        let payload = b"hello world".repeat(1000);
656        let (_d, path) = write_tmp(&payload);
657        let report = HashVerifier::new()
658            .expect(&path, ExpectedHash::whole(sha1_of(&payload)))
659            .execute()
660            .unwrap();
661        assert!(report.is_clean(), "got {report:?}");
662    }
663
664    #[test]
665    fn whole_sha1_mismatch() {
666        let (_d, path) = write_tmp(b"abc");
667        let bad = Sha1Digest::new([0u8; 20]);
668        let report = HashVerifier::new()
669            .expect(&path, ExpectedHash::whole(bad))
670            .execute()
671            .unwrap();
672        assert!(!report.is_clean());
673        match report.files.get(&path).unwrap() {
674            FileVerifyOutcome::WholeMismatch { expected, actual } => {
675                assert_eq!(*expected, bad);
676                assert_eq!(*actual, sha1_of(b"abc"));
677            }
678            other => panic!("expected WholeMismatch, got {other:?}"),
679        }
680    }
681
682    #[test]
683    fn block_mode_match() {
684        let block_size: u64 = 256;
685        let mut payload = Vec::new();
686        for i in 0..5u8 {
687            payload.extend(std::iter::repeat_n(i, block_size as usize));
688        }
689        // Add a short trailing block.
690        payload.extend_from_slice(&[0xAB; 17]);
691
692        let hashes: Vec<Sha1Digest> = payload.chunks(block_size as usize).map(sha1_of).collect();
693        let (_d, path) = write_tmp(&payload);
694
695        let report = HashVerifier::new()
696            .expect(&path, ExpectedHash::blocks(block_size, hashes.clone()))
697            .execute()
698            .unwrap();
699        assert!(report.is_clean(), "got {report:?}");
700        assert_eq!(hashes.len(), 6); // 5 full + 1 short
701    }
702
703    #[test]
704    fn block_mode_specific_block_mismatch() {
705        let block_size: u64 = 128;
706        let mut payload = vec![0u8; (block_size as usize) * 4];
707        // Corrupt block 2 by writing to the on-disk file *after* computing the
708        // expected hashes from the clean payload.
709        let clean = payload.clone();
710        payload[(block_size as usize) * 2 + 7] = 0xFF;
711
712        let expected: Vec<Sha1Digest> = clean.chunks(block_size as usize).map(sha1_of).collect();
713        let (_d, path) = write_tmp(&payload);
714
715        let report = HashVerifier::new()
716            .expect(&path, ExpectedHash::blocks(block_size, expected))
717            .execute()
718            .unwrap();
719        match report.files.get(&path).unwrap() {
720            FileVerifyOutcome::BlockMismatches {
721                mismatched_blocks,
722                expected_block_count,
723                actual_block_count,
724            } => {
725                assert_eq!(mismatched_blocks, &vec![2]);
726                assert_eq!(*expected_block_count, 4);
727                assert_eq!(*actual_block_count, 4);
728            }
729            other => panic!("expected BlockMismatches, got {other:?}"),
730        }
731    }
732
733    #[test]
734    fn missing_file_reported() {
735        let dir = tempfile::tempdir().unwrap();
736        let missing = dir.path().join("does-not-exist");
737        let report = HashVerifier::new()
738            .expect(&missing, ExpectedHash::whole(Sha1Digest::new([0u8; 20])))
739            .execute()
740            .unwrap();
741        assert_eq!(
742            report.files.get(&missing).unwrap(),
743            &FileVerifyOutcome::Missing
744        );
745        assert!(!report.is_clean());
746    }
747
748    #[test]
749    fn block_mode_file_shorter_than_expected_flags_trailing_missing_blocks() {
750        let block_size: u64 = 64;
751        // On-disk file: 2 full blocks. Caller expects 4 blocks of hashes.
752        let payload = vec![0u8; (block_size as usize) * 2];
753        let expected: Vec<Sha1Digest> = payload
754            .chunks(block_size as usize)
755            .map(sha1_of)
756            .chain(std::iter::repeat_n(Sha1Digest::new([0u8; 20]), 2))
757            .collect();
758        assert_eq!(expected.len(), 4);
759        let (_d, path) = write_tmp(&payload);
760
761        let report = HashVerifier::new()
762            .expect(&path, ExpectedHash::blocks(block_size, expected))
763            .execute()
764            .unwrap();
765        match report.files.get(&path).unwrap() {
766            FileVerifyOutcome::BlockMismatches {
767                mismatched_blocks,
768                expected_block_count,
769                actual_block_count,
770            } => {
771                assert_eq!(*expected_block_count, 4);
772                assert_eq!(*actual_block_count, 2);
773                assert_eq!(mismatched_blocks, &vec![2, 3]);
774            }
775            other => panic!("expected BlockMismatches, got {other:?}"),
776        }
777    }
778
779    #[test]
780    fn block_mode_file_longer_than_expected_flags_extra_blocks() {
781        let block_size: u64 = 32;
782        let payload = vec![0u8; (block_size as usize) * 4];
783        // Caller supplies only 2 of 4 block hashes (matching the first two).
784        let expected: Vec<Sha1Digest> = payload
785            .chunks(block_size as usize)
786            .take(2)
787            .map(sha1_of)
788            .collect();
789        let (_d, path) = write_tmp(&payload);
790
791        let report = HashVerifier::new()
792            .expect(&path, ExpectedHash::blocks(block_size, expected))
793            .execute()
794            .unwrap();
795        match report.files.get(&path).unwrap() {
796            FileVerifyOutcome::BlockMismatches {
797                mismatched_blocks,
798                expected_block_count,
799                actual_block_count,
800            } => {
801                assert_eq!(*expected_block_count, 2);
802                assert_eq!(*actual_block_count, 4);
803                assert_eq!(mismatched_blocks, &vec![2, 3]);
804            }
805            other => panic!("expected BlockMismatches, got {other:?}"),
806        }
807    }
808
809    #[test]
810    fn empty_file_whole_mode_matches_sha1_of_empty() {
811        let (_d, path) = write_tmp(&[]);
812        let report = HashVerifier::new()
813            .expect(&path, ExpectedHash::whole(sha1_of(&[])))
814            .execute()
815            .unwrap();
816        assert!(report.is_clean());
817    }
818
819    #[test]
820    fn empty_file_block_mode_matches_zero_blocks() {
821        // Zero blocks expected; zero blocks present.
822        let (_d, path) = write_tmp(&[]);
823        let report = HashVerifier::new()
824            .expect(&path, ExpectedHash::blocks(1024, vec![]))
825            .execute()
826            .unwrap();
827        assert!(report.is_clean());
828    }
829
830    #[test]
831    fn zero_block_size_is_rejected_up_front() {
832        let dir = tempfile::tempdir().unwrap();
833        let path = dir.path().join("any");
834        let err = HashVerifier::new()
835            .expect(&path, ExpectedHash::blocks(0, vec![]))
836            .execute()
837            .unwrap_err();
838        assert!(
839            matches!(err, crate::VerifyError::InvalidField { context } if context.contains("block_size")),
840            "got {err:?}"
841        );
842    }
843
844    #[test]
845    fn block_mode_block_size_exceeds_read_buf_capacity_match() {
846        // Each block is larger than READ_BUF_CAPACITY (64 KiB) so the inner
847        // read loop must iterate multiple times before `want == 0` triggers
848        // the finalize branch. Use 200 KiB blocks: 3 full + 1 short trailing.
849        let block_size: u64 = 200 * 1024;
850        let mut payload = Vec::with_capacity((block_size as usize) * 3 + 17);
851        for i in 0..3u8 {
852            payload.extend(std::iter::repeat_n(i.wrapping_mul(31), block_size as usize));
853        }
854        payload.extend_from_slice(&[0xCD; 17]);
855
856        let hashes: Vec<Sha1Digest> = payload.chunks(block_size as usize).map(sha1_of).collect();
857        assert_eq!(hashes.len(), 4);
858        let (_d, path) = write_tmp(&payload);
859
860        let report = HashVerifier::new()
861            .expect(&path, ExpectedHash::blocks(block_size, hashes))
862            .execute()
863            .unwrap();
864        assert!(report.is_clean(), "got {report:?}");
865    }
866
867    #[test]
868    fn block_mode_block_size_exceeds_read_buf_capacity_mismatch() {
869        // Same shape as the match test, but corrupt a byte deep inside block 1
870        // (past the first 64 KiB read) so the mismatch only surfaces if the
871        // multi-read accumulation inside a single block works.
872        let block_size: u64 = 200 * 1024;
873        let mut payload = Vec::with_capacity((block_size as usize) * 3);
874        for i in 0..3u8 {
875            payload.extend(std::iter::repeat_n(i.wrapping_mul(17), block_size as usize));
876        }
877        let clean = payload.clone();
878        // Corrupt block 1 at offset 150 KiB (well past the first 64 KiB read).
879        payload[(block_size as usize) + 150 * 1024] ^= 0xFF;
880
881        let expected: Vec<Sha1Digest> = clean.chunks(block_size as usize).map(sha1_of).collect();
882        let (_d, path) = write_tmp(&payload);
883
884        let report = HashVerifier::new()
885            .expect(&path, ExpectedHash::blocks(block_size, expected))
886            .execute()
887            .unwrap();
888        match report.files.get(&path).unwrap() {
889            FileVerifyOutcome::BlockMismatches {
890                mismatched_blocks,
891                expected_block_count,
892                actual_block_count,
893            } => {
894                assert_eq!(mismatched_blocks, &vec![1]);
895                assert_eq!(*expected_block_count, 3);
896                assert_eq!(*actual_block_count, 3);
897            }
898            other => panic!("expected BlockMismatches, got {other:?}"),
899        }
900    }
901
902    #[test]
903    fn block_mode_single_short_block_distinguishes_from_empty_file() {
904        // File shorter than `block_size` with exactly one expected hash. This
905        // exercises the trailing-short-block finalize path when the *only*
906        // block is short — distinct from the empty-file (zero blocks) path.
907        let block_size: u64 = 200 * 1024;
908        let payload = vec![0x7Eu8; 1000]; // far less than block_size
909        let hashes = vec![sha1_of(&payload)];
910        let (_d, path) = write_tmp(&payload);
911
912        let report = HashVerifier::new()
913            .expect(&path, ExpectedHash::blocks(block_size, hashes))
914            .execute()
915            .unwrap();
916        assert!(report.is_clean(), "got {report:?}");
917    }
918
919    #[cfg(target_family = "unix")]
920    #[test]
921    fn permission_denied_open_reports_io_error_with_kind() {
922        use std::os::unix::fs::PermissionsExt;
923
924        let (_d, path) = write_tmp(b"forbidden");
925        // Drop read permission. TempDir cleanup uses unlink (not
926        // open-for-read), so 0o000 on the file itself does not block cleanup.
927        std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o000)).unwrap();
928
929        // Skip when running as root — chmod 0o000 is bypassed by
930        // CAP_DAC_OVERRIDE, so root can still open the file. Probe via
931        // File::open: if the open succeeds against 0o000, the running user
932        // has the cap and the test would not exercise the IoError path.
933        if File::open(&path).is_ok() {
934            std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o644)).unwrap();
935            eprintln!("skipping: running with CAP_DAC_OVERRIDE, chmod 0o000 does not block open");
936            return;
937        }
938
939        let report = HashVerifier::new()
940            .expect(&path, ExpectedHash::whole(Sha1Digest::new([0u8; 20])))
941            .execute()
942            .unwrap();
943
944        // Restore so the TempDir cleanup is robust regardless of platform quirks.
945        std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o644)).unwrap();
946
947        match report.files.get(&path).unwrap() {
948            FileVerifyOutcome::IoError { kind, message } => {
949                assert_eq!(*kind, std::io::ErrorKind::PermissionDenied, "got {kind:?}");
950                assert!(!message.is_empty(), "message should carry the error text");
951            }
952            other => panic!("expected IoError with PermissionDenied kind, got {other:?}"),
953        }
954    }
955
    // Note: a mid-read IO error cannot be provoked deterministically through
    // a real `File`, so the permission-denied test above only covers the
    // open-time `IoError` construction (kind + message). The mid-read `Err`
    // branches of `hash_whole` and `hash_blocks` are exercised separately by
    // the `FailAfter`-based tests further down, which hand those functions a
    // custom `Read` impl instead of a `File`.
962
963    #[test]
964    fn duplicate_identical_registration_is_noop() {
965        let (_d, path) = write_tmp(b"abc");
966        let expected = ExpectedHash::whole(sha1_of(b"abc"));
967        let report = HashVerifier::new()
968            .expect(&path, expected.clone())
969            .expect(&path, expected)
970            .execute()
971            .unwrap();
972        assert!(report.is_clean(), "got {report:?}");
973        assert_eq!(report.files.len(), 1);
974    }
975
976    #[test]
977    fn duplicate_conflicting_registration_errors() {
978        let (_d, path) = write_tmp(b"abc");
979        let err = HashVerifier::new()
980            .expect(&path, ExpectedHash::whole(sha1_of(b"abc")))
981            .expect(&path, ExpectedHash::whole(Sha1Digest::new([0u8; 20])))
982            .execute()
983            .unwrap_err();
984        assert!(
985            matches!(err, crate::VerifyError::InvalidField { context } if context.contains("conflicting")),
986            "got {err:?}"
987        );
988    }
989
990    #[test]
991    fn failures_iter_excludes_matches() {
992        let (_d1, ok) = write_tmp(b"a");
993        let (_d2, bad) = write_tmp(b"b");
994        let report = HashVerifier::new()
995            .expect(&ok, ExpectedHash::whole(sha1_of(b"a")))
996            .expect(&bad, ExpectedHash::whole(Sha1Digest::new([0u8; 20])))
997            .execute()
998            .unwrap();
999        let fails: Vec<_> = report.failures().collect();
1000        assert_eq!(fails.len(), 1);
1001        assert_eq!(fails[0].0, bad.as_path());
1002    }
1003
1004    /// Reader that yields `n_ok` bytes of zeros, then fails on the next read
1005    /// with the given `ErrorKind`. Used to exercise the mid-read IO error
1006    /// branches in `hash_whole` and `hash_blocks`.
1007    struct FailAfter {
1008        remaining_ok: usize,
1009        kind: std::io::ErrorKind,
1010    }
1011
1012    impl Read for FailAfter {
1013        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1014            if self.remaining_ok == 0 {
1015                return Err(std::io::Error::new(self.kind, "injected"));
1016            }
1017            let n = self.remaining_ok.min(buf.len());
1018            buf[..n].fill(0);
1019            self.remaining_ok -= n;
1020            Ok(n)
1021        }
1022    }
1023
1024    #[test]
1025    fn hash_whole_propagates_mid_read_io_error() {
1026        let mut reader = FailAfter {
1027            remaining_ok: 32,
1028            kind: std::io::ErrorKind::Other,
1029        };
1030        let mut scratch = vec![0u8; 16];
1031        let err = hash_whole(&mut reader, &mut scratch).unwrap_err();
1032        assert_eq!(err.kind(), std::io::ErrorKind::Other);
1033    }
1034
1035    #[test]
1036    fn hash_blocks_surfaces_mid_read_io_error_as_outcome() {
1037        let mut reader = FailAfter {
1038            remaining_ok: 40,
1039            kind: std::io::ErrorKind::ConnectionAborted,
1040        };
1041        let mut scratch = vec![0u8; 16];
1042        let expected = vec![Sha1Digest::new([0u8; 20]); 4];
1043        let (outcome, bytes) = hash_blocks(&mut reader, 64, &expected, &mut scratch);
1044        match outcome {
1045            FileVerifyOutcome::IoError { kind, .. } => {
1046                assert_eq!(kind, std::io::ErrorKind::ConnectionAborted);
1047            }
1048            other => panic!("expected IoError outcome, got {other:?}"),
1049        }
1050        assert_eq!(
1051            bytes, 40,
1052            "bytes hashed up to the failure should be reported"
1053        );
1054    }
1055
1056    // --- execute() with zero tasks ---
1057
1058    #[test]
1059    fn execute_with_no_tasks_returns_clean_empty_report() {
1060        let report = HashVerifier::new().execute().unwrap();
1061        assert!(report.is_clean());
1062        assert_eq!(report.files.len(), 0);
1063        assert_eq!(report.failure_count(), 0);
1064    }
1065
1066    // --- HashVerifyReport invariants ---
1067
1068    #[test]
1069    fn report_nonempty_all_match_is_clean() {
1070        let (_d1, p1) = write_tmp(b"one");
1071        let (_d2, p2) = write_tmp(b"two");
1072        let report = HashVerifier::new()
1073            .expect(&p1, ExpectedHash::whole(sha1_of(b"one")))
1074            .expect(&p2, ExpectedHash::whole(sha1_of(b"two")))
1075            .execute()
1076            .unwrap();
1077        assert_eq!(report.files.len(), 2);
1078        assert!(report.is_clean());
1079        assert_eq!(report.failure_count(), 0);
1080        assert_eq!(report.failures().count(), 0);
1081    }
1082
1083    #[test]
1084    fn failure_count_equals_failures_iter_count() {
1085        let (_d1, ok) = write_tmp(b"good");
1086        let (_d2, bad1) = write_tmp(b"bad1");
1087        let (_d3, bad2) = write_tmp(b"bad2");
1088        let report = HashVerifier::new()
1089            .expect(&ok, ExpectedHash::whole(sha1_of(b"good")))
1090            .expect(&bad1, ExpectedHash::whole(Sha1Digest::new([0u8; 20])))
1091            .expect(&bad2, ExpectedHash::whole(Sha1Digest::new([0u8; 20])))
1092            .execute()
1093            .unwrap();
1094        assert_eq!(report.failure_count(), report.failures().count());
1095        assert_eq!(report.failure_count(), 2);
1096    }
1097
1098    #[test]
1099    fn report_files_iteration_order_is_by_path() {
1100        // BTreeMap guarantees sorted-key iteration; verify the contract holds
1101        // by registering paths out of lexicographic order and checking order.
1102        let dir = tempfile::tempdir().unwrap();
1103        let pb = dir.path().join("b.bin");
1104        let pa = dir.path().join("a.bin");
1105        let pc = dir.path().join("c.bin");
1106        for p in [&pb, &pa, &pc] {
1107            let mut f = File::create(p).unwrap();
1108            f.write_all(b"x").unwrap();
1109        }
1110        let report = HashVerifier::new()
1111            .expect(&pb, ExpectedHash::whole(sha1_of(b"x")))
1112            .expect(&pa, ExpectedHash::whole(sha1_of(b"x")))
1113            .expect(&pc, ExpectedHash::whole(sha1_of(b"x")))
1114            .execute()
1115            .unwrap();
1116        let keys: Vec<&PathBuf> = report.files.keys().collect();
1117        assert_eq!(keys[0], &pa);
1118        assert_eq!(keys[1], &pb);
1119        assert_eq!(keys[2], &pc);
1120    }
1121
1122    // --- FileVerifyOutcome derive sanity ---
1123
1124    #[test]
1125    fn file_verify_outcome_clone_and_partialeq() {
1126        let outcomes = [
1127            FileVerifyOutcome::Match,
1128            FileVerifyOutcome::Missing,
1129            FileVerifyOutcome::WholeMismatch {
1130                expected: Sha1Digest::new([0u8; 20]),
1131                actual: Sha1Digest::new([1u8; 20]),
1132            },
1133            FileVerifyOutcome::BlockMismatches {
1134                mismatched_blocks: vec![0, 2],
1135                expected_block_count: 3,
1136                actual_block_count: 3,
1137            },
1138            FileVerifyOutcome::IoError {
1139                kind: std::io::ErrorKind::Other,
1140                message: "oops".to_string(),
1141            },
1142        ];
1143        for o in &outcomes {
1144            let cloned = o.clone();
1145            assert_eq!(o, &cloned, "Clone+PartialEq round-trip failed for {o:?}");
1146        }
1147        assert_ne!(
1148            FileVerifyOutcome::Match,
1149            FileVerifyOutcome::Missing,
1150            "distinct variants must not compare equal"
1151        );
1152    }
1153
1154    // --- HashVerifier::expect builder semantics ---
1155
1156    #[test]
1157    fn many_chained_expects_all_evaluated() {
1158        let dir = tempfile::tempdir().unwrap();
1159        let n = 10usize;
1160        let mut builder = HashVerifier::new();
1161        let mut paths = Vec::with_capacity(n);
1162        for i in 0..n {
1163            let p = dir.path().join(format!("f{i}.bin"));
1164            let mut f = File::create(&p).unwrap();
1165            f.write_all(&[i as u8]).unwrap();
1166            builder = builder.expect(&p, ExpectedHash::whole(sha1_of(&[i as u8])));
1167            paths.push(p);
1168        }
1169        let report = builder.execute().unwrap();
1170        assert_eq!(report.files.len(), n);
1171        assert!(report.is_clean(), "got {report:?}");
1172    }
1173
1174    #[test]
1175    fn whole_then_blocks_registration_for_same_path_conflicts() {
1176        let (_d, path) = write_tmp(b"hi");
1177        let err = HashVerifier::new()
1178            .expect(&path, ExpectedHash::whole(sha1_of(b"hi")))
1179            .expect(&path, ExpectedHash::blocks(2, vec![sha1_of(b"hi")]))
1180            .execute()
1181            .unwrap_err();
1182        assert!(
1183            matches!(err, crate::VerifyError::InvalidField { context } if context.contains("conflicting")),
1184            "got {err:?}"
1185        );
1186    }
1187
1188    // --- Block boundary conditions ---
1189
1190    #[test]
1191    fn block_mode_exact_multiple_of_block_size_no_trailing() {
1192        let block_size: u64 = 64;
1193        let payload = vec![0xAAu8; (block_size as usize) * 3];
1194        let hashes: Vec<Sha1Digest> = payload.chunks(block_size as usize).map(sha1_of).collect();
1195        assert_eq!(hashes.len(), 3);
1196        let (_d, path) = write_tmp(&payload);
1197        let report = HashVerifier::new()
1198            .expect(&path, ExpectedHash::blocks(block_size, hashes))
1199            .execute()
1200            .unwrap();
1201        assert!(report.is_clean(), "got {report:?}");
1202    }
1203
1204    #[test]
1205    fn block_mode_n_blocks_plus_one_byte_trailing() {
1206        let block_size: u64 = 64;
1207        let mut payload = vec![0xBBu8; (block_size as usize) * 3];
1208        payload.push(0xCC);
1209        let hashes: Vec<Sha1Digest> = payload.chunks(block_size as usize).map(sha1_of).collect();
1210        assert_eq!(hashes.len(), 4);
1211        let (_d, path) = write_tmp(&payload);
1212        let report = HashVerifier::new()
1213            .expect(&path, ExpectedHash::blocks(block_size, hashes))
1214            .execute()
1215            .unwrap();
1216        assert!(report.is_clean(), "got {report:?}");
1217    }
1218
1219    #[test]
1220    fn block_mode_single_byte_file() {
1221        let (_d, path) = write_tmp(&[0x42]);
1222        let hashes = vec![sha1_of(&[0x42])];
1223        let report = HashVerifier::new()
1224            .expect(&path, ExpectedHash::blocks(1024, hashes))
1225            .execute()
1226            .unwrap();
1227        assert!(report.is_clean(), "got {report:?}");
1228    }
1229
1230    #[test]
1231    fn block_mode_block_size_one_each_byte_is_own_block() {
1232        let payload = b"abc";
1233        let hashes: Vec<Sha1Digest> = payload.iter().map(|b| sha1_of(&[*b])).collect();
1234        assert_eq!(hashes.len(), 3);
1235        let (_d, path) = write_tmp(payload);
1236        let report = HashVerifier::new()
1237            .expect(&path, ExpectedHash::blocks(1, hashes))
1238            .execute()
1239            .unwrap();
1240        assert!(report.is_clean(), "got {report:?}");
1241    }
1242
1243    // --- BlockHasher state isolation between blocks ---
1244
1245    #[test]
1246    fn block_hasher_state_does_not_bleed_between_identical_content_blocks() {
1247        // Both blocks contain the same bytes. Expected[0] matches; expected[1]
1248        // is deliberately wrong. If state bled, block 1's hash would equal
1249        // block 0's hash (which happens to equal expected[0]) — masking the
1250        // mismatch. A correct implementation resets the hasher between blocks,
1251        // so expected[1] != actual[1] and block 1 is flagged.
1252        let block_size: u64 = 32;
1253        let content = vec![0x5Au8; block_size as usize];
1254        let payload: Vec<u8> = content.iter().chain(content.iter()).copied().collect();
1255        let correct_hash = sha1_of(&content);
1256        let wrong_hash = Sha1Digest::new([0u8; 20]);
1257        assert_ne!(correct_hash, wrong_hash);
1258        let hashes = vec![correct_hash, wrong_hash];
1259        let (_d, path) = write_tmp(&payload);
1260        let report = HashVerifier::new()
1261            .expect(&path, ExpectedHash::blocks(block_size, hashes))
1262            .execute()
1263            .unwrap();
1264        match report.files.get(&path).unwrap() {
1265            FileVerifyOutcome::BlockMismatches {
1266                mismatched_blocks,
1267                expected_block_count,
1268                actual_block_count,
1269            } => {
1270                assert_eq!(mismatched_blocks, &vec![1]);
1271                assert_eq!(*expected_block_count, 2);
1272                assert_eq!(*actual_block_count, 2);
1273            }
1274            other => panic!("expected BlockMismatches for block 1 only, got {other:?}"),
1275        }
1276    }
1277
1278    // --- Path edge cases ---
1279
1280    #[test]
1281    fn path_with_spaces_and_utf8() {
1282        let dir = tempfile::tempdir().unwrap();
1283        let path = dir.path().join("file with spaces café.bin");
1284        let mut f = File::create(&path).unwrap();
1285        f.write_all(b"data").unwrap();
1286        f.sync_all().unwrap();
1287        let report = HashVerifier::new()
1288            .expect(&path, ExpectedHash::whole(sha1_of(b"data")))
1289            .execute()
1290            .unwrap();
1291        assert!(report.is_clean(), "got {report:?}");
1292    }
1293
1294    // --- Parallel fan-out determinism ---
1295
1296    // 32 files (above rayon's typical split threshold): half match, half don't.
1297    // Verifies that the parallel collector produces a sorted BTreeMap with the
1298    // right failure count, and that two runs on equivalent input are identical.
1299    #[test]
1300    fn parallel_fan_out_report_is_deterministic_and_sorted() {
1301        const N: usize = 32;
1302        let dir = tempfile::tempdir().unwrap();
1303        let mut builder = HashVerifier::new();
1304        let mut expected_failures = 0usize;
1305        let mut paths: Vec<PathBuf> = Vec::with_capacity(N);
1306        for i in 0..N {
1307            let p = dir.path().join(format!("file_{i:03}.bin"));
1308            let payload = vec![i as u8; 1024 * 1024];
1309            let mut f = File::create(&p).unwrap();
1310            f.write_all(&payload).unwrap();
1311            f.sync_all().unwrap();
1312            let hash = if i % 2 == 0 {
1313                sha1_of(&payload)
1314            } else {
1315                expected_failures += 1;
1316                Sha1Digest::new([0u8; 20])
1317            };
1318            builder = builder.expect(&p, ExpectedHash::whole(hash));
1319            paths.push(p);
1320        }
1321
1322        let run1 = builder.execute().unwrap();
1323        assert_eq!(run1.files.len(), N);
1324        assert_eq!(run1.failure_count(), expected_failures);
1325
1326        let keys: Vec<&PathBuf> = run1.files.keys().collect();
1327        for w in keys.windows(2) {
1328            assert!(w[0] < w[1], "BTreeMap keys out of order: {w:?}");
1329        }
1330
1331        // Rebuild an equivalent verifier to check idempotence.
1332        let mut builder2 = HashVerifier::new();
1333        for (i, p) in paths.iter().enumerate() {
1334            let payload = vec![i as u8; 1024 * 1024];
1335            let hash = if i % 2 == 0 {
1336                sha1_of(&payload)
1337            } else {
1338                Sha1Digest::new([0u8; 20])
1339            };
1340            builder2 = builder2.expect(p, ExpectedHash::whole(hash));
1341        }
1342        let run2 = builder2.execute().unwrap();
1343        assert_eq!(run1, run2, "two equivalent runs produced different reports");
1344    }
1345
1346    // Registers files in a shuffled (non-lexicographic) order and asserts that
1347    // the report's BTreeMap keys are still sorted, guarding against any future
1348    // replacement of BTreeMap with a hash-map in the merge loop.
1349    #[test]
1350    fn parallel_fan_out_shuffled_registration_order_report_sorted() {
1351        const N: usize = 32;
1352        let dir = tempfile::tempdir().unwrap();
1353        // Shuffled index sequence: reverse-order registration.
1354        let indices: Vec<usize> = (0..N).rev().collect();
1355        let mut builder = HashVerifier::new();
1356        let mut paths: Vec<PathBuf> = Vec::with_capacity(N);
1357        // Pre-create all files so paths vec is in 0..N order for comparison.
1358        for i in 0..N {
1359            let p = dir.path().join(format!("z_{i:03}.bin"));
1360            let mut f = File::create(&p).unwrap();
1361            f.write_all(&[i as u8]).unwrap();
1362            f.sync_all().unwrap();
1363            paths.push(p);
1364        }
1365        // Register in reverse order so the task list is not lexicographically sorted.
1366        for &i in &indices {
1367            let payload = [i as u8];
1368            builder = builder.expect(&paths[i], ExpectedHash::whole(sha1_of(&payload)));
1369        }
1370        let report = builder.execute().unwrap();
1371        assert_eq!(report.files.len(), N);
1372        assert!(report.is_clean(), "all files should match; got {report:?}");
1373        let keys: Vec<&PathBuf> = report.files.keys().collect();
1374        for w in keys.windows(2) {
1375            assert!(w[0] < w[1], "report keys not sorted: {w:?}");
1376        }
1377    }
1378
1379    // --- Sha1Digest newtype ---
1380
1381    #[test]
1382    fn sha1_digest_display_is_40_lowercase_hex() {
1383        let d = Sha1Digest::new([
1384            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
1385            0x0e, 0x0f, 0xfe, 0xed, 0xfa, 0xce,
1386        ]);
1387        let s = d.to_string();
1388        assert_eq!(s.len(), 40);
1389        assert_eq!(s, "000102030405060708090a0b0c0d0e0ffeedface");
1390    }
1391
1392    #[test]
1393    fn sha1_digest_from_str_roundtrip() {
1394        let bytes = [
1395            0xde, 0xad, 0xbe, 0xef, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x11, 0x22,
1396            0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
1397        ];
1398        let d = Sha1Digest::new(bytes);
1399        let s = d.to_string();
1400        let parsed: Sha1Digest = s.parse().unwrap();
1401        assert_eq!(parsed, d);
1402        assert_eq!(parsed.as_bytes(), &bytes);
1403    }
1404
1405    #[test]
1406    fn sha1_digest_from_str_accepts_uppercase() {
1407        let parsed: Sha1Digest = "DEADBEEF000102030405060708090A0B0C0D0E0F".parse().unwrap();
1408        let expected = Sha1Digest::new([
1409            0xde, 0xad, 0xbe, 0xef, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
1410            0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1411        ]);
1412        assert_eq!(parsed, expected);
1413    }
1414
1415    #[test]
1416    fn sha1_digest_from_str_rejects_wrong_length() {
1417        assert!("abc".parse::<Sha1Digest>().is_err());
1418        assert!("0".repeat(39).parse::<Sha1Digest>().is_err());
1419        assert!("0".repeat(41).parse::<Sha1Digest>().is_err());
1420    }
1421
1422    #[test]
1423    fn sha1_digest_from_str_rejects_non_hex() {
1424        assert!("g".repeat(40).parse::<Sha1Digest>().is_err());
1425        let mut s = "0".repeat(39);
1426        s.push('z');
1427        assert!(s.parse::<Sha1Digest>().is_err());
1428    }
1429
1430    #[test]
1431    fn sha1_digest_is_copy_and_eq() {
1432        let d = Sha1Digest::new([7u8; 20]);
1433        let copy = d;
1434        assert_eq!(copy, d);
1435        assert_eq!(d, Sha1Digest::new([7u8; 20]));
1436    }
1437}