// coreutils_rs/hash/core.rs — hashing core (SHA-256 / MD5 / BLAKE2b).
1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9#[cfg(not(target_os = "linux"))]
10use digest::Digest;
11#[cfg(not(target_os = "linux"))]
12use md5::Md5;
13
/// Hash algorithms supported by this module.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha256,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Canonical display name of the algorithm.
    pub fn name(self) -> &'static str {
        match self {
            Self::Sha256 => "SHA256",
            Self::Md5 => "MD5",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
31
32// ── Generic hash helpers ────────────────────────────────────────────
33
/// Single-shot hash of `data` via the generic `Digest` trait (non-Linux fallback).
/// Returns the hex-encoded digest.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let digest = D::digest(data);
    hex_encode(&digest)
}
39
/// Streaming hash over any `Read` source via the generic `Digest` trait
/// (non-Linux fallback). Chunks the input through the shared thread-local
/// buffer to avoid per-call allocation.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut hasher = D::new();
        loop {
            match read_full(&mut reader, &mut chunk)? {
                // EOF: emit the final digest as hex.
                0 => break Ok(hex_encode(&hasher.finalize())),
                n => hasher.update(&chunk[..n]),
            }
        }
    })
}
57
58// ── Public hashing API ──────────────────────────────────────────────
59
/// Buffer size for streaming hash I/O.
/// 8MB: amortizes syscall overhead while still fitting in L3 cache on modern CPUs.
/// Larger buffer means fewer read() calls per file (e.g., 13 reads for 100MB vs 25).
const HASH_READ_BUF: usize = 8 * 1024 * 1024;

// Thread-local reusable buffer for streaming hash I/O.
// Allocated LAZILY (only on first streaming-hash call) to avoid 8MB cost for
// small-file-only workloads (e.g., "sha256sum *.txt" where every file is <1MB).
// Starts empty; grown to HASH_READ_BUF by ensure_stream_buf on first streaming use.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}
71
72/// Ensure the streaming buffer is at least HASH_READ_BUF bytes.
73/// Called only on the streaming path, so small-file workloads never allocate 8MB.
74#[inline]
75fn ensure_stream_buf(buf: &mut Vec<u8>) {
76    if buf.len() < HASH_READ_BUF {
77        buf.resize(HASH_READ_BUF, 0);
78    }
79}
80
81// ── SHA-256 ───────────────────────────────────────────────────────────
82
83/// Single-shot SHA-256 using OpenSSL's optimized assembly (SHA-NI on x86).
84/// Linux only — OpenSSL is not available on Windows/macOS in CI.
85#[cfg(target_os = "linux")]
86fn sha256_bytes(data: &[u8]) -> String {
87    // For tiny data (<8KB): use sha2 crate directly, avoiding OpenSSL's
88    // EVP_MD_CTX_new/free overhead (~700ns per call). sha2 with asm feature
89    // uses SHA-NI instructions and has no heap allocation, just stack state.
90    // For 100 × 55-byte files: saves ~70µs total.
91    if data.len() < TINY_FILE_LIMIT as usize {
92        use digest::Digest;
93        return hex_encode(&sha2::Sha256::digest(data));
94    }
95    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
96        .expect("SHA256 hash failed");
97    hex_encode(&digest)
98}
99
/// Single-shot SHA-256 using ring's BoringSSL assembly (Windows and other non-Apple).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    let digest = ring::digest::digest(&ring::digest::SHA256, data);
    hex_encode(digest.as_ref())
}
105
/// Single-shot SHA-256 using sha2 crate (macOS fallback — ring doesn't compile on Apple Silicon).
/// Delegates to the generic Digest-based helper; returns the hex-encoded digest.
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha256>(data)
}
111
112/// Streaming SHA-256 using OpenSSL's optimized assembly.
113/// Linux only — OpenSSL is not available on Windows/macOS in CI.
114#[cfg(target_os = "linux")]
115fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
116    STREAM_BUF.with(|cell| {
117        let mut buf = cell.borrow_mut();
118        ensure_stream_buf(&mut buf);
119        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
120            .map_err(|e| io::Error::other(e))?;
121        loop {
122            let n = read_full(&mut reader, &mut buf)?;
123            if n == 0 {
124                break;
125            }
126            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
127        }
128        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
129        Ok(hex_encode(&digest))
130    })
131}
132
/// Streaming SHA-256 using ring's BoringSSL assembly (Windows and other non-Apple).
/// Chunks the input through the shared thread-local buffer.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
        loop {
            match read_full(&mut reader, &mut chunk)? {
                0 => break Ok(hex_encode(ctx.finish().as_ref())),
                n => ctx.update(&chunk[..n]),
            }
        }
    })
}
150
/// Streaming SHA-256 using sha2 crate (macOS fallback).
/// Delegates to the generic streaming helper, which reuses the thread-local buffer.
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha256>(reader)
}
156
157/// Compute hash of a byte slice directly (zero-copy fast path).
158pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
159    match algo {
160        HashAlgorithm::Sha256 => sha256_bytes(data),
161        HashAlgorithm::Md5 => md5_bytes(data),
162        HashAlgorithm::Blake2b => {
163            let hash = blake2b_simd::blake2b(data);
164            hex_encode(hash.as_bytes())
165        }
166    }
167}
168
169// ── MD5 ─────────────────────────────────────────────────────────────
170
171/// Single-shot MD5 using OpenSSL's optimized assembly (Linux).
172#[cfg(target_os = "linux")]
173fn md5_bytes(data: &[u8]) -> String {
174    // For tiny data (<8KB): use md5 crate directly, avoiding OpenSSL's
175    // EVP_MD_CTX_new/free overhead (~700ns per call). md5 with asm feature
176    // uses optimized assembly and has no heap allocation.
177    if data.len() < TINY_FILE_LIMIT as usize {
178        use digest::Digest;
179        return hex_encode(&md5::Md5::digest(data));
180    }
181    let digest =
182        openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
183    hex_encode(&digest)
184}
185
/// Single-shot MD5 using md-5 crate (non-Linux fallback).
/// Delegates to the generic Digest-based helper; returns the hex-encoded digest.
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    hash_digest::<Md5>(data)
}
191
192/// Compute hash of data from a reader, returning hex string.
193pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
194    match algo {
195        HashAlgorithm::Sha256 => sha256_reader(reader),
196        HashAlgorithm::Md5 => md5_reader(reader),
197        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
198    }
199}
200
201/// Streaming MD5 using OpenSSL's optimized assembly (Linux).
202#[cfg(target_os = "linux")]
203fn md5_reader(mut reader: impl Read) -> io::Result<String> {
204    STREAM_BUF.with(|cell| {
205        let mut buf = cell.borrow_mut();
206        ensure_stream_buf(&mut buf);
207        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
208            .map_err(|e| io::Error::other(e))?;
209        loop {
210            let n = read_full(&mut reader, &mut buf)?;
211            if n == 0 {
212                break;
213            }
214            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
215        }
216        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
217        Ok(hex_encode(&digest))
218    })
219}
220
/// Streaming MD5 using md-5 crate (non-Linux fallback).
/// Delegates to the generic streaming helper, which reuses the thread-local buffer.
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}
226
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
/// Relaxed ordering suffices: the flag only gates a performance optimization and
/// synchronizes no other data.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
231
232/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
233/// Caches whether O_NOATIME works to avoid double-open on every file.
234#[cfg(target_os = "linux")]
235fn open_noatime(path: &Path) -> io::Result<File> {
236    use std::os::unix::fs::OpenOptionsExt;
237    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
238        match std::fs::OpenOptions::new()
239            .read(true)
240            .custom_flags(libc::O_NOATIME)
241            .open(path)
242        {
243            Ok(f) => return Ok(f),
244            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
245                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
246                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
247            }
248            Err(e) => return Err(e), // Real error, propagate
249        }
250    }
251    File::open(path)
252}
253
/// Portable fallback: O_NOATIME is Linux-specific, so a plain read-only open suffices.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
258
/// Open a file and get its metadata in one step.
/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
/// Returns (open file, size in bytes, whether it is a regular file).
#[cfg(target_os = "linux")]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let fd = {
        use std::os::unix::io::AsRawFd;
        file.as_raw_fd()
    };
    // SAFETY: libc::stat is plain-old-data; the all-zero byte pattern is a
    // valid initial value, and fstat fully overwrites it on success.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    // SAFETY: `fd` comes from `file`, which stays open for the duration of the call.
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        return Err(io::Error::last_os_error());
    }
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
    // st_size is signed in libc; cast to u64 for the rest of the module.
    let size = stat.st_size as u64;
    Ok((file, size, is_regular))
}
277
/// Portable open+stat: uses std metadata on the already-open handle.
/// Returns (open file, size in bytes, whether it is a regular file).
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let is_regular = meta.file_type().is_file();
    Ok((file, meta.len(), is_regular))
}
285
/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
/// Also reused by the single-shot backends as the "skip OpenSSL context" cutoff.
const TINY_FILE_LIMIT: u64 = 8 * 1024;
303
// Thread-local reusable buffer for single-read hash (hash_file_small and the
// BLAKE2b equivalent). Grows lazily up to SMALL_FILE_LIMIT (16MB). Initial 64KB
// allocation handles tiny files; larger files trigger one grow that persists
// across calls for reuse.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
310
/// Hash a file by path. Uses mmap for large files (zero-copy, no read() syscalls),
/// single-read + single-shot hash for small files, and streaming read as fallback.
///
/// Size classes for regular files (non-regular files always stream):
/// - 0 bytes: hash the empty slice, no further I/O.
/// - < TINY_FILE_LIMIT (8KB): stack buffer (hash_file_tiny).
/// - < SMALL_FILE_LIMIT (16MB): thread-local buffer (hash_file_small).
/// - >= SMALL_FILE_LIMIT: mmap; on mmap failure, falls through to streaming.
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(hash_bytes(algo, &[]));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return hash_file_tiny(algo, file, file_size as usize);
        }
        // mmap for large files — zero-copy, eliminates multiple read() syscalls
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            if file_size >= FADVISE_MIN_SIZE {
                use std::os::unix::io::AsRawFd;
                // Best-effort readahead hint; return value intentionally ignored.
                unsafe {
                    libc::posix_fadvise(
                        file.as_raw_fd(),
                        0,
                        file_size as i64,
                        libc::POSIX_FADV_SEQUENTIAL,
                    );
                }
            }
            // Skip MAP_POPULATE for files < 4MB: on VMs (CI, cloud), eager page
            // faulting is expensive (~300ns/page × 1024 pages = ~300µs for 4MB).
            // Lazy faults + sequential access are faster for moderate files.
            let mmap_result = if file_size >= 4 * 1024 * 1024 {
                unsafe { memmap2::MmapOptions::new().populate().map(&file) }
            } else {
                unsafe { memmap2::MmapOptions::new().map(&file) }
            };
            if let Ok(mmap) = mmap_result {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                    // NOTE(review): always true on this path (file_size >= 16MB here);
                    // the guard mirrors open_file_content, where sizes do vary.
                    if file_size >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                }
                return Ok(hash_bytes(algo, &mmap));
            }
            // mmap failed — fall through to the buffered/streaming paths below.
        }
        // Small files (8KB..16MB, i.e. below SMALL_FILE_LIMIT): single read into
        // thread-local buffer, then single-shot hash. This avoids Hasher context
        // allocation + streaming overhead for each file.
        if file_size < SMALL_FILE_LIMIT {
            return hash_file_small(algo, file, file_size as usize);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    hash_reader(algo, file)
}
375
376/// Hash a tiny file (<8KB) using a stack-allocated buffer.
377/// Single read() syscall, zero heap allocation on the data path.
378/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
379#[inline]
380fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
381    let mut buf = [0u8; 8192];
382    let mut total = 0;
383    // Read with known size — usually completes in a single read() for regular files
384    while total < size {
385        match file.read(&mut buf[total..size]) {
386            Ok(0) => break,
387            Ok(n) => total += n,
388            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
389            Err(e) => return Err(e),
390        }
391    }
392    Ok(hash_bytes(algo, &buf[..total]))
393}
394
395/// Hash a small file by reading it entirely into a thread-local buffer,
396/// then using the single-shot hash function. Avoids per-file Hasher allocation.
397#[inline]
398fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
399    SMALL_FILE_BUF.with(|cell| {
400        let mut buf = cell.borrow_mut();
401        // Reset length but keep allocation, then grow if needed
402        buf.clear();
403        buf.reserve(size);
404        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
405        // directly and only access buf[..total] where total <= size <= capacity.
406        unsafe {
407            buf.set_len(size);
408        }
409        let mut total = 0;
410        while total < size {
411            match file.read(&mut buf[total..size]) {
412                Ok(0) => break,
413                Ok(n) => total += n,
414                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
415                Err(e) => return Err(e),
416            }
417        }
418        Ok(hash_bytes(algo, &buf[..total]))
419    })
420}
421
/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
    let stdin = io::stdin();
    // Hint kernel for sequential access if stdin is a regular file (redirect)
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // SAFETY: libc::stat is plain-old-data; all-zero is a valid initial value
        // and fstat fully overwrites it on success.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        // SAFETY: `fd` refers to stdin, which is open for the life of the process.
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            // Best-effort hint; return value intentionally ignored.
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    // Streaming hash — works for both pipe and file-redirect stdin
    hash_reader(algo, stdin.lock())
}
443
/// Check if parallel hashing is worthwhile for the given file paths.
/// Any batch of two or more files parallelizes: rayon's pool is initialized
/// lazily once and reused, so per-file work-stealing overhead is negligible
/// (~1µs). Skipping a stat()-based size check here saves N syscalls for N files.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    !matches!(paths.len(), 0 | 1)
}
451
452/// Issue readahead hints for a list of file paths to warm the page cache.
453/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
454/// Only issues hints for files >= 1MB; small files are read fast enough
455/// that the fadvise syscall overhead isn't worth it.
456#[cfg(target_os = "linux")]
457pub fn readahead_files(paths: &[&Path]) {
458    use std::os::unix::io::AsRawFd;
459    for path in paths {
460        if let Ok(file) = open_noatime(path) {
461            if let Ok(meta) = file.metadata() {
462                let len = meta.len();
463                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
464                    unsafe {
465                        libc::posix_fadvise(
466                            file.as_raw_fd(),
467                            0,
468                            len as i64,
469                            libc::POSIX_FADV_WILLNEED,
470                        );
471                    }
472                }
473            }
474        }
475    }
476}
477
/// No-op on non-Linux: posix_fadvise-based readahead is only wired up for Linux.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // Intentionally empty — readahead is a Linux-only optimization.
}
482
483// --- BLAKE2b variable-length functions (using blake2b_simd) ---
484
485/// Hash raw data with BLAKE2b variable output length.
486/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
487pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
488    let hash = blake2b_simd::Params::new()
489        .hash_length(output_bytes)
490        .hash(data);
491    hex_encode(hash.as_bytes())
492}
493
494/// Hash a reader with BLAKE2b variable output length.
495/// Uses thread-local buffer for cache-friendly streaming.
496pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
497    STREAM_BUF.with(|cell| {
498        let mut buf = cell.borrow_mut();
499        ensure_stream_buf(&mut buf);
500        let mut state = blake2b_simd::Params::new()
501            .hash_length(output_bytes)
502            .to_state();
503        loop {
504            let n = read_full(&mut reader, &mut buf)?;
505            if n == 0 {
506                break;
507            }
508            state.update(&buf[..n]);
509        }
510        Ok(hex_encode(state.finalize().as_bytes()))
511    })
512}
513
/// Hash a file with BLAKE2b variable output length.
/// Uses mmap for large files (zero-copy), single-read for small files,
/// and streaming read as fallback. Size classes mirror hash_file:
/// 0 bytes / <8KB (stack) / <16MB (thread-local buffer) / >=16MB (mmap).
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // mmap for large files — zero-copy, eliminates multiple read() syscalls
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            if file_size >= FADVISE_MIN_SIZE {
                use std::os::unix::io::AsRawFd;
                // Best-effort readahead hint; return value intentionally ignored.
                unsafe {
                    libc::posix_fadvise(
                        file.as_raw_fd(),
                        0,
                        file_size as i64,
                        libc::POSIX_FADV_SEQUENTIAL,
                    );
                }
            }
            // Skip MAP_POPULATE for files < 4MB (same rationale as hash_file)
            let mmap_result = if file_size >= 4 * 1024 * 1024 {
                unsafe { memmap2::MmapOptions::new().populate().map(&file) }
            } else {
                unsafe { memmap2::MmapOptions::new().map(&file) }
            };
            if let Ok(mmap) = mmap_result {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                    // NOTE(review): always true here (file_size >= 16MB on this path).
                    if file_size >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                }
                return Ok(blake2b_hash_data(&mmap, output_bytes));
            }
            // mmap failed — fall through to the buffered/streaming paths below.
        }
        // Small files (8KB..16MB, below SMALL_FILE_LIMIT): single read into
        // thread-local buffer, then single-shot hash
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    blake2b_hash_reader(file, output_bytes)
}
576
577/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
578#[inline]
579fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
580    let mut buf = [0u8; 8192];
581    let mut total = 0;
582    while total < size {
583        match file.read(&mut buf[total..size]) {
584            Ok(0) => break,
585            Ok(n) => total += n,
586            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
587            Err(e) => return Err(e),
588        }
589    }
590    Ok(blake2b_hash_data(&buf[..total], output_bytes))
591}
592
593/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
594#[inline]
595fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
596    SMALL_FILE_BUF.with(|cell| {
597        let mut buf = cell.borrow_mut();
598        buf.clear();
599        buf.reserve(size);
600        // SAFETY: capacity >= size after clear+reserve
601        unsafe {
602            buf.set_len(size);
603        }
604        let mut total = 0;
605        while total < size {
606            match file.read(&mut buf[total..size]) {
607                Ok(0) => break,
608                Ok(n) => total += n,
609                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
610                Err(e) => return Err(e),
611            }
612        }
613        Ok(blake2b_hash_data(&buf[..total], output_bytes))
614    })
615}
616
/// Hash stdin with BLAKE2b variable output length.
/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
    let stdin = io::stdin();
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // SAFETY: libc::stat is plain-old-data; all-zero is a valid initial value
        // and fstat fully overwrites it on success.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        // SAFETY: `fd` refers to stdin, which is open for the life of the process.
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            // Best-effort hint; return value intentionally ignored.
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    blake2b_hash_reader(stdin.lock(), output_bytes)
}
637
638/// Internal enum for file content in batch hashing.
639/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
640enum FileContent {
641    Mmap(memmap2::Mmap),
642    Buf(Vec<u8>),
643}
644
645impl AsRef<[u8]> for FileContent {
646    fn as_ref(&self) -> &[u8] {
647        match self {
648            FileContent::Mmap(m) => m,
649            FileContent::Buf(v) => v,
650        }
651    }
652}
653
654/// Open a file and load its content for batch hashing.
655/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
656/// files (zero-copy), and read-to-end for non-regular files.
657fn open_file_content(path: &Path) -> io::Result<FileContent> {
658    let (file, size, is_regular) = open_and_stat(path)?;
659    if is_regular && size == 0 {
660        return Ok(FileContent::Buf(Vec::new()));
661    }
662    if is_regular && size > 0 {
663        // Tiny files: read directly into Vec. The mmap syscall + page fault
664        // overhead exceeds the data transfer cost for files under 8KB.
665        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
666        if size < TINY_FILE_LIMIT {
667            let mut buf = vec![0u8; size as usize];
668            let mut total = 0;
669            let mut f = file;
670            while total < size as usize {
671                match f.read(&mut buf[total..]) {
672                    Ok(0) => break,
673                    Ok(n) => total += n,
674                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
675                    Err(e) => return Err(e),
676                }
677            }
678            buf.truncate(total);
679            return Ok(FileContent::Buf(buf));
680        }
681        // Skip MAP_POPULATE for files < 4MB (same rationale as hash_file)
682        let mmap_result = if size >= 4 * 1024 * 1024 {
683            unsafe { memmap2::MmapOptions::new().populate().map(&file) }
684        } else {
685            unsafe { memmap2::MmapOptions::new().map(&file) }
686        };
687        if let Ok(mmap) = mmap_result {
688            #[cfg(target_os = "linux")]
689            {
690                let _ = mmap.advise(memmap2::Advice::Sequential);
691                if size >= 2 * 1024 * 1024 {
692                    let _ = mmap.advise(memmap2::Advice::HugePage);
693                }
694            }
695            return Ok(FileContent::Mmap(mmap));
696        }
697        // Fallback: read into Vec
698        let mut buf = vec![0u8; size as usize];
699        let mut total = 0;
700        let mut f = file;
701        while total < size as usize {
702            match f.read(&mut buf[total..]) {
703                Ok(0) => break,
704                Ok(n) => total += n,
705                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
706                Err(e) => return Err(e),
707            }
708        }
709        buf.truncate(total);
710        return Ok(FileContent::Buf(buf));
711    }
712    // Non-regular: read to end
713    let mut buf = Vec::new();
714    let mut f = file;
715    f.read_to_end(&mut buf)?;
716    Ok(FileContent::Buf(buf))
717}
718
719/// Open a file and read all content without fstat — just open+read+close.
720/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
721/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
722/// then falls back to larger buffer or mmap for bigger files.
723fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
724    let mut file = open_noatime(path)?;
725    // Try small buffer first — optimal for benchmark's ~55 byte files.
726    // Single read() + to_vec() with exact size for minimal allocation.
727    let mut small_buf = [0u8; 4096];
728    match file.read(&mut small_buf) {
729        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
730        Ok(n) if n < small_buf.len() => {
731            // File fits in small buffer — done (common case for tiny files)
732            return Ok(FileContent::Buf(small_buf[..n].to_vec()));
733        }
734        Ok(n) => {
735            // Might be more data — fall back to larger buffer
736            let mut buf = [0u8; 65536];
737            buf[..n].copy_from_slice(&small_buf[..n]);
738            let mut total = n;
739            loop {
740                match file.read(&mut buf[total..]) {
741                    Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
742                    Ok(n) => {
743                        total += n;
744                        if total >= buf.len() {
745                            return open_file_content(path);
746                        }
747                    }
748                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
749                    Err(e) => return Err(e),
750                }
751            }
752        }
753        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
754            let mut buf = [0u8; 65536];
755            let mut total = 0;
756            loop {
757                match file.read(&mut buf[total..]) {
758                    Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
759                    Ok(n) => {
760                        total += n;
761                        if total >= buf.len() {
762                            return open_file_content(path);
763                        }
764                    }
765                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
766                    Err(e) => return Err(e),
767                }
768            }
769        }
770        Err(e) => return Err(e),
771    }
772}
773
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order.
///
/// `output_bytes` is the digest length in bytes. A file that fails to load
/// keeps its I/O error in the corresponding output slot; the remaining
/// files are still hashed.
///
/// For 100 files on AVX2: 4x throughput from SIMD parallelism.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    // For small file counts (≤10), load sequentially to avoid thread::scope
    // overhead (~120µs). For many files, use parallel loading with lightweight
    // OS threads. For 100+ files, use fast path that skips fstat.
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        // Ceiling division: every path lands in exactly one chunk.
        // (paths.len() > 10 here, so num_threads >= 1 and this cannot divide by zero.)
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            // Chunks are contiguous and joined in spawn order, so flattening
            // preserves the original input order.
            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        // (original input index, borrowed file bytes) for each successful load.
        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        // jobs[j] corresponds 1:1 with ok_entries[j].
        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Extract hashes: hm[i] is Some(hex) iff file i loaded successfully.
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            // hash_opt is always Some when the load succeeded (set above).
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
863
864/// Batch-hash multiple files with SHA-256/MD5 using std::thread::scope.
865/// Uses lightweight OS threads instead of rayon's work-stealing pool.
866/// For single-invocation tools, this saves ~300µs of rayon thread pool
867/// initialization (spawning N-1 threads + setting up work-stealing deques).
868/// Returns results in input order.
869pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
870    // Readahead for moderate file counts: warm page cache by opening/reading/closing
871    // before the parallel hash phase. For 100+ files, per-file overhead
872    // (open+stat+fadvise+close = ~30µs/file) exceeds page cache benefit.
873    if paths.len() <= 20 {
874        readahead_files_all(paths);
875    }
876
877    // Use nostat path that skips fstat syscall — saves ~5µs/file on tiny files.
878    let use_fast = paths.len() >= 2;
879
880    // Use std::thread::scope for lighter-weight parallelism than rayon.
881    // Thread spawn (~30µs × 3 = ~90µs) vs rayon init (~300µs).
882    // For 100 tiny files, the ~200µs savings is significant (total time ~3ms).
883    let num_threads = std::thread::available_parallelism()
884        .map(|n| n.get())
885        .unwrap_or(4)
886        .min(paths.len());
887    let chunk_size = (paths.len() + num_threads - 1) / num_threads;
888
889    std::thread::scope(|s| {
890        let handles: Vec<_> = paths
891            .chunks(chunk_size)
892            .map(|chunk| {
893                s.spawn(move || {
894                    chunk
895                        .iter()
896                        .map(|&path| {
897                            if use_fast {
898                                hash_file_nostat(algo, path)
899                            } else {
900                                hash_file(algo, path)
901                            }
902                        })
903                        .collect::<Vec<_>>()
904                })
905            })
906            .collect();
907
908        handles
909            .into_iter()
910            .flat_map(|h| h.join().unwrap())
911            .collect()
912    })
913}
914
915/// Hash a file without fstat — just open, read until EOF, hash.
916/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
917/// Uses a two-tier buffer strategy: small stack buffer (4KB) for the initial read,
918/// then falls back to a larger stack buffer (64KB) or streaming hash for bigger files.
919/// For benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
920pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
921    let mut file = open_noatime(path)?;
922    // First try a small stack buffer — optimal for tiny files (< 4KB).
923    // Most "many_files" benchmark files are ~55 bytes, so this completes
924    // with a single read() syscall and no fallback.
925    let mut small_buf = [0u8; 4096];
926    match file.read(&mut small_buf) {
927        Ok(0) => return Ok(hash_bytes(algo, &[])),
928        Ok(n) if n < small_buf.len() => {
929            // File fits in small buffer — hash directly (common case)
930            return Ok(hash_bytes(algo, &small_buf[..n]));
931        }
932        Ok(n) => {
933            // Might be more data — fall back to larger buffer
934            let mut buf = [0u8; 65536];
935            buf[..n].copy_from_slice(&small_buf[..n]);
936            let mut total = n;
937            loop {
938                match file.read(&mut buf[total..]) {
939                    Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
940                    Ok(n) => {
941                        total += n;
942                        if total >= buf.len() {
943                            return hash_file(algo, path);
944                        }
945                    }
946                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
947                    Err(e) => return Err(e),
948                }
949            }
950        }
951        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
952            // Retry with full buffer on interrupt
953            let mut buf = [0u8; 65536];
954            let mut total = 0;
955            loop {
956                match file.read(&mut buf[total..]) {
957                    Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
958                    Ok(n) => {
959                        total += n;
960                        if total >= buf.len() {
961                            return hash_file(algo, path);
962                        }
963                    }
964                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
965                    Err(e) => return Err(e),
966                }
967            }
968        }
969        Err(e) => return Err(e),
970    }
971}
972
973/// Issue readahead hints for ALL file paths (no size threshold).
974/// For multi-file benchmarks, even small files benefit from batched readahead.
975#[cfg(target_os = "linux")]
976pub fn readahead_files_all(paths: &[&Path]) {
977    use std::os::unix::io::AsRawFd;
978    for path in paths {
979        if let Ok(file) = open_noatime(path) {
980            if let Ok(meta) = file.metadata() {
981                if meta.file_type().is_file() {
982                    let len = meta.len();
983                    unsafe {
984                        libc::posix_fadvise(
985                            file.as_raw_fd(),
986                            0,
987                            len as i64,
988                            libc::POSIX_FADV_WILLNEED,
989                        );
990                    }
991                }
992            }
993        }
994    }
995}
996
/// No-op fallback: the posix_fadvise-based readahead hints are only issued on Linux.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
999
/// Print hash result in GNU format: "hash  filename\n".
/// Writes raw bytes directly to sidestep std::fmt overhead.
/// The separator is "  " in text mode and " *" in binary mode.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let sep: &[u8; 2] = if binary { b" *" } else { b"  " };
    out.write_all(hash.as_bytes())?;
    out.write_all(sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\n")
}
1014
/// Print hash in GNU format with NUL terminator instead of newline.
/// Same layout as `print_hash`, terminated with '\0' for -z style output.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let sep: &[u8; 2] = if binary { b" *" } else { b"  " };
    out.write_all(hash.as_bytes())?;
    out.write_all(sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\0")
}
1028
// ── Single-write output buffer ─────────────────────────────────────
// For multi-file workloads, batch the entire "hash  filename\n" line into
// a single write() call instead of several smaller writes per line.
1032
// Thread-local output line buffer for batched writes.
// Reused across files to avoid per-file allocation; 256 bytes covers a
// typical "hash  filename" line (a 128-char BLAKE2b-512 digest still fits
// with room for a short filename before the Vec has to grow).
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
1038
1039/// Build and write the standard GNU hash output line in a single write() call.
1040/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
1041/// For escaped filenames: "\hash  escaped_filename\n".
1042#[inline]
1043pub fn write_hash_line(
1044    out: &mut impl Write,
1045    hash: &str,
1046    filename: &str,
1047    binary: bool,
1048    zero: bool,
1049    escaped: bool,
1050) -> io::Result<()> {
1051    LINE_BUF.with(|cell| {
1052        let mut buf = cell.borrow_mut();
1053        buf.clear();
1054        let mode = if binary { b'*' } else { b' ' };
1055        let term = if zero { b'\0' } else { b'\n' };
1056        if escaped {
1057            buf.push(b'\\');
1058        }
1059        buf.extend_from_slice(hash.as_bytes());
1060        buf.push(b' ');
1061        buf.push(mode);
1062        buf.extend_from_slice(filename.as_bytes());
1063        buf.push(term);
1064        out.write_all(&buf)
1065    })
1066}
1067
1068/// Build and write BSD tag format output in a single write() call.
1069/// Format: "ALGO (filename) = hash\n"
1070#[inline]
1071pub fn write_hash_tag_line(
1072    out: &mut impl Write,
1073    algo_name: &str,
1074    hash: &str,
1075    filename: &str,
1076    zero: bool,
1077) -> io::Result<()> {
1078    LINE_BUF.with(|cell| {
1079        let mut buf = cell.borrow_mut();
1080        buf.clear();
1081        let term = if zero { b'\0' } else { b'\n' };
1082        buf.extend_from_slice(algo_name.as_bytes());
1083        buf.extend_from_slice(b" (");
1084        buf.extend_from_slice(filename.as_bytes());
1085        buf.extend_from_slice(b") = ");
1086        buf.extend_from_slice(hash.as_bytes());
1087        buf.push(term);
1088        out.write_all(&buf)
1089    })
1090}
1091
1092/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
1093pub fn print_hash_tag(
1094    out: &mut impl Write,
1095    algo: HashAlgorithm,
1096    hash: &str,
1097    filename: &str,
1098) -> io::Result<()> {
1099    out.write_all(algo.name().as_bytes())?;
1100    out.write_all(b" (")?;
1101    out.write_all(filename.as_bytes())?;
1102    out.write_all(b") = ")?;
1103    out.write_all(hash.as_bytes())?;
1104    out.write_all(b"\n")
1105}
1106
1107/// Print hash in BSD tag format with NUL terminator.
1108pub fn print_hash_tag_zero(
1109    out: &mut impl Write,
1110    algo: HashAlgorithm,
1111    hash: &str,
1112    filename: &str,
1113) -> io::Result<()> {
1114    out.write_all(algo.name().as_bytes())?;
1115    out.write_all(b" (")?;
1116    out.write_all(filename.as_bytes())?;
1117    out.write_all(b") = ")?;
1118    out.write_all(hash.as_bytes())?;
1119    out.write_all(b"\0")
1120}
1121
/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash" for 512-bit, or
/// "BLAKE2b-256 (filename) = hash" for other lengths.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    // 512 is the default digest length and omits the bit-count suffix;
    // other lengths take the (rare, slightly slower) formatted path.
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        _ => write!(out, "BLAKE2b-{} (", bits)?,
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\n")
}
1142
/// Print hash in BSD tag format with BLAKE2b length info and NUL terminator.
/// Same layout as `print_hash_tag_b2sum`, terminated with '\0'.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits != 512 {
        // Non-default digest lengths carry an explicit bit count.
        write!(out, "BLAKE2b-{} (", bits)?;
    } else {
        out.write_all(b"BLAKE2b (")?;
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\0")
}
1160
/// Options for check mode.
pub struct CheckOptions {
    /// Suppress per-file "OK" lines; failures are still reported.
    pub quiet: bool,
    /// Suppress all per-file output (both OK and FAILED lines and read-error
    /// diagnostics); only the returned counts communicate the outcome.
    pub status_only: bool,
    /// Strict mode flag; not consumed by `check_file` itself — callers use it
    /// to decide whether format errors should fail the run.
    pub strict: bool,
    /// Emit a warning to the error stream for each improperly formatted line.
    pub warn: bool,
    /// Silently skip (and count) listed files that do not exist.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
1173
/// Result of check mode verification.
pub struct CheckResult {
    /// Files whose computed hash matched the expected value.
    pub ok: usize,
    /// Files that were read successfully but hashed to a different value.
    pub mismatches: usize,
    /// Input lines that could not be parsed as a checksum entry.
    pub format_errors: usize,
    /// Files that could not be opened or read (excluding ignored-missing ones).
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
1183
/// Verify checksums from a check file.
/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
///
/// Per-file "OK"/"FAILED" results go to `out`; warnings and read-error
/// diagnostics go to `err_out`, honoring the quiet/status-only/warn flags
/// in `opts`.
///
/// # Errors
/// Returns `Err` only for I/O failures on `reader`, `out`, or `err_out`;
/// per-file mismatches, unparsable lines, and unreadable files are tallied
/// in the returned `CheckResult` instead of aborting the run.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    // 1-based line number used in warning messages.
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        // NOTE(review): trim_end strips ALL trailing whitespace, so a filename
        // ending in spaces/tabs will not verify — confirm against the
        // generating side's escaping rules.
        let line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush buffered results first so this warning on err_out
                    // appears in order relative to output already produced.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Keep out/err_out interleaving ordered before writing both.
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Case-insensitive compare: check files may carry upper- or lowercase hex.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
1280
/// Parse a checksum line in any supported format.
///
/// Accepts BSD tag lines ("ALGO (filename) = hash", including "BLAKE2b-NNN"),
/// the GNU text format ("hash  filename"), and the GNU binary format
/// ("hash *filename"), optionally prefixed with a backslash for escaped
/// filenames. Returns (hash, filename) on success.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // BSD tag format first: fixed algo prefixes, then "BLAKE2b-NNN (".
    let tagged = ["MD5 (", "SHA256 (", "BLAKE2b ("]
        .into_iter()
        .find_map(|prefix| line.strip_prefix(prefix))
        .or_else(|| {
            // "BLAKE2b-NNN (filename) = hash" — NNN must be all digits.
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[sp + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = tagged {
        if let Some(close) = rest.find(") = ") {
            return Some((&rest[close + 4..], &rest[..close]));
        }
    }

    // A leading backslash marks an escaped filename; the hash follows it.
    let body = line.strip_prefix('\\').unwrap_or(line);

    // GNU formats: two-space separator (text) or " *" separator (binary),
    // tried in that order.
    for sep in ["  ", " *"] {
        if let Some(idx) = body.find(sep) {
            return Some((&body[..idx], &body[idx + 2..]));
        }
    }
    None
}
1325
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name
/// (e.g., "BLAKE2b-256" -> Some(256); names without a numeric suffix -> None).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let open = line.find(" (")?;
    let algo_part = &line[..open];
    let tail = &line[open + 2..];
    let close = tail.find(") = ")?;
    // Take whatever follows the last '-' in the algo name as the bit count.
    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());
    Some((&tail[close + 4..], &tail[..close], bits))
}
1346
/// Read as many bytes as possible into buf, retrying on partial reads.
/// Ensures each hash update gets a full buffer (fewer update calls = less overhead).
/// Fast path: regular file reads usually return the full buffer on the first call.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    // Common case: one read() fills the whole buffer (or hits immediate EOF).
    let mut filled = reader.read(buf)?;
    if filled == 0 || filled == buf.len() {
        return Ok(filled);
    }
    // Partial read (pipe, tty, slow device): keep reading until full or EOF,
    // ignoring EINTR which consumes no data.
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}
1369
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Fast hex encoding using the 2-byte pair lookup table — one lookup per input byte.
///
/// The output buffer is zero-initialized up front (`vec![0; n]` is a cheap
/// calloc-style allocation) and then overwritten in place. This replaces the
/// previous `String::with_capacity` + `set_len` approach, which formed a
/// `&mut [u8]` over uninitialized memory — a violation of `Vec::set_len`'s
/// safety contract (elements in old_len..new_len must be initialized).
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = vec![0u8; bytes.len() * 2];
    hex_encode_to_slice(bytes, &mut out);
    // SAFETY: HEX_TABLE contains only ASCII hex digits, so `out` is valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}

/// Encode bytes as hex directly into a pre-allocated output slice.
/// Output slice must be at least `bytes.len() * 2` bytes long; exactly
/// `bytes.len() * 2` bytes are written.
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    // chunks_exact_mut(2) pairs each input byte with its 2-byte output slot;
    // the fixed chunk length lets the compiler elide per-element bounds checks
    // (and `b as usize` is always < 256, so the table index cannot panic).
    for (slot, &b) in out.chunks_exact_mut(2).zip(bytes) {
        slot.copy_from_slice(&HEX_TABLE[b as usize]);
    }
}