// coreutils_rs/hash/core.rs

1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[cfg(not(target_os = "linux"))]
11use digest::Digest;
12#[cfg(not(target_os = "linux"))]
13use md5::Md5;
14
/// Supported hash algorithms.
///
/// `PartialEq`/`Eq`/`Hash` are derived so callers can compare algorithm
/// values directly (e.g. dispatch tables, dedup) instead of matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256, fixed 32-byte output.
    Sha256,
    /// MD5, fixed 16-byte output (legacy checksum use).
    Md5,
    /// BLAKE2b, default 64-byte output.
    Blake2b,
}
22
23impl HashAlgorithm {
24    pub fn name(self) -> &'static str {
25        match self {
26            HashAlgorithm::Sha256 => "SHA256",
27            HashAlgorithm::Md5 => "MD5",
28            HashAlgorithm::Blake2b => "BLAKE2b",
29        }
30    }
31}
32
33// ── Generic hash helpers ────────────────────────────────────────────
34
/// Single-shot hash using the Digest trait (non-Linux fallback).
/// Works for any `digest::Digest` implementor; returns lowercase hex.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let digest = D::digest(data);
    hex_encode(&digest)
}
40
/// Streaming hash using the shared thread-local buffer (non-Linux fallback).
/// Feeds the reader to `D` in `HASH_READ_BUF`-sized chunks and hex-encodes
/// the final digest.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|slot| {
        let mut chunk = slot.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut state = D::new();
        let mut filled = read_full(&mut reader, &mut chunk)?;
        while filled > 0 {
            state.update(&chunk[..filled]);
            filled = read_full(&mut reader, &mut chunk)?;
        }
        Ok(hex_encode(&state.finalize()))
    })
}
58
59// ── Public hashing API ──────────────────────────────────────────────
60
/// Buffer size for streaming hash I/O.
/// 8MB: amortizes syscall overhead while still fitting in L3 cache on modern CPUs.
/// Larger buffer means fewer read() calls per file (e.g., 13 reads for 100MB vs 25).
const HASH_READ_BUF: usize = 8 * 1024 * 1024;

// Thread-local reusable buffer for streaming hash I/O.
// Starts empty (`Vec::new()` allocates nothing); grown on demand by
// `ensure_stream_buf` so small-file-only workloads (e.g., "sha256sum *.txt"
// where every file is <1MB) never pay the 8MB allocation.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}
72
73/// Ensure the streaming buffer is at least HASH_READ_BUF bytes.
74/// Called only on the streaming path, so small-file workloads never allocate 8MB.
75#[inline]
76fn ensure_stream_buf(buf: &mut Vec<u8>) {
77    if buf.len() < HASH_READ_BUF {
78        buf.resize(HASH_READ_BUF, 0);
79    }
80}
81
82// ── SHA-256 ───────────────────────────────────────────────────────────
83
84/// Single-shot SHA-256 using OpenSSL's optimized assembly (SHA-NI on x86).
85/// Linux only — OpenSSL is not available on Windows/macOS in CI.
86#[cfg(target_os = "linux")]
87fn sha256_bytes(data: &[u8]) -> String {
88    // For tiny data (<8KB): use sha2 crate directly, avoiding OpenSSL's
89    // EVP_MD_CTX_new/free overhead (~700ns per call). sha2 with asm feature
90    // uses SHA-NI instructions and has no heap allocation, just stack state.
91    // For 100 × 55-byte files: saves ~70µs total.
92    if data.len() < TINY_FILE_LIMIT as usize {
93        use digest::Digest;
94        return hex_encode(&sha2::Sha256::digest(data));
95    }
96    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
97        .expect("SHA256 hash failed");
98    hex_encode(&digest)
99}
100
/// Single-shot SHA-256 using ring's BoringSSL assembly (Windows and other non-Apple).
/// One-shot API: no context object to allocate or free.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    hex_encode(ring::digest::digest(&ring::digest::SHA256, data).as_ref())
}
106
/// Single-shot SHA-256 using sha2 crate (macOS fallback — ring doesn't compile on Apple Silicon).
/// Delegates to the generic Digest-trait helper.
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha256>(data)
}
112
113/// Streaming SHA-256 using OpenSSL's optimized assembly.
114/// Linux only — OpenSSL is not available on Windows/macOS in CI.
115#[cfg(target_os = "linux")]
116fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
117    STREAM_BUF.with(|cell| {
118        let mut buf = cell.borrow_mut();
119        ensure_stream_buf(&mut buf);
120        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
121            .map_err(|e| io::Error::other(e))?;
122        loop {
123            let n = read_full(&mut reader, &mut buf)?;
124            if n == 0 {
125                break;
126            }
127            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
128        }
129        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
130        Ok(hex_encode(&digest))
131    })
132}
133
/// Streaming SHA-256 using ring's BoringSSL assembly (Windows and other non-Apple).
/// Feeds the reader through the shared thread-local buffer in 8MB chunks.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|slot| {
        let mut chunk = slot.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
        let mut filled = read_full(&mut reader, &mut chunk)?;
        while filled > 0 {
            ctx.update(&chunk[..filled]);
            filled = read_full(&mut reader, &mut chunk)?;
        }
        Ok(hex_encode(ctx.finish().as_ref()))
    })
}
151
/// Streaming SHA-256 using sha2 crate (macOS fallback).
/// Delegates to the generic Digest-trait streaming helper.
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha256>(reader)
}
157
158/// Compute hash of a byte slice directly (zero-copy fast path).
159pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
160    match algo {
161        HashAlgorithm::Sha256 => sha256_bytes(data),
162        HashAlgorithm::Md5 => md5_bytes(data),
163        HashAlgorithm::Blake2b => {
164            let hash = blake2b_simd::blake2b(data);
165            hex_encode(hash.as_bytes())
166        }
167    }
168}
169
170// ── MD5 ─────────────────────────────────────────────────────────────
171
172/// Single-shot MD5 using OpenSSL's optimized assembly (Linux).
173#[cfg(target_os = "linux")]
174fn md5_bytes(data: &[u8]) -> String {
175    // For tiny data (<8KB): use md5 crate directly, avoiding OpenSSL's
176    // EVP_MD_CTX_new/free overhead (~700ns per call). md5 with asm feature
177    // uses optimized assembly and has no heap allocation.
178    if data.len() < TINY_FILE_LIMIT as usize {
179        use digest::Digest;
180        return hex_encode(&md5::Md5::digest(data));
181    }
182    let digest =
183        openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
184    hex_encode(&digest)
185}
186
/// Single-shot MD5 using md-5 crate (non-Linux fallback).
/// Delegates to the generic Digest-trait helper.
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    hash_digest::<Md5>(data)
}
192
193/// Compute hash of data from a reader, returning hex string.
194pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
195    match algo {
196        HashAlgorithm::Sha256 => sha256_reader(reader),
197        HashAlgorithm::Md5 => md5_reader(reader),
198        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
199    }
200}
201
202/// Streaming MD5 using OpenSSL's optimized assembly (Linux).
203#[cfg(target_os = "linux")]
204fn md5_reader(mut reader: impl Read) -> io::Result<String> {
205    STREAM_BUF.with(|cell| {
206        let mut buf = cell.borrow_mut();
207        ensure_stream_buf(&mut buf);
208        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
209            .map_err(|e| io::Error::other(e))?;
210        loop {
211            let n = read_full(&mut reader, &mut buf)?;
212            if n == 0 {
213                break;
214            }
215            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
216        }
217        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
218        Ok(hex_encode(&digest))
219    })
220}
221
/// Streaming MD5 using md-5 crate (non-Linux fallback).
/// Delegates to the generic Digest-trait streaming helper.
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}
227
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
/// Relaxed ordering suffices: the flag only ever flips true→false, and a stale
/// read merely costs one extra open() attempt.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
232
233/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
234/// Caches whether O_NOATIME works to avoid double-open on every file.
235#[cfg(target_os = "linux")]
236fn open_noatime(path: &Path) -> io::Result<File> {
237    use std::os::unix::fs::OpenOptionsExt;
238    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
239        match std::fs::OpenOptions::new()
240            .read(true)
241            .custom_flags(libc::O_NOATIME)
242            .open(path)
243        {
244            Ok(f) => return Ok(f),
245            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
246                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
247                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
248            }
249            Err(e) => return Err(e), // Real error, propagate
250        }
251    }
252    File::open(path)
253}
254
/// Non-Linux fallback: O_NOATIME is a Linux-only flag, so use a plain open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
259
/// Open a file and get its metadata in one step.
/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
///
/// Returns `(file, size_in_bytes, is_regular_file)`.
#[cfg(target_os = "linux")]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let fd = {
        use std::os::unix::io::AsRawFd;
        file.as_raw_fd()
    };
    // SAFETY: libc::stat is plain-old-data for which all-zero bytes is a valid
    // value; fstat overwrites it on success, and we only read it after the
    // success check below.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        return Err(io::Error::last_os_error());
    }
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
    // st_size is i64; negative sizes do not occur for the file types we open,
    // so the cast is effectively lossless.
    let size = stat.st_size as u64;
    Ok((file, size, is_regular))
}
278
/// Portable fallback: open, then query metadata through std.
/// Returns `(file, size_in_bytes, is_regular_file)` like the Linux version.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let size = meta.len();
    let is_regular = meta.file_type().is_file();
    Ok((file, size, is_regular))
}
286
/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
/// Shared by the streaming paths and `readahead_files`.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
/// Also reused as the "skip OpenSSL context setup" cutoff in the
/// single-shot SHA-256/MD5 paths.
const TINY_FILE_LIMIT: u64 = 8 * 1024;
304
// Thread-local reusable buffer for single-read hash.
// Grows lazily up to SMALL_FILE_LIMIT (16MB). Initial 64KB allocation
// handles tiny files; larger files trigger one grow that persists for reuse
// across subsequent files on the same thread.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
311
312/// I/O-pipelined hash for large files (>=16MB) on Linux.
313/// Uses a reader thread that reads 4MB chunks while the main thread hashes,
314/// overlapping NVMe/SSD read latency with SHA-NI computation.
315/// For a 100MB file: I/O ~15ms from cache, hash ~40ms → pipelined ~42ms vs ~55ms sequential.
316#[cfg(target_os = "linux")]
317fn hash_file_pipelined(algo: HashAlgorithm, mut file: File, file_size: u64) -> io::Result<String> {
318    use std::os::unix::io::AsRawFd;
319
320    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4MB per buffer
321
322    // Hint kernel for sequential access
323    unsafe {
324        libc::posix_fadvise(
325            file.as_raw_fd(),
326            0,
327            file_size as i64,
328            libc::POSIX_FADV_SEQUENTIAL,
329        );
330    }
331
332    // Channel for sending filled buffers from reader to hasher.
333    // sync_channel(1) provides natural double-buffering: reader can fill one
334    // buffer ahead while hasher processes the current one.
335    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
336    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
337
338    // Seed the buffer return channel with an initial buffer
339    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
340
341    // Reader thread: reads file into buffers and sends them to hasher
342    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
343        let mut own_buf = vec![0u8; PIPE_BUF_SIZE];
344        loop {
345            // Try to get a returned buffer from hasher, or use our own
346            let mut buf = buf_rx
347                .try_recv()
348                .unwrap_or_else(|_| std::mem::take(&mut own_buf));
349            if buf.is_empty() {
350                buf = vec![0u8; PIPE_BUF_SIZE];
351            }
352
353            let mut total = 0;
354            while total < buf.len() {
355                match file.read(&mut buf[total..]) {
356                    Ok(0) => break,
357                    Ok(n) => total += n,
358                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
359                    Err(e) => return Err(e),
360                }
361            }
362            if total == 0 {
363                break;
364            }
365            if tx.send((buf, total)).is_err() {
366                break;
367            }
368        }
369        Ok(())
370    });
371
372    // Hasher runs on the calling thread
373    let hash_result = match algo {
374        HashAlgorithm::Sha256 => {
375            let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
376                .map_err(|e| io::Error::other(e))?;
377            while let Ok((buf, n)) = rx.recv() {
378                hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
379                // Return the buffer to reader for reuse
380                let _ = buf_tx.send(buf);
381            }
382            let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
383            Ok(hex_encode(&digest))
384        }
385        HashAlgorithm::Md5 => {
386            let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
387                .map_err(|e| io::Error::other(e))?;
388            while let Ok((buf, n)) = rx.recv() {
389                hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
390                let _ = buf_tx.send(buf);
391            }
392            let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
393            Ok(hex_encode(&digest))
394        }
395        HashAlgorithm::Blake2b => {
396            let mut state = blake2b_simd::Params::new().to_state();
397            while let Ok((buf, n)) = rx.recv() {
398                state.update(&buf[..n]);
399                let _ = buf_tx.send(buf);
400            }
401            Ok(hex_encode(state.finalize().as_bytes()))
402        }
403    };
404
405    // Wait for reader thread to finish and propagate any I/O errors
406    match reader_handle.join() {
407        Ok(Ok(())) => {}
408        Ok(Err(e)) => {
409            // If hasher already produced a result, prefer the reader's I/O error
410            if hash_result.is_ok() {
411                return Err(e);
412            }
413        }
414        Err(_) => {
415            return Err(io::Error::other("reader thread panicked"));
416        }
417    }
418
419    hash_result
420}
421
/// Hash a file by path. Uses I/O pipelining for large files on Linux,
/// mmap with HUGEPAGE hints as fallback, single-read for small files,
/// and streaming read for non-regular files.
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    // Empty regular file: hash of the empty input, no further I/O.
    if is_regular && file_size == 0 {
        return Ok(hash_bytes(algo, &[]));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return hash_file_tiny(algo, file, file_size as usize);
        }
        // Large files (>=16MB): use I/O pipelining on Linux to overlap read + hash
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return hash_file_pipelined(algo, file, file_size);
            }
            // Non-Linux: mmap fallback. On mmap failure we fall through to
            // the streaming path at the bottom (the small-file branch below
            // is skipped because file_size >= SMALL_FILE_LIMIT).
            #[cfg(not(target_os = "linux"))]
            {
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(hash_bytes(algo, &mmap));
                }
            }
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash.
        // This avoids Hasher context allocation + streaming overhead for each file.
        if file_size < SMALL_FILE_LIMIT {
            return hash_file_small(algo, file, file_size as usize);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    hash_reader(algo, file)
}
469
470/// Hash a tiny file (<8KB) using a stack-allocated buffer.
471/// Single read() syscall, zero heap allocation on the data path.
472/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
473#[inline]
474fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
475    let mut buf = [0u8; 8192];
476    let mut total = 0;
477    // Read with known size — usually completes in a single read() for regular files
478    while total < size {
479        match file.read(&mut buf[total..size]) {
480            Ok(0) => break,
481            Ok(n) => total += n,
482            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
483            Err(e) => return Err(e),
484        }
485    }
486    Ok(hash_bytes(algo, &buf[..total]))
487}
488
/// Hash a small file by reading it entirely into a thread-local buffer,
/// then using the single-shot hash function. Avoids per-file Hasher allocation.
///
/// `size` comes from fstat; if the file shrank in the meantime only the
/// bytes actually read (`total`) are hashed.
#[inline]
fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        // Reset length but keep allocation, then grow if needed
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
        // directly and only access buf[..total] where total <= size <= capacity.
        // NOTE(review): this hands `read` a slice over uninitialized bytes —
        // common practice for u8 buffers but formally outside Read's documented
        // contract; consider `resize(size, 0)` or `read_buf` once stable.
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(hash_bytes(algo, &buf[..total]))
    })
}
515
/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
    let stdin = io::stdin();
    // Hint kernel for sequential access if stdin is a regular file (redirect)
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // SAFETY: zeroed libc::stat is a valid all-zero value; fstat overwrites
        // it on success, and it is only read when fstat returned 0.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            // Best-effort hint — the return value is intentionally ignored.
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    // Streaming hash — works for both pipe and file-redirect stdin
    hash_reader(algo, stdin.lock())
}
537
/// Check if parallel hashing is worthwhile for the given file paths.
/// Any batch of two or more files goes parallel: rayon's thread pool is
/// lazily initialized once and reused, so per-file work-stealing overhead is
/// negligible (~1µs), and dropping a stat()-based size heuristic avoids N
/// extra syscalls for N files.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
545
546/// Issue readahead hints for a list of file paths to warm the page cache.
547/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
548/// Only issues hints for files >= 1MB; small files are read fast enough
549/// that the fadvise syscall overhead isn't worth it.
550#[cfg(target_os = "linux")]
551pub fn readahead_files(paths: &[&Path]) {
552    use std::os::unix::io::AsRawFd;
553    for path in paths {
554        if let Ok(file) = open_noatime(path) {
555            if let Ok(meta) = file.metadata() {
556                let len = meta.len();
557                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
558                    unsafe {
559                        libc::posix_fadvise(
560                            file.as_raw_fd(),
561                            0,
562                            len as i64,
563                            libc::POSIX_FADV_WILLNEED,
564                        );
565                    }
566                }
567            }
568        }
569    }
570}
571
/// No-op on non-Linux: posix_fadvise-style readahead hints are not issued.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No-op on non-Linux
}
576
577// --- BLAKE2b variable-length functions (using blake2b_simd) ---
578
579/// Hash raw data with BLAKE2b variable output length.
580/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
581pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
582    let hash = blake2b_simd::Params::new()
583        .hash_length(output_bytes)
584        .hash(data);
585    hex_encode(hash.as_bytes())
586}
587
588/// Hash a reader with BLAKE2b variable output length.
589/// Uses thread-local buffer for cache-friendly streaming.
590pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
591    STREAM_BUF.with(|cell| {
592        let mut buf = cell.borrow_mut();
593        ensure_stream_buf(&mut buf);
594        let mut state = blake2b_simd::Params::new()
595            .hash_length(output_bytes)
596            .to_state();
597        loop {
598            let n = read_full(&mut reader, &mut buf)?;
599            if n == 0 {
600                break;
601            }
602            state.update(&buf[..n]);
603        }
604        Ok(hex_encode(state.finalize().as_bytes()))
605    })
606}
607
/// Hash a file with BLAKE2b variable output length.
/// Uses mmap for large files (zero-copy), single-read for small files,
/// and streaming read as fallback. Unlike `hash_file`, large files are not
/// pipelined here — they go through mmap on all platforms.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    // Empty regular file: hash of the empty input, no further I/O.
    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // mmap for large files — zero-copy, eliminates multiple read() syscalls
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            if file_size >= FADVISE_MIN_SIZE {
                use std::os::unix::io::AsRawFd;
                unsafe {
                    libc::posix_fadvise(
                        file.as_raw_fd(),
                        0,
                        file_size as i64,
                        libc::POSIX_FADV_SEQUENTIAL,
                    );
                }
            }
            // No MAP_POPULATE — HUGEPAGE first, then WILLNEED (same as hash_file)
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                #[cfg(target_os = "linux")]
                {
                    if file_size >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                return Ok(blake2b_hash_data(&mmap, output_bytes));
            }
            // mmap failure falls through to the streaming path below.
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    blake2b_hash_reader(file, output_bytes)
}
667
668/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
669#[inline]
670fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
671    let mut buf = [0u8; 8192];
672    let mut total = 0;
673    while total < size {
674        match file.read(&mut buf[total..size]) {
675            Ok(0) => break,
676            Ok(n) => total += n,
677            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
678            Err(e) => return Err(e),
679        }
680    }
681    Ok(blake2b_hash_data(&buf[..total], output_bytes))
682}
683
/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
/// Mirrors `hash_file_small`; only the bytes actually read are hashed.
#[inline]
fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve; only buf[..total] with
        // total <= size is read back.
        // NOTE(review): as in hash_file_small, this exposes uninitialized
        // bytes to `read` — accepted in practice for u8 but formally outside
        // Read's documented contract.
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(blake2b_hash_data(&buf[..total], output_bytes))
    })
}
707
/// Hash stdin with BLAKE2b variable output length.
/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
    let stdin = io::stdin();
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // SAFETY: zeroed libc::stat is a valid all-zero value; fstat overwrites
        // it on success, and it is only read when fstat returned 0.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            // Best-effort hint — the return value is intentionally ignored.
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    blake2b_hash_reader(stdin.lock(), output_bytes)
}
728
/// Internal enum for file content in batch hashing.
/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
enum FileContent {
    // Zero-copy memory-mapped view of a regular file.
    Mmap(memmap2::Mmap),
    // Owned bytes: tiny files, mmap-failure fallback, or non-regular files.
    Buf(Vec<u8>),
}
735
736impl AsRef<[u8]> for FileContent {
737    fn as_ref(&self) -> &[u8] {
738        match self {
739            FileContent::Mmap(m) => m,
740            FileContent::Buf(v) => v,
741        }
742    }
743}
744
745/// Open a file and load its content for batch hashing.
746/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
747/// files (zero-copy), and read-to-end for non-regular files.
748fn open_file_content(path: &Path) -> io::Result<FileContent> {
749    let (file, size, is_regular) = open_and_stat(path)?;
750    if is_regular && size == 0 {
751        return Ok(FileContent::Buf(Vec::new()));
752    }
753    if is_regular && size > 0 {
754        // Tiny files: read directly into Vec. The mmap syscall + page fault
755        // overhead exceeds the data transfer cost for files under 8KB.
756        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
757        if size < TINY_FILE_LIMIT {
758            let mut buf = vec![0u8; size as usize];
759            let mut total = 0;
760            let mut f = file;
761            while total < size as usize {
762                match f.read(&mut buf[total..]) {
763                    Ok(0) => break,
764                    Ok(n) => total += n,
765                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
766                    Err(e) => return Err(e),
767                }
768            }
769            buf.truncate(total);
770            return Ok(FileContent::Buf(buf));
771        }
772        // No MAP_POPULATE — HUGEPAGE first, then WILLNEED (same as hash_file)
773        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
774        if let Ok(mmap) = mmap_result {
775            #[cfg(target_os = "linux")]
776            {
777                if size >= 2 * 1024 * 1024 {
778                    let _ = mmap.advise(memmap2::Advice::HugePage);
779                }
780                let _ = mmap.advise(memmap2::Advice::Sequential);
781                let _ = mmap.advise(memmap2::Advice::WillNeed);
782            }
783            return Ok(FileContent::Mmap(mmap));
784        }
785        // Fallback: read into Vec
786        let mut buf = vec![0u8; size as usize];
787        let mut total = 0;
788        let mut f = file;
789        while total < size as usize {
790            match f.read(&mut buf[total..]) {
791                Ok(0) => break,
792                Ok(n) => total += n,
793                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
794                Err(e) => return Err(e),
795            }
796        }
797        buf.truncate(total);
798        return Ok(FileContent::Buf(buf));
799    }
800    // Non-regular: read to end
801    let mut buf = Vec::new();
802    let mut f = file;
803    f.read_to_end(&mut buf)?;
804    Ok(FileContent::Buf(buf))
805}
806
807/// Open a file and read all content without fstat — just open+read+close.
808/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
809/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
810/// then falls back to larger buffer or mmap for bigger files.
811fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
812    let mut file = open_noatime(path)?;
813    // Try small buffer first — optimal for benchmark's ~55 byte files.
814    // Single read() + to_vec() with exact size for minimal allocation.
815    let mut small_buf = [0u8; 4096];
816    match file.read(&mut small_buf) {
817        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
818        Ok(n) if n < small_buf.len() => {
819            // File fits in small buffer — done (common case for tiny files)
820            return Ok(FileContent::Buf(small_buf[..n].to_vec()));
821        }
822        Ok(n) => {
823            // Might be more data — fall back to larger buffer
824            let mut buf = [0u8; 65536];
825            buf[..n].copy_from_slice(&small_buf[..n]);
826            let mut total = n;
827            loop {
828                match file.read(&mut buf[total..]) {
829                    Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
830                    Ok(n) => {
831                        total += n;
832                        if total >= buf.len() {
833                            return open_file_content(path);
834                        }
835                    }
836                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
837                    Err(e) => return Err(e),
838                }
839            }
840        }
841        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
842            let mut buf = [0u8; 65536];
843            let mut total = 0;
844            loop {
845                match file.read(&mut buf[total..]) {
846                    Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
847                    Ok(n) => {
848                        total += n;
849                        if total >= buf.len() {
850                            return open_file_content(path);
851                        }
852                    }
853                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
854                    Err(e) => return Err(e),
855                }
856            }
857        }
858        Err(e) => return Err(e),
859    }
860}
861
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order, with per-file
/// read errors preserved in their original slots.
///
/// `output_bytes` is the digest length handed to `Params::hash_length`.
///
/// For 100 files on AVX2: 4x throughput from SIMD parallelism.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    // For small file counts (≤10), load sequentially to avoid thread::scope
    // overhead (~120µs). For many files, use parallel loading with lightweight
    // OS threads. For 100+ files, use fast path that skips fstat.
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        // Ceiling division so every path lands in exactly one chunk.
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            // Joining in spawn order preserves chunk order, so flat_map
            // reassembles the results in original path order.
            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        // (original index, borrowed content) for every successfully read file.
        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Extract hashes: jobs[j] corresponds to ok_entries[j], which
        // remembers the original input position.
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here so it can be moved below

    // Phase 4: Combine hashes and errors in original order.
    // Every Ok slot was hashed in phases 2/3, so the unwrap cannot fire.
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
951
/// Batch-hash multiple files with SHA-256/MD5 using work-stealing parallelism.
///
/// Files are sorted by size (largest first) so the biggest files start
/// processing immediately. Each worker thread grabs the next unprocessed file
/// via an atomic index, eliminating tail latency from uneven file sizes.
/// Per-file I/O errors are returned in their slot rather than aborting the
/// whole batch. Returns results in input order.
pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Build (original_index, path, size) tuples — stat all files for scheduling.
    // The stat cost (~5µs/file) is repaid by better work distribution.
    // Unreadable files get size 0 here and surface their error from hash_file.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Issue readahead for the largest files to warm the page cache.
    // The fd is closed immediately; the WILLNEED hint applies to the file's
    // page-cache pages, not the descriptor, so readahead still proceeds.
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    // SAFETY: the fd is valid for the lifetime of `file`;
                    // posix_fadvise is a pure kernel hint with no memory effects.
                    unsafe {
                        libc::posix_fadvise(
                            file.as_raw_fd(),
                            0,
                            size as i64,
                            libc::POSIX_FADV_WILLNEED,
                        );
                    }
                }
            }
        }
    }

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        // Reborrow so the `move` closures capture shared references rather
        // than trying to move the owned values into each thread.
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        // Relaxed suffices: fetch_add alone guarantees each
                        // index is claimed by exactly one worker, and no other
                        // shared state is synchronized through this counter.
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = hash_file(algo, path);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // Every index 0..n was claimed by some worker, so the fallback error
        // is defensive only.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1038
1039/// Hash a file without fstat — just open, read until EOF, hash.
1040/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
1041/// Uses a two-tier buffer strategy: small stack buffer (4KB) for the initial read,
1042/// then falls back to a larger stack buffer (64KB) or streaming hash for bigger files.
1043/// For benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
1044pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
1045    let mut file = open_noatime(path)?;
1046    // First try a small stack buffer — optimal for tiny files (< 4KB).
1047    // Most "many_files" benchmark files are ~55 bytes, so this completes
1048    // with a single read() syscall and no fallback.
1049    let mut small_buf = [0u8; 4096];
1050    match file.read(&mut small_buf) {
1051        Ok(0) => return Ok(hash_bytes(algo, &[])),
1052        Ok(n) if n < small_buf.len() => {
1053            // File fits in small buffer — hash directly (common case)
1054            return Ok(hash_bytes(algo, &small_buf[..n]));
1055        }
1056        Ok(n) => {
1057            // Might be more data — fall back to larger buffer
1058            let mut buf = [0u8; 65536];
1059            buf[..n].copy_from_slice(&small_buf[..n]);
1060            let mut total = n;
1061            loop {
1062                match file.read(&mut buf[total..]) {
1063                    Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1064                    Ok(n) => {
1065                        total += n;
1066                        if total >= buf.len() {
1067                            return hash_file(algo, path);
1068                        }
1069                    }
1070                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1071                    Err(e) => return Err(e),
1072                }
1073            }
1074        }
1075        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1076            // Retry with full buffer on interrupt
1077            let mut buf = [0u8; 65536];
1078            let mut total = 0;
1079            loop {
1080                match file.read(&mut buf[total..]) {
1081                    Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1082                    Ok(n) => {
1083                        total += n;
1084                        if total >= buf.len() {
1085                            return hash_file(algo, path);
1086                        }
1087                    }
1088                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1089                    Err(e) => return Err(e),
1090                }
1091            }
1092        }
1093        Err(e) => return Err(e),
1094    }
1095}
1096
1097/// Issue readahead hints for ALL file paths (no size threshold).
1098/// For multi-file benchmarks, even small files benefit from batched readahead.
1099#[cfg(target_os = "linux")]
1100pub fn readahead_files_all(paths: &[&Path]) {
1101    use std::os::unix::io::AsRawFd;
1102    for path in paths {
1103        if let Ok(file) = open_noatime(path) {
1104            if let Ok(meta) = file.metadata() {
1105                if meta.file_type().is_file() {
1106                    let len = meta.len();
1107                    unsafe {
1108                        libc::posix_fadvise(
1109                            file.as_raw_fd(),
1110                            0,
1111                            len as i64,
1112                            libc::POSIX_FADV_WILLNEED,
1113                        );
1114                    }
1115                }
1116            }
1117        }
1118    }
1119}
1120
/// No-op on non-Linux platforms — the posix_fadvise readahead hint used by
/// the Linux implementation has no portable equivalent here.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
1123
/// Print hash result in GNU format: "hash  filename\n" (text mode) or
/// "hash *filename\n" (binary mode).
/// Raw byte writes keep this off the std::fmt path.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // Two-byte separator: space + mode marker ('*' binary, ' ' text).
    let sep = [b' ', if binary { b'*' } else { b' ' }];
    out.write_all(hash.as_bytes())?;
    out.write_all(&sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\n")
}
1138
/// Print hash in GNU format terminated by NUL instead of newline:
/// "hash  filename\0" (text mode) or "hash *filename\0" (binary mode).
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // Two-byte separator: space + mode marker ('*' binary, ' ' text).
    let sep = [b' ', if binary { b'*' } else { b' ' }];
    out.write_all(hash.as_bytes())?;
    out.write_all(&sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\0")
}
1152
1153// ── Single-write output buffer ─────────────────────────────────────
1154// For multi-file workloads, batch the entire "hash  filename\n" line into
1155// a single write() call. This halves the number of BufWriter flushes.
1156
// Thread-local output line buffer for batched writes.
// Reused across files to avoid per-file allocation; 256 bytes comfortably
// fits a 128-char hex digest plus separator and a typical filename, so the
// buffer rarely grows after the first use.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
1162
1163/// Build and write the standard GNU hash output line in a single write() call.
1164/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
1165/// For escaped filenames: "\hash  escaped_filename\n".
1166#[inline]
1167pub fn write_hash_line(
1168    out: &mut impl Write,
1169    hash: &str,
1170    filename: &str,
1171    binary: bool,
1172    zero: bool,
1173    escaped: bool,
1174) -> io::Result<()> {
1175    LINE_BUF.with(|cell| {
1176        let mut buf = cell.borrow_mut();
1177        buf.clear();
1178        let mode = if binary { b'*' } else { b' ' };
1179        let term = if zero { b'\0' } else { b'\n' };
1180        if escaped {
1181            buf.push(b'\\');
1182        }
1183        buf.extend_from_slice(hash.as_bytes());
1184        buf.push(b' ');
1185        buf.push(mode);
1186        buf.extend_from_slice(filename.as_bytes());
1187        buf.push(term);
1188        out.write_all(&buf)
1189    })
1190}
1191
1192/// Build and write BSD tag format output in a single write() call.
1193/// Format: "ALGO (filename) = hash\n"
1194#[inline]
1195pub fn write_hash_tag_line(
1196    out: &mut impl Write,
1197    algo_name: &str,
1198    hash: &str,
1199    filename: &str,
1200    zero: bool,
1201) -> io::Result<()> {
1202    LINE_BUF.with(|cell| {
1203        let mut buf = cell.borrow_mut();
1204        buf.clear();
1205        let term = if zero { b'\0' } else { b'\n' };
1206        buf.extend_from_slice(algo_name.as_bytes());
1207        buf.extend_from_slice(b" (");
1208        buf.extend_from_slice(filename.as_bytes());
1209        buf.extend_from_slice(b") = ");
1210        buf.extend_from_slice(hash.as_bytes());
1211        buf.push(term);
1212        out.write_all(&buf)
1213    })
1214}
1215
1216/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
1217pub fn print_hash_tag(
1218    out: &mut impl Write,
1219    algo: HashAlgorithm,
1220    hash: &str,
1221    filename: &str,
1222) -> io::Result<()> {
1223    out.write_all(algo.name().as_bytes())?;
1224    out.write_all(b" (")?;
1225    out.write_all(filename.as_bytes())?;
1226    out.write_all(b") = ")?;
1227    out.write_all(hash.as_bytes())?;
1228    out.write_all(b"\n")
1229}
1230
1231/// Print hash in BSD tag format with NUL terminator.
1232pub fn print_hash_tag_zero(
1233    out: &mut impl Write,
1234    algo: HashAlgorithm,
1235    hash: &str,
1236    filename: &str,
1237) -> io::Result<()> {
1238    out.write_all(algo.name().as_bytes())?;
1239    out.write_all(b" (")?;
1240    out.write_all(filename.as_bytes())?;
1241    out.write_all(b") = ")?;
1242    out.write_all(hash.as_bytes())?;
1243    out.write_all(b"\0")
1244}
1245
/// Print hash in BSD tag format with the BLAKE2b digest length spelled out:
/// "BLAKE2b (filename) = hash\n" for the default 512 bits, otherwise
/// "BLAKE2b-NNN (filename) = hash\n".
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        out.write_all(b"BLAKE2b (")?;
    } else {
        // Rare non-512 path: std::fmt overhead is negligible per file here.
        write!(out, "BLAKE2b-{} (", bits)?;
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
1266
/// Print hash in BSD tag format with BLAKE2b length info and a NUL
/// terminator: "BLAKE2b (filename) = hash\0" for 512 bits, otherwise
/// "BLAKE2b-NNN (filename) = hash\0".
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        out.write_all(b"BLAKE2b (")?;
    } else {
        // Rare non-512 path: std::fmt overhead is negligible per file here.
        write!(out, "BLAKE2b-{} (", bits)?;
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
1284
/// Options for check (`--check`) mode.
pub struct CheckOptions {
    /// Suppress per-file "OK" lines; failures are still printed.
    pub quiet: bool,
    /// Print nothing per file; the caller inspects `CheckResult` instead.
    pub status_only: bool,
    // NOTE(review): `strict` is not consulted by `check_file` itself —
    // presumably the caller turns `format_errors > 0` into a failing exit
    // status when this is set. Confirm against the callers.
    pub strict: bool,
    /// Emit a diagnostic for each improperly formatted checksum line.
    pub warn: bool,
    /// Skip (and count) missing files instead of reporting a read error.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
1297
/// Tallies produced by check-mode verification (`check_file`).
pub struct CheckResult {
    /// Files whose computed hash matched the expected hash.
    pub ok: usize,
    /// Files hashed successfully whose hash did NOT match.
    pub mismatches: usize,
    /// Lines that could not be parsed as any supported checksum format.
    pub format_errors: usize,
    /// Files that could not be opened or read (excluding ignored-missing).
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
1307
/// Verify checksums read from a check file against files on disk.
///
/// Each line should be "hash  filename", "hash *filename", or the BSD tag
/// form "ALGO (filename) = hash"; blank lines are skipped. Per-file status
/// ("OK" / "FAILED" / "FAILED open or read") goes to `out`, diagnostics to
/// `err_out`, both gated by `opts` (quiet / status_only / warn).
///
/// # Errors
/// Returns `Err` only for I/O failures on `reader`, `out`, or `err_out`.
/// Per-file problems (mismatches, unreadable targets, bad lines) never abort
/// the run — they are tallied in the returned `CheckResult`.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        let line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush `out` first so the stderr warning cannot appear
                    // before earlier buffered per-file status lines.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                // --ignore-missing: silently skip nonexistent files.
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Same flush-before-stderr ordering as above.
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Check files may carry upper- or lower-case hex digests.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
1404
/// Parse a checksum line in any supported format.
///
/// Accepts three shapes:
///   * BSD tag:  "ALGO (filename) = hash" (MD5, SHA256, BLAKE2b, BLAKE2b-NNN)
///   * standard: "hash  filename", optionally prefixed with '\' for escaped names
///   * binary:   "hash *filename"
///
/// Returns `(expected_hash, filename)` on success, `None` for unparsable lines.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // BSD tag format first: "ALGO (filename) = hash".
    let tag_rest = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // "BLAKE2b-NNN (filename) = hash" — NNN must be all digits.
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            after[..sp]
                .bytes()
                .all(|b| b.is_ascii_digit())
                .then(|| &after[sp + 2..])
        });
    if let Some(rest) = tag_rest {
        if let Some(close) = rest.find(") = ") {
            return Some((&rest[close + 4..], &rest[..close]));
        }
    }

    // GNU formats. A leading backslash marks an escaped filename.
    let body = line.strip_prefix('\\').unwrap_or(line);

    // Text mode: "hash  filename"
    if let Some(sep) = body.find("  ") {
        return Some((&body[..sep], &body[sep + 2..]));
    }
    // Binary mode: "hash *filename"
    let sep = body.find(" *")?;
    Some((&body[..sep], &body[sep + 2..]))
}
1449
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name (e.g., BLAKE2b-256 -> Some(256));
/// `None` when the algo name has no parseable "-NNN" suffix.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    // Split at the first " (" and the first ") = " after it.
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.split_once(") = ")?;

    // Optional bit length from the algo name ("BLAKE2b-256" -> Some(256)).
    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
1470
/// Read as many bytes as possible into buf, retrying on partial reads.
/// Ensures each hash update gets a full buffer (fewer update calls = less overhead).
/// Fast path: regular file reads usually return the full buffer on the first call.
///
/// Fix: the first read now also retries on EINTR. Previously an
/// `Interrupted` error on the initial read was propagated as `Err` while
/// every subsequent read retried — an inconsistency that could abort a
/// streaming hash spuriously on a signal.
///
/// # Errors
/// Propagates any non-`Interrupted` I/O error from `reader`.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    // Fast path: first read() usually fills the entire buffer for regular
    // files. Retry on EINTR just like the slow path below.
    let n = loop {
        match reader.read(buf) {
            Ok(n) => break n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    };
    if n == buf.len() || n == 0 {
        return Ok(n);
    }
    // Slow path: partial read — retry to fill buffer (pipes, slow devices)
    let mut total = n;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
1493
/// Compile-time generated 2-byte hex pair lookup table.
/// Maps each byte value to its two lowercase hex digits, so encoding needs
/// a single table lookup per input byte instead of two nibble computations.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut value = 0usize;
    // `const fn` cannot use `for` loops on stable, hence the `while`.
    while value < 256 {
        table[value] = [DIGITS[value / 16], DIGITS[value % 16]];
        value += 1;
    }
    table
}

/// Byte -> "xx" lookup table, built once at compile time.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();
1508
1509/// Fast hex encoding using 2-byte pair lookup table — one lookup per input byte.
1510/// Uses String directly instead of Vec<u8> to avoid the from_utf8 conversion overhead.
1511pub(crate) fn hex_encode(bytes: &[u8]) -> String {
1512    let len = bytes.len() * 2;
1513    let mut hex = String::with_capacity(len);
1514    // SAFETY: We write exactly `len` valid ASCII hex bytes into the String's buffer.
1515    unsafe {
1516        let buf = hex.as_mut_vec();
1517        buf.set_len(len);
1518        hex_encode_to_slice(bytes, buf);
1519    }
1520    hex
1521}
1522
1523/// Encode bytes as hex directly into a pre-allocated output slice.
1524/// Output slice must be at least `bytes.len() * 2` bytes long.
1525#[inline]
1526fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
1527    // SAFETY: We write exactly bytes.len()*2 bytes into `out`, which must be large enough.
1528    unsafe {
1529        let ptr = out.as_mut_ptr();
1530        for (i, &b) in bytes.iter().enumerate() {
1531            let pair = *HEX_TABLE.get_unchecked(b as usize);
1532            *ptr.add(i * 2) = pair[0];
1533            *ptr.add(i * 2 + 1) = pair[1];
1534        }
1535    }
1536}