// coreutils_rs/hash/core.rs

use std::cell::RefCell;
use std::fs::File;
use std::io::{self, BufRead, Read, Write};
use std::path::Path;

use std::sync::atomic::AtomicUsize;
#[cfg(target_os = "linux")]
use std::sync::atomic::{AtomicBool, Ordering};

#[cfg(not(target_os = "linux"))]
use digest::Digest;
#[cfg(not(target_os = "linux"))]
use md5::Md5;
#[cfg(not(target_os = "linux"))]
use sha1::Sha1;

/// Supported hash algorithms.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha1,
    Sha224,
    Sha256,
    Sha384,
    Sha512,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    pub fn name(self) -> &'static str {
        match self {
            HashAlgorithm::Sha1 => "SHA1",
            HashAlgorithm::Sha224 => "SHA224",
            HashAlgorithm::Sha256 => "SHA256",
            HashAlgorithm::Sha384 => "SHA384",
            HashAlgorithm::Sha512 => "SHA512",
            HashAlgorithm::Md5 => "MD5",
            HashAlgorithm::Blake2b => "BLAKE2b",
        }
    }
}

// ── Generic hash helpers ────────────────────────────────────────────

/// Single-shot hash using the Digest trait (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    hex_encode(&D::digest(data))
}

/// Streaming hash using thread-local buffer (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = D::new();
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]);
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}

// ── Public hashing API ──────────────────────────────────────────────

/// Buffer size for streaming hash I/O.
/// 8MB: amortizes syscall overhead while still fitting in L3 cache on modern CPUs.
/// A larger buffer means fewer read() calls per file (e.g., 13 reads for a 100MB
/// file at 8MB vs. 25 at 4MB).
const HASH_READ_BUF: usize = 8 * 1024 * 1024;

// Thread-local reusable buffer for streaming hash I/O.
// Allocated LAZILY (only on first streaming-hash call) to avoid 8MB cost for
// small-file-only workloads (e.g., "sha256sum *.txt" where every file is <1MB).
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Ensure the streaming buffer is at least HASH_READ_BUF bytes.
/// Called only on the streaming path, so small-file workloads never allocate 8MB.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() < HASH_READ_BUF {
        buf.resize(HASH_READ_BUF, 0);
    }
}
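
// NOTE: `read_full` (and `hex_encode`/`hex_encode_to_slice`) are defined
// elsewhere in this module. The streaming loops below assume this contract
// for `read_full` — a sketch of the assumed behavior, not the actual definition:
//
//     fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
//         let mut total = 0;
//         while total < buf.len() {
//             match reader.read(&mut buf[total..]) {
//                 Ok(0) => break, // EOF
//                 Ok(n) => total += n,
//                 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
//                 Err(e) => return Err(e),
//             }
//         }
//         Ok(total) // 0 signals EOF to callers
//     }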

// ── SHA-256 ───────────────────────────────────────────────────────────

/// Single-shot SHA-256 using OpenSSL's optimized assembly (SHA-NI on x86).
/// Linux only — OpenSSL is not available on Windows/macOS in CI.
#[cfg(target_os = "linux")]
fn sha256_bytes(data: &[u8]) -> String {
    // For tiny data (<8KB): use sha2 crate directly, avoiding OpenSSL's
    // EVP_MD_CTX_new/free overhead (~700ns per call). sha2 with asm feature
    // uses SHA-NI instructions and has no heap allocation, just stack state.
    // For 100 × 55-byte files: saves ~70µs total.
    if data.len() < TINY_FILE_LIMIT as usize {
        use digest::Digest;
        return hex_encode(&sha2::Sha256::digest(data));
    }
    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
        .expect("SHA256 hash failed");
    hex_encode(&digest)
}
/// Single-shot SHA-256 using ring's BoringSSL assembly (Windows and other
/// non-Apple, non-Linux targets).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    hex_encode(ring::digest::digest(&ring::digest::SHA256, data).as_ref())
}

/// Single-shot SHA-256 using sha2 crate (macOS fallback — ring doesn't compile on Apple Silicon).
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha256>(data)
}

/// Streaming SHA-256 using OpenSSL's optimized assembly.
/// Linux only — OpenSSL is not available on Windows/macOS in CI.
#[cfg(target_os = "linux")]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
            .map_err(io::Error::other)?;
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]).map_err(io::Error::other)?;
        }
        let digest = hasher.finish().map_err(io::Error::other)?;
        Ok(hex_encode(&digest))
    })
}

/// Streaming SHA-256 using ring's BoringSSL assembly (Windows and other
/// non-Apple, non-Linux targets).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            ctx.update(&buf[..n]);
        }
        Ok(hex_encode(ctx.finish().as_ref()))
    })
}

/// Streaming SHA-256 using sha2 crate (macOS fallback).
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha256>(reader)
}

// ── SHA-1 ─────────────────────────────────────────────────────────────

/// Single-shot SHA-1 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha1_bytes(data: &[u8]) -> String {
    if data.len() < TINY_FILE_LIMIT as usize {
        use digest::Digest;
        return hex_encode(&sha1::Sha1::digest(data));
    }
    let digest =
        openssl::hash::hash(openssl::hash::MessageDigest::sha1(), data).expect("SHA1 hash failed");
    hex_encode(&digest)
}

/// Single-shot SHA-1 using sha1 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha1_bytes(data: &[u8]) -> String {
    hash_digest::<Sha1>(data)
}

/// Streaming SHA-1 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha1_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha1())
            .map_err(io::Error::other)?;
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]).map_err(io::Error::other)?;
        }
        let digest = hasher.finish().map_err(io::Error::other)?;
        Ok(hex_encode(&digest))
    })
}

/// Streaming SHA-1 using sha1 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha1_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Sha1>(reader)
}

// ── SHA-224 ───────────────────────────────────────────────────────────

/// Single-shot SHA-224 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha224_bytes(data: &[u8]) -> String {
    if data.len() < TINY_FILE_LIMIT as usize {
        use digest::Digest;
        return hex_encode(&sha2::Sha224::digest(data));
    }
    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha224(), data)
        .expect("SHA224 hash failed");
    hex_encode(&digest)
}

/// Single-shot SHA-224 using sha2 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha224_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha224>(data)
}

/// Streaming SHA-224 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha224_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha224())
            .map_err(io::Error::other)?;
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]).map_err(io::Error::other)?;
        }
        let digest = hasher.finish().map_err(io::Error::other)?;
        Ok(hex_encode(&digest))
    })
}

/// Streaming SHA-224 using sha2 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha224_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha224>(reader)
}

// ── SHA-384 ───────────────────────────────────────────────────────────

/// Single-shot SHA-384 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha384_bytes(data: &[u8]) -> String {
    if data.len() < TINY_FILE_LIMIT as usize {
        use digest::Digest;
        return hex_encode(&sha2::Sha384::digest(data));
    }
    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha384(), data)
        .expect("SHA384 hash failed");
    hex_encode(&digest)
}

/// Single-shot SHA-384 using sha2 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha384_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha384>(data)
}

/// Streaming SHA-384 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha384_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha384())
            .map_err(io::Error::other)?;
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]).map_err(io::Error::other)?;
        }
        let digest = hasher.finish().map_err(io::Error::other)?;
        Ok(hex_encode(&digest))
    })
}

/// Streaming SHA-384 using sha2 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha384_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha384>(reader)
}

// ── SHA-512 ───────────────────────────────────────────────────────────

/// Single-shot SHA-512 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha512_bytes(data: &[u8]) -> String {
    if data.len() < TINY_FILE_LIMIT as usize {
        use digest::Digest;
        return hex_encode(&sha2::Sha512::digest(data));
    }
    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha512(), data)
        .expect("SHA512 hash failed");
    hex_encode(&digest)
}

/// Single-shot SHA-512 using sha2 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha512_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha512>(data)
}

/// Streaming SHA-512 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn sha512_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha512())
            .map_err(io::Error::other)?;
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]).map_err(io::Error::other)?;
        }
        let digest = hasher.finish().map_err(io::Error::other)?;
        Ok(hex_encode(&digest))
    })
}

/// Streaming SHA-512 using sha2 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn sha512_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha512>(reader)
}

/// Compute hash of a byte slice directly (zero-copy fast path).
pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
    match algo {
        HashAlgorithm::Sha1 => sha1_bytes(data),
        HashAlgorithm::Sha224 => sha224_bytes(data),
        HashAlgorithm::Sha256 => sha256_bytes(data),
        HashAlgorithm::Sha384 => sha384_bytes(data),
        HashAlgorithm::Sha512 => sha512_bytes(data),
        HashAlgorithm::Md5 => md5_bytes(data),
        HashAlgorithm::Blake2b => {
            let hash = blake2b_simd::blake2b(data);
            hex_encode(hash.as_bytes())
        }
    }
}
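
// Example (sketch): the returned hex string matches the standard test vectors,
// e.g. SHA-256("abc"):
//
//     let hex = hash_bytes(HashAlgorithm::Sha256, b"abc");
//     assert_eq!(hex, "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad");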

/// Hash data and write hex result directly into an output buffer.
/// Returns the number of hex bytes written. Avoids String allocation
/// on the critical single-file fast path.
/// `out` must be at least 128 bytes for BLAKE2b/SHA512 (64 * 2), 64 for SHA256, 32 for MD5, etc.
#[cfg(target_os = "linux")]
pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> usize {
    match algo {
        HashAlgorithm::Md5 => {
            use digest::Digest;
            let digest = md5::Md5::digest(data);
            hex_encode_to_slice(&digest, out);
            32
        }
        HashAlgorithm::Sha1 => {
            use digest::Digest;
            let digest = sha1::Sha1::digest(data);
            hex_encode_to_slice(&digest, out);
            40
        }
        HashAlgorithm::Sha224 => {
            use digest::Digest;
            let digest = sha2::Sha224::digest(data);
            hex_encode_to_slice(&digest, out);
            56
        }
        HashAlgorithm::Sha256 => {
            use digest::Digest;
            let digest = sha2::Sha256::digest(data);
            hex_encode_to_slice(&digest, out);
            64
        }
        HashAlgorithm::Sha384 => {
            use digest::Digest;
            let digest = sha2::Sha384::digest(data);
            hex_encode_to_slice(&digest, out);
            96
        }
        HashAlgorithm::Sha512 => {
            use digest::Digest;
            let digest = sha2::Sha512::digest(data);
            hex_encode_to_slice(&digest, out);
            128
        }
        HashAlgorithm::Blake2b => {
            let hash = blake2b_simd::blake2b(data);
            let bytes = hash.as_bytes();
            hex_encode_to_slice(bytes, out);
            bytes.len() * 2
        }
    }
}
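
// Example (sketch): hashing into a stack buffer with no String allocation.
// MD5 digests are 16 bytes, so 32 hex bytes are written:
//
//     let mut out = [0u8; 128];
//     let n = hash_bytes_to_buf(HashAlgorithm::Md5, b"hello", &mut out);
//     assert_eq!(&out[..n], b"5d41402abc4b2a76b9719d911017c592");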

/// Hash a single file using raw syscalls and write hex directly to output buffer.
/// Returns number of hex bytes written.
/// This is the absolute minimum-overhead path for single-file hashing:
/// raw open + fstat + read + hash + hex encode, with zero String allocation.
#[cfg(target_os = "linux")]
pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
    use std::os::unix::ffi::OsStrExt;

    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd_to_buf(algo, fd2, out);
        }
        return Err(err);
    }
    hash_from_raw_fd_to_buf(algo, fd, out)
}

/// Hash from raw fd and write hex directly to output buffer.
/// For tiny files (<8KB), the entire path is raw syscalls + stack buffer — zero heap.
/// For larger files, falls back to the mmap/small-file/streaming paths below,
/// which return a String whose hex is then copied into `out`.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes_to_buf(algo, &[], out));
    }

    // Tiny files (<8KB): fully raw path — zero heap allocation
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes_to_buf(algo, &buf[..total], out));
    }

    // Larger files: wrap the fd in a File and reuse the mmap/small-file/streaming
    // paths, which return a String; copy its hex into `out`.
    use std::os::unix::io::FromRawFd;
    let file = unsafe { File::from_raw_fd(fd) };
    let hash_str = if is_regular && size > 0 {
        if size >= SMALL_FILE_LIMIT {
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                hash_bytes(algo, &mmap)
            } else {
                hash_file_small(algo, file, size as usize)?
            }
        } else {
            hash_file_small(algo, file, size as usize)?
        }
    } else {
        hash_reader(algo, file)?
    };
    let hex_bytes = hash_str.as_bytes();
    out[..hex_bytes.len()].copy_from_slice(hex_bytes);
    Ok(hex_bytes.len())
}

// ── MD5 ─────────────────────────────────────────────────────────────

/// Single-shot MD5 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn md5_bytes(data: &[u8]) -> String {
    // For tiny data (<8KB): use md5 crate directly, avoiding OpenSSL's
    // EVP_MD_CTX_new/free overhead (~700ns per call). md5 with asm feature
    // uses optimized assembly and has no heap allocation.
    if data.len() < TINY_FILE_LIMIT as usize {
        use digest::Digest;
        return hex_encode(&md5::Md5::digest(data));
    }
    let digest =
        openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
    hex_encode(&digest)
}

/// Single-shot MD5 using md-5 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    hash_digest::<Md5>(data)
}

/// Compute hash of data from a reader, returning hex string.
pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
    match algo {
        HashAlgorithm::Sha1 => sha1_reader(reader),
        HashAlgorithm::Sha224 => sha224_reader(reader),
        HashAlgorithm::Sha256 => sha256_reader(reader),
        HashAlgorithm::Sha384 => sha384_reader(reader),
        HashAlgorithm::Sha512 => sha512_reader(reader),
        HashAlgorithm::Md5 => md5_reader(reader),
        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
    }
}
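
// Example (sketch): any `Read` source works, not just files; e.g. the
// standard SHA-1 test vector for "abc":
//
//     let hex = hash_reader(HashAlgorithm::Sha1, std::io::Cursor::new(b"abc"))?;
//     assert_eq!(hex, "a9993e364706816aba3e25717850c26c9cd0d89d");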

/// Streaming MD5 using OpenSSL's optimized assembly (Linux).
#[cfg(target_os = "linux")]
fn md5_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
            .map_err(io::Error::other)?;
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]).map_err(io::Error::other)?;
        }
        let digest = hasher.finish().map_err(io::Error::other)?;
        Ok(hex_encode(&digest))
    })
}

/// Streaming MD5 using md-5 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}

/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);

/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
/// Caches whether O_NOATIME works to avoid double-open on every file.
#[cfg(target_os = "linux")]
fn open_noatime(path: &Path) -> io::Result<File> {
    use std::os::unix::fs::OpenOptionsExt;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        match std::fs::OpenOptions::new()
            .read(true)
            .custom_flags(libc::O_NOATIME)
            .open(path)
        {
            Ok(f) => return Ok(f),
            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            }
            Err(e) => return Err(e), // Real error, propagate
        }
    }
    File::open(path)
}

#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}

/// Open a file and get its metadata in one step.
/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
#[cfg(target_os = "linux")]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let fd = {
        use std::os::unix::io::AsRawFd;
        file.as_raw_fd()
    };
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        return Err(io::Error::last_os_error());
    }
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
    let size = stat.st_size as u64;
    Ok((file, size, is_regular))
}

#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let metadata = file.metadata()?;
    Ok((file, metadata.len(), metadata.file_type().is_file()))
}

/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
const TINY_FILE_LIMIT: u64 = 8 * 1024;
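
// Size-tier dispatch used by hash_file / blake2b_hash_file below (regular files):
//   0 bytes             → hash_bytes(algo, &[])          — no read at all
//   < 8KB (TINY)        → stack buffer + single read()   — zero heap
//   8KB..16MB (SMALL)   → thread-local Vec, single-shot  — one read, buffer reused
//   >= 16MB             → mmap (Linux adds a pipelined-read fallback)
//   non-regular (pipes) → streaming hash_reader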

// Thread-local reusable buffer for single-read hash.
// Grows lazily up to SMALL_FILE_LIMIT (16MB). Initial 64KB allocation
// handles tiny files; larger files trigger one grow that persists for reuse.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}

/// Optimized hash for large files (>=16MB) on Linux.
/// Primary path: mmap with HUGEPAGE + POPULATE_READ for zero-copy, single-shot hash.
/// Falls back to streaming I/O with double-buffered reader thread if mmap fails.
#[cfg(target_os = "linux")]
fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
    // Primary path: mmap with huge pages for zero-copy single-shot hash.
    match unsafe { memmap2::MmapOptions::new().map(&file) } {
        Ok(mmap) => {
            if file_size >= 2 * 1024 * 1024 {
                let _ = mmap.advise(memmap2::Advice::HugePage);
            }
            let _ = mmap.advise(memmap2::Advice::Sequential);
            if file_size >= 4 * 1024 * 1024 {
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            } else {
                let _ = mmap.advise(memmap2::Advice::WillNeed);
            }
            Ok(hash_bytes(algo, &mmap))
        }
        Err(_) => hash_file_pipelined_read(algo, file, file_size),
    }
}

/// Streaming fallback for large files when mmap is unavailable.
/// Uses a double-buffered reader thread with fadvise hints; blocking recv()
/// keeps the scheme at two buffers total (an earlier version allocated a third).
#[cfg(target_os = "linux")]
fn hash_file_pipelined_read(
    algo: HashAlgorithm,
    mut file: File,
    file_size: u64,
) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4MB per buffer

    unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        );
    }

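    // Double-buffered channels: reader fills one buffer while hasher processes another.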
    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);

    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
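        // Blocking recv reuses the hasher's returned buffer (2 buffers total, not 3).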
        while let Ok(mut buf) = buf_rx.recv() {
            let mut total = 0;
            while total < buf.len() {
                match file.read(&mut buf[total..]) {
                    Ok(0) => break,
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            if total == 0 {
                break;
            }
            if tx.send((buf, total)).is_err() {
                break;
            }
        }
        Ok(())
    });

    let hash_result = match algo {
        HashAlgorithm::Blake2b => {
            let mut state = blake2b_simd::Params::new().to_state();
            while let Ok((buf, n)) = rx.recv() {
                state.update(&buf[..n]);
                let _ = buf_tx.send(buf);
            }
            Ok(hex_encode(state.finalize().as_bytes()))
        }
        _ => {
            // All remaining algorithms share the same OpenSSL streaming loop;
            // only the message-digest selector differs.
            let md = match algo {
                HashAlgorithm::Sha1 => openssl::hash::MessageDigest::sha1(),
                HashAlgorithm::Sha224 => openssl::hash::MessageDigest::sha224(),
                HashAlgorithm::Sha256 => openssl::hash::MessageDigest::sha256(),
                HashAlgorithm::Sha384 => openssl::hash::MessageDigest::sha384(),
                HashAlgorithm::Sha512 => openssl::hash::MessageDigest::sha512(),
                HashAlgorithm::Md5 => openssl::hash::MessageDigest::md5(),
                HashAlgorithm::Blake2b => unreachable!(),
            };
            let mut hasher = openssl::hash::Hasher::new(md).map_err(io::Error::other)?;
            while let Ok((buf, n)) = rx.recv() {
                hasher.update(&buf[..n]).map_err(io::Error::other)?;
                let _ = buf_tx.send(buf);
            }
            let digest = hasher.finish().map_err(io::Error::other)?;
            Ok(hex_encode(&digest))
        }
    };

    match reader_handle.join() {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            if hash_result.is_ok() {
                return Err(e);
            }
        }
        Err(payload) => {
            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
                format!("reader thread panicked: {}", s)
            } else if let Some(s) = payload.downcast_ref::<String>() {
                format!("reader thread panicked: {}", s)
            } else {
                "reader thread panicked".to_string()
            };
            return Err(io::Error::other(msg));
        }
    }

    hash_result
}

/// Hash a file by path. Uses I/O pipelining for large files on Linux,
/// mmap with HUGEPAGE hints as fallback, single-read for small files,
/// and streaming read for non-regular files.
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(hash_bytes(algo, &[]));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return hash_file_tiny(algo, file, file_size as usize);
        }
        // Large files (>=16MB): use I/O pipelining on Linux to overlap read + hash
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return hash_file_pipelined(algo, file, file_size);
            }
            // Non-Linux: mmap fallback
            #[cfg(not(target_os = "linux"))]
            {
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(hash_bytes(algo, &mmap));
                }
            }
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash.
        // This avoids Hasher context allocation + streaming overhead for each file.
        if file_size < SMALL_FILE_LIMIT {
            return hash_file_small(algo, file, file_size as usize);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    hash_reader(algo, file)
}
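
// Example (sketch, in a function returning io::Result): end-to-end use,
// producing the same "<hex>  <name>" layout that sha256sum prints:
//
//     let hex = hash_file(HashAlgorithm::Sha256, Path::new("Cargo.toml"))?;
//     println!("{hex}  Cargo.toml");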

/// Hash a tiny file (<8KB) using a stack-allocated buffer.
/// Single read() syscall, zero heap allocation on the data path.
/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
#[inline]
fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
    let mut buf = [0u8; 8192];
    let mut total = 0;
    // Read with known size — usually completes in a single read() for regular files
    while total < size {
        match file.read(&mut buf[total..size]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(hash_bytes(algo, &buf[..total]))
}

/// Hash a small file by reading it entirely into a thread-local buffer,
/// then using the single-shot hash function. Avoids per-file Hasher allocation.
#[inline]
fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        // Reset length but keep allocation, then grow if needed
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
        // directly and only access buf[..total] where total <= size <= capacity.
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(hash_bytes(algo, &buf[..total]))
    })
}

/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
    let stdin = io::stdin();
    // Hint kernel for sequential access if stdin is a regular file (redirect)
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    // Streaming hash — works for both pipe and file-redirect stdin
    hash_reader(algo, stdin.lock())
}

/// Check if parallel hashing is worthwhile for the given file paths.
/// Always parallelize with 2+ files — rayon's thread pool is lazily initialized
/// once and reused, so per-file work-stealing overhead is negligible (~1µs).
/// Removing the stat()-based size check eliminates N extra syscalls for N files.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() >= 2
}
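
// Caller-side sketch (assumed usage — the actual driver lives outside this file):
//
//     let results: Vec<io::Result<String>> = if should_use_parallel(&paths) {
//         use rayon::prelude::*;
//         paths.par_iter().map(|&p| hash_file(algo, p)).collect()
//     } else {
//         paths.iter().map(|&p| hash_file(algo, p)).collect()
//     };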

/// Issue readahead hints for a list of file paths to warm the page cache.
/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
/// Only issues hints for files >= 1MB; small files are read fast enough
/// that the fadvise syscall overhead isn't worth it.
#[cfg(target_os = "linux")]
pub fn readahead_files(paths: &[&Path]) {
    use std::os::unix::io::AsRawFd;
    for path in paths {
        if let Ok(file) = open_noatime(path) {
            if let Ok(meta) = file.metadata() {
                let len = meta.len();
                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
                    unsafe {
                        libc::posix_fadvise(
                            file.as_raw_fd(),
                            0,
                            len as i64,
                            libc::POSIX_FADV_WILLNEED,
                        );
                    }
                }
            }
        }
    }
}

#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No-op on non-Linux
}

// --- BLAKE2b variable-length functions (using blake2b_simd) ---

/// Hash raw data with BLAKE2b variable output length.
/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
    let hash = blake2b_simd::Params::new()
        .hash_length(output_bytes)
        .hash(data);
    hex_encode(hash.as_bytes())
}
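
// Example (sketch): `output_bytes` controls the digest width, so the hex
// string is 2 × output_bytes characters:
//
//     let hex = blake2b_hash_data(b"hello", 32); // BLAKE2b-256
//     assert_eq!(hex.len(), 64);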

/// Hash a reader with BLAKE2b variable output length.
/// Uses thread-local buffer for cache-friendly streaming.
pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut state = blake2b_simd::Params::new()
            .hash_length(output_bytes)
            .to_state();
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break;
            }
            state.update(&buf[..n]);
        }
        Ok(hex_encode(state.finalize().as_bytes()))
    })
}

/// Hash a file with BLAKE2b variable output length.
/// Uses mmap for large files (zero-copy), single-read for small files,
/// and streaming read as fallback.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // Large files (>=16MB): I/O pipelining on Linux, mmap on other platforms
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return blake2b_hash_file_pipelined(file, file_size, output_bytes);
            }
            #[cfg(not(target_os = "linux"))]
            {
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(blake2b_hash_data(&mmap, output_bytes));
                }
            }
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    blake2b_hash_reader(file, output_bytes)
}

/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
#[inline]
fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
    let mut buf = [0u8; 8192];
    let mut total = 0;
    while total < size {
        match file.read(&mut buf[total..size]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(blake2b_hash_data(&buf[..total], output_bytes))
}

/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
#[inline]
fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(blake2b_hash_data(&buf[..total], output_bytes))
    })
}

/// Optimized BLAKE2b hash for large files (>=16MB) on Linux.
/// Primary path: mmap with HUGEPAGE + POPULATE_READ for zero-copy, single-shot hash.
/// Eliminates thread spawn, channel synchronization, buffer allocation (24MB→0),
/// and read() memcpy overhead. Falls back to streaming I/O if mmap fails.
#[cfg(target_os = "linux")]
fn blake2b_hash_file_pipelined(
    file: File,
    file_size: u64,
    output_bytes: usize,
) -> io::Result<String> {
    // Primary path: mmap with huge pages for zero-copy single-shot hash.
    // Eliminates: thread spawn (~50µs), channel sync, buffer allocs (24MB),
    // 13+ read() syscalls, and page-cache → user-buffer memcpy.
    match unsafe { memmap2::MmapOptions::new().map(&file) } {
        Ok(mmap) => {
            // HUGEPAGE MUST come before any page faults: reduces 25,600 minor
            // faults (4KB) to ~50 faults (2MB) for 100MB. Saves ~12ms overhead.
            if file_size >= 2 * 1024 * 1024 {
                let _ = mmap.advise(memmap2::Advice::HugePage);
            }
            let _ = mmap.advise(memmap2::Advice::Sequential);
            // POPULATE_READ (Linux 5.14+): synchronously prefaults all pages with
            // huge pages before hashing begins. Falls back to WillNeed on older kernels.
            if file_size >= 4 * 1024 * 1024 {
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            } else {
                let _ = mmap.advise(memmap2::Advice::WillNeed);
            }
            // Single-shot hash: processes entire file in one call, streaming
            // directly from page cache with no user-space buffer copies.
            Ok(blake2b_hash_data(&mmap, output_bytes))
        }
        Err(_) => {
            // mmap failed (FUSE, NFS without mmap support, etc.) — fall back
            // to streaming pipelined I/O.
            blake2b_hash_file_streamed(file, file_size, output_bytes)
        }
    }
}

/// Streaming fallback for BLAKE2b large files when mmap is unavailable.
/// Uses a double-buffered reader thread with fadvise hints; blocking recv()
/// keeps the scheme at two buffers total (an earlier version allocated a third).
#[cfg(target_os = "linux")]
fn blake2b_hash_file_streamed(
    mut file: File,
    file_size: u64,
    output_bytes: usize,
) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; // 8MB per buffer

    // Hint kernel for sequential access
    unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        );
    }

    // Double-buffered channels: reader fills one buffer while hasher processes another.
    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);

    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
        // Blocking recv reuses hasher's returned buffer (2 buffers total, not 3).
        while let Ok(mut buf) = buf_rx.recv() {
            let mut total = 0;
            while total < buf.len() {
                match file.read(&mut buf[total..]) {
                    Ok(0) => break,
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            if total == 0 {
                break;
            }
            if tx.send((buf, total)).is_err() {
                break;
            }
        }
        Ok(())
    });

    let mut state = blake2b_simd::Params::new()
        .hash_length(output_bytes)
        .to_state();
    while let Ok((buf, n)) = rx.recv() {
        state.update(&buf[..n]);
        let _ = buf_tx.send(buf);
    }
    let hash_result = Ok(hex_encode(state.finalize().as_bytes()));

    match reader_handle.join() {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            if hash_result.is_ok() {
                return Err(e);
            }
        }
        Err(payload) => {
            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
                format!("reader thread panicked: {}", s)
            } else if let Some(s) = payload.downcast_ref::<String>() {
                format!("reader thread panicked: {}", s)
            } else {
                "reader thread panicked".to_string()
            };
            return Err(io::Error::other(msg));
        }
    }

    hash_result
}

/// Hash stdin with BLAKE2b variable output length.
/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
    let stdin = io::stdin();
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    blake2b_hash_reader(stdin.lock(), output_bytes)
}

/// Internal enum for file content in batch hashing.
/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
enum FileContent {
    Mmap(memmap2::Mmap),
    Buf(Vec<u8>),
}

impl AsRef<[u8]> for FileContent {
    fn as_ref(&self) -> &[u8] {
        match self {
            FileContent::Mmap(m) => m,
            FileContent::Buf(v) => v,
        }
    }
}

/// Open a file and load its content for batch hashing.
/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
/// files (zero-copy), and read-to-end for non-regular files.
fn open_file_content(path: &Path) -> io::Result<FileContent> {
    let (file, size, is_regular) = open_and_stat(path)?;
    if is_regular && size == 0 {
        return Ok(FileContent::Buf(Vec::new()));
    }
    if is_regular && size > 0 {
        // Tiny files: read directly into Vec. The mmap syscall + page fault
        // overhead exceeds the data transfer cost for files under 8KB.
        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
        if size < TINY_FILE_LIMIT {
            let mut buf = vec![0u8; size as usize];
            let mut total = 0;
            let mut f = file;
            while total < size as usize {
                match f.read(&mut buf[total..]) {
                    Ok(0) => break,
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            buf.truncate(total);
            return Ok(FileContent::Buf(buf));
        }
        // HUGEPAGE + PopulateRead for optimal page faulting
        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
        if let Ok(mmap) = mmap_result {
            #[cfg(target_os = "linux")]
            {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            }
            return Ok(FileContent::Mmap(mmap));
        }
        // Fallback: read into Vec
        let mut buf = vec![0u8; size as usize];
        let mut total = 0;
        let mut f = file;
        while total < size as usize {
            match f.read(&mut buf[total..]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        buf.truncate(total);
        return Ok(FileContent::Buf(buf));
    }
    // Non-regular: read to end
    let mut buf = Vec::new();
    let mut f = file;
    f.read_to_end(&mut buf)?;
    Ok(FileContent::Buf(buf))
}
1372
1373/// Read remaining file content from an already-open fd into a Vec.
1374/// Used when the initial stack buffer is exhausted and we need to read
1375/// the rest without re-opening the file.
1376fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1377    let mut buf = Vec::with_capacity(prefix.len() + 65536);
1378    buf.extend_from_slice(prefix);
1379    file.read_to_end(&mut buf)?;
1380    Ok(FileContent::Buf(buf))
1381}
1382
1383/// Open a file and read all content without fstat — just open+read+close.
1384/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
1385/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
1386/// then falls back to larger buffer or read_to_end for bigger files.
1387fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1388    let mut file = open_noatime(path)?;
1389    // Try small stack buffer first — optimal for benchmark's ~55 byte files.
1390    // For tiny files, allocate exact-size Vec to avoid waste.
1391    let mut small_buf = [0u8; 4096];
1392    match file.read(&mut small_buf) {
1393        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1394        Ok(n) if n < small_buf.len() => {
1395            // File fits in small buffer — allocate exact size
1396            let mut vec = Vec::with_capacity(n);
1397            vec.extend_from_slice(&small_buf[..n]);
1398            return Ok(FileContent::Buf(vec));
1399        }
1400        Ok(n) => {
1401            // Might be more data — allocate heap buffer and read into it directly
1402            let mut buf = vec![0u8; 65536];
1403            buf[..n].copy_from_slice(&small_buf[..n]);
1404            let mut total = n;
1405            loop {
1406                match file.read(&mut buf[total..]) {
1407                    Ok(0) => {
1408                        buf.truncate(total);
1409                        return Ok(FileContent::Buf(buf));
1410                    }
1411                    Ok(n) => {
1412                        total += n;
1413                        if total >= buf.len() {
1414                            // File > 64KB: read rest from existing fd
1415                            return read_remaining_to_vec(&buf[..total], file);
1416                        }
1417                    }
1418                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1419                    Err(e) => return Err(e),
1420                }
1421            }
1422        }
1423        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1424            let mut buf = vec![0u8; 65536];
1425            let mut total = 0;
1426            loop {
1427                match file.read(&mut buf[total..]) {
1428                    Ok(0) => {
1429                        buf.truncate(total);
1430                        return Ok(FileContent::Buf(buf));
1431                    }
1432                    Ok(n) => {
1433                        total += n;
1434                        if total >= buf.len() {
1435                            // File > 64KB: read rest from existing fd
1436                            return read_remaining_to_vec(&buf[..total], file);
1437                        }
1438                    }
1439                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1440                    Err(e) => return Err(e),
1441                }
1442            }
1443        }
1444        Err(e) => return Err(e),
1445    }
1446}
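
// Test-only round-trip check for the fast loader, as a minimal sketch: the
// temp-file name and 10_000-byte payload are arbitrary choices made here, sized
// to cross the 4KB first-read boundary; `as_ref()` on FileContent is the same
// accessor `blake2b_hash_files_many` uses below.
#[cfg(test)]
#[test]
fn open_file_content_fast_round_trip() {
    let path = std::env::temp_dir().join(format!("core_rs_fast_{}", std::process::id()));
    let data = vec![0xabu8; 10_000]; // larger than the 4KB initial buffer
    std::fs::write(&path, &data).unwrap();
    let content = open_file_content_fast(&path).unwrap();
    assert_eq!(content.as_ref(), &data[..]);
    let _ = std::fs::remove_file(&path);
}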

/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order.
///
/// For 100 files on AVX2: 4x throughput from SIMD parallelism.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    // For small file counts (≤10), load sequentially to avoid thread::scope
    // overhead (~120µs). For many files, use parallel loading with lightweight
    // OS threads. For 100+ files, use the fast path that skips fstat.
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Extract hashes into a map
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
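
// Illustrative sketch (not wired into any caller): batch-hash two paths with
// 32-byte (BLAKE2b-256) output and print GNU-style lines. The file names are
// placeholders; per-file errors come back in input order.
#[allow(dead_code)]
fn example_blake2b_many(out: &mut impl Write) -> io::Result<()> {
    let paths = [Path::new("a.dat"), Path::new("b.dat")];
    for (path, result) in paths.iter().zip(blake2b_hash_files_many(&paths, 32)) {
        match result {
            Ok(hash) => print_hash(out, &hash, &path.display().to_string(), false)?,
            Err(e) => writeln!(out, "{}: {}", path.display(), e)?,
        }
    }
    Ok(())
}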

/// Batch-hash multiple files with BLAKE2b using the best strategy for the workload.
/// Samples a few files to estimate total data size. For small workloads, uses
/// single-core SIMD batch hashing (`blake2b_hash_files_many`) to avoid stat and
/// thread spawn overhead. For larger workloads, uses multi-core work-stealing
/// parallelism where each worker calls `blake2b_hash_file` (with I/O pipelining
/// for large files on Linux).
/// Returns results in input order.
pub fn blake2b_hash_files_parallel(
    paths: &[&Path],
    output_bytes: usize,
) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Sample a few files to estimate whether parallel processing is worthwhile.
    // This avoids the cost of statting ALL files (~5µs/file) when the workload
    // is too small for parallelism to help.
    let sample_count = n.min(5);
    let mut sample_max: u64 = 0;
    let mut sample_total: u64 = 0;
    for &p in paths.iter().take(sample_count) {
        let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
        sample_total += size;
        sample_max = sample_max.max(size);
    }
    let estimated_total = if sample_count > 0 {
        sample_total * (n as u64) / (sample_count as u64)
    } else {
        0
    };

    // For small workloads, thread spawn overhead (~120µs × N_threads) exceeds
    // any parallelism benefit. Use SIMD batch hashing directly (no stat pass).
    if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
        return blake2b_hash_files_many(paths, output_bytes);
    }

    // Full stat pass for parallel scheduling — worth it for larger workloads.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = blake2b_hash_file(path, output_bytes);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}

/// Batch-hash multiple files with SHA-256/MD5 using work-stealing parallelism.
/// Files are sorted by size (largest first) so the biggest files start processing
/// immediately. Each worker thread grabs the next unprocessed file via atomic index,
/// eliminating tail latency from uneven file sizes.
/// Returns results in input order.
pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Build (original_index, path, size) tuples — stat all files for scheduling.
    // The stat cost (~5µs/file) is repaid by better work distribution.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = hash_file(algo, path);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
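
// Illustrative sketch: a checksum-tool driver loop over this function, printing
// BSD tag lines. The paths are placeholders; results come back in input order
// even though workers take files largest-first.
#[allow(dead_code)]
fn example_hash_files_parallel(out: &mut impl Write) -> io::Result<()> {
    let paths = [Path::new("big.iso"), Path::new("small.txt")];
    let results = hash_files_parallel(&paths, HashAlgorithm::Sha256);
    for (path, result) in paths.iter().zip(results) {
        match result {
            Ok(hash) => {
                print_hash_tag(out, HashAlgorithm::Sha256, &hash, &path.display().to_string())?
            }
            Err(e) => writeln!(out, "{}: {}", path.display(), e)?,
        }
    }
    Ok(())
}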

/// Fast parallel hash for multi-file workloads. Skips the stat-all-and-sort phase
/// of `hash_files_parallel()` and uses `hash_file_nostat()` per worker to minimize
/// per-file syscall overhead. For 100 tiny files, this eliminates ~200 stat() calls
/// (100 from the sort phase + 100 from open_and_stat inside each worker).
/// Returns results in input order.
pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();
    if n == 0 {
        return Vec::new();
    }
    if n == 1 {
        return vec![hash_file_nostat(algo, paths[0])];
    }

    // Issue readahead for all files (no size threshold — even tiny files benefit
    // from batched WILLNEED hints when processing 100+ files)
    #[cfg(target_os = "linux")]
    readahead_files_all(paths);

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= n {
                            break;
                        }
                        let result = hash_file_nostat(algo, paths[idx]);
                        local_results.push((idx, result));
                    }
                    local_results
                })
            })
            .collect();

        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (idx, result) in handle.join().unwrap() {
                results[idx] = Some(result);
            }
        }
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
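
// Sketch of how a caller might choose between the two parallel entry points.
// The 20-file cutoff below is an assumption for illustration only, not a
// threshold measured or used by this crate.
#[allow(dead_code)]
fn hash_many_paths(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    if paths.len() >= 20 {
        // Large batch of (presumably small) files: skip the stat pass entirely.
        hash_files_parallel_fast(paths, algo)
    } else {
        // Smaller batch: pay the stat pass for size-aware scheduling.
        hash_files_parallel(paths, algo)
    }
}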

/// Batch-hash multiple files: pre-read all files into memory in parallel,
/// then hash all data in parallel. Optimal for many small files where per-file
/// overhead (open/read/close syscalls) dominates over hash computation.
///
/// Reuses the same parallel file loading pattern as `blake2b_hash_files_many()`.
/// For 100 × 55-byte files: all 5500 bytes are loaded in parallel across threads,
/// then hashed in parallel — minimizing wall-clock time for syscall-bound workloads.
/// Returns results in input order.
pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();
    if n == 0 {
        return Vec::new();
    }

    // Issue readahead for all files
    #[cfg(target_os = "linux")]
    readahead_files_all(paths);

    // Phase 1: Load all files into memory in parallel.
    // For 20+ files, use the fast path that skips fstat.
    let use_fast = n >= 20;

    let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches.
        // (The fast path never applies here, since n <= 10 is below the 20-file
        // threshold.)
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|t| t.get())
            .unwrap_or(4)
            .min(n);
        let chunk_size = (n + num_threads - 1) / num_threads;

        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Hash all loaded data. For tiny files the hash cost is negligible;
    // for larger files, hashing in parallel across threads helps.
    let num_hash_threads = std::thread::available_parallelism()
        .map(|t| t.get())
        .unwrap_or(4)
        .min(n);
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let file_data = &file_data;

        let handles: Vec<_> = (0..num_hash_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= n {
                            break;
                        }
                        let result = match &file_data[idx] {
                            Ok(content) => Ok(hash_bytes(algo, content.as_ref())),
                            Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
                        };
                        local_results.push((idx, result));
                    }
                    local_results
                })
            })
            .collect();

        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (idx, result) in handle.join().unwrap() {
                results[idx] = Some(result);
            }
        }
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}

/// Stream-hash a file that already has a prefix read into memory.
/// Feeds `prefix` into the hasher first, then streams the rest from `file`.
/// Avoids re-opening and re-reading the file when the initial buffer is exhausted.
fn hash_stream_with_prefix(
    algo: HashAlgorithm,
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    match algo {
        HashAlgorithm::Sha1 => {
            #[cfg(target_os = "linux")]
            {
                hash_stream_with_prefix_openssl(openssl::hash::MessageDigest::sha1(), prefix, file)
            }
            #[cfg(not(target_os = "linux"))]
            {
                hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file)
            }
        }
        HashAlgorithm::Sha224 => {
            #[cfg(target_os = "linux")]
            {
                hash_stream_with_prefix_openssl(
                    openssl::hash::MessageDigest::sha224(),
                    prefix,
                    file,
                )
            }
            #[cfg(not(target_os = "linux"))]
            {
                hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file)
            }
        }
        HashAlgorithm::Sha256 => {
            #[cfg(target_os = "linux")]
            {
                hash_stream_with_prefix_openssl(
                    openssl::hash::MessageDigest::sha256(),
                    prefix,
                    file,
                )
            }
            #[cfg(not(target_os = "linux"))]
            {
                hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file)
            }
        }
        HashAlgorithm::Sha384 => {
            #[cfg(target_os = "linux")]
            {
                hash_stream_with_prefix_openssl(
                    openssl::hash::MessageDigest::sha384(),
                    prefix,
                    file,
                )
            }
            #[cfg(not(target_os = "linux"))]
            {
                hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file)
            }
        }
        HashAlgorithm::Sha512 => {
            #[cfg(target_os = "linux")]
            {
                hash_stream_with_prefix_openssl(
                    openssl::hash::MessageDigest::sha512(),
                    prefix,
                    file,
                )
            }
            #[cfg(not(target_os = "linux"))]
            {
                hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file)
            }
        }
        HashAlgorithm::Md5 => {
            #[cfg(target_os = "linux")]
            {
                hash_stream_with_prefix_openssl(openssl::hash::MessageDigest::md5(), prefix, file)
            }
            #[cfg(not(target_os = "linux"))]
            {
                hash_stream_with_prefix_digest::<md5::Md5>(prefix, file)
            }
        }
        HashAlgorithm::Blake2b => {
            // Default parameters: 512-bit output (HashAlgorithm carries no length).
            let mut state = blake2b_simd::Params::new().to_state();
            state.update(prefix);
            STREAM_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                ensure_stream_buf(&mut buf);
                loop {
                    let n = read_full(&mut file, &mut buf)?;
                    if n == 0 {
                        break;
                    }
                    state.update(&buf[..n]);
                }
                Ok(hex_encode(state.finalize().as_bytes()))
            })
        }
    }
}
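
// Test-only consistency check: hashing a prefix plus the rest of an open file
// must equal hashing the whole content at once. The temp-file name, contents,
// and 5-byte split point are arbitrary choices for this sketch.
#[cfg(test)]
#[test]
fn stream_with_prefix_matches_single_shot() {
    let path = std::env::temp_dir().join(format!("core_rs_prefix_{}", std::process::id()));
    std::fs::write(&path, b"hello, streaming world").unwrap();
    let mut file = File::open(&path).unwrap();
    let mut prefix = [0u8; 5];
    file.read_exact(&mut prefix).unwrap();
    let streamed = hash_stream_with_prefix(HashAlgorithm::Sha256, &prefix, file).unwrap();
    assert_eq!(streamed, hash_bytes(HashAlgorithm::Sha256, b"hello, streaming world"));
    let _ = std::fs::remove_file(&path);
}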

/// Stream-hash with prefix using OpenSSL (Linux only).
#[cfg(target_os = "linux")]
fn hash_stream_with_prefix_openssl(
    md: openssl::hash::MessageDigest,
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    let mut hasher = openssl::hash::Hasher::new(md).map_err(io::Error::other)?;
    hasher.update(prefix).map_err(io::Error::other)?;
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        loop {
            let n = read_full(&mut file, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]).map_err(io::Error::other)?;
        }
        let digest = hasher.finish().map_err(io::Error::other)?;
        Ok(hex_encode(&digest))
    })
}

/// Generic stream-hash with prefix for non-Linux platforms using the Digest trait.
#[cfg(not(target_os = "linux"))]
fn hash_stream_with_prefix_digest<D: digest::Digest>(
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = D::new();
        hasher.update(prefix);
        loop {
            let n = read_full(&mut file, &mut buf)?;
            if n == 0 {
                break;
            }
            hasher.update(&buf[..n]);
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}

/// Hash a file without fstat — just open, read until EOF, hash.
/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
/// Uses a two-tier buffer strategy: a small stack buffer (4KB) for the initial read,
/// then falls back to a larger stack buffer (64KB) or a streaming hash for bigger files.
/// For the benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let mut file = open_noatime(path)?;
    // First try a small stack buffer — optimal for tiny files (< 4KB).
    // Most "many_files" benchmark files are ~55 bytes, so this completes
    // with a single read() syscall and no fallback.
    let mut small_buf = [0u8; 4096];
    match file.read(&mut small_buf) {
        Ok(0) => return Ok(hash_bytes(algo, &[])),
        Ok(n) if n < small_buf.len() => {
            // File fits in small buffer — hash directly (common case)
            return Ok(hash_bytes(algo, &small_buf[..n]));
        }
        Ok(n) => {
            // Might be more data — fall back to larger buffer
            let mut buf = [0u8; 65536];
            buf[..n].copy_from_slice(&small_buf[..n]);
            let mut total = n;
            loop {
                match file.read(&mut buf[total..]) {
                    Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
                    Ok(n) => {
                        total += n;
                        if total >= buf.len() {
                            // File > 64KB: stream-hash from existing fd instead of
                            // re-opening. Feed already-read prefix, continue streaming.
                            return hash_stream_with_prefix(algo, &buf[..total], file);
                        }
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
        }
        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
            // Retry with the full buffer on interrupt
            let mut buf = [0u8; 65536];
            let mut total = 0;
            loop {
                match file.read(&mut buf[total..]) {
                    Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
                    Ok(n) => {
                        total += n;
                        if total >= buf.len() {
                            // File > 64KB: stream-hash from existing fd
                            return hash_stream_with_prefix(algo, &buf[..total], file);
                        }
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
        }
        Err(e) => return Err(e),
    }
}
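
// Test-only check that the no-stat path agrees with hash_bytes on content that
// crosses the 4KB first-read boundary. File name and payload are arbitrary.
#[cfg(test)]
#[test]
fn nostat_matches_hash_bytes() {
    let path = std::env::temp_dir().join(format!("core_rs_nostat_{}", std::process::id()));
    let data = vec![7u8; 5000]; // forces the 64KB fallback buffer
    std::fs::write(&path, &data).unwrap();
    assert_eq!(
        hash_file_nostat(HashAlgorithm::Md5, &path).unwrap(),
        hash_bytes(HashAlgorithm::Md5, &data)
    );
    let _ = std::fs::remove_file(&path);
}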

/// Hash a single file using raw Linux syscalls for minimum overhead.
/// Bypasses Rust's File abstraction entirely: raw open/fstat/read/close.
/// For the single-file fast path, this eliminates OpenOptions builder,
/// CString heap allocation, File wrapper overhead, and Read trait dispatch.
///
/// Size-based dispatch:
/// - Tiny (<8KB): stack buffer + raw read + hash_bytes (3 syscalls total)
/// - Small (8KB-16MB): wraps fd in File, reads into thread-local buffer
/// - Large (>=16MB): wraps fd in File, mmaps with HugePage + PopulateRead
/// - Non-regular: wraps fd in File, streaming hash_reader
#[cfg(target_os = "linux")]
pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    use std::os::unix::ffi::OsStrExt;

    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    // Raw open with O_RDONLY | O_CLOEXEC, optionally O_NOATIME
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            // O_NOATIME requires owning the file (or CAP_FOWNER); remember the
            // EPERM and retry without the flag.
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd(algo, fd2);
        }
        return Err(err);
    }
    hash_from_raw_fd(algo, fd)
}

/// Hash from a raw fd — dispatches by file size for optimal I/O strategy.
/// Handles tiny (stack buffer), small (thread-local buffer), large (mmap), and
/// non-regular (streaming) files.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    // Raw fstat to determine size and type
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes(algo, &[]));
    }

    // Tiny files (<8KB): raw read into stack buffer, no File wrapper needed.
    // Entire I/O in 3 raw syscalls: open + read + close.
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes(algo, &buf[..total]));
    }

    // For larger files, wrap fd in File for RAII close and existing optimized paths.
    use std::os::unix::io::FromRawFd;
    let file = unsafe { File::from_raw_fd(fd) };

    if is_regular && size > 0 {
        // Large files (>=16MB): mmap with HugePage + PopulateRead
        if size >= SMALL_FILE_LIMIT {
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // Prefault pages using huge pages (kernel 5.14+)
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                return Ok(hash_bytes(algo, &mmap));
            }
        }
        // Small files (8KB-16MB): single-read into thread-local buffer
        return hash_file_small(algo, file, size as usize);
    }

    // Non-regular files: streaming hash
    hash_reader(algo, file)
}
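
// Test-only (Linux): the raw-syscall path must agree with hash_bytes. The tiny
// payload here deliberately exercises the open+read+close stack-buffer branch.
#[cfg(all(test, target_os = "linux"))]
#[test]
fn raw_path_matches_hash_bytes() {
    let path = std::env::temp_dir().join(format!("core_rs_raw_{}", std::process::id()));
    std::fs::write(&path, b"tiny").unwrap();
    assert_eq!(
        hash_file_raw(HashAlgorithm::Sha1, &path).unwrap(),
        hash_bytes(HashAlgorithm::Sha1, b"tiny")
    );
    let _ = std::fs::remove_file(&path);
}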

/// Issue readahead hints for ALL file paths (no size threshold).
/// For multi-file benchmarks, even small files benefit from batched readahead.
#[cfg(target_os = "linux")]
pub fn readahead_files_all(paths: &[&Path]) {
    use std::os::unix::io::AsRawFd;
    for path in paths {
        if let Ok(file) = open_noatime(path) {
            if let Ok(meta) = file.metadata() {
                if meta.file_type().is_file() {
                    let len = meta.len();
                    unsafe {
                        libc::posix_fadvise(
                            file.as_raw_fd(),
                            0,
                            len as i64,
                            libc::POSIX_FADV_WILLNEED,
                        );
                    }
                }
            }
        }
    }
}

#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}

/// Print hash result in GNU format: "hash  filename\n"
/// Uses raw byte writes to avoid std::fmt overhead.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let mode = if binary { b'*' } else { b' ' };
    out.write_all(hash.as_bytes())?;
    out.write_all(&[b' ', mode])?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\n")
}

/// Print hash in GNU format with NUL terminator instead of newline.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let mode = if binary { b'*' } else { b' ' };
    out.write_all(hash.as_bytes())?;
    out.write_all(&[b' ', mode])?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\0")
}

// ── Single-write output buffer ─────────────────────────────────────
// For multi-file workloads, batch the entire "hash  filename\n" line into
// a single write() call, collapsing the four write_all calls per line that
// print_hash makes into one.

// Thread-local output line buffer for batched writes.
// Reused across files to avoid per-file allocation.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}

/// Build and write the standard GNU hash output line in a single write() call.
/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
/// For escaped filenames: "\hash  escaped_filename\n".
#[inline]
pub fn write_hash_line(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
    zero: bool,
    escaped: bool,
) -> io::Result<()> {
    LINE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        buf.clear();
        let mode = if binary { b'*' } else { b' ' };
        let term = if zero { b'\0' } else { b'\n' };
        if escaped {
            buf.push(b'\\');
        }
        buf.extend_from_slice(hash.as_bytes());
        buf.push(b' ');
        buf.push(mode);
        buf.extend_from_slice(filename.as_bytes());
        buf.push(term);
        out.write_all(&buf)
    })
}

/// Build and write BSD tag format output in a single write() call.
/// Format: "ALGO (filename) = hash\n"
#[inline]
pub fn write_hash_tag_line(
    out: &mut impl Write,
    algo_name: &str,
    hash: &str,
    filename: &str,
    zero: bool,
) -> io::Result<()> {
    LINE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        buf.clear();
        let term = if zero { b'\0' } else { b'\n' };
        buf.extend_from_slice(algo_name.as_bytes());
        buf.extend_from_slice(b" (");
        buf.extend_from_slice(filename.as_bytes());
        buf.extend_from_slice(b") = ");
        buf.extend_from_slice(hash.as_bytes());
        buf.push(term);
        out.write_all(&buf)
    })
}
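
// Test-only: pin down the exact bytes both single-write formatters emit.
// Vec<u8> implements Write, so no real I/O is involved.
#[cfg(test)]
#[test]
fn single_write_formats() {
    let mut out = Vec::new();
    write_hash_line(&mut out, "abc123", "file.txt", false, false, false).unwrap();
    assert_eq!(out, b"abc123  file.txt\n");

    out.clear();
    write_hash_line(&mut out, "abc123", "file.txt", true, false, true).unwrap();
    assert_eq!(out, b"\\abc123 *file.txt\n");

    out.clear();
    write_hash_tag_line(&mut out, "SHA256", "abc123", "file.txt", false).unwrap();
    assert_eq!(out, b"SHA256 (file.txt) = abc123\n");
}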

/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
pub fn print_hash_tag(
    out: &mut impl Write,
    algo: HashAlgorithm,
    hash: &str,
    filename: &str,
) -> io::Result<()> {
    out.write_all(algo.name().as_bytes())?;
    out.write_all(b" (")?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\n")
}

/// Print hash in BSD tag format with NUL terminator.
pub fn print_hash_tag_zero(
    out: &mut impl Write,
    algo: HashAlgorithm,
    hash: &str,
    filename: &str,
) -> io::Result<()> {
    out.write_all(algo.name().as_bytes())?;
    out.write_all(b" (")?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\0")
}

/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash" for 512-bit, or
/// "BLAKE2b-256 (filename) = hash" for other lengths.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        out.write_all(b"BLAKE2b (")?;
    } else {
        // Use write! for the rare non-512 path (negligible overhead per file)
        write!(out, "BLAKE2b-{} (", bits)?;
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\n")
}

/// Print hash in BSD tag format with BLAKE2b length info and NUL terminator.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        out.write_all(b"BLAKE2b (")?;
    } else {
        write!(out, "BLAKE2b-{} (", bits)?;
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\0")
}

/// Options for check mode.
pub struct CheckOptions {
    pub quiet: bool,
    pub status_only: bool,
    pub strict: bool,
    pub warn: bool,
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses the generic format: "line {line}: message".
    pub warn_prefix: String,
}

/// Result of check mode verification.
pub struct CheckResult {
    pub ok: usize,
    pub mismatches: usize,
    pub format_errors: usize,
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}

/// Verify checksums from a check file.
/// Each line should be "hash  filename", "hash *filename", or "ALGO (filename) = hash".
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        let line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename", "hash *filename", or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute the actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
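
// Illustrative sketch: driving check_file from an in-memory checksum list.
// The hash/filename pair is a placeholder (on a real run the missing file is
// simply counted under ignored_missing); output goes to plain Vec<u8> sinks.
#[allow(dead_code)]
fn example_check() -> io::Result<CheckResult> {
    let list = "0123456789abcdef0123456789abcdef  some-file.txt\n";
    let opts = CheckOptions {
        quiet: false,
        status_only: false,
        strict: false,
        warn: true,
        ignore_missing: true,
        warn_prefix: String::new(),
    };
    let (mut out, mut err) = (Vec::new(), Vec::new());
    check_file(HashAlgorithm::Md5, io::Cursor::new(list), &opts, &mut out, &mut err)
}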

/// Parse a checksum line in any supported format.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Try BSD tag format: "ALGO (filename) = hash"
    let rest = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA1 ("))
        .or_else(|| line.strip_prefix("SHA224 ("))
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("SHA384 ("))
        .or_else(|| line.strip_prefix("SHA512 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // Handle BLAKE2b-NNN (filename) = hash
            if line.starts_with("BLAKE2b-") {
                let after = &line["BLAKE2b-".len()..];
                if let Some(sp) = after.find(" (") {
                    if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                        return Some(&after[sp + 2..]);
                    }
                }
            }
            None
        });
    if let Some(rest) = rest {
        if let Some(paren_idx) = rest.find(") = ") {
            let filename = &rest[..paren_idx];
            let hash = &rest[paren_idx + 4..];
            return Some((hash, filename));
        }
    }

    // Handle backslash-escaped lines (leading '\')
    let line = line.strip_prefix('\\').unwrap_or(line);

    // Standard format: "hash  filename"
    if let Some(idx) = line.find("  ") {
        let hash = &line[..idx];
        let rest = &line[idx + 2..];
        return Some((hash, rest));
    }
    // Binary mode: "hash *filename"
    if let Some(idx) = line.find(" *") {
        let hash = &line[..idx];
        let rest = &line[idx + 2..];
        return Some((hash, rest));
    }
    None
}
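
// Test-only: one case per accepted syntax. The digest used is the well-known
// MD5 of the empty input, d41d8cd98f00b204e9800998ecf8427e.
#[cfg(test)]
#[test]
fn parse_check_line_formats() {
    let md5_empty = "d41d8cd98f00b204e9800998ecf8427e";
    assert_eq!(
        parse_check_line(&format!("{md5_empty}  empty.txt")),
        Some((md5_empty, "empty.txt"))
    );
    assert_eq!(
        parse_check_line(&format!("{md5_empty} *empty.txt")),
        Some((md5_empty, "empty.txt"))
    );
    assert_eq!(
        parse_check_line(&format!("MD5 (empty.txt) = {md5_empty}")),
        Some((md5_empty, "empty.txt"))
    );
    assert_eq!(parse_check_line("not a checksum line"), None);
}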

/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name (e.g., BLAKE2b-256 -> Some(256)).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let paren_start = line.find(" (")?;
    let algo_part = &line[..paren_start];
    let rest = &line[paren_start + 2..];
    let paren_end = rest.find(") = ")?;
    let filename = &rest[..paren_end];
    let hash = &rest[paren_end + 4..];

    // Parse optional bit length from algo name (e.g., "BLAKE2b-256" -> Some(256))
    let bits = if let Some(dash_pos) = algo_part.rfind('-') {
        algo_part[dash_pos + 1..].parse::<usize>().ok()
    } else {
        None
    };

    Some((hash, filename, bits))
}
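
// Test-only: bit-length parsing from the algo tag, with and without a suffix.
#[cfg(test)]
#[test]
fn parse_tag_line_bits() {
    assert_eq!(
        parse_check_line_tag("BLAKE2b-256 (f.txt) = abcd"),
        Some(("abcd", "f.txt", Some(256)))
    );
    assert_eq!(
        parse_check_line_tag("BLAKE2b (f.txt) = abcd"),
        Some(("abcd", "f.txt", None))
    );
}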

/// Read as many bytes as possible into buf, retrying on partial reads.
/// Ensures each hash update gets a full buffer (fewer update calls = less overhead).
/// Fast path: regular file reads usually return the full buffer on the first call.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    // Fast path: first read() usually fills the entire buffer for regular files
    let n = reader.read(buf)?;
    if n == buf.len() || n == 0 {
        return Ok(n);
    }
    // Slow path: partial read — retry to fill buffer (pipes, slow devices)
    let mut total = n;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
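
// Test-only sketch of the slow path: a reader that hands back at most one byte
// per call, forcing read_full onto its retry loop. The OneByte helper exists
// only for this test.
#[cfg(test)]
#[test]
fn read_full_retries_partial_reads() {
    struct OneByte<'a>(&'a [u8]);
    impl Read for OneByte<'_> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            if self.0.is_empty() || buf.is_empty() {
                return Ok(0);
            }
            buf[0] = self.0[0];
            self.0 = &self.0[1..];
            Ok(1)
        }
    }
    let mut r = OneByte(b"abcdef");
    let mut buf = [0u8; 4];
    assert_eq!(read_full(&mut r, &mut buf).unwrap(), 4);
    assert_eq!(&buf, b"abcd");
}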

/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Fast hex encoding using 2-byte pair lookup table — one lookup per input byte.
/// Uses String directly instead of Vec<u8> to avoid the from_utf8 conversion overhead.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let len = bytes.len() * 2;
    let mut hex = String::with_capacity(len);
    // SAFETY: We write exactly `len` valid ASCII hex bytes into the String's buffer.
    unsafe {
        let buf = hex.as_mut_vec();
        buf.set_len(len);
        hex_encode_to_slice(bytes, buf);
    }
    hex
}
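
// Test-only: spot-check the table-driven encoder against hand-computed pairs,
// including the empty input.
#[cfg(test)]
#[test]
fn hex_encode_known_values() {
    assert_eq!(hex_encode(&[]), "");
    assert_eq!(hex_encode(&[0x00, 0x0f, 0xa5, 0xff]), "000fa5ff");
}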

/// Encode bytes as hex directly into a pre-allocated output slice.
/// Output slice must be at least `bytes.len() * 2` bytes long.
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    // SAFETY: We write exactly bytes.len()*2 bytes into `out`, which must be large enough.
    unsafe {
        let ptr = out.as_mut_ptr();
        for (i, &b) in bytes.iter().enumerate() {
            let pair = *HEX_TABLE.get_unchecked(b as usize);
            *ptr.add(i * 2) = pair[0];
            *ptr.add(i * 2 + 1) = pair[1];
        }
    }
}