// coreutils_rs/hash/core.rs

1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[cfg(not(target_os = "linux"))]
11use digest::Digest;
12#[cfg(not(target_os = "linux"))]
13use md5::Md5;
14
/// Supported hash algorithms.
///
/// A plain selector passed to this module's hashing entry points
/// (`hash_bytes`, `hash_reader`, `hash_bytes_to_buf`, ...). Each variant is
/// dispatched to a platform-specific backend: OpenSSL on Linux, ring or the
/// sha2/md-5 crates elsewhere, and blake2b_simd for BLAKE2b on all targets.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha1,
    Sha224,
    Sha256,
    Sha384,
    Sha512,
    Md5,
    Blake2b,
}
26
27impl HashAlgorithm {
28    pub fn name(self) -> &'static str {
29        match self {
30            HashAlgorithm::Sha1 => "SHA1",
31            HashAlgorithm::Sha224 => "SHA224",
32            HashAlgorithm::Sha256 => "SHA256",
33            HashAlgorithm::Sha384 => "SHA384",
34            HashAlgorithm::Sha512 => "SHA512",
35            HashAlgorithm::Md5 => "MD5",
36            HashAlgorithm::Blake2b => "BLAKE2b",
37        }
38    }
39}
40
41// ── Generic hash helpers ────────────────────────────────────────────
42
/// Single-shot hash of `data` via the generic Digest trait (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let digest = D::digest(data);
    hex_encode(&digest)
}
48
/// Streaming hash over `reader` using the shared thread-local buffer
/// (non-Linux fallback for any Digest implementation).
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = D::new();
        // Feed full chunks until read_full signals EOF with 0 bytes.
        loop {
            match read_full(&mut reader, &mut buf)? {
                0 => break,
                n => hasher.update(&buf[..n]),
            }
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}
66
67// ── Public hashing API ──────────────────────────────────────────────
68
/// Buffer size for streaming hash I/O.
/// 128KB matches GNU coreutils' buffer size (BUFSIZE=131072), which works well with kernel readahead.
/// Many small reads allow the kernel to pipeline I/O efficiently, reducing latency
/// vs fewer large reads that stall waiting for the full buffer to fill.
const HASH_READ_BUF: usize = 131072;

// Thread-local reusable buffer for streaming hash I/O.
// Allocated LAZILY (only on first streaming-hash call, via ensure_stream_buf) so
// small-file-only workloads (e.g. "sha256sum *.txt" where every file takes the
// single-shot path) never pay for the HASH_READ_BUF (128KB) allocation at all.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}
81
/// Ensure the streaming buffer is at least HASH_READ_BUF (128KB) bytes.
/// Called only on the streaming path, so small-file workloads never allocate
/// the streaming buffer. The grow (and its zero-fill) happens at most once per
/// thread; afterwards the buffer is reused across files.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() < HASH_READ_BUF {
        buf.resize(HASH_READ_BUF, 0);
    }
}
90
91// ── OpenSSL-accelerated hash functions (Linux) ───────────────────────
92// OpenSSL's libcrypto provides the fastest SHA implementations, using
93// hardware-specific assembly (SHA-NI, AVX2/AVX512, NEON) tuned for each CPU.
94// This matches what GNU coreutils uses internally.
95
96/// Single-shot hash using OpenSSL (Linux).
97/// Returns an error if OpenSSL rejects the algorithm (e.g. FIPS mode).
98#[cfg(target_os = "linux")]
99#[inline]
100fn openssl_hash_bytes(md: openssl::hash::MessageDigest, data: &[u8]) -> io::Result<String> {
101    let digest = openssl::hash::hash(md, data).map_err(|e| io::Error::other(e.to_string()))?;
102    Ok(hex_encode(&digest))
103}
104
105/// Streaming hash using OpenSSL Hasher (Linux).
106#[cfg(target_os = "linux")]
107fn openssl_hash_reader(
108    md: openssl::hash::MessageDigest,
109    mut reader: impl Read,
110) -> io::Result<String> {
111    STREAM_BUF.with(|cell| {
112        let mut buf = cell.borrow_mut();
113        ensure_stream_buf(&mut buf);
114        let mut hasher =
115            openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e.to_string()))?;
116        loop {
117            let n = read_full(&mut reader, &mut buf)?;
118            if n == 0 {
119                break;
120            }
121            hasher
122                .update(&buf[..n])
123                .map_err(|e| io::Error::other(e.to_string()))?;
124        }
125        let digest = hasher
126            .finish()
127            .map_err(|e| io::Error::other(e.to_string()))?;
128        Ok(hex_encode(&digest))
129    })
130}
131
132/// Single-shot hash and write hex directly to buffer using OpenSSL (Linux).
133/// Returns an error if OpenSSL rejects the algorithm (e.g. FIPS mode).
134#[cfg(target_os = "linux")]
135#[inline]
136fn openssl_hash_bytes_to_buf(
137    md: openssl::hash::MessageDigest,
138    data: &[u8],
139    out: &mut [u8],
140) -> io::Result<usize> {
141    let digest = openssl::hash::hash(md, data).map_err(|e| io::Error::other(e.to_string()))?;
142    hex_encode_to_slice(&digest, out);
143    Ok(digest.len() * 2)
144}
145
146// ── Ring-accelerated hash functions (non-Apple, non-Linux targets) ────
147// ring provides BoringSSL assembly with SHA-NI/AVX2/NEON for Windows/FreeBSD.
148
/// Single-shot hash using ring::digest (non-Apple, non-Linux).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
#[inline]
fn ring_hash_bytes(algo: &'static ring::digest::Algorithm, data: &[u8]) -> io::Result<String> {
    let digest = ring::digest::digest(algo, data);
    Ok(hex_encode(digest.as_ref()))
}
155
/// Streaming hash using ring::digest::Context (non-Apple, non-Linux).
/// Reuses the shared thread-local buffer for chunked reads.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn ring_hash_reader(
    algo: &'static ring::digest::Algorithm,
    mut reader: impl Read,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut ctx = ring::digest::Context::new(algo);
        loop {
            match read_full(&mut reader, &mut buf)? {
                0 => break,
                n => ctx.update(&buf[..n]),
            }
        }
        Ok(hex_encode(ctx.finish().as_ref()))
    })
}
176
177// ── Algorithm → OpenSSL MessageDigest mapping (Linux) ──────────────────
178// Centralizes OpenSSL algorithm dispatch, used by hash_bytes, hash_stream_with_prefix,
179// hash_file_streaming, and hash_file_pipelined_read.
180
181#[cfg(target_os = "linux")]
182fn algo_to_openssl_md(algo: HashAlgorithm) -> openssl::hash::MessageDigest {
183    match algo {
184        HashAlgorithm::Sha1 => openssl::hash::MessageDigest::sha1(),
185        HashAlgorithm::Sha224 => openssl::hash::MessageDigest::sha224(),
186        HashAlgorithm::Sha256 => openssl::hash::MessageDigest::sha256(),
187        HashAlgorithm::Sha384 => openssl::hash::MessageDigest::sha384(),
188        HashAlgorithm::Sha512 => openssl::hash::MessageDigest::sha512(),
189        HashAlgorithm::Md5 => openssl::hash::MessageDigest::md5(),
190        HashAlgorithm::Blake2b => unreachable!("Blake2b uses its own hasher"),
191    }
192}
193
194// ── SHA-256 ───────────────────────────────────────────────────────────
195// Linux: OpenSSL (system libcrypto, matches GNU coreutils)
196// Windows/FreeBSD: ring (BoringSSL assembly)
197// Apple: sha2 crate (ring doesn't compile on Apple Silicon)
198
199#[cfg(target_os = "linux")]
200fn sha256_bytes(data: &[u8]) -> io::Result<String> {
201    openssl_hash_bytes(openssl::hash::MessageDigest::sha256(), data)
202}
203
204#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
205fn sha256_bytes(data: &[u8]) -> io::Result<String> {
206    ring_hash_bytes(&ring::digest::SHA256, data)
207}
208
209#[cfg(target_vendor = "apple")]
210fn sha256_bytes(data: &[u8]) -> io::Result<String> {
211    Ok(hash_digest::<sha2::Sha256>(data))
212}
213
214#[cfg(target_os = "linux")]
215fn sha256_reader(reader: impl Read) -> io::Result<String> {
216    openssl_hash_reader(openssl::hash::MessageDigest::sha256(), reader)
217}
218
219#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
220fn sha256_reader(reader: impl Read) -> io::Result<String> {
221    ring_hash_reader(&ring::digest::SHA256, reader)
222}
223
224#[cfg(target_vendor = "apple")]
225fn sha256_reader(reader: impl Read) -> io::Result<String> {
226    hash_reader_impl::<sha2::Sha256>(reader)
227}
228
229// ── SHA-1 ─────────────────────────────────────────────────────────────
230
231#[cfg(target_os = "linux")]
232fn sha1_bytes(data: &[u8]) -> io::Result<String> {
233    openssl_hash_bytes(openssl::hash::MessageDigest::sha1(), data)
234}
235
236#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
237fn sha1_bytes(data: &[u8]) -> io::Result<String> {
238    ring_hash_bytes(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data)
239}
240
241#[cfg(target_vendor = "apple")]
242fn sha1_bytes(data: &[u8]) -> io::Result<String> {
243    Ok(hash_digest::<sha1::Sha1>(data))
244}
245
246#[cfg(target_os = "linux")]
247fn sha1_reader(reader: impl Read) -> io::Result<String> {
248    openssl_hash_reader(openssl::hash::MessageDigest::sha1(), reader)
249}
250
251#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
252fn sha1_reader(reader: impl Read) -> io::Result<String> {
253    ring_hash_reader(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, reader)
254}
255
256#[cfg(target_vendor = "apple")]
257fn sha1_reader(reader: impl Read) -> io::Result<String> {
258    hash_reader_impl::<sha1::Sha1>(reader)
259}
260
261// ── SHA-224 ───────────────────────────────────────────────────────────
262// ring does not support SHA-224. Use OpenSSL on Linux, sha2 crate elsewhere.
263
264#[cfg(target_os = "linux")]
265fn sha224_bytes(data: &[u8]) -> io::Result<String> {
266    openssl_hash_bytes(openssl::hash::MessageDigest::sha224(), data)
267}
268
269#[cfg(not(target_os = "linux"))]
270fn sha224_bytes(data: &[u8]) -> io::Result<String> {
271    Ok(hex_encode(&sha2::Sha224::digest(data)))
272}
273
274#[cfg(target_os = "linux")]
275fn sha224_reader(reader: impl Read) -> io::Result<String> {
276    openssl_hash_reader(openssl::hash::MessageDigest::sha224(), reader)
277}
278
279#[cfg(not(target_os = "linux"))]
280fn sha224_reader(reader: impl Read) -> io::Result<String> {
281    STREAM_BUF.with(|cell| {
282        let mut buf = cell.borrow_mut();
283        ensure_stream_buf(&mut buf);
284        let mut hasher = <sha2::Sha224 as digest::Digest>::new();
285        let mut reader = reader;
286        loop {
287            let n = read_full(&mut reader, &mut buf)?;
288            if n == 0 {
289                break;
290            }
291            digest::Digest::update(&mut hasher, &buf[..n]);
292        }
293        Ok(hex_encode(&digest::Digest::finalize(hasher)))
294    })
295}
296
297// ── SHA-384 ───────────────────────────────────────────────────────────
298
299#[cfg(target_os = "linux")]
300fn sha384_bytes(data: &[u8]) -> io::Result<String> {
301    openssl_hash_bytes(openssl::hash::MessageDigest::sha384(), data)
302}
303
304#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
305fn sha384_bytes(data: &[u8]) -> io::Result<String> {
306    ring_hash_bytes(&ring::digest::SHA384, data)
307}
308
309#[cfg(target_vendor = "apple")]
310fn sha384_bytes(data: &[u8]) -> io::Result<String> {
311    Ok(hex_encode(&sha2::Sha384::digest(data)))
312}
313
314#[cfg(target_os = "linux")]
315fn sha384_reader(reader: impl Read) -> io::Result<String> {
316    openssl_hash_reader(openssl::hash::MessageDigest::sha384(), reader)
317}
318
319#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
320fn sha384_reader(reader: impl Read) -> io::Result<String> {
321    ring_hash_reader(&ring::digest::SHA384, reader)
322}
323
324#[cfg(target_vendor = "apple")]
325fn sha384_reader(reader: impl Read) -> io::Result<String> {
326    STREAM_BUF.with(|cell| {
327        let mut buf = cell.borrow_mut();
328        ensure_stream_buf(&mut buf);
329        let mut hasher = <sha2::Sha384 as digest::Digest>::new();
330        let mut reader = reader;
331        loop {
332            let n = read_full(&mut reader, &mut buf)?;
333            if n == 0 {
334                break;
335            }
336            digest::Digest::update(&mut hasher, &buf[..n]);
337        }
338        Ok(hex_encode(&digest::Digest::finalize(hasher)))
339    })
340}
341
342// ── SHA-512 ───────────────────────────────────────────────────────────
343
344#[cfg(target_os = "linux")]
345fn sha512_bytes(data: &[u8]) -> io::Result<String> {
346    openssl_hash_bytes(openssl::hash::MessageDigest::sha512(), data)
347}
348
349#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
350fn sha512_bytes(data: &[u8]) -> io::Result<String> {
351    ring_hash_bytes(&ring::digest::SHA512, data)
352}
353
354#[cfg(target_vendor = "apple")]
355fn sha512_bytes(data: &[u8]) -> io::Result<String> {
356    Ok(hex_encode(&sha2::Sha512::digest(data)))
357}
358
359#[cfg(target_os = "linux")]
360fn sha512_reader(reader: impl Read) -> io::Result<String> {
361    openssl_hash_reader(openssl::hash::MessageDigest::sha512(), reader)
362}
363
364#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
365fn sha512_reader(reader: impl Read) -> io::Result<String> {
366    ring_hash_reader(&ring::digest::SHA512, reader)
367}
368
369#[cfg(target_vendor = "apple")]
370fn sha512_reader(reader: impl Read) -> io::Result<String> {
371    STREAM_BUF.with(|cell| {
372        let mut buf = cell.borrow_mut();
373        ensure_stream_buf(&mut buf);
374        let mut hasher = <sha2::Sha512 as digest::Digest>::new();
375        let mut reader = reader;
376        loop {
377            let n = read_full(&mut reader, &mut buf)?;
378            if n == 0 {
379                break;
380            }
381            digest::Digest::update(&mut hasher, &buf[..n]);
382        }
383        Ok(hex_encode(&digest::Digest::finalize(hasher)))
384    })
385}
386
387/// Compute hash of a byte slice directly (zero-copy fast path).
388/// Returns an error if the underlying crypto library rejects the algorithm.
389pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> io::Result<String> {
390    match algo {
391        HashAlgorithm::Sha1 => sha1_bytes(data),
392        HashAlgorithm::Sha224 => sha224_bytes(data),
393        HashAlgorithm::Sha256 => sha256_bytes(data),
394        HashAlgorithm::Sha384 => sha384_bytes(data),
395        HashAlgorithm::Sha512 => sha512_bytes(data),
396        HashAlgorithm::Md5 => md5_bytes(data),
397        HashAlgorithm::Blake2b => {
398            let hash = blake2b_simd::blake2b(data);
399            Ok(hex_encode(hash.as_bytes()))
400        }
401    }
402}
403
404/// Hash data and write hex result directly into an output buffer.
405/// Returns the number of hex bytes written. Avoids String allocation
406/// on the critical single-file fast path.
407/// `out` must be at least 128 bytes for BLAKE2b/SHA512 (64 * 2), 64 for SHA256, 32 for MD5, etc.
408#[cfg(target_os = "linux")]
409pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> io::Result<usize> {
410    match algo {
411        HashAlgorithm::Md5 => {
412            openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::md5(), data, out)
413        }
414        HashAlgorithm::Sha1 => sha1_bytes_to_buf(data, out),
415        HashAlgorithm::Sha224 => sha224_bytes_to_buf(data, out),
416        HashAlgorithm::Sha256 => sha256_bytes_to_buf(data, out),
417        HashAlgorithm::Sha384 => sha384_bytes_to_buf(data, out),
418        HashAlgorithm::Sha512 => sha512_bytes_to_buf(data, out),
419        HashAlgorithm::Blake2b => {
420            let hash = blake2b_simd::blake2b(data);
421            let bytes = hash.as_bytes();
422            hex_encode_to_slice(bytes, out);
423            Ok(bytes.len() * 2)
424        }
425    }
426}
427
428#[cfg(target_os = "linux")]
429fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
430    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha1(), data, out)
431}
432#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
433fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
434    let digest = ring::digest::digest(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data);
435    hex_encode_to_slice(digest.as_ref(), out);
436    Ok(40)
437}
438#[cfg(target_vendor = "apple")]
439fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
440    let digest = sha1::Sha1::digest(data);
441    hex_encode_to_slice(&digest, out);
442    Ok(40)
443}
444
445#[cfg(target_os = "linux")]
446fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
447    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha224(), data, out)
448}
449#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
450fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
451    let digest = <sha2::Sha224 as sha2::Digest>::digest(data);
452    hex_encode_to_slice(&digest, out);
453    Ok(56)
454}
455#[cfg(target_vendor = "apple")]
456fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
457    let digest = <sha2::Sha224 as sha2::Digest>::digest(data);
458    hex_encode_to_slice(&digest, out);
459    Ok(56)
460}
461
462#[cfg(target_os = "linux")]
463fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
464    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha256(), data, out)
465}
466#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
467fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
468    let digest = ring::digest::digest(&ring::digest::SHA256, data);
469    hex_encode_to_slice(digest.as_ref(), out);
470    Ok(64)
471}
472#[cfg(target_vendor = "apple")]
473fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
474    let digest = <sha2::Sha256 as sha2::Digest>::digest(data);
475    hex_encode_to_slice(&digest, out);
476    Ok(64)
477}
478
479#[cfg(target_os = "linux")]
480fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
481    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha384(), data, out)
482}
483#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
484fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
485    let digest = ring::digest::digest(&ring::digest::SHA384, data);
486    hex_encode_to_slice(digest.as_ref(), out);
487    Ok(96)
488}
489#[cfg(target_vendor = "apple")]
490fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
491    let digest = <sha2::Sha384 as sha2::Digest>::digest(data);
492    hex_encode_to_slice(&digest, out);
493    Ok(96)
494}
495
496#[cfg(target_os = "linux")]
497fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
498    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha512(), data, out)
499}
500#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
501fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
502    let digest = ring::digest::digest(&ring::digest::SHA512, data);
503    hex_encode_to_slice(digest.as_ref(), out);
504    Ok(128)
505}
506#[cfg(target_vendor = "apple")]
507fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
508    let digest = <sha2::Sha512 as sha2::Digest>::digest(data);
509    hex_encode_to_slice(&digest, out);
510    Ok(128)
511}
512
/// Hash a single file using raw syscalls and write hex directly to output buffer.
/// Returns number of hex bytes written.
/// This is the absolute minimum-overhead path for single-file hashing:
/// raw open + fstat + read + hash + hex encode, with zero String allocation.
#[cfg(target_os = "linux")]
pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
    use std::os::unix::ffi::OsStrExt;

    // libc::open needs a NUL-terminated path; an embedded NUL is unrepresentable
    // and is rejected as InvalidInput rather than being silently truncated.
    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    // O_NOATIME skips the atime update but requires file ownership or
    // CAP_FOWNER; NOATIME_SUPPORTED caches whether it works so at most one
    // EPERM is ever paid process-wide.
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM while O_NOATIME was set: record that it's unsupported and
        // retry exactly once without the flag.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd_to_buf(algo, fd2, out);
        }
        return Err(err);
    }
    hash_from_raw_fd_to_buf(algo, fd, out)
}
545
/// Hash from raw fd and write hex directly to output buffer.
/// Takes ownership of `fd` in all paths (it is either closed explicitly or
/// wrapped in a `File` whose Drop closes it).
/// For tiny files (<8KB), the entire path is raw syscalls + stack buffer — zero heap.
/// For larger files, falls back to the String-returning helpers and copies
/// the hex into `out` (`out` must be large enough for the digest's hex form).
#[cfg(target_os = "linux")]
fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        // Close before returning: fd is not yet owned by a File.
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file: nothing to read, hash the empty slice.
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &[], out);
    }

    // Tiny files (<8KB): fully raw path — zero heap allocation
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        // Loop until st_size bytes are read; short reads and EINTR are retried,
        // EOF before st_size (file shrank) just hashes what was read.
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &buf[..total], out);
    }

    // Larger files: fall back to hash_from_raw_fd which returns a String,
    // then copy the hex into out.
    use std::os::unix::io::FromRawFd;
    // From here on the File owns fd; no further manual close.
    let file = unsafe { File::from_raw_fd(fd) };
    let hash_str = if is_regular && size > 0 {
        if size >= SMALL_FILE_LIMIT {
            // Try mmap first; advise hints are best-effort (errors ignored).
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // PopulateRead prefaults pages; fall back to WillNeed on older kernels.
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                hash_bytes(algo, &mmap)?
            } else {
                hash_file_small(algo, file, size as usize)?
            }
        } else {
            hash_file_small(algo, file, size as usize)?
        }
    } else {
        // Non-regular file (pipe, device): unknown size, stream it.
        hash_reader(algo, file)?
    };
    let hex_bytes = hash_str.as_bytes();
    out[..hex_bytes.len()].copy_from_slice(hex_bytes);
    Ok(hex_bytes.len())
}
632
633// ── MD5 ─────────────────────────────────────────────────────────────
634// Linux: OpenSSL (same assembly-optimized library as GNU coreutils)
635// Other platforms: md-5 crate (pure Rust)
636
637#[cfg(target_os = "linux")]
638fn md5_bytes(data: &[u8]) -> io::Result<String> {
639    openssl_hash_bytes(openssl::hash::MessageDigest::md5(), data)
640}
641
642#[cfg(not(target_os = "linux"))]
643fn md5_bytes(data: &[u8]) -> io::Result<String> {
644    Ok(hash_digest::<Md5>(data))
645}
646
647#[cfg(target_os = "linux")]
648fn md5_reader(reader: impl Read) -> io::Result<String> {
649    openssl_hash_reader(openssl::hash::MessageDigest::md5(), reader)
650}
651
652#[cfg(not(target_os = "linux"))]
653fn md5_reader(reader: impl Read) -> io::Result<String> {
654    hash_reader_impl::<Md5>(reader)
655}
656
657/// Compute hash of data from a reader, returning hex string.
658pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
659    match algo {
660        HashAlgorithm::Sha1 => sha1_reader(reader),
661        HashAlgorithm::Sha224 => sha224_reader(reader),
662        HashAlgorithm::Sha256 => sha256_reader(reader),
663        HashAlgorithm::Sha384 => sha384_reader(reader),
664        HashAlgorithm::Sha512 => sha512_reader(reader),
665        HashAlgorithm::Md5 => md5_reader(reader),
666        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
667    }
668}
669
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
/// Relaxed ordering suffices: this is an optimization hint, not a correctness
/// invariant — a racing thread at worst attempts O_NOATIME one extra time.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
674
675/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
676/// Caches whether O_NOATIME works to avoid double-open on every file.
677#[cfg(target_os = "linux")]
678fn open_noatime(path: &Path) -> io::Result<File> {
679    use std::os::unix::fs::OpenOptionsExt;
680    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
681        match std::fs::OpenOptions::new()
682            .read(true)
683            .custom_flags(libc::O_NOATIME)
684            .open(path)
685        {
686            Ok(f) => return Ok(f),
687            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
688                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
689                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
690            }
691            Err(e) => return Err(e), // Real error, propagate
692        }
693    }
694    File::open(path)
695}
696
/// Non-Linux fallback: O_NOATIME is Linux-specific, so open normally.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
701
702/// Open a file and get its metadata in one step.
703/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
704#[cfg(target_os = "linux")]
705#[inline]
706fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
707    let file = open_noatime(path)?;
708    let fd = {
709        use std::os::unix::io::AsRawFd;
710        file.as_raw_fd()
711    };
712    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
713    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
714        return Err(io::Error::last_os_error());
715    }
716    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
717    let size = stat.st_size as u64;
718    Ok((file, size, is_regular))
719}
720
/// Open a file and retrieve (handle, byte size, is-regular-file) in one step
/// (portable fallback via `File::metadata`).
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let (size, is_regular) = (meta.len(), meta.file_type().is_file());
    Ok((file, size, is_regular))
}
728
/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
/// NOTE(review): not referenced in this portion of the file — presumably used
/// by callers further down; confirm before removing.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
/// Must stay in sync with the 8192-byte stack buffer in hash_from_raw_fd_to_buf.
const TINY_FILE_LIMIT: u64 = 8 * 1024;
746
// Thread-local reusable buffer for single-read hash.
// Grows lazily up to SMALL_FILE_LIMIT (16MB). The initial 64KB capacity
// handles tiny files; a larger file triggers at most one grow, and the grown
// buffer then persists for reuse across subsequent files on this thread.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
753
754/// Optimized hash for large files (>=16MB) on Linux.
755/// Hash large files (>=16MB) using streaming I/O with fadvise + ring Context.
756/// Uses sequential fadvise hint for kernel readahead, then streams through
757/// hash context in large chunks. For large files (>64MB), uses double-buffered
758/// reader thread to overlap I/O and hashing.
759#[cfg(target_os = "linux")]
760fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
761    // For very large files, double-buffered reader thread overlaps I/O and CPU.
762    // For medium files, single-thread streaming is faster (avoids thread overhead).
763    if file_size >= 64 * 1024 * 1024 {
764        hash_file_pipelined_read(algo, file, file_size)
765    } else {
766        hash_file_streaming(algo, file, file_size)
767    }
768}
769
770/// Simple single-thread streaming hash with fadvise.
771/// Optimal for files 16-64MB where thread overhead exceeds I/O overlap benefit.
772#[cfg(target_os = "linux")]
773fn hash_file_streaming(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
774    use std::os::unix::io::AsRawFd;
775
776    let _ = unsafe {
777        libc::posix_fadvise(
778            file.as_raw_fd(),
779            0,
780            file_size as i64,
781            libc::POSIX_FADV_SEQUENTIAL,
782        )
783    };
784
785    // Use OpenSSL for all algorithms on Linux (same library as GNU coreutils).
786    if matches!(algo, HashAlgorithm::Blake2b) {
787        blake2b_hash_reader(file, 64)
788    } else {
789        openssl_hash_reader(algo_to_openssl_md(algo), file)
790    }
791}
792
793/// Streaming fallback for large files when mmap is unavailable.
794/// Uses double-buffered reader thread with fadvise hints.
795/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
796#[cfg(target_os = "linux")]
797fn hash_file_pipelined_read(
798    algo: HashAlgorithm,
799    mut file: File,
800    file_size: u64,
801) -> io::Result<String> {
802    use std::os::unix::io::AsRawFd;
803
804    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4MB per buffer
805
806    let _ = unsafe {
807        libc::posix_fadvise(
808            file.as_raw_fd(),
809            0,
810            file_size as i64,
811            libc::POSIX_FADV_SEQUENTIAL,
812        )
813    };
814
815    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
816    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
817    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
818
819    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
820        while let Ok(mut buf) = buf_rx.recv() {
821            let mut total = 0;
822            while total < buf.len() {
823                match file.read(&mut buf[total..]) {
824                    Ok(0) => break,
825                    Ok(n) => total += n,
826                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
827                    Err(e) => return Err(e),
828                }
829            }
830            if total == 0 {
831                break;
832            }
833            if tx.send((buf, total)).is_err() {
834                break;
835            }
836        }
837        Ok(())
838    });
839
840    // Use OpenSSL Hasher for all hash algorithms (same library as GNU coreutils).
841    macro_rules! hash_pipelined_openssl {
842        ($md:expr) => {{
843            let mut hasher =
844                openssl::hash::Hasher::new($md).map_err(|e| io::Error::other(e.to_string()))?;
845            while let Ok((buf, n)) = rx.recv() {
846                hasher
847                    .update(&buf[..n])
848                    .map_err(|e| io::Error::other(e.to_string()))?;
849                let _ = buf_tx.send(buf);
850            }
851            let digest = hasher
852                .finish()
853                .map_err(|e| io::Error::other(e.to_string()))?;
854            Ok(hex_encode(&digest))
855        }};
856    }
857
858    let hash_result: io::Result<String> = if matches!(algo, HashAlgorithm::Blake2b) {
859        let mut state = blake2b_simd::Params::new().to_state();
860        while let Ok((buf, n)) = rx.recv() {
861            state.update(&buf[..n]);
862            let _ = buf_tx.send(buf);
863        }
864        Ok(hex_encode(state.finalize().as_bytes()))
865    } else {
866        hash_pipelined_openssl!(algo_to_openssl_md(algo))
867    };
868
869    match reader_handle.join() {
870        Ok(Ok(())) => {}
871        Ok(Err(e)) => {
872            if hash_result.is_ok() {
873                return Err(e);
874            }
875        }
876        Err(payload) => {
877            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
878                format!("reader thread panicked: {}", s)
879            } else if let Some(s) = payload.downcast_ref::<String>() {
880                format!("reader thread panicked: {}", s)
881            } else {
882                "reader thread panicked".to_string()
883            };
884            return Err(io::Error::other(msg));
885        }
886    }
887
888    hash_result
889}
890
/// Hash a file by path. Uses I/O pipelining for large files on Linux,
/// mmap with HUGEPAGE hints as fallback, single-read for small files,
/// and streaming read for non-regular files.
///
/// Size-tiered strategy (thresholds are the TINY_FILE_LIMIT /
/// SMALL_FILE_LIMIT constants defined earlier in this file):
///   * empty regular file      -> hash of the empty slice, no I/O
///   * < TINY_FILE_LIMIT       -> stack buffer, single read()
///   * < SMALL_FILE_LIMIT      -> thread-local heap buffer, single-shot hash
///   * >= SMALL_FILE_LIMIT     -> pipelined read (Linux) or mmap (elsewhere)
///   * non-regular / fallback  -> streaming read below
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    // Empty regular file: skip all I/O and hash the empty input directly.
    if is_regular && file_size == 0 {
        return hash_bytes(algo, &[]);
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return hash_file_tiny(algo, file, file_size as usize);
        }
        // Large files (>=16MB): use I/O pipelining on Linux to overlap read + hash
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return hash_file_pipelined(algo, file, file_size);
            }
            // Non-Linux: mmap fallback
            #[cfg(not(target_os = "linux"))]
            {
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return hash_bytes(algo, &mmap);
                }
                // mmap failure falls through to the streaming path below.
            }
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash.
        // This avoids Hasher context allocation + streaming overhead for each file.
        if file_size < SMALL_FILE_LIMIT {
            return hash_file_small(algo, file, file_size as usize);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        // Advisory readahead hint; the result is intentionally ignored.
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    hash_reader(algo, file)
}
938
939/// Hash a tiny file (<8KB) using a stack-allocated buffer.
940/// Single read() syscall, zero heap allocation on the data path.
941/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
942#[inline]
943fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
944    let mut buf = [0u8; 8192];
945    let mut total = 0;
946    // Read with known size — usually completes in a single read() for regular files
947    while total < size {
948        match file.read(&mut buf[total..size]) {
949            Ok(0) => break,
950            Ok(n) => total += n,
951            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
952            Err(e) => return Err(e),
953        }
954    }
955    hash_bytes(algo, &buf[..total])
956}
957
/// Hash a small file by reading it entirely into a thread-local buffer,
/// then using the single-shot hash function. Avoids per-file Hasher allocation.
///
/// `size` is the file's stat() size; if the file shrank between stat and
/// read, the short read simply hashes fewer bytes.
#[inline]
fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        // Reset length but keep allocation, then grow if needed
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
        // directly and only access buf[..total] where total <= size <= capacity.
        // NOTE(review): the not-yet-initialized region is handed to File::read;
        // std's File never reads from the slice, but this is formally outside
        // the Read trait's contract.
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        // Retry on short reads and EINTR until `size` bytes or EOF.
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        hash_bytes(algo, &buf[..total])
    })
}
984
985/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
986pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
987    let stdin = io::stdin();
988    // Hint kernel for sequential access if stdin is a regular file (redirect)
989    #[cfg(target_os = "linux")]
990    {
991        use std::os::unix::io::AsRawFd;
992        let fd = stdin.as_raw_fd();
993        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
994        if unsafe { libc::fstat(fd, &mut stat) } == 0
995            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
996            && stat.st_size > 0
997        {
998            unsafe {
999                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1000            }
1001        }
1002    }
1003    // Streaming hash — works for both pipe and file-redirect stdin
1004    hash_reader(algo, stdin.lock())
1005}
1006
/// Decide whether a batch of paths should be hashed in parallel.
/// Any batch of two or more files qualifies: rayon's pool is initialized
/// lazily once and reused, so per-file work-stealing overhead is ~1µs,
/// and skipping a stat()-based size probe saves one syscall per file.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
1014
1015/// Issue readahead hints for a list of file paths to warm the page cache.
1016/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
1017/// Only issues hints for files >= 1MB; small files are read fast enough
1018/// that the fadvise syscall overhead isn't worth it.
1019#[cfg(target_os = "linux")]
1020pub fn readahead_files(paths: &[&Path]) {
1021    use std::os::unix::io::AsRawFd;
1022    for path in paths {
1023        if let Ok(file) = open_noatime(path) {
1024            if let Ok(meta) = file.metadata() {
1025                let len = meta.len();
1026                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
1027                    unsafe {
1028                        libc::posix_fadvise(
1029                            file.as_raw_fd(),
1030                            0,
1031                            len as i64,
1032                            libc::POSIX_FADV_WILLNEED,
1033                        );
1034                    }
1035                }
1036            }
1037        }
1038    }
1039}
1040
/// Page-cache warm-up is a Linux-only optimization; on other platforms
/// this is a deliberate no-op with the same signature.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {}
1045
1046// --- BLAKE2b variable-length functions (using blake2b_simd) ---
1047
1048/// Hash raw data with BLAKE2b variable output length.
1049/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
1050pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
1051    let hash = blake2b_simd::Params::new()
1052        .hash_length(output_bytes)
1053        .hash(data);
1054    hex_encode(hash.as_bytes())
1055}
1056
1057/// Hash a reader with BLAKE2b variable output length.
1058/// Uses thread-local buffer for cache-friendly streaming.
1059pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
1060    STREAM_BUF.with(|cell| {
1061        let mut buf = cell.borrow_mut();
1062        ensure_stream_buf(&mut buf);
1063        let mut state = blake2b_simd::Params::new()
1064            .hash_length(output_bytes)
1065            .to_state();
1066        loop {
1067            let n = read_full(&mut reader, &mut buf)?;
1068            if n == 0 {
1069                break;
1070            }
1071            state.update(&buf[..n]);
1072        }
1073        Ok(hex_encode(state.finalize().as_bytes()))
1074    })
1075}
1076
/// Hash a file with BLAKE2b variable output length.
/// Uses mmap for large files (zero-copy), single-read for small files,
/// and streaming read as fallback.
///
/// Mirrors the size tiering of `hash_file`: empty-file short-circuit,
/// stack buffer under TINY_FILE_LIMIT, thread-local buffer under
/// SMALL_FILE_LIMIT, mmap/pipelined I/O above it, streaming otherwise.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    // Empty regular file: hash the empty input, no I/O needed.
    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // Large files (>=16MB): I/O pipelining on Linux, mmap on other platforms
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return blake2b_hash_file_pipelined(file, file_size, output_bytes);
            }
            #[cfg(not(target_os = "linux"))]
            {
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(blake2b_hash_data(&mmap, output_bytes));
                }
                // mmap failure falls through to the streaming path below.
            }
        }
        // Small files (TINY..SMALL limit): single read into thread-local buffer,
        // then single-shot hash — avoids streaming-state overhead per file.
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        // Advisory readahead hint; result intentionally ignored.
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    blake2b_hash_reader(file, output_bytes)
}
1122
1123/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
1124#[inline]
1125fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1126    let mut buf = [0u8; 8192];
1127    let mut total = 0;
1128    while total < size {
1129        match file.read(&mut buf[total..size]) {
1130            Ok(0) => break,
1131            Ok(n) => total += n,
1132            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1133            Err(e) => return Err(e),
1134        }
1135    }
1136    Ok(blake2b_hash_data(&buf[..total], output_bytes))
1137}
1138
/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
///
/// `size` comes from stat(); if the file shrank in the meantime the short
/// read simply hashes fewer bytes.
#[inline]
fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve
        // NOTE(review): the uninitialized tail is handed to File::read; std's
        // File never reads the buffer, but this is outside Read's formal contract.
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        // Retry short reads / EINTR until `size` bytes or EOF.
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(blake2b_hash_data(&buf[..total], output_bytes))
    })
}
1162
/// Optimized BLAKE2b hash for large files (>=16MB) on Linux.
/// Primary path: mmap with HUGEPAGE + POPULATE_READ for zero-copy, single-shot hash.
/// Eliminates thread spawn, channel synchronization, buffer allocation (24MB→0),
/// and read() memcpy overhead. Falls back to streaming I/O if mmap fails.
///
/// `output_bytes` selects the BLAKE2b digest length in bytes.
#[cfg(target_os = "linux")]
fn blake2b_hash_file_pipelined(
    file: File,
    file_size: u64,
    output_bytes: usize,
) -> io::Result<String> {
    // Primary path: mmap with huge pages for zero-copy single-shot hash.
    // Eliminates: thread spawn (~50µs), channel sync, buffer allocs (24MB),
    // 13+ read() syscalls, and page-cache → user-buffer memcpy.
    match unsafe { memmap2::MmapOptions::new().map(&file) } {
        Ok(mmap) => {
            // HUGEPAGE MUST come before any page faults: reduces 25,600 minor
            // faults (4KB) to ~50 faults (2MB) for 100MB. Saves ~12ms overhead.
            // All advise() calls are best-effort; failures are ignored.
            if file_size >= 2 * 1024 * 1024 {
                let _ = mmap.advise(memmap2::Advice::HugePage);
            }
            let _ = mmap.advise(memmap2::Advice::Sequential);
            // POPULATE_READ (Linux 5.14+): synchronously prefaults all pages with
            // huge pages before hashing begins. Falls back to WillNeed on older kernels.
            if file_size >= 4 * 1024 * 1024 {
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            } else {
                // Small enough that async readahead alone is sufficient.
                let _ = mmap.advise(memmap2::Advice::WillNeed);
            }
            // Single-shot hash: processes entire file in one call, streaming
            // directly from page cache with no user-space buffer copies.
            Ok(blake2b_hash_data(&mmap, output_bytes))
        }
        Err(_) => {
            // mmap failed (FUSE, NFS without mmap support, etc.) — fall back
            // to streaming pipelined I/O.
            blake2b_hash_file_streamed(file, file_size, output_bytes)
        }
    }
}
1204
1205/// Streaming fallback for BLAKE2b large files when mmap is unavailable.
1206/// Uses double-buffered reader thread with fadvise hints.
1207/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
1208#[cfg(target_os = "linux")]
1209fn blake2b_hash_file_streamed(
1210    mut file: File,
1211    file_size: u64,
1212    output_bytes: usize,
1213) -> io::Result<String> {
1214    use std::os::unix::io::AsRawFd;
1215
1216    const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; // 8MB per buffer
1217
1218    // Hint kernel for sequential access
1219    unsafe {
1220        libc::posix_fadvise(
1221            file.as_raw_fd(),
1222            0,
1223            file_size as i64,
1224            libc::POSIX_FADV_SEQUENTIAL,
1225        );
1226    }
1227
1228    // Double-buffered channels: reader fills one buffer while hasher processes another.
1229    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
1230    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
1231    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
1232
1233    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
1234        // Blocking recv reuses hasher's returned buffer (2 buffers total, not 3).
1235        while let Ok(mut buf) = buf_rx.recv() {
1236            let mut total = 0;
1237            while total < buf.len() {
1238                match file.read(&mut buf[total..]) {
1239                    Ok(0) => break,
1240                    Ok(n) => total += n,
1241                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1242                    Err(e) => return Err(e),
1243                }
1244            }
1245            if total == 0 {
1246                break;
1247            }
1248            if tx.send((buf, total)).is_err() {
1249                break;
1250            }
1251        }
1252        Ok(())
1253    });
1254
1255    let mut state = blake2b_simd::Params::new()
1256        .hash_length(output_bytes)
1257        .to_state();
1258    while let Ok((buf, n)) = rx.recv() {
1259        state.update(&buf[..n]);
1260        let _ = buf_tx.send(buf);
1261    }
1262    let hash_result = Ok(hex_encode(state.finalize().as_bytes()));
1263
1264    match reader_handle.join() {
1265        Ok(Ok(())) => {}
1266        Ok(Err(e)) => {
1267            if hash_result.is_ok() {
1268                return Err(e);
1269            }
1270        }
1271        Err(payload) => {
1272            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
1273                format!("reader thread panicked: {}", s)
1274            } else if let Some(s) = payload.downcast_ref::<String>() {
1275                format!("reader thread panicked: {}", s)
1276            } else {
1277                "reader thread panicked".to_string()
1278            };
1279            return Err(io::Error::other(msg));
1280        }
1281    }
1282
1283    hash_result
1284}
1285
1286/// Hash stdin with BLAKE2b variable output length.
1287/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
1288pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
1289    let stdin = io::stdin();
1290    #[cfg(target_os = "linux")]
1291    {
1292        use std::os::unix::io::AsRawFd;
1293        let fd = stdin.as_raw_fd();
1294        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1295        if unsafe { libc::fstat(fd, &mut stat) } == 0
1296            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1297            && stat.st_size > 0
1298        {
1299            unsafe {
1300                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1301            }
1302        }
1303    }
1304    blake2b_hash_reader(stdin.lock(), output_bytes)
1305}
1306
1307/// Internal enum for file content in batch hashing.
1308/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
1309enum FileContent {
1310    Mmap(memmap2::Mmap),
1311    Buf(Vec<u8>),
1312}
1313
1314impl AsRef<[u8]> for FileContent {
1315    fn as_ref(&self) -> &[u8] {
1316        match self {
1317            FileContent::Mmap(m) => m,
1318            FileContent::Buf(v) => v,
1319        }
1320    }
1321}
1322
1323/// Open a file and load its content for batch hashing.
1324/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
1325/// files (zero-copy), and read-to-end for non-regular files.
1326fn open_file_content(path: &Path) -> io::Result<FileContent> {
1327    let (file, size, is_regular) = open_and_stat(path)?;
1328    if is_regular && size == 0 {
1329        return Ok(FileContent::Buf(Vec::new()));
1330    }
1331    if is_regular && size > 0 {
1332        // Tiny files: read directly into Vec. The mmap syscall + page fault
1333        // overhead exceeds the data transfer cost for files under 8KB.
1334        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
1335        if size < TINY_FILE_LIMIT {
1336            let mut buf = vec![0u8; size as usize];
1337            let mut total = 0;
1338            let mut f = file;
1339            while total < size as usize {
1340                match f.read(&mut buf[total..]) {
1341                    Ok(0) => break,
1342                    Ok(n) => total += n,
1343                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1344                    Err(e) => return Err(e),
1345                }
1346            }
1347            buf.truncate(total);
1348            return Ok(FileContent::Buf(buf));
1349        }
1350        // HUGEPAGE + PopulateRead for optimal page faulting
1351        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1352        if let Ok(mmap) = mmap_result {
1353            #[cfg(target_os = "linux")]
1354            {
1355                if size >= 2 * 1024 * 1024 {
1356                    let _ = mmap.advise(memmap2::Advice::HugePage);
1357                }
1358                let _ = mmap.advise(memmap2::Advice::Sequential);
1359                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1360                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1361                }
1362            }
1363            return Ok(FileContent::Mmap(mmap));
1364        }
1365        // Fallback: read into Vec
1366        let mut buf = vec![0u8; size as usize];
1367        let mut total = 0;
1368        let mut f = file;
1369        while total < size as usize {
1370            match f.read(&mut buf[total..]) {
1371                Ok(0) => break,
1372                Ok(n) => total += n,
1373                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1374                Err(e) => return Err(e),
1375            }
1376        }
1377        buf.truncate(total);
1378        return Ok(FileContent::Buf(buf));
1379    }
1380    // Non-regular: read to end
1381    let mut buf = Vec::new();
1382    let mut f = file;
1383    f.read_to_end(&mut buf)?;
1384    Ok(FileContent::Buf(buf))
1385}
1386
1387/// Read remaining file content from an already-open fd into a Vec.
1388/// Used when the initial stack buffer is exhausted and we need to read
1389/// the rest without re-opening the file.
1390fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1391    let mut buf = Vec::with_capacity(prefix.len() + 65536);
1392    buf.extend_from_slice(prefix);
1393    file.read_to_end(&mut buf)?;
1394    Ok(FileContent::Buf(buf))
1395}
1396
1397/// Open a file and read all content without fstat — just open+read+close.
1398/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
1399/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
1400/// then falls back to larger buffer or read_to_end for bigger files.
1401fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1402    let mut file = open_noatime(path)?;
1403    // Try small stack buffer first — optimal for benchmark's ~55 byte files.
1404    // For tiny files, allocate exact-size Vec to avoid waste.
1405    let mut small_buf = [0u8; 4096];
1406    match file.read(&mut small_buf) {
1407        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1408        Ok(n) if n < small_buf.len() => {
1409            // File fits in small buffer — allocate exact size
1410            let mut vec = Vec::with_capacity(n);
1411            vec.extend_from_slice(&small_buf[..n]);
1412            return Ok(FileContent::Buf(vec));
1413        }
1414        Ok(n) => {
1415            // Might be more data — allocate heap buffer and read into it directly
1416            let mut buf = vec![0u8; 65536];
1417            buf[..n].copy_from_slice(&small_buf[..n]);
1418            let mut total = n;
1419            loop {
1420                match file.read(&mut buf[total..]) {
1421                    Ok(0) => {
1422                        buf.truncate(total);
1423                        return Ok(FileContent::Buf(buf));
1424                    }
1425                    Ok(n) => {
1426                        total += n;
1427                        if total >= buf.len() {
1428                            // File > 64KB: read rest from existing fd
1429                            return read_remaining_to_vec(&buf[..total], file);
1430                        }
1431                    }
1432                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1433                    Err(e) => return Err(e),
1434                }
1435            }
1436        }
1437        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1438            let mut buf = vec![0u8; 65536];
1439            let mut total = 0;
1440            loop {
1441                match file.read(&mut buf[total..]) {
1442                    Ok(0) => {
1443                        buf.truncate(total);
1444                        return Ok(FileContent::Buf(buf));
1445                    }
1446                    Ok(n) => {
1447                        total += n;
1448                        if total >= buf.len() {
1449                            // File > 64KB: read rest from existing fd
1450                            return read_remaining_to_vec(&buf[..total], file);
1451                        }
1452                    }
1453                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1454                    Err(e) => return Err(e),
1455                }
1456            }
1457        }
1458        Err(e) => return Err(e),
1459    }
1460}
1461
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order.
///
/// Per-path I/O errors are reported in the matching output slot rather
/// than aborting the whole batch.
///
/// For 100 files on AVX2: 4x throughput from SIMD parallelism.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    //   ≤10 paths: sequential — skips thread::scope overhead (~120µs).
    //   >10 paths: chunked parallel loading on OS threads.
    //   ≥20 paths: additionally use the fstat-skipping fast loader.
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        // Ceiling division so every path lands in exactly one chunk.
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            // Joining handles in spawn order keeps results in input order.
            // A loader-thread panic propagates via unwrap().
            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        // (original index, borrowed bytes) for every successfully loaded file.
        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Scatter digests back to their original positions; failed loads
        // keep None and are replaced by their error in Phase 4.
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
1551
/// Batch-hash multiple files with BLAKE2b using the best strategy for the workload.
/// Samples a few files to estimate total data size. For small workloads, uses
/// single-core SIMD batch hashing (`blake2b_hash_files_many`) to avoid stat and
/// thread spawn overhead. For larger workloads, uses multi-core work-stealing
/// parallelism where each worker calls `blake2b_hash_file` (with I/O pipelining
/// for large files on Linux).
/// `output_bytes` is forwarded unchanged to the underlying BLAKE2b hash calls.
/// Returns results in input order; a per-file failure occupies its slot as
/// `Err` instead of aborting the whole batch.
pub fn blake2b_hash_files_parallel(
    paths: &[&Path],
    output_bytes: usize,
) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Sample a few files to estimate whether parallel processing is worthwhile.
    // This avoids the cost of statting ALL files (~70µs/file) when the workload
    // is too small for parallelism to help.
    let sample_count = n.min(5);
    let mut sample_max: u64 = 0;
    let mut sample_total: u64 = 0;
    for &p in paths.iter().take(sample_count) {
        // Unreadable paths count as size 0 here; the actual hash call will
        // surface the I/O error for that file.
        let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
        sample_total += size;
        sample_max = sample_max.max(size);
    }
    // Extrapolate the sampled sizes to the whole batch. (n == 0 takes the
    // else branch, avoiding a divide by zero.)
    let estimated_total = if sample_count > 0 {
        sample_total * (n as u64) / (sample_count as u64)
    } else {
        0
    };

    // For small workloads, thread spawn overhead (~120µs × N_threads) exceeds
    // any parallelism benefit. Use SIMD batch hashing directly (no stat pass).
    // The `sample_max` guard keeps a batch containing any individually large
    // file on the parallel path below.
    if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
        return blake2b_hash_files_many(paths, output_bytes);
    }

    // Full stat pass for parallel scheduling — worth it for larger workloads.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    // SAFETY: the fd is valid for the lifetime of `file`;
                    // readahead(2) only populates the kernel page cache and
                    // never writes to user memory.
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    // Cap worker count at the number of files — surplus threads would exit
    // immediately anyway.
    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    // Each worker claims the next unprocessed file by bumping
                    // the shared counter; Relaxed suffices because the claimed
                    // index itself is the only coordination needed, and join()
                    // below synchronizes the results.
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = blake2b_hash_file(path, output_bytes);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // Every index is claimed exactly once, so each slot should be filled;
        // the fallback error is purely defensive.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1663
1664/// Batch-hash multiple files with SHA-256/MD5 using work-stealing parallelism.
1665/// Files are sorted by size (largest first) so the biggest files start processing
1666/// immediately. Each worker thread grabs the next unprocessed file via atomic index,
1667/// eliminating tail latency from uneven file sizes.
1668/// Returns results in input order.
1669pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1670    let n = paths.len();
1671
1672    // Build (original_index, path, size) tuples — stat all files for scheduling.
1673    // The stat cost (~5µs/file) is repaid by better work distribution.
1674    let mut indexed: Vec<(usize, &Path, u64)> = paths
1675        .iter()
1676        .enumerate()
1677        .map(|(i, &p)| {
1678            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
1679            (i, p, size)
1680        })
1681        .collect();
1682
1683    // Sort largest first: ensures big files start hashing immediately while
1684    // small files fill in gaps, minimizing tail latency.
1685    indexed.sort_by(|a, b| b.2.cmp(&a.2));
1686
1687    // Warm page cache for the largest files using async readahead(2).
1688    // Each hash call handles its own mmap prefaulting, but issuing readahead
1689    // here lets the kernel start I/O for upcoming files while workers process
1690    // current ones. readahead(2) returns immediately (non-blocking).
1691    #[cfg(target_os = "linux")]
1692    {
1693        use std::os::unix::io::AsRawFd;
1694        for &(_, path, size) in indexed.iter().take(20) {
1695            if size >= 1024 * 1024 {
1696                if let Ok(file) = open_noatime(path) {
1697                    unsafe {
1698                        libc::readahead(file.as_raw_fd(), 0, size as usize);
1699                    }
1700                }
1701            }
1702        }
1703    }
1704
1705    let num_threads = std::thread::available_parallelism()
1706        .map(|n| n.get())
1707        .unwrap_or(4)
1708        .min(n);
1709
1710    // Atomic work index for dynamic work-stealing.
1711    let work_idx = AtomicUsize::new(0);
1712
1713    std::thread::scope(|s| {
1714        let work_idx = &work_idx;
1715        let indexed = &indexed;
1716
1717        let handles: Vec<_> = (0..num_threads)
1718            .map(|_| {
1719                s.spawn(move || {
1720                    let mut local_results = Vec::new();
1721                    loop {
1722                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1723                        if idx >= indexed.len() {
1724                            break;
1725                        }
1726                        let (orig_idx, path, _size) = indexed[idx];
1727                        let result = hash_file(algo, path);
1728                        local_results.push((orig_idx, result));
1729                    }
1730                    local_results
1731                })
1732            })
1733            .collect();
1734
1735        // Collect results and reorder to match original input order.
1736        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1737        for handle in handles {
1738            for (orig_idx, result) in handle.join().unwrap() {
1739                results[orig_idx] = Some(result);
1740            }
1741        }
1742        results
1743            .into_iter()
1744            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1745            .collect()
1746    })
1747}
1748
1749/// Fast parallel hash for multi-file workloads. Skips the stat-all-and-sort phase
1750/// of `hash_files_parallel()` and uses `hash_file_nostat()` per worker to minimize
1751/// per-file syscall overhead. For 100 tiny files, this eliminates ~200 stat() calls
1752/// (100 from the sort phase + 100 from open_and_stat inside each worker).
1753/// Returns results in input order.
1754pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1755    let n = paths.len();
1756    if n == 0 {
1757        return Vec::new();
1758    }
1759    if n == 1 {
1760        return vec![hash_file_nostat(algo, paths[0])];
1761    }
1762
1763    // Issue readahead for all files (no size threshold — even tiny files benefit
1764    // from batched WILLNEED hints when processing 100+ files)
1765    #[cfg(target_os = "linux")]
1766    readahead_files_all(paths);
1767
1768    let num_threads = std::thread::available_parallelism()
1769        .map(|n| n.get())
1770        .unwrap_or(4)
1771        .min(n);
1772
1773    let work_idx = AtomicUsize::new(0);
1774
1775    std::thread::scope(|s| {
1776        let work_idx = &work_idx;
1777
1778        let handles: Vec<_> = (0..num_threads)
1779            .map(|_| {
1780                s.spawn(move || {
1781                    let mut local_results = Vec::new();
1782                    loop {
1783                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1784                        if idx >= n {
1785                            break;
1786                        }
1787                        let result = hash_file_nostat(algo, paths[idx]);
1788                        local_results.push((idx, result));
1789                    }
1790                    local_results
1791                })
1792            })
1793            .collect();
1794
1795        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1796        for handle in handles {
1797            for (idx, result) in handle.join().unwrap() {
1798                results[idx] = Some(result);
1799            }
1800        }
1801        results
1802            .into_iter()
1803            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1804            .collect()
1805    })
1806}
1807
1808/// Batch-hash multiple files: pre-read all files into memory in parallel,
1809/// then hash all data in parallel. Optimal for many small files where per-file
1810/// overhead (open/read/close syscalls) dominates over hash computation.
1811///
1812/// Reuses the same parallel file loading pattern as `blake2b_hash_files_many()`.
1813/// For 100 × 55-byte files: all 5500 bytes are loaded in parallel across threads,
1814/// then hashed in parallel — minimizing wall-clock time for syscall-bound workloads.
1815/// Returns results in input order.
1816pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1817    let n = paths.len();
1818    if n == 0 {
1819        return Vec::new();
1820    }
1821
1822    // Issue readahead for all files
1823    #[cfg(target_os = "linux")]
1824    readahead_files_all(paths);
1825
1826    // Phase 1: Load all files into memory in parallel.
1827    // For 20+ files, use fast path that skips fstat.
1828    let use_fast = n >= 20;
1829
1830    let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
1831        // Sequential loading — avoids thread spawn overhead for small batches
1832        paths
1833            .iter()
1834            .map(|&path| {
1835                if use_fast {
1836                    open_file_content_fast(path)
1837                } else {
1838                    open_file_content(path)
1839                }
1840            })
1841            .collect()
1842    } else {
1843        let num_threads = std::thread::available_parallelism()
1844            .map(|t| t.get())
1845            .unwrap_or(4)
1846            .min(n);
1847        let chunk_size = (n + num_threads - 1) / num_threads;
1848
1849        std::thread::scope(|s| {
1850            let handles: Vec<_> = paths
1851                .chunks(chunk_size)
1852                .map(|chunk| {
1853                    s.spawn(move || {
1854                        chunk
1855                            .iter()
1856                            .map(|&path| {
1857                                if use_fast {
1858                                    open_file_content_fast(path)
1859                                } else {
1860                                    open_file_content(path)
1861                                }
1862                            })
1863                            .collect::<Vec<_>>()
1864                    })
1865                })
1866                .collect();
1867
1868            handles
1869                .into_iter()
1870                .flat_map(|h| h.join().unwrap())
1871                .collect()
1872        })
1873    };
1874
1875    // Phase 2: Hash all loaded data. For tiny files hash is negligible;
1876    // for larger files the parallel hashing across threads helps.
1877    let num_hash_threads = std::thread::available_parallelism()
1878        .map(|t| t.get())
1879        .unwrap_or(4)
1880        .min(n);
1881    let work_idx = AtomicUsize::new(0);
1882
1883    std::thread::scope(|s| {
1884        let work_idx = &work_idx;
1885        let file_data = &file_data;
1886
1887        let handles: Vec<_> = (0..num_hash_threads)
1888            .map(|_| {
1889                s.spawn(move || {
1890                    let mut local_results = Vec::new();
1891                    loop {
1892                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1893                        if idx >= n {
1894                            break;
1895                        }
1896                        let result = match &file_data[idx] {
1897                            Ok(content) => hash_bytes(algo, content.as_ref()),
1898                            Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
1899                        };
1900                        local_results.push((idx, result));
1901                    }
1902                    local_results
1903                })
1904            })
1905            .collect();
1906
1907        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1908        for handle in handles {
1909            for (idx, result) in handle.join().unwrap() {
1910                results[idx] = Some(result);
1911            }
1912        }
1913        results
1914            .into_iter()
1915            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1916            .collect()
1917    })
1918}
1919
/// Stream-hash a file that already has a prefix read into memory.
/// Feeds `prefix` into the hasher first, then streams the rest from `file`.
/// Avoids re-opening and re-reading the file when the initial buffer is exhausted.
/// Assumes `file`'s read cursor sits immediately after the bytes in `prefix`.
fn hash_stream_with_prefix(
    algo: HashAlgorithm,
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    // Blake2b uses its own hasher on all platforms.
    // Params::new() uses blake2b_simd's defaults (512-bit digest) since no
    // output length is threaded through this path.
    if matches!(algo, HashAlgorithm::Blake2b) {
        let mut state = blake2b_simd::Params::new().to_state();
        state.update(prefix);
        return STREAM_BUF.with(|cell| {
            // Reuse the thread-local streaming buffer — no per-call allocation.
            let mut buf = cell.borrow_mut();
            ensure_stream_buf(&mut buf);
            loop {
                let n = read_full(&mut file, &mut buf)?;
                if n == 0 {
                    break;
                }
                state.update(&buf[..n]);
            }
            Ok(hex_encode(state.finalize().as_bytes()))
        });
    }

    // Remaining algorithms: OpenSSL on Linux, RustCrypto `Digest` elsewhere.
    #[cfg(target_os = "linux")]
    {
        hash_stream_with_prefix_openssl(algo_to_openssl_md(algo), prefix, file)
    }
    #[cfg(not(target_os = "linux"))]
    {
        match algo {
            HashAlgorithm::Sha1 => hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file),
            HashAlgorithm::Sha224 => hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file),
            HashAlgorithm::Sha256 => hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file),
            HashAlgorithm::Sha384 => hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file),
            HashAlgorithm::Sha512 => hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file),
            HashAlgorithm::Md5 => hash_stream_with_prefix_digest::<md5::Md5>(prefix, file),
            // Blake2b returned above, before the platform split.
            HashAlgorithm::Blake2b => unreachable!(),
        }
    }
}
1963
/// Generic stream-hash with prefix for non-Linux platforms using Digest trait.
/// `prefix` is fed first; the rest of `file` is streamed through the
/// thread-local chunk buffer until EOF.
#[cfg(not(target_os = "linux"))]
fn hash_stream_with_prefix_digest<D: digest::Digest>(
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut state = D::new();
        state.update(prefix);
        loop {
            match read_full(&mut file, &mut chunk)? {
                // EOF: finish and hex-encode the digest.
                0 => break Ok(hex_encode(&state.finalize())),
                n => state.update(&chunk[..n]),
            }
        }
    })
}
1985
1986/// Streaming hash with prefix using OpenSSL (Linux).
1987#[cfg(target_os = "linux")]
1988fn hash_stream_with_prefix_openssl(
1989    md: openssl::hash::MessageDigest,
1990    prefix: &[u8],
1991    mut file: File,
1992) -> io::Result<String> {
1993    STREAM_BUF.with(|cell| {
1994        let mut buf = cell.borrow_mut();
1995        ensure_stream_buf(&mut buf);
1996        let mut hasher =
1997            openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e.to_string()))?;
1998        hasher
1999            .update(prefix)
2000            .map_err(|e| io::Error::other(e.to_string()))?;
2001        loop {
2002            let n = read_full(&mut file, &mut buf)?;
2003            if n == 0 {
2004                break;
2005            }
2006            hasher
2007                .update(&buf[..n])
2008                .map_err(|e| io::Error::other(e.to_string()))?;
2009        }
2010        let digest = hasher
2011            .finish()
2012            .map_err(|e| io::Error::other(e.to_string()))?;
2013        Ok(hex_encode(&digest))
2014    })
2015}
2016
2017/// Hash a file without fstat — just open, read until EOF, hash.
2018/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
2019/// Uses a two-tier buffer strategy: small stack buffer (4KB) for the initial read,
2020/// then falls back to a larger stack buffer (64KB) or streaming hash for bigger files.
2021/// For benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
2022pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2023    let mut file = open_noatime(path)?;
2024    // First try a small stack buffer — optimal for tiny files (< 4KB).
2025    // Most "many_files" benchmark files are ~55 bytes, so this completes
2026    // with a single read() syscall and no fallback.
2027    let mut small_buf = [0u8; 4096];
2028    match file.read(&mut small_buf) {
2029        Ok(0) => return hash_bytes(algo, &[]),
2030        Ok(n) if n < small_buf.len() => {
2031            // File fits in small buffer — hash directly (common case)
2032            return hash_bytes(algo, &small_buf[..n]);
2033        }
2034        Ok(n) => {
2035            // Might be more data — fall back to larger buffer
2036            let mut buf = [0u8; 65536];
2037            buf[..n].copy_from_slice(&small_buf[..n]);
2038            let mut total = n;
2039            loop {
2040                match file.read(&mut buf[total..]) {
2041                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2042                    Ok(n) => {
2043                        total += n;
2044                        if total >= buf.len() {
2045                            // File > 64KB: stream-hash from existing fd instead of
2046                            // re-opening. Feed already-read prefix, continue streaming.
2047                            return hash_stream_with_prefix(algo, &buf[..total], file);
2048                        }
2049                    }
2050                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2051                    Err(e) => return Err(e),
2052                }
2053            }
2054        }
2055        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
2056            // Retry with full buffer on interrupt
2057            let mut buf = [0u8; 65536];
2058            let mut total = 0;
2059            loop {
2060                match file.read(&mut buf[total..]) {
2061                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2062                    Ok(n) => {
2063                        total += n;
2064                        if total >= buf.len() {
2065                            // File > 64KB: stream-hash from existing fd
2066                            return hash_stream_with_prefix(algo, &buf[..total], file);
2067                        }
2068                    }
2069                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2070                    Err(e) => return Err(e),
2071                }
2072            }
2073        }
2074        Err(e) => return Err(e),
2075    }
2076}
2077
/// Hash a single file using raw Linux syscalls for minimum overhead.
/// Bypasses Rust's File abstraction for the open: raw open/fstat/read/close.
/// For the single-file fast path, this eliminates the OpenOptions builder,
/// File wrapper overhead, and Read trait dispatch (one CString is still
/// heap-allocated for the raw open below).
///
/// Size-based dispatch:
/// - Tiny (<8KB): stack buffer + raw read + hash_bytes (3 syscalls total)
/// - Small (8KB-16MB): wraps fd in File, reads into thread-local buffer
/// - Large (>=16MB): wraps fd in File, mmaps with HugePage + PopulateRead
/// - Non-regular: wraps fd in File, streaming hash_reader
#[cfg(target_os = "linux")]
pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    use std::os::unix::ffi::OsStrExt;

    // An interior NUL byte cannot be represented in a C path string.
    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    // Raw open with O_RDONLY | O_CLOEXEC, optionally O_NOATIME
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    // SAFETY: c_path is a valid NUL-terminated C string that outlives the call.
    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // O_NOATIME fails with EPERM unless we own the file (or have
        // CAP_FOWNER). Record that it's unusable so later opens skip the
        // failed attempt, and retry this one without the flag.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            // SAFETY: same as above — c_path remains valid for this call.
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd(algo, fd2);
        }
        return Err(err);
    }
    hash_from_raw_fd(algo, fd)
}
2117
/// Hash from a raw fd — dispatches by file size for optimal I/O strategy.
/// Handles tiny (stack buffer), small (thread-local buffer), large (mmap), and
/// non-regular (streaming) files.
/// Takes ownership of `fd`: every path either closes it explicitly or hands it
/// to a `File` that closes it on drop.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    // Raw fstat to determine size and type
    // SAFETY: libc::stat is plain-old-data, so the all-zero value is valid;
    // fstat overwrites it on success.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    // SAFETY: fd is a valid open descriptor owned by this function and `stat`
    // points to a properly sized, writable struct.
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        // SAFETY: fd is valid and is not used again after this close.
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        // SAFETY: fd is valid and is not used again after this close.
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &[]);
    }

    // Tiny files (<8KB): raw read into stack buffer, no File wrapper needed.
    // Entire I/O in 3 raw syscalls: open + read + close.
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        while total < size as usize {
            // SAFETY: the destination range lies inside `buf` (total < size,
            // and size < TINY_FILE_LIMIT, which per the dispatch doc is the
            // 8KB buffer size), and the length never exceeds the remaining
            // capacity of the slice.
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                // SAFETY: fd is valid and is not used again after this close.
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                // Early EOF (file shrank since fstat): hash what was read.
                break;
            }
            total += n as usize;
        }
        // SAFETY: fd is valid and is not used again after this close.
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..total]);
    }

    // For larger files, wrap fd in File for RAII close and existing optimized paths.
    use std::os::unix::io::FromRawFd;
    // SAFETY: fd is open and owned exclusively here; File assumes ownership
    // and closes it on drop, and the raw fd is never used directly again.
    let file = unsafe { File::from_raw_fd(fd) };

    if is_regular && size > 0 {
        // Large files (>=16MB): mmap with HugePage + PopulateRead
        if size >= SMALL_FILE_LIMIT {
            // SAFETY: map() is unsafe because the underlying file could be
            // modified concurrently; hashing accepts the same torn-read risk
            // as any other reader. The map is dropped before `file`.
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // Prefault pages using huge pages (kernel 5.14+)
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                return hash_bytes(algo, &mmap);
            }
            // mmap failed: fall through to the read()-based small-file path.
        }
        // Small files (8KB-16MB): single-read into thread-local buffer
        return hash_file_small(algo, file, size as usize);
    }

    // Non-regular files: streaming hash
    hash_reader(algo, file)
}
2204
2205/// Issue readahead hints for ALL file paths (no size threshold).
2206/// For multi-file benchmarks, even small files benefit from batched readahead.
2207#[cfg(target_os = "linux")]
2208pub fn readahead_files_all(paths: &[&Path]) {
2209    use std::os::unix::io::AsRawFd;
2210    for path in paths {
2211        if let Ok(file) = open_noatime(path) {
2212            if let Ok(meta) = file.metadata() {
2213                if meta.file_type().is_file() {
2214                    let len = meta.len();
2215                    unsafe {
2216                        libc::posix_fadvise(
2217                            file.as_raw_fd(),
2218                            0,
2219                            len as i64,
2220                            libc::POSIX_FADV_WILLNEED,
2221                        );
2222                    }
2223                }
2224            }
2225        }
2226    }
2227}
2228
/// No-op fallback: readahead hints are only implemented on Linux.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
2231
/// Print hash result in GNU format: "hash  filename\n".
/// The two-byte separator is " *" in binary mode and "  " otherwise.
/// Uses raw byte writes to avoid std::fmt overhead.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let sep: &[u8; 2] = if binary { b" *" } else { b"  " };
    out.write_all(hash.as_bytes())?;
    out.write_all(sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\n")
}
2246
/// Print hash in GNU format with NUL terminator instead of newline:
/// "hash  filename\0" (or "hash *filename\0" in binary mode).
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // Space + mode marker ('*' binary, ' ' text), then the NUL terminator.
    let marker: &[u8; 2] = if binary { b" *" } else { b"  " };
    let pieces: [&[u8]; 4] = [hash.as_bytes(), marker, filename.as_bytes(), b"\0"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
2260
// ── Single-write output buffer ─────────────────────────────────────
// For multi-file workloads, batch the entire "hash  filename\n" line into
// a single write() call. This halves the number of BufWriter flushes.

// Thread-local output line buffer for batched writes.
// Reused across files to avoid per-file allocation.
// 256 bytes comfortably holds the longest fixed-width digest used here
// (128 hex chars for a 512-bit BLAKE2b) plus a typical filename, so the
// buffer rarely grows after the first use.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
2270
2271/// Build and write the standard GNU hash output line in a single write() call.
2272/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
2273/// For escaped filenames: "\hash  escaped_filename\n".
2274#[inline]
2275pub fn write_hash_line(
2276    out: &mut impl Write,
2277    hash: &str,
2278    filename: &str,
2279    binary: bool,
2280    zero: bool,
2281    escaped: bool,
2282) -> io::Result<()> {
2283    LINE_BUF.with(|cell| {
2284        let mut buf = cell.borrow_mut();
2285        buf.clear();
2286        let mode = if binary { b'*' } else { b' ' };
2287        let term = if zero { b'\0' } else { b'\n' };
2288        if escaped {
2289            buf.push(b'\\');
2290        }
2291        buf.extend_from_slice(hash.as_bytes());
2292        buf.push(b' ');
2293        buf.push(mode);
2294        buf.extend_from_slice(filename.as_bytes());
2295        buf.push(term);
2296        out.write_all(&buf)
2297    })
2298}
2299
2300/// Build and write BSD tag format output in a single write() call.
2301/// Format: "ALGO (filename) = hash\n"
2302#[inline]
2303pub fn write_hash_tag_line(
2304    out: &mut impl Write,
2305    algo_name: &str,
2306    hash: &str,
2307    filename: &str,
2308    zero: bool,
2309) -> io::Result<()> {
2310    LINE_BUF.with(|cell| {
2311        let mut buf = cell.borrow_mut();
2312        buf.clear();
2313        let term = if zero { b'\0' } else { b'\n' };
2314        buf.extend_from_slice(algo_name.as_bytes());
2315        buf.extend_from_slice(b" (");
2316        buf.extend_from_slice(filename.as_bytes());
2317        buf.extend_from_slice(b") = ");
2318        buf.extend_from_slice(hash.as_bytes());
2319        buf.push(term);
2320        out.write_all(&buf)
2321    })
2322}
2323
2324/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
2325pub fn print_hash_tag(
2326    out: &mut impl Write,
2327    algo: HashAlgorithm,
2328    hash: &str,
2329    filename: &str,
2330) -> io::Result<()> {
2331    out.write_all(algo.name().as_bytes())?;
2332    out.write_all(b" (")?;
2333    out.write_all(filename.as_bytes())?;
2334    out.write_all(b") = ")?;
2335    out.write_all(hash.as_bytes())?;
2336    out.write_all(b"\n")
2337}
2338
2339/// Print hash in BSD tag format with NUL terminator.
2340pub fn print_hash_tag_zero(
2341    out: &mut impl Write,
2342    algo: HashAlgorithm,
2343    hash: &str,
2344    filename: &str,
2345) -> io::Result<()> {
2346    out.write_all(algo.name().as_bytes())?;
2347    out.write_all(b" (")?;
2348    out.write_all(filename.as_bytes())?;
2349    out.write_all(b") = ")?;
2350    out.write_all(hash.as_bytes())?;
2351    out.write_all(b"\0")
2352}
2353
/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash\n" for the default 512-bit digest, or
/// "BLAKE2b-<bits> (filename) = hash\n" for other output lengths.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        // Common case: constant prefix, no formatting machinery.
        out.write_all(b"BLAKE2b (")?;
    } else {
        // Rare case: formatted prefix (negligible overhead per file).
        write!(out, "BLAKE2b-{bits} (")?;
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2374
/// Print hash in BSD tag format with BLAKE2b length info and NUL terminator:
/// "BLAKE2b (filename) = hash\0" (512-bit) or "BLAKE2b-<bits> (…) = hash\0".
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        // Common case: constant prefix, no formatting machinery.
        out.write_all(b"BLAKE2b (")?;
    } else {
        // Rare case: formatted prefix (negligible overhead per file).
        write!(out, "BLAKE2b-{bits} (")?;
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2392
/// Options for check mode. Field names mirror the corresponding GNU
/// coreutils checksum flags (see `check_file` for how they are applied).
pub struct CheckOptions {
    // Don't print "OK" for each successfully verified file (GNU --quiet).
    pub quiet: bool,
    // Don't print anything; only the exit status reports success (GNU --status).
    pub status_only: bool,
    // Treat improperly formatted checksum lines as a failure (GNU --strict).
    pub strict: bool,
    // Emit a warning for each improperly formatted checksum line (GNU --warn).
    pub warn: bool,
    // Skip (rather than fail on) listed files that don't exist
    // (GNU --ignore-missing).
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
2405
/// Result of check mode verification.
///
/// Aggregate counters accumulated by [`check_file`]; all fields start at zero
/// (`Default`) and are returned for the caller to interpret.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct CheckResult {
    /// Files whose computed hash matched the expected one.
    pub ok: usize,
    /// Files whose computed hash did not match.
    pub mismatches: usize,
    /// Lines that could not be parsed as checksum lines.
    pub format_errors: usize,
    /// Files that could not be opened or read.
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
2415
/// Verify checksums from a check file.
/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
///
/// Per-file results ("OK" / "FAILED" / "FAILED open or read") go to `out`;
/// diagnostics (format warnings, open/read errors) go to `err_out`. Output is
/// filtered by `opts.quiet` / `opts.status_only` / `opts.warn`. Missing files
/// are skipped (but counted) when `opts.ignore_missing` is set. Returns the
/// aggregate counts; `Err` is returned only for I/O failures on the check
/// file itself or on the output streams.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    // Copy the flags out once to keep the loop body terse.
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    // 1-based line counter, used only in format-warning messages.
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        // Drop trailing whitespace (also strips the CR of CRLF check files).
        let line = line.trim_end();

        // Blank lines are ignored, not counted as format errors.
        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush buffered status lines first so stdout/stderr
                    // interleave in the order events occurred.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        // Generic warning: no program/check-file context.
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        // GNU-style warning: "{prefix}: {line}: message".
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                // --ignore-missing: skip (but count) nonexistent files.
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Keep stdout/stderr ordering consistent (see above).
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Check files may carry upper- or lower-case hex digits; compare
        // case-insensitively.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
2512
/// Parse a checksum line in any supported format.
///
/// Recognized layouts, tried in order:
/// 1. BSD tag:    "ALGO (filename) = hash" (incl. "BLAKE2b-NNN (…) = …")
/// 2. GNU text:   "hash  filename"
/// 3. GNU binary: "hash *filename"
///
/// Returns `(hash, filename)`, or `None` if no layout matches.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // BSD tag format: strip a known "ALGO (" prefix, leaving "filename) = hash".
    const TAG_PREFIXES: [&str; 7] = [
        "MD5 (",
        "SHA1 (",
        "SHA224 (",
        "SHA256 (",
        "SHA384 (",
        "SHA512 (",
        "BLAKE2b (",
    ];
    let mut tagged = TAG_PREFIXES.iter().find_map(|p| line.strip_prefix(*p));
    if tagged.is_none() {
        // "BLAKE2b-NNN (filename) = hash": accept only an all-digit length.
        if let Some(after) = line.strip_prefix("BLAKE2b-") {
            if let Some(sp) = after.find(" (") {
                if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                    tagged = Some(&after[sp + 2..]);
                }
            }
        }
    }
    if let Some(rest) = tagged {
        if let Some(close) = rest.find(") = ") {
            return Some((&rest[close + 4..], &rest[..close]));
        }
    }

    // GNU formats; a leading backslash marks an escaped-filename line.
    let body = line.strip_prefix('\\').unwrap_or(line);

    // Text mode: "hash  filename" (two-space separator).
    if let Some(sep) = body.find("  ") {
        return Some((&body[..sep], &body[sep + 2..]));
    }
    // Binary mode: "hash *filename".
    if let Some(sep) = body.find(" *") {
        return Some((&body[..sep], &body[sep + 2..]));
    }
    None
}
2561
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name (e.g., BLAKE2b-256 -> Some(256)).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let open = line.find(" (")?;
    let (algo_part, tail) = (&line[..open], &line[open + 2..]);
    let close = tail.find(") = ")?;
    let (filename, hash) = (&tail[..close], &tail[close + 4..]);

    // Names like "BLAKE2b-256" carry a digest length after the final dash;
    // a non-numeric suffix simply yields None.
    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
2582
/// Read as many bytes as possible into buf, retrying on partial reads.
/// Ensures each hash update gets a full buffer (fewer update calls = less overhead).
///
/// Returns the number of bytes read; fewer than `buf.len()` only at EOF.
/// `ErrorKind::Interrupted` (EINTR) is always retried — including on the very
/// first read, which the previous version mistakenly propagated as an error.
/// Regular-file reads usually fill the buffer on the first iteration, so the
/// common path exits the loop immediately.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            // EOF: return what we have (possibly 0).
            Ok(0) => break,
            Ok(n) => total += n,
            // Transient signal interruption — retry the same slice.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2605
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Fast hex encoding using the 2-byte pair lookup table — one lookup per input byte.
///
/// Builds the output in a single pre-sized allocation and converts to `String`
/// without UTF-8 re-validation. Soundness fix: the previous version called
/// `Vec::set_len` on freshly reserved (uninitialized) memory and handed it out
/// as an initialized `&mut [u8]`, which is undefined behavior; this version
/// only ever appends initialized bytes.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = Vec::with_capacity(bytes.len() * 2);
    for &b in bytes {
        out.extend_from_slice(&HEX_TABLE[b as usize]);
    }
    // SAFETY: HEX_TABLE contains only ASCII hex digits, so `out` is valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}
2634
2635/// Encode bytes as hex directly into a pre-allocated output slice.
2636/// Output slice must be at least `bytes.len() * 2` bytes long.
2637#[inline]
2638fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
2639    // SAFETY: We write exactly bytes.len()*2 bytes into `out`, which must be large enough.
2640    unsafe {
2641        let ptr = out.as_mut_ptr();
2642        for (i, &b) in bytes.iter().enumerate() {
2643            let pair = *HEX_TABLE.get_unchecked(b as usize);
2644            *ptr.add(i * 2) = pair[0];
2645            *ptr.add(i * 2 + 1) = pair[1];
2646        }
2647    }
2648}