Skip to main content

coreutils_rs/hash/
core.rs

1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[cfg(not(target_os = "linux"))]
11use digest::Digest;
12#[cfg(not(target_os = "linux"))]
13use md5::Md5;
14
/// The set of digest algorithms this module can compute.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha1,
    Sha224,
    Sha256,
    Sha384,
    Sha512,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Canonical display name of the algorithm (as used in check-file headers).
    pub fn name(self) -> &'static str {
        match self {
            Self::Sha1 => "SHA1",
            Self::Sha224 => "SHA224",
            Self::Sha256 => "SHA256",
            Self::Sha384 => "SHA384",
            Self::Sha512 => "SHA512",
            Self::Md5 => "MD5",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
40
41// ── Generic hash helpers ────────────────────────────────────────────
42
/// One-shot hash of an in-memory slice via the RustCrypto `Digest` trait
/// (non-Linux fallback path).
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let digest = D::digest(data);
    hex_encode(&digest)
}
48
/// Stream a reader through a `Digest` hasher using the shared thread-local
/// I/O buffer (non-Linux fallback path).
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = D::new();
        loop {
            match read_full(&mut reader, &mut buf)? {
                0 => break, // EOF
                n => hasher.update(&buf[..n]),
            }
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}
66
67// ── Public hashing API ──────────────────────────────────────────────
68
/// Buffer size for streaming hash I/O.
/// 128KB matches GNU coreutils' buffer size (BUFSIZE=131072), which works well with kernel readahead.
/// Many small reads allow the kernel to pipeline I/O efficiently, reducing latency
/// vs fewer large reads that stall waiting for the full buffer to fill.
const HASH_READ_BUF: usize = 131072;

// Thread-local reusable buffer for streaming hash I/O.
// Allocated LAZILY (only on first streaming-hash call) so that small-file-only
// workloads (e.g., "sha256sum *.txt" where every file is <1MB) never pay the
// 128KB (HASH_READ_BUF) allocation.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Ensure the streaming buffer is at least HASH_READ_BUF (128KB) bytes.
/// Called only on the streaming path, so small-file workloads never allocate it.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() < HASH_READ_BUF {
        buf.resize(HASH_READ_BUF, 0);
    }
}
90
91// ── OpenSSL-accelerated hash functions (Linux) ───────────────────────
92// OpenSSL's libcrypto provides the fastest SHA implementations, using
93// hardware-specific assembly (SHA-NI, AVX2/AVX512, NEON) tuned for each CPU.
94// This matches what GNU coreutils uses internally.
95
96/// Single-shot hash using OpenSSL (Linux).
97/// Returns an error if OpenSSL rejects the algorithm (e.g. FIPS mode).
98#[cfg(target_os = "linux")]
99#[inline]
100fn openssl_hash_bytes(md: openssl::hash::MessageDigest, data: &[u8]) -> io::Result<String> {
101    let digest = openssl::hash::hash(md, data).map_err(|e| io::Error::other(e.to_string()))?;
102    Ok(hex_encode(&digest))
103}
104
105/// Streaming hash using OpenSSL Hasher (Linux).
106#[cfg(target_os = "linux")]
107fn openssl_hash_reader(
108    md: openssl::hash::MessageDigest,
109    mut reader: impl Read,
110) -> io::Result<String> {
111    STREAM_BUF.with(|cell| {
112        let mut buf = cell.borrow_mut();
113        ensure_stream_buf(&mut buf);
114        let mut hasher =
115            openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e.to_string()))?;
116        loop {
117            let n = read_full(&mut reader, &mut buf)?;
118            if n == 0 {
119                break;
120            }
121            hasher
122                .update(&buf[..n])
123                .map_err(|e| io::Error::other(e.to_string()))?;
124        }
125        let digest = hasher
126            .finish()
127            .map_err(|e| io::Error::other(e.to_string()))?;
128        Ok(hex_encode(&digest))
129    })
130}
131
132/// Single-shot hash and write hex directly to buffer using OpenSSL (Linux).
133/// Returns an error if OpenSSL rejects the algorithm (e.g. FIPS mode).
134#[cfg(target_os = "linux")]
135#[inline]
136fn openssl_hash_bytes_to_buf(
137    md: openssl::hash::MessageDigest,
138    data: &[u8],
139    out: &mut [u8],
140) -> io::Result<usize> {
141    let digest = openssl::hash::hash(md, data).map_err(|e| io::Error::other(e.to_string()))?;
142    hex_encode_to_slice(&digest, out);
143    Ok(digest.len() * 2)
144}
145
146// ── Ring-accelerated hash functions (non-Apple, non-Linux targets) ────
147// ring provides BoringSSL assembly with SHA-NI/AVX2/NEON for Windows/FreeBSD.
148
/// One-shot hash of a slice via ring::digest (non-Apple, non-Linux).
/// Infallible in practice; Result kept for API symmetry with the other backends.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
#[inline]
fn ring_hash_bytes(algo: &'static ring::digest::Algorithm, data: &[u8]) -> io::Result<String> {
    let digest = ring::digest::digest(algo, data);
    Ok(hex_encode(digest.as_ref()))
}
155
/// Stream a reader through a ring `Context` (non-Apple, non-Linux),
/// reusing the shared thread-local I/O buffer.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn ring_hash_reader(
    algo: &'static ring::digest::Algorithm,
    mut reader: impl Read,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut ctx = ring::digest::Context::new(algo);
        loop {
            match read_full(&mut reader, &mut buf)? {
                0 => break, // EOF
                n => ctx.update(&buf[..n]),
            }
        }
        let digest = ctx.finish();
        Ok(hex_encode(digest.as_ref()))
    })
}
176
177// ── Algorithm → OpenSSL MessageDigest mapping (Linux) ──────────────────
178// Centralizes OpenSSL algorithm dispatch, used by hash_bytes, hash_stream_with_prefix,
179// hash_file_streaming, and hash_file_pipelined_read.
180
181#[cfg(target_os = "linux")]
182fn algo_to_openssl_md(algo: HashAlgorithm) -> openssl::hash::MessageDigest {
183    match algo {
184        HashAlgorithm::Sha1 => openssl::hash::MessageDigest::sha1(),
185        HashAlgorithm::Sha224 => openssl::hash::MessageDigest::sha224(),
186        HashAlgorithm::Sha256 => openssl::hash::MessageDigest::sha256(),
187        HashAlgorithm::Sha384 => openssl::hash::MessageDigest::sha384(),
188        HashAlgorithm::Sha512 => openssl::hash::MessageDigest::sha512(),
189        HashAlgorithm::Md5 => openssl::hash::MessageDigest::md5(),
190        HashAlgorithm::Blake2b => unreachable!("Blake2b uses its own hasher"),
191    }
192}
193
194// ── SHA-256 ───────────────────────────────────────────────────────────
195// Linux: OpenSSL (system libcrypto, matches GNU coreutils)
196// Windows/FreeBSD: ring (BoringSSL assembly)
197// Apple: sha2 crate (ring doesn't compile on Apple Silicon)
198
199#[cfg(target_os = "linux")]
200fn sha256_bytes(data: &[u8]) -> io::Result<String> {
201    openssl_hash_bytes(openssl::hash::MessageDigest::sha256(), data)
202}
203
204#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
205fn sha256_bytes(data: &[u8]) -> io::Result<String> {
206    ring_hash_bytes(&ring::digest::SHA256, data)
207}
208
209#[cfg(target_vendor = "apple")]
210fn sha256_bytes(data: &[u8]) -> io::Result<String> {
211    Ok(hash_digest::<sha2::Sha256>(data))
212}
213
214#[cfg(target_os = "linux")]
215fn sha256_reader(reader: impl Read) -> io::Result<String> {
216    openssl_hash_reader(openssl::hash::MessageDigest::sha256(), reader)
217}
218
219#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
220fn sha256_reader(reader: impl Read) -> io::Result<String> {
221    ring_hash_reader(&ring::digest::SHA256, reader)
222}
223
224#[cfg(target_vendor = "apple")]
225fn sha256_reader(reader: impl Read) -> io::Result<String> {
226    hash_reader_impl::<sha2::Sha256>(reader)
227}
228
229// ── SHA-1 ─────────────────────────────────────────────────────────────
230
231#[cfg(target_os = "linux")]
232fn sha1_bytes(data: &[u8]) -> io::Result<String> {
233    openssl_hash_bytes(openssl::hash::MessageDigest::sha1(), data)
234}
235
236#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
237fn sha1_bytes(data: &[u8]) -> io::Result<String> {
238    ring_hash_bytes(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data)
239}
240
241#[cfg(target_vendor = "apple")]
242fn sha1_bytes(data: &[u8]) -> io::Result<String> {
243    Ok(hash_digest::<sha1::Sha1>(data))
244}
245
246#[cfg(target_os = "linux")]
247fn sha1_reader(reader: impl Read) -> io::Result<String> {
248    openssl_hash_reader(openssl::hash::MessageDigest::sha1(), reader)
249}
250
251#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
252fn sha1_reader(reader: impl Read) -> io::Result<String> {
253    ring_hash_reader(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, reader)
254}
255
256#[cfg(target_vendor = "apple")]
257fn sha1_reader(reader: impl Read) -> io::Result<String> {
258    hash_reader_impl::<sha1::Sha1>(reader)
259}
260
261// ── SHA-224 ───────────────────────────────────────────────────────────
262// ring does not support SHA-224. Use OpenSSL on Linux, sha2 crate elsewhere.
263
264#[cfg(target_os = "linux")]
265fn sha224_bytes(data: &[u8]) -> io::Result<String> {
266    openssl_hash_bytes(openssl::hash::MessageDigest::sha224(), data)
267}
268
269#[cfg(not(target_os = "linux"))]
270fn sha224_bytes(data: &[u8]) -> io::Result<String> {
271    Ok(hex_encode(&sha2::Sha224::digest(data)))
272}
273
274#[cfg(target_os = "linux")]
275fn sha224_reader(reader: impl Read) -> io::Result<String> {
276    openssl_hash_reader(openssl::hash::MessageDigest::sha224(), reader)
277}
278
279#[cfg(not(target_os = "linux"))]
280fn sha224_reader(reader: impl Read) -> io::Result<String> {
281    STREAM_BUF.with(|cell| {
282        let mut buf = cell.borrow_mut();
283        ensure_stream_buf(&mut buf);
284        let mut hasher = <sha2::Sha224 as digest::Digest>::new();
285        let mut reader = reader;
286        loop {
287            let n = read_full(&mut reader, &mut buf)?;
288            if n == 0 {
289                break;
290            }
291            digest::Digest::update(&mut hasher, &buf[..n]);
292        }
293        Ok(hex_encode(&digest::Digest::finalize(hasher)))
294    })
295}
296
297// ── SHA-384 ───────────────────────────────────────────────────────────
298
299#[cfg(target_os = "linux")]
300fn sha384_bytes(data: &[u8]) -> io::Result<String> {
301    openssl_hash_bytes(openssl::hash::MessageDigest::sha384(), data)
302}
303
304#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
305fn sha384_bytes(data: &[u8]) -> io::Result<String> {
306    ring_hash_bytes(&ring::digest::SHA384, data)
307}
308
309#[cfg(target_vendor = "apple")]
310fn sha384_bytes(data: &[u8]) -> io::Result<String> {
311    Ok(hex_encode(&sha2::Sha384::digest(data)))
312}
313
314#[cfg(target_os = "linux")]
315fn sha384_reader(reader: impl Read) -> io::Result<String> {
316    openssl_hash_reader(openssl::hash::MessageDigest::sha384(), reader)
317}
318
319#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
320fn sha384_reader(reader: impl Read) -> io::Result<String> {
321    ring_hash_reader(&ring::digest::SHA384, reader)
322}
323
324#[cfg(target_vendor = "apple")]
325fn sha384_reader(reader: impl Read) -> io::Result<String> {
326    STREAM_BUF.with(|cell| {
327        let mut buf = cell.borrow_mut();
328        ensure_stream_buf(&mut buf);
329        let mut hasher = <sha2::Sha384 as digest::Digest>::new();
330        let mut reader = reader;
331        loop {
332            let n = read_full(&mut reader, &mut buf)?;
333            if n == 0 {
334                break;
335            }
336            digest::Digest::update(&mut hasher, &buf[..n]);
337        }
338        Ok(hex_encode(&digest::Digest::finalize(hasher)))
339    })
340}
341
342// ── SHA-512 ───────────────────────────────────────────────────────────
343
344#[cfg(target_os = "linux")]
345fn sha512_bytes(data: &[u8]) -> io::Result<String> {
346    openssl_hash_bytes(openssl::hash::MessageDigest::sha512(), data)
347}
348
349#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
350fn sha512_bytes(data: &[u8]) -> io::Result<String> {
351    ring_hash_bytes(&ring::digest::SHA512, data)
352}
353
354#[cfg(target_vendor = "apple")]
355fn sha512_bytes(data: &[u8]) -> io::Result<String> {
356    Ok(hex_encode(&sha2::Sha512::digest(data)))
357}
358
359#[cfg(target_os = "linux")]
360fn sha512_reader(reader: impl Read) -> io::Result<String> {
361    openssl_hash_reader(openssl::hash::MessageDigest::sha512(), reader)
362}
363
364#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
365fn sha512_reader(reader: impl Read) -> io::Result<String> {
366    ring_hash_reader(&ring::digest::SHA512, reader)
367}
368
369#[cfg(target_vendor = "apple")]
370fn sha512_reader(reader: impl Read) -> io::Result<String> {
371    STREAM_BUF.with(|cell| {
372        let mut buf = cell.borrow_mut();
373        ensure_stream_buf(&mut buf);
374        let mut hasher = <sha2::Sha512 as digest::Digest>::new();
375        let mut reader = reader;
376        loop {
377            let n = read_full(&mut reader, &mut buf)?;
378            if n == 0 {
379                break;
380            }
381            digest::Digest::update(&mut hasher, &buf[..n]);
382        }
383        Ok(hex_encode(&digest::Digest::finalize(hasher)))
384    })
385}
386
387/// Compute hash of a byte slice directly (zero-copy fast path).
388/// Returns an error if the underlying crypto library rejects the algorithm.
389pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> io::Result<String> {
390    match algo {
391        HashAlgorithm::Sha1 => sha1_bytes(data),
392        HashAlgorithm::Sha224 => sha224_bytes(data),
393        HashAlgorithm::Sha256 => sha256_bytes(data),
394        HashAlgorithm::Sha384 => sha384_bytes(data),
395        HashAlgorithm::Sha512 => sha512_bytes(data),
396        HashAlgorithm::Md5 => md5_bytes(data),
397        HashAlgorithm::Blake2b => {
398            let hash = blake2b_simd::blake2b(data);
399            Ok(hex_encode(hash.as_bytes()))
400        }
401    }
402}
403
404/// Hash data and write hex result directly into an output buffer.
405/// Returns the number of hex bytes written. Avoids String allocation
406/// on the critical single-file fast path.
407/// `out` must be at least 128 bytes for BLAKE2b/SHA512 (64 * 2), 64 for SHA256, 32 for MD5, etc.
408#[cfg(target_os = "linux")]
409pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> io::Result<usize> {
410    match algo {
411        HashAlgorithm::Md5 => {
412            openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::md5(), data, out)
413        }
414        HashAlgorithm::Sha1 => sha1_bytes_to_buf(data, out),
415        HashAlgorithm::Sha224 => sha224_bytes_to_buf(data, out),
416        HashAlgorithm::Sha256 => sha256_bytes_to_buf(data, out),
417        HashAlgorithm::Sha384 => sha384_bytes_to_buf(data, out),
418        HashAlgorithm::Sha512 => sha512_bytes_to_buf(data, out),
419        HashAlgorithm::Blake2b => {
420            let hash = blake2b_simd::blake2b(data);
421            let bytes = hash.as_bytes();
422            hex_encode_to_slice(bytes, out);
423            Ok(bytes.len() * 2)
424        }
425    }
426}
427
428#[cfg(target_os = "linux")]
429fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
430    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha1(), data, out)
431}
432#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
433fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
434    let digest = ring::digest::digest(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data);
435    hex_encode_to_slice(digest.as_ref(), out);
436    Ok(40)
437}
438#[cfg(target_vendor = "apple")]
439fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
440    let digest = sha1::Sha1::digest(data);
441    hex_encode_to_slice(&digest, out);
442    Ok(40)
443}
444
445#[cfg(target_os = "linux")]
446fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
447    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha224(), data, out)
448}
449#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
450fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
451    let digest = <sha2::Sha224 as sha2::Digest>::digest(data);
452    hex_encode_to_slice(&digest, out);
453    Ok(56)
454}
455#[cfg(target_vendor = "apple")]
456fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
457    let digest = <sha2::Sha224 as sha2::Digest>::digest(data);
458    hex_encode_to_slice(&digest, out);
459    Ok(56)
460}
461
462#[cfg(target_os = "linux")]
463fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
464    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha256(), data, out)
465}
466#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
467fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
468    let digest = ring::digest::digest(&ring::digest::SHA256, data);
469    hex_encode_to_slice(digest.as_ref(), out);
470    Ok(64)
471}
472#[cfg(target_vendor = "apple")]
473fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
474    let digest = <sha2::Sha256 as sha2::Digest>::digest(data);
475    hex_encode_to_slice(&digest, out);
476    Ok(64)
477}
478
479#[cfg(target_os = "linux")]
480fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
481    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha384(), data, out)
482}
483#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
484fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
485    let digest = ring::digest::digest(&ring::digest::SHA384, data);
486    hex_encode_to_slice(digest.as_ref(), out);
487    Ok(96)
488}
489#[cfg(target_vendor = "apple")]
490fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
491    let digest = <sha2::Sha384 as sha2::Digest>::digest(data);
492    hex_encode_to_slice(&digest, out);
493    Ok(96)
494}
495
496#[cfg(target_os = "linux")]
497fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
498    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha512(), data, out)
499}
500#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
501fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
502    let digest = ring::digest::digest(&ring::digest::SHA512, data);
503    hex_encode_to_slice(digest.as_ref(), out);
504    Ok(128)
505}
506#[cfg(target_vendor = "apple")]
507fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
508    let digest = <sha2::Sha512 as sha2::Digest>::digest(data);
509    hex_encode_to_slice(&digest, out);
510    Ok(128)
511}
512
/// Hash a single file using raw syscalls and write hex directly to output buffer.
/// Returns number of hex bytes written.
/// This is the absolute minimum-overhead path for single-file hashing:
/// raw open + fstat + read + hash + hex encode, with zero String allocation.
///
/// `out` must be large enough for the algorithm's hex digest (see
/// `hash_bytes_to_buf`); the downstream copy panics on a too-small buffer.
#[cfg(target_os = "linux")]
pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
    use std::os::unix::ffi::OsStrExt;

    // CString rejects interior NUL bytes; surface that as InvalidInput.
    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    // Only attempt O_NOATIME while the cached flag says it works (it needs
    // file ownership or CAP_FOWNER; see NOATIME_SUPPORTED).
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    // SAFETY: c_path is a valid NUL-terminated C string for the duration of the call.
    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM while O_NOATIME was set means the flag is unsupported for this
        // user: remember that globally and retry once without it.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            // SAFETY: same valid C string as above.
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd_to_buf(algo, fd2, out);
        }
        return Err(err);
    }
    hash_from_raw_fd_to_buf(algo, fd, out)
}
545
/// Hash from raw fd and write hex directly to output buffer.
/// For tiny files (<8KB), the entire path is raw syscalls + stack buffer — zero heap.
/// For larger files, falls back to the String-returning mmap / buffered paths
/// below and copies the resulting hex into `out`.
/// Takes ownership of `fd` and always closes it (directly or via `File`'s Drop).
#[cfg(target_os = "linux")]
fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
    // SAFETY: an all-zero libc::stat is a valid initial value; fstat overwrites it.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        // Capture errno before close() can clobber it.
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file: hash the empty slice, no reads needed.
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &[], out);
    }

    // Tiny files (<8KB): fully raw path — zero heap allocation.
    // TINY_FILE_LIMIT (8KB) matches the stack buffer size below.
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        // Read until `size` bytes are collected or EOF (file may have shrunk
        // since fstat); never reads past `size` even if the file grew.
        while total < size as usize {
            // SAFETY: buf[total..] is valid writable memory at least
            // (size - total) bytes long, since size < 8192.
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue; // EINTR: retry the read
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break; // EOF before the expected size — hash what was read
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &buf[..total], out);
    }

    // Larger files: fall back to the String-returning paths,
    // then copy the hex into out.
    use std::os::unix::io::FromRawFd;
    // SAFETY: we own fd and transfer ownership to File, which closes it on drop.
    let file = unsafe { File::from_raw_fd(fd) };
    let hash_str = if is_regular && size > 0 {
        if size >= SMALL_FILE_LIMIT {
            // mmap the whole file and hint the kernel: sequential access,
            // huge pages for >=2MB, and pre-fault pages where supported.
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // PopulateRead can fail (e.g. older kernels); fall back to WillNeed.
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                hash_bytes(algo, &mmap)?
            } else {
                // mmap failed (e.g. unsupported filesystem): buffered single read.
                hash_file_small(algo, file, size as usize)?
            }
        } else {
            hash_file_small(algo, file, size as usize)?
        }
    } else {
        // Non-regular file (pipe, device, ...): plain streaming hash.
        hash_reader(algo, file)?
    };
    let hex_bytes = hash_str.as_bytes();
    // Panics if `out` is smaller than the hex digest — callers must size it correctly.
    out[..hex_bytes.len()].copy_from_slice(hex_bytes);
    Ok(hex_bytes.len())
}
632
633// ── MD5 ─────────────────────────────────────────────────────────────
634// Linux: OpenSSL (same assembly-optimized library as GNU coreutils)
635// Other platforms: md-5 crate (pure Rust)
636
637#[cfg(target_os = "linux")]
638fn md5_bytes(data: &[u8]) -> io::Result<String> {
639    openssl_hash_bytes(openssl::hash::MessageDigest::md5(), data)
640}
641
642#[cfg(not(target_os = "linux"))]
643fn md5_bytes(data: &[u8]) -> io::Result<String> {
644    Ok(hash_digest::<Md5>(data))
645}
646
647#[cfg(target_os = "linux")]
648fn md5_reader(reader: impl Read) -> io::Result<String> {
649    openssl_hash_reader(openssl::hash::MessageDigest::md5(), reader)
650}
651
652#[cfg(not(target_os = "linux"))]
653fn md5_reader(reader: impl Read) -> io::Result<String> {
654    hash_reader_impl::<Md5>(reader)
655}
656
657/// Compute hash of data from a reader, returning hex string.
658pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
659    match algo {
660        HashAlgorithm::Sha1 => sha1_reader(reader),
661        HashAlgorithm::Sha224 => sha224_reader(reader),
662        HashAlgorithm::Sha256 => sha256_reader(reader),
663        HashAlgorithm::Sha384 => sha384_reader(reader),
664        HashAlgorithm::Sha512 => sha512_reader(reader),
665        HashAlgorithm::Md5 => md5_reader(reader),
666        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
667    }
668}
669
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
/// Relaxed ordering is sufficient: this is a best-effort, one-way hint, not a lock.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);

/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
/// Caches whether O_NOATIME works to avoid double-open on every file.
#[cfg(target_os = "linux")]
fn open_noatime(path: &Path) -> io::Result<File> {
    use std::os::unix::fs::OpenOptionsExt;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        match std::fs::OpenOptions::new()
            .read(true)
            .custom_flags(libc::O_NOATIME)
            .open(path)
        {
            Ok(f) => return Ok(f),
            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
                // and fall through to the plain open below.
                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            }
            Err(e) => return Err(e), // Real error, propagate
        }
    }
    // Plain open: O_NOATIME is known-unsupported, or it just failed with EPERM.
    File::open(path)
}
696
/// Non-Linux fallback: O_NOATIME does not exist here, so open normally.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
701
702/// Open a file and get its metadata in one step.
703/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
704#[cfg(target_os = "linux")]
705#[inline]
706fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
707    let file = open_noatime(path)?;
708    let fd = {
709        use std::os::unix::io::AsRawFd;
710        file.as_raw_fd()
711    };
712    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
713    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
714        return Err(io::Error::last_os_error());
715    }
716    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
717    let size = stat.st_size as u64;
718    Ok((file, size, is_regular))
719}
720
/// Portable variant: open, then query size and file type via std metadata.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let is_regular = meta.file_type().is_file();
    Ok((file, meta.len(), is_regular))
}
728
/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
/// Must not exceed the 8KB stack buffer in `hash_from_raw_fd_to_buf`.
const TINY_FILE_LIMIT: u64 = 8 * 1024;

// Thread-local reusable buffer for single-read hash.
// Grows lazily up to SMALL_FILE_LIMIT (16MB). The initial 64KB capacity
// handles tiny files; larger files trigger one grow that then persists
// for reuse across subsequent files on the same thread.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
753
754/// Optimized hash for large files (>=16MB) on Linux.
755/// Hash large files (>=16MB) using streaming I/O with fadvise + ring Context.
756/// Uses sequential fadvise hint for kernel readahead, then streams through
757/// hash context in large chunks. For large files (>64MB), uses double-buffered
758/// reader thread to overlap I/O and hashing.
759#[cfg(target_os = "linux")]
760fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
761    // For very large files, double-buffered reader thread overlaps I/O and CPU.
762    // For medium files, single-thread streaming is faster (avoids thread overhead).
763    if file_size >= 64 * 1024 * 1024 {
764        hash_file_pipelined_read(algo, file, file_size)
765    } else {
766        hash_file_streaming(algo, file, file_size)
767    }
768}
769
770/// Simple single-thread streaming hash with fadvise.
771/// Optimal for files 16-64MB where thread overhead exceeds I/O overlap benefit.
772#[cfg(target_os = "linux")]
773fn hash_file_streaming(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
774    use std::os::unix::io::AsRawFd;
775
776    let _ = unsafe {
777        libc::posix_fadvise(
778            file.as_raw_fd(),
779            0,
780            file_size as i64,
781            libc::POSIX_FADV_SEQUENTIAL,
782        )
783    };
784
785    // Use OpenSSL for all algorithms on Linux (same library as GNU coreutils).
786    if matches!(algo, HashAlgorithm::Blake2b) {
787        blake2b_hash_reader(file, 64)
788    } else {
789        openssl_hash_reader(algo_to_openssl_md(algo), file)
790    }
791}
792
793/// Streaming fallback for large files when mmap is unavailable.
794/// Uses double-buffered reader thread with fadvise hints.
795/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
796#[cfg(target_os = "linux")]
797fn hash_file_pipelined_read(
798    algo: HashAlgorithm,
799    mut file: File,
800    file_size: u64,
801) -> io::Result<String> {
802    use std::os::unix::io::AsRawFd;
803
804    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4MB per buffer
805
806    let _ = unsafe {
807        libc::posix_fadvise(
808            file.as_raw_fd(),
809            0,
810            file_size as i64,
811            libc::POSIX_FADV_SEQUENTIAL,
812        )
813    };
814
815    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
816    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
817    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
818
819    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
820        while let Ok(mut buf) = buf_rx.recv() {
821            let mut total = 0;
822            while total < buf.len() {
823                match file.read(&mut buf[total..]) {
824                    Ok(0) => break,
825                    Ok(n) => total += n,
826                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
827                    Err(e) => return Err(e),
828                }
829            }
830            if total == 0 {
831                break;
832            }
833            if tx.send((buf, total)).is_err() {
834                break;
835            }
836        }
837        Ok(())
838    });
839
840    // Use OpenSSL Hasher for all hash algorithms (same library as GNU coreutils).
841    macro_rules! hash_pipelined_openssl {
842        ($md:expr) => {{
843            let mut hasher =
844                openssl::hash::Hasher::new($md).map_err(|e| io::Error::other(e.to_string()))?;
845            while let Ok((buf, n)) = rx.recv() {
846                hasher
847                    .update(&buf[..n])
848                    .map_err(|e| io::Error::other(e.to_string()))?;
849                let _ = buf_tx.send(buf);
850            }
851            let digest = hasher
852                .finish()
853                .map_err(|e| io::Error::other(e.to_string()))?;
854            Ok(hex_encode(&digest))
855        }};
856    }
857
858    let hash_result: io::Result<String> = if matches!(algo, HashAlgorithm::Blake2b) {
859        let mut state = blake2b_simd::Params::new().to_state();
860        while let Ok((buf, n)) = rx.recv() {
861            state.update(&buf[..n]);
862            let _ = buf_tx.send(buf);
863        }
864        Ok(hex_encode(state.finalize().as_bytes()))
865    } else {
866        hash_pipelined_openssl!(algo_to_openssl_md(algo))
867    };
868
869    match reader_handle.join() {
870        Ok(Ok(())) => {}
871        Ok(Err(e)) => {
872            if hash_result.is_ok() {
873                return Err(e);
874            }
875        }
876        Err(payload) => {
877            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
878                format!("reader thread panicked: {}", s)
879            } else if let Some(s) = payload.downcast_ref::<String>() {
880                format!("reader thread panicked: {}", s)
881            } else {
882                "reader thread panicked".to_string()
883            };
884            return Err(io::Error::other(msg));
885        }
886    }
887
888    hash_result
889}
890
/// Hash a file by path. Uses I/O pipelining for large files on Linux,
/// mmap with HUGEPAGE hints as fallback, single-read for small files,
/// and streaming read for non-regular files.
///
/// # Errors
/// Propagates any I/O error from opening, statting, or reading the file.
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    // Empty regular file: hash the empty input without touching the fd.
    if is_regular && file_size == 0 {
        return hash_bytes(algo, &[]);
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return hash_file_tiny(algo, file, file_size as usize);
        }
        // Large files (>=16MB): mmap with huge pages for zero-copy single-shot hash.
        // This is faster than streaming I/O for files that fit in RAM because it
        // avoids thread synchronization, buffer management, and data copying.
        // Falls back to streaming I/O if mmap fails (FUSE, NFS, etc.).
        //
        // SAFETY: mmap is safe for regular local files. If the file is truncated
        // or the backing storage disappears after mapping (e.g. NFS disconnect),
        // the kernel delivers SIGBUS which will crash the process. This matches
        // the behavior of other mmap-using tools (wc, cat). The fallback to
        // streaming I/O (read()) handles mmap failures at map time, but cannot
        // protect against post-map truncation.
        if file_size >= SMALL_FILE_LIMIT {
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                #[cfg(target_os = "linux")]
                {
                    // HugePage advice must precede any page fault; 2MB is the
                    // x86-64 huge-page size, smaller maps gain nothing.
                    if file_size >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                    // PopulateRead (Linux 5.14+) synchronously faults all pages before
                    // returning, giving warm TLB entries for hash_bytes. WillNeed is
                    // async and best-effort — pages may still fault during hashing.
                    if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                        let _ = mmap.advise(memmap2::Advice::WillNeed);
                    }
                }
                return hash_bytes(algo, &mmap);
            }
            // mmap failed — fall back to streaming I/O
            #[cfg(target_os = "linux")]
            {
                return hash_file_pipelined(algo, file, file_size);
            }
            #[cfg(not(target_os = "linux"))]
            {
                // On non-Linux, fall through to hash_reader (streaming fallback)
            }
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash.
        // This avoids Hasher context allocation + streaming overhead for each file.
        if file_size < SMALL_FILE_LIMIT {
            return hash_file_small(algo, file, file_size as usize);
        }
    }

    // Non-regular files or fallback: stream. The fadvise hint only helps when
    // the fd has real backing storage, hence the size gate.
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    hash_reader(algo, file)
}
962
963/// Hash a tiny file (<8KB) using a stack-allocated buffer.
964/// Single read() syscall, zero heap allocation on the data path.
965/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
966#[inline]
967fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
968    let mut buf = [0u8; 8192];
969    let mut total = 0;
970    // Read with known size — usually completes in a single read() for regular files
971    while total < size {
972        match file.read(&mut buf[total..size]) {
973            Ok(0) => break,
974            Ok(n) => total += n,
975            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
976            Err(e) => return Err(e),
977        }
978    }
979    hash_bytes(algo, &buf[..total])
980}
981
/// Hash a small file by reading it entirely into a thread-local buffer,
/// then using the single-shot hash function. Avoids per-file Hasher allocation.
///
/// `size` comes from the caller's stat; if the file shrank since then, only
/// the bytes actually read (`total`) are hashed.
#[inline]
fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        // Reset length but keep allocation, then grow if needed
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
        // directly and only access buf[..total] where total <= size <= capacity.
        // NOTE(review): this hands uninitialized memory to Read::read, which is
        // only sound for readers that never inspect the buffer — File qualifies,
        // but don't reuse this pattern with arbitrary readers.
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        // EINTR-safe fill loop; Ok(0) exits early if the file shrank.
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        hash_bytes(algo, &buf[..total])
    })
}
1008
1009/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
1010pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
1011    let stdin = io::stdin();
1012    // Hint kernel for sequential access if stdin is a regular file (redirect)
1013    #[cfg(target_os = "linux")]
1014    {
1015        use std::os::unix::io::AsRawFd;
1016        let fd = stdin.as_raw_fd();
1017        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1018        if unsafe { libc::fstat(fd, &mut stat) } == 0
1019            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1020            && stat.st_size > 0
1021        {
1022            unsafe {
1023                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1024            }
1025        }
1026    }
1027    // Streaming hash — works for both pipe and file-redirect stdin
1028    hash_reader(algo, stdin.lock())
1029}
1030
/// Decide whether batch hashing should run on multiple threads.
/// Any batch of two or more files qualifies: rayon's pool is initialized
/// lazily once and reused, so per-file work-stealing cost is negligible
/// (~1µs), and skipping a stat()-based size probe saves N syscalls.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
1038
1039/// Issue readahead hints for a list of file paths to warm the page cache.
1040/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
1041/// Only issues hints for files >= 1MB; small files are read fast enough
1042/// that the fadvise syscall overhead isn't worth it.
1043#[cfg(target_os = "linux")]
1044pub fn readahead_files(paths: &[&Path]) {
1045    use std::os::unix::io::AsRawFd;
1046    for path in paths {
1047        if let Ok(file) = open_noatime(path) {
1048            if let Ok(meta) = file.metadata() {
1049                let len = meta.len();
1050                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
1051                    unsafe {
1052                        libc::posix_fadvise(
1053                            file.as_raw_fd(),
1054                            0,
1055                            len as i64,
1056                            libc::POSIX_FADV_WILLNEED,
1057                        );
1058                    }
1059                }
1060            }
1061        }
1062    }
1063}
1064
/// Non-Linux stub: readahead hints are implemented with posix_fadvise only
/// in the Linux build of this function, so this variant does nothing.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No-op on non-Linux
}
1069
1070// --- BLAKE2b variable-length functions (using blake2b_simd) ---
1071
1072/// Hash raw data with BLAKE2b variable output length.
1073/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
1074pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
1075    let hash = blake2b_simd::Params::new()
1076        .hash_length(output_bytes)
1077        .hash(data);
1078    hex_encode(hash.as_bytes())
1079}
1080
1081/// Hash a reader with BLAKE2b variable output length.
1082/// Uses thread-local buffer for cache-friendly streaming.
1083pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
1084    STREAM_BUF.with(|cell| {
1085        let mut buf = cell.borrow_mut();
1086        ensure_stream_buf(&mut buf);
1087        let mut state = blake2b_simd::Params::new()
1088            .hash_length(output_bytes)
1089            .to_state();
1090        loop {
1091            let n = read_full(&mut reader, &mut buf)?;
1092            if n == 0 {
1093                break;
1094            }
1095            state.update(&buf[..n]);
1096        }
1097        Ok(hex_encode(state.finalize().as_bytes()))
1098    })
1099}
1100
/// Hash a file with BLAKE2b variable output length.
/// Uses mmap for large files (zero-copy), single-read for small files,
/// and streaming read as fallback.
///
/// # Errors
/// Propagates any I/O error from opening, statting, or reading the file.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    // Empty regular file: hash the empty input without reading the fd.
    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // Large files (>=16MB): I/O pipelining on Linux, mmap on other platforms
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return blake2b_hash_file_pipelined(file, file_size, output_bytes);
            }
            #[cfg(not(target_os = "linux"))]
            {
                // On non-Linux a failed mmap falls through to the streaming
                // path at the bottom of this function.
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(blake2b_hash_data(&mmap, output_bytes));
                }
            }
        }
        // Small files (8KB up to SMALL_FILE_LIMIT): single read into a
        // thread-local buffer, then single-shot hash.
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    blake2b_hash_reader(file, output_bytes)
}
1146
1147/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
1148#[inline]
1149fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1150    let mut buf = [0u8; 8192];
1151    let mut total = 0;
1152    while total < size {
1153        match file.read(&mut buf[total..size]) {
1154            Ok(0) => break,
1155            Ok(n) => total += n,
1156            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1157            Err(e) => return Err(e),
1158        }
1159    }
1160    Ok(blake2b_hash_data(&buf[..total], output_bytes))
1161}
1162
/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
///
/// `size` comes from the caller's stat; if the file shrank since then, only
/// the bytes actually read (`total`) are hashed.
#[inline]
fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
    SMALL_FILE_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        buf.clear();
        buf.reserve(size);
        // SAFETY: capacity >= size after clear+reserve
        // NOTE(review): hands uninitialized memory to Read::read — sound for
        // File, which never inspects the buffer; don't generalize this pattern.
        unsafe {
            buf.set_len(size);
        }
        let mut total = 0;
        // EINTR-safe fill loop; Ok(0) exits early if the file shrank.
        while total < size {
            match file.read(&mut buf[total..size]) {
                Ok(0) => break,
                Ok(n) => total += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(blake2b_hash_data(&buf[..total], output_bytes))
    })
}
1186
1187/// Optimized BLAKE2b hash for large files (>=16MB) on Linux.
1188/// Primary path: mmap with HUGEPAGE + POPULATE_READ for zero-copy, single-shot hash.
1189/// Eliminates thread spawn, channel synchronization, buffer allocation (24MB→0),
1190/// and read() memcpy overhead. Falls back to streaming I/O if mmap fails.
1191#[cfg(target_os = "linux")]
1192fn blake2b_hash_file_pipelined(
1193    file: File,
1194    file_size: u64,
1195    output_bytes: usize,
1196) -> io::Result<String> {
1197    // Primary path: mmap with huge pages for zero-copy single-shot hash.
1198    // Eliminates: thread spawn (~50µs), channel sync, buffer allocs (24MB),
1199    // 13+ read() syscalls, and page-cache → user-buffer memcpy.
1200    match unsafe { memmap2::MmapOptions::new().map(&file) } {
1201        Ok(mmap) => {
1202            // HUGEPAGE MUST come before any page faults: reduces 25,600 minor
1203            // faults (4KB) to ~50 faults (2MB) for 100MB. Saves ~12ms overhead.
1204            if file_size >= 2 * 1024 * 1024 {
1205                let _ = mmap.advise(memmap2::Advice::HugePage);
1206            }
1207            let _ = mmap.advise(memmap2::Advice::Sequential);
1208            // POPULATE_READ (Linux 5.14+): synchronously prefaults all pages with
1209            // huge pages before hashing begins. Falls back to WillNeed on older kernels.
1210            if file_size >= 4 * 1024 * 1024 {
1211                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1212                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1213                }
1214            } else {
1215                let _ = mmap.advise(memmap2::Advice::WillNeed);
1216            }
1217            // Single-shot hash: processes entire file in one call, streaming
1218            // directly from page cache with no user-space buffer copies.
1219            Ok(blake2b_hash_data(&mmap, output_bytes))
1220        }
1221        Err(_) => {
1222            // mmap failed (FUSE, NFS without mmap support, etc.) — fall back
1223            // to streaming pipelined I/O.
1224            blake2b_hash_file_streamed(file, file_size, output_bytes)
1225        }
1226    }
1227}
1228
1229/// Streaming fallback for BLAKE2b large files when mmap is unavailable.
1230/// Uses double-buffered reader thread with fadvise hints.
1231/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
1232#[cfg(target_os = "linux")]
1233fn blake2b_hash_file_streamed(
1234    mut file: File,
1235    file_size: u64,
1236    output_bytes: usize,
1237) -> io::Result<String> {
1238    use std::os::unix::io::AsRawFd;
1239
1240    const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; // 8MB per buffer
1241
1242    // Hint kernel for sequential access
1243    unsafe {
1244        libc::posix_fadvise(
1245            file.as_raw_fd(),
1246            0,
1247            file_size as i64,
1248            libc::POSIX_FADV_SEQUENTIAL,
1249        );
1250    }
1251
1252    // Double-buffered channels: reader fills one buffer while hasher processes another.
1253    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
1254    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
1255    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
1256
1257    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
1258        // Blocking recv reuses hasher's returned buffer (2 buffers total, not 3).
1259        while let Ok(mut buf) = buf_rx.recv() {
1260            let mut total = 0;
1261            while total < buf.len() {
1262                match file.read(&mut buf[total..]) {
1263                    Ok(0) => break,
1264                    Ok(n) => total += n,
1265                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1266                    Err(e) => return Err(e),
1267                }
1268            }
1269            if total == 0 {
1270                break;
1271            }
1272            if tx.send((buf, total)).is_err() {
1273                break;
1274            }
1275        }
1276        Ok(())
1277    });
1278
1279    let mut state = blake2b_simd::Params::new()
1280        .hash_length(output_bytes)
1281        .to_state();
1282    while let Ok((buf, n)) = rx.recv() {
1283        state.update(&buf[..n]);
1284        let _ = buf_tx.send(buf);
1285    }
1286    let hash_result = Ok(hex_encode(state.finalize().as_bytes()));
1287
1288    match reader_handle.join() {
1289        Ok(Ok(())) => {}
1290        Ok(Err(e)) => {
1291            if hash_result.is_ok() {
1292                return Err(e);
1293            }
1294        }
1295        Err(payload) => {
1296            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
1297                format!("reader thread panicked: {}", s)
1298            } else if let Some(s) = payload.downcast_ref::<String>() {
1299                format!("reader thread panicked: {}", s)
1300            } else {
1301                "reader thread panicked".to_string()
1302            };
1303            return Err(io::Error::other(msg));
1304        }
1305    }
1306
1307    hash_result
1308}
1309
1310/// Hash stdin with BLAKE2b variable output length.
1311/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
1312pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
1313    let stdin = io::stdin();
1314    #[cfg(target_os = "linux")]
1315    {
1316        use std::os::unix::io::AsRawFd;
1317        let fd = stdin.as_raw_fd();
1318        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1319        if unsafe { libc::fstat(fd, &mut stat) } == 0
1320            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1321            && stat.st_size > 0
1322        {
1323            unsafe {
1324                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1325            }
1326        }
1327    }
1328    blake2b_hash_reader(stdin.lock(), output_bytes)
1329}
1330
/// Internal enum for file content in batch hashing.
/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
enum FileContent {
    // Zero-copy view of a memory-mapped file (larger regular files).
    Mmap(memmap2::Mmap),
    // Owned bytes obtained via read() (tiny files, mmap failures, non-regular files).
    Buf(Vec<u8>),
}
1337
1338impl AsRef<[u8]> for FileContent {
1339    fn as_ref(&self) -> &[u8] {
1340        match self {
1341            FileContent::Mmap(m) => m,
1342            FileContent::Buf(v) => v,
1343        }
1344    }
1345}
1346
1347/// Open a file and load its content for batch hashing.
1348/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
1349/// files (zero-copy), and read-to-end for non-regular files.
1350fn open_file_content(path: &Path) -> io::Result<FileContent> {
1351    let (file, size, is_regular) = open_and_stat(path)?;
1352    if is_regular && size == 0 {
1353        return Ok(FileContent::Buf(Vec::new()));
1354    }
1355    if is_regular && size > 0 {
1356        // Tiny files: read directly into Vec. The mmap syscall + page fault
1357        // overhead exceeds the data transfer cost for files under 8KB.
1358        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
1359        if size < TINY_FILE_LIMIT {
1360            let mut buf = vec![0u8; size as usize];
1361            let mut total = 0;
1362            let mut f = file;
1363            while total < size as usize {
1364                match f.read(&mut buf[total..]) {
1365                    Ok(0) => break,
1366                    Ok(n) => total += n,
1367                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1368                    Err(e) => return Err(e),
1369                }
1370            }
1371            buf.truncate(total);
1372            return Ok(FileContent::Buf(buf));
1373        }
1374        // HUGEPAGE + PopulateRead for optimal page faulting
1375        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1376        if let Ok(mmap) = mmap_result {
1377            #[cfg(target_os = "linux")]
1378            {
1379                if size >= 2 * 1024 * 1024 {
1380                    let _ = mmap.advise(memmap2::Advice::HugePage);
1381                }
1382                let _ = mmap.advise(memmap2::Advice::Sequential);
1383                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1384                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1385                }
1386            }
1387            return Ok(FileContent::Mmap(mmap));
1388        }
1389        // Fallback: read into Vec
1390        let mut buf = vec![0u8; size as usize];
1391        let mut total = 0;
1392        let mut f = file;
1393        while total < size as usize {
1394            match f.read(&mut buf[total..]) {
1395                Ok(0) => break,
1396                Ok(n) => total += n,
1397                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1398                Err(e) => return Err(e),
1399            }
1400        }
1401        buf.truncate(total);
1402        return Ok(FileContent::Buf(buf));
1403    }
1404    // Non-regular: read to end
1405    let mut buf = Vec::new();
1406    let mut f = file;
1407    f.read_to_end(&mut buf)?;
1408    Ok(FileContent::Buf(buf))
1409}
1410
1411/// Read remaining file content from an already-open fd into a Vec.
1412/// Used when the initial stack buffer is exhausted and we need to read
1413/// the rest without re-opening the file.
1414fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1415    let mut buf = Vec::with_capacity(prefix.len() + 65536);
1416    buf.extend_from_slice(prefix);
1417    file.read_to_end(&mut buf)?;
1418    Ok(FileContent::Buf(buf))
1419}
1420
1421/// Open a file and read all content without fstat — just open+read+close.
1422/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
1423/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
1424/// then falls back to larger buffer or read_to_end for bigger files.
1425fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1426    let mut file = open_noatime(path)?;
1427    // Try small stack buffer first — optimal for benchmark's ~55 byte files.
1428    // For tiny files, allocate exact-size Vec to avoid waste.
1429    let mut small_buf = [0u8; 4096];
1430    match file.read(&mut small_buf) {
1431        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1432        Ok(n) if n < small_buf.len() => {
1433            // File fits in small buffer — allocate exact size
1434            let mut vec = Vec::with_capacity(n);
1435            vec.extend_from_slice(&small_buf[..n]);
1436            return Ok(FileContent::Buf(vec));
1437        }
1438        Ok(n) => {
1439            // Might be more data — allocate heap buffer and read into it directly
1440            let mut buf = vec![0u8; 65536];
1441            buf[..n].copy_from_slice(&small_buf[..n]);
1442            let mut total = n;
1443            loop {
1444                match file.read(&mut buf[total..]) {
1445                    Ok(0) => {
1446                        buf.truncate(total);
1447                        return Ok(FileContent::Buf(buf));
1448                    }
1449                    Ok(n) => {
1450                        total += n;
1451                        if total >= buf.len() {
1452                            // File > 64KB: read rest from existing fd
1453                            return read_remaining_to_vec(&buf[..total], file);
1454                        }
1455                    }
1456                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1457                    Err(e) => return Err(e),
1458                }
1459            }
1460        }
1461        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1462            let mut buf = vec![0u8; 65536];
1463            let mut total = 0;
1464            loop {
1465                match file.read(&mut buf[total..]) {
1466                    Ok(0) => {
1467                        buf.truncate(total);
1468                        return Ok(FileContent::Buf(buf));
1469                    }
1470                    Ok(n) => {
1471                        total += n;
1472                        if total >= buf.len() {
1473                            // File > 64KB: read rest from existing fd
1474                            return read_remaining_to_vec(&buf[..total], file);
1475                        }
1476                    }
1477                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1478                    Err(e) => return Err(e),
1479                }
1480            }
1481        }
1482        Err(e) => return Err(e),
1483    }
1484}
1485
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order.
///
/// For 100 files on AVX2: 4x throughput from SIMD parallelism.
///
/// Per-file read errors are reported positionally; one failed file does not
/// abort the rest of the batch.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    // For small file counts (≤10), load sequentially to avoid thread::scope
    // overhead (~120µs). For many files, use parallel loading with lightweight
    // OS threads. For 100+ files, use fast path that skips fstat.
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        // Ceiling division: every path lands in exactly one chunk.
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            // Chunks are contiguous and joined in spawn order, so flattening
            // preserves the original path order. A worker panic propagates
            // via unwrap().
            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        // Pair each successfully-read file's bytes with its original index so
        // results can be scattered back into input order after hashing.
        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Extract hashes into a map
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            // hash_opt is always Some here: every Ok entry received a job above.
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
1575
/// Batch-hash multiple files with BLAKE2b using the best strategy for the workload.
/// Samples a few files to estimate total data size. For small workloads, uses
/// single-core SIMD batch hashing (`blake2b_hash_files_many`) to avoid stat and
/// thread spawn overhead. For larger workloads, uses multi-core work-stealing
/// parallelism where each worker calls `blake2b_hash_file` (with I/O pipelining
/// for large files on Linux).
/// Returns results in input order.
///
/// `output_bytes` is the BLAKE2b digest length in bytes, forwarded to the
/// per-file hash call. Per-file failures are returned as `Err` entries in the
/// result vector; the call itself never fails.
pub fn blake2b_hash_files_parallel(
    paths: &[&Path],
    output_bytes: usize,
) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Sample a few files to estimate whether parallel processing is worthwhile.
    // This avoids the cost of statting ALL files (~70µs/file) when the workload
    // is too small for parallelism to help.
    let sample_count = n.min(5);
    let mut sample_max: u64 = 0;
    let mut sample_total: u64 = 0;
    for &p in paths.iter().take(sample_count) {
        // Stat failures count as size 0 here; the per-file hash call below
        // surfaces the real error for such paths.
        let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
        sample_total += size;
        sample_max = sample_max.max(size);
    }
    // Extrapolate total workload size from the sample (n == 0 yields 0).
    let estimated_total = if sample_count > 0 {
        sample_total * (n as u64) / (sample_count as u64)
    } else {
        0
    };

    // For small workloads, thread spawn overhead (~120µs × N_threads) exceeds
    // any parallelism benefit. Use SIMD batch hashing directly (no stat pass).
    if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
        return blake2b_hash_files_many(paths, output_bytes);
    }

    // Full stat pass for parallel scheduling — worth it for larger workloads.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    // The fd closes right after this scope, but the readahead
                    // I/O already queued still populates the page cache.
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    // One worker per CPU, but never more workers than files.
    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    // Scoped threads let workers borrow `indexed` without Arc or 'static bounds.
    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        // Claim the next unprocessed (largest remaining) file.
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = blake2b_hash_file(path, output_bytes);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // Every index is claimed exactly once, so every slot is filled;
        // the unwrap_or_else fallback is defensive only.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1687
1688/// Auto-dispatch multi-file hashing: picks sequential or parallel based on workload.
1689///
1690/// For small files (<64KB sample), sequential avoids thread spawn + readahead overhead
1691/// that dominates for tiny files. On the "100 × 55-byte files" benchmark, this saves
1692/// ~5ms of overhead (thread creation + 200 stat() calls + 100 fadvise() calls).
1693///
1694/// For large files (>=64KB), parallel processing amortizes thread spawn cost over
1695/// substantial per-file hash work. Returns results in input order.
1696pub fn hash_files_auto(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1697    let n = paths.len();
1698    if n == 0 {
1699        return Vec::new();
1700    }
1701    if n == 1 {
1702        return vec![hash_file_nostat(algo, paths[0])];
1703    }
1704
1705    // Sample up to 3 files (max size) to correctly dispatch mixed workloads
1706    // like `md5sum small.txt big1.gb big2.gb`. Costs at most 3 stat calls (~6µs)
1707    // to save potentially 3-6ms of thread overhead for small-file workloads.
1708    let sample_size = paths
1709        .iter()
1710        .take(3)
1711        .filter_map(|p| std::fs::metadata(p).ok())
1712        .map(|m| m.len())
1713        .max()
1714        .unwrap_or(0);
1715
1716    if sample_size < 65536 {
1717        // Small files: sequential loop avoiding thread spawn overhead.
1718        #[cfg(target_os = "linux")]
1719        {
1720            // Raw syscall path: reuses CString buffer, avoids OpenOptions/File overhead
1721            let mut c_path_buf = Vec::with_capacity(256);
1722            paths
1723                .iter()
1724                .map(|&p| hash_file_raw_nostat(algo, p, &mut c_path_buf))
1725                .collect()
1726        }
1727        #[cfg(not(target_os = "linux"))]
1728        {
1729            paths.iter().map(|&p| hash_file_nostat(algo, p)).collect()
1730        }
1731    } else if n >= 20 {
1732        hash_files_batch(paths, algo)
1733    } else {
1734        hash_files_parallel_fast(paths, algo)
1735    }
1736}
1737
/// Batch-hash multiple files with SHA-256/MD5 using work-stealing parallelism.
/// Files are sorted by size (largest first) so the biggest files start processing
/// immediately. Each worker thread grabs the next unprocessed file via atomic index,
/// eliminating tail latency from uneven file sizes.
/// Returns results in input order.
///
/// Per-file failures are returned as `Err` entries; the call itself never fails.
pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Build (original_index, path, size) tuples — stat all files for scheduling.
    // The stat cost (~5µs/file) is repaid by better work distribution.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            // Stat failures count as size 0; hash_file() below surfaces the
            // real error for such paths.
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    // The fd closes immediately after, but the queued readahead
                    // I/O still populates the page cache.
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    // One worker per CPU, but never more workers than files.
    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    // Scoped threads borrow `indexed` directly — no Arc or 'static needed.
    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        // Claim the next unprocessed (largest remaining) file.
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = hash_file(algo, path);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // Every index is claimed exactly once, so every slot is filled;
        // the unwrap_or_else fallback is defensive only.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1822
1823/// Fast parallel hash for multi-file workloads. Skips the stat-all-and-sort phase
1824/// of `hash_files_parallel()` and uses `hash_file_nostat()` per worker to minimize
1825/// per-file syscall overhead. For 100 tiny files, this eliminates ~200 stat() calls
1826/// (100 from the sort phase + 100 from open_and_stat inside each worker).
1827/// Returns results in input order.
1828pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1829    let n = paths.len();
1830    if n == 0 {
1831        return Vec::new();
1832    }
1833    if n == 1 {
1834        return vec![hash_file_nostat(algo, paths[0])];
1835    }
1836
1837    // Issue readahead for all files (no size threshold — even tiny files benefit
1838    // from batched WILLNEED hints when processing 100+ files)
1839    #[cfg(target_os = "linux")]
1840    readahead_files_all(paths);
1841
1842    let num_threads = std::thread::available_parallelism()
1843        .map(|n| n.get())
1844        .unwrap_or(4)
1845        .min(n);
1846
1847    let work_idx = AtomicUsize::new(0);
1848
1849    std::thread::scope(|s| {
1850        let work_idx = &work_idx;
1851
1852        let handles: Vec<_> = (0..num_threads)
1853            .map(|_| {
1854                s.spawn(move || {
1855                    let mut local_results = Vec::new();
1856                    loop {
1857                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1858                        if idx >= n {
1859                            break;
1860                        }
1861                        let result = hash_file_nostat(algo, paths[idx]);
1862                        local_results.push((idx, result));
1863                    }
1864                    local_results
1865                })
1866            })
1867            .collect();
1868
1869        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1870        for handle in handles {
1871            for (idx, result) in handle.join().unwrap() {
1872                results[idx] = Some(result);
1873            }
1874        }
1875        results
1876            .into_iter()
1877            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1878            .collect()
1879    })
1880}
1881
1882/// Batch-hash multiple files: pre-read all files into memory in parallel,
1883/// then hash all data in parallel. Optimal for many small files where per-file
1884/// overhead (open/read/close syscalls) dominates over hash computation.
1885///
1886/// Reuses the same parallel file loading pattern as `blake2b_hash_files_many()`.
1887/// For 100 × 55-byte files: all 5500 bytes are loaded in parallel across threads,
1888/// then hashed in parallel — minimizing wall-clock time for syscall-bound workloads.
1889/// Returns results in input order.
1890pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1891    let n = paths.len();
1892    if n == 0 {
1893        return Vec::new();
1894    }
1895
1896    // Issue readahead for all files
1897    #[cfg(target_os = "linux")]
1898    readahead_files_all(paths);
1899
1900    // Phase 1: Load all files into memory in parallel.
1901    // For 20+ files, use fast path that skips fstat.
1902    let use_fast = n >= 20;
1903
1904    let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
1905        // Sequential loading — avoids thread spawn overhead for small batches
1906        paths
1907            .iter()
1908            .map(|&path| {
1909                if use_fast {
1910                    open_file_content_fast(path)
1911                } else {
1912                    open_file_content(path)
1913                }
1914            })
1915            .collect()
1916    } else {
1917        let num_threads = std::thread::available_parallelism()
1918            .map(|t| t.get())
1919            .unwrap_or(4)
1920            .min(n);
1921        let chunk_size = (n + num_threads - 1) / num_threads;
1922
1923        std::thread::scope(|s| {
1924            let handles: Vec<_> = paths
1925                .chunks(chunk_size)
1926                .map(|chunk| {
1927                    s.spawn(move || {
1928                        chunk
1929                            .iter()
1930                            .map(|&path| {
1931                                if use_fast {
1932                                    open_file_content_fast(path)
1933                                } else {
1934                                    open_file_content(path)
1935                                }
1936                            })
1937                            .collect::<Vec<_>>()
1938                    })
1939                })
1940                .collect();
1941
1942            handles
1943                .into_iter()
1944                .flat_map(|h| h.join().unwrap())
1945                .collect()
1946        })
1947    };
1948
1949    // Phase 2: Hash all loaded data. For tiny files hash is negligible;
1950    // for larger files the parallel hashing across threads helps.
1951    let num_hash_threads = std::thread::available_parallelism()
1952        .map(|t| t.get())
1953        .unwrap_or(4)
1954        .min(n);
1955    let work_idx = AtomicUsize::new(0);
1956
1957    std::thread::scope(|s| {
1958        let work_idx = &work_idx;
1959        let file_data = &file_data;
1960
1961        let handles: Vec<_> = (0..num_hash_threads)
1962            .map(|_| {
1963                s.spawn(move || {
1964                    let mut local_results = Vec::new();
1965                    loop {
1966                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1967                        if idx >= n {
1968                            break;
1969                        }
1970                        let result = match &file_data[idx] {
1971                            Ok(content) => hash_bytes(algo, content.as_ref()),
1972                            Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
1973                        };
1974                        local_results.push((idx, result));
1975                    }
1976                    local_results
1977                })
1978            })
1979            .collect();
1980
1981        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1982        for handle in handles {
1983            for (idx, result) in handle.join().unwrap() {
1984                results[idx] = Some(result);
1985            }
1986        }
1987        results
1988            .into_iter()
1989            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1990            .collect()
1991    })
1992}
1993
1994/// Stream-hash a file that already has a prefix read into memory.
1995/// Feeds `prefix` into the hasher first, then streams the rest from `file`.
1996/// Avoids re-opening and re-reading the file when the initial buffer is exhausted.
1997fn hash_stream_with_prefix(
1998    algo: HashAlgorithm,
1999    prefix: &[u8],
2000    mut file: File,
2001) -> io::Result<String> {
2002    // Blake2b uses its own hasher on all platforms
2003    if matches!(algo, HashAlgorithm::Blake2b) {
2004        let mut state = blake2b_simd::Params::new().to_state();
2005        state.update(prefix);
2006        return STREAM_BUF.with(|cell| {
2007            let mut buf = cell.borrow_mut();
2008            ensure_stream_buf(&mut buf);
2009            loop {
2010                let n = read_full(&mut file, &mut buf)?;
2011                if n == 0 {
2012                    break;
2013                }
2014                state.update(&buf[..n]);
2015            }
2016            Ok(hex_encode(state.finalize().as_bytes()))
2017        });
2018    }
2019
2020    #[cfg(target_os = "linux")]
2021    {
2022        hash_stream_with_prefix_openssl(algo_to_openssl_md(algo), prefix, file)
2023    }
2024    #[cfg(not(target_os = "linux"))]
2025    {
2026        match algo {
2027            HashAlgorithm::Sha1 => hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file),
2028            HashAlgorithm::Sha224 => hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file),
2029            HashAlgorithm::Sha256 => hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file),
2030            HashAlgorithm::Sha384 => hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file),
2031            HashAlgorithm::Sha512 => hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file),
2032            HashAlgorithm::Md5 => hash_stream_with_prefix_digest::<md5::Md5>(prefix, file),
2033            HashAlgorithm::Blake2b => unreachable!(),
2034        }
2035    }
2036}
2037
/// Generic stream-hash with prefix for non-Linux platforms using Digest trait.
#[cfg(not(target_os = "linux"))]
fn hash_stream_with_prefix_digest<D: digest::Digest>(
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut state = D::new();
        // The prefix was already consumed from the file by the caller.
        state.update(prefix);
        loop {
            match read_full(&mut file, &mut chunk)? {
                0 => return Ok(hex_encode(&state.finalize())),
                filled => state.update(&chunk[..filled]),
            }
        }
    })
}
2059
2060/// Streaming hash with prefix using OpenSSL (Linux).
2061#[cfg(target_os = "linux")]
2062fn hash_stream_with_prefix_openssl(
2063    md: openssl::hash::MessageDigest,
2064    prefix: &[u8],
2065    mut file: File,
2066) -> io::Result<String> {
2067    STREAM_BUF.with(|cell| {
2068        let mut buf = cell.borrow_mut();
2069        ensure_stream_buf(&mut buf);
2070        let mut hasher =
2071            openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e.to_string()))?;
2072        hasher
2073            .update(prefix)
2074            .map_err(|e| io::Error::other(e.to_string()))?;
2075        loop {
2076            let n = read_full(&mut file, &mut buf)?;
2077            if n == 0 {
2078                break;
2079            }
2080            hasher
2081                .update(&buf[..n])
2082                .map_err(|e| io::Error::other(e.to_string()))?;
2083        }
2084        let digest = hasher
2085            .finish()
2086            .map_err(|e| io::Error::other(e.to_string()))?;
2087        Ok(hex_encode(&digest))
2088    })
2089}
2090
2091/// Hash a file without fstat — just open, read until EOF, hash.
2092/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
2093/// Uses a two-tier buffer strategy: small stack buffer (4KB) for the initial read,
2094/// then falls back to a larger stack buffer (64KB) or streaming hash for bigger files.
2095/// For benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
2096pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2097    let mut file = open_noatime(path)?;
2098    // First try a small stack buffer — optimal for tiny files (< 4KB).
2099    // Most "many_files" benchmark files are ~55 bytes, so this completes
2100    // with a single read() syscall and no fallback.
2101    let mut small_buf = [0u8; 4096];
2102    match file.read(&mut small_buf) {
2103        Ok(0) => return hash_bytes(algo, &[]),
2104        Ok(n) if n < small_buf.len() => {
2105            // File fits in small buffer — hash directly (common case)
2106            return hash_bytes(algo, &small_buf[..n]);
2107        }
2108        Ok(n) => {
2109            // Might be more data — fall back to larger buffer
2110            let mut buf = [0u8; 65536];
2111            buf[..n].copy_from_slice(&small_buf[..n]);
2112            let mut total = n;
2113            loop {
2114                match file.read(&mut buf[total..]) {
2115                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2116                    Ok(n) => {
2117                        total += n;
2118                        if total >= buf.len() {
2119                            // File > 64KB: stream-hash from existing fd instead of
2120                            // re-opening. Feed already-read prefix, continue streaming.
2121                            return hash_stream_with_prefix(algo, &buf[..total], file);
2122                        }
2123                    }
2124                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2125                    Err(e) => return Err(e),
2126                }
2127            }
2128        }
2129        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
2130            // Retry with full buffer on interrupt
2131            let mut buf = [0u8; 65536];
2132            let mut total = 0;
2133            loop {
2134                match file.read(&mut buf[total..]) {
2135                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2136                    Ok(n) => {
2137                        total += n;
2138                        if total >= buf.len() {
2139                            // File > 64KB: stream-hash from existing fd
2140                            return hash_stream_with_prefix(algo, &buf[..total], file);
2141                        }
2142                    }
2143                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2144                    Err(e) => return Err(e),
2145                }
2146            }
2147        }
2148        Err(e) => return Err(e),
2149    }
2150}
2151
2152/// Hash a small file using raw Linux syscalls without fstat.
2153/// For the multi-file sequential path where we already know files are small.
2154/// Avoids: OpenOptions builder, CString per-file alloc (reuses caller's buffer),
2155/// fstat overhead (unnecessary when we just need open+read+close).
2156/// Returns hash as hex string.
2157#[cfg(target_os = "linux")]
2158fn hash_file_raw_nostat(
2159    algo: HashAlgorithm,
2160    path: &Path,
2161    c_path_buf: &mut Vec<u8>,
2162) -> io::Result<String> {
2163    use std::os::unix::ffi::OsStrExt;
2164
2165    let path_bytes = path.as_os_str().as_bytes();
2166
2167    // Reuse caller's buffer for null-terminated path (avoids heap alloc per file)
2168    c_path_buf.clear();
2169    c_path_buf.reserve(path_bytes.len() + 1);
2170    c_path_buf.extend_from_slice(path_bytes);
2171    c_path_buf.push(0);
2172
2173    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
2174    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
2175        flags |= libc::O_NOATIME;
2176    }
2177
2178    let fd = unsafe { libc::open(c_path_buf.as_ptr() as *const libc::c_char, flags) };
2179    if fd < 0 {
2180        let err = io::Error::last_os_error();
2181        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
2182            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
2183            let fd2 = unsafe {
2184                libc::open(
2185                    c_path_buf.as_ptr() as *const libc::c_char,
2186                    libc::O_RDONLY | libc::O_CLOEXEC,
2187                )
2188            };
2189            if fd2 < 0 {
2190                return Err(io::Error::last_os_error());
2191            }
2192            return hash_fd_small(algo, fd2);
2193        }
2194        return Err(err);
2195    }
2196    hash_fd_small(algo, fd)
2197}
2198
2199/// Read a small file from fd, hash it, close fd. No fstat needed.
2200#[cfg(target_os = "linux")]
2201#[inline]
2202fn hash_fd_small(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
2203    let mut buf = [0u8; 4096];
2204    let n = loop {
2205        let ret = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
2206        if ret >= 0 {
2207            break ret;
2208        }
2209        let err = io::Error::last_os_error();
2210        if err.kind() == io::ErrorKind::Interrupted {
2211            continue;
2212        }
2213        unsafe {
2214            libc::close(fd);
2215        }
2216        return Err(err);
2217    };
2218    let n = n as usize;
2219    if n < buf.len() {
2220        // File fits in 4KB — common case for small files
2221        unsafe {
2222            libc::close(fd);
2223        }
2224        return hash_bytes(algo, &buf[..n]);
2225    }
2226    // File > 4KB: fall back to hash_file_nostat-style reading
2227    // Wrap fd in File for RAII close
2228    use std::os::unix::io::FromRawFd;
2229    let mut file = unsafe { File::from_raw_fd(fd) };
2230    let mut big_buf = [0u8; 65536];
2231    big_buf[..n].copy_from_slice(&buf[..n]);
2232    let mut total = n;
2233    loop {
2234        match std::io::Read::read(&mut file, &mut big_buf[total..]) {
2235            Ok(0) => return hash_bytes(algo, &big_buf[..total]),
2236            Ok(n) => {
2237                total += n;
2238                if total >= big_buf.len() {
2239                    return hash_stream_with_prefix(algo, &big_buf[..total], file);
2240                }
2241            }
2242            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2243            Err(e) => return Err(e),
2244        }
2245    }
2246}
2247
2248/// Hash a single file using raw Linux syscalls for minimum overhead.
2249/// Bypasses Rust's File abstraction entirely: raw open/fstat/read/close.
2250/// For the single-file fast path, this eliminates OpenOptions builder,
2251/// CString heap allocation, File wrapper overhead, and Read trait dispatch.
2252///
2253/// Size-based dispatch:
2254/// - Tiny (<8KB): stack buffer + raw read + hash_bytes (3 syscalls total)
2255/// - Small (8KB-16MB): wraps fd in File, reads into thread-local buffer
2256/// - Large (>=16MB): wraps fd in File, mmaps with HugePage + PopulateRead
2257/// - Non-regular: wraps fd in File, streaming hash_reader
2258#[cfg(target_os = "linux")]
2259pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2260    use std::os::unix::ffi::OsStrExt;
2261
2262    let path_bytes = path.as_os_str().as_bytes();
2263    let c_path = std::ffi::CString::new(path_bytes)
2264        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;
2265
2266    // Raw open with O_RDONLY | O_CLOEXEC, optionally O_NOATIME
2267    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
2268    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
2269        flags |= libc::O_NOATIME;
2270    }
2271
2272    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
2273    if fd < 0 {
2274        let err = io::Error::last_os_error();
2275        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
2276            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
2277            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
2278            if fd2 < 0 {
2279                return Err(io::Error::last_os_error());
2280            }
2281            return hash_from_raw_fd(algo, fd2);
2282        }
2283        return Err(err);
2284    }
2285    hash_from_raw_fd(algo, fd)
2286}
2287
/// Hash from a raw fd — dispatches by file size for optimal I/O strategy.
/// Handles tiny (stack buffer), small (thread-local buffer), large (mmap), and
/// non-regular (streaming) files.
///
/// Takes ownership of `fd`: it is closed on every path — manually with
/// libc::close on the raw-syscall paths, or via `File` RAII once wrapped.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    // Raw fstat to determine size and type
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &[]);
    }

    // Tiny files (<8KB): raw read into stack buffer, no File wrapper needed.
    // Entire I/O in 3 raw syscalls: open + read + close.
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        // Read until the fstat-reported size is reached; a premature EOF
        // (n == 0) means the file shrank after fstat — hash what was read.
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..total]);
    }

    // For larger files, wrap fd in File for RAII close and existing optimized paths.
    use std::os::unix::io::FromRawFd;
    let file = unsafe { File::from_raw_fd(fd) };

    if is_regular && size > 0 {
        // Large files (>=16MB): mmap with HugePage + PopulateRead
        if size >= SMALL_FILE_LIMIT {
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // Prefault pages using huge pages (kernel 5.14+)
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    // Older kernels: fall back to the async WillNeed hint.
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                return hash_bytes(algo, &mmap);
            }
            // mmap failure falls through to the buffered-read path below.
        }
        // Small files (8KB-16MB): single-read into thread-local buffer
        return hash_file_small(algo, file, size as usize);
    }

    // Non-regular files: streaming hash
    hash_reader(algo, file)
}
2374
2375/// Issue readahead hints for ALL file paths (no size threshold).
2376/// For multi-file benchmarks, even small files benefit from batched readahead.
2377#[cfg(target_os = "linux")]
2378pub fn readahead_files_all(paths: &[&Path]) {
2379    use std::os::unix::io::AsRawFd;
2380    for path in paths {
2381        if let Ok(file) = open_noatime(path) {
2382            if let Ok(meta) = file.metadata() {
2383                if meta.file_type().is_file() {
2384                    let len = meta.len();
2385                    unsafe {
2386                        libc::posix_fadvise(
2387                            file.as_raw_fd(),
2388                            0,
2389                            len as i64,
2390                            libc::POSIX_FADV_WILLNEED,
2391                        );
2392                    }
2393                }
2394            }
2395        }
2396    }
2397}
2398
/// No-op fallback on non-Linux targets: the readahead hint above relies on
/// `posix_fadvise`, which this crate only issues on Linux.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
2401
/// Write one checksum line in GNU coreutils format: "hash  filename\n".
/// The two-byte separator is "  " in text mode and " *" in binary mode.
/// Uses raw byte writes to keep std::fmt machinery off the hot path.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let sep: [u8; 2] = [b' ', if binary { b'*' } else { b' ' }];
    out.write_all(hash.as_bytes())?;
    out.write_all(&sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\n")
}
2416
/// Write one checksum line in GNU format, terminated by NUL instead of
/// newline (for --zero output consumed by tools like xargs -0).
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let sep: [u8; 2] = [b' ', if binary { b'*' } else { b' ' }];
    out.write_all(hash.as_bytes())?;
    out.write_all(&sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\0")
}
2430
2431// ── Single-write output buffer ─────────────────────────────────────
2432// For multi-file workloads, batch the entire "hash  filename\n" line into
2433// a single write() call. This halves the number of BufWriter flushes.
2434
// Thread-local output line buffer for batched writes.
// Reused across files to avoid per-file allocation.
// 256 bytes comfortably holds the longest common line (a 128-hex-char
// BLAKE2b-512 digest, separator, typical filename, and terminator);
// longer filenames simply grow the Vec once and keep the capacity.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
2440
2441/// Build and write the standard GNU hash output line in a single write() call.
2442/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
2443/// For escaped filenames: "\hash  escaped_filename\n".
2444#[inline]
2445pub fn write_hash_line(
2446    out: &mut impl Write,
2447    hash: &str,
2448    filename: &str,
2449    binary: bool,
2450    zero: bool,
2451    escaped: bool,
2452) -> io::Result<()> {
2453    LINE_BUF.with(|cell| {
2454        let mut buf = cell.borrow_mut();
2455        buf.clear();
2456        let mode = if binary { b'*' } else { b' ' };
2457        let term = if zero { b'\0' } else { b'\n' };
2458        if escaped {
2459            buf.push(b'\\');
2460        }
2461        buf.extend_from_slice(hash.as_bytes());
2462        buf.push(b' ');
2463        buf.push(mode);
2464        buf.extend_from_slice(filename.as_bytes());
2465        buf.push(term);
2466        out.write_all(&buf)
2467    })
2468}
2469
2470/// Build and write BSD tag format output in a single write() call.
2471/// Format: "ALGO (filename) = hash\n"
2472#[inline]
2473pub fn write_hash_tag_line(
2474    out: &mut impl Write,
2475    algo_name: &str,
2476    hash: &str,
2477    filename: &str,
2478    zero: bool,
2479) -> io::Result<()> {
2480    LINE_BUF.with(|cell| {
2481        let mut buf = cell.borrow_mut();
2482        buf.clear();
2483        let term = if zero { b'\0' } else { b'\n' };
2484        buf.extend_from_slice(algo_name.as_bytes());
2485        buf.extend_from_slice(b" (");
2486        buf.extend_from_slice(filename.as_bytes());
2487        buf.extend_from_slice(b") = ");
2488        buf.extend_from_slice(hash.as_bytes());
2489        buf.push(term);
2490        out.write_all(&buf)
2491    })
2492}
2493
2494/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
2495pub fn print_hash_tag(
2496    out: &mut impl Write,
2497    algo: HashAlgorithm,
2498    hash: &str,
2499    filename: &str,
2500) -> io::Result<()> {
2501    out.write_all(algo.name().as_bytes())?;
2502    out.write_all(b" (")?;
2503    out.write_all(filename.as_bytes())?;
2504    out.write_all(b") = ")?;
2505    out.write_all(hash.as_bytes())?;
2506    out.write_all(b"\n")
2507}
2508
2509/// Print hash in BSD tag format with NUL terminator.
2510pub fn print_hash_tag_zero(
2511    out: &mut impl Write,
2512    algo: HashAlgorithm,
2513    hash: &str,
2514    filename: &str,
2515) -> io::Result<()> {
2516    out.write_all(algo.name().as_bytes())?;
2517    out.write_all(b" (")?;
2518    out.write_all(filename.as_bytes())?;
2519    out.write_all(b") = ")?;
2520    out.write_all(hash.as_bytes())?;
2521    out.write_all(b"\0")
2522}
2523
/// Write a b2sum-style BSD tag record with the digest length in the tag:
/// "BLAKE2b (filename) = hash\n" for the default 512-bit digest, or
/// "BLAKE2b-NNN (filename) = hash\n" for any other bit length.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        // Default length: fixed byte literal, no formatting needed.
        512 => out.write_all(b"BLAKE2b (")?,
        // Rare non-512 path: write! overhead is negligible per file.
        n => write!(out, "BLAKE2b-{} (", n)?,
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\n")
}
2544
/// NUL-terminated variant of the b2sum BSD tag record:
/// "BLAKE2b (filename) = hash\0" or "BLAKE2b-NNN (filename) = hash\0".
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        n => write!(out, "BLAKE2b-{} (", n)?,
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\0")
}
2562
/// Options for check mode.
pub struct CheckOptions {
    /// Suppress the per-file "OK" line for files that verify successfully.
    pub quiet: bool,
    /// Suppress all per-file output; callers rely on the returned counts.
    pub status_only: bool,
    /// Treat improperly formatted lines as failures. NOTE(review): not
    /// consulted inside `check_file` itself — presumably the caller applies
    /// it to `CheckResult::format_errors` when deciding the exit status.
    pub strict: bool,
    /// Emit a stderr warning for each improperly formatted checksum line.
    pub warn: bool,
    /// Skip (and count) missing files instead of reporting a read error.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
2575
/// Result of check mode verification.
/// Aggregate counters produced by `check_file`.
pub struct CheckResult {
    /// Files whose computed hash matched the expected hash.
    pub ok: usize,
    /// Files whose computed hash did not match the expected hash.
    pub mismatches: usize,
    /// Check-file lines that could not be parsed in any supported format.
    pub format_errors: usize,
    /// Files that could not be opened or read (excluding ignored-missing).
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
2585
/// Verify checksums from a check file.
/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
///
/// Per-file verdicts ("OK" / "FAILED" / "FAILED open or read") go to `out`;
/// diagnostics (format warnings, I/O error details) go to `err_out`.
/// Returns aggregate counts; mapping them to an exit status (including
/// `opts.strict`) is left to the caller.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    // Hoist flag reads out of the loop; warn_prefix is borrowed on demand.
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    // 1-based line number used in format warnings.
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        // Drop trailing whitespace (including '\r' from CRLF check files).
        let line = line.trim_end();

        // Blank lines are skipped silently — not counted as format errors.
        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush stdout first so the stderr warning appears in
                    // order relative to verdicts already buffered on `out`.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                // --ignore-missing: nonexistent files are counted and
                // skipped quietly; every other I/O error is reported.
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Flush so the stderr detail line lands before the
                    // "FAILED open or read" verdict on stdout.
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Digests compare case-insensitively: check files may carry
        // uppercase hex while we always produce lowercase.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
2682
/// Parse a checksum line in any supported format, returning
/// `(expected_hash, filename)`.
///
/// Accepted formats, tried in order:
/// 1. BSD tag: "ALGO (filename) = hash" (fixed algo names, plus
///    "BLAKE2b-NNN" with a numeric bit length);
/// 2. GNU text: "hash  filename" (two-space separator), with an optional
///    leading backslash marking an escaped filename;
/// 3. GNU binary: "hash *filename".
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    const TAG_PREFIXES: [&str; 7] = [
        "MD5 (", "SHA1 (", "SHA224 (", "SHA256 (", "SHA384 (", "SHA512 (", "BLAKE2b (",
    ];
    let tag_rest = TAG_PREFIXES
        .iter()
        .find_map(|prefix| line.strip_prefix(prefix))
        .or_else(|| {
            // "BLAKE2b-NNN (filename) = hash": require all-digit bit length.
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[sp + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = tag_rest {
        if let Some(close) = rest.find(") = ") {
            return Some((&rest[close + 4..], &rest[..close]));
        }
    }

    // GNU formats; a leading '\' flags a backslash-escaped filename.
    let body = line.strip_prefix('\\').unwrap_or(line);
    // Text mode ("  ") is checked before binary mode (" *"), matching the
    // original precedence. Both separators are two bytes wide.
    for sep in ["  ", " *"] {
        if let Some(idx) = body.find(sep) {
            return Some((&body[..idx], &body[idx + 2..]));
        }
    }
    None
}
2731
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the digest length parsed from a trailing "-NNN" in the algo
/// name (e.g., "BLAKE2b-256" -> Some(256)); None when absent or non-numeric.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    // Split on the first " (" (algo vs rest) and the first ") = "
    // (filename vs hash) — same precedence as sequential find() calls.
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.split_once(") = ")?;
    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());
    Some((hash, filename, bits))
}
2752
/// Read as many bytes as possible into buf, retrying on partial reads.
/// Ensures each hash update gets a full buffer (fewer update calls = less overhead).
/// Fast path: regular file reads usually return the full buffer on the first call.
///
/// Returns the number of bytes read; 0 means EOF. Transient
/// `ErrorKind::Interrupted` errors are retried on BOTH paths — previously
/// only the slow path retried, so an EINTR on the very first read of a
/// chunk surfaced as a hard error instead of being retried.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    // Fast path: first read() usually fills the entire buffer for regular files.
    let n = loop {
        match reader.read(buf) {
            Ok(n) => break n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    };
    if n == buf.len() || n == 0 {
        return Ok(n);
    }
    // Slow path: partial read — retry to fill buffer (pipes, slow devices)
    let mut total = n;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2775
/// Build the 256-entry byte→hex-pair table at compile time.
/// Entry `b` holds the two lowercase ASCII hex chars of byte `b`,
/// so encoding needs a single table lookup per input byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut byte = 0;
    // const fn: plain while loop (iterators unavailable in const context).
    while byte < 256 {
        table[byte] = [DIGITS[byte >> 4], DIGITS[byte & 0xf]];
        byte += 1;
    }
    table
}

/// Byte-to-hex-pair lookup table, computed once at compile time.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();
2790
2791/// Fast hex encoding using 2-byte pair lookup table — one lookup per input byte.
2792/// Uses String directly instead of Vec<u8> to avoid the from_utf8 conversion overhead.
2793pub(crate) fn hex_encode(bytes: &[u8]) -> String {
2794    let len = bytes.len() * 2;
2795    let mut hex = String::with_capacity(len);
2796    // SAFETY: We write exactly `len` valid ASCII hex bytes into the String's buffer.
2797    unsafe {
2798        let buf = hex.as_mut_vec();
2799        buf.set_len(len);
2800        hex_encode_to_slice(bytes, buf);
2801    }
2802    hex
2803}
2804
2805/// Encode bytes as hex directly into a pre-allocated output slice.
2806/// Output slice must be at least `bytes.len() * 2` bytes long.
2807#[inline]
2808fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
2809    // SAFETY: We write exactly bytes.len()*2 bytes into `out`, which must be large enough.
2810    unsafe {
2811        let ptr = out.as_mut_ptr();
2812        for (i, &b) in bytes.iter().enumerate() {
2813            let pair = *HEX_TABLE.get_unchecked(b as usize);
2814            *ptr.add(i * 2) = pair[0];
2815            *ptr.add(i * 2 + 1) = pair[1];
2816        }
2817    }
2818}