//! coreutils_rs/hash/core.rs — core hashing implementation with
//! platform-dispatched digest backends (OpenSSL on Linux; ring/RustCrypto elsewhere).

1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[cfg(not(target_os = "linux"))]
11use digest::Digest;
12#[cfg(not(target_os = "linux"))]
13use md5::Md5;
14
/// Supported hash algorithms.
///
/// Plain discriminant enum, so `Copy` is free. `PartialEq`/`Eq`/`Hash` are
/// derived so callers can compare algorithms and use them as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    Sha1,
    Sha224,
    Sha256,
    Sha384,
    Sha512,
    Md5,
    Blake2b,
}
26
27impl HashAlgorithm {
28    pub fn name(self) -> &'static str {
29        match self {
30            HashAlgorithm::Sha1 => "SHA1",
31            HashAlgorithm::Sha224 => "SHA224",
32            HashAlgorithm::Sha256 => "SHA256",
33            HashAlgorithm::Sha384 => "SHA384",
34            HashAlgorithm::Sha512 => "SHA512",
35            HashAlgorithm::Md5 => "MD5",
36            HashAlgorithm::Blake2b => "BLAKE2b",
37        }
38    }
39}
40
// ── Generic hash helpers ────────────────────────────────────────────

/// Single-shot hash of an in-memory slice via the RustCrypto `Digest` trait
/// (non-Linux fallback; on Linux OpenSSL is used instead).
/// Returns the lowercase hex digest.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    hex_encode(&D::digest(data))
}
48
/// Streaming hash over an arbitrary reader via the RustCrypto `Digest` trait
/// (non-Linux fallback). Reuses the thread-local STREAM_BUF so repeated calls
/// on the same thread do not re-allocate the I/O buffer.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = D::new();
        loop {
            // read_full is defined elsewhere in this file; presumably it retries
            // short reads until the buffer is full or EOF — TODO confirm.
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break; // EOF
            }
            // Only the n bytes actually read are hashed.
            hasher.update(&buf[..n]);
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}
66
// ── Public hashing API ──────────────────────────────────────────────

/// Buffer size for streaming hash I/O.
/// 128KB matches GNU coreutils' buffer size (BUFSIZE=131072), which works well with kernel readahead.
/// Many small reads allow the kernel to pipeline I/O efficiently, reducing latency
/// vs fewer large reads that stall waiting for the full buffer to fill.
const HASH_READ_BUF: usize = 131072;

// Thread-local reusable buffer for streaming hash I/O.
// Allocated LAZILY (only on the first streaming-hash call) so workloads that
// never hit the streaming path (e.g., "sha256sum *.txt" where every file is
// small) pay nothing. NOTE(review): older comments here mentioned an 8MB
// buffer; the current size is HASH_READ_BUF = 128KB.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}
81
/// Ensure the streaming buffer is at least HASH_READ_BUF bytes.
/// Called only on the streaming path, so small-file workloads never allocate
/// the streaming buffer at all. (NOTE(review): the buffer is HASH_READ_BUF =
/// 128KB, not 8MB as older comments elsewhere suggested.)
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() < HASH_READ_BUF {
        // resize() zero-fills; callers hash only the first n bytes actually read.
        buf.resize(HASH_READ_BUF, 0);
    }
}
90
91// ── OpenSSL-accelerated hash functions (Linux) ───────────────────────
92// OpenSSL's libcrypto provides the fastest SHA implementations, using
93// hardware-specific assembly (SHA-NI, AVX2/AVX512, NEON) tuned for each CPU.
94// This matches what GNU coreutils uses internally.
95
96/// Single-shot hash using OpenSSL (Linux).
97/// Returns an error if OpenSSL rejects the algorithm (e.g. FIPS mode).
98#[cfg(target_os = "linux")]
99#[inline]
100fn openssl_hash_bytes(md: openssl::hash::MessageDigest, data: &[u8]) -> io::Result<String> {
101    let digest = openssl::hash::hash(md, data).map_err(|e| io::Error::other(e.to_string()))?;
102    Ok(hex_encode(&digest))
103}
104
/// Streaming hash using an OpenSSL `Hasher` (Linux).
/// Reads through the thread-local STREAM_BUF and feeds each chunk to the
/// hasher; OpenSSL errors are mapped to `io::Error` so callers deal with a
/// single error type.
#[cfg(target_os = "linux")]
fn openssl_hash_reader(
    md: openssl::hash::MessageDigest,
    mut reader: impl Read,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher =
            openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e.to_string()))?;
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break; // EOF
            }
            hasher
                .update(&buf[..n])
                .map_err(|e| io::Error::other(e.to_string()))?;
        }
        let digest = hasher
            .finish()
            .map_err(|e| io::Error::other(e.to_string()))?;
        Ok(hex_encode(&digest))
    })
}
131
/// Single-shot hash and write hex directly to buffer using OpenSSL (Linux).
/// Returns an error if OpenSSL rejects the algorithm (e.g. FIPS mode).
/// Returns the number of hex bytes written (digest length × 2); `out` must be
/// at least that large.
#[cfg(target_os = "linux")]
#[inline]
fn openssl_hash_bytes_to_buf(
    md: openssl::hash::MessageDigest,
    data: &[u8],
    out: &mut [u8],
) -> io::Result<usize> {
    let digest = openssl::hash::hash(md, data).map_err(|e| io::Error::other(e.to_string()))?;
    hex_encode_to_slice(&digest, out);
    Ok(digest.len() * 2)
}
145
// ── Ring-accelerated hash functions (non-Apple, non-Linux targets) ────
// ring provides BoringSSL assembly with SHA-NI/AVX2/NEON for Windows/FreeBSD.

/// Single-shot hash using ring::digest (non-Apple, non-Linux).
/// ring's digest API cannot fail here; the io::Result wrapper only keeps the
/// signature uniform with the OpenSSL backend.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
#[inline]
fn ring_hash_bytes(algo: &'static ring::digest::Algorithm, data: &[u8]) -> io::Result<String> {
    Ok(hex_encode(ring::digest::digest(algo, data).as_ref()))
}
155
/// Streaming hash using ring::digest::Context (non-Apple, non-Linux).
/// Same chunked read loop as the OpenSSL backend, sharing the thread-local
/// STREAM_BUF.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn ring_hash_reader(
    algo: &'static ring::digest::Algorithm,
    mut reader: impl Read,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut ctx = ring::digest::Context::new(algo);
        loop {
            let n = read_full(&mut reader, &mut buf)?;
            if n == 0 {
                break; // EOF
            }
            ctx.update(&buf[..n]);
        }
        Ok(hex_encode(ctx.finish().as_ref()))
    })
}
176
// ── Algorithm → OpenSSL MessageDigest mapping (Linux) ──────────────────
// Centralizes OpenSSL algorithm dispatch, used by hash_bytes, hash_stream_with_prefix,
// hash_file_streaming, and hash_file_pipelined_read.

/// Map a `HashAlgorithm` to its OpenSSL `MessageDigest`.
///
/// # Panics
/// Panics on `Blake2b`: BLAKE2b is handled by blake2b_simd, never by OpenSSL,
/// so callers must filter it out before dispatching here.
#[cfg(target_os = "linux")]
fn algo_to_openssl_md(algo: HashAlgorithm) -> openssl::hash::MessageDigest {
    match algo {
        HashAlgorithm::Sha1 => openssl::hash::MessageDigest::sha1(),
        HashAlgorithm::Sha224 => openssl::hash::MessageDigest::sha224(),
        HashAlgorithm::Sha256 => openssl::hash::MessageDigest::sha256(),
        HashAlgorithm::Sha384 => openssl::hash::MessageDigest::sha384(),
        HashAlgorithm::Sha512 => openssl::hash::MessageDigest::sha512(),
        HashAlgorithm::Md5 => openssl::hash::MessageDigest::md5(),
        HashAlgorithm::Blake2b => unreachable!("Blake2b uses its own hasher"),
    }
}
193
// ── SHA-256 ───────────────────────────────────────────────────────────
// Linux: OpenSSL (system libcrypto, matches GNU coreutils)
// Windows/FreeBSD: ring (BoringSSL assembly)
// Apple: sha2 crate (ring doesn't compile on Apple Silicon)

/// Single-shot SHA-256 via OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha256_bytes(data: &[u8]) -> io::Result<String> {
    openssl_hash_bytes(openssl::hash::MessageDigest::sha256(), data)
}

/// Single-shot SHA-256 via ring (non-Linux, non-Apple).
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha256_bytes(data: &[u8]) -> io::Result<String> {
    ring_hash_bytes(&ring::digest::SHA256, data)
}

/// Single-shot SHA-256 via the sha2 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> io::Result<String> {
    Ok(hash_digest::<sha2::Sha256>(data))
}

/// Streaming SHA-256 via OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    openssl_hash_reader(openssl::hash::MessageDigest::sha256(), reader)
}

/// Streaming SHA-256 via ring (non-Linux, non-Apple).
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    ring_hash_reader(&ring::digest::SHA256, reader)
}

/// Streaming SHA-256 via the sha2 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha256>(reader)
}
228
// ── SHA-1 ─────────────────────────────────────────────────────────────

/// Single-shot SHA-1 via OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha1_bytes(data: &[u8]) -> io::Result<String> {
    openssl_hash_bytes(openssl::hash::MessageDigest::sha1(), data)
}

/// Single-shot SHA-1 via ring (non-Linux, non-Apple). ring gates SHA-1 behind
/// a "legacy use" constant; it is provided here for checksum compatibility.
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha1_bytes(data: &[u8]) -> io::Result<String> {
    ring_hash_bytes(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data)
}

/// Single-shot SHA-1 via the sha1 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha1_bytes(data: &[u8]) -> io::Result<String> {
    Ok(hash_digest::<sha1::Sha1>(data))
}

/// Streaming SHA-1 via OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha1_reader(reader: impl Read) -> io::Result<String> {
    openssl_hash_reader(openssl::hash::MessageDigest::sha1(), reader)
}

/// Streaming SHA-1 via ring (non-Linux, non-Apple).
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha1_reader(reader: impl Read) -> io::Result<String> {
    ring_hash_reader(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, reader)
}

/// Streaming SHA-1 via the sha1 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha1_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha1::Sha1>(reader)
}
260
261// ── SHA-224 ───────────────────────────────────────────────────────────
262// ring does not support SHA-224. Use OpenSSL on Linux, sha2 crate elsewhere.
263
264#[cfg(target_os = "linux")]
265fn sha224_bytes(data: &[u8]) -> io::Result<String> {
266    openssl_hash_bytes(openssl::hash::MessageDigest::sha224(), data)
267}
268
269#[cfg(not(target_os = "linux"))]
270fn sha224_bytes(data: &[u8]) -> io::Result<String> {
271    Ok(hex_encode(&sha2::Sha224::digest(data)))
272}
273
274#[cfg(target_os = "linux")]
275fn sha224_reader(reader: impl Read) -> io::Result<String> {
276    openssl_hash_reader(openssl::hash::MessageDigest::sha224(), reader)
277}
278
279#[cfg(not(target_os = "linux"))]
280fn sha224_reader(reader: impl Read) -> io::Result<String> {
281    STREAM_BUF.with(|cell| {
282        let mut buf = cell.borrow_mut();
283        ensure_stream_buf(&mut buf);
284        let mut hasher = <sha2::Sha224 as digest::Digest>::new();
285        let mut reader = reader;
286        loop {
287            let n = read_full(&mut reader, &mut buf)?;
288            if n == 0 {
289                break;
290            }
291            digest::Digest::update(&mut hasher, &buf[..n]);
292        }
293        Ok(hex_encode(&digest::Digest::finalize(hasher)))
294    })
295}
296
297// ── SHA-384 ───────────────────────────────────────────────────────────
298
299#[cfg(target_os = "linux")]
300fn sha384_bytes(data: &[u8]) -> io::Result<String> {
301    openssl_hash_bytes(openssl::hash::MessageDigest::sha384(), data)
302}
303
304#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
305fn sha384_bytes(data: &[u8]) -> io::Result<String> {
306    ring_hash_bytes(&ring::digest::SHA384, data)
307}
308
309#[cfg(target_vendor = "apple")]
310fn sha384_bytes(data: &[u8]) -> io::Result<String> {
311    Ok(hex_encode(&sha2::Sha384::digest(data)))
312}
313
314#[cfg(target_os = "linux")]
315fn sha384_reader(reader: impl Read) -> io::Result<String> {
316    openssl_hash_reader(openssl::hash::MessageDigest::sha384(), reader)
317}
318
319#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
320fn sha384_reader(reader: impl Read) -> io::Result<String> {
321    ring_hash_reader(&ring::digest::SHA384, reader)
322}
323
324#[cfg(target_vendor = "apple")]
325fn sha384_reader(reader: impl Read) -> io::Result<String> {
326    STREAM_BUF.with(|cell| {
327        let mut buf = cell.borrow_mut();
328        ensure_stream_buf(&mut buf);
329        let mut hasher = <sha2::Sha384 as digest::Digest>::new();
330        let mut reader = reader;
331        loop {
332            let n = read_full(&mut reader, &mut buf)?;
333            if n == 0 {
334                break;
335            }
336            digest::Digest::update(&mut hasher, &buf[..n]);
337        }
338        Ok(hex_encode(&digest::Digest::finalize(hasher)))
339    })
340}
341
342// ── SHA-512 ───────────────────────────────────────────────────────────
343
344#[cfg(target_os = "linux")]
345fn sha512_bytes(data: &[u8]) -> io::Result<String> {
346    openssl_hash_bytes(openssl::hash::MessageDigest::sha512(), data)
347}
348
349#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
350fn sha512_bytes(data: &[u8]) -> io::Result<String> {
351    ring_hash_bytes(&ring::digest::SHA512, data)
352}
353
354#[cfg(target_vendor = "apple")]
355fn sha512_bytes(data: &[u8]) -> io::Result<String> {
356    Ok(hex_encode(&sha2::Sha512::digest(data)))
357}
358
359#[cfg(target_os = "linux")]
360fn sha512_reader(reader: impl Read) -> io::Result<String> {
361    openssl_hash_reader(openssl::hash::MessageDigest::sha512(), reader)
362}
363
364#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
365fn sha512_reader(reader: impl Read) -> io::Result<String> {
366    ring_hash_reader(&ring::digest::SHA512, reader)
367}
368
369#[cfg(target_vendor = "apple")]
370fn sha512_reader(reader: impl Read) -> io::Result<String> {
371    STREAM_BUF.with(|cell| {
372        let mut buf = cell.borrow_mut();
373        ensure_stream_buf(&mut buf);
374        let mut hasher = <sha2::Sha512 as digest::Digest>::new();
375        let mut reader = reader;
376        loop {
377            let n = read_full(&mut reader, &mut buf)?;
378            if n == 0 {
379                break;
380            }
381            digest::Digest::update(&mut hasher, &buf[..n]);
382        }
383        Ok(hex_encode(&digest::Digest::finalize(hasher)))
384    })
385}
386
/// Compute hash of a byte slice directly (zero-copy fast path).
/// Returns the lowercase hex digest.
/// Returns an error if the underlying crypto library rejects the algorithm.
pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> io::Result<String> {
    match algo {
        HashAlgorithm::Sha1 => sha1_bytes(data),
        HashAlgorithm::Sha224 => sha224_bytes(data),
        HashAlgorithm::Sha256 => sha256_bytes(data),
        HashAlgorithm::Sha384 => sha384_bytes(data),
        HashAlgorithm::Sha512 => sha512_bytes(data),
        HashAlgorithm::Md5 => md5_bytes(data),
        // BLAKE2b bypasses the OpenSSL/ring dispatch: blake2b_simd is used
        // on every platform.
        HashAlgorithm::Blake2b => {
            let hash = blake2b_simd::blake2b(data);
            Ok(hex_encode(hash.as_bytes()))
        }
    }
}
403
/// Hash data and write hex result directly into an output buffer.
/// Returns the number of hex bytes written. Avoids String allocation
/// on the critical single-file fast path.
/// `out` must be at least 128 bytes for BLAKE2b/SHA512 (64 * 2), 64 for SHA256, 32 for MD5, etc.
/// (hex_encode_to_slice indexes into `out`, so an undersized buffer panics.)
#[cfg(target_os = "linux")]
pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    match algo {
        // MD5 dispatches straight to OpenSSL; no per-platform wrapper exists
        // for the to-buf variant.
        HashAlgorithm::Md5 => {
            openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::md5(), data, out)
        }
        HashAlgorithm::Sha1 => sha1_bytes_to_buf(data, out),
        HashAlgorithm::Sha224 => sha224_bytes_to_buf(data, out),
        HashAlgorithm::Sha256 => sha256_bytes_to_buf(data, out),
        HashAlgorithm::Sha384 => sha384_bytes_to_buf(data, out),
        HashAlgorithm::Sha512 => sha512_bytes_to_buf(data, out),
        // BLAKE2b is always blake2b_simd (64-byte digest → 128 hex chars).
        HashAlgorithm::Blake2b => {
            let hash = blake2b_simd::blake2b(data);
            let bytes = hash.as_bytes();
            hex_encode_to_slice(bytes, out);
            Ok(bytes.len() * 2)
        }
    }
}
427
/// SHA-1 into a caller-provided hex buffer: OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha1(), data, out)
}
/// SHA-1 into a caller-provided hex buffer: ring (non-Linux, non-Apple).
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = ring::digest::digest(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data);
    hex_encode_to_slice(digest.as_ref(), out);
    Ok(40) // 20-byte digest → 40 hex chars
}
/// SHA-1 into a caller-provided hex buffer: sha1 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha1_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = sha1::Sha1::digest(data);
    hex_encode_to_slice(&digest, out);
    Ok(40) // 20-byte digest → 40 hex chars
}
444
445#[cfg(target_os = "linux")]
446fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
447    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha224(), data, out)
448}
449#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
450fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
451    let digest = <sha2::Sha224 as sha2::Digest>::digest(data);
452    hex_encode_to_slice(&digest, out);
453    Ok(56)
454}
455#[cfg(target_vendor = "apple")]
456fn sha224_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
457    let digest = <sha2::Sha224 as sha2::Digest>::digest(data);
458    hex_encode_to_slice(&digest, out);
459    Ok(56)
460}
461
/// SHA-256 into a caller-provided hex buffer: OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha256(), data, out)
}
/// SHA-256 into a caller-provided hex buffer: ring (non-Linux, non-Apple).
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = ring::digest::digest(&ring::digest::SHA256, data);
    hex_encode_to_slice(digest.as_ref(), out);
    Ok(64) // 32-byte digest → 64 hex chars
}
/// SHA-256 into a caller-provided hex buffer: sha2 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha256_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = <sha2::Sha256 as sha2::Digest>::digest(data);
    hex_encode_to_slice(&digest, out);
    Ok(64) // 32-byte digest → 64 hex chars
}
478
/// SHA-384 into a caller-provided hex buffer: OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha384(), data, out)
}
/// SHA-384 into a caller-provided hex buffer: ring (non-Linux, non-Apple).
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = ring::digest::digest(&ring::digest::SHA384, data);
    hex_encode_to_slice(digest.as_ref(), out);
    Ok(96) // 48-byte digest → 96 hex chars
}
/// SHA-384 into a caller-provided hex buffer: sha2 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha384_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = <sha2::Sha384 as sha2::Digest>::digest(data);
    hex_encode_to_slice(&digest, out);
    Ok(96) // 48-byte digest → 96 hex chars
}
495
/// SHA-512 into a caller-provided hex buffer: OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    openssl_hash_bytes_to_buf(openssl::hash::MessageDigest::sha512(), data, out)
}
/// SHA-512 into a caller-provided hex buffer: ring (non-Linux, non-Apple).
#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = ring::digest::digest(&ring::digest::SHA512, data);
    hex_encode_to_slice(digest.as_ref(), out);
    Ok(128) // 64-byte digest → 128 hex chars
}
/// SHA-512 into a caller-provided hex buffer: sha2 crate (Apple).
#[cfg(target_vendor = "apple")]
fn sha512_bytes_to_buf(data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    let digest = <sha2::Sha512 as sha2::Digest>::digest(data);
    hex_encode_to_slice(&digest, out);
    Ok(128) // 64-byte digest → 128 hex chars
}
512
/// Hash a single file using raw syscalls and write hex directly to output buffer.
/// Returns number of hex bytes written.
/// This is the absolute minimum-overhead path for single-file hashing:
/// raw open + fstat + read + hash + hex encode, with zero String allocation.
///
/// O_NOATIME is attempted first (skips the atime update); on EPERM
/// (O_NOATIME requires file ownership or CAP_FOWNER) the flag is disabled
/// globally via NOATIME_SUPPORTED and the open is retried without it.
#[cfg(target_os = "linux")]
pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
    use std::os::unix::ffi::OsStrExt;

    // CString::new only fails when the path contains an interior NUL byte.
    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM while O_NOATIME was requested: remember that it is unsupported
        // and retry once without the flag.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            // hash_from_raw_fd_to_buf takes ownership of fd2 and closes it.
            return hash_from_raw_fd_to_buf(algo, fd2, out);
        }
        return Err(err);
    }
    hash_from_raw_fd_to_buf(algo, fd, out)
}
545
/// Hash from raw fd and write hex directly to output buffer.
/// For tiny files (<8KB), the entire path is raw syscalls + stack buffer — zero heap.
/// For larger files, falls back to the String-returning streaming paths and
/// copies the hex into `out`.
///
/// Takes ownership of `fd` on every path: it is closed either via libc::close
/// or by the `File` wrapper created with from_raw_fd.
/// NOTE(review): `out` must be large enough for the hex digest (up to 128
/// bytes); the final copy_from_slice panics on an undersized buffer.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
    // SAFETY: fstat fills the zeroed stat struct; fd is a valid open descriptor.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file: hash of the empty message, no reads needed.
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &[], out);
    }

    // Tiny files (<8KB): fully raw path — zero heap allocation
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        // Read until st_size bytes are in, retrying on EINTR; a short read
        // (file truncated after fstat) just hashes what was read.
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break; // unexpected EOF (file shrank); hash the partial data
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &buf[..total], out);
    }

    // Larger files: fall back to hash_from_raw_fd which returns a String,
    // then copy the hex into out.
    use std::os::unix::io::FromRawFd;
    // SAFETY: we own fd exclusively; File takes over closing it.
    let file = unsafe { File::from_raw_fd(fd) };
    let hash_str = if is_regular && size > 0 {
        hash_regular_file(algo, file, size)?
    } else {
        // Non-regular fd (pipe, device): size is meaningless, stream it.
        hash_reader(algo, file)?
    };
    let hex_bytes = hash_str.as_bytes();
    out[..hex_bytes.len()].copy_from_slice(hex_bytes);
    Ok(hex_bytes.len())
}
616
// ── MD5 ─────────────────────────────────────────────────────────────
// Linux: OpenSSL (same assembly-optimized library as GNU coreutils)
// Other platforms: md-5 crate (pure Rust)

/// Single-shot MD5 via OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn md5_bytes(data: &[u8]) -> io::Result<String> {
    openssl_hash_bytes(openssl::hash::MessageDigest::md5(), data)
}

/// Single-shot MD5 via the md-5 crate (non-Linux).
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> io::Result<String> {
    Ok(hash_digest::<Md5>(data))
}

/// Streaming MD5 via OpenSSL (Linux).
#[cfg(target_os = "linux")]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    openssl_hash_reader(openssl::hash::MessageDigest::md5(), reader)
}

/// Streaming MD5 via the md-5 crate (non-Linux).
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}
640
/// Compute hash of data from a reader, returning hex string.
/// Dispatches to the per-algorithm streaming backends; BLAKE2b goes to
/// blake2b_hash_reader (defined elsewhere in this file) with 64 —
/// presumably the digest length in bytes (full BLAKE2b-512) — TODO confirm.
pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
    match algo {
        HashAlgorithm::Sha1 => sha1_reader(reader),
        HashAlgorithm::Sha224 => sha224_reader(reader),
        HashAlgorithm::Sha256 => sha256_reader(reader),
        HashAlgorithm::Sha384 => sha384_reader(reader),
        HashAlgorithm::Sha512 => sha512_reader(reader),
        HashAlgorithm::Md5 => md5_reader(reader),
        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
    }
}
653
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);

/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
/// Caches whether O_NOATIME works to avoid double-open on every file.
/// Any error other than EPERM-from-O_NOATIME is propagated as-is.
#[cfg(target_os = "linux")]
fn open_noatime(path: &Path) -> io::Result<File> {
    use std::os::unix::fs::OpenOptionsExt;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        match std::fs::OpenOptions::new()
            .read(true)
            .custom_flags(libc::O_NOATIME)
            .open(path)
        {
            Ok(f) => return Ok(f),
            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            }
            Err(e) => return Err(e), // Real error, propagate
        }
    }
    // Either O_NOATIME is known-unsupported or it just failed: plain open.
    File::open(path)
}

/// Non-Linux targets have no O_NOATIME; plain open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
685
/// Open a file and get its metadata in one step.
/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
/// Returns `(file, size_in_bytes, is_regular_file)`.
#[cfg(target_os = "linux")]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let fd = {
        use std::os::unix::io::AsRawFd;
        file.as_raw_fd()
    };
    // SAFETY: fstat fills the zeroed stat struct; fd stays valid (owned by `file`).
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        return Err(io::Error::last_os_error());
    }
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
    let size = stat.st_size as u64;
    Ok((file, size, is_regular))
}

/// Portable fallback: go through std's metadata call.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let metadata = file.metadata()?;
    Ok((file, metadata.len(), metadata.file_type().is_file()))
}
712
/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
/// NOTE(review): not referenced in this section of the file; presumably used
/// by the size-tiered file-hashing paths below — confirm before removing.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
const TINY_FILE_LIMIT: u64 = 8 * 1024;

// Thread-local reusable buffer for single-read hash.
// Grows lazily up to SMALL_FILE_LIMIT (16MB). Initial 64KB allocation
// handles tiny files; larger files trigger one grow that persists for reuse.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
737
738/// Optimized hash for large files (>=16MB) on Linux.
739/// Hash large files (>=16MB) using streaming I/O with fadvise + ring Context.
740/// Uses sequential fadvise hint for kernel readahead, then streams through
741/// hash context in large chunks. For large files (>64MB), uses double-buffered
742/// reader thread to overlap I/O and hashing.
743#[cfg(target_os = "linux")]
744fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
745    // For very large files, double-buffered reader thread overlaps I/O and CPU.
746    // For medium files, single-thread streaming is faster (avoids thread overhead).
747    if file_size >= 64 * 1024 * 1024 {
748        hash_file_pipelined_read(algo, file, file_size)
749    } else {
750        hash_file_streaming(algo, file, file_size)
751    }
752}
753
/// Simple single-thread streaming hash with fadvise.
/// Optimal for files 16-64MB where thread overhead exceeds I/O overlap benefit.
#[cfg(target_os = "linux")]
fn hash_file_streaming(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    // Best-effort sequential readahead hint; failure is harmless, so the
    // return value is deliberately ignored.
    let _ = unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        )
    };

    // Use OpenSSL for all algorithms on Linux (same library as GNU coreutils).
    // BLAKE2b is the exception: blake2b_simd with a 64-byte digest.
    if matches!(algo, HashAlgorithm::Blake2b) {
        blake2b_hash_reader(file, 64)
    } else {
        openssl_hash_reader(algo_to_openssl_md(algo), file)
    }
}
776
777/// Streaming fallback for large files when mmap is unavailable.
778/// Uses double-buffered reader thread with fadvise hints.
779/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
780#[cfg(target_os = "linux")]
781fn hash_file_pipelined_read(
782    algo: HashAlgorithm,
783    mut file: File,
784    file_size: u64,
785) -> io::Result<String> {
786    use std::os::unix::io::AsRawFd;
787
788    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4MB per buffer
789
790    let _ = unsafe {
791        libc::posix_fadvise(
792            file.as_raw_fd(),
793            0,
794            file_size as i64,
795            libc::POSIX_FADV_SEQUENTIAL,
796        )
797    };
798
799    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
800    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
801    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
802
803    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
804        while let Ok(mut buf) = buf_rx.recv() {
805            let mut total = 0;
806            while total < buf.len() {
807                match file.read(&mut buf[total..]) {
808                    Ok(0) => break,
809                    Ok(n) => total += n,
810                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
811                    Err(e) => return Err(e),
812                }
813            }
814            if total == 0 {
815                break;
816            }
817            if tx.send((buf, total)).is_err() {
818                break;
819            }
820        }
821        Ok(())
822    });
823
824    // Use OpenSSL Hasher for all hash algorithms (same library as GNU coreutils).
825    macro_rules! hash_pipelined_openssl {
826        ($md:expr) => {{
827            let mut hasher =
828                openssl::hash::Hasher::new($md).map_err(|e| io::Error::other(e.to_string()))?;
829            while let Ok((buf, n)) = rx.recv() {
830                hasher
831                    .update(&buf[..n])
832                    .map_err(|e| io::Error::other(e.to_string()))?;
833                let _ = buf_tx.send(buf);
834            }
835            let digest = hasher
836                .finish()
837                .map_err(|e| io::Error::other(e.to_string()))?;
838            Ok(hex_encode(&digest))
839        }};
840    }
841
842    let hash_result: io::Result<String> = if matches!(algo, HashAlgorithm::Blake2b) {
843        let mut state = blake2b_simd::Params::new().to_state();
844        while let Ok((buf, n)) = rx.recv() {
845            state.update(&buf[..n]);
846            let _ = buf_tx.send(buf);
847        }
848        Ok(hex_encode(state.finalize().as_bytes()))
849    } else {
850        hash_pipelined_openssl!(algo_to_openssl_md(algo))
851    };
852
853    match reader_handle.join() {
854        Ok(Ok(())) => {}
855        Ok(Err(e)) => {
856            if hash_result.is_ok() {
857                return Err(e);
858            }
859        }
860        Err(payload) => {
861            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
862                format!("reader thread panicked: {}", s)
863            } else if let Some(s) = payload.downcast_ref::<String>() {
864                format!("reader thread panicked: {}", s)
865            } else {
866                "reader thread panicked".to_string()
867            };
868            return Err(io::Error::other(msg));
869        }
870    }
871
872    hash_result
873}
874
875/// Hash a known-regular file using tiered I/O strategy based on size.
876/// - Large (>=16MB): mmap with HugePage/PopulateRead hints, pipelined fallback
877/// - Small/Medium (8KB-16MB): single read into thread-local buffer + single-shot hash
878///
879/// SAFETY: mmap is safe for regular local files opened just above. The fallback
880/// to streaming I/O (hash_reader/hash_file_pipelined) handles mmap failures at
881/// map time, but cannot protect against post-map truncation. If the file is
882/// truncated or backing storage disappears after mapping (e.g. NFS), the kernel
883/// delivers SIGBUS — acceptable, matching other mmap tools.
884fn hash_regular_file(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
885    // Large files (>=SMALL_FILE_LIMIT): mmap for zero-copy single-shot hash.
886    if file_size >= SMALL_FILE_LIMIT {
887        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
888        if let Ok(mmap) = mmap_result {
889            #[cfg(target_os = "linux")]
890            {
891                if file_size >= 2 * 1024 * 1024 {
892                    let _ = mmap.advise(memmap2::Advice::HugePage);
893                }
894                let _ = mmap.advise(memmap2::Advice::Sequential);
895                // PopulateRead (Linux 5.14+) synchronously faults all pages before
896                // returning, giving warm TLB entries for hash_bytes. WillNeed is
897                // async and best-effort — pages may still fault during hashing.
898                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
899                    let _ = mmap.advise(memmap2::Advice::WillNeed);
900                }
901            }
902            return hash_bytes(algo, &mmap);
903        }
904        // mmap failed — fall back to streaming I/O
905        #[cfg(target_os = "linux")]
906        {
907            return hash_file_pipelined(algo, file, file_size);
908        }
909        #[cfg(not(target_os = "linux"))]
910        {
911            return hash_reader(algo, file);
912        }
913    }
914    // Files below SMALL_FILE_LIMIT (8KB-16MB): single read into thread-local
915    // buffer + single-shot SIMD hash. This is faster than streaming hash_reader
916    // (128KB chunked incremental updates) because: one read() syscall vs ~N,
917    // one hash_bytes() call vs N context.update() calls, and the thread-local
918    // buffer stays warm across sequential file hashing.
919    hash_file_small(algo, file, file_size as usize)
920}
921
922/// Hash a file by path. Uses tiered I/O strategy for regular files,
923/// streaming read for non-regular files.
924pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
925    let (file, file_size, is_regular) = open_and_stat(path)?;
926
927    if is_regular && file_size == 0 {
928        return hash_bytes(algo, &[]);
929    }
930
931    if file_size > 0 && is_regular {
932        if file_size < TINY_FILE_LIMIT {
933            return hash_file_tiny(algo, file, file_size as usize);
934        }
935        return hash_regular_file(algo, file, file_size);
936    }
937
938    // Non-regular files or fallback: stream
939    #[cfg(target_os = "linux")]
940    if file_size >= FADVISE_MIN_SIZE {
941        use std::os::unix::io::AsRawFd;
942        let _ = unsafe {
943            libc::posix_fadvise(
944                file.as_raw_fd(),
945                0,
946                file_size as i64,
947                libc::POSIX_FADV_SEQUENTIAL,
948            )
949        };
950    }
951    hash_reader(algo, file)
952}
953
954/// Hash a tiny file (<8KB) using a stack-allocated buffer.
955/// Single read() syscall, zero heap allocation on the data path.
956/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
957#[inline]
958fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
959    let mut buf = [0u8; 8192];
960    let mut total = 0;
961    // Read with known size — usually completes in a single read() for regular files
962    while total < size {
963        match file.read(&mut buf[total..size]) {
964            Ok(0) => break,
965            Ok(n) => total += n,
966            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
967            Err(e) => return Err(e),
968        }
969    }
970    hash_bytes(algo, &buf[..total])
971}
972
973/// Hash a small file by reading it entirely into a thread-local buffer,
974/// then using the single-shot hash function. Avoids per-file Hasher allocation.
975#[inline]
976fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
977    SMALL_FILE_BUF.with(|cell| {
978        let mut buf = cell.borrow_mut();
979        // Reset length but keep allocation, then grow if needed
980        buf.clear();
981        buf.reserve(size);
982        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
983        // directly and only access buf[..total] where total <= size <= capacity.
984        unsafe {
985            buf.set_len(size);
986        }
987        let mut total = 0;
988        while total < size {
989            match file.read(&mut buf[total..size]) {
990                Ok(0) => break,
991                Ok(n) => total += n,
992                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
993                Err(e) => return Err(e),
994            }
995        }
996        hash_bytes(algo, &buf[..total])
997    })
998}
999
1000/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
1001pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
1002    let stdin = io::stdin();
1003    // Hint kernel for sequential access if stdin is a regular file (redirect)
1004    #[cfg(target_os = "linux")]
1005    {
1006        use std::os::unix::io::AsRawFd;
1007        let fd = stdin.as_raw_fd();
1008        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1009        if unsafe { libc::fstat(fd, &mut stat) } == 0
1010            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1011            && stat.st_size > 0
1012        {
1013            unsafe {
1014                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1015            }
1016        }
1017    }
1018    // Streaming hash — works for both pipe and file-redirect stdin
1019    hash_reader(algo, stdin.lock())
1020}
1021
/// Check if parallel hashing is worthwhile for the given file paths.
/// Any batch of two or more files is parallelized: rayon's pool is lazily
/// initialized once and reused, so per-file work-stealing overhead is
/// negligible (~1µs), and skipping a stat()-based size check saves one
/// syscall per file.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    1 < paths.len()
}
1029
1030/// Issue readahead hints for a list of file paths to warm the page cache.
1031/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
1032/// Only issues hints for files >= 1MB; small files are read fast enough
1033/// that the fadvise syscall overhead isn't worth it.
1034#[cfg(target_os = "linux")]
1035pub fn readahead_files(paths: &[&Path]) {
1036    use std::os::unix::io::AsRawFd;
1037    for path in paths {
1038        if let Ok(file) = open_noatime(path) {
1039            if let Ok(meta) = file.metadata() {
1040                let len = meta.len();
1041                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
1042                    unsafe {
1043                        libc::posix_fadvise(
1044                            file.as_raw_fd(),
1045                            0,
1046                            len as i64,
1047                            libc::POSIX_FADV_WILLNEED,
1048                        );
1049                    }
1050                }
1051            }
1052        }
1053    }
1054}
1055
/// Readahead hints are implemented only on Linux (posix_fadvise); on every
/// other platform this is deliberately a no-op so callers need no cfg logic.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // Intentionally empty.
}
1060
1061// --- BLAKE2b variable-length functions (using blake2b_simd) ---
1062
1063/// Hash raw data with BLAKE2b variable output length.
1064/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
1065pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
1066    let hash = blake2b_simd::Params::new()
1067        .hash_length(output_bytes)
1068        .hash(data);
1069    hex_encode(hash.as_bytes())
1070}
1071
1072/// Hash a reader with BLAKE2b variable output length.
1073/// Uses thread-local buffer for cache-friendly streaming.
1074pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
1075    STREAM_BUF.with(|cell| {
1076        let mut buf = cell.borrow_mut();
1077        ensure_stream_buf(&mut buf);
1078        let mut state = blake2b_simd::Params::new()
1079            .hash_length(output_bytes)
1080            .to_state();
1081        loop {
1082            let n = read_full(&mut reader, &mut buf)?;
1083            if n == 0 {
1084                break;
1085            }
1086            state.update(&buf[..n]);
1087        }
1088        Ok(hex_encode(state.finalize().as_bytes()))
1089    })
1090}
1091
1092/// Hash a file with BLAKE2b variable output length.
1093/// Uses mmap for large files (zero-copy), single-read for small files,
1094/// and streaming read as fallback.
1095pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
1096    let (file, file_size, is_regular) = open_and_stat(path)?;
1097
1098    if is_regular && file_size == 0 {
1099        return Ok(blake2b_hash_data(&[], output_bytes));
1100    }
1101
1102    if file_size > 0 && is_regular {
1103        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
1104        if file_size < TINY_FILE_LIMIT {
1105            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
1106        }
1107        // Large files (>=16MB): I/O pipelining on Linux, mmap on other platforms
1108        if file_size >= SMALL_FILE_LIMIT {
1109            #[cfg(target_os = "linux")]
1110            {
1111                return blake2b_hash_file_pipelined(file, file_size, output_bytes);
1112            }
1113            #[cfg(not(target_os = "linux"))]
1114            {
1115                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1116                if let Ok(mmap) = mmap_result {
1117                    return Ok(blake2b_hash_data(&mmap, output_bytes));
1118                }
1119            }
1120        }
1121        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash
1122        if file_size < SMALL_FILE_LIMIT {
1123            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
1124        }
1125    }
1126
1127    // Non-regular files or fallback: stream
1128    #[cfg(target_os = "linux")]
1129    if file_size >= FADVISE_MIN_SIZE {
1130        use std::os::unix::io::AsRawFd;
1131        let _ = unsafe {
1132            libc::posix_fadvise(
1133                file.as_raw_fd(),
1134                0,
1135                file_size as i64,
1136                libc::POSIX_FADV_SEQUENTIAL,
1137            )
1138        };
1139    }
1140    blake2b_hash_reader(file, output_bytes)
1141}
1142
1143/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
1144#[inline]
1145fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1146    let mut buf = [0u8; 8192];
1147    let mut total = 0;
1148    while total < size {
1149        match file.read(&mut buf[total..size]) {
1150            Ok(0) => break,
1151            Ok(n) => total += n,
1152            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1153            Err(e) => return Err(e),
1154        }
1155    }
1156    Ok(blake2b_hash_data(&buf[..total], output_bytes))
1157}
1158
1159/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
1160#[inline]
1161fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1162    SMALL_FILE_BUF.with(|cell| {
1163        let mut buf = cell.borrow_mut();
1164        buf.clear();
1165        buf.reserve(size);
1166        // SAFETY: capacity >= size after clear+reserve
1167        unsafe {
1168            buf.set_len(size);
1169        }
1170        let mut total = 0;
1171        while total < size {
1172            match file.read(&mut buf[total..size]) {
1173                Ok(0) => break,
1174                Ok(n) => total += n,
1175                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1176                Err(e) => return Err(e),
1177            }
1178        }
1179        Ok(blake2b_hash_data(&buf[..total], output_bytes))
1180    })
1181}
1182
1183/// Optimized BLAKE2b hash for large files (>=16MB) on Linux.
1184/// Primary path: mmap with HUGEPAGE + POPULATE_READ for zero-copy, single-shot hash.
1185/// Eliminates thread spawn, channel synchronization, buffer allocation (24MB→0),
1186/// and read() memcpy overhead. Falls back to streaming I/O if mmap fails.
1187#[cfg(target_os = "linux")]
1188fn blake2b_hash_file_pipelined(
1189    file: File,
1190    file_size: u64,
1191    output_bytes: usize,
1192) -> io::Result<String> {
1193    // Primary path: mmap with huge pages for zero-copy single-shot hash.
1194    // Eliminates: thread spawn (~50µs), channel sync, buffer allocs (24MB),
1195    // 13+ read() syscalls, and page-cache → user-buffer memcpy.
1196    match unsafe { memmap2::MmapOptions::new().map(&file) } {
1197        Ok(mmap) => {
1198            // HUGEPAGE MUST come before any page faults: reduces 25,600 minor
1199            // faults (4KB) to ~50 faults (2MB) for 100MB. Saves ~12ms overhead.
1200            if file_size >= 2 * 1024 * 1024 {
1201                let _ = mmap.advise(memmap2::Advice::HugePage);
1202            }
1203            let _ = mmap.advise(memmap2::Advice::Sequential);
1204            // POPULATE_READ (Linux 5.14+): synchronously prefaults all pages with
1205            // huge pages before hashing begins. Falls back to WillNeed on older kernels.
1206            if file_size >= 4 * 1024 * 1024 {
1207                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1208                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1209                }
1210            } else {
1211                let _ = mmap.advise(memmap2::Advice::WillNeed);
1212            }
1213            // Single-shot hash: processes entire file in one call, streaming
1214            // directly from page cache with no user-space buffer copies.
1215            Ok(blake2b_hash_data(&mmap, output_bytes))
1216        }
1217        Err(_) => {
1218            // mmap failed (FUSE, NFS without mmap support, etc.) — fall back
1219            // to streaming pipelined I/O.
1220            blake2b_hash_file_streamed(file, file_size, output_bytes)
1221        }
1222    }
1223}
1224
1225/// Streaming fallback for BLAKE2b large files when mmap is unavailable.
1226/// Uses double-buffered reader thread with fadvise hints.
1227/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
1228#[cfg(target_os = "linux")]
1229fn blake2b_hash_file_streamed(
1230    mut file: File,
1231    file_size: u64,
1232    output_bytes: usize,
1233) -> io::Result<String> {
1234    use std::os::unix::io::AsRawFd;
1235
1236    const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; // 8MB per buffer
1237
1238    // Hint kernel for sequential access
1239    unsafe {
1240        libc::posix_fadvise(
1241            file.as_raw_fd(),
1242            0,
1243            file_size as i64,
1244            libc::POSIX_FADV_SEQUENTIAL,
1245        );
1246    }
1247
1248    // Double-buffered channels: reader fills one buffer while hasher processes another.
1249    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
1250    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
1251    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
1252
1253    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
1254        // Blocking recv reuses hasher's returned buffer (2 buffers total, not 3).
1255        while let Ok(mut buf) = buf_rx.recv() {
1256            let mut total = 0;
1257            while total < buf.len() {
1258                match file.read(&mut buf[total..]) {
1259                    Ok(0) => break,
1260                    Ok(n) => total += n,
1261                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1262                    Err(e) => return Err(e),
1263                }
1264            }
1265            if total == 0 {
1266                break;
1267            }
1268            if tx.send((buf, total)).is_err() {
1269                break;
1270            }
1271        }
1272        Ok(())
1273    });
1274
1275    let mut state = blake2b_simd::Params::new()
1276        .hash_length(output_bytes)
1277        .to_state();
1278    while let Ok((buf, n)) = rx.recv() {
1279        state.update(&buf[..n]);
1280        let _ = buf_tx.send(buf);
1281    }
1282    let hash_result = Ok(hex_encode(state.finalize().as_bytes()));
1283
1284    match reader_handle.join() {
1285        Ok(Ok(())) => {}
1286        Ok(Err(e)) => {
1287            if hash_result.is_ok() {
1288                return Err(e);
1289            }
1290        }
1291        Err(payload) => {
1292            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
1293                format!("reader thread panicked: {}", s)
1294            } else if let Some(s) = payload.downcast_ref::<String>() {
1295                format!("reader thread panicked: {}", s)
1296            } else {
1297                "reader thread panicked".to_string()
1298            };
1299            return Err(io::Error::other(msg));
1300        }
1301    }
1302
1303    hash_result
1304}
1305
1306/// Hash stdin with BLAKE2b variable output length.
1307/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
1308pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
1309    let stdin = io::stdin();
1310    #[cfg(target_os = "linux")]
1311    {
1312        use std::os::unix::io::AsRawFd;
1313        let fd = stdin.as_raw_fd();
1314        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1315        if unsafe { libc::fstat(fd, &mut stat) } == 0
1316            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1317            && stat.st_size > 0
1318        {
1319            unsafe {
1320                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1321            }
1322        }
1323    }
1324    blake2b_hash_reader(stdin.lock(), output_bytes)
1325}
1326
/// Internal enum for file content in batch hashing.
/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
enum FileContent {
    /// Zero-copy memory mapping of a regular file.
    Mmap(memmap2::Mmap),
    /// File bytes read into an owned buffer (tiny files, mmap failures,
    /// empty files, and non-regular inputs such as pipes).
    Buf(Vec<u8>),
}
1333
1334impl AsRef<[u8]> for FileContent {
1335    fn as_ref(&self) -> &[u8] {
1336        match self {
1337            FileContent::Mmap(m) => m,
1338            FileContent::Buf(v) => v,
1339        }
1340    }
1341}
1342
1343/// Open a file and load its content for batch hashing.
1344/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
1345/// files (zero-copy), and read-to-end for non-regular files.
1346fn open_file_content(path: &Path) -> io::Result<FileContent> {
1347    let (file, size, is_regular) = open_and_stat(path)?;
1348    if is_regular && size == 0 {
1349        return Ok(FileContent::Buf(Vec::new()));
1350    }
1351    if is_regular && size > 0 {
1352        // Tiny files: read directly into Vec. The mmap syscall + page fault
1353        // overhead exceeds the data transfer cost for files under 8KB.
1354        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
1355        if size < TINY_FILE_LIMIT {
1356            let mut buf = vec![0u8; size as usize];
1357            let mut total = 0;
1358            let mut f = file;
1359            while total < size as usize {
1360                match f.read(&mut buf[total..]) {
1361                    Ok(0) => break,
1362                    Ok(n) => total += n,
1363                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1364                    Err(e) => return Err(e),
1365                }
1366            }
1367            buf.truncate(total);
1368            return Ok(FileContent::Buf(buf));
1369        }
1370        // HUGEPAGE + PopulateRead for optimal page faulting
1371        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1372        if let Ok(mmap) = mmap_result {
1373            #[cfg(target_os = "linux")]
1374            {
1375                if size >= 2 * 1024 * 1024 {
1376                    let _ = mmap.advise(memmap2::Advice::HugePage);
1377                }
1378                let _ = mmap.advise(memmap2::Advice::Sequential);
1379                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1380                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1381                }
1382            }
1383            return Ok(FileContent::Mmap(mmap));
1384        }
1385        // Fallback: read into Vec
1386        let mut buf = vec![0u8; size as usize];
1387        let mut total = 0;
1388        let mut f = file;
1389        while total < size as usize {
1390            match f.read(&mut buf[total..]) {
1391                Ok(0) => break,
1392                Ok(n) => total += n,
1393                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1394                Err(e) => return Err(e),
1395            }
1396        }
1397        buf.truncate(total);
1398        return Ok(FileContent::Buf(buf));
1399    }
1400    // Non-regular: read to end
1401    let mut buf = Vec::new();
1402    let mut f = file;
1403    f.read_to_end(&mut buf)?;
1404    Ok(FileContent::Buf(buf))
1405}
1406
1407/// Read remaining file content from an already-open fd into a Vec.
1408/// Used when the initial stack buffer is exhausted and we need to read
1409/// the rest without re-opening the file.
1410fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1411    let mut buf = Vec::with_capacity(prefix.len() + 65536);
1412    buf.extend_from_slice(prefix);
1413    file.read_to_end(&mut buf)?;
1414    Ok(FileContent::Buf(buf))
1415}
1416
1417/// Open a file and read all content without fstat — just open+read+close.
1418/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
1419/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
1420/// then falls back to larger buffer or read_to_end for bigger files.
1421fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1422    let mut file = open_noatime(path)?;
1423    // Try small stack buffer first — optimal for benchmark's ~55 byte files.
1424    // For tiny files, allocate exact-size Vec to avoid waste.
1425    let mut small_buf = [0u8; 4096];
1426    match file.read(&mut small_buf) {
1427        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1428        Ok(n) if n < small_buf.len() => {
1429            // File fits in small buffer — allocate exact size
1430            let mut vec = Vec::with_capacity(n);
1431            vec.extend_from_slice(&small_buf[..n]);
1432            return Ok(FileContent::Buf(vec));
1433        }
1434        Ok(n) => {
1435            // Might be more data — allocate heap buffer and read into it directly
1436            let mut buf = vec![0u8; 65536];
1437            buf[..n].copy_from_slice(&small_buf[..n]);
1438            let mut total = n;
1439            loop {
1440                match file.read(&mut buf[total..]) {
1441                    Ok(0) => {
1442                        buf.truncate(total);
1443                        return Ok(FileContent::Buf(buf));
1444                    }
1445                    Ok(n) => {
1446                        total += n;
1447                        if total >= buf.len() {
1448                            // File > 64KB: read rest from existing fd
1449                            return read_remaining_to_vec(&buf[..total], file);
1450                        }
1451                    }
1452                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1453                    Err(e) => return Err(e),
1454                }
1455            }
1456        }
1457        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1458            let mut buf = vec![0u8; 65536];
1459            let mut total = 0;
1460            loop {
1461                match file.read(&mut buf[total..]) {
1462                    Ok(0) => {
1463                        buf.truncate(total);
1464                        return Ok(FileContent::Buf(buf));
1465                    }
1466                    Ok(n) => {
1467                        total += n;
1468                        if total >= buf.len() {
1469                            // File > 64KB: read rest from existing fd
1470                            return read_remaining_to_vec(&buf[..total], file);
1471                        }
1472                    }
1473                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1474                    Err(e) => return Err(e),
1475                }
1476            }
1477        }
1478        Err(e) => return Err(e),
1479    }
1480}
1481
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order, one
/// `io::Result` per input path (a failed read yields its error in place).
///
/// For 100 files on AVX2: 4x throughput from SIMD parallelism.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    // For small file counts (≤10), load sequentially to avoid thread::scope
    // overhead (~120µs). For more files, load in parallel with lightweight
    // OS threads; from 20 files up, additionally use the fast loader that
    // skips fstat (open+read+close only).
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        // One worker per CPU (capped at the file count); files are split into
        // contiguous chunks so each thread returns its results in order.
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        // thread::scope lets the workers borrow `paths` without 'static bounds.
        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            // Joining in spawn order + per-chunk ordering preserves input order.
            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads.
    // `ok_entries` records (original index, borrowed bytes) so results can be
    // scattered back to input positions after hashing; this inner scope ends
    // the borrow of `file_data` before it is consumed in Phase 4.
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Scatter hex digests back to their original input positions;
        // positions whose read failed stay None.
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order.
    // Invariant: hash_opt is Some exactly when the read succeeded, so the
    // unwrap cannot fail.
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
1571
/// Batch-hash multiple files with BLAKE2b using the best strategy for the workload.
/// Samples a few files to estimate total data size. For small workloads, uses
/// single-core SIMD batch hashing (`blake2b_hash_files_many`) to avoid stat and
/// thread spawn overhead. For larger workloads, uses multi-core work-stealing
/// parallelism where each worker calls `blake2b_hash_file` (with I/O pipelining
/// for large files on Linux).
/// Returns results in input order.
pub fn blake2b_hash_files_parallel(
    paths: &[&Path],
    output_bytes: usize,
) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Sample a few files to estimate whether parallel processing is worthwhile.
    // This avoids the cost of statting ALL files (~70µs/file) when the workload
    // is too small for parallelism to help.
    let sample_count = n.min(5);
    let mut sample_max: u64 = 0;
    let mut sample_total: u64 = 0;
    for &p in paths.iter().take(sample_count) {
        // A failed stat counts as size 0 here; the real error surfaces later
        // from the per-file hash call.
        let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
        sample_total += size;
        sample_max = sample_max.max(size);
    }
    // Extrapolate the sampled sizes to the full input set (guarding n == 0).
    let estimated_total = if sample_count > 0 {
        sample_total * (n as u64) / (sample_count as u64)
    } else {
        0
    };

    // For small workloads, thread spawn overhead (~120µs × N_threads) exceeds
    // any parallelism benefit. Use SIMD batch hashing directly (no stat pass).
    if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
        return blake2b_hash_files_many(paths, output_bytes);
    }

    // Full stat pass for parallel scheduling — worth it for larger workloads.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    // SAFETY: `file` is a live open descriptor for the duration
                    // of the call; readahead only needs a valid fd.
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    // Cap worker count at the input count — extra threads would spin on an
    // exhausted work index.
    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        // Relaxed ordering suffices: the counter is the only
                        // shared state, and fetch_add hands out unique indices.
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = blake2b_hash_file(path, output_bytes);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            // join() only fails if a worker panicked; propagate the panic.
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // Every slot is filled by the workers; the fallback error is defensive.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1683
1684/// Auto-dispatch multi-file hashing: picks sequential or parallel based on workload.
1685///
1686/// For small files (<64KB sample), sequential avoids thread spawn + readahead overhead
1687/// that dominates for tiny files. On the "100 × 55-byte files" benchmark, this saves
1688/// ~5ms of overhead (thread creation + 200 stat() calls + 100 fadvise() calls).
1689///
1690/// For large files (>=64KB), parallel processing amortizes thread spawn cost over
1691/// substantial per-file hash work. Returns results in input order.
1692pub fn hash_files_auto(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1693    let n = paths.len();
1694    if n == 0 {
1695        return Vec::new();
1696    }
1697    if n == 1 {
1698        return vec![hash_file_nostat(algo, paths[0])];
1699    }
1700
1701    // Sample up to 3 files (max size) to correctly dispatch mixed workloads
1702    // like `md5sum small.txt big1.gb big2.gb`. Costs at most 3 stat calls (~6µs)
1703    // to save potentially 3-6ms of thread overhead for small-file workloads.
1704    let sample_size = paths
1705        .iter()
1706        .take(3)
1707        .filter_map(|p| std::fs::metadata(p).ok())
1708        .map(|m| m.len())
1709        .max()
1710        .unwrap_or(0);
1711
1712    if sample_size < 65536 {
1713        // Small files: sequential loop avoiding thread spawn overhead.
1714        #[cfg(target_os = "linux")]
1715        {
1716            // Raw syscall path: reuses CString buffer, avoids OpenOptions/File overhead
1717            let mut c_path_buf = Vec::with_capacity(256);
1718            paths
1719                .iter()
1720                .map(|&p| hash_file_raw_nostat(algo, p, &mut c_path_buf))
1721                .collect()
1722        }
1723        #[cfg(not(target_os = "linux"))]
1724        {
1725            paths.iter().map(|&p| hash_file_nostat(algo, p)).collect()
1726        }
1727    } else if n >= 20 {
1728        hash_files_batch(paths, algo)
1729    } else {
1730        hash_files_parallel_fast(paths, algo)
1731    }
1732}
1733
/// Batch-hash multiple files with SHA-256/MD5 using work-stealing parallelism.
/// Files are sorted by size (largest first) so the biggest files start processing
/// immediately. Each worker thread grabs the next unprocessed file via atomic index,
/// eliminating tail latency from uneven file sizes.
/// Returns results in input order.
pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Build (original_index, path, size) tuples — stat all files for scheduling.
    // The stat cost (~5µs/file) is repaid by better work distribution.
    // A failed stat maps to size 0; the worker's hash_file call reports the
    // real error into that file's result slot.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    // SAFETY: `file` is a live open descriptor for the duration
                    // of the call; readahead only needs a valid fd.
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    // Cap worker count at the input count — extra threads would have no work.
    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        // Relaxed suffices: fetch_add alone guarantees each
                        // index is claimed by exactly one worker.
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = hash_file(algo, path);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            // join() only fails if a worker panicked; propagate the panic.
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // All slots are filled by construction; the fallback error is defensive.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1818
1819/// Fast parallel hash for multi-file workloads. Skips the stat-all-and-sort phase
1820/// of `hash_files_parallel()` and uses `hash_file_nostat()` per worker to minimize
1821/// per-file syscall overhead. For 100 tiny files, this eliminates ~200 stat() calls
1822/// (100 from the sort phase + 100 from open_and_stat inside each worker).
1823/// Returns results in input order.
1824pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1825    let n = paths.len();
1826    if n == 0 {
1827        return Vec::new();
1828    }
1829    if n == 1 {
1830        return vec![hash_file_nostat(algo, paths[0])];
1831    }
1832
1833    // Issue readahead for all files (no size threshold — even tiny files benefit
1834    // from batched WILLNEED hints when processing 100+ files)
1835    #[cfg(target_os = "linux")]
1836    readahead_files_all(paths);
1837
1838    let num_threads = std::thread::available_parallelism()
1839        .map(|n| n.get())
1840        .unwrap_or(4)
1841        .min(n);
1842
1843    let work_idx = AtomicUsize::new(0);
1844
1845    std::thread::scope(|s| {
1846        let work_idx = &work_idx;
1847
1848        let handles: Vec<_> = (0..num_threads)
1849            .map(|_| {
1850                s.spawn(move || {
1851                    let mut local_results = Vec::new();
1852                    loop {
1853                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1854                        if idx >= n {
1855                            break;
1856                        }
1857                        let result = hash_file_nostat(algo, paths[idx]);
1858                        local_results.push((idx, result));
1859                    }
1860                    local_results
1861                })
1862            })
1863            .collect();
1864
1865        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1866        for handle in handles {
1867            for (idx, result) in handle.join().unwrap() {
1868                results[idx] = Some(result);
1869            }
1870        }
1871        results
1872            .into_iter()
1873            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1874            .collect()
1875    })
1876}
1877
1878/// Batch-hash multiple files: pre-read all files into memory in parallel,
1879/// then hash all data in parallel. Optimal for many small files where per-file
1880/// overhead (open/read/close syscalls) dominates over hash computation.
1881///
1882/// Reuses the same parallel file loading pattern as `blake2b_hash_files_many()`.
1883/// For 100 × 55-byte files: all 5500 bytes are loaded in parallel across threads,
1884/// then hashed in parallel — minimizing wall-clock time for syscall-bound workloads.
1885/// Returns results in input order.
1886pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1887    let n = paths.len();
1888    if n == 0 {
1889        return Vec::new();
1890    }
1891
1892    // Issue readahead for all files
1893    #[cfg(target_os = "linux")]
1894    readahead_files_all(paths);
1895
1896    // Phase 1: Load all files into memory in parallel.
1897    // For 20+ files, use fast path that skips fstat.
1898    let use_fast = n >= 20;
1899
1900    let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
1901        // Sequential loading — avoids thread spawn overhead for small batches
1902        paths
1903            .iter()
1904            .map(|&path| {
1905                if use_fast {
1906                    open_file_content_fast(path)
1907                } else {
1908                    open_file_content(path)
1909                }
1910            })
1911            .collect()
1912    } else {
1913        let num_threads = std::thread::available_parallelism()
1914            .map(|t| t.get())
1915            .unwrap_or(4)
1916            .min(n);
1917        let chunk_size = (n + num_threads - 1) / num_threads;
1918
1919        std::thread::scope(|s| {
1920            let handles: Vec<_> = paths
1921                .chunks(chunk_size)
1922                .map(|chunk| {
1923                    s.spawn(move || {
1924                        chunk
1925                            .iter()
1926                            .map(|&path| {
1927                                if use_fast {
1928                                    open_file_content_fast(path)
1929                                } else {
1930                                    open_file_content(path)
1931                                }
1932                            })
1933                            .collect::<Vec<_>>()
1934                    })
1935                })
1936                .collect();
1937
1938            handles
1939                .into_iter()
1940                .flat_map(|h| h.join().unwrap())
1941                .collect()
1942        })
1943    };
1944
1945    // Phase 2: Hash all loaded data. For tiny files hash is negligible;
1946    // for larger files the parallel hashing across threads helps.
1947    let num_hash_threads = std::thread::available_parallelism()
1948        .map(|t| t.get())
1949        .unwrap_or(4)
1950        .min(n);
1951    let work_idx = AtomicUsize::new(0);
1952
1953    std::thread::scope(|s| {
1954        let work_idx = &work_idx;
1955        let file_data = &file_data;
1956
1957        let handles: Vec<_> = (0..num_hash_threads)
1958            .map(|_| {
1959                s.spawn(move || {
1960                    let mut local_results = Vec::new();
1961                    loop {
1962                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1963                        if idx >= n {
1964                            break;
1965                        }
1966                        let result = match &file_data[idx] {
1967                            Ok(content) => hash_bytes(algo, content.as_ref()),
1968                            Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
1969                        };
1970                        local_results.push((idx, result));
1971                    }
1972                    local_results
1973                })
1974            })
1975            .collect();
1976
1977        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1978        for handle in handles {
1979            for (idx, result) in handle.join().unwrap() {
1980                results[idx] = Some(result);
1981            }
1982        }
1983        results
1984            .into_iter()
1985            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1986            .collect()
1987    })
1988}
1989
/// Stream-hash a file that already has a prefix read into memory.
/// Feeds `prefix` into the hasher first, then streams the rest from `file`.
/// Avoids re-opening and re-reading the file when the initial buffer is exhausted.
fn hash_stream_with_prefix(
    algo: HashAlgorithm,
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    // Blake2b uses its own hasher on all platforms
    if matches!(algo, HashAlgorithm::Blake2b) {
        let mut state = blake2b_simd::Params::new().to_state();
        state.update(prefix);
        return STREAM_BUF.with(|cell| {
            let mut buf = cell.borrow_mut();
            ensure_stream_buf(&mut buf);
            // Stream the remainder of the file in full-buffer chunks until EOF.
            loop {
                let n = read_full(&mut file, &mut buf)?;
                if n == 0 {
                    break;
                }
                state.update(&buf[..n]);
            }
            Ok(hex_encode(state.finalize().as_bytes()))
        });
    }

    // Non-BLAKE2b algorithms: OpenSSL on Linux, RustCrypto Digest elsewhere.
    #[cfg(target_os = "linux")]
    {
        hash_stream_with_prefix_openssl(algo_to_openssl_md(algo), prefix, file)
    }
    #[cfg(not(target_os = "linux"))]
    {
        match algo {
            HashAlgorithm::Sha1 => hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file),
            HashAlgorithm::Sha224 => hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file),
            HashAlgorithm::Sha256 => hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file),
            HashAlgorithm::Sha384 => hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file),
            HashAlgorithm::Sha512 => hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file),
            HashAlgorithm::Md5 => hash_stream_with_prefix_digest::<md5::Md5>(prefix, file),
            // Blake2b was handled by the early return above.
            HashAlgorithm::Blake2b => unreachable!(),
        }
    }
}
2033
/// Generic stream-hash with prefix for non-Linux platforms using Digest trait.
#[cfg(not(target_os = "linux"))]
fn hash_stream_with_prefix_digest<D: digest::Digest>(
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        ensure_stream_buf(&mut chunk);
        // Seed the state with the bytes already read, then stream the rest.
        let mut state = D::new();
        state.update(prefix);
        loop {
            match read_full(&mut file, &mut chunk)? {
                0 => break,
                n => state.update(&chunk[..n]),
            }
        }
        Ok(hex_encode(&state.finalize()))
    })
}
2055
2056/// Streaming hash with prefix using OpenSSL (Linux).
2057#[cfg(target_os = "linux")]
2058fn hash_stream_with_prefix_openssl(
2059    md: openssl::hash::MessageDigest,
2060    prefix: &[u8],
2061    mut file: File,
2062) -> io::Result<String> {
2063    STREAM_BUF.with(|cell| {
2064        let mut buf = cell.borrow_mut();
2065        ensure_stream_buf(&mut buf);
2066        let mut hasher =
2067            openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e.to_string()))?;
2068        hasher
2069            .update(prefix)
2070            .map_err(|e| io::Error::other(e.to_string()))?;
2071        loop {
2072            let n = read_full(&mut file, &mut buf)?;
2073            if n == 0 {
2074                break;
2075            }
2076            hasher
2077                .update(&buf[..n])
2078                .map_err(|e| io::Error::other(e.to_string()))?;
2079        }
2080        let digest = hasher
2081            .finish()
2082            .map_err(|e| io::Error::other(e.to_string()))?;
2083        Ok(hex_encode(&digest))
2084    })
2085}
2086
/// Hash a file without fstat — just open, read until EOF, hash.
/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
/// Uses a two-tier buffer strategy: small stack buffer (4KB) for the initial read,
/// then falls back to a larger stack buffer (64KB) or streaming hash for bigger files.
/// For benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
///
/// NOTE(review): a short read (n < buffer size) is treated as EOF. That holds
/// for regular files on local filesystems, but a pipe/FIFO can return short
/// reads mid-stream — confirm callers only pass regular-file paths here.
pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let mut file = open_noatime(path)?;
    // First try a small stack buffer — optimal for tiny files (< 4KB).
    // Most "many_files" benchmark files are ~55 bytes, so this completes
    // with a single read() syscall and no fallback.
    let mut small_buf = [0u8; 4096];
    match file.read(&mut small_buf) {
        // Immediate EOF: hash of the empty input.
        Ok(0) => return hash_bytes(algo, &[]),
        Ok(n) if n < small_buf.len() => {
            // File fits in small buffer — hash directly (common case)
            return hash_bytes(algo, &small_buf[..n]);
        }
        Ok(n) => {
            // Buffer filled exactly (n == 4096): there may be more data —
            // fall back to a larger buffer, carrying over what was read.
            let mut buf = [0u8; 65536];
            buf[..n].copy_from_slice(&small_buf[..n]);
            let mut total = n;
            loop {
                match file.read(&mut buf[total..]) {
                    Ok(0) => return hash_bytes(algo, &buf[..total]),
                    Ok(n) => {
                        total += n;
                        if total >= buf.len() {
                            // File > 64KB: stream-hash from existing fd instead of
                            // re-opening. Feed already-read prefix, continue streaming.
                            return hash_stream_with_prefix(algo, &buf[..total], file);
                        }
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
        }
        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
            // Retry with full buffer on interrupt. No bytes were consumed by
            // the failed read, so starting at total = 0 loses nothing.
            let mut buf = [0u8; 65536];
            let mut total = 0;
            loop {
                match file.read(&mut buf[total..]) {
                    Ok(0) => return hash_bytes(algo, &buf[..total]),
                    Ok(n) => {
                        total += n;
                        if total >= buf.len() {
                            // File > 64KB: stream-hash from existing fd
                            return hash_stream_with_prefix(algo, &buf[..total], file);
                        }
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
        }
        Err(e) => return Err(e),
    }
}
2147
2148/// Hash a small file using raw Linux syscalls without fstat.
2149/// For the multi-file sequential path where we already know files are small.
2150/// Avoids: OpenOptions builder, CString per-file alloc (reuses caller's buffer),
2151/// fstat overhead (unnecessary when we just need open+read+close).
2152/// Returns hash as hex string.
2153#[cfg(target_os = "linux")]
2154fn hash_file_raw_nostat(
2155    algo: HashAlgorithm,
2156    path: &Path,
2157    c_path_buf: &mut Vec<u8>,
2158) -> io::Result<String> {
2159    use std::os::unix::ffi::OsStrExt;
2160
2161    let path_bytes = path.as_os_str().as_bytes();
2162
2163    // Reuse caller's buffer for null-terminated path (avoids heap alloc per file)
2164    c_path_buf.clear();
2165    c_path_buf.reserve(path_bytes.len() + 1);
2166    c_path_buf.extend_from_slice(path_bytes);
2167    c_path_buf.push(0);
2168
2169    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
2170    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
2171        flags |= libc::O_NOATIME;
2172    }
2173
2174    let fd = unsafe { libc::open(c_path_buf.as_ptr() as *const libc::c_char, flags) };
2175    if fd < 0 {
2176        let err = io::Error::last_os_error();
2177        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
2178            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
2179            let fd2 = unsafe {
2180                libc::open(
2181                    c_path_buf.as_ptr() as *const libc::c_char,
2182                    libc::O_RDONLY | libc::O_CLOEXEC,
2183                )
2184            };
2185            if fd2 < 0 {
2186                return Err(io::Error::last_os_error());
2187            }
2188            return hash_fd_small(algo, fd2);
2189        }
2190        return Err(err);
2191    }
2192    hash_fd_small(algo, fd)
2193}
2194
/// Read a small file from fd, hash it, close fd. No fstat needed.
///
/// Takes ownership of `fd`: it is closed on every path (explicitly for the
/// stack-buffer cases, via `File`'s Drop for the fallback).
///
/// NOTE(review): the first read returning fewer than 4096 bytes is treated as
/// EOF. That holds for regular files; a pipe could short-read mid-stream —
/// confirm callers only pass regular-file descriptors.
#[cfg(target_os = "linux")]
#[inline]
fn hash_fd_small(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    let mut buf = [0u8; 4096];
    // Retry the initial read on EINTR; close the fd before surfacing any
    // other error.
    let n = loop {
        // SAFETY: fd is an open descriptor owned by this function; buf is a
        // valid writable region of buf.len() bytes.
        let ret = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
        if ret >= 0 {
            break ret;
        }
        let err = io::Error::last_os_error();
        if err.kind() == io::ErrorKind::Interrupted {
            continue;
        }
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    };
    let n = n as usize;
    if n < buf.len() {
        // File fits in 4KB — common case for small files
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..n]);
    }
    // File > 4KB: fall back to hash_file_nostat-style reading
    // Wrap fd in File for RAII close
    use std::os::unix::io::FromRawFd;
    // SAFETY: fd is open and ownership transfers to `file`; it is not closed
    // again by this function afterwards.
    let mut file = unsafe { File::from_raw_fd(fd) };
    let mut big_buf = [0u8; 65536];
    big_buf[..n].copy_from_slice(&buf[..n]);
    let mut total = n;
    loop {
        match std::io::Read::read(&mut file, &mut big_buf[total..]) {
            Ok(0) => return hash_bytes(algo, &big_buf[..total]),
            Ok(n) => {
                total += n;
                if total >= big_buf.len() {
                    // File > 64KB: hand the open fd to the streaming hasher,
                    // feeding the already-read prefix first.
                    return hash_stream_with_prefix(algo, &big_buf[..total], file);
                }
            }
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
}
2243
/// Hash a single file using raw Linux syscalls for minimum overhead.
/// Bypasses Rust's File abstraction entirely: raw open/fstat/read/close.
/// For the single-file fast path, this eliminates OpenOptions builder,
/// CString heap allocation, File wrapper overhead, and Read trait dispatch.
///
/// Size-based dispatch:
/// - Tiny (<8KB): stack buffer + raw read + hash_bytes (3 syscalls total)
/// - Small (8KB-16MB): wraps fd in File, reads into thread-local buffer
/// - Large (>=16MB): wraps fd in File, mmaps with HugePage + PopulateRead
/// - Non-regular: wraps fd in File, streaming hash_reader
#[cfg(target_os = "linux")]
pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    use std::os::unix::ffi::OsStrExt;

    let path_bytes = path.as_os_str().as_bytes();
    // CString::new rejects interior NUL bytes, which would otherwise
    // truncate the C path.
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    // Raw open with O_RDONLY | O_CLOEXEC, optionally O_NOATIME
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    // SAFETY: c_path is a valid NUL-terminated C string.
    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM with O_NOATIME means we don't own the file: disable the flag
        // globally for the process and retry once without it.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            // SAFETY: same valid C string as above.
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd(algo, fd2);
        }
        return Err(err);
    }
    hash_from_raw_fd(algo, fd)
}
2283
/// Hash from a raw fd — dispatches by file size for optimal I/O strategy.
/// Handles tiny (stack buffer), small (thread-local buffer), large (mmap), and
/// non-regular (streaming) files.
///
/// Takes ownership of `fd`: it is closed on every path (explicitly in the raw
/// branches, via `File`'s Drop once wrapped).
#[cfg(target_os = "linux")]
fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    // Raw fstat to determine size and type
    // SAFETY: fd is an open descriptor; stat is a properly sized, zeroed
    // out-parameter.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &[]);
    }

    // Tiny files (<8KB): raw read into stack buffer, no File wrapper needed.
    // Entire I/O in 3 raw syscalls: open + read + close.
    // (The 8192-byte buffer relies on TINY_FILE_LIMIT <= 8192, per the
    // "<8KB" tier documented on hash_file_raw.)
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        while total < size as usize {
            // SAFETY: fd is open; the slice starting at total has at least
            // (size - total) writable bytes since size < 8192.
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                // EINTR: retry without closing; any other error closes first.
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                // Unexpected EOF (file shrank after fstat): hash what we got.
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..total]);
    }

    // For larger files, wrap fd in File for RAII close and existing optimized paths.
    use std::os::unix::io::FromRawFd;
    // SAFETY: fd is open and ownership transfers to `file`; no further raw
    // close of this fd occurs below.
    let file = unsafe { File::from_raw_fd(fd) };

    // (size > 0 always holds here for regular files: the empty case returned
    // earlier — the extra check is defensive.)
    if is_regular && size > 0 {
        return hash_regular_file(algo, file, size);
    }

    // Non-regular files: streaming hash
    hash_reader(algo, file)
}
2354
2355/// Issue readahead hints for ALL file paths (no size threshold).
2356/// For multi-file benchmarks, even small files benefit from batched readahead.
2357#[cfg(target_os = "linux")]
2358pub fn readahead_files_all(paths: &[&Path]) {
2359    use std::os::unix::io::AsRawFd;
2360    for path in paths {
2361        if let Ok(file) = open_noatime(path) {
2362            if let Ok(meta) = file.metadata() {
2363                if meta.file_type().is_file() {
2364                    let len = meta.len();
2365                    unsafe {
2366                        libc::posix_fadvise(
2367                            file.as_raw_fd(),
2368                            0,
2369                            len as i64,
2370                            libc::POSIX_FADV_WILLNEED,
2371                        );
2372                    }
2373                }
2374            }
2375        }
2376    }
2377}
2378
/// No-op on non-Linux targets: the `posix_fadvise`-based readahead above is
/// only compiled for Linux, so other platforms get this empty stub.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
2381
/// Print hash result in GNU format: "hash  filename\n"
/// Uses raw byte writes to avoid std::fmt overhead.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // GNU separator: "  " in text mode, " *" in binary mode.
    let sep: &[u8] = if binary { b" *" } else { b"  " };
    out.write_all(hash.as_bytes())?;
    out.write_all(sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\n")
}
2396
/// Print hash in GNU format with NUL terminator instead of newline.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // Same layout as print_hash, but terminated with '\0' (for -z style output).
    let sep: &[u8] = if binary { b" *" } else { b"  " };
    out.write_all(hash.as_bytes())?;
    out.write_all(sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\0")
}
2410
2411// ── Single-write output buffer ─────────────────────────────────────
2412// For multi-file workloads, batch the entire "hash  filename\n" line into
2413// a single write() call. This halves the number of BufWriter flushes.
2414
// Thread-local output line buffer for batched writes.
// Reused across files to avoid per-file allocation; the 256-byte initial
// capacity covers typical "hash  filename" lines, and the Vec simply grows
// for longer filenames. Thread-local (not global) so parallel hashing
// threads never contend on it.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
2420
2421/// Build and write the standard GNU hash output line in a single write() call.
2422/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
2423/// For escaped filenames: "\hash  escaped_filename\n".
2424#[inline]
2425pub fn write_hash_line(
2426    out: &mut impl Write,
2427    hash: &str,
2428    filename: &str,
2429    binary: bool,
2430    zero: bool,
2431    escaped: bool,
2432) -> io::Result<()> {
2433    LINE_BUF.with(|cell| {
2434        let mut buf = cell.borrow_mut();
2435        buf.clear();
2436        let mode = if binary { b'*' } else { b' ' };
2437        let term = if zero { b'\0' } else { b'\n' };
2438        if escaped {
2439            buf.push(b'\\');
2440        }
2441        buf.extend_from_slice(hash.as_bytes());
2442        buf.push(b' ');
2443        buf.push(mode);
2444        buf.extend_from_slice(filename.as_bytes());
2445        buf.push(term);
2446        out.write_all(&buf)
2447    })
2448}
2449
2450/// Build and write BSD tag format output in a single write() call.
2451/// Format: "ALGO (filename) = hash\n"
2452#[inline]
2453pub fn write_hash_tag_line(
2454    out: &mut impl Write,
2455    algo_name: &str,
2456    hash: &str,
2457    filename: &str,
2458    zero: bool,
2459) -> io::Result<()> {
2460    LINE_BUF.with(|cell| {
2461        let mut buf = cell.borrow_mut();
2462        buf.clear();
2463        let term = if zero { b'\0' } else { b'\n' };
2464        buf.extend_from_slice(algo_name.as_bytes());
2465        buf.extend_from_slice(b" (");
2466        buf.extend_from_slice(filename.as_bytes());
2467        buf.extend_from_slice(b") = ");
2468        buf.extend_from_slice(hash.as_bytes());
2469        buf.push(term);
2470        out.write_all(&buf)
2471    })
2472}
2473
2474/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
2475pub fn print_hash_tag(
2476    out: &mut impl Write,
2477    algo: HashAlgorithm,
2478    hash: &str,
2479    filename: &str,
2480) -> io::Result<()> {
2481    out.write_all(algo.name().as_bytes())?;
2482    out.write_all(b" (")?;
2483    out.write_all(filename.as_bytes())?;
2484    out.write_all(b") = ")?;
2485    out.write_all(hash.as_bytes())?;
2486    out.write_all(b"\n")
2487}
2488
2489/// Print hash in BSD tag format with NUL terminator.
2490pub fn print_hash_tag_zero(
2491    out: &mut impl Write,
2492    algo: HashAlgorithm,
2493    hash: &str,
2494    filename: &str,
2495) -> io::Result<()> {
2496    out.write_all(algo.name().as_bytes())?;
2497    out.write_all(b" (")?;
2498    out.write_all(filename.as_bytes())?;
2499    out.write_all(b") = ")?;
2500    out.write_all(hash.as_bytes())?;
2501    out.write_all(b"\0")
2502}
2503
/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash" for 512-bit, or
/// "BLAKE2b-NNN (filename) = hash" for any other length.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        // Default digest length: fixed byte literal, no formatting.
        512 => out.write_all(b"BLAKE2b (")?,
        // Rare non-512 path goes through std::fmt; negligible per file.
        n => write!(out, "BLAKE2b-{} (", n)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for part in tail {
        out.write_all(part)?;
    }
    Ok(())
}
2524
/// Print hash in BSD tag format with BLAKE2b length info and NUL terminator.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        // Non-default lengths use std::fmt; this path is rare.
        n => write!(out, "BLAKE2b-{} (", n)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for part in tail {
        out.write_all(part)?;
    }
    Ok(())
}
2542
/// Options for check mode.
pub struct CheckOptions {
    // Suppress per-file "OK" lines (mismatches are still reported).
    pub quiet: bool,
    // Suppress ALL per-file output; result is conveyed via CheckResult only.
    pub status_only: bool,
    // NOTE(review): not consulted inside check_file itself — presumably the
    // caller uses it to turn format_errors into a nonzero exit; confirm.
    pub strict: bool,
    // Emit a stderr warning for each improperly formatted checksum line.
    pub warn: bool,
    // Skip (rather than count as read errors) listed files that do not exist.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
2555
/// Result of check mode verification.
pub struct CheckResult {
    // Files whose computed hash matched the expected hash.
    pub ok: usize,
    // Files whose computed hash did not match the expected hash.
    pub mismatches: usize,
    // Lines that matched no supported checksum-line format.
    pub format_errors: usize,
    // Files that could not be opened or read (missing files are counted here
    // too, unless ignore_missing was set).
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
2565
/// Verify checksums from a check file.
/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
///
/// Per-file results ("OK" / "FAILED" / "FAILED open or read") go to `out`;
/// warnings and OS error text go to `err_out`, both gated by `opts`.
/// Returns aggregate counts. `Err` is returned only for I/O failures on the
/// check file itself or on the output sinks — unreadable *target* files are
/// tallied in `CheckResult::read_errors` and processing continues.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    // 1-based line number for warning messages; counts blank lines too.
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        // NOTE(review): trim_end strips ALL trailing whitespace, not just the
        // newline — a listed filename that genuinely ends in spaces would
        // fail to verify. Confirm this matches the intended GNU behavior.
        let line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush buffered results first so the stderr warning is
                    // not reordered ahead of earlier stdout lines when both
                    // streams share a terminal.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                // --ignore-missing: silently skip nonexistent listed files.
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Flush so the error text and FAILED line stay in order.
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Case-insensitive compare: check files may carry uppercase hex.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
2662
/// Parse a checksum line in any supported format.
/// Returns `(expected_hash, filename)`, or `None` if no format matches.
///
/// Supported formats:
/// - BSD tag:  "ALGO (filename) = hash" (ALGO may also be "BLAKE2b-NNN")
/// - standard: "hash  filename"
/// - binary:   "hash *filename"
///
/// GNU tools escape problematic filenames by prefixing the ENTIRE line with
/// a backslash, in BOTH the standard and the tag formats.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Fix: strip the escape prefix BEFORE tag-format parsing. Previously the
    // backslash was stripped only after the tag attempt, so escaped tag lines
    // like "\MD5 (file) = hash" failed to parse at all.
    let line = line.strip_prefix('\\').unwrap_or(line);

    // Try BSD tag format: "ALGO (filename) = hash"
    let rest = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA1 ("))
        .or_else(|| line.strip_prefix("SHA224 ("))
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("SHA384 ("))
        .or_else(|| line.strip_prefix("SHA512 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // Handle "BLAKE2b-NNN (filename) = hash" where NNN is all digits.
            if line.starts_with("BLAKE2b-") {
                let after = &line["BLAKE2b-".len()..];
                if let Some(sp) = after.find(" (") {
                    if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                        return Some(&after[sp + 2..]);
                    }
                }
            }
            None
        });
    if let Some(rest) = rest {
        if let Some(paren_idx) = rest.find(") = ") {
            let filename = &rest[..paren_idx];
            let hash = &rest[paren_idx + 4..];
            return Some((hash, filename));
        }
    }

    // Standard format: "hash  filename"
    if let Some(idx) = line.find("  ") {
        let hash = &line[..idx];
        let rest = &line[idx + 2..];
        return Some((hash, rest));
    }
    // Binary mode: "hash *filename"
    if let Some(idx) = line.find(" *") {
        let hash = &line[..idx];
        let rest = &line[idx + 2..];
        return Some((hash, rest));
    }
    None
}
2711
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name (e.g., BLAKE2b-256 -> Some(256)).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    // Split at the first " (" and then the first ") = " — the same positions
    // a `find` would report, expressed via split_once.
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.split_once(") = ")?;

    // Optional bit length from a trailing "-NNN" suffix on the algorithm name.
    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, digits)| digits.parse::<usize>().ok());

    Some((hash, filename, bits))
}
2732
/// Read as many bytes as possible into buf, retrying on partial reads and
/// on `EINTR`. Ensures each hash update gets a full buffer (fewer update
/// calls = less overhead); returns fewer than `buf.len()` bytes only at
/// end of stream.
///
/// Fix: the previous two-phase version propagated `ErrorKind::Interrupted`
/// from the very first `read()` instead of retrying it the way the refill
/// loop already did. The unified loop below retries EINTR everywhere while
/// keeping the fast path: a regular-file read usually fills the whole buffer
/// on the first iteration, ending the loop immediately.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break, // EOF
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2755
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    // const fns cannot use iterators, so index with a manual while loop.
    let mut value = 0;
    while value < 256 {
        table[value] = [DIGITS[value / 16], DIGITS[value % 16]];
        value += 1;
    }
    table
}

/// Byte -> two lowercase ASCII hex digits, built entirely at compile time.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();
2770
2771/// Fast hex encoding using 2-byte pair lookup table — one lookup per input byte.
2772/// Uses String directly instead of Vec<u8> to avoid the from_utf8 conversion overhead.
2773pub(crate) fn hex_encode(bytes: &[u8]) -> String {
2774    let len = bytes.len() * 2;
2775    let mut hex = String::with_capacity(len);
2776    // SAFETY: We write exactly `len` valid ASCII hex bytes into the String's buffer.
2777    unsafe {
2778        let buf = hex.as_mut_vec();
2779        buf.set_len(len);
2780        hex_encode_to_slice(bytes, buf);
2781    }
2782    hex
2783}
2784
2785/// Encode bytes as hex directly into a pre-allocated output slice.
2786/// Output slice must be at least `bytes.len() * 2` bytes long.
2787#[inline]
2788fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
2789    // SAFETY: We write exactly bytes.len()*2 bytes into `out`, which must be large enough.
2790    unsafe {
2791        let ptr = out.as_mut_ptr();
2792        for (i, &b) in bytes.iter().enumerate() {
2793            let pair = *HEX_TABLE.get_unchecked(b as usize);
2794            *ptr.add(i * 2) = pair[0];
2795            *ptr.add(i * 2 + 1) = pair[1];
2796        }
2797    }
2798}