// coreutils_rs/hash/core.rs

1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10use digest::Digest;
11use md5::Md5;
12
13// ── OpenSSL dynamic loading for all hash algorithms (Linux only) ──────
14// GNU coreutils links against OpenSSL which has hardware-accelerated hash
15// instructions (SHA-NI for SHA-1/SHA-256, AVX-512 for SHA-512 on x86_64).
16// We dynamically load libcrypto at runtime via dlopen/dlsym to get the same
17// performance without requiring a compile-time dependency on libssl-dev.
18// If libcrypto is unavailable, we fall back to ring/sha2/md5 crates.
#[cfg(target_os = "linux")]
mod openssl_evp {
    //! Thin runtime binding to OpenSSL's libcrypto EVP digest API, resolved via
    //! dlopen/dlsym so no compile-time dependency on libssl-dev is needed.
    //! All functions return `Err` if libcrypto could not be loaded.

    use std::ffi::CStr;
    use std::io;
    use std::ptr;
    use std::sync::OnceLock;

    // OpenSSL EVP types (opaque pointers — never dereferenced on the Rust side).
    type EvpMdCtx = *mut libc::c_void;
    type EvpMd = *const libc::c_void;
    type Engine = *const libc::c_void;

    // Function pointer types matching OpenSSL's EVP API.
    // EVP_DigestInit_ex/Update/Final_ex return 1 on success (checked below).
    type FnEvpMdGetter = unsafe extern "C" fn() -> EvpMd;
    type FnEvpMdCtxNew = unsafe extern "C" fn() -> EvpMdCtx;
    type FnEvpDigestInitEx = unsafe extern "C" fn(EvpMdCtx, EvpMd, Engine) -> libc::c_int;
    type FnEvpDigestUpdate =
        unsafe extern "C" fn(EvpMdCtx, *const libc::c_void, libc::size_t) -> libc::c_int;
    type FnEvpDigestFinalEx =
        unsafe extern "C" fn(EvpMdCtx, *mut u8, *mut libc::c_uint) -> libc::c_int;
    type FnEvpMdCtxFree = unsafe extern "C" fn(EvpMdCtx);

    /// All libcrypto entry points we use, resolved once at startup.
    struct OpenSslFns {
        evp_md5: FnEvpMdGetter,
        evp_sha1: FnEvpMdGetter,
        evp_sha224: FnEvpMdGetter,
        evp_sha256: FnEvpMdGetter,
        evp_sha384: FnEvpMdGetter,
        evp_sha512: FnEvpMdGetter,
        evp_md_ctx_new: FnEvpMdCtxNew,
        evp_digest_init_ex: FnEvpDigestInitEx,
        evp_digest_update: FnEvpDigestUpdate,
        evp_digest_final_ex: FnEvpDigestFinalEx,
        evp_md_ctx_free: FnEvpMdCtxFree,
        _handle: *mut libc::c_void, // kept alive so symbols stay valid
    }

    // SAFETY: The function pointers are valid for the lifetime of the process
    // (dlopen handle is never closed once `try_load` succeeds). The pointers
    // themselves are immutable after initialization via OnceLock.
    unsafe impl Send for OpenSslFns {}
    unsafe impl Sync for OpenSslFns {}

    /// Cached OpenSSL function pointers. Initialized once on first use.
    /// `None` inside means libcrypto was not found or symbols were missing.
    static FNS: OnceLock<Option<OpenSslFns>> = OnceLock::new();

    /// dlsym that maps a NULL result to `None` so `?` can abort symbol resolution.
    fn dlsym_checked(handle: *mut libc::c_void, name: &CStr) -> Option<*mut libc::c_void> {
        let ptr = unsafe { libc::dlsym(handle, name.as_ptr()) };
        if ptr.is_null() { None } else { Some(ptr) }
    }

    /// Guard that ensures dlclose is called if we fail to resolve all symbols.
    struct DlopenHandle(*mut libc::c_void);
    impl Drop for DlopenHandle {
        fn drop(&mut self) {
            unsafe {
                libc::dlclose(self.0);
            }
        }
    }

    /// Attempt to dlopen libcrypto and resolve every symbol in `OpenSslFns`.
    /// Returns `None` (after dlclosing) if the library or any symbol is missing.
    fn try_load() -> Option<OpenSslFns> {
        // Try OpenSSL 3.x first, then 1.1.x
        let handle = unsafe {
            let h = libc::dlopen(
                c"libcrypto.so.3".as_ptr(),
                libc::RTLD_LAZY | libc::RTLD_LOCAL,
            );
            if h.is_null() {
                let h = libc::dlopen(
                    c"libcrypto.so.1.1".as_ptr(),
                    libc::RTLD_LAZY | libc::RTLD_LOCAL,
                );
                if h.is_null() {
                    return None;
                }
                h
            } else {
                h
            }
        };

        // Guard ensures dlclose on early return (any dlsym failure via `?` below).
        let guard = DlopenHandle(handle);

        unsafe {
            // SAFETY: each transmute casts a non-null dlsym result to the function
            // pointer type matching that symbol's documented OpenSSL signature.
            let evp_md5: FnEvpMdGetter = std::mem::transmute(dlsym_checked(handle, c"EVP_md5")?);
            let evp_sha1: FnEvpMdGetter = std::mem::transmute(dlsym_checked(handle, c"EVP_sha1")?);
            let evp_sha224: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha224")?);
            let evp_sha256: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha256")?);
            let evp_sha384: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha384")?);
            let evp_sha512: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha512")?);
            let evp_md_ctx_new: FnEvpMdCtxNew =
                std::mem::transmute(dlsym_checked(handle, c"EVP_MD_CTX_new")?);
            let evp_digest_init_ex: FnEvpDigestInitEx =
                std::mem::transmute(dlsym_checked(handle, c"EVP_DigestInit_ex")?);
            let evp_digest_update: FnEvpDigestUpdate =
                std::mem::transmute(dlsym_checked(handle, c"EVP_DigestUpdate")?);
            let evp_digest_final_ex: FnEvpDigestFinalEx =
                std::mem::transmute(dlsym_checked(handle, c"EVP_DigestFinal_ex")?);
            let evp_md_ctx_free: FnEvpMdCtxFree =
                std::mem::transmute(dlsym_checked(handle, c"EVP_MD_CTX_free")?);

            // All symbols resolved — prevent dlclose by forgetting the guard.
            std::mem::forget(guard);

            Some(OpenSslFns {
                evp_md5,
                evp_sha1,
                evp_sha224,
                evp_sha256,
                evp_sha384,
                evp_sha512,
                evp_md_ctx_new,
                evp_digest_init_ex,
                evp_digest_update,
                evp_digest_final_ex,
                evp_md_ctx_free,
                _handle: handle,
            })
        }
    }

    /// Lazily initialize and return the cached function table, if available.
    fn get_fns() -> Option<&'static OpenSslFns> {
        FNS.get_or_init(try_load).as_ref()
    }

    /// Returns true if OpenSSL's libcrypto is available for hardware-accelerated hashing.
    pub fn is_available() -> bool {
        get_fns().is_some()
    }

    /// RAII wrapper for EVP_MD_CTX that frees on drop.
    struct EvpCtx {
        ctx: EvpMdCtx,
        free_fn: FnEvpMdCtxFree,
    }

    impl Drop for EvpCtx {
        fn drop(&mut self) {
            if !self.ctx.is_null() {
                unsafe {
                    (self.free_fn)(self.ctx);
                }
            }
        }
    }

    /// Which EVP hash algorithm to use.
    #[derive(Clone, Copy)]
    pub enum EvpAlgorithm {
        Md5,
        Sha1,
        Sha224,
        Sha256,
        Sha384,
        Sha512,
    }

    impl EvpAlgorithm {
        /// Digest output length in bytes for this algorithm.
        fn digest_len(self) -> usize {
            match self {
                EvpAlgorithm::Md5 => 16,
                EvpAlgorithm::Sha1 => 20,
                EvpAlgorithm::Sha224 => 28,
                EvpAlgorithm::Sha256 => 32,
                EvpAlgorithm::Sha384 => 48,
                EvpAlgorithm::Sha512 => 64,
            }
        }

        /// Fetch the corresponding `EVP_MD*` from libcrypto (may be NULL,
        /// e.g. MD5 under some FIPS builds — callers check).
        fn get_md(self, fns: &OpenSslFns) -> EvpMd {
            unsafe {
                match self {
                    EvpAlgorithm::Md5 => (fns.evp_md5)(),
                    EvpAlgorithm::Sha1 => (fns.evp_sha1)(),
                    EvpAlgorithm::Sha224 => (fns.evp_sha224)(),
                    EvpAlgorithm::Sha256 => (fns.evp_sha256)(),
                    EvpAlgorithm::Sha384 => (fns.evp_sha384)(),
                    EvpAlgorithm::Sha512 => (fns.evp_sha512)(),
                }
            }
        }
    }

    /// Single-shot hash of a byte slice using OpenSSL EVP.
    /// Returns the raw digest bytes; errors if libcrypto is unavailable or any
    /// EVP call fails.
    pub fn hash_bytes(algo: EvpAlgorithm, data: &[u8]) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            // Frees the ctx on every exit path, including the error returns below.
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }
            // Skip the Update call entirely for empty input.
            if !data.is_empty()
                && (fns.evp_digest_update)(ctx, data.as_ptr() as *const libc::c_void, data.len())
                    != 1
            {
                return Err(io::Error::other("EVP_DigestUpdate failed"));
            }

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }

    /// Streaming hash: create context, feed chunks, finalize.
    pub fn hash_reader(algo: EvpAlgorithm, mut reader: impl std::io::Read) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }

            // Reuse the parent module's thread-local read buffer; the closure
            // propagates both read errors and EVP failures via `?` below.
            super::STREAM_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                super::ensure_stream_buf(&mut buf);
                loop {
                    let n = super::read_full(&mut reader, &mut buf)?;
                    if n == 0 {
                        break;
                    }
                    if (fns.evp_digest_update)(ctx, buf[..n].as_ptr() as *const libc::c_void, n)
                        != 1
                    {
                        return Err(io::Error::other("EVP_DigestUpdate failed"));
                    }
                }
                Ok(())
            })?;

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }

    /// Streaming hash with a prefix already read into memory
    /// (e.g. bytes consumed while sniffing the input before hashing).
    pub fn hash_reader_with_prefix(
        algo: EvpAlgorithm,
        prefix: &[u8],
        mut reader: impl std::io::Read,
    ) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }

            // Feed prefix
            if !prefix.is_empty()
                && (fns.evp_digest_update)(
                    ctx,
                    prefix.as_ptr() as *const libc::c_void,
                    prefix.len(),
                ) != 1
            {
                return Err(io::Error::other("EVP_DigestUpdate failed"));
            }

            // Stream rest
            super::STREAM_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                super::ensure_stream_buf(&mut buf);
                loop {
                    let n = super::read_full(&mut reader, &mut buf)?;
                    if n == 0 {
                        break;
                    }
                    if (fns.evp_digest_update)(ctx, buf[..n].as_ptr() as *const libc::c_void, n)
                        != 1
                    {
                        return Err(io::Error::other("EVP_DigestUpdate failed"));
                    }
                }
                Ok(())
            })?;

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }

    /// Pipelined hash for the double-buffered reader thread path.
    /// `rx` delivers `(buffer, valid_len)` pairs; each buffer is returned to the
    /// reader thread via `buf_tx` after hashing (best-effort — send errors are
    /// ignored because the reader may already have exited).
    /// Returns raw digest bytes for the caller to hex-encode.
    pub fn hash_pipelined(
        algo: EvpAlgorithm,
        rx: &std::sync::mpsc::Receiver<(Vec<u8>, usize)>,
        buf_tx: &std::sync::mpsc::SyncSender<Vec<u8>>,
    ) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }

            // Loop ends when the sender side hangs up (recv returns Err).
            while let Ok((buf, n)) = rx.recv() {
                if (fns.evp_digest_update)(ctx, buf[..n].as_ptr() as *const libc::c_void, n) != 1 {
                    let _ = buf_tx.send(buf);
                    return Err(io::Error::other("EVP_DigestUpdate failed"));
                }
                let _ = buf_tx.send(buf);
            }

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }
}
410
/// Supported hash algorithms.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha1,
    Sha224,
    Sha256,
    Sha384,
    Sha512,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Canonical display name of the algorithm.
    pub fn name(self) -> &'static str {
        match self {
            Self::Sha1 => "SHA1",
            Self::Sha224 => "SHA224",
            Self::Sha256 => "SHA256",
            Self::Sha384 => "SHA384",
            Self::Sha512 => "SHA512",
            Self::Md5 => "MD5",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
436
437// ── Generic hash helpers ────────────────────────────────────────────
438
439/// Single-shot hash using the Digest trait.
440fn hash_digest<D: Digest>(data: &[u8]) -> String {
441    hex_encode(&D::digest(data))
442}
443
444/// Streaming hash using thread-local buffer via the Digest trait.
445fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
446    STREAM_BUF.with(|cell| {
447        let mut buf = cell.borrow_mut();
448        ensure_stream_buf(&mut buf);
449        let mut hasher = D::new();
450        loop {
451            let n = read_full(&mut reader, &mut buf)?;
452            if n == 0 {
453                break;
454            }
455            hasher.update(&buf[..n]);
456        }
457        Ok(hex_encode(&hasher.finalize()))
458    })
459}
460
461// ── Public hashing API ──────────────────────────────────────────────
462
/// Buffer size for streaming hash I/O.
/// 128KB matches GNU coreutils' buffer size (BUFSIZE=131072), which works well with kernel readahead.
/// Many small reads allow the kernel to pipeline I/O efficiently, reducing latency
/// vs fewer large reads that stall waiting for the full buffer to fill.
const HASH_READ_BUF: usize = 131072;

// Thread-local reusable buffer for streaming hash I/O.
// Allocated LAZILY (only on first streaming-hash call) so workloads that never
// stream — e.g. "sha256sum *.txt" where every file is slurped whole — never pay
// the HASH_READ_BUF (128KB) allocation at all.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}
475
/// Ensure the streaming buffer is at least HASH_READ_BUF (128KB) bytes.
/// Called only on the streaming path, so small-file workloads never allocate it.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() < HASH_READ_BUF {
        buf.resize(HASH_READ_BUF, 0);
    }
}
484
485// ── Ring-accelerated hash functions (non-Apple targets) ───────────────
486// ring provides BoringSSL assembly with optimized SHA-512/384/256/1 for x86-64/aarch64.
487
488/// Single-shot hash using ring::digest (non-Apple).
489#[cfg(not(target_vendor = "apple"))]
490#[inline]
491fn ring_hash_bytes(algo: &'static ring::digest::Algorithm, data: &[u8]) -> io::Result<String> {
492    Ok(hex_encode(ring::digest::digest(algo, data).as_ref()))
493}
494
495/// Streaming hash using ring::digest::Context (non-Apple).
496#[cfg(not(target_vendor = "apple"))]
497fn ring_hash_reader(
498    algo: &'static ring::digest::Algorithm,
499    mut reader: impl Read,
500) -> io::Result<String> {
501    STREAM_BUF.with(|cell| {
502        let mut buf = cell.borrow_mut();
503        ensure_stream_buf(&mut buf);
504        let mut ctx = ring::digest::Context::new(algo);
505        loop {
506            let n = read_full(&mut reader, &mut buf)?;
507            if n == 0 {
508                break;
509            }
510            ctx.update(&buf[..n]);
511        }
512        Ok(hex_encode(ctx.finish().as_ref()))
513    })
514}
515
516// ── SHA-256 ───────────────────────────────────────────────────────────
517// Linux: OpenSSL libcrypto (SHA-NI accelerated) via dlopen, fallback to sha2 crate.
518// Apple: sha2 crate. Other: ring (BoringSSL assembly).
519
520#[cfg(target_vendor = "apple")]
521fn sha256_bytes(data: &[u8]) -> io::Result<String> {
522    Ok(hash_digest::<sha2::Sha256>(data))
523}
524
525#[cfg(target_os = "linux")]
526fn sha256_bytes(data: &[u8]) -> io::Result<String> {
527    if openssl_evp::is_available() {
528        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha256, data)?;
529        return Ok(hex_encode(&digest));
530    }
531    Ok(hash_digest::<sha2::Sha256>(data))
532}
533
534#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
535fn sha256_bytes(data: &[u8]) -> io::Result<String> {
536    ring_hash_bytes(&ring::digest::SHA256, data)
537}
538
539#[cfg(target_vendor = "apple")]
540fn sha256_reader(reader: impl Read) -> io::Result<String> {
541    hash_reader_impl::<sha2::Sha256>(reader)
542}
543
544#[cfg(target_os = "linux")]
545fn sha256_reader(reader: impl Read) -> io::Result<String> {
546    if openssl_evp::is_available() {
547        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha256, reader)?;
548        return Ok(hex_encode(&digest));
549    }
550    hash_reader_impl::<sha2::Sha256>(reader)
551}
552
553#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
554fn sha256_reader(reader: impl Read) -> io::Result<String> {
555    ring_hash_reader(&ring::digest::SHA256, reader)
556}
557
558// ── SHA-1 ─────────────────────────────────────────────────────────────
559// Linux: OpenSSL libcrypto (SHA-NI accelerated) via dlopen, fallback to ring.
560// Apple: sha1 crate. Other: ring (BoringSSL assembly).
561
562#[cfg(target_vendor = "apple")]
563fn sha1_bytes(data: &[u8]) -> io::Result<String> {
564    Ok(hash_digest::<sha1::Sha1>(data))
565}
566
567#[cfg(target_os = "linux")]
568fn sha1_bytes(data: &[u8]) -> io::Result<String> {
569    if openssl_evp::is_available() {
570        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha1, data)?;
571        return Ok(hex_encode(&digest));
572    }
573    ring_hash_bytes(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data)
574}
575
576#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
577fn sha1_bytes(data: &[u8]) -> io::Result<String> {
578    ring_hash_bytes(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data)
579}
580
581#[cfg(target_vendor = "apple")]
582fn sha1_reader(reader: impl Read) -> io::Result<String> {
583    hash_reader_impl::<sha1::Sha1>(reader)
584}
585
586#[cfg(target_os = "linux")]
587fn sha1_reader(reader: impl Read) -> io::Result<String> {
588    if openssl_evp::is_available() {
589        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha1, reader)?;
590        return Ok(hex_encode(&digest));
591    }
592    ring_hash_reader(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, reader)
593}
594
595#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
596fn sha1_reader(reader: impl Read) -> io::Result<String> {
597    ring_hash_reader(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, reader)
598}
599
600// ── SHA-224 ───────────────────────────────────────────────────────────
601// Linux: OpenSSL libcrypto (SHA-NI accelerated) via dlopen, fallback to sha2 crate.
602// Other: sha2 crate (ring does not support SHA-224).
603
604#[cfg(target_os = "linux")]
605fn sha224_bytes(data: &[u8]) -> io::Result<String> {
606    if openssl_evp::is_available() {
607        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha224, data)?;
608        return Ok(hex_encode(&digest));
609    }
610    Ok(hash_digest::<sha2::Sha224>(data))
611}
612
613#[cfg(not(target_os = "linux"))]
614fn sha224_bytes(data: &[u8]) -> io::Result<String> {
615    Ok(hash_digest::<sha2::Sha224>(data))
616}
617
618#[cfg(target_os = "linux")]
619fn sha224_reader(reader: impl Read) -> io::Result<String> {
620    if openssl_evp::is_available() {
621        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha224, reader)?;
622        return Ok(hex_encode(&digest));
623    }
624    hash_reader_impl::<sha2::Sha224>(reader)
625}
626
627#[cfg(not(target_os = "linux"))]
628fn sha224_reader(reader: impl Read) -> io::Result<String> {
629    hash_reader_impl::<sha2::Sha224>(reader)
630}
631
632// ── SHA-384 ───────────────────────────────────────────────────────────
633// Linux: OpenSSL libcrypto (AVX-512 optimized) via dlopen, fallback to ring (AVX2).
634// Apple: sha2 crate. Other: ring (BoringSSL assembly).
635
636#[cfg(target_vendor = "apple")]
637fn sha384_bytes(data: &[u8]) -> io::Result<String> {
638    Ok(hash_digest::<sha2::Sha384>(data))
639}
640
641#[cfg(target_os = "linux")]
642fn sha384_bytes(data: &[u8]) -> io::Result<String> {
643    if openssl_evp::is_available() {
644        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha384, data)?;
645        return Ok(hex_encode(&digest));
646    }
647    ring_hash_bytes(&ring::digest::SHA384, data)
648}
649
650#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
651fn sha384_bytes(data: &[u8]) -> io::Result<String> {
652    ring_hash_bytes(&ring::digest::SHA384, data)
653}
654
655#[cfg(target_vendor = "apple")]
656fn sha384_reader(reader: impl Read) -> io::Result<String> {
657    hash_reader_impl::<sha2::Sha384>(reader)
658}
659
660#[cfg(target_os = "linux")]
661fn sha384_reader(reader: impl Read) -> io::Result<String> {
662    if openssl_evp::is_available() {
663        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha384, reader)?;
664        return Ok(hex_encode(&digest));
665    }
666    ring_hash_reader(&ring::digest::SHA384, reader)
667}
668
669#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
670fn sha384_reader(reader: impl Read) -> io::Result<String> {
671    ring_hash_reader(&ring::digest::SHA384, reader)
672}
673
674// ── SHA-512 ───────────────────────────────────────────────────────────
675// Linux: OpenSSL libcrypto (AVX-512 optimized) via dlopen, fallback to ring (AVX2).
676// Apple: sha2 crate. Other: ring (BoringSSL assembly).
677
678#[cfg(target_vendor = "apple")]
679fn sha512_bytes(data: &[u8]) -> io::Result<String> {
680    Ok(hash_digest::<sha2::Sha512>(data))
681}
682
683#[cfg(target_os = "linux")]
684fn sha512_bytes(data: &[u8]) -> io::Result<String> {
685    if openssl_evp::is_available() {
686        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha512, data)?;
687        return Ok(hex_encode(&digest));
688    }
689    ring_hash_bytes(&ring::digest::SHA512, data)
690}
691
692#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
693fn sha512_bytes(data: &[u8]) -> io::Result<String> {
694    ring_hash_bytes(&ring::digest::SHA512, data)
695}
696
697#[cfg(target_vendor = "apple")]
698fn sha512_reader(reader: impl Read) -> io::Result<String> {
699    hash_reader_impl::<sha2::Sha512>(reader)
700}
701
702#[cfg(target_os = "linux")]
703fn sha512_reader(reader: impl Read) -> io::Result<String> {
704    if openssl_evp::is_available() {
705        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha512, reader)?;
706        return Ok(hex_encode(&digest));
707    }
708    ring_hash_reader(&ring::digest::SHA512, reader)
709}
710
711#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
712fn sha512_reader(reader: impl Read) -> io::Result<String> {
713    ring_hash_reader(&ring::digest::SHA512, reader)
714}
715
716/// Compute hash of a byte slice directly (zero-copy fast path).
717/// Returns an error if the underlying crypto library rejects the algorithm.
718pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> io::Result<String> {
719    match algo {
720        HashAlgorithm::Sha1 => sha1_bytes(data),
721        HashAlgorithm::Sha224 => sha224_bytes(data),
722        HashAlgorithm::Sha256 => sha256_bytes(data),
723        HashAlgorithm::Sha384 => sha384_bytes(data),
724        HashAlgorithm::Sha512 => sha512_bytes(data),
725        HashAlgorithm::Md5 => md5_bytes(data),
726        HashAlgorithm::Blake2b => {
727            let hash = blake2b_simd::blake2b(data);
728            Ok(hex_encode(hash.as_bytes()))
729        }
730    }
731}
732
/// Hash data and write hex result directly into an output buffer.
/// Returns the number of hex bytes written. Avoids String allocation
/// on the critical single-file fast path.
/// `out` must be at least 128 bytes for BLAKE2b/SHA512 (64 * 2), 64 for SHA256, 32 for MD5, etc.
#[cfg(target_os = "linux")]
pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    match algo {
        HashAlgorithm::Md5 => {
            // For large data, OpenSSL's assembly MD5 is ~1.25x faster than pure Rust.
            // NOTE(review): the cutoff below is 4 MiB, while the original note claimed
            // the dlopen cost amortizes above ~20MB — confirm break-even with a benchmark.
            if data.len() >= 4 * 1024 * 1024 && openssl_evp::is_available() {
                let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Md5, data)?;
                hex_encode_to_slice(&digest, out);
                return Ok(32); // 16-byte digest -> 32 hex chars
            }
            let digest = Md5::digest(data);
            hex_encode_to_slice(&digest, out);
            Ok(32)
        }
        HashAlgorithm::Sha1 => {
            // sha1 crate uses cpufeatures for runtime SHA-NI dispatch (~1.3 GB/s
            // on machines with SHA-NI), matching OpenSSL without dlopen overhead.
            let digest = sha1::Sha1::digest(data);
            hex_encode_to_slice(&digest, out);
            Ok(40) // 20-byte digest -> 40 hex chars
        }
        HashAlgorithm::Sha224 => {
            // sha2 crate has SHA-NI support, comparable to OpenSSL at all sizes.
            let digest = sha2::Sha224::digest(data);
            hex_encode_to_slice(&digest, out);
            Ok(56) // 28-byte digest -> 56 hex chars
        }
        HashAlgorithm::Sha256 => {
            // sha2 crate has SHA-NI support, comparable to OpenSSL at all sizes.
            let digest = sha2::Sha256::digest(data);
            hex_encode_to_slice(&digest, out);
            Ok(64) // 32-byte digest -> 64 hex chars
        }
        HashAlgorithm::Sha384 => {
            // OpenSSL's SHA-384/512 uses AVX-512 vector instructions when available,
            // significantly faster than ring's BoringSSL assembly at all sizes.
            if openssl_evp::is_available() {
                let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha384, data)?;
                hex_encode_to_slice(&digest, out);
                return Ok(96); // 48-byte digest -> 96 hex chars
            }
            let digest = ring::digest::digest(&ring::digest::SHA384, data);
            hex_encode_to_slice(digest.as_ref(), out);
            Ok(96)
        }
        HashAlgorithm::Sha512 => {
            if openssl_evp::is_available() {
                let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha512, data)?;
                hex_encode_to_slice(&digest, out);
                return Ok(128); // 64-byte digest -> 128 hex chars
            }
            let digest = ring::digest::digest(&ring::digest::SHA512, data);
            hex_encode_to_slice(digest.as_ref(), out);
            Ok(128)
        }
        HashAlgorithm::Blake2b => {
            // Default blake2b output is 64 bytes, so this also returns 128.
            let hash = blake2b_simd::blake2b(data);
            let bytes = hash.as_bytes();
            hex_encode_to_slice(bytes, out);
            Ok(bytes.len() * 2)
        }
    }
}
801
/// Hash a single file using raw syscalls and write hex directly to output buffer.
/// Returns number of hex bytes written.
/// This is the absolute minimum-overhead path for single-file hashing:
/// raw open + fstat + read + hash + hex encode, with zero String allocation.
#[cfg(target_os = "linux")]
pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
    use std::os::unix::ffi::OsStrExt;

    // CString::new fails only if the path contains an interior NUL byte.
    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    // O_NOATIME avoids an inode write per hashed file, but is only permitted
    // for files we own; NOATIME_SUPPORTED (process-global flag, defined
    // elsewhere in this file) caches whether it has worked so far.
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        // Capture errno immediately, before any call that could clobber it.
        let err = io::Error::last_os_error();
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            // EPERM with O_NOATIME set: assume the flag caused it (it is rejected
            // for files not owned by the caller); disable it and retry once.
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd_to_buf(algo, fd2, out);
        }
        return Err(err);
    }
    // hash_from_raw_fd_to_buf takes ownership of fd and closes it on all paths.
    hash_from_raw_fd_to_buf(algo, fd, out)
}
834
/// Hash from a raw fd and write the hex digest directly into `out`; returns
/// the number of hex bytes written.
///
/// fd ownership: every path below either closes `fd` explicitly via
/// `libc::close`, or transfers it into a `File` via `File::from_raw_fd`
/// (which closes on drop) — the fd is never leaked or double-closed.
///
/// Tiering:
/// - empty regular file: hash the empty slice (no read)
/// - tiny regular file (<TINY_FILE_LIMIT): stack buffer + raw read loop, zero heap
/// - other regular files: mmap + single-shot hash; streaming read if mmap fails
/// - non-regular fds (pipes, devices, ...): streaming read
///
/// NOTE(review): `out` must be large enough for the digest's hex encoding;
/// the `copy_from_slice` calls below panic otherwise — presumably guaranteed
/// by callers; confirm.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        // fstat failed — close the raw fd before propagating the error.
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &[], out);
    }

    // Tiny files (<8KB): fully raw path — zero heap allocation
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                // EINTR: retry the read. Any other error: close and bail.
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                // EOF before `size` bytes (file shrank after fstat) — hash what we got.
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &buf[..total], out);
    }

    // Regular files: mmap and use hash_bytes_to_buf (pure Rust crate implementations).
    // This avoids the OpenSSL dlopen path (~5ms overhead) that hash_regular_file uses,
    // which is critical for benchmark performance on small/medium files.
    // The sha2 crate uses SHA-NI hardware instructions via cpufeatures on x86-64,
    // so performance is on par with OpenSSL for SHA-256.
    if is_regular && size > 0 {
        use std::os::unix::io::FromRawFd;
        // from_raw_fd transfers ownership: `file` now closes the fd on drop.
        let file = unsafe { File::from_raw_fd(fd) };
        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
        if let Ok(mmap) = mmap_result {
            let _ = mmap.advise(memmap2::Advice::Sequential);
            // PopulateRead needs Linux 5.14+; WillNeed is the fallback hint.
            if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                let _ = mmap.advise(memmap2::Advice::WillNeed);
            }
            return hash_bytes_to_buf(algo, &mmap, out);
        }
        // mmap failed — fall through to streaming
        let hash_str = hash_reader(algo, file)?;
        let hex_bytes = hash_str.as_bytes();
        out[..hex_bytes.len()].copy_from_slice(hex_bytes);
        return Ok(hex_bytes.len());
    }

    // Non-regular files (pipes, etc.): streaming read
    use std::os::unix::io::FromRawFd;
    let file = unsafe { File::from_raw_fd(fd) };
    let hash_str = hash_reader(algo, file)?;
    let hex_bytes = hash_str.as_bytes();
    out[..hex_bytes.len()].copy_from_slice(hex_bytes);
    Ok(hex_bytes.len())
}
923
924// ── MD5 ─────────────────────────────────────────────────────────────
925// Linux: OpenSSL libcrypto (hardware-accelerated) via dlopen, fallback to md-5 crate.
926// Other: md-5 crate (cpufeatures runtime dispatch on supported CPUs).
927
928#[cfg(target_os = "linux")]
929fn md5_bytes(data: &[u8]) -> io::Result<String> {
930    if openssl_evp::is_available() {
931        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Md5, data)?;
932        return Ok(hex_encode(&digest));
933    }
934    Ok(hash_digest::<Md5>(data))
935}
936
/// MD5 of an in-memory byte slice (non-Linux: always the md-5 crate,
/// which dispatches via cpufeatures on supported CPUs).
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> io::Result<String> {
    Ok(hash_digest::<Md5>(data))
}
941
942#[cfg(target_os = "linux")]
943fn md5_reader(reader: impl Read) -> io::Result<String> {
944    if openssl_evp::is_available() {
945        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Md5, reader)?;
946        return Ok(hex_encode(&digest));
947    }
948    hash_reader_impl::<Md5>(reader)
949}
950
/// Streaming MD5 (non-Linux): always the md-5 crate via the generic
/// Digest-based reader loop.
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}
955
956/// Compute hash of data from a reader, returning hex string.
957pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
958    match algo {
959        HashAlgorithm::Sha1 => sha1_reader(reader),
960        HashAlgorithm::Sha224 => sha224_reader(reader),
961        HashAlgorithm::Sha256 => sha256_reader(reader),
962        HashAlgorithm::Sha384 => sha384_reader(reader),
963        HashAlgorithm::Sha512 => sha512_reader(reader),
964        HashAlgorithm::Md5 => md5_reader(reader),
965        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
966    }
967}
968
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
/// All accesses use Relaxed ordering: this is a monotonic best-effort flag, and a
/// racing thread seeing a stale `true` merely costs one extra open() attempt.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
973
974/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
975/// Caches whether O_NOATIME works to avoid double-open on every file.
976#[cfg(target_os = "linux")]
977fn open_noatime(path: &Path) -> io::Result<File> {
978    use std::os::unix::fs::OpenOptionsExt;
979    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
980        match std::fs::OpenOptions::new()
981            .read(true)
982            .custom_flags(libc::O_NOATIME)
983            .open(path)
984        {
985            Ok(f) => return Ok(f),
986            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
987                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
988                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
989            }
990            Err(e) => return Err(e), // Real error, propagate
991        }
992    }
993    File::open(path)
994}
995
/// Non-Linux: O_NOATIME does not exist, so this is a plain `File::open`.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
1000
1001/// Open a file and get its metadata in one step.
1002/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
1003#[cfg(target_os = "linux")]
1004#[inline]
1005fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
1006    let file = open_noatime(path)?;
1007    let fd = {
1008        use std::os::unix::io::AsRawFd;
1009        file.as_raw_fd()
1010    };
1011    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1012    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
1013        return Err(io::Error::last_os_error());
1014    }
1015    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
1016    let size = stat.st_size as u64;
1017    Ok((file, size, is_regular))
1018}
1019
/// Portable variant: open, then query size and file-type via `metadata()`.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let (len, is_file) = (meta.len(), meta.file_type().is_file());
    Ok((file, len, is_file))
}
1027
/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
/// Must not exceed the 8192-byte stack buffers in the tiny-file read paths.
const TINY_FILE_LIMIT: u64 = 8 * 1024;

// Thread-local reusable buffer for single-read hash.
// Grows lazily up to SMALL_FILE_LIMIT (16MB). Initial 64KB allocation
// handles tiny files; larger files trigger one grow that persists for reuse.
// Shared by the generic and BLAKE2b small-file paths on each thread.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
1052
1053/// Optimized hash for large files (>=16MB) on Linux.
1054/// Hash large files (>=16MB) using streaming I/O with fadvise + ring Context.
1055/// Uses sequential fadvise hint for kernel readahead, then streams through
1056/// hash context in large chunks. For large files (>64MB), uses double-buffered
1057/// reader thread to overlap I/O and hashing.
1058#[cfg(target_os = "linux")]
1059fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
1060    // For very large files, double-buffered reader thread overlaps I/O and CPU.
1061    // For medium files, single-thread streaming is faster (avoids thread overhead).
1062    if file_size >= 64 * 1024 * 1024 {
1063        hash_file_pipelined_read(algo, file, file_size)
1064    } else {
1065        hash_file_streaming(algo, file, file_size)
1066    }
1067}
1068
1069/// Simple single-thread streaming hash with fadvise.
1070/// Optimal for files 16-64MB where thread overhead exceeds I/O overlap benefit.
1071#[cfg(target_os = "linux")]
1072fn hash_file_streaming(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
1073    use std::os::unix::io::AsRawFd;
1074
1075    let _ = unsafe {
1076        libc::posix_fadvise(
1077            file.as_raw_fd(),
1078            0,
1079            file_size as i64,
1080            libc::POSIX_FADV_SEQUENTIAL,
1081        )
1082    };
1083
1084    hash_reader(algo, file)
1085}
1086
/// Streaming hash for very large files (>=64MB) using a dedicated reader
/// thread: the reader fills 4MB buffers while the main thread hashes, with
/// buffers recycled between the two threads over a pair of rendezvous
/// channels (blocking recv on both sides, so no extra buffer is allocated).
///
/// NOTE(review): only ONE buffer is ever seeded into `buf_tx` below, so the
/// reader and hasher strictly alternate on that single buffer — there is no
/// actual I/O/CPU overlap despite the "double-buffered" intent. Seeding a
/// second buffer (after the thread is spawned, to avoid blocking on the
/// capacity-1 channel) would enable real overlap — confirm intent before
/// changing.
#[cfg(target_os = "linux")]
fn hash_file_pipelined_read(
    algo: HashAlgorithm,
    mut file: File,
    file_size: u64,
) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4MB per buffer

    // Advisory readahead hint; result ignored.
    let _ = unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        )
    };

    // tx/rx: filled buffers travel reader -> hasher.
    // buf_tx/buf_rx: drained buffers travel hasher -> reader for reuse.
    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);

    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
        // Blocking recv: waits for the hasher to return a buffer, then refills it.
        while let Ok(mut buf) = buf_rx.recv() {
            let mut total = 0;
            while total < buf.len() {
                match file.read(&mut buf[total..]) {
                    Ok(0) => break, // EOF — send whatever was read so far
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            if total == 0 {
                break;
            }
            // send fails only if the hasher hung up (e.g. on error) — stop reading.
            if tx.send((buf, total)).is_err() {
                break;
            }
        }
        Ok(())
    });

    // Use Digest trait for all hash algorithms.
    macro_rules! hash_pipelined_digest {
        ($hasher_init:expr) => {{
            let mut hasher = $hasher_init;
            while let Ok((buf, n)) = rx.recv() {
                hasher.update(&buf[..n]);
                let _ = buf_tx.send(buf);
            }
            Ok(hex_encode(&hasher.finalize()))
        }};
    }

    // Map HashAlgorithm to OpenSSL EvpAlgorithm for pipelined path
    let evp_algo = match algo {
        HashAlgorithm::Md5 => Some(openssl_evp::EvpAlgorithm::Md5),
        HashAlgorithm::Sha1 => Some(openssl_evp::EvpAlgorithm::Sha1),
        HashAlgorithm::Sha224 => Some(openssl_evp::EvpAlgorithm::Sha224),
        HashAlgorithm::Sha256 => Some(openssl_evp::EvpAlgorithm::Sha256),
        HashAlgorithm::Sha384 => Some(openssl_evp::EvpAlgorithm::Sha384),
        HashAlgorithm::Sha512 => Some(openssl_evp::EvpAlgorithm::Sha512),
        HashAlgorithm::Blake2b => None,
    };

    // Prefer the OpenSSL EVP streaming path when libcrypto is loaded;
    // otherwise fall back to per-algorithm Rust implementations.
    let hash_result: io::Result<String> = if let Some(evp) =
        evp_algo.filter(|_| openssl_evp::is_available())
    {
        let digest = openssl_evp::hash_pipelined(evp, &rx, &buf_tx)?;
        Ok(hex_encode(&digest))
    } else {
        match algo {
            HashAlgorithm::Blake2b => {
                let mut state = blake2b_simd::Params::new().to_state();
                while let Ok((buf, n)) = rx.recv() {
                    state.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(state.finalize().as_bytes()))
            }
            HashAlgorithm::Md5 => hash_pipelined_digest!(Md5::new()),
            HashAlgorithm::Sha224 => hash_pipelined_digest!(sha2::Sha224::new()),
            HashAlgorithm::Sha256 => hash_pipelined_digest!(sha2::Sha256::new()),
            HashAlgorithm::Sha1 => {
                let mut ctx = ring::digest::Context::new(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY);
                while let Ok((buf, n)) = rx.recv() {
                    ctx.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(ctx.finish().as_ref()))
            }
            HashAlgorithm::Sha384 => {
                let mut ctx = ring::digest::Context::new(&ring::digest::SHA384);
                while let Ok((buf, n)) = rx.recv() {
                    ctx.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(ctx.finish().as_ref()))
            }
            HashAlgorithm::Sha512 => {
                let mut ctx = ring::digest::Context::new(&ring::digest::SHA512);
                while let Ok((buf, n)) = rx.recv() {
                    ctx.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(ctx.finish().as_ref()))
            }
        }
    };

    // An I/O error in the reader surfaces as a truncated stream on the hash
    // side (recv fails), so the hash may "succeed" on partial data — in that
    // case the reader's error takes precedence.
    match reader_handle.join() {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            if hash_result.is_ok() {
                return Err(e);
            }
        }
        Err(payload) => {
            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
                format!("reader thread panicked: {}", s)
            } else if let Some(s) = payload.downcast_ref::<String>() {
                format!("reader thread panicked: {}", s)
            } else {
                "reader thread panicked".to_string()
            };
            return Err(io::Error::other(msg));
        }
    }

    hash_result
}
1223
1224/// Hash a known-regular file using tiered I/O strategy based on size.
1225/// - Large (>=16MB): mmap with HugePage/PopulateRead hints, pipelined fallback
1226/// - Small/Medium (8KB-16MB): single read into thread-local buffer + single-shot hash
1227///
1228/// SAFETY: mmap is safe for regular local files opened just above. The fallback
1229/// to streaming I/O (hash_reader/hash_file_pipelined) handles mmap failures at
1230/// map time, but cannot protect against post-map truncation. If the file is
1231/// truncated or backing storage disappears after mapping (e.g. NFS), the kernel
1232/// delivers SIGBUS — acceptable, matching other mmap tools.
1233fn hash_regular_file(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
1234    // Large files (>=SMALL_FILE_LIMIT): mmap for zero-copy single-shot hash.
1235    if file_size >= SMALL_FILE_LIMIT {
1236        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1237        if let Ok(mmap) = mmap_result {
1238            #[cfg(target_os = "linux")]
1239            {
1240                let _ = mmap.advise(memmap2::Advice::Sequential);
1241                // PopulateRead (Linux 5.14+) synchronously faults all pages into
1242                // TLB before returning. This costs ~200µs/GB but eliminates TLB
1243                // miss stalls during the hash computation, which is net positive
1244                // for files that fit comfortably in page cache.
1245                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1246                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1247                }
1248            }
1249            return hash_bytes(algo, &mmap);
1250        }
1251        // mmap failed — fall back to streaming I/O
1252        #[cfg(target_os = "linux")]
1253        {
1254            return hash_file_pipelined(algo, file, file_size);
1255        }
1256        #[cfg(not(target_os = "linux"))]
1257        {
1258            return hash_reader(algo, file);
1259        }
1260    }
1261    // Small/medium files (8KB-16MB): single read into thread-local buffer.
1262    // One read() + one single-shot hash call. The thread-local buffer grows
1263    // lazily and persists across files, so allocation cost is amortized.
1264    // This outperforms streaming (128KB chunks × N syscalls × N trait dispatches)
1265    // for files that fit comfortably in the page cache.
1266    #[cfg(target_os = "linux")]
1267    {
1268        use std::os::unix::io::AsRawFd;
1269        let _ = unsafe {
1270            libc::posix_fadvise(
1271                file.as_raw_fd(),
1272                0,
1273                file_size as i64,
1274                libc::POSIX_FADV_SEQUENTIAL,
1275            )
1276        };
1277    }
1278    hash_file_small(algo, file, file_size as usize)
1279}
1280
1281/// Hash a file by path. Uses tiered I/O strategy for regular files,
1282/// streaming read for non-regular files.
1283pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
1284    let (file, file_size, is_regular) = open_and_stat(path)?;
1285
1286    if is_regular && file_size == 0 {
1287        return hash_bytes(algo, &[]);
1288    }
1289
1290    if file_size > 0 && is_regular {
1291        if file_size < TINY_FILE_LIMIT {
1292            return hash_file_tiny(algo, file, file_size as usize);
1293        }
1294        return hash_regular_file(algo, file, file_size);
1295    }
1296
1297    // Non-regular files or fallback: stream
1298    #[cfg(target_os = "linux")]
1299    if file_size >= FADVISE_MIN_SIZE {
1300        use std::os::unix::io::AsRawFd;
1301        let _ = unsafe {
1302            libc::posix_fadvise(
1303                file.as_raw_fd(),
1304                0,
1305                file_size as i64,
1306                libc::POSIX_FADV_SEQUENTIAL,
1307            )
1308        };
1309    }
1310    hash_reader(algo, file)
1311}
1312
1313/// Hash a tiny file (<8KB) using a stack-allocated buffer.
1314/// Single read() syscall, zero heap allocation on the data path.
1315/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
1316#[inline]
1317fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
1318    let mut buf = [0u8; 8192];
1319    let mut total = 0;
1320    // Read with known size — usually completes in a single read() for regular files
1321    while total < size {
1322        match file.read(&mut buf[total..size]) {
1323            Ok(0) => break,
1324            Ok(n) => total += n,
1325            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1326            Err(e) => return Err(e),
1327        }
1328    }
1329    hash_bytes(algo, &buf[..total])
1330}
1331
1332/// Hash a small file by reading it entirely into a thread-local buffer,
1333/// then using the single-shot hash function. Avoids per-file Hasher allocation.
1334#[inline]
1335fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
1336    SMALL_FILE_BUF.with(|cell| {
1337        let mut buf = cell.borrow_mut();
1338        // Reset length but keep allocation, then grow if needed
1339        buf.clear();
1340        buf.reserve(size);
1341        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
1342        // directly and only access buf[..total] where total <= size <= capacity.
1343        unsafe {
1344            buf.set_len(size);
1345        }
1346        let mut total = 0;
1347        while total < size {
1348            match file.read(&mut buf[total..size]) {
1349                Ok(0) => break,
1350                Ok(n) => total += n,
1351                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1352                Err(e) => return Err(e),
1353            }
1354        }
1355        hash_bytes(algo, &buf[..total])
1356    })
1357}
1358
1359/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
1360pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
1361    let stdin = io::stdin();
1362    // Hint kernel for sequential access if stdin is a regular file (redirect)
1363    #[cfg(target_os = "linux")]
1364    {
1365        use std::os::unix::io::AsRawFd;
1366        let fd = stdin.as_raw_fd();
1367        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1368        if unsafe { libc::fstat(fd, &mut stat) } == 0
1369            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1370            && stat.st_size > 0
1371        {
1372            unsafe {
1373                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1374            }
1375        }
1376    }
1377    // Streaming hash — works for both pipe and file-redirect stdin
1378    hash_reader(algo, stdin.lock())
1379}
1380
/// Decide whether to hash the given paths in parallel.
/// Any batch of two or more files parallelizes: rayon's pool is initialized
/// lazily once and reused, so the per-file work-stealing cost (~1µs) is
/// negligible, and skipping a stat()-based size check saves N syscalls.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
1388
1389/// Issue readahead hints for a list of file paths to warm the page cache.
1390/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
1391/// Only issues hints for files >= 1MB; small files are read fast enough
1392/// that the fadvise syscall overhead isn't worth it.
1393#[cfg(target_os = "linux")]
1394pub fn readahead_files(paths: &[&Path]) {
1395    use std::os::unix::io::AsRawFd;
1396    for path in paths {
1397        if let Ok(file) = open_noatime(path) {
1398            if let Ok(meta) = file.metadata() {
1399                let len = meta.len();
1400                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
1401                    unsafe {
1402                        libc::posix_fadvise(
1403                            file.as_raw_fd(),
1404                            0,
1405                            len as i64,
1406                            libc::POSIX_FADV_WILLNEED,
1407                        );
1408                    }
1409                }
1410            }
1411        }
1412    }
1413}
1414
/// Readahead hints are a Linux-only optimization; elsewhere this is a no-op.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No-op on non-Linux
}
1419
1420// --- BLAKE2b variable-length functions (using blake2b_simd) ---
1421
1422/// Hash raw data with BLAKE2b variable output length.
1423/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
1424pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
1425    let hash = blake2b_simd::Params::new()
1426        .hash_length(output_bytes)
1427        .hash(data);
1428    hex_encode(hash.as_bytes())
1429}
1430
1431/// Hash a reader with BLAKE2b variable output length.
1432/// Uses thread-local buffer for cache-friendly streaming.
1433pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
1434    STREAM_BUF.with(|cell| {
1435        let mut buf = cell.borrow_mut();
1436        ensure_stream_buf(&mut buf);
1437        let mut state = blake2b_simd::Params::new()
1438            .hash_length(output_bytes)
1439            .to_state();
1440        loop {
1441            let n = read_full(&mut reader, &mut buf)?;
1442            if n == 0 {
1443                break;
1444            }
1445            state.update(&buf[..n]);
1446        }
1447        Ok(hex_encode(state.finalize().as_bytes()))
1448    })
1449}
1450
/// Hash a file with BLAKE2b and a caller-chosen digest length (bytes).
/// Regular files are tiered by size: stack buffer (<8KB), thread-local
/// buffer (8KB..16MB), mmap/pipeline (>=16MB). Non-regular files stream.
///
/// Note: on non-Linux, a >=16MB file whose mmap fails falls *through* both
/// size branches below and lands on the streaming tail — that fall-through
/// is intentional.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // Large files (>=16MB): I/O pipelining on Linux, mmap on other platforms
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return blake2b_hash_file_pipelined(file, file_size, output_bytes);
            }
            #[cfg(not(target_os = "linux"))]
            {
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(blake2b_hash_data(&mmap, output_bytes));
                }
                // mmap failed: fall through to the streaming tail below.
            }
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files or fallback: stream (advisory readahead hint first).
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        let _ = unsafe {
            libc::posix_fadvise(
                file.as_raw_fd(),
                0,
                file_size as i64,
                libc::POSIX_FADV_SEQUENTIAL,
            )
        };
    }
    blake2b_hash_reader(file, output_bytes)
}
1501
1502/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
1503#[inline]
1504fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1505    let mut buf = [0u8; 8192];
1506    let mut total = 0;
1507    while total < size {
1508        match file.read(&mut buf[total..size]) {
1509            Ok(0) => break,
1510            Ok(n) => total += n,
1511            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1512            Err(e) => return Err(e),
1513        }
1514    }
1515    Ok(blake2b_hash_data(&buf[..total], output_bytes))
1516}
1517
1518/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
1519#[inline]
1520fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1521    SMALL_FILE_BUF.with(|cell| {
1522        let mut buf = cell.borrow_mut();
1523        buf.clear();
1524        buf.reserve(size);
1525        // SAFETY: capacity >= size after clear+reserve
1526        unsafe {
1527            buf.set_len(size);
1528        }
1529        let mut total = 0;
1530        while total < size {
1531            match file.read(&mut buf[total..size]) {
1532                Ok(0) => break,
1533                Ok(n) => total += n,
1534                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1535                Err(e) => return Err(e),
1536            }
1537        }
1538        Ok(blake2b_hash_data(&buf[..total], output_bytes))
1539    })
1540}
1541
1542/// Optimized BLAKE2b hash for large files (>=16MB) on Linux.
1543/// Primary path: mmap with HUGEPAGE + POPULATE_READ for zero-copy, single-shot hash.
1544/// Eliminates thread spawn, channel synchronization, buffer allocation (24MB→0),
1545/// and read() memcpy overhead. Falls back to streaming I/O if mmap fails.
1546#[cfg(target_os = "linux")]
1547fn blake2b_hash_file_pipelined(
1548    file: File,
1549    file_size: u64,
1550    output_bytes: usize,
1551) -> io::Result<String> {
1552    // Primary path: mmap with huge pages for zero-copy single-shot hash.
1553    // Eliminates: thread spawn (~50µs), channel sync, buffer allocs (24MB),
1554    // 13+ read() syscalls, and page-cache → user-buffer memcpy.
1555    match unsafe { memmap2::MmapOptions::new().map(&file) } {
1556        Ok(mmap) => {
1557            // HUGEPAGE MUST come before any page faults: reduces 25,600 minor
1558            // faults (4KB) to ~50 faults (2MB) for 100MB. Saves ~12ms overhead.
1559            if file_size >= 2 * 1024 * 1024 {
1560                let _ = mmap.advise(memmap2::Advice::HugePage);
1561            }
1562            let _ = mmap.advise(memmap2::Advice::Sequential);
1563            // POPULATE_READ (Linux 5.14+): synchronously prefaults all pages with
1564            // huge pages before hashing begins. Falls back to WillNeed on older kernels.
1565            if file_size >= 4 * 1024 * 1024 {
1566                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1567                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1568                }
1569            } else {
1570                let _ = mmap.advise(memmap2::Advice::WillNeed);
1571            }
1572            // Single-shot hash: processes entire file in one call, streaming
1573            // directly from page cache with no user-space buffer copies.
1574            Ok(blake2b_hash_data(&mmap, output_bytes))
1575        }
1576        Err(_) => {
1577            // mmap failed (FUSE, NFS without mmap support, etc.) — fall back
1578            // to streaming pipelined I/O.
1579            blake2b_hash_file_streamed(file, file_size, output_bytes)
1580        }
1581    }
1582}
1583
1584/// Streaming fallback for BLAKE2b large files when mmap is unavailable.
1585/// Uses double-buffered reader thread with fadvise hints.
1586/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
1587#[cfg(target_os = "linux")]
1588fn blake2b_hash_file_streamed(
1589    mut file: File,
1590    file_size: u64,
1591    output_bytes: usize,
1592) -> io::Result<String> {
1593    use std::os::unix::io::AsRawFd;
1594
1595    const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; // 8MB per buffer
1596
1597    // Hint kernel for sequential access
1598    unsafe {
1599        libc::posix_fadvise(
1600            file.as_raw_fd(),
1601            0,
1602            file_size as i64,
1603            libc::POSIX_FADV_SEQUENTIAL,
1604        );
1605    }
1606
1607    // Double-buffered channels: reader fills one buffer while hasher processes another.
1608    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
1609    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
1610    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
1611
1612    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
1613        // Blocking recv reuses hasher's returned buffer (2 buffers total, not 3).
1614        while let Ok(mut buf) = buf_rx.recv() {
1615            let mut total = 0;
1616            while total < buf.len() {
1617                match file.read(&mut buf[total..]) {
1618                    Ok(0) => break,
1619                    Ok(n) => total += n,
1620                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1621                    Err(e) => return Err(e),
1622                }
1623            }
1624            if total == 0 {
1625                break;
1626            }
1627            if tx.send((buf, total)).is_err() {
1628                break;
1629            }
1630        }
1631        Ok(())
1632    });
1633
1634    let mut state = blake2b_simd::Params::new()
1635        .hash_length(output_bytes)
1636        .to_state();
1637    while let Ok((buf, n)) = rx.recv() {
1638        state.update(&buf[..n]);
1639        let _ = buf_tx.send(buf);
1640    }
1641    let hash_result = Ok(hex_encode(state.finalize().as_bytes()));
1642
1643    match reader_handle.join() {
1644        Ok(Ok(())) => {}
1645        Ok(Err(e)) => {
1646            if hash_result.is_ok() {
1647                return Err(e);
1648            }
1649        }
1650        Err(payload) => {
1651            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
1652                format!("reader thread panicked: {}", s)
1653            } else if let Some(s) = payload.downcast_ref::<String>() {
1654                format!("reader thread panicked: {}", s)
1655            } else {
1656                "reader thread panicked".to_string()
1657            };
1658            return Err(io::Error::other(msg));
1659        }
1660    }
1661
1662    hash_result
1663}
1664
1665/// Hash stdin with BLAKE2b variable output length.
1666/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
1667pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
1668    let stdin = io::stdin();
1669    #[cfg(target_os = "linux")]
1670    {
1671        use std::os::unix::io::AsRawFd;
1672        let fd = stdin.as_raw_fd();
1673        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1674        if unsafe { libc::fstat(fd, &mut stat) } == 0
1675            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1676            && stat.st_size > 0
1677        {
1678            unsafe {
1679                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1680            }
1681        }
1682    }
1683    blake2b_hash_reader(stdin.lock(), output_bytes)
1684}
1685
/// Internal enum for file content in batch hashing.
/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
enum FileContent {
    // Memory-mapped view of a regular file — zero-copy.
    Mmap(memmap2::Mmap),
    // Owned bytes read via read(2): used for tiny files, non-regular files,
    // and as the fallback when mmap fails.
    Buf(Vec<u8>),
}
1692
1693impl AsRef<[u8]> for FileContent {
1694    fn as_ref(&self) -> &[u8] {
1695        match self {
1696            FileContent::Mmap(m) => m,
1697            FileContent::Buf(v) => v,
1698        }
1699    }
1700}
1701
1702/// Open a file and load its content for batch hashing.
1703/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
1704/// files (zero-copy), and read-to-end for non-regular files.
1705fn open_file_content(path: &Path) -> io::Result<FileContent> {
1706    let (file, size, is_regular) = open_and_stat(path)?;
1707    if is_regular && size == 0 {
1708        return Ok(FileContent::Buf(Vec::new()));
1709    }
1710    if is_regular && size > 0 {
1711        // Tiny files: read directly into Vec. The mmap syscall + page fault
1712        // overhead exceeds the data transfer cost for files under 8KB.
1713        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
1714        if size < TINY_FILE_LIMIT {
1715            let mut buf = vec![0u8; size as usize];
1716            let mut total = 0;
1717            let mut f = file;
1718            while total < size as usize {
1719                match f.read(&mut buf[total..]) {
1720                    Ok(0) => break,
1721                    Ok(n) => total += n,
1722                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1723                    Err(e) => return Err(e),
1724                }
1725            }
1726            buf.truncate(total);
1727            return Ok(FileContent::Buf(buf));
1728        }
1729        // HUGEPAGE + PopulateRead for optimal page faulting
1730        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1731        if let Ok(mmap) = mmap_result {
1732            #[cfg(target_os = "linux")]
1733            {
1734                if size >= 2 * 1024 * 1024 {
1735                    let _ = mmap.advise(memmap2::Advice::HugePage);
1736                }
1737                let _ = mmap.advise(memmap2::Advice::Sequential);
1738                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1739                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1740                }
1741            }
1742            return Ok(FileContent::Mmap(mmap));
1743        }
1744        // Fallback: read into Vec
1745        let mut buf = vec![0u8; size as usize];
1746        let mut total = 0;
1747        let mut f = file;
1748        while total < size as usize {
1749            match f.read(&mut buf[total..]) {
1750                Ok(0) => break,
1751                Ok(n) => total += n,
1752                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1753                Err(e) => return Err(e),
1754            }
1755        }
1756        buf.truncate(total);
1757        return Ok(FileContent::Buf(buf));
1758    }
1759    // Non-regular: read to end
1760    let mut buf = Vec::new();
1761    let mut f = file;
1762    f.read_to_end(&mut buf)?;
1763    Ok(FileContent::Buf(buf))
1764}
1765
1766/// Read remaining file content from an already-open fd into a Vec.
1767/// Used when the initial stack buffer is exhausted and we need to read
1768/// the rest without re-opening the file.
1769fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1770    let mut buf = Vec::with_capacity(prefix.len() + 65536);
1771    buf.extend_from_slice(prefix);
1772    file.read_to_end(&mut buf)?;
1773    Ok(FileContent::Buf(buf))
1774}
1775
1776/// Open a file and read all content without fstat — just open+read+close.
1777/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
1778/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
1779/// then falls back to larger buffer or read_to_end for bigger files.
1780fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1781    let mut file = open_noatime(path)?;
1782    // Try small stack buffer first — optimal for benchmark's ~55 byte files.
1783    // For tiny files, allocate exact-size Vec to avoid waste.
1784    let mut small_buf = [0u8; 4096];
1785    match file.read(&mut small_buf) {
1786        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1787        Ok(n) if n < small_buf.len() => {
1788            // File fits in small buffer — allocate exact size
1789            let mut vec = Vec::with_capacity(n);
1790            vec.extend_from_slice(&small_buf[..n]);
1791            return Ok(FileContent::Buf(vec));
1792        }
1793        Ok(n) => {
1794            // Might be more data — allocate heap buffer and read into it directly
1795            let mut buf = vec![0u8; 65536];
1796            buf[..n].copy_from_slice(&small_buf[..n]);
1797            let mut total = n;
1798            loop {
1799                match file.read(&mut buf[total..]) {
1800                    Ok(0) => {
1801                        buf.truncate(total);
1802                        return Ok(FileContent::Buf(buf));
1803                    }
1804                    Ok(n) => {
1805                        total += n;
1806                        if total >= buf.len() {
1807                            // File > 64KB: read rest from existing fd
1808                            return read_remaining_to_vec(&buf[..total], file);
1809                        }
1810                    }
1811                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1812                    Err(e) => return Err(e),
1813                }
1814            }
1815        }
1816        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1817            let mut buf = vec![0u8; 65536];
1818            let mut total = 0;
1819            loop {
1820                match file.read(&mut buf[total..]) {
1821                    Ok(0) => {
1822                        buf.truncate(total);
1823                        return Ok(FileContent::Buf(buf));
1824                    }
1825                    Ok(n) => {
1826                        total += n;
1827                        if total >= buf.len() {
1828                            // File > 64KB: read rest from existing fd
1829                            return read_remaining_to_vec(&buf[..total], file);
1830                        }
1831                    }
1832                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1833                    Err(e) => return Err(e),
1834                }
1835            }
1836        }
1837        Err(e) => return Err(e),
1838    }
1839}
1840
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order; a per-file
/// read failure yields Err for that slot without affecting the others.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    // ≤10 files: load sequentially (thread::scope costs ~120µs).
    // >10 files: load in parallel on OS threads.
    // ≥20 files: additionally use the fstat-skipping fast loader.
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        // Ceiling division so every path lands in exactly one chunk.
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        // Scoped threads let workers borrow `paths` without Arc/'static.
        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

        // Chunks are in path order, so flattening join results in spawn
        // order preserves the caller's input order.
            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads, remembering each
    // entry's original index so results can be slotted back afterwards.
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Slot each digest into its original position; failed reads stay None.
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order. The unwrap() is
    // safe: every Ok(_) entry received Some(hash) in the loop above.
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
1930
/// Batch-hash multiple files with BLAKE2b using the best strategy for the workload.
/// Samples a few files to estimate total data size. For small workloads, uses
/// single-core SIMD batch hashing (`blake2b_hash_files_many`) to avoid stat and
/// thread spawn overhead. For larger workloads, uses multi-core work-stealing
/// parallelism where each worker calls `blake2b_hash_file` (with I/O pipelining
/// for large files on Linux).
/// Returns results in input order.
pub fn blake2b_hash_files_parallel(
    paths: &[&Path],
    output_bytes: usize,
) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Sample up to 5 files to decide whether parallel processing is worthwhile,
    // avoiding the cost of statting ALL files (~70µs/file) for tiny workloads.
    // NOTE(review): unreadable files count as size 0 here, biasing the estimate
    // low — acceptable, since such files fail later with a real error.
    let sample_count = n.min(5);
    let mut sample_max: u64 = 0;
    let mut sample_total: u64 = 0;
    for &p in paths.iter().take(sample_count) {
        let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
        sample_total += size;
        sample_max = sample_max.max(size);
    }
    // Extrapolate: average sampled size × total file count (0 when n == 0).
    let estimated_total = if sample_count > 0 {
        sample_total * (n as u64) / (sample_count as u64)
    } else {
        0
    };

    // For small workloads, thread spawn overhead (~120µs × N_threads) exceeds
    // any parallelism benefit. Use SIMD batch hashing directly (no stat pass).
    if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
        return blake2b_hash_files_many(paths, output_bytes);
    }

    // Full stat pass for parallel scheduling — worth it for larger workloads.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the 20 largest files (>=1MB) using async
    // readahead(2): the kernel starts I/O for upcoming files while workers
    // process current ones. readahead(2) returns immediately (non-blocking);
    // the fd is closed right after, the readahead continues regardless.
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing: each worker claims the
    // next unprocessed (largest remaining) file.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = blake2b_hash_file(path, output_bytes);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // Every index was claimed exactly once, so each slot is Some; the
        // fallback error is defensive only.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
2042
2043/// Auto-dispatch multi-file hashing: picks sequential or parallel based on workload.
2044///
2045/// For small files (<64KB sample), sequential avoids thread spawn + readahead overhead
2046/// that dominates for tiny files. On the "100 × 55-byte files" benchmark, this saves
2047/// ~5ms of overhead (thread creation + 200 stat() calls + 100 fadvise() calls).
2048///
2049/// For large files (>=64KB), parallel processing amortizes thread spawn cost over
2050/// substantial per-file hash work. Returns results in input order.
2051pub fn hash_files_auto(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
2052    let n = paths.len();
2053    if n == 0 {
2054        return Vec::new();
2055    }
2056    if n == 1 {
2057        return vec![hash_file_nostat(algo, paths[0])];
2058    }
2059
2060    // Sample up to 3 files (max size) to correctly dispatch mixed workloads
2061    // like `md5sum small.txt big1.gb big2.gb`. Costs at most 3 stat calls (~6µs)
2062    // to save potentially 3-6ms of thread overhead for small-file workloads.
2063    let sample_size = paths
2064        .iter()
2065        .take(3)
2066        .filter_map(|p| std::fs::metadata(p).ok())
2067        .map(|m| m.len())
2068        .max()
2069        .unwrap_or(0);
2070
2071    if sample_size < 65536 {
2072        // Small files: sequential loop avoiding thread spawn overhead.
2073        #[cfg(target_os = "linux")]
2074        {
2075            // Raw syscall path: reuses CString buffer, avoids OpenOptions/File overhead
2076            let mut c_path_buf = Vec::with_capacity(256);
2077            paths
2078                .iter()
2079                .map(|&p| hash_file_raw_nostat(algo, p, &mut c_path_buf))
2080                .collect()
2081        }
2082        #[cfg(not(target_os = "linux"))]
2083        {
2084            paths.iter().map(|&p| hash_file_nostat(algo, p)).collect()
2085        }
2086    } else if n >= 20 {
2087        hash_files_batch(paths, algo)
2088    } else {
2089        hash_files_parallel_fast(paths, algo)
2090    }
2091}
2092
2093/// Batch-hash multiple files with SHA-256/MD5 using work-stealing parallelism.
2094/// Files are sorted by size (largest first) so the biggest files start processing
2095/// immediately. Each worker thread grabs the next unprocessed file via atomic index,
2096/// eliminating tail latency from uneven file sizes.
2097/// Returns results in input order.
2098pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
2099    let n = paths.len();
2100
2101    // Build (original_index, path, size) tuples — stat all files for scheduling.
2102    // The stat cost (~5µs/file) is repaid by better work distribution.
2103    let mut indexed: Vec<(usize, &Path, u64)> = paths
2104        .iter()
2105        .enumerate()
2106        .map(|(i, &p)| {
2107            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
2108            (i, p, size)
2109        })
2110        .collect();
2111
2112    // Sort largest first: ensures big files start hashing immediately while
2113    // small files fill in gaps, minimizing tail latency.
2114    indexed.sort_by(|a, b| b.2.cmp(&a.2));
2115
2116    // Warm page cache for the largest files using async readahead(2).
2117    // Each hash call handles its own mmap prefaulting, but issuing readahead
2118    // here lets the kernel start I/O for upcoming files while workers process
2119    // current ones. readahead(2) returns immediately (non-blocking).
2120    #[cfg(target_os = "linux")]
2121    {
2122        use std::os::unix::io::AsRawFd;
2123        for &(_, path, size) in indexed.iter().take(20) {
2124            if size >= 1024 * 1024 {
2125                if let Ok(file) = open_noatime(path) {
2126                    unsafe {
2127                        libc::readahead(file.as_raw_fd(), 0, size as usize);
2128                    }
2129                }
2130            }
2131        }
2132    }
2133
2134    let num_threads = std::thread::available_parallelism()
2135        .map(|n| n.get())
2136        .unwrap_or(4)
2137        .min(n);
2138
2139    // Atomic work index for dynamic work-stealing.
2140    let work_idx = AtomicUsize::new(0);
2141
2142    std::thread::scope(|s| {
2143        let work_idx = &work_idx;
2144        let indexed = &indexed;
2145
2146        let handles: Vec<_> = (0..num_threads)
2147            .map(|_| {
2148                s.spawn(move || {
2149                    let mut local_results = Vec::new();
2150                    loop {
2151                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2152                        if idx >= indexed.len() {
2153                            break;
2154                        }
2155                        let (orig_idx, path, _size) = indexed[idx];
2156                        let result = hash_file(algo, path);
2157                        local_results.push((orig_idx, result));
2158                    }
2159                    local_results
2160                })
2161            })
2162            .collect();
2163
2164        // Collect results and reorder to match original input order.
2165        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
2166        for handle in handles {
2167            for (orig_idx, result) in handle.join().unwrap() {
2168                results[orig_idx] = Some(result);
2169            }
2170        }
2171        results
2172            .into_iter()
2173            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
2174            .collect()
2175    })
2176}
2177
2178/// Fast parallel hash for multi-file workloads. Skips the stat-all-and-sort phase
2179/// of `hash_files_parallel()` and uses `hash_file_nostat()` per worker to minimize
2180/// per-file syscall overhead. For 100 tiny files, this eliminates ~200 stat() calls
2181/// (100 from the sort phase + 100 from open_and_stat inside each worker).
2182/// Returns results in input order.
2183pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
2184    let n = paths.len();
2185    if n == 0 {
2186        return Vec::new();
2187    }
2188    if n == 1 {
2189        return vec![hash_file_nostat(algo, paths[0])];
2190    }
2191
2192    // Issue readahead for all files (no size threshold — even tiny files benefit
2193    // from batched WILLNEED hints when processing 100+ files)
2194    #[cfg(target_os = "linux")]
2195    readahead_files_all(paths);
2196
2197    let num_threads = std::thread::available_parallelism()
2198        .map(|n| n.get())
2199        .unwrap_or(4)
2200        .min(n);
2201
2202    let work_idx = AtomicUsize::new(0);
2203
2204    std::thread::scope(|s| {
2205        let work_idx = &work_idx;
2206
2207        let handles: Vec<_> = (0..num_threads)
2208            .map(|_| {
2209                s.spawn(move || {
2210                    let mut local_results = Vec::new();
2211                    loop {
2212                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2213                        if idx >= n {
2214                            break;
2215                        }
2216                        let result = hash_file_nostat(algo, paths[idx]);
2217                        local_results.push((idx, result));
2218                    }
2219                    local_results
2220                })
2221            })
2222            .collect();
2223
2224        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
2225        for handle in handles {
2226            for (idx, result) in handle.join().unwrap() {
2227                results[idx] = Some(result);
2228            }
2229        }
2230        results
2231            .into_iter()
2232            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
2233            .collect()
2234    })
2235}
2236
/// Batch-hash multiple files: pre-read all files into memory in parallel,
/// then hash all data in parallel. Optimal for many small files where per-file
/// overhead (open/read/close syscalls) dominates over hash computation.
///
/// Reuses the same parallel file loading pattern as `blake2b_hash_files_many()`.
/// For 100 × 55-byte files: all 5500 bytes are loaded in parallel across threads,
/// then hashed in parallel — minimizing wall-clock time for syscall-bound workloads.
/// Returns results in input order; a failed read yields Err for that slot only.
pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();
    if n == 0 {
        return Vec::new();
    }

    // Issue readahead for all files so the kernel starts I/O early.
    #[cfg(target_os = "linux")]
    readahead_files_all(paths);

    // Phase 1: Load all files into memory in parallel.
    // For 20+ files, use fast path that skips fstat.
    let use_fast = n >= 20;

    let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches.
        // NOTE(review): n <= 10 here implies use_fast is always false in this
        // branch, so the fast loader is effectively parallel-path only.
        paths
            .iter()
            .map(|&path| {
                if use_fast {
                    open_file_content_fast(path)
                } else {
                    open_file_content(path)
                }
            })
            .collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|t| t.get())
            .unwrap_or(4)
            .min(n);
        // Ceiling division so every path lands in exactly one chunk.
        let chunk_size = (n + num_threads - 1) / num_threads;

        // Scoped threads let workers borrow `paths` without Arc/'static;
        // chunks are in path order, so flattening preserves input order.
        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Hash all loaded data via work-stealing. For tiny files the hash
    // is negligible; for larger files the cross-thread parallelism helps.
    let num_hash_threads = std::thread::available_parallelism()
        .map(|t| t.get())
        .unwrap_or(4)
        .min(n);
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let file_data = &file_data;

        let handles: Vec<_> = (0..num_hash_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= n {
                            break;
                        }
                        // Load errors are cloned (kind + message) because the
                        // original io::Error is borrowed, not owned, here.
                        let result = match &file_data[idx] {
                            Ok(content) => hash_bytes(algo, content.as_ref()),
                            Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
                        };
                        local_results.push((idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Slot worker results back into input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (idx, result) in handle.join().unwrap() {
                results[idx] = Some(result);
            }
        }
        // Every index is claimed exactly once; the fallback is defensive only.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
2348
/// Stream-hash a file that already has a prefix read into memory.
/// Feeds `prefix` into the hasher first, then streams the rest from `file`.
/// Avoids re-opening and re-reading the file when the initial buffer is exhausted.
///
/// Backend selection, in order:
/// 1. BLAKE2b always uses `blake2b_simd`.
/// 2. On Linux, the remaining algorithms go through dynamically loaded
///    OpenSSL when libcrypto is available (see `openssl_evp` module).
/// 3. Otherwise: ring for SHA-1/384/512 on non-Apple targets, and the
///    pure-Rust digest crates everywhere else (per the cfg gates below).
fn hash_stream_with_prefix(
    algo: HashAlgorithm,
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    // Blake2b uses its own hasher on all platforms
    if matches!(algo, HashAlgorithm::Blake2b) {
        let mut state = blake2b_simd::Params::new().to_state();
        state.update(prefix);
        return STREAM_BUF.with(|cell| {
            let mut buf = cell.borrow_mut();
            ensure_stream_buf(&mut buf);
            // read_full retries partial reads so each update() sees a full buffer.
            loop {
                let n = read_full(&mut file, &mut buf)?;
                if n == 0 {
                    break;
                }
                state.update(&buf[..n]);
            }
            Ok(hex_encode(state.finalize().as_bytes()))
        });
    }

    // On Linux, try OpenSSL for all supported algorithms
    #[cfg(target_os = "linux")]
    {
        let evp_algo = match algo {
            HashAlgorithm::Md5 => Some(openssl_evp::EvpAlgorithm::Md5),
            HashAlgorithm::Sha1 => Some(openssl_evp::EvpAlgorithm::Sha1),
            HashAlgorithm::Sha224 => Some(openssl_evp::EvpAlgorithm::Sha224),
            HashAlgorithm::Sha256 => Some(openssl_evp::EvpAlgorithm::Sha256),
            HashAlgorithm::Sha384 => Some(openssl_evp::EvpAlgorithm::Sha384),
            HashAlgorithm::Sha512 => Some(openssl_evp::EvpAlgorithm::Sha512),
            HashAlgorithm::Blake2b => None,
        };
        // Only use OpenSSL if libcrypto was successfully dlopen()ed at runtime.
        if let Some(evp) = evp_algo.filter(|_| openssl_evp::is_available()) {
            let digest = openssl_evp::hash_reader_with_prefix(evp, prefix, file)?;
            return Ok(hex_encode(&digest));
        }
    }

    // Fallback backends. The cfg gates choose ring on non-Apple targets for
    // SHA-1/384/512 and the digest-crate implementations on Apple targets.
    match algo {
        HashAlgorithm::Sha224 => hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file),
        HashAlgorithm::Sha256 => hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file),
        HashAlgorithm::Md5 => hash_stream_with_prefix_digest::<md5::Md5>(prefix, file),
        #[cfg(not(target_vendor = "apple"))]
        HashAlgorithm::Sha1 => {
            hash_stream_with_prefix_ring(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, prefix, file)
        }
        #[cfg(target_vendor = "apple")]
        HashAlgorithm::Sha1 => hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file),
        #[cfg(not(target_vendor = "apple"))]
        HashAlgorithm::Sha384 => hash_stream_with_prefix_ring(&ring::digest::SHA384, prefix, file),
        #[cfg(target_vendor = "apple")]
        HashAlgorithm::Sha384 => hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file),
        #[cfg(not(target_vendor = "apple"))]
        HashAlgorithm::Sha512 => hash_stream_with_prefix_ring(&ring::digest::SHA512, prefix, file),
        #[cfg(target_vendor = "apple")]
        HashAlgorithm::Sha512 => hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file),
        // Blake2b was fully handled above; reaching this arm is a logic error.
        HashAlgorithm::Blake2b => unreachable!(),
    }
}
2414
2415/// Generic stream-hash with prefix using Digest trait (all platforms).
2416fn hash_stream_with_prefix_digest<D: digest::Digest>(
2417    prefix: &[u8],
2418    mut file: File,
2419) -> io::Result<String> {
2420    STREAM_BUF.with(|cell| {
2421        let mut buf = cell.borrow_mut();
2422        ensure_stream_buf(&mut buf);
2423        let mut hasher = D::new();
2424        hasher.update(prefix);
2425        loop {
2426            let n = read_full(&mut file, &mut buf)?;
2427            if n == 0 {
2428                break;
2429            }
2430            hasher.update(&buf[..n]);
2431        }
2432        Ok(hex_encode(&hasher.finalize()))
2433    })
2434}
2435
2436/// Stream-hash with prefix using ring's BoringSSL assembly (non-Apple targets).
2437#[cfg(not(target_vendor = "apple"))]
2438fn hash_stream_with_prefix_ring(
2439    algo: &'static ring::digest::Algorithm,
2440    prefix: &[u8],
2441    mut file: File,
2442) -> io::Result<String> {
2443    STREAM_BUF.with(|cell| {
2444        let mut buf = cell.borrow_mut();
2445        ensure_stream_buf(&mut buf);
2446        let mut ctx = ring::digest::Context::new(algo);
2447        ctx.update(prefix);
2448        loop {
2449            let n = read_full(&mut file, &mut buf)?;
2450            if n == 0 {
2451                break;
2452            }
2453            ctx.update(&buf[..n]);
2454        }
2455        Ok(hex_encode(ctx.finish().as_ref()))
2456    })
2457}
2458
2459/// Hash a file without fstat — just open, read until EOF, hash.
2460/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
2461/// Uses a two-tier buffer strategy: small stack buffer (4KB) for the initial read,
2462/// then falls back to a larger stack buffer (64KB) or streaming hash for bigger files.
2463/// For benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
2464pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2465    let mut file = open_noatime(path)?;
2466    // First try a small stack buffer — optimal for tiny files (< 4KB).
2467    // Most "many_files" benchmark files are ~55 bytes, so this completes
2468    // with a single read() syscall and no fallback.
2469    let mut small_buf = [0u8; 4096];
2470    match file.read(&mut small_buf) {
2471        Ok(0) => return hash_bytes(algo, &[]),
2472        Ok(n) if n < small_buf.len() => {
2473            // File fits in small buffer — hash directly (common case)
2474            return hash_bytes(algo, &small_buf[..n]);
2475        }
2476        Ok(n) => {
2477            // Might be more data — fall back to larger buffer
2478            let mut buf = [0u8; 65536];
2479            buf[..n].copy_from_slice(&small_buf[..n]);
2480            let mut total = n;
2481            loop {
2482                match file.read(&mut buf[total..]) {
2483                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2484                    Ok(n) => {
2485                        total += n;
2486                        if total >= buf.len() {
2487                            // File > 64KB: stream-hash from existing fd instead of
2488                            // re-opening. Feed already-read prefix, continue streaming.
2489                            return hash_stream_with_prefix(algo, &buf[..total], file);
2490                        }
2491                    }
2492                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2493                    Err(e) => return Err(e),
2494                }
2495            }
2496        }
2497        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
2498            // Retry with full buffer on interrupt
2499            let mut buf = [0u8; 65536];
2500            let mut total = 0;
2501            loop {
2502                match file.read(&mut buf[total..]) {
2503                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2504                    Ok(n) => {
2505                        total += n;
2506                        if total >= buf.len() {
2507                            // File > 64KB: stream-hash from existing fd
2508                            return hash_stream_with_prefix(algo, &buf[..total], file);
2509                        }
2510                    }
2511                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2512                    Err(e) => return Err(e),
2513                }
2514            }
2515        }
2516        Err(e) => return Err(e),
2517    }
2518}
2519
/// Hash a small file using raw Linux syscalls without fstat.
/// For the multi-file sequential path where we already know files are small.
/// Avoids: OpenOptions builder, CString per-file alloc (reuses caller's buffer),
/// fstat overhead (unnecessary when we just need open+read+close).
/// Returns hash as hex string.
#[cfg(target_os = "linux")]
fn hash_file_raw_nostat(
    algo: HashAlgorithm,
    path: &Path,
    c_path_buf: &mut Vec<u8>,
) -> io::Result<String> {
    use std::os::unix::ffi::OsStrExt;

    let path_bytes = path.as_os_str().as_bytes();

    // Reuse caller's buffer for null-terminated path (avoids heap alloc per file)
    // Unix paths cannot contain interior NUL bytes, so appending a single
    // terminator is sufficient — no CString validation needed here.
    c_path_buf.clear();
    c_path_buf.reserve(path_bytes.len() + 1);
    c_path_buf.extend_from_slice(path_bytes);
    c_path_buf.push(0);

    // O_NOATIME skips the atime update but the kernel rejects it with EPERM
    // unless the caller owns the file; the global flag remembers a failure so
    // we stop asking for it process-wide.
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path_buf.as_ptr() as *const libc::c_char, flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM while O_NOATIME was requested: disable the flag and retry once.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe {
                libc::open(
                    c_path_buf.as_ptr() as *const libc::c_char,
                    libc::O_RDONLY | libc::O_CLOEXEC,
                )
            };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            // hash_fd_small takes ownership of the fd and closes it on all paths.
            return hash_fd_small(algo, fd2);
        }
        return Err(err);
    }
    // hash_fd_small takes ownership of the fd and closes it on all paths.
    hash_fd_small(algo, fd)
}
2566
/// Read a small file from fd, hash it, close fd. No fstat needed.
///
/// Takes ownership of `fd`: every return path either closes it explicitly or
/// wraps it in a `File` whose drop closes it.
#[cfg(target_os = "linux")]
#[inline]
fn hash_fd_small(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    let mut buf = [0u8; 4096];
    // First read, retried on EINTR; `ret` is the byte count (>= 0) on success.
    let n = loop {
        let ret = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
        if ret >= 0 {
            break ret;
        }
        let err = io::Error::last_os_error();
        if err.kind() == io::ErrorKind::Interrupted {
            continue;
        }
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    };
    let n = n as usize;
    if n < buf.len() {
        // File fits in 4KB — common case for small files
        // NOTE(review): a short read is treated as EOF; this holds for regular
        // files on Linux but not for pipes/sockets — confirm callers only pass
        // regular-file fds here.
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..n]);
    }
    // File > 4KB: fall back to hash_file_nostat-style reading
    // Wrap fd in File for RAII close
    use std::os::unix::io::FromRawFd;
    // SAFETY: we own `fd` and transfer ownership to `File` exactly once here.
    let mut file = unsafe { File::from_raw_fd(fd) };
    let mut big_buf = [0u8; 65536];
    big_buf[..n].copy_from_slice(&buf[..n]);
    let mut total = n;
    loop {
        match std::io::Read::read(&mut file, &mut big_buf[total..]) {
            Ok(0) => return hash_bytes(algo, &big_buf[..total]),
            Ok(n) => {
                total += n;
                if total >= big_buf.len() {
                    // Buffer full (file > 64KB): hand the already-read prefix
                    // plus the open fd to the streaming hasher.
                    return hash_stream_with_prefix(algo, &big_buf[..total], file);
                }
            }
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
}
2615
2616/// Hash a single file using raw Linux syscalls for minimum overhead.
2617/// Bypasses Rust's File abstraction entirely: raw open/fstat/read/close.
2618/// For the single-file fast path, this eliminates OpenOptions builder,
2619/// CString heap allocation, File wrapper overhead, and Read trait dispatch.
2620///
2621/// Size-based dispatch:
2622/// - Tiny (<8KB): stack buffer + raw read + hash_bytes (3 syscalls total)
2623/// - Small (8KB-16MB): wraps fd in File, reads into thread-local buffer
2624/// - Large (>=16MB): wraps fd in File, mmaps with HugePage + PopulateRead
2625/// - Non-regular: wraps fd in File, streaming hash_reader
2626#[cfg(target_os = "linux")]
2627pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2628    use std::os::unix::ffi::OsStrExt;
2629
2630    let path_bytes = path.as_os_str().as_bytes();
2631    let c_path = std::ffi::CString::new(path_bytes)
2632        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;
2633
2634    // Raw open with O_RDONLY | O_CLOEXEC, optionally O_NOATIME
2635    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
2636    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
2637        flags |= libc::O_NOATIME;
2638    }
2639
2640    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
2641    if fd < 0 {
2642        let err = io::Error::last_os_error();
2643        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
2644            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
2645            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
2646            if fd2 < 0 {
2647                return Err(io::Error::last_os_error());
2648            }
2649            return hash_from_raw_fd(algo, fd2);
2650        }
2651        return Err(err);
2652    }
2653    hash_from_raw_fd(algo, fd)
2654}
2655
/// Hash from a raw fd — dispatches by file size for optimal I/O strategy.
/// Handles tiny (stack buffer), small (thread-local buffer), large (mmap), and
/// non-regular (streaming) files.
///
/// Takes ownership of `fd`: every return path either closes it explicitly or
/// wraps it in a `File` for RAII close.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    // Raw fstat to determine size and type
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &[]);
    }

    // Tiny files (<8KB): raw read into stack buffer, no File wrapper needed.
    // Entire I/O in 3 raw syscalls: open + read + close.
    // NOTE(review): assumes TINY_FILE_LIMIT <= 8192 so the stack buffer can
    // hold the full file — confirm against the constant's definition.
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        // Read until `size` bytes are buffered or EOF (a short total can only
        // happen if the file shrank between fstat and read).
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..total]);
    }

    // For larger files, wrap fd in File for RAII close and existing optimized paths.
    use std::os::unix::io::FromRawFd;
    // SAFETY: we own `fd`; ownership is transferred to `File` exactly once.
    let file = unsafe { File::from_raw_fd(fd) };

    if is_regular && size > 0 {
        return hash_regular_file(algo, file, size);
    }

    // Non-regular files (pipes, devices, etc.): streaming hash
    hash_reader(algo, file)
}
2726
2727/// Issue readahead hints for ALL file paths (no size threshold).
2728/// For multi-file benchmarks, even small files benefit from batched readahead.
2729#[cfg(target_os = "linux")]
2730pub fn readahead_files_all(paths: &[&Path]) {
2731    use std::os::unix::io::AsRawFd;
2732    for path in paths {
2733        if let Ok(file) = open_noatime(path) {
2734            if let Ok(meta) = file.metadata() {
2735                if meta.file_type().is_file() {
2736                    let len = meta.len();
2737                    unsafe {
2738                        libc::posix_fadvise(
2739                            file.as_raw_fd(),
2740                            0,
2741                            len as i64,
2742                            libc::POSIX_FADV_WILLNEED,
2743                        );
2744                    }
2745                }
2746            }
2747        }
2748    }
2749}
2750
/// No-op stub: readahead hints (posix_fadvise) are only issued on Linux.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
2753
/// Print hash result in GNU format: "hash  filename\n"
/// Uses raw byte writes to avoid std::fmt overhead.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // "  " in text mode, " *" in binary mode — the GNU separator convention.
    let sep: &[u8; 2] = if binary { b" *" } else { b"  " };
    out.write_all(hash.as_bytes())?;
    out.write_all(sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\n")
}
2768
/// Print hash in GNU format with NUL terminator instead of newline.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // Same separator convention as print_hash; only the terminator differs.
    let sep: &[u8; 2] = if binary { b" *" } else { b"  " };
    out.write_all(hash.as_bytes())?;
    out.write_all(sep)?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\0")
}
2782
2783// ── Single-write output buffer ─────────────────────────────────────
2784// For multi-file workloads, batch the entire "hash  filename\n" line into
2785// a single write() call. This halves the number of BufWriter flushes.
2786
// Thread-local output line buffer for batched writes.
// Reused across files to avoid per-file allocation; 256 bytes holds a
// 128-char SHA-512/BLAKE2b hex digest plus a typical filename without growing.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
2792
2793/// Build and write the standard GNU hash output line in a single write() call.
2794/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
2795/// For escaped filenames: "\hash  escaped_filename\n".
2796#[inline]
2797pub fn write_hash_line(
2798    out: &mut impl Write,
2799    hash: &str,
2800    filename: &str,
2801    binary: bool,
2802    zero: bool,
2803    escaped: bool,
2804) -> io::Result<()> {
2805    LINE_BUF.with(|cell| {
2806        let mut buf = cell.borrow_mut();
2807        buf.clear();
2808        let mode = if binary { b'*' } else { b' ' };
2809        let term = if zero { b'\0' } else { b'\n' };
2810        if escaped {
2811            buf.push(b'\\');
2812        }
2813        buf.extend_from_slice(hash.as_bytes());
2814        buf.push(b' ');
2815        buf.push(mode);
2816        buf.extend_from_slice(filename.as_bytes());
2817        buf.push(term);
2818        out.write_all(&buf)
2819    })
2820}
2821
2822/// Build and write BSD tag format output in a single write() call.
2823/// Format: "ALGO (filename) = hash\n"
2824#[inline]
2825pub fn write_hash_tag_line(
2826    out: &mut impl Write,
2827    algo_name: &str,
2828    hash: &str,
2829    filename: &str,
2830    zero: bool,
2831) -> io::Result<()> {
2832    LINE_BUF.with(|cell| {
2833        let mut buf = cell.borrow_mut();
2834        buf.clear();
2835        let term = if zero { b'\0' } else { b'\n' };
2836        buf.extend_from_slice(algo_name.as_bytes());
2837        buf.extend_from_slice(b" (");
2838        buf.extend_from_slice(filename.as_bytes());
2839        buf.extend_from_slice(b") = ");
2840        buf.extend_from_slice(hash.as_bytes());
2841        buf.push(term);
2842        out.write_all(&buf)
2843    })
2844}
2845
2846/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
2847pub fn print_hash_tag(
2848    out: &mut impl Write,
2849    algo: HashAlgorithm,
2850    hash: &str,
2851    filename: &str,
2852) -> io::Result<()> {
2853    out.write_all(algo.name().as_bytes())?;
2854    out.write_all(b" (")?;
2855    out.write_all(filename.as_bytes())?;
2856    out.write_all(b") = ")?;
2857    out.write_all(hash.as_bytes())?;
2858    out.write_all(b"\n")
2859}
2860
2861/// Print hash in BSD tag format with NUL terminator.
2862pub fn print_hash_tag_zero(
2863    out: &mut impl Write,
2864    algo: HashAlgorithm,
2865    hash: &str,
2866    filename: &str,
2867) -> io::Result<()> {
2868    out.write_all(algo.name().as_bytes())?;
2869    out.write_all(b" (")?;
2870    out.write_all(filename.as_bytes())?;
2871    out.write_all(b") = ")?;
2872    out.write_all(hash.as_bytes())?;
2873    out.write_all(b"\0")
2874}
2875
/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash" for 512-bit, or
/// "BLAKE2b-256 (filename) = hash" for other lengths.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        // Default digest length gets the bare algorithm name.
        512 => out.write_all(b"BLAKE2b (")?,
        // Rare non-512 path: write! formatting overhead is negligible here.
        n => write!(out, "BLAKE2b-{} (", n)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2896
/// Print hash in BSD tag format with BLAKE2b length info and NUL terminator.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        // Default digest length gets the bare algorithm name.
        512 => out.write_all(b"BLAKE2b (")?,
        n => write!(out, "BLAKE2b-{} (", n)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2914
/// Options for check mode.
pub struct CheckOptions {
    /// Suppress per-file "OK" lines; mismatches and errors are still printed.
    pub quiet: bool,
    /// Print nothing at all; caller inspects the returned `CheckResult`.
    pub status_only: bool,
    /// NOTE(review): not read by `check_file` itself — presumably the caller
    /// uses it (with `format_errors`) to decide the exit status; confirm.
    pub strict: bool,
    /// Emit a warning to `err_out` for each improperly formatted checksum line.
    pub warn: bool,
    /// Silently skip (and count) listed files that do not exist.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
2927
/// Result of check mode verification.
pub struct CheckResult {
    /// Files whose computed hash matched the expected hash.
    pub ok: usize,
    /// Files whose computed hash did not match the expected hash.
    pub mismatches: usize,
    /// Lines that could not be parsed as any supported checksum format.
    pub format_errors: usize,
    /// Files that could not be opened or read.
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
2937
2938/// Verify checksums from a check file.
2939/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
2940pub fn check_file<R: BufRead>(
2941    algo: HashAlgorithm,
2942    reader: R,
2943    opts: &CheckOptions,
2944    out: &mut impl Write,
2945    err_out: &mut impl Write,
2946) -> io::Result<CheckResult> {
2947    let quiet = opts.quiet;
2948    let status_only = opts.status_only;
2949    let warn = opts.warn;
2950    let ignore_missing = opts.ignore_missing;
2951    let mut ok_count = 0;
2952    let mut mismatch_count = 0;
2953    let mut format_errors = 0;
2954    let mut read_errors = 0;
2955    let mut ignored_missing_count = 0;
2956    let mut line_num = 0;
2957
2958    for line_result in reader.lines() {
2959        line_num += 1;
2960        let line = line_result?;
2961        let line = line.trim_end();
2962
2963        if line.is_empty() {
2964            continue;
2965        }
2966
2967        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
2968        let (expected_hash, filename) = match parse_check_line(line) {
2969            Some(v) => v,
2970            None => {
2971                format_errors += 1;
2972                if warn {
2973                    out.flush()?;
2974                    if opts.warn_prefix.is_empty() {
2975                        writeln!(
2976                            err_out,
2977                            "line {}: improperly formatted {} checksum line",
2978                            line_num,
2979                            algo.name()
2980                        )?;
2981                    } else {
2982                        writeln!(
2983                            err_out,
2984                            "{}: {}: improperly formatted {} checksum line",
2985                            opts.warn_prefix,
2986                            line_num,
2987                            algo.name()
2988                        )?;
2989                    }
2990                }
2991                continue;
2992            }
2993        };
2994
2995        // Compute actual hash
2996        let actual = match hash_file(algo, Path::new(filename)) {
2997            Ok(h) => h,
2998            Err(e) => {
2999                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
3000                    ignored_missing_count += 1;
3001                    continue;
3002                }
3003                read_errors += 1;
3004                if !status_only {
3005                    out.flush()?;
3006                    writeln!(err_out, "{}: {}", filename, e)?;
3007                    writeln!(out, "{}: FAILED open or read", filename)?;
3008                }
3009                continue;
3010            }
3011        };
3012
3013        if actual.eq_ignore_ascii_case(expected_hash) {
3014            ok_count += 1;
3015            if !quiet && !status_only {
3016                writeln!(out, "{}: OK", filename)?;
3017            }
3018        } else {
3019            mismatch_count += 1;
3020            if !status_only {
3021                writeln!(out, "{}: FAILED", filename)?;
3022            }
3023        }
3024    }
3025
3026    Ok(CheckResult {
3027        ok: ok_count,
3028        mismatches: mismatch_count,
3029        format_errors,
3030        read_errors,
3031        ignored_missing: ignored_missing_count,
3032    })
3033}
3034
/// Parse a checksum line in any supported format.
/// Returns `(expected_hash, filename)` for BSD tag lines
/// ("ALGO (filename) = hash"), GNU text lines ("hash  filename"), and
/// GNU binary lines ("hash *filename"); `None` if nothing matches.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // BSD tag format: "ALGO (filename) = hash"
    const TAG_PREFIXES: [&str; 7] = [
        "MD5 (",
        "SHA1 (",
        "SHA224 (",
        "SHA256 (",
        "SHA384 (",
        "SHA512 (",
        "BLAKE2b (",
    ];
    let tagged = TAG_PREFIXES
        .iter()
        .find_map(|prefix| line.strip_prefix(prefix))
        .or_else(|| {
            // "BLAKE2b-NNN (filename) = hash" — NNN must be all digits.
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[sp + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = tagged {
        if let Some(end) = rest.find(") = ") {
            return Some((&rest[end + 4..], &rest[..end]));
        }
        // Malformed tag line: fall through and try the GNU formats.
    }

    // Backslash-escaped lines carry a leading '\' before the hash.
    let line = line.strip_prefix('\\').unwrap_or(line);

    // GNU formats: "hash  filename" (text), then "hash *filename" (binary).
    for sep in ["  ", " *"] {
        if let Some(idx) = line.find(sep) {
            return Some((&line[..idx], &line[idx + 2..]));
        }
    }
    None
}
3083
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name (e.g., BLAKE2b-256 -> Some(256)).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.split_once(") = ")?;
    // Optional bit length after the last '-' in the algo name; a non-numeric
    // suffix simply yields None.
    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());
    Some((hash, filename, bits))
}
3104
/// Read as many bytes as possible into `buf`, retrying on partial reads and
/// EINTR, so each hash update gets a full buffer (fewer update calls = less
/// overhead). A return value smaller than `buf.len()` means EOF was reached.
///
/// `Interrupted` errors are retried on every read, including the first one —
/// previously a first-read EINTR was propagated to the caller as an error
/// while later reads retried it, aborting a hash that should have succeeded.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            // EOF: caller observes total < buf.len() and stops looping.
            Ok(0) => break,
            Ok(n) => total += n,
            // EINTR is transient; retry rather than fail the whole hash.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3127
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
/// (`while` loop because `for` is not yet usable in `const fn`.)
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Fast hex encoding using the 2-byte pair lookup table — one table lookup
/// per input byte. Builds the output in a zero-initialized buffer, then
/// converts to `String` without UTF-8 re-validation.
///
/// This replaces the previous `set_len`-before-write scheme, which handed a
/// `&mut [u8]` over uninitialized memory to the encoder — undefined behavior
/// even though every byte was subsequently overwritten. Zero-filling at most
/// 128 bytes (a SHA-512/BLAKE2b digest) is negligible by comparison.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = vec![0u8; bytes.len() * 2];
    hex_encode_to_slice(bytes, &mut out);
    // SAFETY: hex_encode_to_slice writes only ASCII hex digits ('0'-'9', 'a'-'f'),
    // and the remaining bytes are ASCII NUL from the zero fill — all valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}

/// Encode bytes as hex directly into a pre-allocated output slice.
/// Output slice must be at least `bytes.len() * 2` bytes long.
///
/// `chunks_exact_mut(2)` guarantees each output pair is exactly two bytes, so
/// the compiler elides per-byte bounds checks without any `unsafe` pointer math
/// (`b as usize` is always < 256, the table's length, so that index is free too).
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    for (pair, &b) in out.chunks_exact_mut(2).zip(bytes) {
        pair.copy_from_slice(&HEX_TABLE[b as usize]);
    }
}