// coreutils_rs/hash/core.rs
use std::cell::RefCell;
use std::fs::File;
use std::io::{self, BufRead, Read, Write};
use std::path::Path;

use std::sync::atomic::AtomicUsize;
#[cfg(target_os = "linux")]
use std::sync::atomic::{AtomicBool, Ordering};

use digest::Digest;
use md5::Md5;
// ── OpenSSL dynamic loading for all hash algorithms (Linux only) ──────
// GNU coreutils links against OpenSSL which has hardware-accelerated hash
// instructions (SHA-NI for SHA-1/SHA-256, AVX-512 for SHA-512 on x86_64).
// We dynamically load libcrypto at runtime via dlopen/dlsym to get the same
// performance without requiring a compile-time dependency on libssl-dev.
// If libcrypto is unavailable, we fall back to ring/sha2/md5 crates.
#[cfg(target_os = "linux")]
mod openssl_evp {
    use std::ffi::CStr;
    use std::io;
    use std::ptr;
    use std::sync::OnceLock;

    // OpenSSL EVP types (opaque pointers)
    type EvpMdCtx = *mut libc::c_void;
    type EvpMd = *const libc::c_void;
    type Engine = *const libc::c_void;

    // Function pointer types matching OpenSSL's EVP API
    type FnEvpMdGetter = unsafe extern "C" fn() -> EvpMd;
    type FnEvpMdCtxNew = unsafe extern "C" fn() -> EvpMdCtx;
    type FnEvpDigestInitEx = unsafe extern "C" fn(EvpMdCtx, EvpMd, Engine) -> libc::c_int;
    type FnEvpDigestUpdate =
        unsafe extern "C" fn(EvpMdCtx, *const libc::c_void, libc::size_t) -> libc::c_int;
    type FnEvpDigestFinalEx =
        unsafe extern "C" fn(EvpMdCtx, *mut u8, *mut libc::c_uint) -> libc::c_int;
    type FnEvpMdCtxFree = unsafe extern "C" fn(EvpMdCtx);

    /// All libcrypto entry points this module needs, resolved once via dlsym.
    /// One `EVP_<algo>()` getter per supported algorithm plus the generic
    /// context create/init/update/final/free functions.
    struct OpenSslFns {
        evp_md5: FnEvpMdGetter,
        evp_sha1: FnEvpMdGetter,
        evp_sha224: FnEvpMdGetter,
        evp_sha256: FnEvpMdGetter,
        evp_sha384: FnEvpMdGetter,
        evp_sha512: FnEvpMdGetter,
        evp_md_ctx_new: FnEvpMdCtxNew,
        evp_digest_init_ex: FnEvpDigestInitEx,
        evp_digest_update: FnEvpDigestUpdate,
        evp_digest_final_ex: FnEvpDigestFinalEx,
        evp_md_ctx_free: FnEvpMdCtxFree,
        _handle: *mut libc::c_void, // kept alive so symbols stay valid
    }

    // SAFETY: The function pointers are valid for the lifetime of the process
    // (dlopen handle is never closed). The pointers themselves are immutable
    // after initialization via OnceLock.
    unsafe impl Send for OpenSslFns {}
    unsafe impl Sync for OpenSslFns {}

    /// Cached OpenSSL function pointers. Initialized once on first use.
    /// `None` inside means libcrypto was not found or symbols were missing.
    static FNS: OnceLock<Option<OpenSslFns>> = OnceLock::new();

    /// dlsym wrapper that maps a null result (symbol not found) to `None`
    /// so symbol resolution can be chained with `?` in `try_load`.
    fn dlsym_checked(handle: *mut libc::c_void, name: &CStr) -> Option<*mut libc::c_void> {
        let ptr = unsafe { libc::dlsym(handle, name.as_ptr()) };
        if ptr.is_null() { None } else { Some(ptr) }
    }

    /// Guard that ensures dlclose is called if we fail to resolve all symbols.
    struct DlopenHandle(*mut libc::c_void);
    impl Drop for DlopenHandle {
        fn drop(&mut self) {
            unsafe {
                libc::dlclose(self.0);
            }
        }
    }

    /// Attempt to dlopen libcrypto and resolve every symbol in `OpenSslFns`.
    /// Returns `None` if the library is absent or any symbol is missing;
    /// in that case the library handle is closed again by `DlopenHandle`.
    fn try_load() -> Option<OpenSslFns> {
        // Try OpenSSL 3.x first, then 1.1.x
        let handle = unsafe {
            let h = libc::dlopen(
                c"libcrypto.so.3".as_ptr(),
                libc::RTLD_LAZY | libc::RTLD_LOCAL,
            );
            if h.is_null() {
                let h = libc::dlopen(
                    c"libcrypto.so.1.1".as_ptr(),
                    libc::RTLD_LAZY | libc::RTLD_LOCAL,
                );
                if h.is_null() {
                    return None;
                }
                h
            } else {
                h
            }
        };

        // Guard ensures dlclose on early return (any dlsym failure).
        let guard = DlopenHandle(handle);

        unsafe {
            let evp_md5: FnEvpMdGetter = std::mem::transmute(dlsym_checked(handle, c"EVP_md5")?);
            let evp_sha1: FnEvpMdGetter = std::mem::transmute(dlsym_checked(handle, c"EVP_sha1")?);
            let evp_sha224: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha224")?);
            let evp_sha256: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha256")?);
            let evp_sha384: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha384")?);
            let evp_sha512: FnEvpMdGetter =
                std::mem::transmute(dlsym_checked(handle, c"EVP_sha512")?);
            let evp_md_ctx_new: FnEvpMdCtxNew =
                std::mem::transmute(dlsym_checked(handle, c"EVP_MD_CTX_new")?);
            let evp_digest_init_ex: FnEvpDigestInitEx =
                std::mem::transmute(dlsym_checked(handle, c"EVP_DigestInit_ex")?);
            let evp_digest_update: FnEvpDigestUpdate =
                std::mem::transmute(dlsym_checked(handle, c"EVP_DigestUpdate")?);
            let evp_digest_final_ex: FnEvpDigestFinalEx =
                std::mem::transmute(dlsym_checked(handle, c"EVP_DigestFinal_ex")?);
            let evp_md_ctx_free: FnEvpMdCtxFree =
                std::mem::transmute(dlsym_checked(handle, c"EVP_MD_CTX_free")?);

            // All symbols resolved — prevent dlclose by forgetting the guard.
            std::mem::forget(guard);

            Some(OpenSslFns {
                evp_md5,
                evp_sha1,
                evp_sha224,
                evp_sha256,
                evp_sha384,
                evp_sha512,
                evp_md_ctx_new,
                evp_digest_init_ex,
                evp_digest_update,
                evp_digest_final_ex,
                evp_md_ctx_free,
                _handle: handle,
            })
        }
    }

    /// Lazily initialize (first call) and fetch the cached function table.
    fn get_fns() -> Option<&'static OpenSslFns> {
        FNS.get_or_init(try_load).as_ref()
    }

    /// Returns true if OpenSSL's libcrypto is available for hardware-accelerated hashing.
    pub fn is_available() -> bool {
        get_fns().is_some()
    }

    /// RAII wrapper for EVP_MD_CTX that frees on drop.
    struct EvpCtx {
        ctx: EvpMdCtx,
        free_fn: FnEvpMdCtxFree,
    }

    impl Drop for EvpCtx {
        fn drop(&mut self) {
            if !self.ctx.is_null() {
                unsafe {
                    (self.free_fn)(self.ctx);
                }
            }
        }
    }

    /// Which EVP hash algorithm to use.
    #[derive(Clone, Copy)]
    pub enum EvpAlgorithm {
        Md5,
        Sha1,
        Sha224,
        Sha256,
        Sha384,
        Sha512,
    }

    impl EvpAlgorithm {
        /// Digest output size in bytes; used to size the output Vec before
        /// EVP_DigestFinal_ex writes into it.
        fn digest_len(self) -> usize {
            match self {
                EvpAlgorithm::Md5 => 16,
                EvpAlgorithm::Sha1 => 20,
                EvpAlgorithm::Sha224 => 28,
                EvpAlgorithm::Sha256 => 32,
                EvpAlgorithm::Sha384 => 48,
                EvpAlgorithm::Sha512 => 64,
            }
        }

        /// Fetch the static `EVP_MD*` descriptor for this algorithm from the
        /// loaded libcrypto (may be null if the build disables the algorithm).
        fn get_md(self, fns: &OpenSslFns) -> EvpMd {
            unsafe {
                match self {
                    EvpAlgorithm::Md5 => (fns.evp_md5)(),
                    EvpAlgorithm::Sha1 => (fns.evp_sha1)(),
                    EvpAlgorithm::Sha224 => (fns.evp_sha224)(),
                    EvpAlgorithm::Sha256 => (fns.evp_sha256)(),
                    EvpAlgorithm::Sha384 => (fns.evp_sha384)(),
                    EvpAlgorithm::Sha512 => (fns.evp_sha512)(),
                }
            }
        }
    }

    /// Single-shot hash of a byte slice using OpenSSL EVP.
    pub fn hash_bytes(algo: EvpAlgorithm, data: &[u8]) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            // RAII: frees the context on every exit path, including errors.
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }
            // Skip the update call entirely for empty input; init+final alone
            // produces the empty-message digest.
            if !data.is_empty()
                && (fns.evp_digest_update)(ctx, data.as_ptr() as *const libc::c_void, data.len())
                    != 1
            {
                return Err(io::Error::other("EVP_DigestUpdate failed"));
            }

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }

    /// Streaming hash: create context, feed chunks, finalize.
    pub fn hash_reader(algo: EvpAlgorithm, mut reader: impl std::io::Read) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            // RAII: frees the context on every exit path, including errors.
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }

            // Read through the shared thread-local buffer so this path reuses
            // the same allocation as the non-OpenSSL streaming paths.
            super::STREAM_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                super::ensure_stream_buf(&mut buf);
                loop {
                    let n = super::read_full(&mut reader, &mut buf)?;
                    if n == 0 {
                        break;
                    }
                    if (fns.evp_digest_update)(ctx, buf[..n].as_ptr() as *const libc::c_void, n)
                        != 1
                    {
                        return Err(io::Error::other("EVP_DigestUpdate failed"));
                    }
                }
                Ok(())
            })?;

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }

    /// Streaming hash with a prefix already read into memory.
    pub fn hash_reader_with_prefix(
        algo: EvpAlgorithm,
        prefix: &[u8],
        mut reader: impl std::io::Read,
    ) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            // RAII: frees the context on every exit path, including errors.
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }

            // Feed prefix
            if !prefix.is_empty()
                && (fns.evp_digest_update)(
                    ctx,
                    prefix.as_ptr() as *const libc::c_void,
                    prefix.len(),
                ) != 1
            {
                return Err(io::Error::other("EVP_DigestUpdate failed"));
            }

            // Stream rest
            super::STREAM_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                super::ensure_stream_buf(&mut buf);
                loop {
                    let n = super::read_full(&mut reader, &mut buf)?;
                    if n == 0 {
                        break;
                    }
                    if (fns.evp_digest_update)(ctx, buf[..n].as_ptr() as *const libc::c_void, n)
                        != 1
                    {
                        return Err(io::Error::other("EVP_DigestUpdate failed"));
                    }
                }
                Ok(())
            })?;

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }

    /// Pipelined hash for the double-buffered reader thread path.
    /// Receives filled `(buffer, byte_count)` pairs on `rx`, hashes them, and
    /// returns each buffer to the reader thread via `buf_tx` for reuse.
    /// Returns raw digest bytes for the caller to hex-encode.
    pub fn hash_pipelined(
        algo: EvpAlgorithm,
        rx: &std::sync::mpsc::Receiver<(Vec<u8>, usize)>,
        buf_tx: &std::sync::mpsc::SyncSender<Vec<u8>>,
    ) -> io::Result<Vec<u8>> {
        let fns = get_fns().ok_or_else(|| io::Error::other("OpenSSL not available"))?;

        unsafe {
            let md = algo.get_md(fns);
            if md.is_null() {
                return Err(io::Error::other("EVP_* returned null"));
            }

            let ctx = (fns.evp_md_ctx_new)();
            if ctx.is_null() {
                return Err(io::Error::other("EVP_MD_CTX_new failed"));
            }
            // RAII: frees the context on every exit path, including errors.
            let _guard = EvpCtx {
                ctx,
                free_fn: fns.evp_md_ctx_free,
            };

            if (fns.evp_digest_init_ex)(ctx, md, ptr::null()) != 1 {
                return Err(io::Error::other("EVP_DigestInit_ex failed"));
            }

            // Loop ends when the sender side hangs up (recv returns Err).
            // The buffer is always handed back, even on hash failure, so the
            // reader thread never starves.
            while let Ok((buf, n)) = rx.recv() {
                if (fns.evp_digest_update)(ctx, buf[..n].as_ptr() as *const libc::c_void, n) != 1 {
                    let _ = buf_tx.send(buf);
                    return Err(io::Error::other("EVP_DigestUpdate failed"));
                }
                let _ = buf_tx.send(buf);
            }

            let mut out = vec![0u8; algo.digest_len()];
            let mut out_len: libc::c_uint = 0;
            if (fns.evp_digest_final_ex)(ctx, out.as_mut_ptr(), &mut out_len) != 1 {
                return Err(io::Error::other("EVP_DigestFinal_ex failed"));
            }
            out.truncate(out_len as usize);
            Ok(out)
        }
    }
}
410
/// Supported hash algorithms.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha1,
    Sha224,
    Sha256,
    Sha384,
    Sha512,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Canonical display name of the algorithm (e.g. `"SHA256"`).
    pub fn name(self) -> &'static str {
        match self {
            Self::Md5 => "MD5",
            Self::Sha1 => "SHA1",
            Self::Sha224 => "SHA224",
            Self::Sha256 => "SHA256",
            Self::Sha384 => "SHA384",
            Self::Sha512 => "SHA512",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
436
437// ── Generic hash helpers ────────────────────────────────────────────
438
439/// Single-shot hash using the Digest trait.
440fn hash_digest<D: Digest>(data: &[u8]) -> String {
441    hex_encode(&D::digest(data))
442}
443
444/// Streaming hash using thread-local buffer via the Digest trait.
445fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
446    STREAM_BUF.with(|cell| {
447        let mut buf = cell.borrow_mut();
448        ensure_stream_buf(&mut buf);
449        let mut hasher = D::new();
450        loop {
451            let n = read_full(&mut reader, &mut buf)?;
452            if n == 0 {
453                break;
454            }
455            hasher.update(&buf[..n]);
456        }
457        Ok(hex_encode(&hasher.finalize()))
458    })
459}
460
// ── Public hashing API ──────────────────────────────────────────────

/// Buffer size for streaming hash I/O.
/// 128KB matches GNU coreutils' buffer size (BUFSIZE=131072), which works well with kernel readahead.
/// Many small reads allow the kernel to pipeline I/O efficiently, reducing latency
/// vs fewer large reads that stall waiting for the full buffer to fill.
const HASH_READ_BUF: usize = 131072;

// Thread-local scratch buffer shared by all streaming-hash paths.
// It starts empty and is grown lazily on the first streaming call, so
// workloads that only hash small in-memory slices never pay for the
// 128KB allocation.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Grow the scratch buffer to at least `HASH_READ_BUF` bytes (zero-filled).
/// A no-op once the buffer is large enough; the buffer is never shrunk.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    let have = buf.len();
    if have < HASH_READ_BUF {
        buf.resize(HASH_READ_BUF, 0u8);
    }
}
484
485// ── Ring-accelerated hash functions (non-Apple targets) ───────────────
486// ring provides BoringSSL assembly with optimized SHA-512/384/256/1 for x86-64/aarch64.
487
488/// Single-shot hash using ring::digest (non-Apple).
489#[cfg(not(target_vendor = "apple"))]
490#[inline]
491fn ring_hash_bytes(algo: &'static ring::digest::Algorithm, data: &[u8]) -> io::Result<String> {
492    Ok(hex_encode(ring::digest::digest(algo, data).as_ref()))
493}
494
495/// Streaming hash using ring::digest::Context (non-Apple).
496#[cfg(not(target_vendor = "apple"))]
497fn ring_hash_reader(
498    algo: &'static ring::digest::Algorithm,
499    mut reader: impl Read,
500) -> io::Result<String> {
501    STREAM_BUF.with(|cell| {
502        let mut buf = cell.borrow_mut();
503        ensure_stream_buf(&mut buf);
504        let mut ctx = ring::digest::Context::new(algo);
505        loop {
506            let n = read_full(&mut reader, &mut buf)?;
507            if n == 0 {
508                break;
509            }
510            ctx.update(&buf[..n]);
511        }
512        Ok(hex_encode(ctx.finish().as_ref()))
513    })
514}
515
516// ── SHA-256 ───────────────────────────────────────────────────────────
517// Linux: OpenSSL libcrypto (SHA-NI accelerated) via dlopen, fallback to sha2 crate.
518// Apple: sha2 crate. Other: ring (BoringSSL assembly).
519
520#[cfg(target_vendor = "apple")]
521fn sha256_bytes(data: &[u8]) -> io::Result<String> {
522    Ok(hash_digest::<sha2::Sha256>(data))
523}
524
525#[cfg(target_os = "linux")]
526fn sha256_bytes(data: &[u8]) -> io::Result<String> {
527    if openssl_evp::is_available() {
528        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha256, data)?;
529        return Ok(hex_encode(&digest));
530    }
531    Ok(hash_digest::<sha2::Sha256>(data))
532}
533
534#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
535fn sha256_bytes(data: &[u8]) -> io::Result<String> {
536    ring_hash_bytes(&ring::digest::SHA256, data)
537}
538
539#[cfg(target_vendor = "apple")]
540fn sha256_reader(reader: impl Read) -> io::Result<String> {
541    hash_reader_impl::<sha2::Sha256>(reader)
542}
543
544#[cfg(target_os = "linux")]
545fn sha256_reader(reader: impl Read) -> io::Result<String> {
546    if openssl_evp::is_available() {
547        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha256, reader)?;
548        return Ok(hex_encode(&digest));
549    }
550    hash_reader_impl::<sha2::Sha256>(reader)
551}
552
553#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
554fn sha256_reader(reader: impl Read) -> io::Result<String> {
555    ring_hash_reader(&ring::digest::SHA256, reader)
556}
557
558// ── SHA-1 ─────────────────────────────────────────────────────────────
559// Linux: OpenSSL libcrypto (SHA-NI accelerated) via dlopen, fallback to ring.
560// Apple: sha1 crate. Other: ring (BoringSSL assembly).
561
562#[cfg(target_vendor = "apple")]
563fn sha1_bytes(data: &[u8]) -> io::Result<String> {
564    Ok(hash_digest::<sha1::Sha1>(data))
565}
566
567#[cfg(target_os = "linux")]
568fn sha1_bytes(data: &[u8]) -> io::Result<String> {
569    if openssl_evp::is_available() {
570        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha1, data)?;
571        return Ok(hex_encode(&digest));
572    }
573    ring_hash_bytes(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data)
574}
575
576#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
577fn sha1_bytes(data: &[u8]) -> io::Result<String> {
578    ring_hash_bytes(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data)
579}
580
581#[cfg(target_vendor = "apple")]
582fn sha1_reader(reader: impl Read) -> io::Result<String> {
583    hash_reader_impl::<sha1::Sha1>(reader)
584}
585
586#[cfg(target_os = "linux")]
587fn sha1_reader(reader: impl Read) -> io::Result<String> {
588    if openssl_evp::is_available() {
589        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha1, reader)?;
590        return Ok(hex_encode(&digest));
591    }
592    ring_hash_reader(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, reader)
593}
594
595#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
596fn sha1_reader(reader: impl Read) -> io::Result<String> {
597    ring_hash_reader(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, reader)
598}
599
600// ── SHA-224 ───────────────────────────────────────────────────────────
601// Linux: OpenSSL libcrypto (SHA-NI accelerated) via dlopen, fallback to sha2 crate.
602// Other: sha2 crate (ring does not support SHA-224).
603
604#[cfg(target_os = "linux")]
605fn sha224_bytes(data: &[u8]) -> io::Result<String> {
606    if openssl_evp::is_available() {
607        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha224, data)?;
608        return Ok(hex_encode(&digest));
609    }
610    Ok(hash_digest::<sha2::Sha224>(data))
611}
612
613#[cfg(not(target_os = "linux"))]
614fn sha224_bytes(data: &[u8]) -> io::Result<String> {
615    Ok(hash_digest::<sha2::Sha224>(data))
616}
617
618#[cfg(target_os = "linux")]
619fn sha224_reader(reader: impl Read) -> io::Result<String> {
620    if openssl_evp::is_available() {
621        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha224, reader)?;
622        return Ok(hex_encode(&digest));
623    }
624    hash_reader_impl::<sha2::Sha224>(reader)
625}
626
627#[cfg(not(target_os = "linux"))]
628fn sha224_reader(reader: impl Read) -> io::Result<String> {
629    hash_reader_impl::<sha2::Sha224>(reader)
630}
631
632// ── SHA-384 ───────────────────────────────────────────────────────────
633// Linux: OpenSSL libcrypto (AVX-512 optimized) via dlopen, fallback to ring (AVX2).
634// Apple: sha2 crate. Other: ring (BoringSSL assembly).
635
636#[cfg(target_vendor = "apple")]
637fn sha384_bytes(data: &[u8]) -> io::Result<String> {
638    Ok(hash_digest::<sha2::Sha384>(data))
639}
640
641#[cfg(target_os = "linux")]
642fn sha384_bytes(data: &[u8]) -> io::Result<String> {
643    if openssl_evp::is_available() {
644        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha384, data)?;
645        return Ok(hex_encode(&digest));
646    }
647    ring_hash_bytes(&ring::digest::SHA384, data)
648}
649
650#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
651fn sha384_bytes(data: &[u8]) -> io::Result<String> {
652    ring_hash_bytes(&ring::digest::SHA384, data)
653}
654
655#[cfg(target_vendor = "apple")]
656fn sha384_reader(reader: impl Read) -> io::Result<String> {
657    hash_reader_impl::<sha2::Sha384>(reader)
658}
659
660#[cfg(target_os = "linux")]
661fn sha384_reader(reader: impl Read) -> io::Result<String> {
662    if openssl_evp::is_available() {
663        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha384, reader)?;
664        return Ok(hex_encode(&digest));
665    }
666    ring_hash_reader(&ring::digest::SHA384, reader)
667}
668
669#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
670fn sha384_reader(reader: impl Read) -> io::Result<String> {
671    ring_hash_reader(&ring::digest::SHA384, reader)
672}
673
674// ── SHA-512 ───────────────────────────────────────────────────────────
675// Linux: OpenSSL libcrypto (AVX-512 optimized) via dlopen, fallback to ring (AVX2).
676// Apple: sha2 crate. Other: ring (BoringSSL assembly).
677
678#[cfg(target_vendor = "apple")]
679fn sha512_bytes(data: &[u8]) -> io::Result<String> {
680    Ok(hash_digest::<sha2::Sha512>(data))
681}
682
683#[cfg(target_os = "linux")]
684fn sha512_bytes(data: &[u8]) -> io::Result<String> {
685    if openssl_evp::is_available() {
686        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha512, data)?;
687        return Ok(hex_encode(&digest));
688    }
689    ring_hash_bytes(&ring::digest::SHA512, data)
690}
691
692#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
693fn sha512_bytes(data: &[u8]) -> io::Result<String> {
694    ring_hash_bytes(&ring::digest::SHA512, data)
695}
696
697#[cfg(target_vendor = "apple")]
698fn sha512_reader(reader: impl Read) -> io::Result<String> {
699    hash_reader_impl::<sha2::Sha512>(reader)
700}
701
702#[cfg(target_os = "linux")]
703fn sha512_reader(reader: impl Read) -> io::Result<String> {
704    if openssl_evp::is_available() {
705        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Sha512, reader)?;
706        return Ok(hex_encode(&digest));
707    }
708    ring_hash_reader(&ring::digest::SHA512, reader)
709}
710
711#[cfg(all(not(target_os = "linux"), not(target_vendor = "apple")))]
712fn sha512_reader(reader: impl Read) -> io::Result<String> {
713    ring_hash_reader(&ring::digest::SHA512, reader)
714}
715
716/// Compute hash of a byte slice directly (zero-copy fast path).
717/// Returns an error if the underlying crypto library rejects the algorithm.
718pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> io::Result<String> {
719    match algo {
720        HashAlgorithm::Sha1 => sha1_bytes(data),
721        HashAlgorithm::Sha224 => sha224_bytes(data),
722        HashAlgorithm::Sha256 => sha256_bytes(data),
723        HashAlgorithm::Sha384 => sha384_bytes(data),
724        HashAlgorithm::Sha512 => sha512_bytes(data),
725        HashAlgorithm::Md5 => md5_bytes(data),
726        HashAlgorithm::Blake2b => {
727            let hash = blake2b_simd::blake2b(data);
728            Ok(hex_encode(hash.as_bytes()))
729        }
730    }
731}
732
/// Hash data and write hex result directly into an output buffer.
/// Returns the number of hex bytes written. Avoids String allocation
/// on the critical single-file fast path.
/// `out` must be at least 128 bytes for BLAKE2b/SHA512 (64 * 2), 64 for SHA256, 32 for MD5, etc.
///
/// NOTE(review): unlike the `*_bytes` helpers above, MD5/SHA-1/SHA-224/SHA-256
/// here always use the crate implementations and only SHA-384/SHA-512 consult
/// OpenSSL — presumably a deliberate fast-path tuning choice; confirm before
/// "unifying" the backends.
#[cfg(target_os = "linux")]
pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> io::Result<usize> {
    match algo {
        // MD5: 16-byte digest -> 32 hex chars.
        HashAlgorithm::Md5 => {
            let digest = Md5::digest(data);
            hex_encode_to_slice(&digest, out);
            Ok(32)
        }
        // SHA-1: 20-byte digest -> 40 hex chars.
        HashAlgorithm::Sha1 => {
            let digest = ring::digest::digest(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, data);
            hex_encode_to_slice(digest.as_ref(), out);
            Ok(40)
        }
        // SHA-224: 28-byte digest -> 56 hex chars.
        HashAlgorithm::Sha224 => {
            let digest = sha2::Sha224::digest(data);
            hex_encode_to_slice(&digest, out);
            Ok(56)
        }
        // SHA-256: 32-byte digest -> 64 hex chars.
        HashAlgorithm::Sha256 => {
            let digest = sha2::Sha256::digest(data);
            hex_encode_to_slice(&digest, out);
            Ok(64)
        }
        // SHA-384: 48-byte digest -> 96 hex chars. OpenSSL preferred when loaded.
        HashAlgorithm::Sha384 => {
            if openssl_evp::is_available() {
                let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha384, data)?;
                hex_encode_to_slice(&digest, out);
                return Ok(96);
            }
            let digest = ring::digest::digest(&ring::digest::SHA384, data);
            hex_encode_to_slice(digest.as_ref(), out);
            Ok(96)
        }
        // SHA-512: 64-byte digest -> 128 hex chars. OpenSSL preferred when loaded.
        HashAlgorithm::Sha512 => {
            if openssl_evp::is_available() {
                let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Sha512, data)?;
                hex_encode_to_slice(&digest, out);
                return Ok(128);
            }
            let digest = ring::digest::digest(&ring::digest::SHA512, data);
            hex_encode_to_slice(digest.as_ref(), out);
            Ok(128)
        }
        // BLAKE2b: length taken from the digest itself rather than hard-coded.
        HashAlgorithm::Blake2b => {
            let hash = blake2b_simd::blake2b(data);
            let bytes = hash.as_bytes();
            hex_encode_to_slice(bytes, out);
            Ok(bytes.len() * 2)
        }
    }
}
788
/// Hash a single file using raw syscalls and write hex directly to output buffer.
/// Returns number of hex bytes written.
/// This is the absolute minimum-overhead path for single-file hashing:
/// raw open + fstat + read + hash + hex encode, with zero String allocation.
#[cfg(target_os = "linux")]
pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
    use std::os::unix::ffi::OsStrExt;

    // open(2) needs a NUL-terminated path; a path containing NUL is invalid.
    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    // O_NOATIME avoids an atime write on read, but is only permitted for the
    // file's owner; NOATIME_SUPPORTED remembers a prior EPERM so later opens
    // skip the flag entirely.
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM with O_NOATIME set: disable the flag process-wide and retry
        // this open once without it.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd_to_buf(algo, fd2, out);
        }
        return Err(err);
    }
    // fd ownership passes to hash_from_raw_fd_to_buf, which closes it.
    hash_from_raw_fd_to_buf(algo, fd, out)
}
821
/// Hash from raw fd and write hex directly to output buffer.
/// For tiny files (<8KB), the entire path is raw syscalls + stack buffer — zero heap.
/// For larger files, falls back to the String-returning helpers
/// (hash_regular_file / hash_reader) and copies the hex into `out`.
///
/// Takes ownership of `fd`: it is either closed here or handed to a `File`
/// whose Drop closes it. Panics if `out` is shorter than the hex digest.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
    // SAFETY: an all-zero libc::stat is a valid value for fstat to fill in.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &[], out);
    }

    // Tiny files (<8KB): fully raw path — zero heap allocation
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        while total < size as usize {
            // SAFETY: buf[total..] is writable and holds at least (size - total)
            // bytes because size < TINY_FILE_LIMIT == buf.len().
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                // EINTR: retry the read instead of failing.
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break; // early EOF — file shrank after fstat; hash what we got
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes_to_buf(algo, &buf[..total], out);
    }

    // Larger files: fall back to hash_from_raw_fd which returns a String,
    // then copy the hex into out.
    use std::os::unix::io::FromRawFd;
    // SAFETY: fd is open and owned by us; File assumes ownership and closes it.
    let file = unsafe { File::from_raw_fd(fd) };
    let hash_str = if is_regular && size > 0 {
        hash_regular_file(algo, file, size)?
    } else {
        hash_reader(algo, file)?
    };
    let hex_bytes = hash_str.as_bytes();
    out[..hex_bytes.len()].copy_from_slice(hex_bytes);
    Ok(hex_bytes.len())
}
892
893// ── MD5 ─────────────────────────────────────────────────────────────
894// Linux: OpenSSL libcrypto (hardware-accelerated) via dlopen, fallback to md-5 crate.
895// Other: md-5 crate (cpufeatures runtime dispatch on supported CPUs).
896
897#[cfg(target_os = "linux")]
898fn md5_bytes(data: &[u8]) -> io::Result<String> {
899    if openssl_evp::is_available() {
900        let digest = openssl_evp::hash_bytes(openssl_evp::EvpAlgorithm::Md5, data)?;
901        return Ok(hex_encode(&digest));
902    }
903    Ok(hash_digest::<Md5>(data))
904}
905
/// MD5 of an in-memory slice, returned as lowercase hex.
/// No dynamic OpenSSL loading off Linux: the md-5 crate (which does its own
/// runtime CPU-feature dispatch) is the single code path.
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> io::Result<String> {
    let hex = hash_digest::<Md5>(data);
    Ok(hex)
}
910
911#[cfg(target_os = "linux")]
912fn md5_reader(reader: impl Read) -> io::Result<String> {
913    if openssl_evp::is_available() {
914        let digest = openssl_evp::hash_reader(openssl_evp::EvpAlgorithm::Md5, reader)?;
915        return Ok(hex_encode(&digest));
916    }
917    hash_reader_impl::<Md5>(reader)
918}
919
/// Streaming MD5 over a reader, returned as lowercase hex.
/// Off Linux there is no OpenSSL fast path, so this always uses the md-5 crate.
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}
924
925/// Compute hash of data from a reader, returning hex string.
926pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
927    match algo {
928        HashAlgorithm::Sha1 => sha1_reader(reader),
929        HashAlgorithm::Sha224 => sha224_reader(reader),
930        HashAlgorithm::Sha256 => sha256_reader(reader),
931        HashAlgorithm::Sha384 => sha384_reader(reader),
932        HashAlgorithm::Sha512 => sha512_reader(reader),
933        HashAlgorithm::Md5 => md5_reader(reader),
934        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
935    }
936}
937
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
/// Relaxed ordering suffices: this is a monotonic true→false hint, and a stale
/// read only costs one extra doomed open() attempt — never correctness.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
942
943/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
944/// Caches whether O_NOATIME works to avoid double-open on every file.
945#[cfg(target_os = "linux")]
946fn open_noatime(path: &Path) -> io::Result<File> {
947    use std::os::unix::fs::OpenOptionsExt;
948    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
949        match std::fs::OpenOptions::new()
950            .read(true)
951            .custom_flags(libc::O_NOATIME)
952            .open(path)
953        {
954            Ok(f) => return Ok(f),
955            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
956                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
957                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
958            }
959            Err(e) => return Err(e), // Real error, propagate
960        }
961    }
962    File::open(path)
963}
964
/// Non-Linux fallback: O_NOATIME does not exist here, so this is a plain open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
969
970/// Open a file and get its metadata in one step.
971/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
972#[cfg(target_os = "linux")]
973#[inline]
974fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
975    let file = open_noatime(path)?;
976    let fd = {
977        use std::os::unix::io::AsRawFd;
978        file.as_raw_fd()
979    };
980    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
981    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
982        return Err(io::Error::last_os_error());
983    }
984    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
985    let size = stat.st_size as u64;
986    Ok((file, size, is_regular))
987}
988
/// Open a file and fetch (size, is-regular) via standard metadata.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let (len, is_file) = (meta.len(), meta.file_type().is_file());
    Ok((file, len, is_file))
}
996
/// Minimum file size to issue fadvise hint (1MB).
/// For small files, the syscall overhead exceeds the readahead benefit.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and hashed
/// with single-shot hash. This avoids mmap/munmap overhead (~100µs each) and
/// MAP_POPULATE page faults (~300ns/page). The thread-local buffer is reused
/// across files in sequential mode, saving re-allocation.
/// 16MB covers typical benchmark files (10MB) while keeping memory usage bounded.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

/// Threshold for tiny files that can be read into a stack buffer.
/// Below this size, we use a stack-allocated buffer + single read() syscall,
/// completely avoiding any heap allocation for the data path.
/// Must not exceed the 8192-byte stack buffers used by the *_tiny helpers.
const TINY_FILE_LIMIT: u64 = 8 * 1024;

// Thread-local reusable buffer for single-read hash.
// Grows lazily up to SMALL_FILE_LIMIT (16MB). Initial 64KB allocation
// handles tiny files; larger files trigger one grow that persists for reuse.
// RefCell is fine here: the buffer is only borrowed within a single call.
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
1021
1022/// Optimized hash for large files (>=16MB) on Linux.
1023/// Hash large files (>=16MB) using streaming I/O with fadvise + ring Context.
1024/// Uses sequential fadvise hint for kernel readahead, then streams through
1025/// hash context in large chunks. For large files (>64MB), uses double-buffered
1026/// reader thread to overlap I/O and hashing.
1027#[cfg(target_os = "linux")]
1028fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
1029    // For very large files, double-buffered reader thread overlaps I/O and CPU.
1030    // For medium files, single-thread streaming is faster (avoids thread overhead).
1031    if file_size >= 64 * 1024 * 1024 {
1032        hash_file_pipelined_read(algo, file, file_size)
1033    } else {
1034        hash_file_streaming(algo, file, file_size)
1035    }
1036}
1037
1038/// Simple single-thread streaming hash with fadvise.
1039/// Optimal for files 16-64MB where thread overhead exceeds I/O overlap benefit.
1040#[cfg(target_os = "linux")]
1041fn hash_file_streaming(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
1042    use std::os::unix::io::AsRawFd;
1043
1044    let _ = unsafe {
1045        libc::posix_fadvise(
1046            file.as_raw_fd(),
1047            0,
1048            file_size as i64,
1049            libc::POSIX_FADV_SEQUENTIAL,
1050        )
1051    };
1052
1053    hash_reader(algo, file)
1054}
1055
/// Streaming fallback for large files when mmap is unavailable.
/// Uses double-buffered reader thread with fadvise hints.
/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
///
/// One buffer circulates between two rendezvous channels: the reader thread
/// fills it and sends it on `tx`; the hasher drains it and returns it via
/// `buf_tx`. Peak extra memory is therefore PIPE_BUF_SIZE regardless of
/// file size.
#[cfg(target_os = "linux")]
fn hash_file_pipelined_read(
    algo: HashAlgorithm,
    mut file: File,
    file_size: u64,
) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4MB per buffer

    // Advisory readahead hint; failure is harmless, so the result is ignored.
    let _ = unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        )
    };

    // Data channel (reader → hasher) and recycle channel (hasher → reader),
    // both bounded at 1 so neither side runs ahead of the other.
    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);

    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
        // Blocking recv: the loop ends when the hasher drops its buf_tx end,
        // or when an empty read below signals EOF.
        while let Ok(mut buf) = buf_rx.recv() {
            let mut total = 0;
            // Fill the whole buffer unless EOF (Ok(0)) cuts the read short.
            while total < buf.len() {
                match file.read(&mut buf[total..]) {
                    Ok(0) => break,
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            if total == 0 {
                break; // EOF with nothing read — done
            }
            if tx.send((buf, total)).is_err() {
                break; // hasher gave up (dropped rx)
            }
        }
        Ok(())
    });

    // Use Digest trait for all hash algorithms.
    macro_rules! hash_pipelined_digest {
        ($hasher_init:expr) => {{
            let mut hasher = $hasher_init;
            while let Ok((buf, n)) = rx.recv() {
                hasher.update(&buf[..n]);
                let _ = buf_tx.send(buf);
            }
            Ok(hex_encode(&hasher.finalize()))
        }};
    }

    // Map HashAlgorithm to OpenSSL EvpAlgorithm for pipelined path
    let evp_algo = match algo {
        HashAlgorithm::Md5 => Some(openssl_evp::EvpAlgorithm::Md5),
        HashAlgorithm::Sha1 => Some(openssl_evp::EvpAlgorithm::Sha1),
        HashAlgorithm::Sha224 => Some(openssl_evp::EvpAlgorithm::Sha224),
        HashAlgorithm::Sha256 => Some(openssl_evp::EvpAlgorithm::Sha256),
        HashAlgorithm::Sha384 => Some(openssl_evp::EvpAlgorithm::Sha384),
        HashAlgorithm::Sha512 => Some(openssl_evp::EvpAlgorithm::Sha512),
        HashAlgorithm::Blake2b => None, // not an EVP algorithm here — handled below
    };

    let hash_result: io::Result<String> = if let Some(evp) =
        evp_algo.filter(|_| openssl_evp::is_available())
    {
        let digest = openssl_evp::hash_pipelined(evp, &rx, &buf_tx)?;
        Ok(hex_encode(&digest))
    } else {
        match algo {
            HashAlgorithm::Blake2b => {
                let mut state = blake2b_simd::Params::new().to_state();
                while let Ok((buf, n)) = rx.recv() {
                    state.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(state.finalize().as_bytes()))
            }
            HashAlgorithm::Md5 => hash_pipelined_digest!(Md5::new()),
            HashAlgorithm::Sha224 => hash_pipelined_digest!(sha2::Sha224::new()),
            HashAlgorithm::Sha256 => hash_pipelined_digest!(sha2::Sha256::new()),
            HashAlgorithm::Sha1 => {
                let mut ctx = ring::digest::Context::new(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY);
                while let Ok((buf, n)) = rx.recv() {
                    ctx.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(ctx.finish().as_ref()))
            }
            HashAlgorithm::Sha384 => {
                let mut ctx = ring::digest::Context::new(&ring::digest::SHA384);
                while let Ok((buf, n)) = rx.recv() {
                    ctx.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(ctx.finish().as_ref()))
            }
            HashAlgorithm::Sha512 => {
                let mut ctx = ring::digest::Context::new(&ring::digest::SHA512);
                while let Ok((buf, n)) = rx.recv() {
                    ctx.update(&buf[..n]);
                    let _ = buf_tx.send(buf);
                }
                Ok(hex_encode(ctx.finish().as_ref()))
            }
        }
    };

    // Surface a reader I/O error only when the hash side succeeded — otherwise
    // the hash-side error is the more informative one. A reader panic is
    // converted into an io::Error rather than propagating the panic.
    match reader_handle.join() {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            if hash_result.is_ok() {
                return Err(e);
            }
        }
        Err(payload) => {
            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
                format!("reader thread panicked: {}", s)
            } else if let Some(s) = payload.downcast_ref::<String>() {
                format!("reader thread panicked: {}", s)
            } else {
                "reader thread panicked".to_string()
            };
            return Err(io::Error::other(msg));
        }
    }

    hash_result
}
1192
/// Hash a known-regular file using tiered I/O strategy based on size.
/// - Large (>=16MB): mmap with HugePage/PopulateRead hints, pipelined fallback
/// - Small/Medium (8KB-16MB): single read into thread-local buffer + single-shot hash
///
/// SAFETY: mmap is safe for regular local files opened just above. The fallback
/// to streaming I/O (hash_reader/hash_file_pipelined) handles mmap failures at
/// map time, but cannot protect against post-map truncation. If the file is
/// truncated or backing storage disappears after mapping (e.g. NFS), the kernel
/// delivers SIGBUS — acceptable, matching other mmap tools.
fn hash_regular_file(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
    // Large files (>=SMALL_FILE_LIMIT): mmap for zero-copy single-shot hash.
    if file_size >= SMALL_FILE_LIMIT {
        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
        if let Ok(mmap) = mmap_result {
            #[cfg(target_os = "linux")]
            {
                // All advice calls are best-effort; failures are ignored.
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // PopulateRead (Linux 5.14+) synchronously faults all pages into
                // TLB before returning. This costs ~200µs/GB but eliminates TLB
                // miss stalls during the hash computation, which is net positive
                // for files that fit comfortably in page cache.
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            }
            return hash_bytes(algo, &mmap);
        }
        // mmap failed — fall back to streaming I/O
        #[cfg(target_os = "linux")]
        {
            return hash_file_pipelined(algo, file, file_size);
        }
        #[cfg(not(target_os = "linux"))]
        {
            return hash_reader(algo, file);
        }
    }
    // Small/medium files (8KB-16MB): single read into thread-local buffer.
    // One read() + one single-shot hash call. The thread-local buffer grows
    // lazily and persists across files, so allocation cost is amortized.
    // This outperforms streaming (128KB chunks × N syscalls × N trait dispatches)
    // for files that fit comfortably in the page cache.
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        // Advisory readahead hint; result deliberately ignored.
        let _ = unsafe {
            libc::posix_fadvise(
                file.as_raw_fd(),
                0,
                file_size as i64,
                libc::POSIX_FADV_SEQUENTIAL,
            )
        };
    }
    hash_file_small(algo, file, file_size as usize)
}
1249
1250/// Hash a file by path. Uses tiered I/O strategy for regular files,
1251/// streaming read for non-regular files.
1252pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
1253    let (file, file_size, is_regular) = open_and_stat(path)?;
1254
1255    if is_regular && file_size == 0 {
1256        return hash_bytes(algo, &[]);
1257    }
1258
1259    if file_size > 0 && is_regular {
1260        if file_size < TINY_FILE_LIMIT {
1261            return hash_file_tiny(algo, file, file_size as usize);
1262        }
1263        return hash_regular_file(algo, file, file_size);
1264    }
1265
1266    // Non-regular files or fallback: stream
1267    #[cfg(target_os = "linux")]
1268    if file_size >= FADVISE_MIN_SIZE {
1269        use std::os::unix::io::AsRawFd;
1270        let _ = unsafe {
1271            libc::posix_fadvise(
1272                file.as_raw_fd(),
1273                0,
1274                file_size as i64,
1275                libc::POSIX_FADV_SEQUENTIAL,
1276            )
1277        };
1278    }
1279    hash_reader(algo, file)
1280}
1281
1282/// Hash a tiny file (<8KB) using a stack-allocated buffer.
1283/// Single read() syscall, zero heap allocation on the data path.
1284/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
1285#[inline]
1286fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
1287    let mut buf = [0u8; 8192];
1288    let mut total = 0;
1289    // Read with known size — usually completes in a single read() for regular files
1290    while total < size {
1291        match file.read(&mut buf[total..size]) {
1292            Ok(0) => break,
1293            Ok(n) => total += n,
1294            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1295            Err(e) => return Err(e),
1296        }
1297    }
1298    hash_bytes(algo, &buf[..total])
1299}
1300
1301/// Hash a small file by reading it entirely into a thread-local buffer,
1302/// then using the single-shot hash function. Avoids per-file Hasher allocation.
1303#[inline]
1304fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
1305    SMALL_FILE_BUF.with(|cell| {
1306        let mut buf = cell.borrow_mut();
1307        // Reset length but keep allocation, then grow if needed
1308        buf.clear();
1309        buf.reserve(size);
1310        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
1311        // directly and only access buf[..total] where total <= size <= capacity.
1312        unsafe {
1313            buf.set_len(size);
1314        }
1315        let mut total = 0;
1316        while total < size {
1317            match file.read(&mut buf[total..size]) {
1318                Ok(0) => break,
1319                Ok(n) => total += n,
1320                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1321                Err(e) => return Err(e),
1322            }
1323        }
1324        hash_bytes(algo, &buf[..total])
1325    })
1326}
1327
1328/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
1329pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
1330    let stdin = io::stdin();
1331    // Hint kernel for sequential access if stdin is a regular file (redirect)
1332    #[cfg(target_os = "linux")]
1333    {
1334        use std::os::unix::io::AsRawFd;
1335        let fd = stdin.as_raw_fd();
1336        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1337        if unsafe { libc::fstat(fd, &mut stat) } == 0
1338            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1339            && stat.st_size > 0
1340        {
1341            unsafe {
1342                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1343            }
1344        }
1345    }
1346    // Streaming hash — works for both pipe and file-redirect stdin
1347    hash_reader(algo, stdin.lock())
1348}
1349
/// Decide whether to fan file hashing out across rayon workers.
/// Two or more files is always worth it: the pool is initialized lazily once
/// and reused, so per-file work-stealing overhead is on the order of a
/// microsecond. Deliberately avoids stat()-based size checks, which would
/// cost one extra syscall per file.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
1357
1358/// Issue readahead hints for a list of file paths to warm the page cache.
1359/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
1360/// Only issues hints for files >= 1MB; small files are read fast enough
1361/// that the fadvise syscall overhead isn't worth it.
1362#[cfg(target_os = "linux")]
1363pub fn readahead_files(paths: &[&Path]) {
1364    use std::os::unix::io::AsRawFd;
1365    for path in paths {
1366        if let Ok(file) = open_noatime(path) {
1367            if let Ok(meta) = file.metadata() {
1368                let len = meta.len();
1369                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
1370                    unsafe {
1371                        libc::posix_fadvise(
1372                            file.as_raw_fd(),
1373                            0,
1374                            len as i64,
1375                            libc::POSIX_FADV_WILLNEED,
1376                        );
1377                    }
1378                }
1379            }
1380        }
1381    }
1382}
1383
/// Readahead hints are Linux-only (posix_fadvise); elsewhere this is a no-op
/// kept so callers can invoke it unconditionally.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No-op on non-Linux
}
1388
1389// --- BLAKE2b variable-length functions (using blake2b_simd) ---
1390
1391/// Hash raw data with BLAKE2b variable output length.
1392/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
1393pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
1394    let hash = blake2b_simd::Params::new()
1395        .hash_length(output_bytes)
1396        .hash(data);
1397    hex_encode(hash.as_bytes())
1398}
1399
1400/// Hash a reader with BLAKE2b variable output length.
1401/// Uses thread-local buffer for cache-friendly streaming.
1402pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
1403    STREAM_BUF.with(|cell| {
1404        let mut buf = cell.borrow_mut();
1405        ensure_stream_buf(&mut buf);
1406        let mut state = blake2b_simd::Params::new()
1407            .hash_length(output_bytes)
1408            .to_state();
1409        loop {
1410            let n = read_full(&mut reader, &mut buf)?;
1411            if n == 0 {
1412                break;
1413            }
1414            state.update(&buf[..n]);
1415        }
1416        Ok(hex_encode(state.finalize().as_bytes()))
1417    })
1418}
1419
/// Hash a file with BLAKE2b variable output length.
/// Uses mmap for large files (zero-copy), single-read for small files,
/// and streaming read as fallback.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // Large files (>=16MB): I/O pipelining on Linux, mmap on other platforms
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return blake2b_hash_file_pipelined(file, file_size, output_bytes);
            }
            #[cfg(not(target_os = "linux"))]
            {
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(blake2b_hash_data(&mmap, output_bytes));
                }
                // mmap failed: fall through. The size check below is false for
                // large files, so control reaches the streaming path at the
                // bottom without re-reading anything.
            }
        }
        // Small files (8KB..16MB): single read into thread-local buffer, then single-shot hash
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files or fallback: stream
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        // Advisory readahead hint; result deliberately ignored.
        let _ = unsafe {
            libc::posix_fadvise(
                file.as_raw_fd(),
                0,
                file_size as i64,
                libc::POSIX_FADV_SEQUENTIAL,
            )
        };
    }
    blake2b_hash_reader(file, output_bytes)
}
1470
1471/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
1472#[inline]
1473fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1474    let mut buf = [0u8; 8192];
1475    let mut total = 0;
1476    while total < size {
1477        match file.read(&mut buf[total..size]) {
1478            Ok(0) => break,
1479            Ok(n) => total += n,
1480            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1481            Err(e) => return Err(e),
1482        }
1483    }
1484    Ok(blake2b_hash_data(&buf[..total], output_bytes))
1485}
1486
1487/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
1488#[inline]
1489fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1490    SMALL_FILE_BUF.with(|cell| {
1491        let mut buf = cell.borrow_mut();
1492        buf.clear();
1493        buf.reserve(size);
1494        // SAFETY: capacity >= size after clear+reserve
1495        unsafe {
1496            buf.set_len(size);
1497        }
1498        let mut total = 0;
1499        while total < size {
1500            match file.read(&mut buf[total..size]) {
1501                Ok(0) => break,
1502                Ok(n) => total += n,
1503                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1504                Err(e) => return Err(e),
1505            }
1506        }
1507        Ok(blake2b_hash_data(&buf[..total], output_bytes))
1508    })
1509}
1510
/// Optimized BLAKE2b hash for large files (>=16MB) on Linux.
/// Primary path: mmap with HUGEPAGE + POPULATE_READ for zero-copy, single-shot hash.
/// Eliminates thread spawn, channel synchronization, buffer allocation (24MB→0),
/// and read() memcpy overhead. Falls back to streaming I/O if mmap fails.
#[cfg(target_os = "linux")]
fn blake2b_hash_file_pipelined(
    file: File,
    file_size: u64,
    output_bytes: usize,
) -> io::Result<String> {
    // Primary path: mmap with huge pages for zero-copy single-shot hash.
    // Eliminates: thread spawn (~50µs), channel sync, buffer allocs (24MB),
    // 13+ read() syscalls, and page-cache → user-buffer memcpy.
    // SAFETY: mapping a regular file; post-map truncation can raise SIGBUS
    // (same accepted trade-off as documented on hash_regular_file).
    match unsafe { memmap2::MmapOptions::new().map(&file) } {
        Ok(mmap) => {
            // HUGEPAGE MUST come before any page faults: reduces 25,600 minor
            // faults (4KB) to ~50 faults (2MB) for 100MB. Saves ~12ms overhead.
            if file_size >= 2 * 1024 * 1024 {
                let _ = mmap.advise(memmap2::Advice::HugePage);
            }
            let _ = mmap.advise(memmap2::Advice::Sequential);
            // POPULATE_READ (Linux 5.14+): synchronously prefaults all pages with
            // huge pages before hashing begins. Falls back to WillNeed on older kernels.
            if file_size >= 4 * 1024 * 1024 {
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            } else {
                let _ = mmap.advise(memmap2::Advice::WillNeed);
            }
            // Single-shot hash: processes entire file in one call, streaming
            // directly from page cache with no user-space buffer copies.
            Ok(blake2b_hash_data(&mmap, output_bytes))
        }
        Err(_) => {
            // mmap failed (FUSE, NFS without mmap support, etc.) — fall back
            // to streaming pipelined I/O.
            blake2b_hash_file_streamed(file, file_size, output_bytes)
        }
    }
}
1552
/// Streaming fallback for BLAKE2b large files when mmap is unavailable.
/// Uses double-buffered reader thread with fadvise hints.
/// Fixed: uses blocking recv() to eliminate triple-buffer allocation bug.
#[cfg(target_os = "linux")]
fn blake2b_hash_file_streamed(
    mut file: File,
    file_size: u64,
    output_bytes: usize,
) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; // 8MB per buffer

    // Hint kernel for sequential access (advisory; return value ignored)
    unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        );
    }

    // Double-buffered channels: reader fills one buffer while hasher processes another.
    // tx/rx carry filled buffers to the hasher; buf_tx/buf_rx recycle drained
    // buffers back to the reader, so only one buffer is ever allocated.
    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);

    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
        // Blocking recv reuses hasher's returned buffer (2 buffers total, not 3).
        while let Ok(mut buf) = buf_rx.recv() {
            let mut total = 0;
            // Fill the buffer fully unless EOF (Ok(0)) cuts the read short.
            while total < buf.len() {
                match file.read(&mut buf[total..]) {
                    Ok(0) => break,
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            if total == 0 {
                break; // EOF with nothing read — done
            }
            if tx.send((buf, total)).is_err() {
                break; // hasher dropped its end
            }
        }
        Ok(())
    });

    let mut state = blake2b_simd::Params::new()
        .hash_length(output_bytes)
        .to_state();
    // Loop ends when the reader thread drops tx (EOF or I/O error on its side).
    while let Ok((buf, n)) = rx.recv() {
        state.update(&buf[..n]);
        let _ = buf_tx.send(buf);
    }
    let hash_result = Ok(hex_encode(state.finalize().as_bytes()));

    // Surface reader I/O errors; a reader panic becomes an io::Error.
    match reader_handle.join() {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            if hash_result.is_ok() {
                return Err(e);
            }
        }
        Err(payload) => {
            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
                format!("reader thread panicked: {}", s)
            } else if let Some(s) = payload.downcast_ref::<String>() {
                format!("reader thread panicked: {}", s)
            } else {
                "reader thread panicked".to_string()
            };
            return Err(io::Error::other(msg));
        }
    }

    hash_result
}
1633
1634/// Hash stdin with BLAKE2b variable output length.
1635/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
1636pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
1637    let stdin = io::stdin();
1638    #[cfg(target_os = "linux")]
1639    {
1640        use std::os::unix::io::AsRawFd;
1641        let fd = stdin.as_raw_fd();
1642        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1643        if unsafe { libc::fstat(fd, &mut stat) } == 0
1644            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1645            && stat.st_size > 0
1646        {
1647            unsafe {
1648                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1649            }
1650        }
1651    }
1652    blake2b_hash_reader(stdin.lock(), output_bytes)
1653}
1654
/// Internal enum for file content in batch hashing.
/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
enum FileContent {
    /// Zero-copy memory-mapped view of a regular file.
    Mmap(memmap2::Mmap),
    /// Heap-owned bytes: tiny files, non-regular files, or the mmap fallback.
    Buf(Vec<u8>),
}
1661
1662impl AsRef<[u8]> for FileContent {
1663    fn as_ref(&self) -> &[u8] {
1664        match self {
1665            FileContent::Mmap(m) => m,
1666            FileContent::Buf(v) => v,
1667        }
1668    }
1669}
1670
1671/// Open a file and load its content for batch hashing.
1672/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
1673/// files (zero-copy), and read-to-end for non-regular files.
1674fn open_file_content(path: &Path) -> io::Result<FileContent> {
1675    let (file, size, is_regular) = open_and_stat(path)?;
1676    if is_regular && size == 0 {
1677        return Ok(FileContent::Buf(Vec::new()));
1678    }
1679    if is_regular && size > 0 {
1680        // Tiny files: read directly into Vec. The mmap syscall + page fault
1681        // overhead exceeds the data transfer cost for files under 8KB.
1682        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
1683        if size < TINY_FILE_LIMIT {
1684            let mut buf = vec![0u8; size as usize];
1685            let mut total = 0;
1686            let mut f = file;
1687            while total < size as usize {
1688                match f.read(&mut buf[total..]) {
1689                    Ok(0) => break,
1690                    Ok(n) => total += n,
1691                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1692                    Err(e) => return Err(e),
1693                }
1694            }
1695            buf.truncate(total);
1696            return Ok(FileContent::Buf(buf));
1697        }
1698        // HUGEPAGE + PopulateRead for optimal page faulting
1699        let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1700        if let Ok(mmap) = mmap_result {
1701            #[cfg(target_os = "linux")]
1702            {
1703                if size >= 2 * 1024 * 1024 {
1704                    let _ = mmap.advise(memmap2::Advice::HugePage);
1705                }
1706                let _ = mmap.advise(memmap2::Advice::Sequential);
1707                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1708                    let _ = mmap.advise(memmap2::Advice::WillNeed);
1709                }
1710            }
1711            return Ok(FileContent::Mmap(mmap));
1712        }
1713        // Fallback: read into Vec
1714        let mut buf = vec![0u8; size as usize];
1715        let mut total = 0;
1716        let mut f = file;
1717        while total < size as usize {
1718            match f.read(&mut buf[total..]) {
1719                Ok(0) => break,
1720                Ok(n) => total += n,
1721                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1722                Err(e) => return Err(e),
1723            }
1724        }
1725        buf.truncate(total);
1726        return Ok(FileContent::Buf(buf));
1727    }
1728    // Non-regular: read to end
1729    let mut buf = Vec::new();
1730    let mut f = file;
1731    f.read_to_end(&mut buf)?;
1732    Ok(FileContent::Buf(buf))
1733}
1734
1735/// Read remaining file content from an already-open fd into a Vec.
1736/// Used when the initial stack buffer is exhausted and we need to read
1737/// the rest without re-opening the file.
1738fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1739    let mut buf = Vec::with_capacity(prefix.len() + 65536);
1740    buf.extend_from_slice(prefix);
1741    file.read_to_end(&mut buf)?;
1742    Ok(FileContent::Buf(buf))
1743}
1744
1745/// Open a file and read all content without fstat — just open+read+close.
1746/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
1747/// (~0.5ms for 100 files). Uses a small initial buffer for tiny files (< 4KB),
1748/// then falls back to larger buffer or read_to_end for bigger files.
1749fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1750    let mut file = open_noatime(path)?;
1751    // Try small stack buffer first — optimal for benchmark's ~55 byte files.
1752    // For tiny files, allocate exact-size Vec to avoid waste.
1753    let mut small_buf = [0u8; 4096];
1754    match file.read(&mut small_buf) {
1755        Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1756        Ok(n) if n < small_buf.len() => {
1757            // File fits in small buffer — allocate exact size
1758            let mut vec = Vec::with_capacity(n);
1759            vec.extend_from_slice(&small_buf[..n]);
1760            return Ok(FileContent::Buf(vec));
1761        }
1762        Ok(n) => {
1763            // Might be more data — allocate heap buffer and read into it directly
1764            let mut buf = vec![0u8; 65536];
1765            buf[..n].copy_from_slice(&small_buf[..n]);
1766            let mut total = n;
1767            loop {
1768                match file.read(&mut buf[total..]) {
1769                    Ok(0) => {
1770                        buf.truncate(total);
1771                        return Ok(FileContent::Buf(buf));
1772                    }
1773                    Ok(n) => {
1774                        total += n;
1775                        if total >= buf.len() {
1776                            // File > 64KB: read rest from existing fd
1777                            return read_remaining_to_vec(&buf[..total], file);
1778                        }
1779                    }
1780                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1781                    Err(e) => return Err(e),
1782                }
1783            }
1784        }
1785        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1786            let mut buf = vec![0u8; 65536];
1787            let mut total = 0;
1788            loop {
1789                match file.read(&mut buf[total..]) {
1790                    Ok(0) => {
1791                        buf.truncate(total);
1792                        return Ok(FileContent::Buf(buf));
1793                    }
1794                    Ok(n) => {
1795                        total += n;
1796                        if total >= buf.len() {
1797                            // File > 64KB: read rest from existing fd
1798                            return read_remaining_to_vec(&buf[..total], file);
1799                        }
1800                    }
1801                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1802                    Err(e) => return Err(e),
1803                }
1804            }
1805        }
1806        Err(e) => return Err(e),
1807    }
1808}
1809
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Returns results in input order.
///
/// For 100 files on AVX2: 4x throughput from SIMD parallelism.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory.
    // For small file counts (≤10), load sequentially to avoid thread::scope
    // overhead (~120µs). For many files, use parallel loading with lightweight
    // OS threads. For 100+ files, use fast path that skips fstat.
    // NOTE: use_fast only matters on the parallel branch, since the
    // sequential branch is taken only when paths.len() <= 10 < 20.
    let use_fast = paths.len() >= 20;

    let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches
        paths.iter().map(|&path| open_file_content(path)).collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
            .min(paths.len());
        // Ceiling division so every path lands in exactly one chunk.
        let chunk_size = (paths.len() + num_threads - 1) / num_threads;

        // Scoped threads let the workers borrow `paths` without Arc/'static.
        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            // Joining in spawn order preserves the original input order
            // across concatenated chunks.
            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Build hash_many jobs for successful reads
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        // (original_index, bytes) for every file that loaded cleanly; failed
        // reads are skipped here and surface as errors in Phase 4.
        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Extract hashes into a vector indexed by original position.
        // jobs[j] corresponds to ok_entries[j] by construction.
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order.
    // Every Ok(_) entry received a Some(hash) in Phase 3, so unwrap is safe.
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
1899
/// Batch-hash multiple files with BLAKE2b using the best strategy for the workload.
/// Samples a few files to estimate total data size. For small workloads, uses
/// single-core SIMD batch hashing (`blake2b_hash_files_many`) to avoid stat and
/// thread spawn overhead. For larger workloads, uses multi-core work-stealing
/// parallelism where each worker calls `blake2b_hash_file` (with I/O pipelining
/// for large files on Linux).
/// Returns results in input order.
pub fn blake2b_hash_files_parallel(
    paths: &[&Path],
    output_bytes: usize,
) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Sample a few files to estimate whether parallel processing is worthwhile.
    // This avoids the cost of statting ALL files (~70µs/file) when the workload
    // is too small for parallelism to help.
    let sample_count = n.min(5);
    let mut sample_max: u64 = 0;
    let mut sample_total: u64 = 0;
    for &p in paths.iter().take(sample_count) {
        // Unreadable files count as size 0; their error surfaces when hashing.
        let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
        sample_total += size;
        sample_max = sample_max.max(size);
    }
    // Extrapolate the sampled average to all n files (0 when paths is empty).
    let estimated_total = if sample_count > 0 {
        sample_total * (n as u64) / (sample_count as u64)
    } else {
        0
    };

    // For small workloads, thread spawn overhead (~120µs × N_threads) exceeds
    // any parallelism benefit. Use SIMD batch hashing directly (no stat pass).
    if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
        return blake2b_hash_files_many(paths, output_bytes);
    }

    // Full stat pass for parallel scheduling — worth it for larger workloads.
    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Sort largest first: ensures big files start hashing immediately while
    // small files fill in gaps, minimizing tail latency.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    // Warm page cache for the largest files using async readahead(2).
    // Each hash call handles its own mmap prefaulting, but issuing readahead
    // here lets the kernel start I/O for upcoming files while workers process
    // current ones. readahead(2) returns immediately (non-blocking).
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Atomic work index for dynamic work-stealing.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    // Each worker claims the next unprocessed entry via the
                    // shared atomic cursor until it runs past the schedule.
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = blake2b_hash_file(path, output_bytes);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Collect results and reorder to match original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        // Every slot is filled by exactly one worker; the fallback error is
        // defensive and should be unreachable.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
2011
2012/// Auto-dispatch multi-file hashing: picks sequential or parallel based on workload.
2013///
2014/// For small files (<64KB sample), sequential avoids thread spawn + readahead overhead
2015/// that dominates for tiny files. On the "100 × 55-byte files" benchmark, this saves
2016/// ~5ms of overhead (thread creation + 200 stat() calls + 100 fadvise() calls).
2017///
2018/// For large files (>=64KB), parallel processing amortizes thread spawn cost over
2019/// substantial per-file hash work. Returns results in input order.
2020pub fn hash_files_auto(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
2021    let n = paths.len();
2022    if n == 0 {
2023        return Vec::new();
2024    }
2025    if n == 1 {
2026        return vec![hash_file_nostat(algo, paths[0])];
2027    }
2028
2029    // Sample up to 3 files (max size) to correctly dispatch mixed workloads
2030    // like `md5sum small.txt big1.gb big2.gb`. Costs at most 3 stat calls (~6µs)
2031    // to save potentially 3-6ms of thread overhead for small-file workloads.
2032    let sample_size = paths
2033        .iter()
2034        .take(3)
2035        .filter_map(|p| std::fs::metadata(p).ok())
2036        .map(|m| m.len())
2037        .max()
2038        .unwrap_or(0);
2039
2040    if sample_size < 65536 {
2041        // Small files: sequential loop avoiding thread spawn overhead.
2042        #[cfg(target_os = "linux")]
2043        {
2044            // Raw syscall path: reuses CString buffer, avoids OpenOptions/File overhead
2045            let mut c_path_buf = Vec::with_capacity(256);
2046            paths
2047                .iter()
2048                .map(|&p| hash_file_raw_nostat(algo, p, &mut c_path_buf))
2049                .collect()
2050        }
2051        #[cfg(not(target_os = "linux"))]
2052        {
2053            paths.iter().map(|&p| hash_file_nostat(algo, p)).collect()
2054        }
2055    } else if n >= 20 {
2056        hash_files_batch(paths, algo)
2057    } else {
2058        hash_files_parallel_fast(paths, algo)
2059    }
2060}
2061
2062/// Batch-hash multiple files with SHA-256/MD5 using work-stealing parallelism.
2063/// Files are sorted by size (largest first) so the biggest files start processing
2064/// immediately. Each worker thread grabs the next unprocessed file via atomic index,
2065/// eliminating tail latency from uneven file sizes.
2066/// Returns results in input order.
2067pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
2068    let n = paths.len();
2069
2070    // Build (original_index, path, size) tuples — stat all files for scheduling.
2071    // The stat cost (~5µs/file) is repaid by better work distribution.
2072    let mut indexed: Vec<(usize, &Path, u64)> = paths
2073        .iter()
2074        .enumerate()
2075        .map(|(i, &p)| {
2076            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
2077            (i, p, size)
2078        })
2079        .collect();
2080
2081    // Sort largest first: ensures big files start hashing immediately while
2082    // small files fill in gaps, minimizing tail latency.
2083    indexed.sort_by(|a, b| b.2.cmp(&a.2));
2084
2085    // Warm page cache for the largest files using async readahead(2).
2086    // Each hash call handles its own mmap prefaulting, but issuing readahead
2087    // here lets the kernel start I/O for upcoming files while workers process
2088    // current ones. readahead(2) returns immediately (non-blocking).
2089    #[cfg(target_os = "linux")]
2090    {
2091        use std::os::unix::io::AsRawFd;
2092        for &(_, path, size) in indexed.iter().take(20) {
2093            if size >= 1024 * 1024 {
2094                if let Ok(file) = open_noatime(path) {
2095                    unsafe {
2096                        libc::readahead(file.as_raw_fd(), 0, size as usize);
2097                    }
2098                }
2099            }
2100        }
2101    }
2102
2103    let num_threads = std::thread::available_parallelism()
2104        .map(|n| n.get())
2105        .unwrap_or(4)
2106        .min(n);
2107
2108    // Atomic work index for dynamic work-stealing.
2109    let work_idx = AtomicUsize::new(0);
2110
2111    std::thread::scope(|s| {
2112        let work_idx = &work_idx;
2113        let indexed = &indexed;
2114
2115        let handles: Vec<_> = (0..num_threads)
2116            .map(|_| {
2117                s.spawn(move || {
2118                    let mut local_results = Vec::new();
2119                    loop {
2120                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2121                        if idx >= indexed.len() {
2122                            break;
2123                        }
2124                        let (orig_idx, path, _size) = indexed[idx];
2125                        let result = hash_file(algo, path);
2126                        local_results.push((orig_idx, result));
2127                    }
2128                    local_results
2129                })
2130            })
2131            .collect();
2132
2133        // Collect results and reorder to match original input order.
2134        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
2135        for handle in handles {
2136            for (orig_idx, result) in handle.join().unwrap() {
2137                results[orig_idx] = Some(result);
2138            }
2139        }
2140        results
2141            .into_iter()
2142            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
2143            .collect()
2144    })
2145}
2146
/// Fast parallel hash for multi-file workloads. Skips the stat-all-and-sort phase
/// of `hash_files_parallel()` and uses `hash_file_nostat()` per worker to minimize
/// per-file syscall overhead. For 100 tiny files, this eliminates ~200 stat() calls
/// (100 from the sort phase + 100 from open_and_stat inside each worker).
/// Returns results in input order.
pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();
    if n == 0 {
        return Vec::new();
    }
    if n == 1 {
        // Single file: no point spawning workers.
        return vec![hash_file_nostat(algo, paths[0])];
    }

    // Issue readahead for all files (no size threshold — even tiny files benefit
    // from batched WILLNEED hints when processing 100+ files)
    #[cfg(target_os = "linux")]
    readahead_files_all(paths);

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Shared atomic cursor: workers claim paths[idx] in submission order
    // (no size-based sorting in this fast path).
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= n {
                            break;
                        }
                        let result = hash_file_nostat(algo, paths[idx]);
                        local_results.push((idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Reassemble per-worker results into the original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (idx, result) in handle.join().unwrap() {
                results[idx] = Some(result);
            }
        }
        // Every slot is filled by exactly one worker; the fallback error is
        // defensive only.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
2205
/// Batch-hash multiple files: pre-read all files into memory in parallel,
/// then hash all data in parallel. Optimal for many small files where per-file
/// overhead (open/read/close syscalls) dominates over hash computation.
///
/// Reuses the same parallel file loading pattern as `blake2b_hash_files_many()`.
/// For 100 × 55-byte files: all 5500 bytes are loaded in parallel across threads,
/// then hashed in parallel — minimizing wall-clock time for syscall-bound workloads.
/// Returns results in input order.
pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();
    if n == 0 {
        return Vec::new();
    }

    // Issue readahead for all files
    #[cfg(target_os = "linux")]
    readahead_files_all(paths);

    // Phase 1: Load all files into memory in parallel.
    // For 20+ files, use fast path that skips fstat.
    let use_fast = n >= 20;

    let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
        // Sequential loading — avoids thread spawn overhead for small batches.
        // NOTE: use_fast is always false here (n <= 10 < 20), so this branch
        // always takes open_file_content; the check mirrors the parallel arm.
        paths
            .iter()
            .map(|&path| {
                if use_fast {
                    open_file_content_fast(path)
                } else {
                    open_file_content(path)
                }
            })
            .collect()
    } else {
        let num_threads = std::thread::available_parallelism()
            .map(|t| t.get())
            .unwrap_or(4)
            .min(n);
        // Ceiling division so every path lands in exactly one chunk.
        let chunk_size = (n + num_threads - 1) / num_threads;

        // Scoped threads let workers borrow `paths`; joining in spawn order
        // keeps results aligned with the input order.
        std::thread::scope(|s| {
            let handles: Vec<_> = paths
                .chunks(chunk_size)
                .map(|chunk| {
                    s.spawn(move || {
                        chunk
                            .iter()
                            .map(|&path| {
                                if use_fast {
                                    open_file_content_fast(path)
                                } else {
                                    open_file_content(path)
                                }
                            })
                            .collect::<Vec<_>>()
                    })
                })
                .collect();

            handles
                .into_iter()
                .flat_map(|h| h.join().unwrap())
                .collect()
        })
    };

    // Phase 2: Hash all loaded data. For tiny files hash is negligible;
    // for larger files the parallel hashing across threads helps.
    let num_hash_threads = std::thread::available_parallelism()
        .map(|t| t.get())
        .unwrap_or(4)
        .min(n);
    // Shared atomic cursor for work-stealing over file_data.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let file_data = &file_data;

        let handles: Vec<_> = (0..num_hash_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= n {
                            break;
                        }
                        let result = match &file_data[idx] {
                            Ok(content) => hash_bytes(algo, content.as_ref()),
                            // io::Error is not Clone: rebuild from kind+message.
                            // (The raw OS error code is not preserved here.)
                            Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
                        };
                        local_results.push((idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Reassemble per-worker results into the original input order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (idx, result) in handle.join().unwrap() {
                results[idx] = Some(result);
            }
        }
        // Every slot is filled by exactly one worker; the fallback error is
        // defensive only.
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
2317
/// Stream-hash a file that already has a prefix read into memory.
/// Feeds `prefix` into the hasher first, then streams the rest from `file`.
/// Avoids re-opening and re-reading the file when the initial buffer is exhausted.
///
/// Backend selection, in order:
/// 1. BLAKE2b always uses blake2b_simd (all platforms).
/// 2. On Linux, every other algorithm goes through dynamically loaded
///    OpenSSL EVP when libcrypto is available.
/// 3. Otherwise: ring for SHA-1/384/512 on non-Apple targets, the sha2/sha1
///    crates on Apple targets, and sha2/md5 Digest impls for the rest.
fn hash_stream_with_prefix(
    algo: HashAlgorithm,
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    // Blake2b uses its own hasher on all platforms
    if matches!(algo, HashAlgorithm::Blake2b) {
        let mut state = blake2b_simd::Params::new().to_state();
        state.update(prefix);
        // Reuse the thread-local stream buffer to avoid per-call allocation.
        return STREAM_BUF.with(|cell| {
            let mut buf = cell.borrow_mut();
            ensure_stream_buf(&mut buf);
            loop {
                let n = read_full(&mut file, &mut buf)?;
                if n == 0 {
                    break;
                }
                state.update(&buf[..n]);
            }
            Ok(hex_encode(state.finalize().as_bytes()))
        });
    }

    // On Linux, try OpenSSL for all supported algorithms
    #[cfg(target_os = "linux")]
    {
        let evp_algo = match algo {
            HashAlgorithm::Md5 => Some(openssl_evp::EvpAlgorithm::Md5),
            HashAlgorithm::Sha1 => Some(openssl_evp::EvpAlgorithm::Sha1),
            HashAlgorithm::Sha224 => Some(openssl_evp::EvpAlgorithm::Sha224),
            HashAlgorithm::Sha256 => Some(openssl_evp::EvpAlgorithm::Sha256),
            HashAlgorithm::Sha384 => Some(openssl_evp::EvpAlgorithm::Sha384),
            HashAlgorithm::Sha512 => Some(openssl_evp::EvpAlgorithm::Sha512),
            HashAlgorithm::Blake2b => None,
        };
        // Only take the EVP path when libcrypto actually loaded.
        if let Some(evp) = evp_algo.filter(|_| openssl_evp::is_available()) {
            let digest = openssl_evp::hash_reader_with_prefix(evp, prefix, file)?;
            return Ok(hex_encode(&digest));
        }
    }

    // Portable fallbacks: ring's assembly paths where available, otherwise
    // the pure-Rust Digest implementations.
    match algo {
        HashAlgorithm::Sha224 => hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file),
        HashAlgorithm::Sha256 => hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file),
        HashAlgorithm::Md5 => hash_stream_with_prefix_digest::<md5::Md5>(prefix, file),
        #[cfg(not(target_vendor = "apple"))]
        HashAlgorithm::Sha1 => {
            hash_stream_with_prefix_ring(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY, prefix, file)
        }
        #[cfg(target_vendor = "apple")]
        HashAlgorithm::Sha1 => hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file),
        #[cfg(not(target_vendor = "apple"))]
        HashAlgorithm::Sha384 => hash_stream_with_prefix_ring(&ring::digest::SHA384, prefix, file),
        #[cfg(target_vendor = "apple")]
        HashAlgorithm::Sha384 => hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file),
        #[cfg(not(target_vendor = "apple"))]
        HashAlgorithm::Sha512 => hash_stream_with_prefix_ring(&ring::digest::SHA512, prefix, file),
        #[cfg(target_vendor = "apple")]
        HashAlgorithm::Sha512 => hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file),
        // Blake2b returned early above.
        HashAlgorithm::Blake2b => unreachable!(),
    }
}
2383
2384/// Generic stream-hash with prefix using Digest trait (all platforms).
2385fn hash_stream_with_prefix_digest<D: digest::Digest>(
2386    prefix: &[u8],
2387    mut file: File,
2388) -> io::Result<String> {
2389    STREAM_BUF.with(|cell| {
2390        let mut buf = cell.borrow_mut();
2391        ensure_stream_buf(&mut buf);
2392        let mut hasher = D::new();
2393        hasher.update(prefix);
2394        loop {
2395            let n = read_full(&mut file, &mut buf)?;
2396            if n == 0 {
2397                break;
2398            }
2399            hasher.update(&buf[..n]);
2400        }
2401        Ok(hex_encode(&hasher.finalize()))
2402    })
2403}
2404
2405/// Stream-hash with prefix using ring's BoringSSL assembly (non-Apple targets).
2406#[cfg(not(target_vendor = "apple"))]
2407fn hash_stream_with_prefix_ring(
2408    algo: &'static ring::digest::Algorithm,
2409    prefix: &[u8],
2410    mut file: File,
2411) -> io::Result<String> {
2412    STREAM_BUF.with(|cell| {
2413        let mut buf = cell.borrow_mut();
2414        ensure_stream_buf(&mut buf);
2415        let mut ctx = ring::digest::Context::new(algo);
2416        ctx.update(prefix);
2417        loop {
2418            let n = read_full(&mut file, &mut buf)?;
2419            if n == 0 {
2420                break;
2421            }
2422            ctx.update(&buf[..n]);
2423        }
2424        Ok(hex_encode(ctx.finish().as_ref()))
2425    })
2426}
2427
2428/// Hash a file without fstat — just open, read until EOF, hash.
2429/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
2430/// Uses a two-tier buffer strategy: small stack buffer (4KB) for the initial read,
2431/// then falls back to a larger stack buffer (64KB) or streaming hash for bigger files.
2432/// For benchmark's 55-byte files: one read() fills the 4KB buffer, hash immediately.
2433pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2434    let mut file = open_noatime(path)?;
2435    // First try a small stack buffer — optimal for tiny files (< 4KB).
2436    // Most "many_files" benchmark files are ~55 bytes, so this completes
2437    // with a single read() syscall and no fallback.
2438    let mut small_buf = [0u8; 4096];
2439    match file.read(&mut small_buf) {
2440        Ok(0) => return hash_bytes(algo, &[]),
2441        Ok(n) if n < small_buf.len() => {
2442            // File fits in small buffer — hash directly (common case)
2443            return hash_bytes(algo, &small_buf[..n]);
2444        }
2445        Ok(n) => {
2446            // Might be more data — fall back to larger buffer
2447            let mut buf = [0u8; 65536];
2448            buf[..n].copy_from_slice(&small_buf[..n]);
2449            let mut total = n;
2450            loop {
2451                match file.read(&mut buf[total..]) {
2452                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2453                    Ok(n) => {
2454                        total += n;
2455                        if total >= buf.len() {
2456                            // File > 64KB: stream-hash from existing fd instead of
2457                            // re-opening. Feed already-read prefix, continue streaming.
2458                            return hash_stream_with_prefix(algo, &buf[..total], file);
2459                        }
2460                    }
2461                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2462                    Err(e) => return Err(e),
2463                }
2464            }
2465        }
2466        Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
2467            // Retry with full buffer on interrupt
2468            let mut buf = [0u8; 65536];
2469            let mut total = 0;
2470            loop {
2471                match file.read(&mut buf[total..]) {
2472                    Ok(0) => return hash_bytes(algo, &buf[..total]),
2473                    Ok(n) => {
2474                        total += n;
2475                        if total >= buf.len() {
2476                            // File > 64KB: stream-hash from existing fd
2477                            return hash_stream_with_prefix(algo, &buf[..total], file);
2478                        }
2479                    }
2480                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2481                    Err(e) => return Err(e),
2482                }
2483            }
2484        }
2485        Err(e) => return Err(e),
2486    }
2487}
2488
/// Hash a small file using raw Linux syscalls without fstat.
/// For the multi-file sequential path where we already know files are small.
/// Avoids: OpenOptions builder, CString per-file alloc (reuses caller's buffer),
/// fstat overhead (unnecessary when we just need open+read+close).
/// Returns hash as hex string.
///
/// # Errors
/// Propagates `open(2)` failures (after one retry without `O_NOATIME` on
/// `EPERM`) and any read error from `hash_fd_small`.
#[cfg(target_os = "linux")]
fn hash_file_raw_nostat(
    algo: HashAlgorithm,
    path: &Path,
    c_path_buf: &mut Vec<u8>,
) -> io::Result<String> {
    use std::os::unix::ffi::OsStrExt;

    let path_bytes = path.as_os_str().as_bytes();

    // Reuse caller's buffer for null-terminated path (avoids heap alloc per file)
    // NOTE(review): unlike hash_file_raw's CString::new, there is no interior-NUL
    // check here; a path containing NUL would be truncated at the first NUL by
    // the kernel — confirm callers only pass paths from directory enumeration.
    c_path_buf.clear();
    c_path_buf.reserve(path_bytes.len() + 1);
    c_path_buf.extend_from_slice(path_bytes);
    c_path_buf.push(0);

    // O_NOATIME skips atime updates; it is disabled process-wide after the
    // first EPERM (see below) so subsequent opens don't pay the retry.
    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path_buf.as_ptr() as *const libc::c_char, flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM while O_NOATIME was requested: remember the kernel rejected the
        // flag and retry this open once without it.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe {
                libc::open(
                    c_path_buf.as_ptr() as *const libc::c_char,
                    libc::O_RDONLY | libc::O_CLOEXEC,
                )
            };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_fd_small(algo, fd2);
        }
        return Err(err);
    }
    hash_fd_small(algo, fd)
}
2535
/// Read a small file from fd, hash it, close fd. No fstat needed.
///
/// Takes ownership of `fd` on every path: it is closed manually on the
/// tiny-file and error paths, or handed to a `File` (RAII close) once the
/// file turns out to be larger than 4KB.
#[cfg(target_os = "linux")]
#[inline]
fn hash_fd_small(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    let mut buf = [0u8; 4096];
    // First read, retried on EINTR; fd is closed before any early error return.
    let n = loop {
        let ret = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
        if ret >= 0 {
            break ret;
        }
        let err = io::Error::last_os_error();
        if err.kind() == io::ErrorKind::Interrupted {
            continue;
        }
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    };
    let n = n as usize;
    if n < buf.len() {
        // File fits in 4KB — common case for small files
        // NOTE(review): a short read is treated as EOF here. True for regular
        // files on Linux; a pipe/socket fd could be truncated — confirm callers
        // only pass regular-file fds.
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..n]);
    }
    // File > 4KB: fall back to hash_file_nostat-style reading
    // Wrap fd in File for RAII close
    use std::os::unix::io::FromRawFd;
    // SAFETY: we own `fd` and nothing else closes it after this point.
    let mut file = unsafe { File::from_raw_fd(fd) };
    let mut big_buf = [0u8; 65536];
    big_buf[..n].copy_from_slice(&buf[..n]);
    let mut total = n;
    loop {
        match std::io::Read::read(&mut file, &mut big_buf[total..]) {
            Ok(0) => return hash_bytes(algo, &big_buf[..total]),
            Ok(n) => {
                total += n;
                if total >= big_buf.len() {
                    // >64KB: switch to streaming, feeding buffered bytes as prefix.
                    return hash_stream_with_prefix(algo, &big_buf[..total], file);
                }
            }
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
}
2584
2585/// Hash a single file using raw Linux syscalls for minimum overhead.
2586/// Bypasses Rust's File abstraction entirely: raw open/fstat/read/close.
2587/// For the single-file fast path, this eliminates OpenOptions builder,
2588/// CString heap allocation, File wrapper overhead, and Read trait dispatch.
2589///
2590/// Size-based dispatch:
2591/// - Tiny (<8KB): stack buffer + raw read + hash_bytes (3 syscalls total)
2592/// - Small (8KB-16MB): wraps fd in File, reads into thread-local buffer
2593/// - Large (>=16MB): wraps fd in File, mmaps with HugePage + PopulateRead
2594/// - Non-regular: wraps fd in File, streaming hash_reader
2595#[cfg(target_os = "linux")]
2596pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2597    use std::os::unix::ffi::OsStrExt;
2598
2599    let path_bytes = path.as_os_str().as_bytes();
2600    let c_path = std::ffi::CString::new(path_bytes)
2601        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;
2602
2603    // Raw open with O_RDONLY | O_CLOEXEC, optionally O_NOATIME
2604    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
2605    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
2606        flags |= libc::O_NOATIME;
2607    }
2608
2609    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
2610    if fd < 0 {
2611        let err = io::Error::last_os_error();
2612        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
2613            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
2614            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
2615            if fd2 < 0 {
2616                return Err(io::Error::last_os_error());
2617            }
2618            return hash_from_raw_fd(algo, fd2);
2619        }
2620        return Err(err);
2621    }
2622    hash_from_raw_fd(algo, fd)
2623}
2624
/// Hash from a raw fd — dispatches by file size for optimal I/O strategy.
/// Handles tiny (stack buffer), small (thread-local buffer), large (mmap), and
/// non-regular (streaming) files.
///
/// Takes ownership of `fd`: closed manually on the tiny/error paths, otherwise
/// wrapped in a `File` so RAII closes it.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    // Raw fstat to determine size and type
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file: nothing to read, hash the empty message.
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &[]);
    }

    // Tiny files (<8KB): raw read into stack buffer, no File wrapper needed.
    // Entire I/O in 3 raw syscalls: open + read + close.
    // NOTE(review): the 8192-byte stack buffer assumes TINY_FILE_LIMIT <= 8192;
    // a larger limit would let the read() below overrun `buf` — confirm the
    // constant's value where it is defined.
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        // Read until we have `size` bytes, hit EOF, or see a non-EINTR error.
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                // Early EOF (file shrank after fstat): hash what we got.
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return hash_bytes(algo, &buf[..total]);
    }

    // For larger files, wrap fd in File for RAII close and existing optimized paths.
    // SAFETY: we own `fd` and nothing else closes it after this point.
    use std::os::unix::io::FromRawFd;
    let file = unsafe { File::from_raw_fd(fd) };

    if is_regular && size > 0 {
        return hash_regular_file(algo, file, size);
    }

    // Non-regular files (pipes, devices, sockets): streaming hash
    hash_reader(algo, file)
}
2695
2696/// Issue readahead hints for ALL file paths (no size threshold).
2697/// For multi-file benchmarks, even small files benefit from batched readahead.
2698#[cfg(target_os = "linux")]
2699pub fn readahead_files_all(paths: &[&Path]) {
2700    use std::os::unix::io::AsRawFd;
2701    for path in paths {
2702        if let Ok(file) = open_noatime(path) {
2703            if let Ok(meta) = file.metadata() {
2704                if meta.file_type().is_file() {
2705                    let len = meta.len();
2706                    unsafe {
2707                        libc::posix_fadvise(
2708                            file.as_raw_fd(),
2709                            0,
2710                            len as i64,
2711                            libc::POSIX_FADV_WILLNEED,
2712                        );
2713                    }
2714                }
2715            }
2716        }
2717    }
2718}
2719
/// No-op on non-Linux targets: readahead hinting here relies on
/// posix_fadvise, which only the Linux build issues.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
2722
/// Print hash result in GNU format: "hash  filename\n" (text mode) or
/// "hash *filename\n" (binary mode). Raw byte writes, no std::fmt overhead.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker: &[u8] = if binary { b" *" } else { b"  " };
    let pieces: [&[u8]; 4] = [hash.as_bytes(), marker, filename.as_bytes(), b"\n"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
2737
/// Print hash in GNU format with a NUL terminator instead of a newline:
/// "hash  filename\0" (text mode) or "hash *filename\0" (binary mode).
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker: &[u8] = if binary { b" *" } else { b"  " };
    let pieces: [&[u8]; 4] = [hash.as_bytes(), marker, filename.as_bytes(), b"\0"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
2751
2752// ── Single-write output buffer ─────────────────────────────────────
2753// For multi-file workloads, batch the entire "hash  filename\n" line into
2754// a single write() call. This halves the number of BufWriter flushes.
2755
// Reusable per-thread line buffer: each output line is assembled here and
// emitted with one write() call, with zero allocations after warm-up.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}

/// Build and write the standard GNU hash output line in a single write() call.
/// Format: "hash  filename\n" / "hash *filename\n" (binary mode); a leading
/// '\\' is emitted when `escaped` is set, and a NUL terminator replaces the
/// newline when `zero` is set.
#[inline]
pub fn write_hash_line(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
    zero: bool,
    escaped: bool,
) -> io::Result<()> {
    LINE_BUF.with(|cell| {
        let mut line = cell.borrow_mut();
        line.clear();
        if escaped {
            line.push(b'\\');
        }
        line.extend_from_slice(hash.as_bytes());
        line.extend_from_slice(if binary { b" *" } else { b"  " });
        line.extend_from_slice(filename.as_bytes());
        line.push(if zero { b'\0' } else { b'\n' });
        out.write_all(&line)
    })
}

/// Build and write BSD tag format output in a single write() call.
/// Format: "ALGO (filename) = hash\n" (NUL-terminated when `zero` is set).
#[inline]
pub fn write_hash_tag_line(
    out: &mut impl Write,
    algo_name: &str,
    hash: &str,
    filename: &str,
    zero: bool,
) -> io::Result<()> {
    LINE_BUF.with(|cell| {
        let mut line = cell.borrow_mut();
        line.clear();
        let parts: [&[u8]; 5] = [
            algo_name.as_bytes(),
            b" (",
            filename.as_bytes(),
            b") = ",
            hash.as_bytes(),
        ];
        for part in parts {
            line.extend_from_slice(part);
        }
        line.push(if zero { b'\0' } else { b'\n' });
        out.write_all(&line)
    })
}
2814
2815/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
2816pub fn print_hash_tag(
2817    out: &mut impl Write,
2818    algo: HashAlgorithm,
2819    hash: &str,
2820    filename: &str,
2821) -> io::Result<()> {
2822    out.write_all(algo.name().as_bytes())?;
2823    out.write_all(b" (")?;
2824    out.write_all(filename.as_bytes())?;
2825    out.write_all(b") = ")?;
2826    out.write_all(hash.as_bytes())?;
2827    out.write_all(b"\n")
2828}
2829
2830/// Print hash in BSD tag format with NUL terminator.
2831pub fn print_hash_tag_zero(
2832    out: &mut impl Write,
2833    algo: HashAlgorithm,
2834    hash: &str,
2835    filename: &str,
2836) -> io::Result<()> {
2837    out.write_all(algo.name().as_bytes())?;
2838    out.write_all(b" (")?;
2839    out.write_all(filename.as_bytes())?;
2840    out.write_all(b") = ")?;
2841    out.write_all(hash.as_bytes())?;
2842    out.write_all(b"\0")
2843}
2844
/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash\n" for the default 512-bit length, or
/// "BLAKE2b-NNN (filename) = hash\n" for any other length.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        // Default length: fixed prefix, no formatting machinery.
        512 => out.write_all(b"BLAKE2b (")?,
        // Rare non-512 lengths go through write! (negligible per-file cost).
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2865
/// Print hash in BSD tag format with BLAKE2b length info and a NUL terminator:
/// "BLAKE2b (filename) = hash\0" for 512-bit, "BLAKE2b-NNN (…" otherwise.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2883
/// Options for check mode (`--check`-style verification of a checksum list).
pub struct CheckOptions {
    /// Suppress per-file "OK" lines; mismatches and errors still print.
    pub quiet: bool,
    /// Print nothing at all; results are conveyed via `CheckResult` only.
    pub status_only: bool,
    /// Treat format errors as fatal. Not read by `check_file` itself —
    /// interpreted by the caller when mapping counts to an exit status.
    pub strict: bool,
    /// Emit a warning line to the error stream for each improperly
    /// formatted checksum line.
    pub warn: bool,
    /// Silently skip (but count) files that do not exist.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
2896
/// Result of check mode verification.
///
/// Aggregate counters from one pass over a checksum list; the caller maps
/// these to an exit status and summary messages.
pub struct CheckResult {
    /// Files whose computed hash matched the expected value.
    pub ok: usize,
    /// Files read successfully whose hash did NOT match.
    pub mismatches: usize,
    /// Lines that could not be parsed as any supported checksum format.
    pub format_errors: usize,
    /// Files that could not be opened or read (excluding ignored-missing).
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
2906
/// Verify checksums from a check file.
/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
///
/// Per-file status lines ("OK" / "FAILED" / "FAILED open or read") go to `out`;
/// format warnings and read-error details go to `err_out`, honoring `opts`.
/// Returns aggregate counts; only I/O errors from reading `reader` or writing
/// the outputs propagate as `Err`.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    // Copy the frequently-tested flags out of `opts` once.
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        // NOTE(review): trim_end strips ALL trailing whitespace, not just the
        // newline, so a filename ending in spaces/tabs cannot be verified —
        // confirm this matches the intended (GNU) behavior.
        let line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush `out` before writing to `err_out` so the two streams
                    // interleave in order when both go to the same destination.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Same stream-ordering flush as above.
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Digests are hex strings; compare case-insensitively so check files
        // written with upper-case hex still verify.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
3003
/// Parse a checksum line in any supported format.
///
/// Accepts BSD tag lines ("ALGO (filename) = hash", including "BLAKE2b-NNN"),
/// GNU text lines ("hash  filename", optionally with a leading backslash for
/// escaped filenames), and GNU binary lines ("hash *filename").
/// Returns `(hash, filename)` on success.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // BSD tag format: strip a known "ALGO (" prefix first.
    const TAG_PREFIXES: [&str; 7] = [
        "MD5 (",
        "SHA1 (",
        "SHA224 (",
        "SHA256 (",
        "SHA384 (",
        "SHA512 (",
        "BLAKE2b (",
    ];
    let tagged = TAG_PREFIXES
        .iter()
        .find_map(|p| line.strip_prefix(*p))
        .or_else(|| {
            // "BLAKE2b-NNN (filename) = hash" — digits only between '-' and " ("
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[sp + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = tagged {
        if let Some(close) = rest.find(") = ") {
            return Some((&rest[close + 4..], &rest[..close]));
        }
    }

    // A leading backslash marks an escaped filename; the hash starts after it.
    let line = line.strip_prefix('\\').unwrap_or(line);

    // GNU text format "hash  filename", then binary format "hash *filename";
    // both separators are two bytes wide.
    for sep in ["  ", " *"] {
        if let Some(at) = line.find(sep) {
            return Some((&line[..at], &line[at + 2..]));
        }
    }
    None
}
3052
/// Parse a BSD-style tag line: "ALGO (filename) = hash".
/// Returns (expected_hash, filename, optional_bits), where `bits` is parsed
/// from a trailing "-NNN" in the algo name (e.g., "BLAKE2b-256" -> Some(256)).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let open = line.find(" (")?;
    let (algo_part, tail) = (&line[..open], &line[open + 2..]);
    let close = tail.find(") = ")?;

    // Optional bit length after the last '-' in the algorithm name.
    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());

    Some((&tail[close + 4..], &tail[..close], bits))
}
3073
/// Read as many bytes as possible into `buf`, retrying on partial reads.
/// Ensures each hash update gets a full buffer (fewer update calls = less
/// overhead). Regular-file reads usually satisfy the whole request at once,
/// so the common case is a single read() call.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    // Fast path: full buffer (or clean EOF) on the first read.
    let first = reader.read(buf)?;
    if first == 0 || first == buf.len() {
        return Ok(first);
    }
    // Partial read (pipe, tty, slow device): keep reading until the buffer is
    // full or EOF, swallowing EINTR.
    let mut filled = first;
    loop {
        if filled == buf.len() {
            return Ok(filled);
        }
        match reader.read(&mut buf[filled..]) {
            Ok(0) => return Ok(filled),
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
}
3096
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
/// (while-loop because `for` is not allowed in const fn.)
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Fast hex encoding using the 2-byte pair lookup table — one table lookup per
/// input byte, one allocation total.
///
/// The output buffer is zero-initialized before encoding: the previous
/// `set_len`-then-write approach exposed uninitialized memory through a `&mut
/// [u8]`, which is undefined behavior under current Rust rules
/// (clippy::uninit_vec); `vec![0u8; n]` costs nothing measurable at digest
/// sizes (≤128 bytes).
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = vec![0u8; bytes.len() * 2];
    hex_encode_to_slice(bytes, &mut out);
    // SAFETY: HEX_TABLE contains only ASCII hex digits, so `out` is valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}

/// Encode bytes as hex directly into a pre-allocated output slice.
/// Output slice must be at least `bytes.len() * 2` bytes long.
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    // chunks_exact_mut(2) + zip lets LLVM hoist the bounds checks while staying
    // safe: a too-short `out` now stops early instead of writing out of bounds.
    for (pair, &b) in out.chunks_exact_mut(2).zip(bytes.iter()) {
        pair.copy_from_slice(&HEX_TABLE[b as usize]);
    }
}