// coreutils_rs/hash/core.rs

use std::cell::RefCell;
use std::fs::File;
use std::io::{self, BufRead, Read, Write};
use std::path::Path;

use rayon::prelude::*;

#[cfg(target_os = "linux")]
use std::sync::atomic::{AtomicBool, Ordering};

#[cfg(not(target_os = "linux"))]
use digest::Digest;
#[cfg(not(target_os = "linux"))]
use md5::Md5;
15
/// Supported hash algorithms.
///
/// A fieldless enum, so `Copy` is free and values are passed around by value
/// throughout this module.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha256,
    Md5,
    Blake2b,
}
23
24impl HashAlgorithm {
25    pub fn name(self) -> &'static str {
26        match self {
27            HashAlgorithm::Sha256 => "SHA256",
28            HashAlgorithm::Md5 => "MD5",
29            HashAlgorithm::Blake2b => "BLAKE2b",
30        }
31    }
32}
33
// ── Generic hash helpers ────────────────────────────────────────────

/// One-shot hash of `data` through the generic `Digest` trait.
/// Fallback path for platforms without the OpenSSL-backed implementations.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let digest = D::digest(data);
    hex_encode(&digest)
}
41
/// Stream `reader` through hasher `D`, reusing the thread-local I/O buffer.
/// Fallback path for platforms without the OpenSSL-backed implementations.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut hasher = D::new();
        loop {
            match read_full(&mut reader, &mut buf)? {
                0 => break, // EOF
                n => hasher.update(&buf[..n]),
            }
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}
59
// ── Public hashing API ──────────────────────────────────────────────

/// Streaming I/O buffer size (8 MiB). Large enough to amortize read()
/// syscall overhead (e.g. 13 reads for a 100 MB file instead of 25) while
/// still being cache-friendly on modern CPUs.
const HASH_READ_BUF: usize = 8 * 1024 * 1024;

// Reusable per-thread buffer for streaming hash I/O. Starts empty and is
// grown lazily on the first streaming call, so workloads that only touch
// small files (e.g. "sha256sum *.txt" with every file under 1 MB) never pay
// the 8 MiB allocation.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Grow `buf` to the full streaming size if it is still smaller.
/// Only the streaming paths call this, keeping the allocation lazy.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() < HASH_READ_BUF {
        buf.resize(HASH_READ_BUF, 0);
    }
}
82
83// ── SHA-256 ───────────────────────────────────────────────────────────
84
85/// Single-shot SHA-256 using OpenSSL's optimized assembly (SHA-NI on x86).
86/// Linux only — OpenSSL is not available on Windows/macOS in CI.
87#[cfg(target_os = "linux")]
88fn sha256_bytes(data: &[u8]) -> String {
89    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
90        .expect("SHA256 hash failed");
91    hex_encode(&digest)
92}
93
/// One-shot SHA-256 via ring's BoringSSL assembly (Windows and other
/// non-Apple, non-Linux targets).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    let digest = ring::digest::digest(&ring::digest::SHA256, data);
    hex_encode(digest.as_ref())
}
99
/// Single-shot SHA-256 using the pure-Rust sha2 crate.
/// Apple-only fallback — ring doesn't compile on Apple Silicon here.
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha256>(data)
}
105
106/// Streaming SHA-256 using OpenSSL's optimized assembly.
107/// Linux only — OpenSSL is not available on Windows/macOS in CI.
108#[cfg(target_os = "linux")]
109fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
110    STREAM_BUF.with(|cell| {
111        let mut buf = cell.borrow_mut();
112        ensure_stream_buf(&mut buf);
113        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
114            .map_err(|e| io::Error::other(e))?;
115        loop {
116            let n = read_full(&mut reader, &mut buf)?;
117            if n == 0 {
118                break;
119            }
120            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
121        }
122        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
123        Ok(hex_encode(&digest))
124    })
125}
126
/// Streaming SHA-256 via ring's BoringSSL assembly (Windows and other
/// non-Apple, non-Linux targets), reusing the thread-local I/O buffer.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        ensure_stream_buf(&mut buf);
        let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
        loop {
            match read_full(&mut reader, &mut buf)? {
                0 => break, // EOF
                n => ctx.update(&buf[..n]),
            }
        }
        Ok(hex_encode(ctx.finish().as_ref()))
    })
}
144
/// Streaming SHA-256 using the sha2 crate (Apple-only fallback).
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha256>(reader)
}
150
151/// Compute hash of a byte slice directly (zero-copy fast path).
152pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
153    match algo {
154        HashAlgorithm::Sha256 => sha256_bytes(data),
155        HashAlgorithm::Md5 => md5_bytes(data),
156        HashAlgorithm::Blake2b => {
157            let hash = blake2b_simd::blake2b(data);
158            hex_encode(hash.as_bytes())
159        }
160    }
161}
162
163// ── MD5 ─────────────────────────────────────────────────────────────
164
165/// Single-shot MD5 using OpenSSL's optimized assembly (Linux).
166#[cfg(target_os = "linux")]
167fn md5_bytes(data: &[u8]) -> String {
168    let digest =
169        openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
170    hex_encode(&digest)
171}
172
/// Single-shot MD5 using the md-5 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    hash_digest::<Md5>(data)
}
178
179/// Compute hash of data from a reader, returning hex string.
180pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
181    match algo {
182        HashAlgorithm::Sha256 => sha256_reader(reader),
183        HashAlgorithm::Md5 => md5_reader(reader),
184        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
185    }
186}
187
188/// Streaming MD5 using OpenSSL's optimized assembly (Linux).
189#[cfg(target_os = "linux")]
190fn md5_reader(mut reader: impl Read) -> io::Result<String> {
191    STREAM_BUF.with(|cell| {
192        let mut buf = cell.borrow_mut();
193        ensure_stream_buf(&mut buf);
194        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
195            .map_err(|e| io::Error::other(e))?;
196        loop {
197            let n = read_full(&mut reader, &mut buf)?;
198            if n == 0 {
199                break;
200            }
201            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
202        }
203        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
204        Ok(hex_encode(&digest))
205    })
206}
207
/// Streaming MD5 using the md-5 crate (non-Linux fallback).
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}
213
214/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
215/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
216#[cfg(target_os = "linux")]
217static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
218
219/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
220/// Caches whether O_NOATIME works to avoid double-open on every file.
221#[cfg(target_os = "linux")]
222fn open_noatime(path: &Path) -> io::Result<File> {
223    use std::os::unix::fs::OpenOptionsExt;
224    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
225        match std::fs::OpenOptions::new()
226            .read(true)
227            .custom_flags(libc::O_NOATIME)
228            .open(path)
229        {
230            Ok(f) => return Ok(f),
231            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
232                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
233                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
234            }
235            Err(e) => return Err(e), // Real error, propagate
236        }
237    }
238    File::open(path)
239}
240
/// Non-Linux: O_NOATIME does not exist, so this is a plain read-only open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
245
246/// Open a file and get its metadata in one step.
247/// On Linux uses fstat directly on the fd to avoid an extra syscall layer.
248#[cfg(target_os = "linux")]
249#[inline]
250fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
251    let file = open_noatime(path)?;
252    let fd = {
253        use std::os::unix::io::AsRawFd;
254        file.as_raw_fd()
255    };
256    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
257    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
258        return Err(io::Error::last_os_error());
259    }
260    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
261    let size = stat.st_size as u64;
262    Ok((file, size, is_regular))
263}
264
/// Portable fallback: open, then query size/type through std metadata.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let is_file = meta.file_type().is_file();
    Ok((file, meta.len(), is_file))
}
272
/// Minimum file size to issue an fadvise readahead hint (1 MiB).
/// For smaller files the syscall overhead exceeds the readahead benefit.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

/// Maximum file size for the single-read hash optimization.
/// Files up to this size are read entirely into a thread-local buffer and
/// hashed single-shot (avoids Hasher allocation + streaming overhead).
const SMALL_FILE_LIMIT: u64 = 1024 * 1024;

/// Threshold for tiny files that fit a stack-allocated buffer.
/// Below this size we use a stack buffer + single read() syscall,
/// completely avoiding heap allocation on the data path.
const TINY_FILE_LIMIT: u64 = 8 * 1024;

// Thread-local reusable buffer for the small-file single-read path.
// Avoids a fresh allocation per file when hashing many small files
// (e.g. 100 files of 1 KB each).
thread_local! {
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
293
/// Hash a file by path, choosing a strategy by size:
/// - empty regular file: hash of the empty slice, no reads;
/// - < 8 KiB: stack buffer + single read (`hash_file_tiny`);
/// - >= 1 MiB: mmap (zero-copy, no read() syscalls) with sequential advice;
/// - 8 KiB .. 1 MiB: thread-local buffer + single-shot hash (`hash_file_small`);
/// - non-regular files or failed mmap: streaming read fallback.
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(hash_bytes(algo, &[]));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return hash_file_tiny(algo, file, file_size as usize);
        }
        // mmap for large files — zero-copy, eliminates multiple read() syscalls
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            if file_size >= FADVISE_MIN_SIZE {
                use std::os::unix::io::AsRawFd;
                // Advisory only; the result is deliberately ignored.
                unsafe {
                    libc::posix_fadvise(
                        file.as_raw_fd(),
                        0,
                        file_size as i64,
                        libc::POSIX_FADV_SEQUENTIAL,
                    );
                }
            }
            if let Ok(mmap) = unsafe { memmap2::MmapOptions::new().populate().map(&file) } {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                    // Huge pages reduce TLB pressure for multi-MB mappings.
                    if file_size >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                }
                return Ok(hash_bytes(algo, &mmap));
            }
            // mmap failure falls through to the streaming path below.
        }
        // Small files (8KB..1MB): single read into thread-local buffer, then
        // single-shot hash — avoids per-file Hasher allocation + streaming overhead.
        if file_size < SMALL_FILE_LIMIT {
            return hash_file_small(algo, file, file_size as usize);
        }
    }

    // Non-regular files, or large files whose mmap failed: stream.
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    hash_reader(algo, file)
}
350
351/// Hash a tiny file (<8KB) using a stack-allocated buffer.
352/// Single read() syscall, zero heap allocation on the data path.
353/// Optimal for the "100 small files" benchmark where per-file overhead dominates.
354#[inline]
355fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
356    let mut buf = [0u8; 8192];
357    let mut total = 0;
358    // Read with known size — usually completes in a single read() for regular files
359    while total < size {
360        match file.read(&mut buf[total..size]) {
361            Ok(0) => break,
362            Ok(n) => total += n,
363            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
364            Err(e) => return Err(e),
365        }
366    }
367    Ok(hash_bytes(algo, &buf[..total]))
368}
369
370/// Hash a small file by reading it entirely into a thread-local buffer,
371/// then using the single-shot hash function. Avoids per-file Hasher allocation.
372#[inline]
373fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
374    SMALL_FILE_BUF.with(|cell| {
375        let mut buf = cell.borrow_mut();
376        // Reset length but keep allocation, then grow if needed
377        buf.clear();
378        buf.reserve(size);
379        // SAFETY: capacity >= size after clear+reserve. We read into the buffer
380        // directly and only access buf[..total] where total <= size <= capacity.
381        unsafe {
382            buf.set_len(size);
383        }
384        let mut total = 0;
385        while total < size {
386            match file.read(&mut buf[total..size]) {
387                Ok(0) => break,
388                Ok(n) => total += n,
389                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
390                Err(e) => return Err(e),
391            }
392        }
393        Ok(hash_bytes(algo, &buf[..total]))
394    })
395}
396
/// Hash stdin. If stdin is actually a regular file (shell redirect), hint
/// the kernel for sequential access first; pipes simply stream.
pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
    let stdin = io::stdin();
    // Hint kernel for sequential access if stdin is a regular file (redirect)
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // SAFETY: zeroed libc::stat is valid POD init; fstat overwrites it.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            // Advisory only — the result is deliberately ignored.
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    // Streaming hash — works for both pipe and file-redirect stdin
    hash_reader(algo, stdin.lock())
}
418
/// Decide whether the given batch of paths is worth hashing in parallel.
/// Any two or more files qualify: rayon's pool is lazily initialized once
/// and reused, so per-file work-stealing overhead is negligible (~1µs), and
/// skipping a stat()-based size probe saves one syscall per file.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
426
427/// Issue readahead hints for a list of file paths to warm the page cache.
428/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
429/// Only issues hints for files >= 1MB; small files are read fast enough
430/// that the fadvise syscall overhead isn't worth it.
431#[cfg(target_os = "linux")]
432pub fn readahead_files(paths: &[&Path]) {
433    use std::os::unix::io::AsRawFd;
434    for path in paths {
435        if let Ok(file) = open_noatime(path) {
436            if let Ok(meta) = file.metadata() {
437                let len = meta.len();
438                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
439                    unsafe {
440                        libc::posix_fadvise(
441                            file.as_raw_fd(),
442                            0,
443                            len as i64,
444                            libc::POSIX_FADV_WILLNEED,
445                        );
446                    }
447                }
448            }
449        }
450    }
451}
452
/// Non-Linux: readahead hints are unavailable, so this is deliberately a no-op.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No-op on non-Linux
}
457
458// --- BLAKE2b variable-length functions (using blake2b_simd) ---
459
460/// Hash raw data with BLAKE2b variable output length.
461/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
462pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
463    let hash = blake2b_simd::Params::new()
464        .hash_length(output_bytes)
465        .hash(data);
466    hex_encode(hash.as_bytes())
467}
468
469/// Hash a reader with BLAKE2b variable output length.
470/// Uses thread-local buffer for cache-friendly streaming.
471pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
472    STREAM_BUF.with(|cell| {
473        let mut buf = cell.borrow_mut();
474        ensure_stream_buf(&mut buf);
475        let mut state = blake2b_simd::Params::new()
476            .hash_length(output_bytes)
477            .to_state();
478        loop {
479            let n = read_full(&mut reader, &mut buf)?;
480            if n == 0 {
481                break;
482            }
483            state.update(&buf[..n]);
484        }
485        Ok(hex_encode(state.finalize().as_bytes()))
486    })
487}
488
/// Hash a file with BLAKE2b variable output length, using the same
/// size-based strategy as [`hash_file`]: stack buffer for tiny files,
/// mmap for large files, thread-local buffer for small files, and a
/// streaming read as the fallback.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        // Tiny files (<8KB): stack buffer + single read() — zero heap allocation
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        // mmap for large files — zero-copy, eliminates multiple read() syscalls
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            if file_size >= FADVISE_MIN_SIZE {
                use std::os::unix::io::AsRawFd;
                // Advisory only; the result is deliberately ignored.
                unsafe {
                    libc::posix_fadvise(
                        file.as_raw_fd(),
                        0,
                        file_size as i64,
                        libc::POSIX_FADV_SEQUENTIAL,
                    );
                }
            }
            if let Ok(mmap) = unsafe { memmap2::MmapOptions::new().populate().map(&file) } {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                    // Huge pages reduce TLB pressure for multi-MB mappings.
                    if file_size >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                }
                return Ok(blake2b_hash_data(&mmap, output_bytes));
            }
            // mmap failure falls through to the streaming path below.
        }
        // Small files (8KB..1MB): single read into thread-local buffer, then single-shot hash
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Non-regular files, or large files whose mmap failed: stream.
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    blake2b_hash_reader(file, output_bytes)
}
545
546/// Hash a tiny BLAKE2b file (<8KB) using a stack-allocated buffer.
547#[inline]
548fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
549    let mut buf = [0u8; 8192];
550    let mut total = 0;
551    while total < size {
552        match file.read(&mut buf[total..size]) {
553            Ok(0) => break,
554            Ok(n) => total += n,
555            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
556            Err(e) => return Err(e),
557        }
558    }
559    Ok(blake2b_hash_data(&buf[..total], output_bytes))
560}
561
562/// Hash a small file with BLAKE2b by reading it entirely into a thread-local buffer.
563#[inline]
564fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
565    SMALL_FILE_BUF.with(|cell| {
566        let mut buf = cell.borrow_mut();
567        buf.clear();
568        buf.reserve(size);
569        // SAFETY: capacity >= size after clear+reserve
570        unsafe {
571            buf.set_len(size);
572        }
573        let mut total = 0;
574        while total < size {
575            match file.read(&mut buf[total..size]) {
576                Ok(0) => break,
577                Ok(n) => total += n,
578                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
579                Err(e) => return Err(e),
580            }
581        }
582        Ok(blake2b_hash_data(&buf[..total], output_bytes))
583    })
584}
585
/// Hash stdin with BLAKE2b variable output length.
/// If stdin is a regular file (shell redirect), hint sequential access
/// first, then stream either way.
pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
    let stdin = io::stdin();
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // SAFETY: zeroed libc::stat is valid POD init; fstat overwrites it.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            // Advisory only — the result is deliberately ignored.
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    blake2b_hash_reader(stdin.lock(), output_bytes)
}
606
607/// Internal enum for file content in batch hashing.
608/// Keeps data alive (either as mmap or owned Vec) while hash_many references it.
609enum FileContent {
610    Mmap(memmap2::Mmap),
611    Buf(Vec<u8>),
612}
613
614impl AsRef<[u8]> for FileContent {
615    fn as_ref(&self) -> &[u8] {
616        match self {
617            FileContent::Mmap(m) => m,
618            FileContent::Buf(v) => v,
619        }
620    }
621}
622
623/// Open a file and load its content for batch hashing.
624/// Uses read for tiny files (avoids mmap syscall overhead), mmap for large
625/// files (zero-copy), and read-to-end for non-regular files.
626fn open_file_content(path: &Path) -> io::Result<FileContent> {
627    let (file, size, is_regular) = open_and_stat(path)?;
628    if is_regular && size == 0 {
629        return Ok(FileContent::Buf(Vec::new()));
630    }
631    if is_regular && size > 0 {
632        // Tiny files: read directly into Vec. The mmap syscall + page fault
633        // overhead exceeds the data transfer cost for files under 8KB.
634        // For the 100-file benchmark (55 bytes each), this saves ~100 mmap calls.
635        if size < TINY_FILE_LIMIT {
636            let mut buf = vec![0u8; size as usize];
637            let mut total = 0;
638            let mut f = file;
639            while total < size as usize {
640                match f.read(&mut buf[total..]) {
641                    Ok(0) => break,
642                    Ok(n) => total += n,
643                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
644                    Err(e) => return Err(e),
645                }
646            }
647            buf.truncate(total);
648            return Ok(FileContent::Buf(buf));
649        }
650        if let Ok(mmap) = unsafe { memmap2::MmapOptions::new().populate().map(&file) } {
651            #[cfg(target_os = "linux")]
652            {
653                let _ = mmap.advise(memmap2::Advice::Sequential);
654                if size >= 2 * 1024 * 1024 {
655                    let _ = mmap.advise(memmap2::Advice::HugePage);
656                }
657            }
658            return Ok(FileContent::Mmap(mmap));
659        }
660        // Fallback: read into Vec
661        let mut buf = vec![0u8; size as usize];
662        let mut total = 0;
663        let mut f = file;
664        while total < size as usize {
665            match f.read(&mut buf[total..]) {
666                Ok(0) => break,
667                Ok(n) => total += n,
668                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
669                Err(e) => return Err(e),
670            }
671        }
672        buf.truncate(total);
673        return Ok(FileContent::Buf(buf));
674    }
675    // Non-regular: read to end
676    let mut buf = Vec::new();
677    let mut f = file;
678    f.read_to_end(&mut buf)?;
679    Ok(FileContent::Buf(buf))
680}
681
682/// Open a file and read all content without fstat — just open+read+close.
683/// For many-file workloads (100+ files), skipping fstat saves ~5µs/file
684/// (~0.5ms for 100 files). Falls back to mmap for files > 64KB.
685fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
686    let mut file = open_noatime(path)?;
687    // First read into a small stack-allocated buffer
688    let mut buf = [0u8; 65536];
689    let mut total = 0;
690    loop {
691        match file.read(&mut buf[total..]) {
692            Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
693            Ok(n) => {
694                total += n;
695                if total >= buf.len() {
696                    // File exceeds stack buffer — fall back to open_file_content
697                    // which uses mmap for large files
698                    return open_file_content(path);
699                }
700            }
701            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
702            Err(e) => return Err(e),
703        }
704    }
705}
706
/// Batch-hash multiple files with BLAKE2b using multi-buffer SIMD.
///
/// Uses blake2b_simd::many::hash_many for 4-way AVX2 parallel hashing.
/// All files are pre-loaded into memory (mmap for large, read for small),
/// then hashed simultaneously. Results come back in input order; a per-file
/// I/O error yields `Err` at that file's position.
pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
    use blake2b_simd::many::{HashManyJob, hash_many};

    // Phase 1: Read all files into memory in parallel using rayon.
    // 20+ files: use the fast loader that skips fstat.
    // Chunks of at least N/4 keep rayon work-stealing overhead low.
    let use_fast = paths.len() >= 20;
    let min_chunk = (paths.len() / 4).max(1);
    let file_data: Vec<io::Result<FileContent>> = paths
        .par_iter()
        .with_min_len(min_chunk)
        .map(|&path| {
            if use_fast {
                open_file_content_fast(path)
            } else {
                open_file_content(path)
            }
        })
        .collect();

    // Phase 2: Build hash_many jobs for the reads that succeeded, keeping
    // each job's original index so results can be scattered back in order.
    let hash_results = {
        let mut params = blake2b_simd::Params::new();
        params.hash_length(output_bytes);

        let ok_entries: Vec<(usize, &[u8])> = file_data
            .iter()
            .enumerate()
            .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
            .collect();

        let mut jobs: Vec<HashManyJob> = ok_entries
            .iter()
            .map(|(_, data)| HashManyJob::new(&params, data))
            .collect();

        // Phase 3: Run multi-buffer SIMD hash (4-way AVX2)
        hash_many(jobs.iter_mut());

        // Scatter hex digests back to their original input positions.
        let mut hm: Vec<Option<String>> = vec![None; paths.len()];
        for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
            hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
        }
        hm
    }; // file_data borrow released here

    // Phase 4: Combine hashes and errors in original order. Every Ok read
    // produced a Some(hash) above, so the unwrap cannot fire.
    hash_results
        .into_iter()
        .zip(file_data)
        .map(|(hash_opt, result)| match result {
            Ok(_) => Ok(hash_opt.unwrap()),
            Err(e) => Err(e),
        })
        .collect()
}
771
/// Batch-hash multiple files with SHA-256/MD5 using rayon parallel processing.
/// Pre-loads all files in parallel, then hashes them in parallel.
/// Returns results in input order.
pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    // Only issue readahead for modest file counts (likely larger files).
    // For 100+ tiny files, readahead's per-file overhead (open+stat+fadvise+close
    // = ~30µs/file = ~3ms for 100 files) exceeds its benefit since tiny files
    // are already served from page cache after warmup.
    // NOTE(review): at exactly 20 paths both readahead AND the nostat fast
    // path below are taken — confirm whether the boundaries should be disjoint.
    if paths.len() <= 20 {
        readahead_files_all(paths);
    }

    // 20+ files: use the nostat path that skips the fstat syscall.
    // Saves ~5µs/file = ~0.5ms for 100 files.
    let use_fast = paths.len() >= 20;

    // Batch files into chunks of at least N/4 to reduce rayon work-stealing
    // overhead. For 100 tiny files, this means ~4 chunks of 25 instead of
    // 100 individual tasks — saves ~100µs of scheduling overhead.
    let min_chunk = (paths.len() / 4).max(1);
    paths
        .par_iter()
        .with_min_len(min_chunk)
        .map(|&path| {
            if use_fast {
                hash_file_nostat(algo, path)
            } else {
                hash_file(algo, path)
            }
        })
        .collect()
}
804
805/// Hash a file without fstat — just open, read until EOF, hash.
806/// For many-file workloads (100+ tiny files), skipping fstat saves ~5µs/file.
807/// Falls back to streaming hash for files > 64KB.
808fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
809    let mut file = open_noatime(path)?;
810    let mut buf = [0u8; 65536];
811    let mut total = 0;
812    loop {
813        match file.read(&mut buf[total..]) {
814            Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
815            Ok(n) => {
816                total += n;
817                if total >= buf.len() {
818                    // File exceeds stack buffer — fall back to full hash_file
819                    return hash_file(algo, path);
820                }
821            }
822            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
823            Err(e) => return Err(e),
824        }
825    }
826}
827
828/// Issue readahead hints for ALL file paths (no size threshold).
829/// For multi-file benchmarks, even small files benefit from batched readahead.
830#[cfg(target_os = "linux")]
831pub fn readahead_files_all(paths: &[&Path]) {
832    use std::os::unix::io::AsRawFd;
833    for path in paths {
834        if let Ok(file) = open_noatime(path) {
835            if let Ok(meta) = file.metadata() {
836                if meta.file_type().is_file() {
837                    let len = meta.len();
838                    unsafe {
839                        libc::posix_fadvise(
840                            file.as_raw_fd(),
841                            0,
842                            len as i64,
843                            libc::POSIX_FADV_WILLNEED,
844                        );
845                    }
846                }
847            }
848        }
849    }
850}
851
/// No-op fallback for non-Linux targets: `posix_fadvise`-based readahead
/// hints are only issued on Linux (see the cfg'd sibling above).
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
854
/// Print hash result in GNU format: "hash  filename\n".
/// The two-byte separator is "  " in text mode and " *" in binary mode.
/// Uses raw byte writes to avoid std::fmt overhead.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let sep = [b' ', if binary { b'*' } else { b' ' }];
    let parts: [&[u8]; 4] = [hash.as_bytes(), &sep, filename.as_bytes(), b"\n"];
    for part in parts {
        out.write_all(part)?;
    }
    Ok(())
}
869
/// Print hash in GNU format with NUL terminator instead of newline.
/// Separator is "  " (text) or " *" (binary), as in `print_hash`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    out.write_all(hash.as_bytes())?;
    out.write_all(if binary { b" *" } else { b"  " })?;
    out.write_all(filename.as_bytes())?;
    out.write_all(b"\0")
}
883
// ── Single-write output buffer ─────────────────────────────────────
// For multi-file workloads, batch the entire "hash  filename\n" line into
// a single write() call. This halves the number of BufWriter flushes.

// Thread-local line buffer, reused across files so no per-file allocation.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}

/// Build and write the standard GNU hash output line in a single write() call.
/// Format: "hash  filename\n" or "hash *filename\n" (binary mode).
/// For escaped filenames: "\hash  escaped_filename\n".
/// `zero` swaps the trailing newline for a NUL byte.
#[inline]
pub fn write_hash_line(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
    zero: bool,
    escaped: bool,
) -> io::Result<()> {
    LINE_BUF.with(|slot| {
        let mut line = slot.borrow_mut();
        line.clear();
        if escaped {
            // GNU convention: lines with escaped filenames start with '\'.
            line.push(b'\\');
        }
        line.extend_from_slice(hash.as_bytes());
        line.extend_from_slice(&[b' ', if binary { b'*' } else { b' ' }]);
        line.extend_from_slice(filename.as_bytes());
        line.push(if zero { b'\0' } else { b'\n' });
        out.write_all(&line)
    })
}

/// Build and write BSD tag format output in a single write() call.
/// Format: "ALGO (filename) = hash\n" (or NUL-terminated when `zero`).
#[inline]
pub fn write_hash_tag_line(
    out: &mut impl Write,
    algo_name: &str,
    hash: &str,
    filename: &str,
    zero: bool,
) -> io::Result<()> {
    LINE_BUF.with(|slot| {
        let mut line = slot.borrow_mut();
        line.clear();
        let pieces: [&[u8]; 5] = [
            algo_name.as_bytes(),
            b" (",
            filename.as_bytes(),
            b") = ",
            hash.as_bytes(),
        ];
        for piece in pieces {
            line.extend_from_slice(piece);
        }
        line.push(if zero { b'\0' } else { b'\n' });
        out.write_all(&line)
    })
}
946
947/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
948pub fn print_hash_tag(
949    out: &mut impl Write,
950    algo: HashAlgorithm,
951    hash: &str,
952    filename: &str,
953) -> io::Result<()> {
954    out.write_all(algo.name().as_bytes())?;
955    out.write_all(b" (")?;
956    out.write_all(filename.as_bytes())?;
957    out.write_all(b") = ")?;
958    out.write_all(hash.as_bytes())?;
959    out.write_all(b"\n")
960}
961
962/// Print hash in BSD tag format with NUL terminator.
963pub fn print_hash_tag_zero(
964    out: &mut impl Write,
965    algo: HashAlgorithm,
966    hash: &str,
967    filename: &str,
968) -> io::Result<()> {
969    out.write_all(algo.name().as_bytes())?;
970    out.write_all(b" (")?;
971    out.write_all(filename.as_bytes())?;
972    out.write_all(b") = ")?;
973    out.write_all(hash.as_bytes())?;
974    out.write_all(b"\0")
975}
976
/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash" for 512-bit, or
/// "BLAKE2b-NNN (filename) = hash" for other lengths.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        // Default BLAKE2b output length: plain tag, fast byte write.
        512 => out.write_all(b"BLAKE2b (")?,
        // Rare truncated variants go through std::fmt (negligible per file).
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\n")
}
997
/// Print hash in BSD tag format with BLAKE2b length info and NUL terminator.
/// Same layout as `print_hash_tag_b2sum` but ends in '\0' instead of '\n'.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    out.write_all(filename.as_bytes())?;
    out.write_all(b") = ")?;
    out.write_all(hash.as_bytes())?;
    out.write_all(b"\0")
}
1015
/// Options for check mode (verifying hashes listed in a checksum file).
pub struct CheckOptions {
    /// Suppress per-file "OK" lines; mismatches are still printed.
    pub quiet: bool,
    /// Print no per-file lines at all; results are conveyed via `CheckResult` counts.
    pub status_only: bool,
    // NOTE(review): not consumed by check_file itself — presumably the caller
    // uses it to map `format_errors` to a nonzero exit status; confirm.
    pub strict: bool,
    /// Emit a warning to stderr for each improperly formatted checksum line.
    pub warn: bool,
    /// Silently skip (and count) files that do not exist instead of
    /// reporting a read error.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
1028
/// Result of check mode verification: aggregate counters over all lines
/// processed by `check_file`.
pub struct CheckResult {
    /// Files whose computed hash matched the expected hash.
    pub ok: usize,
    /// Files whose computed hash did NOT match the expected hash.
    pub mismatches: usize,
    /// Lines that could not be parsed as any supported checksum format.
    pub format_errors: usize,
    /// Files that could not be opened or read (excluding ignored-missing).
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
1038
/// Verify checksums from a check file.
/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
///
/// Streams `reader` line by line, re-hashing each referenced file with `algo`
/// and comparing against the expected hash (hex comparison is case-insensitive).
/// Per-file status lines ("OK" / "FAILED" / "FAILED open or read") go to `out`;
/// warnings and read-error details go to `err_out`. `out` is flushed before each
/// write to `err_out` so the two streams interleave in the right order when both
/// point at the same terminal.
///
/// Returns aggregate counters in `CheckResult`. Only I/O errors from reading
/// `reader` or writing `out`/`err_out` are propagated; per-file open/read
/// failures are counted, not returned.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    // Hoist the flag reads out of the loop for brevity below.
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    // 1-based line counter for warning messages.
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        // A read error on the check file itself aborts the whole run.
        let line = line_result?;
        // Strips trailing whitespace (handles CRLF check files).
        // NOTE(review): filenames with trailing spaces will not verify —
        // presumably acceptable; confirm against expected GNU behavior.
        let line = line.trim_end();

        // Blank lines are silently skipped (not counted as format errors).
        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush stdout first so the stderr warning appears in
                    // order relative to earlier status lines.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                // Missing file is only special-cased under --ignore-missing;
                // every other error (and NotFound without the flag) is a
                // read error.
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Same ordering trick: flush stdout before the stderr
                    // detail line, then emit the status line on stdout.
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Case-insensitive compare: check files may carry upper-case hex.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
1135
/// Parse a checksum line in any supported format.
///
/// Supported formats:
/// - BSD tag: "ALGO (filename) = hash" (MD5, SHA256, BLAKE2b, BLAKE2b-NNN)
/// - GNU text: "hash  filename" (two-space separator)
/// - GNU binary: "hash *filename"
/// - A leading '\' (GNU escaped-filename marker) is stripped before the
///   GNU-format parse.
///
/// Returns `Some((hash, filename))`, or `None` for unparseable lines.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Try BSD tag format first: "ALGO (filename) = hash"
    let rest = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // Handle "BLAKE2b-NNN (filename) = hash"
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[sp + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = rest {
        if let Some(paren_idx) = rest.find(") = ") {
            let filename = &rest[..paren_idx];
            let hash = &rest[paren_idx + 4..];
            return Some((hash, filename));
        }
    }

    // Handle backslash-escaped lines (leading '\')
    let line = line.strip_prefix('\\').unwrap_or(line);

    // GNU format: "hash" SP mode-char filename, where mode-char is ' '
    // (text) or '*' (binary). The hash is hex and never contains a space,
    // so splitting at the FIRST space is exact. The previous find("  ")
    // scan mis-split binary-mode lines whose filename contains a double
    // space (e.g. "hash *has  spaces" split at the filename's "  "),
    // and accepted malformed lines with spaces inside the hash field.
    let idx = line.find(' ')?;
    let hash = &line[..idx];
    let rest = &line[idx + 1..];
    let filename = rest.strip_prefix(' ').or_else(|| rest.strip_prefix('*'))?;
    Some((hash, filename))
}
1180
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name (e.g., BLAKE2b-256 -> Some(256)).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    // Split at the first " (" — algorithm name on the left, rest on the right.
    let (algo_part, rest) = line.split_once(" (")?;
    // Split at the first ") = " — filename on the left, hash on the right.
    let (filename, hash) = rest.split_once(") = ")?;

    // Optional bit length after the last '-' in the algo name
    // (e.g., "BLAKE2b-256" -> Some(256)); non-numeric suffixes yield None.
    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
1201
/// Read as many bytes as possible into buf, retrying on partial reads.
/// Ensures each hash update gets a full buffer (fewer update calls = less overhead).
/// Regular-file reads usually fill the buffer on the first iteration; the loop
/// only spins for pipes, sockets, and slow devices.
///
/// Returns the number of bytes read (0 only at EOF). Fix: the previous version
/// retried `ErrorKind::Interrupted` (EINTR) only on the partial-read slow path,
/// so a signal arriving during the FIRST read surfaced as an error instead of
/// being retried. The unified loop retries EINTR everywhere.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            // EOF: return what we have (possibly 0).
            Ok(0) => break,
            Ok(n) => total += n,
            // Retry reads interrupted by a signal.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
1224
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Fast hex encoding using the 2-byte pair lookup table — one lookup per input byte.
///
/// Fix: the previous version called `set_len(len)` on the String's Vec *before*
/// writing, then handed a `&mut [u8]` over uninitialized memory to the encoder —
/// undefined behavior under Rust's memory model (a reference to uninitialized
/// bytes is never valid). Now the buffer is zero-initialized first (negligible
/// for 16–64 byte digests), filled, and converted with `from_utf8_unchecked`,
/// which keeps the no-UTF-8-validation fast path without the UB.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = vec![0u8; bytes.len() * 2];
    hex_encode_to_slice(bytes, &mut out);
    // SAFETY: HEX_TABLE contains only ASCII hex digits, so `out` is valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}

/// Encode bytes as hex directly into a pre-allocated output slice.
/// Output slice must be at least `bytes.len() * 2` bytes long; any extra
/// bytes beyond `bytes.len() * 2` are left untouched.
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    // chunks_exact_mut guarantees 2-byte chunks, so the compiler drops
    // per-iteration bounds checks — no `unsafe` needed for speed here.
    for (pair, &b) in out.chunks_exact_mut(2).zip(bytes) {
        pair.copy_from_slice(&HEX_TABLE[b as usize]);
    }
}