Skip to main content

coreutils_rs/hash/
core.rs

1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9#[cfg(not(target_os = "linux"))]
10use digest::Digest;
11#[cfg(not(target_os = "linux"))]
12use md5::Md5;
13
/// Hash algorithms this module can compute.
///
/// The type is a plain tag (`Copy`), and derives equality and hashing so callers can
/// compare algorithms or use them as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    Sha256,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Returns the algorithm's display name (`"SHA256"`, `"MD5"` or `"BLAKE2b"`),
    /// as written in BSD-style tag lines and in format warnings.
    pub fn name(self) -> &'static str {
        match self {
            HashAlgorithm::Sha256 => "SHA256",
            HashAlgorithm::Md5 => "MD5",
            HashAlgorithm::Blake2b => "BLAKE2b",
        }
    }
}
31
32// ── Generic hash helpers ────────────────────────────────────────────
33
/// One-shot digest of an in-memory slice through any `Digest` implementation,
/// returned as a hex string (only compiled on non-Linux targets).
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let mut hasher = D::new();
    hasher.update(data);
    hex_encode(&hasher.finalize())
}
39
/// Streams `reader` through any `Digest` implementation and returns the hex digest
/// (only compiled on non-Linux targets).
///
/// Data is pulled through the thread-local `STREAM_BUF`, so no buffer is allocated
/// per call. Read errors are returned unchanged.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        let mut state = D::new();
        loop {
            match read_full(&mut reader, &mut chunk)? {
                // A zero-length fill means end of input.
                0 => break,
                filled => {
                    state.update(&chunk[..filled]);
                }
            }
        }
        Ok(hex_encode(&state.finalize()))
    })
}
56
57// ── Public hashing API ──────────────────────────────────────────────
58
/// Size in bytes of the buffer every streaming hasher reads into.
/// 8 MiB keeps the number of read() calls per file low
/// (roughly 13 reads for a 100 MB file).
const HASH_READ_BUF: usize = 8 << 20;

// Per-thread scratch buffer for streaming hash I/O.
// Created lazily the first time a thread touches it, then reused by every later call
// on that thread.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = RefCell::new(vec![0; HASH_READ_BUF]);
}
69
70// ── SHA-256 ───────────────────────────────────────────────────────────
71
72/// Single-shot SHA-256 using OpenSSL's optimized assembly (SHA-NI on x86).
73/// Linux only — OpenSSL is not available on Windows/macOS in CI.
74#[cfg(target_os = "linux")]
75fn sha256_bytes(data: &[u8]) -> String {
76    let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
77        .expect("SHA256 hash failed");
78    hex_encode(&digest)
79}
80
/// One-shot SHA-256 of an in-memory slice via `ring`
/// (targets that are neither Linux nor Apple).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    use ring::digest::{digest, SHA256};

    let sum = digest(&SHA256, data);
    hex_encode(sum.as_ref())
}
86
/// One-shot SHA-256 of an in-memory slice via the `sha2` crate (Apple targets).
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    use sha2::Sha256;

    hash_digest::<Sha256>(data)
}
92
93/// Streaming SHA-256 using OpenSSL's optimized assembly.
94/// Linux only — OpenSSL is not available on Windows/macOS in CI.
95#[cfg(target_os = "linux")]
96fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
97    STREAM_BUF.with(|cell| {
98        let mut buf = cell.borrow_mut();
99        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
100            .map_err(|e| io::Error::other(e))?;
101        loop {
102            let n = read_full(&mut reader, &mut buf)?;
103            if n == 0 {
104                break;
105            }
106            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
107        }
108        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
109        Ok(hex_encode(&digest))
110    })
111}
112
/// Streams `reader` through `ring`'s SHA-256 and returns the hex digest
/// (targets that are neither Linux nor Apple).
///
/// Data is pulled through the thread-local `STREAM_BUF`; read errors are returned
/// unchanged.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    use ring::digest::{Context, SHA256};

    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        let mut ctx = Context::new(&SHA256);
        loop {
            match read_full(&mut reader, &mut chunk)? {
                // A zero-length fill means end of input.
                0 => break,
                filled => {
                    ctx.update(&chunk[..filled]);
                }
            }
        }
        let sum = ctx.finish();
        Ok(hex_encode(sum.as_ref()))
    })
}
129
/// Streams `reader` through the `sha2` crate's SHA-256 (Apple targets) by delegating
/// to the generic `hash_reader_impl`.
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    use sha2::Sha256;

    hash_reader_impl::<Sha256>(reader)
}
135
136/// Compute hash of a byte slice directly (zero-copy fast path).
137pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
138    match algo {
139        HashAlgorithm::Sha256 => sha256_bytes(data),
140        HashAlgorithm::Md5 => md5_bytes(data),
141        HashAlgorithm::Blake2b => {
142            let hash = blake2b_simd::blake2b(data);
143            hex_encode(hash.as_bytes())
144        }
145    }
146}
147
148// ── MD5 ─────────────────────────────────────────────────────────────
149
150/// Single-shot MD5 using OpenSSL's optimized assembly (Linux).
151#[cfg(target_os = "linux")]
152fn md5_bytes(data: &[u8]) -> String {
153    let digest =
154        openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
155    hex_encode(&digest)
156}
157
/// One-shot MD5 of an in-memory slice via the `md-5` crate (non-Linux targets),
/// delegating to the generic `hash_digest`.
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    hash_digest::<md5::Md5>(data)
}
163
164/// Compute hash of data from a reader, returning hex string.
165pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
166    match algo {
167        HashAlgorithm::Sha256 => sha256_reader(reader),
168        HashAlgorithm::Md5 => md5_reader(reader),
169        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
170    }
171}
172
173/// Streaming MD5 using OpenSSL's optimized assembly (Linux).
174#[cfg(target_os = "linux")]
175fn md5_reader(mut reader: impl Read) -> io::Result<String> {
176    STREAM_BUF.with(|cell| {
177        let mut buf = cell.borrow_mut();
178        let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
179            .map_err(|e| io::Error::other(e))?;
180        loop {
181            let n = read_full(&mut reader, &mut buf)?;
182            if n == 0 {
183                break;
184            }
185            hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
186        }
187        let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
188        Ok(hex_encode(&digest))
189    })
190}
191
/// Streams `reader` through the `md-5` crate's MD5 (non-Linux targets) by delegating
/// to the generic `hash_reader_impl`.
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<md5::Md5>(reader)
}
197
198/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
199/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
200#[cfg(target_os = "linux")]
201static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
202
203/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
204/// Caches whether O_NOATIME works to avoid double-open on every file.
205#[cfg(target_os = "linux")]
206fn open_noatime(path: &Path) -> io::Result<File> {
207    use std::os::unix::fs::OpenOptionsExt;
208    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
209        match std::fs::OpenOptions::new()
210            .read(true)
211            .custom_flags(libc::O_NOATIME)
212            .open(path)
213        {
214            Ok(f) => return Ok(f),
215            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
216                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
217                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
218            }
219            Err(e) => return Err(e), // Real error, propagate
220        }
221    }
222    File::open(path)
223}
224
/// Non-Linux counterpart of `open_noatime`: a plain read-only open, with no
/// access-time flag applied.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
229
230/// Hash a file by path. Uses streaming read with sequential fadvise hint.
231/// Streaming avoids MAP_POPULATE blocking (pre-faults all pages upfront)
232/// and mmap setup/teardown overhead for small files.
233pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
234    let file = open_noatime(path)?;
235    let metadata = file.metadata()?;
236
237    if metadata.file_type().is_file() && metadata.len() == 0 {
238        return Ok(hash_bytes(algo, &[]));
239    }
240
241    // Hint kernel for aggressive sequential readahead — overlaps I/O with hashing
242    #[cfg(target_os = "linux")]
243    {
244        use std::os::unix::io::AsRawFd;
245        unsafe {
246            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
247        }
248    }
249
250    hash_reader(algo, file)
251}
252
253/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
254pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
255    let stdin = io::stdin();
256    // Hint kernel for sequential access if stdin is a regular file (redirect)
257    #[cfg(target_os = "linux")]
258    {
259        use std::os::unix::io::AsRawFd;
260        let fd = stdin.as_raw_fd();
261        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
262        if unsafe { libc::fstat(fd, &mut stat) } == 0
263            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
264            && stat.st_size > 0
265        {
266            unsafe {
267                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
268            }
269        }
270    }
271    // Streaming hash — works for both pipe and file-redirect stdin
272    hash_reader(algo, stdin.lock())
273}
274
/// Reports whether the given paths should be hashed in parallel.
/// True as soon as there is more than one path; a single file (or none) is hashed
/// serially.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
281
282/// Issue readahead hints for a list of file paths to warm the page cache.
283/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
284#[cfg(target_os = "linux")]
285pub fn readahead_files(paths: &[&Path]) {
286    use std::os::unix::io::AsRawFd;
287    for path in paths {
288        if let Ok(file) = open_noatime(path) {
289            if let Ok(meta) = file.metadata() {
290                let len = meta.len();
291                if meta.file_type().is_file() && len > 0 {
292                    unsafe {
293                        libc::posix_fadvise(
294                            file.as_raw_fd(),
295                            0,
296                            len as i64,
297                            libc::POSIX_FADV_WILLNEED,
298                        );
299                    }
300                }
301            }
302        }
303    }
304}
305
/// Non-Linux counterpart of `readahead_files`: intentionally does nothing, because
/// the readahead hint is only issued on Linux.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {}
310
311// --- BLAKE2b variable-length functions (using blake2b_simd) ---
312
313/// Hash raw data with BLAKE2b variable output length.
314/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
315pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
316    let hash = blake2b_simd::Params::new()
317        .hash_length(output_bytes)
318        .hash(data);
319    hex_encode(hash.as_bytes())
320}
321
322/// Hash a reader with BLAKE2b variable output length.
323/// Uses thread-local buffer for cache-friendly streaming.
324pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
325    STREAM_BUF.with(|cell| {
326        let mut buf = cell.borrow_mut();
327        let mut state = blake2b_simd::Params::new()
328            .hash_length(output_bytes)
329            .to_state();
330        loop {
331            let n = read_full(&mut reader, &mut buf)?;
332            if n == 0 {
333                break;
334            }
335            state.update(&buf[..n]);
336        }
337        Ok(hex_encode(state.finalize().as_bytes()))
338    })
339}
340
341/// Hash a file with BLAKE2b variable output length.
342/// Uses streaming read with sequential fadvise for overlapped I/O.
343pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
344    let file = open_noatime(path)?;
345    let metadata = file.metadata()?;
346
347    if metadata.file_type().is_file() && metadata.len() == 0 {
348        return Ok(blake2b_hash_data(&[], output_bytes));
349    }
350
351    #[cfg(target_os = "linux")]
352    {
353        use std::os::unix::io::AsRawFd;
354        unsafe {
355            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
356        }
357    }
358
359    blake2b_hash_reader(file, output_bytes)
360}
361
362/// Hash stdin with BLAKE2b variable output length.
363/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
364pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
365    let stdin = io::stdin();
366    #[cfg(target_os = "linux")]
367    {
368        use std::os::unix::io::AsRawFd;
369        let fd = stdin.as_raw_fd();
370        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
371        if unsafe { libc::fstat(fd, &mut stat) } == 0
372            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
373            && stat.st_size > 0
374        {
375            unsafe {
376                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
377            }
378        }
379    }
380    blake2b_hash_reader(stdin.lock(), output_bytes)
381}
382
/// Writes one result line in GNU format: the hash, a space, a mode character
/// (`*` when `binary`, otherwise a second space), the filename, and a newline.
/// Everything is written as raw bytes, bypassing `std::fmt`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\n"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
397
/// Same layout as the GNU result line (hash, space, mode character, filename), but
/// terminated with a NUL byte instead of a newline.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\0"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
411
412/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
413pub fn print_hash_tag(
414    out: &mut impl Write,
415    algo: HashAlgorithm,
416    hash: &str,
417    filename: &str,
418) -> io::Result<()> {
419    out.write_all(algo.name().as_bytes())?;
420    out.write_all(b" (")?;
421    out.write_all(filename.as_bytes())?;
422    out.write_all(b") = ")?;
423    out.write_all(hash.as_bytes())?;
424    out.write_all(b"\n")
425}
426
427/// Print hash in BSD tag format with NUL terminator.
428pub fn print_hash_tag_zero(
429    out: &mut impl Write,
430    algo: HashAlgorithm,
431    hash: &str,
432    filename: &str,
433) -> io::Result<()> {
434    out.write_all(algo.name().as_bytes())?;
435    out.write_all(b" (")?;
436    out.write_all(filename.as_bytes())?;
437    out.write_all(b") = ")?;
438    out.write_all(hash.as_bytes())?;
439    out.write_all(b"\0")
440}
441
/// Writes one BLAKE2b result line in BSD tag format, newline-terminated.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` for any other
/// length, e.g. `BLAKE2b-256 (filename) = hash`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        // Formatting is only needed on the less common non-512 path.
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let pieces: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
462
/// Writes one BLAKE2b result line in BSD tag format, terminated with a NUL byte.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` for any other
/// length.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let pieces: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
480
/// Options for check mode.
///
/// `Default` yields every flag off and an empty warning prefix, so callers can set
/// only the fields they need with struct-update syntax.
#[derive(Debug, Clone, Default)]
pub struct CheckOptions {
    /// Suppress the per-file "OK" lines.
    pub quiet: bool,
    /// Suppress all per-file output; only the returned counts report the outcome.
    pub status_only: bool,
    /// Not read by `check_file` itself — presumably used by the caller to turn
    /// format errors into a failing exit status (TODO confirm).
    pub strict: bool,
    /// Emit a warning on the error stream for each improperly formatted line.
    pub warn: bool,
    /// Skip listed files that do not exist instead of counting a read error.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
493
/// Result of check mode verification: counts of how each checksum line was resolved.
///
/// `Default` is the all-zero result; the comparison derives let a result be compared
/// against an expected value directly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CheckResult {
    /// Files whose computed hash matched the expected one.
    pub ok: usize,
    /// Files whose computed hash differed from the expected one.
    pub mismatches: usize,
    /// Non-empty lines that could not be parsed as a checksum line.
    pub format_errors: usize,
    /// Files that could not be opened or read.
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
503
504/// Verify checksums from a check file.
505/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
506pub fn check_file<R: BufRead>(
507    algo: HashAlgorithm,
508    reader: R,
509    opts: &CheckOptions,
510    out: &mut impl Write,
511    err_out: &mut impl Write,
512) -> io::Result<CheckResult> {
513    let quiet = opts.quiet;
514    let status_only = opts.status_only;
515    let warn = opts.warn;
516    let ignore_missing = opts.ignore_missing;
517    let mut ok_count = 0;
518    let mut mismatch_count = 0;
519    let mut format_errors = 0;
520    let mut read_errors = 0;
521    let mut ignored_missing_count = 0;
522    let mut line_num = 0;
523
524    for line_result in reader.lines() {
525        line_num += 1;
526        let line = line_result?;
527        let line = line.trim_end();
528
529        if line.is_empty() {
530            continue;
531        }
532
533        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
534        let (expected_hash, filename) = match parse_check_line(line) {
535            Some(v) => v,
536            None => {
537                format_errors += 1;
538                if warn {
539                    out.flush()?;
540                    if opts.warn_prefix.is_empty() {
541                        writeln!(
542                            err_out,
543                            "line {}: improperly formatted {} checksum line",
544                            line_num,
545                            algo.name()
546                        )?;
547                    } else {
548                        writeln!(
549                            err_out,
550                            "{}: {}: improperly formatted {} checksum line",
551                            opts.warn_prefix,
552                            line_num,
553                            algo.name()
554                        )?;
555                    }
556                }
557                continue;
558            }
559        };
560
561        // Compute actual hash
562        let actual = match hash_file(algo, Path::new(filename)) {
563            Ok(h) => h,
564            Err(e) => {
565                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
566                    ignored_missing_count += 1;
567                    continue;
568                }
569                read_errors += 1;
570                if !status_only {
571                    out.flush()?;
572                    writeln!(err_out, "{}: {}", filename, e)?;
573                    writeln!(out, "{}: FAILED open or read", filename)?;
574                }
575                continue;
576            }
577        };
578
579        if actual.eq_ignore_ascii_case(expected_hash) {
580            ok_count += 1;
581            if !quiet && !status_only {
582                writeln!(out, "{}: OK", filename)?;
583            }
584        } else {
585            mismatch_count += 1;
586            if !status_only {
587                writeln!(out, "{}: FAILED", filename)?;
588            }
589        }
590    }
591
592    Ok(CheckResult {
593        ok: ok_count,
594        mismatches: mismatch_count,
595        format_errors,
596        read_errors,
597        ignored_missing: ignored_missing_count,
598    })
599}
600
/// Parses one checksum line in any supported format and returns
/// `(expected_hash, filename)`, or `None` when the line matches no format.
///
/// Supported formats:
/// - BSD tag: `MD5 (file) = hash`, `SHA256 (file) = hash`, `BLAKE2b (file) = hash`,
///   `BLAKE2b-NNN (file) = hash`
/// - Text mode: `hash  file`
/// - Binary mode: `hash *file`
///
/// A single leading `\` (the marker for an escaped filename) is dropped before any
/// format is tried. The filename itself is returned verbatim, without unescaping.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // The escape marker precedes the algorithm tag as well as a plain digest, so it
    // must be removed before the tag prefixes are tested.
    let line = line.strip_prefix('\\').unwrap_or(line);

    // BSD tag format: "ALGO (filename) = hash"
    let tagged = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // "BLAKE2b-NNN (filename) = hash": NNN must consist of ASCII digits only.
            let after = line.strip_prefix("BLAKE2b-")?;
            let (bits, rest) = after.split_once(" (")?;
            bits.bytes().all(|b| b.is_ascii_digit()).then_some(rest)
        });
    // Split at the LAST ") = ": the hash can never contain that sequence, whereas a
    // filename can.
    if let Some((filename, hash)) = tagged.and_then(|rest| rest.rsplit_once(") = ")) {
        return Some((hash, filename));
    }

    // Plain formats. The separator is whichever of "  " (text) and " *" (binary)
    // occurs first, so a filename that itself contains one of them cannot pull the
    // split point into the filename.
    let split_at = match (line.find("  "), line.find(" *")) {
        (Some(text), Some(binary)) => text.min(binary),
        (text, binary) => text.or(binary)?,
    };
    Some((&line[..split_at], &line[split_at + 2..]))
}
645
/// Parses a BSD-style tag line: `ALGO (filename) = hash`.
///
/// Returns `(expected_hash, filename, bits)`, or `None` when the line lacks the
/// ` (` or `) = ` delimiters. `bits` is the number after the last `-` in the
/// algorithm name (e.g. `BLAKE2b-256` -> `Some(256)`), or `None` when there is no
/// such numeric suffix.
///
/// The algorithm name ends at the first ` (`. The filename ends at the LAST `) = `,
/// because the hash can never contain that sequence whereas a filename can.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.rsplit_once(") = ")?;

    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());

    Some((hash, filename, bits))
}
666
/// Reads until `buf` is full or the reader reports end of input, and returns the
/// number of bytes placed in `buf` (0 only at end of input or for an empty `buf`).
///
/// Filling the buffer before returning means each hash update gets as much data as
/// possible. When the first `read()` fills the whole buffer (the usual case for
/// regular files), the loop exits after that single call.
///
/// `ErrorKind::Interrupted` is retried on every read, including the first one; any
/// other error is returned immediately.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
689
/// Builds, at compile time, a table mapping every byte value to its two lowercase
/// hex digits, so encoding needs a single lookup per input byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut byte = 0;
    while byte < 256 {
        // High nibble first, then low nibble.
        table[byte] = [DIGITS[byte >> 4], DIGITS[byte & 0x0f]];
        byte += 1;
    }
    table
}

/// Byte value -> two-character lowercase hex pair.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Encodes `bytes` as a lowercase hex string, two characters per input byte.
///
/// Written in safe code. The output is allocated once at its final size, and a `u8`
/// index into the 256-entry table is always in range.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        let [hi, lo] = HEX_TABLE[usize::from(byte)];
        // Both table entries are ASCII hex digits, so each push appends one byte.
        hex.push(char::from(hi));
        hex.push(char::from(lo));
    }
    hex
}