Skip to main content

coreutils_rs/hash/
core.rs

1use std::cell::RefCell;
2use std::fs::{self, File};
3use std::io::{self, BufRead, BufReader, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9use md5::Md5;
10use memmap2::MmapOptions;
11use sha2::{Digest, Sha256};
12
/// Supported hash algorithms.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha256,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Display name of the algorithm, as used in BSD-style tag output
    /// ("ALGO (file) = hash").
    pub fn name(self) -> &'static str {
        match self {
            Self::Sha256 => "SHA256",
            Self::Md5 => "MD5",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
30
31// ── Generic hash helpers ────────────────────────────────────────────
32
33fn hash_digest<D: Digest>(data: &[u8]) -> String {
34    hex_encode(&D::digest(data))
35}
36
37fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
38    let mut hasher = D::new();
39    let mut buf = vec![0u8; 16 * 1024 * 1024]; // 16MB buffer — fewer syscalls
40    loop {
41        let n = reader.read(&mut buf)?;
42        if n == 0 {
43            break;
44        }
45        hasher.update(&buf[..n]);
46    }
47    Ok(hex_encode(&hasher.finalize()))
48}
49
50// ── Public hashing API ──────────────────────────────────────────────
51
/// Chunk size for cache-friendly hashing of large mmap'd data.
/// 4MB fits in L3 cache, reducing TLB misses and improving memory bus
/// utilization on large files (100MB+). Without chunking, single-shot
/// hashing touches all pages before any computation, causing poor
/// cache behavior.
/// Used by `hash_bytes_chunked` and `blake2b_hash_data_chunked`.
const HASH_CHUNK_SIZE: usize = 4 * 1024 * 1024;
58
59/// Compute hash of a byte slice directly (zero-copy fast path).
60pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
61    match algo {
62        HashAlgorithm::Sha256 => hash_digest::<Sha256>(data),
63        HashAlgorithm::Md5 => hash_digest::<Md5>(data),
64        HashAlgorithm::Blake2b => {
65            let hash = blake2b_simd::blake2b(data);
66            hex_encode(hash.as_bytes())
67        }
68    }
69}
70
71/// Chunked hash for cache-friendly processing of large mmap'd data.
72/// Feeds HASH_CHUNK_SIZE chunks to keep working set in L3 cache.
73/// For small data (<= HASH_CHUNK_SIZE), delegates to single-shot hash_bytes.
74fn hash_bytes_chunked(algo: HashAlgorithm, data: &[u8]) -> String {
75    if data.len() <= HASH_CHUNK_SIZE {
76        return hash_bytes(algo, data);
77    }
78    match algo {
79        HashAlgorithm::Sha256 => {
80            let mut hasher = Sha256::new();
81            for chunk in data.chunks(HASH_CHUNK_SIZE) {
82                hasher.update(chunk);
83            }
84            hex_encode(&hasher.finalize())
85        }
86        HashAlgorithm::Md5 => {
87            let mut hasher = Md5::new();
88            for chunk in data.chunks(HASH_CHUNK_SIZE) {
89                hasher.update(chunk);
90            }
91            hex_encode(&hasher.finalize())
92        }
93        HashAlgorithm::Blake2b => {
94            let mut state = blake2b_simd::State::new();
95            for chunk in data.chunks(HASH_CHUNK_SIZE) {
96                state.update(chunk);
97            }
98            hex_encode(state.finalize().as_bytes())
99        }
100    }
101}
102
103/// Chunked BLAKE2b hash with variable output length for large mmap'd data.
104fn blake2b_hash_data_chunked(data: &[u8], output_bytes: usize) -> String {
105    if data.len() <= HASH_CHUNK_SIZE {
106        return blake2b_hash_data(data, output_bytes);
107    }
108    let mut state = blake2b_simd::Params::new()
109        .hash_length(output_bytes)
110        .to_state();
111    for chunk in data.chunks(HASH_CHUNK_SIZE) {
112        state.update(chunk);
113    }
114    hex_encode(state.finalize().as_bytes())
115}
116
117/// Compute hash of data from a reader, returning hex string.
118pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
119    match algo {
120        HashAlgorithm::Sha256 => hash_reader_impl::<Sha256>(reader),
121        HashAlgorithm::Md5 => hash_reader_impl::<Md5>(reader),
122        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
123    }
124}
125
/// Threshold below which read() is faster than mmap() due to mmap setup overhead.
/// mmap creates page table entries (one per 4KB page), issues madvise syscalls,
/// and requires munmap cleanup. For files under 1MB, the total mmap overhead
/// (~50-200μs) is significant relative to hash time (~500μs for 1MB at 2GB/s).
const MMAP_THRESHOLD: u64 = 1024 * 1024; // 1MB

// Thread-local reusable buffer for small file reads.
// Avoids per-file heap allocation when processing many small files sequentially or in parallel.
// Each rayon worker thread gets its own buffer automatically.
// Capacity equals MMAP_THRESHOLD because only files below that size use this buffer.
thread_local! {
    static READ_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(MMAP_THRESHOLD as usize));
}
138
/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
/// Relaxed ordering suffices: a racing thread at worst attempts O_NOATIME one
/// extra time before observing the store — harmless.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
143
/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
/// Caches whether O_NOATIME works to avoid double-open on every file.
///
/// O_NOATIME fails with EPERM unless the caller owns the file or has
/// CAP_FOWNER; on the first such failure it is disabled process-wide and we
/// fall through to a plain open.
#[cfg(target_os = "linux")]
fn open_noatime(path: &Path) -> io::Result<File> {
    use std::os::unix::fs::OpenOptionsExt;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        match fs::OpenOptions::new()
            .read(true)
            .custom_flags(libc::O_NOATIME)
            .open(path)
        {
            Ok(f) => return Ok(f),
            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            }
            Err(e) => return Err(e), // Real error, propagate
        }
    }
    // O_NOATIME unsupported (or disabled after a prior EPERM): plain open.
    File::open(path)
}
165
/// Non-Linux fallback: O_NOATIME is Linux-specific, so this is a plain open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
170
/// Hash a file by path. Single open + fstat to minimize syscalls.
/// Uses read() for small files, mmap for large files.
///
/// Strategy, chosen from fstat on the already-open fd:
///   * regular file, len == 0       -> hash the empty slice (no read at all)
///   * regular file, len < 1MB      -> read into a thread-local buffer
///   * regular file, len >= 1MB     -> mmap (buffered-read fallback inside)
///   * non-regular (pipe, device)   -> buffered streaming read
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    // Single open — reuse fd for fstat + read/mmap (saves separate stat + open)
    let file = open_noatime(path)?;
    let metadata = file.metadata()?; // fstat on existing fd, cheaper than stat(path)
    let len = metadata.len();
    let is_regular = metadata.file_type().is_file();

    if is_regular && len == 0 {
        return Ok(hash_bytes(algo, &[]));
    }

    if is_regular && len > 0 {
        // Small files: read into thread-local buffer (zero allocation after first call)
        if len < MMAP_THRESHOLD {
            return READ_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                buf.clear();
                // Reserve is a no-op if capacity >= len (which it is after first call)
                buf.reserve(len as usize);
                // `&File` implements Read, so no `&mut File` is required here.
                Read::read_to_end(&mut &file, &mut buf)?;
                Ok(hash_bytes(algo, &buf))
            });
        }

        // Large files: mmap the already-open fd for zero-copy
        return mmap_and_hash(algo, &file);
    }

    // Fallback: buffered read (special files, pipes, etc.) — fd already open
    let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
    hash_reader(algo, reader)
}
205
206/// Mmap a file and hash it. Shared by hash_file and blake2b_hash_file.
207/// With chunked hashing, MADV_SEQUENTIAL is sufficient — the kernel
208/// prefetches ahead of our sequential 4MB chunk access pattern.
209fn mmap_and_hash(algo: HashAlgorithm, file: &File) -> io::Result<String> {
210    match unsafe { MmapOptions::new().map(file) } {
211        Ok(mmap) => {
212            #[cfg(target_os = "linux")]
213            {
214                let _ = mmap.advise(memmap2::Advice::Sequential);
215            }
216            Ok(hash_bytes_chunked(algo, &mmap))
217        }
218        Err(_) => {
219            // mmap failed — fall back to buffered read from the same fd
220            let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
221            hash_reader(algo, reader)
222        }
223    }
224}
225
226/// Mmap a file and hash with BLAKE2b. Shared helper for blake2b_hash_file.
227fn mmap_and_hash_blake2b(file: &File, output_bytes: usize) -> io::Result<String> {
228    match unsafe { MmapOptions::new().map(file) } {
229        Ok(mmap) => {
230            #[cfg(target_os = "linux")]
231            {
232                let _ = mmap.advise(memmap2::Advice::Sequential);
233            }
234            Ok(blake2b_hash_data_chunked(&mmap, output_bytes))
235        }
236        Err(_) => {
237            let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
238            blake2b_hash_reader(reader, output_bytes)
239        }
240    }
241}
242
/// Hash stdin. Reads all data first, then hashes in one pass for optimal throughput.
pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
    // Try to mmap stdin if it's a regular file (shell redirect)
    #[cfg(unix)]
    {
        use std::os::unix::io::AsRawFd;
        let stdin = io::stdin();
        let fd = stdin.as_raw_fd();
        // fstat fd 0 directly to learn whether stdin is a mappable regular file.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            use std::os::unix::io::FromRawFd;
            // Wrap the raw fd in a File just long enough to mmap it; the
            // mem::forget below prevents File's Drop from closing stdin.
            // NOTE(review): mmap maps from offset 0 and ignores any bytes
            // already consumed from stdin — confirm callers never pre-read.
            let file = unsafe { File::from_raw_fd(fd) };
            let result = unsafe { MmapOptions::new().map(&file) };
            std::mem::forget(file); // Don't close stdin
            if let Ok(mmap) = result {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                }
                return Ok(hash_bytes_chunked(algo, &mmap));
            }
        }
    }
    // Fallback: read all then hash in one pass (avoids per-read update overhead)
    let mut data = Vec::new();
    io::stdin().lock().read_to_end(&mut data)?;
    Ok(hash_bytes(algo, &data))
}
274
/// Estimate total file size for parallel/sequential decision.
/// Uses a quick heuristic: samples first file and extrapolates.
/// Returns 0 if estimation fails.
pub fn estimate_total_size(paths: &[&Path]) -> u64 {
    match paths.first() {
        // Extrapolate: first file's size × number of paths.
        Some(first) => fs::metadata(first)
            .map(|meta| meta.len().saturating_mul(paths.len() as u64))
            .unwrap_or(0),
        None => 0,
    }
}

/// Check if parallel hashing is worthwhile for the given file paths.
/// Only uses rayon when files are individually large enough for the hash
/// computation to dominate over rayon overhead (thread pool init + work stealing).
/// For many small files (e.g., 100 × 100KB), sequential is much faster.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    // Below a 1MB average file size, rayon overhead exceeds the benefit.
    const MIN_AVG_SIZE: u64 = 1024 * 1024;
    if paths.len() < 2 {
        return false;
    }
    let avg = estimate_total_size(paths) / paths.len() as u64;
    avg >= MIN_AVG_SIZE
}
304
/// Issue readahead hints for a list of file paths to warm the page cache.
/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
///
/// Purely advisory: every failure (open, metadata, fadvise) is silently
/// ignored, since hashing still works without the hint.
#[cfg(target_os = "linux")]
pub fn readahead_files(paths: &[&Path]) {
    use std::os::unix::io::AsRawFd;
    for path in paths {
        if let Ok(file) = open_noatime(path) {
            if let Ok(meta) = file.metadata() {
                let len = meta.len();
                // Only hint for non-empty regular files; fadvise on pipes or
                // devices would be meaningless.
                if meta.file_type().is_file() && len > 0 {
                    // SAFETY: the fd is valid for the lifetime of `file`, and
                    // posix_fadvise takes only plain integer arguments.
                    unsafe {
                        libc::posix_fadvise(
                            file.as_raw_fd(),
                            0,
                            len as i64,
                            libc::POSIX_FADV_WILLNEED,
                        );
                    }
                }
            }
        }
    }
}
328
/// No-op on non-Linux platforms — the readahead hint uses Linux posix_fadvise.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No-op on non-Linux
}
333
334// --- BLAKE2b variable-length functions (using blake2b_simd) ---
335
336/// Hash raw data with BLAKE2b variable output length.
337/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
338pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
339    let hash = blake2b_simd::Params::new()
340        .hash_length(output_bytes)
341        .hash(data);
342    hex_encode(hash.as_bytes())
343}
344
345/// Hash a reader with BLAKE2b variable output length.
346pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
347    let mut state = blake2b_simd::Params::new()
348        .hash_length(output_bytes)
349        .to_state();
350    let mut buf = vec![0u8; 16 * 1024 * 1024]; // 16MB buffer
351    loop {
352        let n = reader.read(&mut buf)?;
353        if n == 0 {
354            break;
355        }
356        state.update(&buf[..n]);
357    }
358    Ok(hex_encode(state.finalize().as_bytes()))
359}
360
/// Hash a file with BLAKE2b variable output length. Single open + fstat.
/// Uses read() for small files, mmap for large.
///
/// Mirrors `hash_file`: empty regular file -> hash of the empty input; small
/// regular file -> thread-local read buffer; large regular file -> mmap;
/// non-regular file (pipe, device) -> buffered streaming read.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    // Single open — reuse fd for fstat + read/mmap
    let file = open_noatime(path)?;
    let metadata = file.metadata()?; // fstat on the open fd, cheaper than stat(path)
    let len = metadata.len();
    let is_regular = metadata.file_type().is_file();

    if is_regular && len == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if is_regular && len > 0 {
        // Small files: read into thread-local buffer (zero allocation after first call)
        if len < MMAP_THRESHOLD {
            return READ_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                buf.clear();
                // Reserve is a no-op once capacity >= len.
                buf.reserve(len as usize);
                // `&File` implements Read; no `&mut File` needed.
                Read::read_to_end(&mut &file, &mut buf)?;
                Ok(blake2b_hash_data(&buf, output_bytes))
            });
        }

        // Large files: mmap the already-open fd for zero-copy
        return mmap_and_hash_blake2b(&file, output_bytes);
    }

    // Fallback: buffered read — fd already open
    let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
    blake2b_hash_reader(reader, output_bytes)
}
394
/// Hash stdin with BLAKE2b variable output length.
/// Tries mmap if stdin is a regular file (shell redirect), falls back to read.
pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
    // Try to mmap stdin if it's a regular file (shell redirect)
    #[cfg(unix)]
    {
        use std::os::unix::io::AsRawFd;
        let stdin = io::stdin();
        let fd = stdin.as_raw_fd();
        // fstat fd 0 directly to learn whether stdin is a mappable regular file.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            use std::os::unix::io::FromRawFd;
            // Wrap the raw fd in a File just long enough to mmap it; the
            // mem::forget below prevents File's Drop from closing stdin.
            // NOTE(review): mmap maps from offset 0 and ignores any bytes
            // already consumed from stdin — confirm callers never pre-read.
            let file = unsafe { File::from_raw_fd(fd) };
            let result = unsafe { MmapOptions::new().map(&file) };
            std::mem::forget(file); // Don't close stdin
            if let Ok(mmap) = result {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                }
                return Ok(blake2b_hash_data_chunked(&mmap, output_bytes));
            }
        }
    }
    // Fallback: read all then hash in one pass
    let mut data = Vec::new();
    io::stdin().lock().read_to_end(&mut data)?;
    Ok(blake2b_hash_data(&data, output_bytes))
}
427
/// Print hash result in GNU format: "hash  filename\n".
/// The two chars between hash and name are a space plus the mode char:
/// ' ' for text mode, '*' for binary mode.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    writeln!(out, "{} {}{}", hash, if binary { '*' } else { ' ' }, filename)
}
438
/// Print hash in GNU format with NUL terminator instead of newline.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    // ' ' marks text mode, '*' marks binary mode.
    let mode = if binary { '*' } else { ' ' };
    write!(out, "{hash} {mode}{filename}\0")
}
449
450/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
451pub fn print_hash_tag(
452    out: &mut impl Write,
453    algo: HashAlgorithm,
454    hash: &str,
455    filename: &str,
456) -> io::Result<()> {
457    writeln!(out, "{} ({}) = {}", algo.name(), filename, hash)
458}
459
460/// Print hash in BSD tag format with NUL terminator.
461pub fn print_hash_tag_zero(
462    out: &mut impl Write,
463    algo: HashAlgorithm,
464    hash: &str,
465    filename: &str,
466) -> io::Result<()> {
467    write!(out, "{} ({}) = {}\0", algo.name(), filename, hash)
468}
469
/// Print hash in BSD tag format with BLAKE2b length info:
/// "BLAKE2b (filename) = hash" for 512-bit, or
/// "BLAKE2b-256 (filename) = hash" for other lengths.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    // 512 bits is the BLAKE2b default and carries no length suffix.
    match bits {
        512 => writeln!(out, "BLAKE2b ({filename}) = {hash}"),
        _ => writeln!(out, "BLAKE2b-{bits} ({filename}) = {hash}"),
    }
}
485
/// Print hash in BSD tag format with BLAKE2b length info and NUL terminator.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    // 512 bits is the BLAKE2b default and carries no length suffix.
    match bits {
        512 => write!(out, "BLAKE2b ({filename}) = {hash}\0"),
        _ => write!(out, "BLAKE2b-{bits} ({filename}) = {hash}\0"),
    }
}
499
/// Options for check mode.
pub struct CheckOptions {
    // Suppress per-file "OK" lines; mismatches and errors are still printed.
    pub quiet: bool,
    // Print nothing at all; the outcome is conveyed only via CheckResult counts.
    pub status_only: bool,
    // NOTE(review): not read inside this module — presumably the caller uses it
    // to turn format errors into a failing exit status; confirm at call sites.
    pub strict: bool,
    // Emit a warning line to err_out for each improperly formatted input line.
    pub warn: bool,
    // Count missing files separately instead of treating them as read errors.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
512
/// Result of check mode verification.
pub struct CheckResult {
    // Files whose computed hash matched the expected hash.
    pub ok: usize,
    // Files whose computed hash did not match the expected hash.
    pub mismatches: usize,
    // Input lines that could not be parsed in any supported checksum format.
    pub format_errors: usize,
    // Files that could not be opened or read.
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
522
/// Verify checksums from a check file.
/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
///
/// Writes per-file verdicts to `out` and diagnostics to `err_out`, then
/// returns aggregate counts so the caller can choose an exit status.
/// Blank lines are skipped; hash comparison is case-insensitive.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    // Copy the flags out of opts once to keep the loop body terse.
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    // 1-based line counter for warning messages.
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        let line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush out first so the warning on err_out does not appear
                    // before earlier buffered verdict lines on a shared terminal.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        // Compute actual hash
        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                // Missing file is only special-cased under --ignore-missing.
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    // Same flush-before-stderr ordering as above.
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Case-insensitive compare: check files may carry upper- or lowercase hex.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
619
/// Parse a checksum line in any supported format.
///
/// Accepted formats, returned as `(expected_hash, filename)`:
///   * BSD tag:    "ALGO (filename) = hash" (MD5 / SHA256 / BLAKE2b / BLAKE2b-NNN)
///   * GNU text:   "hash  filename"
///   * GNU binary: "hash *filename"
///
/// Returns `None` when the line matches none of these formats.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Try BSD tag format: "ALGO (filename) = hash"
    let rest = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // Handle BLAKE2b-NNN (filename) = hash
            if line.starts_with("BLAKE2b-") {
                let after = &line["BLAKE2b-".len()..];
                if let Some(sp) = after.find(" (") {
                    if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                        return Some(&after[sp + 2..]);
                    }
                }
            }
            None
        });
    if let Some(rest) = rest {
        // Split on the LAST ") = " so a filename that itself contains ") = "
        // still parses: the trailing hash is hex and cannot contain ')'.
        if let Some(paren_idx) = rest.rfind(") = ") {
            let filename = &rest[..paren_idx];
            let hash = &rest[paren_idx + 4..];
            return Some((hash, filename));
        }
    }

    // Handle backslash-escaped lines (leading '\')
    let line = line.strip_prefix('\\').unwrap_or(line);

    // Standard format: "hash  filename" — the hash contains no spaces, so the
    // first double space is always the separator.
    if let Some(idx) = line.find("  ") {
        let hash = &line[..idx];
        let rest = &line[idx + 2..];
        return Some((hash, rest));
    }
    // Binary mode: "hash *filename"
    if let Some(idx) = line.find(" *") {
        let hash = &line[..idx];
        let rest = &line[idx + 2..];
        return Some((hash, rest));
    }
    None
}
664
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
/// Returns (expected_hash, filename, optional_bits).
/// `bits` is the hash length parsed from the algo name (e.g., BLAKE2b-256 -> Some(256)).
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let paren_start = line.find(" (")?;
    let algo_part = &line[..paren_start];
    let rest = &line[paren_start + 2..];
    // Split on the LAST ") = " so filenames containing ") = " still parse:
    // the trailing hash is hex and cannot contain ')'.
    let paren_end = rest.rfind(") = ")?;
    let filename = &rest[..paren_end];
    let hash = &rest[paren_end + 4..];

    // Parse optional bit length from algo name (e.g., "BLAKE2b-256" -> Some(256))
    let bits = algo_part
        .rfind('-')
        .and_then(|dash_pos| algo_part[dash_pos + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
685
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char hex representation — single lookup per byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Fast hex encoding using the 2-byte pair lookup table — one lookup per input byte.
///
/// Builds the output in a pre-sized `Vec<u8>` and converts with
/// `from_utf8_unchecked`. The previous implementation called `set_len` on the
/// String's buffer BEFORE writing the bytes, exposing uninitialized memory in
/// violation of `Vec::set_len`'s safety contract (UB); here every byte is
/// written before it becomes observable, with no extra allocation or copy.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = Vec::with_capacity(bytes.len() * 2);
    for &b in bytes {
        out.extend_from_slice(&HEX_TABLE[b as usize]);
    }
    // SAFETY: HEX_TABLE contains only ASCII hex digit bytes, so `out` is
    // guaranteed to be valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}