Skip to main content

coreutils_rs/hash/
core.rs

1use std::cell::RefCell;
2use std::fs::{self, File};
3use std::io::{self, BufRead, BufReader, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9use md5::Md5;
10use memmap2::MmapOptions;
11use sha2::{Digest, Sha256};
12
/// Supported hash algorithms.
///
/// The enum is a plain fieldless tag, so it derives the usual value-type
/// traits: callers can compare algorithms, use them as map keys, and copy
/// them freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256 (via the `sha2` crate).
    Sha256,
    /// MD5 (via the `md5` crate).
    Md5,
    /// BLAKE2b (via the `blake2b_simd` crate).
    Blake2b,
}

impl HashAlgorithm {
    /// Returns the algorithm name as it appears in BSD-style tag lines and
    /// in format-warning messages (e.g. `"SHA256"`).
    pub fn name(self) -> &'static str {
        match self {
            Self::Sha256 => "SHA256",
            Self::Md5 => "MD5",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
30
31// ── Generic hash helpers ────────────────────────────────────────────
32
33fn hash_digest<D: Digest>(data: &[u8]) -> String {
34    hex_encode(&D::digest(data))
35}
36
37fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
38    let mut hasher = D::new();
39    let mut buf = vec![0u8; 16 * 1024 * 1024]; // 16MB buffer — fewer syscalls
40    loop {
41        let n = reader.read(&mut buf)?;
42        if n == 0 {
43            break;
44        }
45        hasher.update(&buf[..n]);
46    }
47    Ok(hex_encode(&hasher.finalize()))
48}
49
50// ── Public hashing API ──────────────────────────────────────────────
51
52/// Compute hash of a byte slice directly (zero-copy fast path).
53pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
54    match algo {
55        HashAlgorithm::Sha256 => hash_digest::<Sha256>(data),
56        HashAlgorithm::Md5 => hash_digest::<Md5>(data),
57        HashAlgorithm::Blake2b => {
58            let hash = blake2b_simd::blake2b(data);
59            hex_encode(hash.as_bytes())
60        }
61    }
62}
63
64/// Compute hash of data from a reader, returning hex string.
65pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
66    match algo {
67        HashAlgorithm::Sha256 => hash_reader_impl::<Sha256>(reader),
68        HashAlgorithm::Md5 => hash_reader_impl::<Md5>(reader),
69        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
70    }
71}
72
/// File size (in bytes) below which plain read() is used instead of mmap().
///
/// Mapping a file has a fixed cost: page table entries are created (one per
/// 4KB page), madvise syscalls are issued, and the mapping must be torn down
/// with munmap. For files under 1MB that overhead (~50-200μs) is significant
/// relative to the hash time itself (~500μs for 1MB at 2GB/s).
const MMAP_THRESHOLD: u64 = 1 << 20; // 1MB

// Per-thread scratch buffer reused for every small-file read, so hashing many
// small files does not allocate once per file. Being thread-local, each worker
// thread (e.g. a rayon worker) gets its own independent buffer.
thread_local! {
    static READ_BUF: RefCell<Vec<u8>> = {
        let scratch: Vec<u8> = Vec::with_capacity(MMAP_THRESHOLD as usize);
        RefCell::new(scratch)
    };
}
85
86/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
87/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
88#[cfg(target_os = "linux")]
89static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
90
91/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
92/// Caches whether O_NOATIME works to avoid double-open on every file.
93#[cfg(target_os = "linux")]
94fn open_noatime(path: &Path) -> io::Result<File> {
95    use std::os::unix::fs::OpenOptionsExt;
96    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
97        match fs::OpenOptions::new()
98            .read(true)
99            .custom_flags(libc::O_NOATIME)
100            .open(path)
101        {
102            Ok(f) => return Ok(f),
103            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
104                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
105                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
106            }
107            Err(e) => return Err(e), // Real error, propagate
108        }
109    }
110    File::open(path)
111}
112
/// Non-Linux variant: there is no O_NOATIME attempt here, so this is just a
/// plain read-only open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    let opened = File::open(path)?;
    Ok(opened)
}
117
118/// Hash a file by path. Single open + fstat to minimize syscalls.
119/// Uses read() for small files, mmap for large files.
120pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
121    // Single open — reuse fd for fstat + read/mmap (saves separate stat + open)
122    let file = open_noatime(path)?;
123    let metadata = file.metadata()?; // fstat on existing fd, cheaper than stat(path)
124    let len = metadata.len();
125    let is_regular = metadata.file_type().is_file();
126
127    if is_regular && len == 0 {
128        return Ok(hash_bytes(algo, &[]));
129    }
130
131    if is_regular && len > 0 {
132        // Small files: read into thread-local buffer (zero allocation after first call)
133        if len < MMAP_THRESHOLD {
134            return READ_BUF.with(|cell| {
135                let mut buf = cell.borrow_mut();
136                buf.clear();
137                // Reserve is a no-op if capacity >= len (which it is after first call)
138                buf.reserve(len as usize);
139                Read::read_to_end(&mut &file, &mut buf)?;
140                Ok(hash_bytes(algo, &buf))
141            });
142        }
143
144        // Large files: mmap the already-open fd for zero-copy
145        return mmap_and_hash(algo, &file);
146    }
147
148    // Fallback: buffered read (special files, pipes, etc.) — fd already open
149    let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
150    hash_reader(algo, reader)
151}
152
153/// Mmap a file and hash it. Shared by hash_file and blake2b_hash_file.
154fn mmap_and_hash(algo: HashAlgorithm, file: &File) -> io::Result<String> {
155    match unsafe {
156        MmapOptions::new()
157            // No populate() — lazy faults with async readahead avoids blocking
158            // until all pages are loaded. MADV_WILLNEED below triggers non-blocking
159            // readahead so pages are ready by the time we access them.
160            .map(file)
161    } {
162        Ok(mmap) => {
163            #[cfg(target_os = "linux")]
164            {
165                let _ = mmap.advise(memmap2::Advice::Sequential);
166                // WILLNEED triggers async readahead without blocking (unlike populate)
167                unsafe {
168                    libc::madvise(
169                        mmap.as_ptr() as *mut libc::c_void,
170                        mmap.len(),
171                        libc::MADV_WILLNEED,
172                    );
173                }
174                if mmap.len() >= 2 * 1024 * 1024 {
175                    unsafe {
176                        libc::madvise(
177                            mmap.as_ptr() as *mut libc::c_void,
178                            mmap.len(),
179                            libc::MADV_HUGEPAGE,
180                        );
181                    }
182                }
183            }
184            Ok(hash_bytes(algo, &mmap))
185        }
186        Err(_) => {
187            // mmap failed — fall back to buffered read from the same fd
188            let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
189            hash_reader(algo, reader)
190        }
191    }
192}
193
194/// Mmap a file and hash with BLAKE2b. Shared helper for blake2b_hash_file.
195fn mmap_and_hash_blake2b(file: &File, output_bytes: usize) -> io::Result<String> {
196    match unsafe { MmapOptions::new().map(file) } {
197        Ok(mmap) => {
198            #[cfg(target_os = "linux")]
199            {
200                let _ = mmap.advise(memmap2::Advice::Sequential);
201                unsafe {
202                    libc::madvise(
203                        mmap.as_ptr() as *mut libc::c_void,
204                        mmap.len(),
205                        libc::MADV_WILLNEED,
206                    );
207                }
208                if mmap.len() >= 2 * 1024 * 1024 {
209                    unsafe {
210                        libc::madvise(
211                            mmap.as_ptr() as *mut libc::c_void,
212                            mmap.len(),
213                            libc::MADV_HUGEPAGE,
214                        );
215                    }
216                }
217            }
218            Ok(blake2b_hash_data(&mmap, output_bytes))
219        }
220        Err(_) => {
221            let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
222            blake2b_hash_reader(reader, output_bytes)
223        }
224    }
225}
226
227/// Hash stdin. Reads all data first, then hashes in one pass for optimal throughput.
228pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
229    // Try to mmap stdin if it's a regular file (shell redirect)
230    #[cfg(unix)]
231    {
232        use std::os::unix::io::AsRawFd;
233        let stdin = io::stdin();
234        let fd = stdin.as_raw_fd();
235        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
236        if unsafe { libc::fstat(fd, &mut stat) } == 0
237            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
238            && stat.st_size > 0
239        {
240            use std::os::unix::io::FromRawFd;
241            let file = unsafe { File::from_raw_fd(fd) };
242            let result = unsafe { MmapOptions::new().map(&file) };
243            std::mem::forget(file); // Don't close stdin
244            if let Ok(mmap) = result {
245                #[cfg(target_os = "linux")]
246                {
247                    let _ = mmap.advise(memmap2::Advice::Sequential);
248                    unsafe {
249                        libc::madvise(
250                            mmap.as_ptr() as *mut libc::c_void,
251                            mmap.len(),
252                            libc::MADV_WILLNEED,
253                        );
254                    }
255                }
256                return Ok(hash_bytes(algo, &mmap));
257            }
258        }
259    }
260    // Fallback: read all then hash in one pass (avoids per-read update overhead)
261    let mut data = Vec::new();
262    io::stdin().lock().read_to_end(&mut data)?;
263    Ok(hash_bytes(algo, &data))
264}
265
/// Rough estimate of the combined size of `paths`, used for the
/// parallel/sequential decision.
///
/// Only the first path is stat-ed; its length is multiplied (saturating) by
/// the number of paths. Returns 0 for an empty list or when the first path
/// cannot be stat-ed.
pub fn estimate_total_size(paths: &[&Path]) -> u64 {
    match paths.first() {
        None => 0,
        Some(sample) => fs::metadata(sample)
            .map(|meta| meta.len().saturating_mul(paths.len() as u64))
            .unwrap_or(0),
    }
}
280
281/// Check if parallel hashing is worthwhile for the given file paths.
282/// Only uses rayon when files are individually large enough for the hash
283/// computation to dominate over rayon overhead (thread pool init + work stealing).
284/// For many small files (e.g., 100 × 100KB), sequential is much faster.
285pub fn should_use_parallel(paths: &[&Path]) -> bool {
286    if paths.len() < 2 {
287        return false;
288    }
289    let total = estimate_total_size(paths);
290    let avg = total / paths.len() as u64;
291    // Only parallelize when average file size >= 1MB.
292    // Below this, rayon overhead exceeds the benefit of parallel hashing.
293    avg >= 1024 * 1024
294}
295
296/// Issue readahead hints for a list of file paths to warm the page cache.
297/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
298#[cfg(target_os = "linux")]
299pub fn readahead_files(paths: &[&Path]) {
300    use std::os::unix::io::AsRawFd;
301    for path in paths {
302        if let Ok(file) = open_noatime(path) {
303            if let Ok(meta) = file.metadata() {
304                let len = meta.len();
305                if meta.file_type().is_file() && len > 0 {
306                    unsafe {
307                        libc::posix_fadvise(
308                            file.as_raw_fd(),
309                            0,
310                            len as i64,
311                            libc::POSIX_FADV_WILLNEED,
312                        );
313                    }
314                }
315            }
316        }
317    }
318}
319
/// Non-Linux variant of the readahead hint: intentionally does nothing.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No readahead hint is issued on non-Linux targets.
}
324
325// --- BLAKE2b variable-length functions (using blake2b_simd) ---
326
327/// Hash raw data with BLAKE2b variable output length.
328/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
329pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
330    let hash = blake2b_simd::Params::new()
331        .hash_length(output_bytes)
332        .hash(data);
333    hex_encode(hash.as_bytes())
334}
335
336/// Hash a reader with BLAKE2b variable output length.
337pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
338    let mut state = blake2b_simd::Params::new()
339        .hash_length(output_bytes)
340        .to_state();
341    let mut buf = vec![0u8; 16 * 1024 * 1024]; // 16MB buffer
342    loop {
343        let n = reader.read(&mut buf)?;
344        if n == 0 {
345            break;
346        }
347        state.update(&buf[..n]);
348    }
349    Ok(hex_encode(state.finalize().as_bytes()))
350}
351
352/// Hash a file with BLAKE2b variable output length. Single open + fstat.
353/// Uses read() for small files, mmap for large.
354pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
355    // Single open — reuse fd for fstat + read/mmap
356    let file = open_noatime(path)?;
357    let metadata = file.metadata()?;
358    let len = metadata.len();
359    let is_regular = metadata.file_type().is_file();
360
361    if is_regular && len == 0 {
362        return Ok(blake2b_hash_data(&[], output_bytes));
363    }
364
365    if is_regular && len > 0 {
366        // Small files: read into thread-local buffer (zero allocation after first call)
367        if len < MMAP_THRESHOLD {
368            return READ_BUF.with(|cell| {
369                let mut buf = cell.borrow_mut();
370                buf.clear();
371                buf.reserve(len as usize);
372                Read::read_to_end(&mut &file, &mut buf)?;
373                Ok(blake2b_hash_data(&buf, output_bytes))
374            });
375        }
376
377        // Large files: mmap the already-open fd for zero-copy
378        return mmap_and_hash_blake2b(&file, output_bytes);
379    }
380
381    // Fallback: buffered read — fd already open
382    let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
383    blake2b_hash_reader(reader, output_bytes)
384}
385
386/// Hash stdin with BLAKE2b variable output length.
387/// Tries mmap if stdin is a regular file (shell redirect), falls back to read.
388pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
389    // Try to mmap stdin if it's a regular file (shell redirect)
390    #[cfg(unix)]
391    {
392        use std::os::unix::io::AsRawFd;
393        let stdin = io::stdin();
394        let fd = stdin.as_raw_fd();
395        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
396        if unsafe { libc::fstat(fd, &mut stat) } == 0
397            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
398            && stat.st_size > 0
399        {
400            use std::os::unix::io::FromRawFd;
401            let file = unsafe { File::from_raw_fd(fd) };
402            let result = unsafe { MmapOptions::new().map(&file) };
403            std::mem::forget(file); // Don't close stdin
404            if let Ok(mmap) = result {
405                #[cfg(target_os = "linux")]
406                {
407                    let _ = mmap.advise(memmap2::Advice::Sequential);
408                    unsafe {
409                        libc::madvise(
410                            mmap.as_ptr() as *mut libc::c_void,
411                            mmap.len(),
412                            libc::MADV_WILLNEED,
413                        );
414                    }
415                }
416                return Ok(blake2b_hash_data(&mmap, output_bytes));
417            }
418        }
419    }
420    // Fallback: read all then hash in one pass
421    let mut data = Vec::new();
422    io::stdin().lock().read_to_end(&mut data)?;
423    Ok(blake2b_hash_data(&data, output_bytes))
424}
425
/// Write one checksum line in GNU format, newline-terminated.
///
/// The line is `hash`, a space, a mode marker (`*` when `binary` is set,
/// otherwise a second space), then `filename`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = match binary {
        true => '*',
        false => ' ',
    };
    writeln!(out, "{} {}{}", hash, marker, filename)
}
436
/// Write one checksum line in GNU format, terminated by a NUL byte instead of
/// a newline.
///
/// The line is `hash`, a space, a mode marker (`*` when `binary` is set,
/// otherwise a second space), then `filename`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = match binary {
        true => '*',
        false => ' ',
    };
    write!(out, "{} {}{}\0", hash, marker, filename)
}
447
448/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
449pub fn print_hash_tag(
450    out: &mut impl Write,
451    algo: HashAlgorithm,
452    hash: &str,
453    filename: &str,
454) -> io::Result<()> {
455    writeln!(out, "{} ({}) = {}", algo.name(), filename, hash)
456}
457
458/// Print hash in BSD tag format with NUL terminator.
459pub fn print_hash_tag_zero(
460    out: &mut impl Write,
461    algo: HashAlgorithm,
462    hash: &str,
463    filename: &str,
464) -> io::Result<()> {
465    write!(out, "{} ({}) = {}\0", algo.name(), filename, hash)
466}
467
/// Write one BLAKE2b checksum line in BSD tag format, newline-terminated.
///
/// The tag is plain `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` for any
/// other length, e.g. `BLAKE2b-256 (filename) = hash`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => writeln!(out, "BLAKE2b ({}) = {}", filename, hash),
        other => writeln!(out, "BLAKE2b-{} ({}) = {}", other, filename, hash),
    }
}
483
/// Write one BLAKE2b checksum line in BSD tag format, terminated by a NUL
/// byte instead of a newline.
///
/// The tag is plain `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` for any
/// other length.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => write!(out, "BLAKE2b ({}) = {}\0", filename, hash),
        other => write!(out, "BLAKE2b-{} ({}) = {}\0", other, filename, hash),
    }
}
497
/// Options for check mode.
///
/// `Default` yields every flag off and an empty `warn_prefix`, so callers can
/// set only the fields they need with struct-update syntax.
#[derive(Debug, Clone, Default)]
pub struct CheckOptions {
    /// Suppress the per-file `OK` lines.
    pub quiet: bool,
    /// Suppress all per-file result lines and read-error messages.
    pub status_only: bool,
    /// Not read by `check_file` itself.
    // NOTE(review): presumably the caller uses this to turn format errors
    // into a failing exit status — confirm against the CLI front end.
    pub strict: bool,
    /// Emit a warning on the error stream for each improperly formatted line.
    pub warn: bool,
    /// Skip (and count separately) files that do not exist instead of
    /// reporting them as read errors.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
510
/// Result of check mode verification: one counter per outcome category.
///
/// `Default` yields all counters at zero.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CheckResult {
    /// Number of files whose computed hash matched the expected one.
    pub ok: usize,
    /// Number of files whose computed hash differed from the expected one.
    pub mismatches: usize,
    /// Number of non-empty lines that could not be parsed as a checksum line.
    pub format_errors: usize,
    /// Number of files that could not be opened or read.
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
520
521/// Verify checksums from a check file.
522/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
523pub fn check_file<R: BufRead>(
524    algo: HashAlgorithm,
525    reader: R,
526    opts: &CheckOptions,
527    out: &mut impl Write,
528    err_out: &mut impl Write,
529) -> io::Result<CheckResult> {
530    let quiet = opts.quiet;
531    let status_only = opts.status_only;
532    let warn = opts.warn;
533    let ignore_missing = opts.ignore_missing;
534    let mut ok_count = 0;
535    let mut mismatch_count = 0;
536    let mut format_errors = 0;
537    let mut read_errors = 0;
538    let mut ignored_missing_count = 0;
539    let mut line_num = 0;
540
541    for line_result in reader.lines() {
542        line_num += 1;
543        let line = line_result?;
544        let line = line.trim_end();
545
546        if line.is_empty() {
547            continue;
548        }
549
550        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
551        let (expected_hash, filename) = match parse_check_line(line) {
552            Some(v) => v,
553            None => {
554                format_errors += 1;
555                if warn {
556                    out.flush()?;
557                    if opts.warn_prefix.is_empty() {
558                        writeln!(
559                            err_out,
560                            "line {}: improperly formatted {} checksum line",
561                            line_num,
562                            algo.name()
563                        )?;
564                    } else {
565                        writeln!(
566                            err_out,
567                            "{}: {}: improperly formatted {} checksum line",
568                            opts.warn_prefix,
569                            line_num,
570                            algo.name()
571                        )?;
572                    }
573                }
574                continue;
575            }
576        };
577
578        // Compute actual hash
579        let actual = match hash_file(algo, Path::new(filename)) {
580            Ok(h) => h,
581            Err(e) => {
582                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
583                    ignored_missing_count += 1;
584                    continue;
585                }
586                read_errors += 1;
587                if !status_only {
588                    out.flush()?;
589                    writeln!(err_out, "{}: {}", filename, e)?;
590                    writeln!(out, "{}: FAILED open or read", filename)?;
591                }
592                continue;
593            }
594        };
595
596        if actual.eq_ignore_ascii_case(expected_hash) {
597            ok_count += 1;
598            if !quiet && !status_only {
599                writeln!(out, "{}: OK", filename)?;
600            }
601        } else {
602            mismatch_count += 1;
603            if !status_only {
604                writeln!(out, "{}: FAILED", filename)?;
605            }
606        }
607    }
608
609    Ok(CheckResult {
610        ok: ok_count,
611        mismatches: mismatch_count,
612        format_errors,
613        read_errors,
614        ignored_missing: ignored_missing_count,
615    })
616}
617
/// Parse a checksum line in any supported format.
///
/// Accepted forms:
/// * BSD tag: `MD5 (file) = hash`, `SHA256 (file) = hash`,
///   `BLAKE2b (file) = hash`, `BLAKE2b-NNN (file) = hash`
/// * GNU text mode: `hash  file` (two spaces)
/// * GNU binary mode: `hash *file`
///
/// A single leading backslash (GNU's marker for an escaped file name) is
/// dropped before the GNU forms are tried; the file name itself is returned
/// as written, without unescaping.
///
/// Returns `Some((hash, filename))`, or `None` when the line matches no format.
///
/// File names are allowed to contain the separators themselves:
/// * in tag lines the *last* `") = "` ends the file name, since the hash that
///   follows it cannot contain that sequence;
/// * in GNU lines the hash ends at the *first* space, and the next character
///   (space or `*`) is the mode marker. Everything after it, including double
///   spaces or `" *"`, belongs to the file name.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Try BSD tag format: "ALGO (filename) = hash"
    let tagged = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // Handle BLAKE2b-NNN (filename) = hash
            let after = line.strip_prefix("BLAKE2b-")?;
            let open = after.find(" (")?;
            if after[..open].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[open + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = tagged {
        if let Some(close) = rest.rfind(") = ") {
            return Some((&rest[close + 4..], &rest[..close]));
        }
        // No closing ") = " — fall through and try the GNU forms.
    }

    // Handle backslash-escaped lines (leading '\')
    let line = line.strip_prefix('\\').unwrap_or(line);

    // GNU format: the hash runs up to the first space, which must be followed
    // by the mode marker: ' ' (text) or '*' (binary).
    let sep = line.find(' ')?;
    let hash = &line[..sep];
    let tail = &line[sep + 1..];
    let filename = tail
        .strip_prefix(' ')
        .or_else(|| tail.strip_prefix('*'))?;
    Some((hash, filename))
}
662
/// Parse a BSD-style tag line: "ALGO (filename) = hash"
///
/// Returns `(expected_hash, filename, optional_bits)`, or `None` when the
/// line does not contain both `" ("` and `") = "`.
///
/// `bits` is the hash length parsed from the algo name: the text after the
/// last `-` in the algorithm part, if it is a number (e.g., BLAKE2b-256 ->
/// Some(256)); otherwise `None`.
///
/// The file name ends at the *last* `") = "` on the line, so names that
/// themselves contain `") = "` are handled; the hash that follows cannot
/// contain that sequence.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let paren_start = line.find(" (")?;
    let algo_part = &line[..paren_start];
    let rest = &line[paren_start + 2..];

    let paren_end = rest.rfind(") = ")?;
    let filename = &rest[..paren_end];
    let hash = &rest[paren_end + 4..];

    // Parse optional bit length from algo name (e.g., "BLAKE2b-256" -> Some(256))
    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
683
/// Compile-time generated 2-byte hex pair lookup table.
/// Each byte maps directly to its 2-char lowercase hex representation —
/// a single lookup per byte.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    // `while` because `for` loops are not available in const fn.
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

/// Lookup table: `HEX_TABLE[b]` is the two ASCII hex digits of byte `b`.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Hex-encode `bytes` as a lowercase string using the 2-byte pair lookup
/// table — one lookup per input byte.
///
/// The output buffer is allocated once at its final size and filled by
/// appending, so its length never covers bytes that have not been written.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out: Vec<u8> = Vec::with_capacity(bytes.len() * 2);
    for &b in bytes {
        // A u8 index is always within the 256-entry table.
        out.extend_from_slice(&HEX_TABLE[usize::from(b)]);
    }
    // SAFETY: every byte pushed comes from HEX_TABLE, which contains only the
    // ASCII characters 0-9 and a-f, so `out` is valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}