coreutils_rs/hash/
core.rs

1use std::cell::RefCell;
2use std::fs::{self, File};
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9use digest::Digest;
10use md5::Md5;
11
/// Supported hash algorithms.
///
/// The type is a plain tag: it is `Copy`, and it can be compared and hashed so
/// callers may use it in equality checks or as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256.
    Sha256,
    /// MD5.
    Md5,
    /// BLAKE2b.
    Blake2b,
}
19
20impl HashAlgorithm {
21    pub fn name(self) -> &'static str {
22        match self {
23            HashAlgorithm::Sha256 => "SHA256",
24            HashAlgorithm::Md5 => "MD5",
25            HashAlgorithm::Blake2b => "BLAKE2b",
26        }
27    }
28}
29
30// ── Generic hash helpers ────────────────────────────────────────────
31
32fn hash_digest<D: Digest>(data: &[u8]) -> String {
33    hex_encode(&D::digest(data))
34}
35
36/// Streaming hash using thread-local 1MB buffer for optimal L2 cache behavior.
37/// 1MB fits in L2 cache on most CPUs, keeping data hot during hash update.
38/// Uses read_full to ensure each update() gets a full buffer, minimizing
39/// per-chunk hasher overhead and maximizing SIMD-friendly aligned updates.
40fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
41    STREAM_BUF.with(|cell| {
42        let mut buf = cell.borrow_mut();
43        let mut hasher = D::new();
44        loop {
45            let n = read_full(&mut reader, &mut buf)?;
46            if n == 0 {
47                break;
48            }
49            hasher.update(&buf[..n]);
50        }
51        Ok(hex_encode(&hasher.finalize()))
52    })
53}
54
55// ── Public hashing API ──────────────────────────────────────────────
56
/// Size in bytes (2 MiB) of the staging buffer used by the streaming hashers.
///
/// A larger buffer means fewer `read` calls per file; the value is a trade-off
/// between syscall count and per-thread memory.
const HASH_READ_BUF: usize = 2 << 20;

/// Largest regular-file size in bytes (8 MiB) that is read completely into
/// memory and hashed in a single call; bigger files are streamed chunk by chunk.
const SINGLE_SHOT_THRESHOLD: u64 = 8 << 20;

// Per-thread staging buffer for the streaming hashers. It is created lazily on
// first use in a thread and then reused by every later streaming call there.
thread_local! {
    static STREAM_BUF: RefCell<Vec<u8>> = RefCell::new(vec![0; HASH_READ_BUF]);
}
76
77// ── SHA-256: ring on non-Apple, sha2 fallback on Apple ───────────────
78
79/// Single-shot SHA-256 using ring's BoringSSL assembly (Linux/Windows).
80#[cfg(not(target_vendor = "apple"))]
81fn sha256_bytes(data: &[u8]) -> String {
82    hex_encode(ring::digest::digest(&ring::digest::SHA256, data).as_ref())
83}
84
85/// Single-shot SHA-256 using sha2 crate (macOS fallback).
86#[cfg(target_vendor = "apple")]
87fn sha256_bytes(data: &[u8]) -> String {
88    hash_digest::<sha2::Sha256>(data)
89}
90
91/// Streaming SHA-256 using ring's BoringSSL assembly (Linux/Windows).
92#[cfg(not(target_vendor = "apple"))]
93fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
94    STREAM_BUF.with(|cell| {
95        let mut buf = cell.borrow_mut();
96        let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
97        loop {
98            let n = read_full(&mut reader, &mut buf)?;
99            if n == 0 {
100                break;
101            }
102            ctx.update(&buf[..n]);
103        }
104        Ok(hex_encode(ctx.finish().as_ref()))
105    })
106}
107
108/// Streaming SHA-256 using sha2 crate (macOS fallback).
109#[cfg(target_vendor = "apple")]
110fn sha256_reader(reader: impl Read) -> io::Result<String> {
111    hash_reader_impl::<sha2::Sha256>(reader)
112}
113
114/// Compute hash of a byte slice directly (zero-copy fast path).
115pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
116    match algo {
117        HashAlgorithm::Sha256 => sha256_bytes(data),
118        HashAlgorithm::Md5 => hash_digest::<Md5>(data),
119        HashAlgorithm::Blake2b => {
120            let hash = blake2b_simd::blake2b(data);
121            hex_encode(hash.as_bytes())
122        }
123    }
124}
125
126/// Compute hash of data from a reader, returning hex string.
127pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
128    match algo {
129        HashAlgorithm::Sha256 => sha256_reader(reader),
130        HashAlgorithm::Md5 => hash_reader_impl::<Md5>(reader),
131        HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
132    }
133}
134
135/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
136/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
137#[cfg(target_os = "linux")]
138static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
139
140/// Open a file with O_NOATIME on Linux to avoid atime update overhead.
141/// Caches whether O_NOATIME works to avoid double-open on every file.
142#[cfg(target_os = "linux")]
143fn open_noatime(path: &Path) -> io::Result<File> {
144    use std::os::unix::fs::OpenOptionsExt;
145    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
146        match fs::OpenOptions::new()
147            .read(true)
148            .custom_flags(libc::O_NOATIME)
149            .open(path)
150        {
151            Ok(f) => return Ok(f),
152            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
153                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
154                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
155            }
156            Err(e) => return Err(e), // Real error, propagate
157        }
158    }
159    File::open(path)
160}
161
162#[cfg(not(target_os = "linux"))]
163fn open_noatime(path: &Path) -> io::Result<File> {
164    File::open(path)
165}
166
167/// Hint the kernel for sequential read access. Non-blocking.
168#[cfg(target_os = "linux")]
169#[inline]
170fn fadvise_sequential(file: &File, len: u64) {
171    use std::os::unix::io::AsRawFd;
172    unsafe {
173        libc::posix_fadvise(file.as_raw_fd(), 0, len as i64, libc::POSIX_FADV_SEQUENTIAL);
174    }
175}
176
177#[cfg(not(target_os = "linux"))]
178#[inline]
179fn fadvise_sequential(_file: &File, _len: u64) {}
180
181/// Hash a file by path. Single open + fstat to minimize syscalls.
182/// Uses read() for small files, streaming read+hash for large files.
183/// Replaced mmap with read()+fadvise for better cache behavior:
184/// read() keeps data hot in L2/L3 cache, while mmap suffers page table
185/// and TLB overhead for sequential single-pass workloads.
186pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
187    // Single open — reuse fd for fstat + read (saves separate stat + open)
188    let file = open_noatime(path)?;
189    let metadata = file.metadata()?; // fstat on existing fd, cheaper than stat(path)
190    let len = metadata.len();
191    let is_regular = metadata.file_type().is_file();
192
193    if is_regular && len == 0 {
194        return Ok(hash_bytes(algo, &[]));
195    }
196
197    if is_regular && len > 0 {
198        // Files up to 8MB: read entirely + single-shot hash.
199        // Contiguous data enables optimal CPU prefetching and avoids
200        // per-chunk hasher.update() overhead.
201        // Use read_full into exact-size buffer instead of read_to_end
202        // to avoid the grow-and-probe loop (saves 1-2 extra read() syscalls).
203        if len <= SINGLE_SHOT_THRESHOLD {
204            fadvise_sequential(&file, len);
205            let mut buf = vec![0u8; len as usize];
206            let n = read_full(&mut &file, &mut buf)?;
207            return Ok(hash_bytes(algo, &buf[..n]));
208        }
209
210        // Large files (>8MB): streaming read with kernel readahead hint.
211        // fadvise(SEQUENTIAL) enables aggressive readahead (2x default).
212        fadvise_sequential(&file, len);
213        return hash_reader(algo, file);
214    }
215
216    // Fallback: streaming read (special files, pipes, etc.) — fd already open
217    hash_reader(algo, file)
218}
219
220/// Hash stdin. Uses fadvise for file redirects, streaming for pipes.
221pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
222    let stdin = io::stdin();
223    // Hint kernel for sequential access if stdin is a regular file (redirect)
224    #[cfg(target_os = "linux")]
225    {
226        use std::os::unix::io::AsRawFd;
227        let fd = stdin.as_raw_fd();
228        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
229        if unsafe { libc::fstat(fd, &mut stat) } == 0
230            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
231            && stat.st_size > 0
232        {
233            unsafe {
234                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
235            }
236        }
237    }
238    // Streaming hash — works for both pipe and file-redirect stdin
239    hash_reader(algo, stdin.lock())
240}
241
/// Roughly estimates the combined size of `paths` in bytes.
///
/// Only the first path is stat-ed; its length is multiplied (saturating) by the
/// number of paths. Returns 0 when `paths` is empty or the first path cannot be
/// stat-ed.
pub fn estimate_total_size(paths: &[&Path]) -> u64 {
    let count = paths.len() as u64;
    paths
        .first()
        .and_then(|sample| fs::metadata(sample).ok())
        .map_or(0, |meta| meta.len().saturating_mul(count))
}
256
257/// Check if parallel hashing is worthwhile for the given file paths.
258/// Only uses rayon when files are individually large enough for the hash
259/// computation to dominate over rayon overhead (thread pool init + work stealing).
260/// For many small files (e.g., 100 × 100KB), sequential is much faster.
261pub fn should_use_parallel(paths: &[&Path]) -> bool {
262    if paths.len() < 2 {
263        return false;
264    }
265    let total = estimate_total_size(paths);
266    let avg = total / paths.len() as u64;
267    // Only parallelize when average file size >= 1MB.
268    // Below this, rayon overhead exceeds the benefit of parallel hashing.
269    avg >= 1024 * 1024
270}
271
272/// Issue readahead hints for a list of file paths to warm the page cache.
273/// Uses POSIX_FADV_WILLNEED which is non-blocking and batches efficiently.
274#[cfg(target_os = "linux")]
275pub fn readahead_files(paths: &[&Path]) {
276    use std::os::unix::io::AsRawFd;
277    for path in paths {
278        if let Ok(file) = open_noatime(path) {
279            if let Ok(meta) = file.metadata() {
280                let len = meta.len();
281                if meta.file_type().is_file() && len > 0 {
282                    unsafe {
283                        libc::posix_fadvise(
284                            file.as_raw_fd(),
285                            0,
286                            len as i64,
287                            libc::POSIX_FADV_WILLNEED,
288                        );
289                    }
290                }
291            }
292        }
293    }
294}
295
296#[cfg(not(target_os = "linux"))]
297pub fn readahead_files(_paths: &[&Path]) {
298    // No-op on non-Linux
299}
300
301// --- BLAKE2b variable-length functions (using blake2b_simd) ---
302
303/// Hash raw data with BLAKE2b variable output length.
304/// `output_bytes` is the output size in bytes (e.g., 32 for 256-bit).
305pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
306    let hash = blake2b_simd::Params::new()
307        .hash_length(output_bytes)
308        .hash(data);
309    hex_encode(hash.as_bytes())
310}
311
312/// Hash a reader with BLAKE2b variable output length.
313/// Uses thread-local 1MB buffer for cache-friendly streaming.
314pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
315    STREAM_BUF.with(|cell| {
316        let mut buf = cell.borrow_mut();
317        let mut state = blake2b_simd::Params::new()
318            .hash_length(output_bytes)
319            .to_state();
320        loop {
321            let n = read_full(&mut reader, &mut buf)?;
322            if n == 0 {
323                break;
324            }
325            state.update(&buf[..n]);
326        }
327        Ok(hex_encode(state.finalize().as_bytes()))
328    })
329}
330
331/// Hash a file with BLAKE2b variable output length. Single open + fstat.
332/// Uses read() for small files, streaming read+hash for large.
333pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
334    // Single open — reuse fd for fstat + read
335    let file = open_noatime(path)?;
336    let metadata = file.metadata()?;
337    let len = metadata.len();
338    let is_regular = metadata.file_type().is_file();
339
340    if is_regular && len == 0 {
341        return Ok(blake2b_hash_data(&[], output_bytes));
342    }
343
344    if is_regular && len > 0 {
345        // Files up to 8MB: read entirely + single-shot hash.
346        if len <= SINGLE_SHOT_THRESHOLD {
347            fadvise_sequential(&file, len);
348            let mut buf = vec![0u8; len as usize];
349            let n = read_full(&mut &file, &mut buf)?;
350            return Ok(blake2b_hash_data(&buf[..n], output_bytes));
351        }
352
353        // Large files (>8MB): streaming read with kernel readahead hint
354        fadvise_sequential(&file, len);
355        return blake2b_hash_reader(file, output_bytes);
356    }
357
358    // Fallback: streaming read — fd already open
359    blake2b_hash_reader(file, output_bytes)
360}
361
362/// Hash stdin with BLAKE2b variable output length.
363/// Tries fadvise if stdin is a regular file (shell redirect), then streams.
364pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
365    let stdin = io::stdin();
366    #[cfg(target_os = "linux")]
367    {
368        use std::os::unix::io::AsRawFd;
369        let fd = stdin.as_raw_fd();
370        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
371        if unsafe { libc::fstat(fd, &mut stat) } == 0
372            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
373            && stat.st_size > 0
374        {
375            unsafe {
376                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
377            }
378        }
379    }
380    blake2b_hash_reader(stdin.lock(), output_bytes)
381}
382
/// Writes one GNU-style checksum line: the hash, a space, a mode marker
/// (`*` for binary, a space for text), the file name, and a newline.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = match binary {
        true => '*',
        false => ' ',
    };
    writeln!(out, "{} {}{}", hash, marker, filename)
}
393
/// Writes one GNU-style checksum record terminated by NUL instead of a newline:
/// the hash, a space, a mode marker (`*` for binary, a space for text), the file
/// name, and a `\0` byte.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = match binary {
        true => '*',
        false => ' ',
    };
    write!(out, "{} {}{}\0", hash, marker, filename)
}
404
405/// Print hash result in BSD tag format: "ALGO (filename) = hash\n"
406pub fn print_hash_tag(
407    out: &mut impl Write,
408    algo: HashAlgorithm,
409    hash: &str,
410    filename: &str,
411) -> io::Result<()> {
412    writeln!(out, "{} ({}) = {}", algo.name(), filename, hash)
413}
414
415/// Print hash in BSD tag format with NUL terminator.
416pub fn print_hash_tag_zero(
417    out: &mut impl Write,
418    algo: HashAlgorithm,
419    hash: &str,
420    filename: &str,
421) -> io::Result<()> {
422    write!(out, "{} ({}) = {}\0", algo.name(), filename, hash)
423}
424
/// Writes one BSD-tag line for a BLAKE2b digest, followed by a newline.
///
/// A 512-bit digest is tagged plain `BLAKE2b`; any other length is tagged
/// `BLAKE2b-<bits>`, for example `BLAKE2b-256 (filename) = hash`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => writeln!(out, "BLAKE2b ({}) = {}", filename, hash),
        _ => writeln!(out, "BLAKE2b-{} ({}) = {}", bits, filename, hash),
    }
}
440
/// Writes one BSD-tag record for a BLAKE2b digest, terminated by a NUL byte.
///
/// A 512-bit digest is tagged plain `BLAKE2b`; any other length is tagged
/// `BLAKE2b-<bits>`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => write!(out, "BLAKE2b ({}) = {}\0", filename, hash),
        _ => write!(out, "BLAKE2b-{} ({}) = {}\0", bits, filename, hash),
    }
}
454
/// Options for check mode.
///
/// `Default` yields every flag off and an empty `warn_prefix`.
#[derive(Debug, Clone, Default)]
pub struct CheckOptions {
    /// Suppress the per-file `OK` lines.
    pub quiet: bool,
    /// Suppress all per-file output.
    pub status_only: bool,
    /// Strict-mode flag; stored for the caller (not consulted by `check_file`).
    pub strict: bool,
    /// Emit a warning on stderr for each improperly formatted line.
    pub warn: bool,
    /// Silently skip files that do not exist instead of reporting a read error.
    pub ignore_missing: bool,
    /// Prefix for per-line format warnings, e.g., "fmd5sum: checksums.txt".
    /// When non-empty, warnings use GNU format: "{prefix}: {line}: message".
    /// When empty, uses generic format: "line {line}: message".
    pub warn_prefix: String,
}
467
/// Result of check mode verification: one counter per outcome.
///
/// `Default` yields all counters at zero.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CheckResult {
    /// Number of files whose hash matched.
    pub ok: usize,
    /// Number of files whose hash did not match.
    pub mismatches: usize,
    /// Number of lines that could not be parsed as a checksum line.
    pub format_errors: usize,
    /// Number of files that could not be opened or read.
    pub read_errors: usize,
    /// Number of files skipped because they were missing and --ignore-missing was set.
    pub ignored_missing: usize,
}
477
478/// Verify checksums from a check file.
479/// Each line should be "hash  filename" or "hash *filename" or "ALGO (filename) = hash".
480pub fn check_file<R: BufRead>(
481    algo: HashAlgorithm,
482    reader: R,
483    opts: &CheckOptions,
484    out: &mut impl Write,
485    err_out: &mut impl Write,
486) -> io::Result<CheckResult> {
487    let quiet = opts.quiet;
488    let status_only = opts.status_only;
489    let warn = opts.warn;
490    let ignore_missing = opts.ignore_missing;
491    let mut ok_count = 0;
492    let mut mismatch_count = 0;
493    let mut format_errors = 0;
494    let mut read_errors = 0;
495    let mut ignored_missing_count = 0;
496    let mut line_num = 0;
497
498    for line_result in reader.lines() {
499        line_num += 1;
500        let line = line_result?;
501        let line = line.trim_end();
502
503        if line.is_empty() {
504            continue;
505        }
506
507        // Parse "hash  filename" or "hash *filename" or "ALGO (file) = hash"
508        let (expected_hash, filename) = match parse_check_line(line) {
509            Some(v) => v,
510            None => {
511                format_errors += 1;
512                if warn {
513                    out.flush()?;
514                    if opts.warn_prefix.is_empty() {
515                        writeln!(
516                            err_out,
517                            "line {}: improperly formatted {} checksum line",
518                            line_num,
519                            algo.name()
520                        )?;
521                    } else {
522                        writeln!(
523                            err_out,
524                            "{}: {}: improperly formatted {} checksum line",
525                            opts.warn_prefix,
526                            line_num,
527                            algo.name()
528                        )?;
529                    }
530                }
531                continue;
532            }
533        };
534
535        // Compute actual hash
536        let actual = match hash_file(algo, Path::new(filename)) {
537            Ok(h) => h,
538            Err(e) => {
539                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
540                    ignored_missing_count += 1;
541                    continue;
542                }
543                read_errors += 1;
544                if !status_only {
545                    out.flush()?;
546                    writeln!(err_out, "{}: {}", filename, e)?;
547                    writeln!(out, "{}: FAILED open or read", filename)?;
548                }
549                continue;
550            }
551        };
552
553        if actual.eq_ignore_ascii_case(expected_hash) {
554            ok_count += 1;
555            if !quiet && !status_only {
556                writeln!(out, "{}: OK", filename)?;
557            }
558        } else {
559            mismatch_count += 1;
560            if !status_only {
561                writeln!(out, "{}: FAILED", filename)?;
562            }
563        }
564    }
565
566    Ok(CheckResult {
567        ok: ok_count,
568        mismatches: mismatch_count,
569        format_errors,
570        read_errors,
571        ignored_missing: ignored_missing_count,
572    })
573}
574
/// Parse a checksum line in any supported format, returning `(hash, filename)`.
///
/// Recognised formats:
/// * BSD tag: `ALGO (filename) = hash`, where `ALGO` is `MD5`, `SHA256`,
///   `BLAKE2b` or `BLAKE2b-<digits>`;
/// * text mode: `hash  filename` (two spaces);
/// * binary mode: `hash *filename`.
///
/// Leading blanks and a leading `\` (the marker for escaped file names) are
/// skipped before either format is tried. The file name itself is returned
/// verbatim; escape sequences are not decoded. The hash is not validated as
/// hexadecimal.
///
/// Returns `None` when the line matches none of the formats.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    /// Returns the text after `ALGO (` when `line` starts with a known BSD tag.
    fn after_bsd_tag(line: &str) -> Option<&str> {
        const PLAIN_TAGS: [&str; 3] = ["MD5 (", "SHA256 (", "BLAKE2b ("];
        if let Some(rest) = PLAIN_TAGS.iter().find_map(|tag| line.strip_prefix(*tag)) {
            return Some(rest);
        }
        // Sized variant: "BLAKE2b-NNN (filename) = hash".
        let sized = line.strip_prefix("BLAKE2b-")?;
        let open = sized.find(" (")?;
        let digits = &sized[..open];
        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        Some(&sized[open + 2..])
    }

    let line = line.trim_start_matches(|c| c == ' ' || c == '\t');
    let line = line.strip_prefix('\\').unwrap_or(line);

    if let Some(rest) = after_bsd_tag(line) {
        // A digest cannot contain ") = ", so the LAST occurrence is the real
        // separator even when the file name contains that sequence.
        if let Some(sep) = rest.rfind(") = ") {
            return Some((&rest[sep + 4..], &rest[..sep]));
        }
    }

    // GNU format: the digest ends at the first space and the next character is
    // the mode marker (' ' for text, '*' for binary). Splitting here, rather
    // than searching for "  " anywhere, keeps file names with double spaces intact.
    let space = line.find(' ')?;
    let hash = &line[..space];
    let tail = &line[space + 1..];
    let filename = tail
        .strip_prefix(' ')
        .or_else(|| tail.strip_prefix('*'))?;
    Some((hash, filename))
}
619
/// Parse a BSD-style tag line: "ALGO (filename) = hash".
///
/// Returns `(expected_hash, filename, optional_bits)`. `bits` is the number
/// after the last `-` in the algorithm name (e.g. `BLAKE2b-256` -> `Some(256)`),
/// or `None` when there is no such numeric suffix.
///
/// The file name runs from the first " (" to the LAST ") = ". A digest cannot
/// contain ") = ", so a file name that contains it is kept whole.
///
/// Returns `None` when the line has no " (" or no ") = " after it.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let open = line.find(" (")?;
    let algo_part = &line[..open];
    let rest = &line[open + 2..];

    let close = rest.rfind(") = ")?;
    let filename = &rest[..close];
    let hash = &rest[close + 4..];

    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
640
/// Fills `buf` from `reader` as far as possible and returns the byte count.
///
/// Keeps reading after short reads until the buffer is full or the reader
/// reports end of input, so a result smaller than `buf.len()` means EOF was
/// reached. Reads that fail with `ErrorKind::Interrupted` are retried; every
/// other error is returned immediately.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    loop {
        let spare = &mut buf[filled..];
        if spare.is_empty() {
            return Ok(filled);
        }
        match reader.read(spare) {
            Ok(0) => return Ok(filled),
            Ok(n) => filled += n,
            // Interrupted by a signal: try the same read again.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
}
656
/// Builds, at compile time, a table mapping every byte value to its two
/// lowercase ASCII hex digits (high nibble first).
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    // `while` rather than `for`: iterators are not usable in a `const fn`.
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

/// Byte -> two-hex-digit lookup table used by `hex_encode`.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Encodes `bytes` as a lowercase hexadecimal string (two characters per byte).
///
/// Uses one table lookup per input byte and allocates the output once. It is
/// written in safe code: a `u8` index is always in bounds for the 256-entry
/// table, and pushing ASCII characters keeps the `String` valid without having
/// to call `Vec::set_len` on uninitialised memory.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        let [hi, lo] = HEX_TABLE[usize::from(byte)];
        hex.push(char::from(hi));
        hex.push(char::from(lo));
    }
    hex
}
coreutils_rs/hash/core.rs

coreutils_rs/hash/
core.rs