1use std::cell::RefCell;
2use std::fs::{self, File};
3use std::io::{self, BufRead, BufReader, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9use md5::Md5;
10use memmap2::MmapOptions;
11use sha2::{Digest, Sha256};
12
/// Hash algorithms that the hashing helpers in this file can dispatch on.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    /// SHA-256, computed with `sha2::Sha256`.
    Sha256,
    /// MD5, computed with `md5::Md5`.
    Md5,
    /// BLAKE2b, computed with the `blake2b_simd` crate.
    Blake2b,
}
20
21impl HashAlgorithm {
22 pub fn name(self) -> &'static str {
23 match self {
24 HashAlgorithm::Sha256 => "SHA256",
25 HashAlgorithm::Md5 => "MD5",
26 HashAlgorithm::Blake2b => "BLAKE2b",
27 }
28 }
29}
30
31fn hash_digest<D: Digest>(data: &[u8]) -> String {
34 hex_encode(&D::digest(data))
35}
36
37fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
38 let mut hasher = D::new();
39 let mut buf = vec![0u8; 16 * 1024 * 1024]; loop {
41 let n = reader.read(&mut buf)?;
42 if n == 0 {
43 break;
44 }
45 hasher.update(&buf[..n]);
46 }
47 Ok(hex_encode(&hasher.finalize()))
48}
49
50pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
54 match algo {
55 HashAlgorithm::Sha256 => hash_digest::<Sha256>(data),
56 HashAlgorithm::Md5 => hash_digest::<Md5>(data),
57 HashAlgorithm::Blake2b => {
58 let hash = blake2b_simd::blake2b(data);
59 hex_encode(hash.as_bytes())
60 }
61 }
62}
63
64pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
66 match algo {
67 HashAlgorithm::Sha256 => hash_reader_impl::<Sha256>(reader),
68 HashAlgorithm::Md5 => hash_reader_impl::<Md5>(reader),
69 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
70 }
71}
72
// Size cutoff in bytes (1 MiB): the file-hashing functions read regular files
// below this size into `READ_BUF` and hand larger ones to the mmap helpers.
const MMAP_THRESHOLD: u64 = 1024 * 1024;

thread_local! {
    // Per-thread scratch buffer reused for small-file reads. It is created with
    // `MMAP_THRESHOLD` bytes of capacity, which covers every file routed to it
    // by the size check.
    static READ_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(MMAP_THRESHOLD as usize));
}
85
/// Process-wide flag consulted by `open_noatime`: starts `true` and is cleared
/// the first time an `O_NOATIME` open fails with `EPERM`, after which the flag
/// is no longer attempted.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
90
91#[cfg(target_os = "linux")]
94fn open_noatime(path: &Path) -> io::Result<File> {
95 use std::os::unix::fs::OpenOptionsExt;
96 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
97 match fs::OpenOptions::new()
98 .read(true)
99 .custom_flags(libc::O_NOATIME)
100 .open(path)
101 {
102 Ok(f) => return Ok(f),
103 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
104 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
106 }
107 Err(e) => return Err(e), }
109 }
110 File::open(path)
111}
112
/// Non-Linux variant: opens `path` read-only with no extra flags.
///
/// # Errors
/// Returns whatever error the open reports.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    fs::OpenOptions::new().read(true).open(path)
}
117
118pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
121 let file = open_noatime(path)?;
123 let metadata = file.metadata()?; let len = metadata.len();
125 let is_regular = metadata.file_type().is_file();
126
127 if is_regular && len == 0 {
128 return Ok(hash_bytes(algo, &[]));
129 }
130
131 if is_regular && len > 0 {
132 if len < MMAP_THRESHOLD {
134 return READ_BUF.with(|cell| {
135 let mut buf = cell.borrow_mut();
136 buf.clear();
137 buf.reserve(len as usize);
139 Read::read_to_end(&mut &file, &mut buf)?;
140 Ok(hash_bytes(algo, &buf))
141 });
142 }
143
144 return mmap_and_hash(algo, &file);
146 }
147
148 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
150 hash_reader(algo, reader)
151}
152
153fn mmap_and_hash(algo: HashAlgorithm, file: &File) -> io::Result<String> {
155 match unsafe {
156 MmapOptions::new()
157 .map(file)
161 } {
162 Ok(mmap) => {
163 #[cfg(target_os = "linux")]
164 {
165 let _ = mmap.advise(memmap2::Advice::Sequential);
166 unsafe {
168 libc::madvise(
169 mmap.as_ptr() as *mut libc::c_void,
170 mmap.len(),
171 libc::MADV_WILLNEED,
172 );
173 }
174 if mmap.len() >= 2 * 1024 * 1024 {
175 unsafe {
176 libc::madvise(
177 mmap.as_ptr() as *mut libc::c_void,
178 mmap.len(),
179 libc::MADV_HUGEPAGE,
180 );
181 }
182 }
183 }
184 Ok(hash_bytes(algo, &mmap))
185 }
186 Err(_) => {
187 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
189 hash_reader(algo, reader)
190 }
191 }
192}
193
194fn mmap_and_hash_blake2b(file: &File, output_bytes: usize) -> io::Result<String> {
196 match unsafe { MmapOptions::new().map(file) } {
197 Ok(mmap) => {
198 #[cfg(target_os = "linux")]
199 {
200 let _ = mmap.advise(memmap2::Advice::Sequential);
201 unsafe {
202 libc::madvise(
203 mmap.as_ptr() as *mut libc::c_void,
204 mmap.len(),
205 libc::MADV_WILLNEED,
206 );
207 }
208 if mmap.len() >= 2 * 1024 * 1024 {
209 unsafe {
210 libc::madvise(
211 mmap.as_ptr() as *mut libc::c_void,
212 mmap.len(),
213 libc::MADV_HUGEPAGE,
214 );
215 }
216 }
217 }
218 Ok(blake2b_hash_data(&mmap, output_bytes))
219 }
220 Err(_) => {
221 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
222 blake2b_hash_reader(reader, output_bytes)
223 }
224 }
225}
226
227pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
229 #[cfg(unix)]
231 {
232 use std::os::unix::io::AsRawFd;
233 let stdin = io::stdin();
234 let fd = stdin.as_raw_fd();
235 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
236 if unsafe { libc::fstat(fd, &mut stat) } == 0
237 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
238 && stat.st_size > 0
239 {
240 use std::os::unix::io::FromRawFd;
241 let file = unsafe { File::from_raw_fd(fd) };
242 let result = unsafe { MmapOptions::new().map(&file) };
243 std::mem::forget(file); if let Ok(mmap) = result {
245 #[cfg(target_os = "linux")]
246 {
247 let _ = mmap.advise(memmap2::Advice::Sequential);
248 unsafe {
249 libc::madvise(
250 mmap.as_ptr() as *mut libc::c_void,
251 mmap.len(),
252 libc::MADV_WILLNEED,
253 );
254 }
255 }
256 return Ok(hash_bytes(algo, &mmap));
257 }
258 }
259 }
260 let mut data = Vec::new();
262 io::stdin().lock().read_to_end(&mut data)?;
263 Ok(hash_bytes(algo, &data))
264}
265
/// Cheap estimate of the combined size of `paths`: the length of the first
/// path's metadata multiplied (saturating) by the number of paths.
///
/// Returns 0 when `paths` is empty or the first path's metadata cannot be read.
pub fn estimate_total_size(paths: &[&Path]) -> u64 {
    let count = paths.len() as u64;
    paths
        .first()
        .and_then(|&first| fs::metadata(first).ok())
        .map_or(0, |meta| meta.len().saturating_mul(count))
}
280
281pub fn should_use_parallel(paths: &[&Path]) -> bool {
286 if paths.len() < 2 {
287 return false;
288 }
289 let total = estimate_total_size(paths);
290 let avg = total / paths.len() as u64;
291 avg >= 1024 * 1024
294}
295
296#[cfg(target_os = "linux")]
299pub fn readahead_files(paths: &[&Path]) {
300 use std::os::unix::io::AsRawFd;
301 for path in paths {
302 if let Ok(file) = open_noatime(path) {
303 if let Ok(meta) = file.metadata() {
304 let len = meta.len();
305 if meta.file_type().is_file() && len > 0 {
306 unsafe {
307 libc::posix_fadvise(
308 file.as_raw_fd(),
309 0,
310 len as i64,
311 libc::POSIX_FADV_WILLNEED,
312 );
313 }
314 }
315 }
316 }
317 }
318}
319
/// Non-Linux variant of `readahead_files`: intentionally does nothing, so
/// callers can invoke it unconditionally.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {
    // No read-ahead hint is issued on this target.
}
324
325pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
330 let hash = blake2b_simd::Params::new()
331 .hash_length(output_bytes)
332 .hash(data);
333 hex_encode(hash.as_bytes())
334}
335
336pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
338 let mut state = blake2b_simd::Params::new()
339 .hash_length(output_bytes)
340 .to_state();
341 let mut buf = vec![0u8; 16 * 1024 * 1024]; loop {
343 let n = reader.read(&mut buf)?;
344 if n == 0 {
345 break;
346 }
347 state.update(&buf[..n]);
348 }
349 Ok(hex_encode(state.finalize().as_bytes()))
350}
351
352pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
355 let file = open_noatime(path)?;
357 let metadata = file.metadata()?;
358 let len = metadata.len();
359 let is_regular = metadata.file_type().is_file();
360
361 if is_regular && len == 0 {
362 return Ok(blake2b_hash_data(&[], output_bytes));
363 }
364
365 if is_regular && len > 0 {
366 if len < MMAP_THRESHOLD {
368 return READ_BUF.with(|cell| {
369 let mut buf = cell.borrow_mut();
370 buf.clear();
371 buf.reserve(len as usize);
372 Read::read_to_end(&mut &file, &mut buf)?;
373 Ok(blake2b_hash_data(&buf, output_bytes))
374 });
375 }
376
377 return mmap_and_hash_blake2b(&file, output_bytes);
379 }
380
381 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
383 blake2b_hash_reader(reader, output_bytes)
384}
385
386pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
389 #[cfg(unix)]
391 {
392 use std::os::unix::io::AsRawFd;
393 let stdin = io::stdin();
394 let fd = stdin.as_raw_fd();
395 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
396 if unsafe { libc::fstat(fd, &mut stat) } == 0
397 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
398 && stat.st_size > 0
399 {
400 use std::os::unix::io::FromRawFd;
401 let file = unsafe { File::from_raw_fd(fd) };
402 let result = unsafe { MmapOptions::new().map(&file) };
403 std::mem::forget(file); if let Ok(mmap) = result {
405 #[cfg(target_os = "linux")]
406 {
407 let _ = mmap.advise(memmap2::Advice::Sequential);
408 unsafe {
409 libc::madvise(
410 mmap.as_ptr() as *mut libc::c_void,
411 mmap.len(),
412 libc::MADV_WILLNEED,
413 );
414 }
415 }
416 return Ok(blake2b_hash_data(&mmap, output_bytes));
417 }
418 }
419 }
420 let mut data = Vec::new();
422 io::stdin().lock().read_to_end(&mut data)?;
423 Ok(blake2b_hash_data(&data, output_bytes))
424}
425
/// Writes one newline-terminated checksum line: the hash, a space, a mode
/// character (`*` when `binary`, otherwise a space), then the filename.
///
/// # Errors
/// Returns any error from writing to `out`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    writeln!(
        out,
        "{} {}{}",
        hash,
        if binary { '*' } else { ' ' },
        filename
    )
}
436
/// Writes one NUL-terminated checksum record: the hash, a space, a mode
/// character (`*` when `binary`, otherwise a space), then the filename.
///
/// # Errors
/// Returns any error from writing to `out`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    write!(
        out,
        "{} {}{}\0",
        hash,
        if binary { '*' } else { ' ' },
        filename
    )
}
447
448pub fn print_hash_tag(
450 out: &mut impl Write,
451 algo: HashAlgorithm,
452 hash: &str,
453 filename: &str,
454) -> io::Result<()> {
455 writeln!(out, "{} ({}) = {}", algo.name(), filename, hash)
456}
457
458pub fn print_hash_tag_zero(
460 out: &mut impl Write,
461 algo: HashAlgorithm,
462 hash: &str,
463 filename: &str,
464) -> io::Result<()> {
465 write!(out, "{} ({}) = {}\0", algo.name(), filename, hash)
466}
467
/// Writes one newline-terminated tagged BLAKE2b line.
///
/// The tag is plain `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` for any
/// other value.
///
/// # Errors
/// Returns any error from writing to `out`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => writeln!(out, "BLAKE2b ({}) = {}", filename, hash),
        _ => writeln!(out, "BLAKE2b-{} ({}) = {}", bits, filename, hash),
    }
}
483
/// Writes one NUL-terminated tagged BLAKE2b record.
///
/// The tag is plain `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` for any
/// other value.
///
/// # Errors
/// Returns any error from writing to `out`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => write!(out, "BLAKE2b ({}) = {}\0", filename, hash),
        _ => write!(out, "BLAKE2b-{} ({}) = {}\0", bits, filename, hash),
    }
}
497
/// Options controlling how `check_file` verifies a checksum list and what it
/// reports.
pub struct CheckOptions {
    /// Suppress the per-file `OK` lines; failure lines are still printed.
    pub quiet: bool,
    /// Suppress every per-file result line and read-error message.
    pub status_only: bool,
    /// Not consulted by `check_file` in the visible code.
    /// NOTE(review): presumably the caller uses it to decide whether format
    /// errors affect the exit status; confirm at the call site.
    pub strict: bool,
    /// Write a warning to the error stream for each improperly formatted line.
    pub warn: bool,
    /// Count files whose open/read fails with `NotFound` as ignored instead of
    /// as read errors (and print nothing for them).
    pub ignore_missing: bool,
    /// When non-empty, format warnings start with `<warn_prefix>: <line number>:`
    /// instead of `line <line number>:`.
    pub warn_prefix: String,
}
510
/// Tallies returned by `check_file` after processing a checksum list.
pub struct CheckResult {
    /// Files whose computed hash matched the expected one (ASCII case-insensitive).
    pub ok: usize,
    /// Files that were hashed successfully but did not match.
    pub mismatches: usize,
    /// Non-empty lines that `parse_check_line` could not parse.
    pub format_errors: usize,
    /// Files that could not be opened or read (excluding ignored missing files).
    pub read_errors: usize,
    /// Files skipped because they were not found and `ignore_missing` was set.
    pub ignored_missing: usize,
}
520
521pub fn check_file<R: BufRead>(
524 algo: HashAlgorithm,
525 reader: R,
526 opts: &CheckOptions,
527 out: &mut impl Write,
528 err_out: &mut impl Write,
529) -> io::Result<CheckResult> {
530 let quiet = opts.quiet;
531 let status_only = opts.status_only;
532 let warn = opts.warn;
533 let ignore_missing = opts.ignore_missing;
534 let mut ok_count = 0;
535 let mut mismatch_count = 0;
536 let mut format_errors = 0;
537 let mut read_errors = 0;
538 let mut ignored_missing_count = 0;
539 let mut line_num = 0;
540
541 for line_result in reader.lines() {
542 line_num += 1;
543 let line = line_result?;
544 let line = line.trim_end();
545
546 if line.is_empty() {
547 continue;
548 }
549
550 let (expected_hash, filename) = match parse_check_line(line) {
552 Some(v) => v,
553 None => {
554 format_errors += 1;
555 if warn {
556 out.flush()?;
557 if opts.warn_prefix.is_empty() {
558 writeln!(
559 err_out,
560 "line {}: improperly formatted {} checksum line",
561 line_num,
562 algo.name()
563 )?;
564 } else {
565 writeln!(
566 err_out,
567 "{}: {}: improperly formatted {} checksum line",
568 opts.warn_prefix,
569 line_num,
570 algo.name()
571 )?;
572 }
573 }
574 continue;
575 }
576 };
577
578 let actual = match hash_file(algo, Path::new(filename)) {
580 Ok(h) => h,
581 Err(e) => {
582 if ignore_missing && e.kind() == io::ErrorKind::NotFound {
583 ignored_missing_count += 1;
584 continue;
585 }
586 read_errors += 1;
587 if !status_only {
588 out.flush()?;
589 writeln!(err_out, "{}: {}", filename, e)?;
590 writeln!(out, "{}: FAILED open or read", filename)?;
591 }
592 continue;
593 }
594 };
595
596 if actual.eq_ignore_ascii_case(expected_hash) {
597 ok_count += 1;
598 if !quiet && !status_only {
599 writeln!(out, "{}: OK", filename)?;
600 }
601 } else {
602 mismatch_count += 1;
603 if !status_only {
604 writeln!(out, "{}: FAILED", filename)?;
605 }
606 }
607 }
608
609 Ok(CheckResult {
610 ok: ok_count,
611 mismatches: mismatch_count,
612 format_errors,
613 read_errors,
614 ignored_missing: ignored_missing_count,
615 })
616}
617
/// Splits one checksum-list line into `(expected_hash, filename)`.
///
/// Two layouts are recognised:
///
/// * Tagged: `MD5 (name) = hash`, `SHA256 (name) = hash`,
///   `BLAKE2b (name) = hash`, or `BLAKE2b-<digits> (name) = hash`. The name
///   ends at the *last* `) = ` in the line, so names that themselves contain
///   `) = ` are handled.
/// * Standard: `hash`, one space, a mode character (space or `*`), then the
///   name. A single leading backslash on the line is dropped first. The hash
///   ends at the first space, so names may contain any further spaces.
///
/// Returns `None` when the line fits neither layout. This function never
/// panics, whatever the input. Escape sequences inside the name are not
/// decoded.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    let tagged = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // `BLAKE2b-<digits> (`: an explicit output length in the tag.
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[sp + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = tagged {
        // The hash cannot contain `) = `, so the last occurrence is the
        // separator even when the file name contains the same sequence.
        if let Some(paren_idx) = rest.rfind(") = ") {
            let filename = &rest[..paren_idx];
            let hash = &rest[paren_idx + 4..];
            return Some((hash, filename));
        }
    }

    let line = line.strip_prefix('\\').unwrap_or(line);

    // The hash ends at the first space. Exactly one mode character (space for
    // text, `*` for binary) must follow before the file name starts. Stripping
    // by pattern instead of skipping a fixed byte count keeps every slice on a
    // character boundary and inside the line.
    let sep = line.find(' ')?;
    let tail = &line[sep + 1..];
    let filename = tail
        .strip_prefix(' ')
        .or_else(|| tail.strip_prefix('*'))?;
    Some((&line[..sep], filename))
}
662
/// Splits a tagged checksum line `<ALGO> (<filename>) = <hash>` into
/// `(hash, filename, bits)`.
///
/// The algorithm part ends at the first ` (` and the file name at the first
/// `) = ` after it. `bits` is the number following the last `-` of the
/// algorithm part when that suffix parses as `usize`, and `None` otherwise.
/// Returns `None` when either separator is missing.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.split_once(") = ")?;
    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());
    Some((hash, filename, bits))
}
683
/// Builds, at compile time, a 256-entry table whose entry `b` holds the two
/// lowercase ASCII hex digits of byte value `b` (high nibble first).
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut table = [[0u8; 2]; 256];
    let mut byte = 0usize;
    // `for` loops are not available in `const fn`, hence the manual counter.
    while byte < 256 {
        table[byte] = [DIGITS[byte / 16], DIGITS[byte % 16]];
        byte += 1;
    }
    table
}
696
/// Compile-time lookup table from `generate_hex_table`: entry `b` holds the
/// two lowercase ASCII hex digits of byte `b`.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();
698
699pub(crate) fn hex_encode(bytes: &[u8]) -> String {
702 let len = bytes.len() * 2;
703 let mut hex = String::with_capacity(len);
704 unsafe {
706 let buf = hex.as_mut_vec();
707 buf.set_len(len);
708 let ptr = buf.as_mut_ptr();
709 for (i, &b) in bytes.iter().enumerate() {
710 let pair = *HEX_TABLE.get_unchecked(b as usize);
711 *ptr.add(i * 2) = pair[0];
712 *ptr.add(i * 2 + 1) = pair[1];
713 }
714 }
715 hex
716}