1use std::cell::RefCell;
2use std::fs::{self, File};
3use std::io::{self, BufRead, BufReader, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9use md5::Md5;
10use memmap2::MmapOptions;
11use sha2::{Digest, Sha256};
12
/// Digest algorithms this module can compute.
///
/// The type is a plain tag: it is `Copy`, comparable and hashable so it can be
/// stored in sets/maps or compared in tests without extra boilerplate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256 (reported as `SHA256`).
    Sha256,
    /// MD5 (reported as `MD5`).
    Md5,
    /// BLAKE2b (reported as `BLAKE2b`).
    Blake2b,
}
20
21impl HashAlgorithm {
22 pub fn name(self) -> &'static str {
23 match self {
24 HashAlgorithm::Sha256 => "SHA256",
25 HashAlgorithm::Md5 => "MD5",
26 HashAlgorithm::Blake2b => "BLAKE2b",
27 }
28 }
29}
30
31fn hash_digest<D: Digest>(data: &[u8]) -> String {
34 hex_encode(&D::digest(data))
35}
36
37fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
38 let mut hasher = D::new();
39 let mut buf = vec![0u8; 16 * 1024 * 1024]; loop {
41 let n = reader.read(&mut buf)?;
42 if n == 0 {
43 break;
44 }
45 hasher.update(&buf[..n]);
46 }
47 Ok(hex_encode(&hasher.finalize()))
48}
49
/// Size (4 MiB) of the slices fed to a hasher when a large in-memory input is
/// hashed incrementally; inputs no larger than this are hashed in one call.
const HASH_CHUNK_SIZE: usize = 4 << 20;
58
59pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
61 match algo {
62 HashAlgorithm::Sha256 => hash_digest::<Sha256>(data),
63 HashAlgorithm::Md5 => hash_digest::<Md5>(data),
64 HashAlgorithm::Blake2b => {
65 let hash = blake2b_simd::blake2b(data);
66 hex_encode(hash.as_bytes())
67 }
68 }
69}
70
71fn hash_bytes_chunked(algo: HashAlgorithm, data: &[u8]) -> String {
75 if data.len() <= HASH_CHUNK_SIZE {
76 return hash_bytes(algo, data);
77 }
78 match algo {
79 HashAlgorithm::Sha256 => {
80 let mut hasher = Sha256::new();
81 for chunk in data.chunks(HASH_CHUNK_SIZE) {
82 hasher.update(chunk);
83 }
84 hex_encode(&hasher.finalize())
85 }
86 HashAlgorithm::Md5 => {
87 let mut hasher = Md5::new();
88 for chunk in data.chunks(HASH_CHUNK_SIZE) {
89 hasher.update(chunk);
90 }
91 hex_encode(&hasher.finalize())
92 }
93 HashAlgorithm::Blake2b => {
94 let mut state = blake2b_simd::State::new();
95 for chunk in data.chunks(HASH_CHUNK_SIZE) {
96 state.update(chunk);
97 }
98 hex_encode(state.finalize().as_bytes())
99 }
100 }
101}
102
103fn blake2b_hash_data_chunked(data: &[u8], output_bytes: usize) -> String {
105 if data.len() <= HASH_CHUNK_SIZE {
106 return blake2b_hash_data(data, output_bytes);
107 }
108 let mut state = blake2b_simd::Params::new()
109 .hash_length(output_bytes)
110 .to_state();
111 for chunk in data.chunks(HASH_CHUNK_SIZE) {
112 state.update(chunk);
113 }
114 hex_encode(state.finalize().as_bytes())
115}
116
117pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
119 match algo {
120 HashAlgorithm::Sha256 => hash_reader_impl::<Sha256>(reader),
121 HashAlgorithm::Md5 => hash_reader_impl::<Md5>(reader),
122 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
123 }
124}
125
/// Size boundary (1 MiB) between the two regular-file strategies: files
/// shorter than this are read into [`READ_BUF`], longer ones are memory-mapped.
const MMAP_THRESHOLD: u64 = 1 << 20;

thread_local! {
    /// Per-thread scratch buffer reused for small-file reads, pre-sized to
    /// `MMAP_THRESHOLD` bytes so those reads do not allocate.
    static READ_BUF: RefCell<Vec<u8>> =
        RefCell::new(Vec::with_capacity(MMAP_THRESHOLD as usize));
}
138
139#[cfg(target_os = "linux")]
142static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
143
144#[cfg(target_os = "linux")]
147fn open_noatime(path: &Path) -> io::Result<File> {
148 use std::os::unix::fs::OpenOptionsExt;
149 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
150 match fs::OpenOptions::new()
151 .read(true)
152 .custom_flags(libc::O_NOATIME)
153 .open(path)
154 {
155 Ok(f) => return Ok(f),
156 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
157 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
159 }
160 Err(e) => return Err(e), }
162 }
163 File::open(path)
164}
165
/// Opens `path` read-only. Non-Linux targets perform a plain open with no
/// extra flags.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    fs::OpenOptions::new().read(true).open(path)
}
170
171pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
174 let file = open_noatime(path)?;
176 let metadata = file.metadata()?; let len = metadata.len();
178 let is_regular = metadata.file_type().is_file();
179
180 if is_regular && len == 0 {
181 return Ok(hash_bytes(algo, &[]));
182 }
183
184 if is_regular && len > 0 {
185 if len < MMAP_THRESHOLD {
187 return READ_BUF.with(|cell| {
188 let mut buf = cell.borrow_mut();
189 buf.clear();
190 buf.reserve(len as usize);
192 Read::read_to_end(&mut &file, &mut buf)?;
193 Ok(hash_bytes(algo, &buf))
194 });
195 }
196
197 return mmap_and_hash(algo, &file);
199 }
200
201 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
203 hash_reader(algo, reader)
204}
205
206fn mmap_and_hash(algo: HashAlgorithm, file: &File) -> io::Result<String> {
210 match unsafe { MmapOptions::new().map(file) } {
211 Ok(mmap) => {
212 #[cfg(target_os = "linux")]
213 {
214 let _ = mmap.advise(memmap2::Advice::Sequential);
215 }
216 Ok(hash_bytes_chunked(algo, &mmap))
217 }
218 Err(_) => {
219 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
221 hash_reader(algo, reader)
222 }
223 }
224}
225
226fn mmap_and_hash_blake2b(file: &File, output_bytes: usize) -> io::Result<String> {
228 match unsafe { MmapOptions::new().map(file) } {
229 Ok(mmap) => {
230 #[cfg(target_os = "linux")]
231 {
232 let _ = mmap.advise(memmap2::Advice::Sequential);
233 }
234 Ok(blake2b_hash_data_chunked(&mmap, output_bytes))
235 }
236 Err(_) => {
237 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
238 blake2b_hash_reader(reader, output_bytes)
239 }
240 }
241}
242
243pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
245 #[cfg(unix)]
247 {
248 use std::os::unix::io::AsRawFd;
249 let stdin = io::stdin();
250 let fd = stdin.as_raw_fd();
251 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
252 if unsafe { libc::fstat(fd, &mut stat) } == 0
253 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
254 && stat.st_size > 0
255 {
256 use std::os::unix::io::FromRawFd;
257 let file = unsafe { File::from_raw_fd(fd) };
258 let result = unsafe { MmapOptions::new().map(&file) };
259 std::mem::forget(file); if let Ok(mmap) = result {
261 #[cfg(target_os = "linux")]
262 {
263 let _ = mmap.advise(memmap2::Advice::Sequential);
264 }
265 return Ok(hash_bytes_chunked(algo, &mmap));
266 }
267 }
268 }
269 let mut data = Vec::new();
271 io::stdin().lock().read_to_end(&mut data)?;
272 Ok(hash_bytes(algo, &data))
273}
274
/// Cheaply estimates the combined size of `paths` by taking the size of the
/// first path and multiplying it by the number of paths (saturating).
///
/// Returns 0 when `paths` is empty or the first path's metadata cannot be
/// read. Only the first path is ever inspected.
pub fn estimate_total_size(paths: &[&Path]) -> u64 {
    paths
        .first()
        .and_then(|first| fs::metadata(first).ok())
        .map_or(0, |meta| meta.len().saturating_mul(paths.len() as u64))
}
289
290pub fn should_use_parallel(paths: &[&Path]) -> bool {
295 if paths.len() < 2 {
296 return false;
297 }
298 let total = estimate_total_size(paths);
299 let avg = total / paths.len() as u64;
300 avg >= 1024 * 1024
303}
304
305#[cfg(target_os = "linux")]
308pub fn readahead_files(paths: &[&Path]) {
309 use std::os::unix::io::AsRawFd;
310 for path in paths {
311 if let Ok(file) = open_noatime(path) {
312 if let Ok(meta) = file.metadata() {
313 let len = meta.len();
314 if meta.file_type().is_file() && len > 0 {
315 unsafe {
316 libc::posix_fadvise(
317 file.as_raw_fd(),
318 0,
319 len as i64,
320 libc::POSIX_FADV_WILLNEED,
321 );
322 }
323 }
324 }
325 }
326 }
327}
328
/// Read-ahead hinting is only implemented on Linux; on other targets this is
/// a no-op.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {}
333
334pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
339 let hash = blake2b_simd::Params::new()
340 .hash_length(output_bytes)
341 .hash(data);
342 hex_encode(hash.as_bytes())
343}
344
345pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
347 let mut state = blake2b_simd::Params::new()
348 .hash_length(output_bytes)
349 .to_state();
350 let mut buf = vec![0u8; 16 * 1024 * 1024]; loop {
352 let n = reader.read(&mut buf)?;
353 if n == 0 {
354 break;
355 }
356 state.update(&buf[..n]);
357 }
358 Ok(hex_encode(state.finalize().as_bytes()))
359}
360
361pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
364 let file = open_noatime(path)?;
366 let metadata = file.metadata()?;
367 let len = metadata.len();
368 let is_regular = metadata.file_type().is_file();
369
370 if is_regular && len == 0 {
371 return Ok(blake2b_hash_data(&[], output_bytes));
372 }
373
374 if is_regular && len > 0 {
375 if len < MMAP_THRESHOLD {
377 return READ_BUF.with(|cell| {
378 let mut buf = cell.borrow_mut();
379 buf.clear();
380 buf.reserve(len as usize);
381 Read::read_to_end(&mut &file, &mut buf)?;
382 Ok(blake2b_hash_data(&buf, output_bytes))
383 });
384 }
385
386 return mmap_and_hash_blake2b(&file, output_bytes);
388 }
389
390 let reader = BufReader::with_capacity(16 * 1024 * 1024, file);
392 blake2b_hash_reader(reader, output_bytes)
393}
394
395pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
398 #[cfg(unix)]
400 {
401 use std::os::unix::io::AsRawFd;
402 let stdin = io::stdin();
403 let fd = stdin.as_raw_fd();
404 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
405 if unsafe { libc::fstat(fd, &mut stat) } == 0
406 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
407 && stat.st_size > 0
408 {
409 use std::os::unix::io::FromRawFd;
410 let file = unsafe { File::from_raw_fd(fd) };
411 let result = unsafe { MmapOptions::new().map(&file) };
412 std::mem::forget(file); if let Ok(mmap) = result {
414 #[cfg(target_os = "linux")]
415 {
416 let _ = mmap.advise(memmap2::Advice::Sequential);
417 }
418 return Ok(blake2b_hash_data_chunked(&mmap, output_bytes));
419 }
420 }
421 }
422 let mut data = Vec::new();
424 io::stdin().lock().read_to_end(&mut data)?;
425 Ok(blake2b_hash_data(&data, output_bytes))
426}
427
/// Writes one newline-terminated checksum line: the hash, a space, a mode
/// marker (`*` when `binary`, otherwise a space) and then the filename.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = if binary { "*" } else { " " };
    writeln!(out, "{} {}{}", hash, marker, filename)
}
438
/// Writes one NUL-terminated checksum record: the hash, a space, a mode
/// marker (`*` when `binary`, otherwise a space) and then the filename.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = if binary { "*" } else { " " };
    write!(out, "{} {}{}\0", hash, marker, filename)
}
449
450pub fn print_hash_tag(
452 out: &mut impl Write,
453 algo: HashAlgorithm,
454 hash: &str,
455 filename: &str,
456) -> io::Result<()> {
457 writeln!(out, "{} ({}) = {}", algo.name(), filename, hash)
458}
459
460pub fn print_hash_tag_zero(
462 out: &mut impl Write,
463 algo: HashAlgorithm,
464 hash: &str,
465 filename: &str,
466) -> io::Result<()> {
467 write!(out, "{} ({}) = {}\0", algo.name(), filename, hash)
468}
469
/// Writes one newline-terminated tagged BLAKE2b line.
///
/// A 512-bit digest is labelled plain `BLAKE2b`; any other width is labelled
/// `BLAKE2b-<bits>`.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => writeln!(out, "BLAKE2b ({}) = {}", filename, hash),
        _ => writeln!(out, "BLAKE2b-{} ({}) = {}", bits, filename, hash),
    }
}
485
/// Writes one NUL-terminated tagged BLAKE2b record.
///
/// A 512-bit digest is labelled plain `BLAKE2b`; any other width is labelled
/// `BLAKE2b-<bits>`.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => write!(out, "BLAKE2b ({}) = {}\0", filename, hash),
        _ => write!(out, "BLAKE2b-{} ({}) = {}\0", bits, filename, hash),
    }
}
499
/// Behaviour switches for verifying a checksum list with `check_file`.
///
/// `Default` yields every flag off and an empty `warn_prefix`.
#[derive(Debug, Clone, Default)]
pub struct CheckOptions {
    /// Do not print `<name>: OK` lines for files that verify.
    pub quiet: bool,
    /// Suppress all per-file result lines and read-error messages.
    pub status_only: bool,
    /// Not consulted by `check_file` itself; presumably lets the caller treat
    /// improperly formatted lines as a failure — confirm against the caller.
    pub strict: bool,
    /// Report each improperly formatted line on the error stream.
    pub warn: bool,
    /// Silently skip entries whose file does not exist.
    pub ignore_missing: bool,
    /// When non-empty, format warnings are written as
    /// `<prefix>: <line number>: ...` instead of `line <line number>: ...`.
    pub warn_prefix: String,
}
512
/// Tallies produced by verifying a checksum list with `check_file`.
///
/// `Default` is the all-zero tally.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CheckResult {
    /// Entries whose computed hash matched the expected one.
    pub ok: usize,
    /// Entries whose computed hash differed from the expected one.
    pub mismatches: usize,
    /// Non-empty lines that could not be parsed as a checksum entry.
    pub format_errors: usize,
    /// Entries whose file could not be opened or read.
    pub read_errors: usize,
    /// Missing files that were skipped because `ignore_missing` was set.
    pub ignored_missing: usize,
}
522
523pub fn check_file<R: BufRead>(
526 algo: HashAlgorithm,
527 reader: R,
528 opts: &CheckOptions,
529 out: &mut impl Write,
530 err_out: &mut impl Write,
531) -> io::Result<CheckResult> {
532 let quiet = opts.quiet;
533 let status_only = opts.status_only;
534 let warn = opts.warn;
535 let ignore_missing = opts.ignore_missing;
536 let mut ok_count = 0;
537 let mut mismatch_count = 0;
538 let mut format_errors = 0;
539 let mut read_errors = 0;
540 let mut ignored_missing_count = 0;
541 let mut line_num = 0;
542
543 for line_result in reader.lines() {
544 line_num += 1;
545 let line = line_result?;
546 let line = line.trim_end();
547
548 if line.is_empty() {
549 continue;
550 }
551
552 let (expected_hash, filename) = match parse_check_line(line) {
554 Some(v) => v,
555 None => {
556 format_errors += 1;
557 if warn {
558 out.flush()?;
559 if opts.warn_prefix.is_empty() {
560 writeln!(
561 err_out,
562 "line {}: improperly formatted {} checksum line",
563 line_num,
564 algo.name()
565 )?;
566 } else {
567 writeln!(
568 err_out,
569 "{}: {}: improperly formatted {} checksum line",
570 opts.warn_prefix,
571 line_num,
572 algo.name()
573 )?;
574 }
575 }
576 continue;
577 }
578 };
579
580 let actual = match hash_file(algo, Path::new(filename)) {
582 Ok(h) => h,
583 Err(e) => {
584 if ignore_missing && e.kind() == io::ErrorKind::NotFound {
585 ignored_missing_count += 1;
586 continue;
587 }
588 read_errors += 1;
589 if !status_only {
590 out.flush()?;
591 writeln!(err_out, "{}: {}", filename, e)?;
592 writeln!(out, "{}: FAILED open or read", filename)?;
593 }
594 continue;
595 }
596 };
597
598 if actual.eq_ignore_ascii_case(expected_hash) {
599 ok_count += 1;
600 if !quiet && !status_only {
601 writeln!(out, "{}: OK", filename)?;
602 }
603 } else {
604 mismatch_count += 1;
605 if !status_only {
606 writeln!(out, "{}: FAILED", filename)?;
607 }
608 }
609 }
610
611 Ok(CheckResult {
612 ok: ok_count,
613 mismatches: mismatch_count,
614 format_errors,
615 read_errors,
616 ignored_missing: ignored_missing_count,
617 })
618}
619
/// Splits one checksum-list line into `(hash, filename)`.
///
/// Two layouts are recognised:
/// * tagged — `MD5 (name) = hash`, `SHA256 (name) = hash`,
///   `BLAKE2b (name) = hash` or `BLAKE2b-<digits> (name) = hash`;
/// * standard — `hash  name` (text) or `hash *name` (binary), optionally with
///   a leading backslash, which is dropped. The filename is returned as
///   written; no unescaping is performed.
///
/// The tagged form is split at the *last* `") = "`, so filenames that contain
/// that sequence survive intact. The standard form is split at whichever of
/// `"  "` / `" *"` occurs first, so a binary-mode filename containing two
/// consecutive spaces is not cut in the middle.
///
/// Returns `None` when the line matches neither layout. The hash text itself
/// is not validated.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Returns what follows `ALGO (` for the recognised tag prefixes.
    fn after_tag_prefix(line: &str) -> Option<&str> {
        line.strip_prefix("MD5 (")
            .or_else(|| line.strip_prefix("SHA256 ("))
            .or_else(|| line.strip_prefix("BLAKE2b ("))
            .or_else(|| {
                let after = line.strip_prefix("BLAKE2b-")?;
                let open = after.find(" (")?;
                if after[..open].bytes().all(|b| b.is_ascii_digit()) {
                    Some(&after[open + 2..])
                } else {
                    None
                }
            })
    }

    if let Some(rest) = after_tag_prefix(line) {
        // The hash never contains ") = ", so the last occurrence is the separator.
        if let Some(sep) = rest.rfind(") = ") {
            return Some((&rest[sep + 4..], &rest[..sep]));
        }
    }

    let line = line.strip_prefix('\\').unwrap_or(line);

    // The separator is the earliest "  " or " *"; anything later belongs to
    // the filename.
    let sep = match (line.find("  "), line.find(" *")) {
        (Some(text), Some(binary)) => Some(text.min(binary)),
        (text, binary) => text.or(binary),
    }?;
    Some((&line[..sep], &line[sep + 2..]))
}
664
/// Splits a tagged checksum line `ALGO (name) = hash` into
/// `(hash, filename, bits)`.
///
/// `bits` is the number following the last `-` of the algorithm label (as in
/// `BLAKE2b-256`), or `None` when the label has no such numeric suffix.
///
/// The label ends at the first `" ("`; the filename ends at the *last*
/// `") = "`, so filenames that themselves contain `") = "` are preserved.
/// Returns `None` when either delimiter is missing. The algorithm label and
/// the hash text are not validated.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let open = line.find(" (")?;
    let (algo_part, rest) = (&line[..open], &line[open + 2..]);

    // The hash never contains ") = ", so the last occurrence is the separator.
    let close = rest.rfind(") = ")?;
    let (filename, hash) = (&rest[..close], &rest[close + 4..]);

    let bits = algo_part
        .rfind('-')
        .and_then(|dash| algo_part[dash + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
685
/// Builds, at compile time, the lookup table mapping every byte value to its
/// two lowercase ASCII hex digits (high nibble first).
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: [u8; 16] = *b"0123456789abcdef";

    let mut table = [[0u8; 2]; 256];
    let mut byte = 0usize;
    while byte < 256 {
        table[byte] = [DIGITS[byte / 16], DIGITS[byte % 16]];
        byte += 1;
    }
    table
}

/// Byte value -> two-character lowercase hex representation.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();
700
701pub(crate) fn hex_encode(bytes: &[u8]) -> String {
704 let len = bytes.len() * 2;
705 let mut hex = String::with_capacity(len);
706 unsafe {
708 let buf = hex.as_mut_vec();
709 buf.set_len(len);
710 let ptr = buf.as_mut_ptr();
711 for (i, &b) in bytes.iter().enumerate() {
712 let pair = *HEX_TABLE.get_unchecked(b as usize);
713 *ptr.add(i * 2) = pair[0];
714 *ptr.add(i * 2 + 1) = pair[1];
715 }
716 }
717 hex
718}