1use std::cell::RefCell;
2use std::fs::{self, File};
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9use md5::Md5;
10use sha2::{Digest, Sha256};
11
/// Digest algorithms selectable through the generic hashing entry points.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    /// SHA-256.
    Sha256,
    /// MD5.
    Md5,
    /// BLAKE2b.
    Blake2b,
}

impl HashAlgorithm {
    /// Returns the label written in tagged checksum lines and in
    /// format-error warnings.
    pub fn name(self) -> &'static str {
        match self {
            Self::Sha256 => "SHA256",
            Self::Md5 => "MD5",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
29
30fn hash_digest<D: Digest>(data: &[u8]) -> String {
33 hex_encode(&D::digest(data))
34}
35
36fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
39 STREAM_BUF.with(|cell| {
40 let mut buf = cell.borrow_mut();
41 let mut hasher = D::new();
42 loop {
43 let n = reader.read(&mut buf)?;
44 if n == 0 {
45 break;
46 }
47 hasher.update(&buf[..n]);
48 }
49 Ok(hex_encode(&hasher.finalize()))
50 })
51}
52
/// Size in bytes of the buffer used for streaming reads (4 MiB).
const HASH_READ_BUF: usize = 4 * 1024 * 1024;

/// Regular files shorter than this many bytes (1 MiB) are read whole into
/// `READ_BUF` and hashed in one shot; anything else is streamed.
const MMAP_THRESHOLD: u64 = 1024 * 1024;

thread_local! {
    /// Per-thread scratch buffer for whole-file reads of small files. It
    /// starts empty with `MMAP_THRESHOLD` bytes of capacity reserved.
    static READ_BUF: RefCell<Vec<u8>> =
        RefCell::new(Vec::with_capacity(MMAP_THRESHOLD as usize));

    /// Per-thread zero-filled buffer of `HASH_READ_BUF` bytes used by the
    /// streaming hashers.
    static STREAM_BUF: RefCell<Vec<u8>> = RefCell::new(vec![0u8; HASH_READ_BUF]);
}
71
72pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
74 match algo {
75 HashAlgorithm::Sha256 => hash_digest::<Sha256>(data),
76 HashAlgorithm::Md5 => hash_digest::<Md5>(data),
77 HashAlgorithm::Blake2b => {
78 let hash = blake2b_simd::blake2b(data);
79 hex_encode(hash.as_bytes())
80 }
81 }
82}
83
84pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
86 match algo {
87 HashAlgorithm::Sha256 => hash_reader_impl::<Sha256>(reader),
88 HashAlgorithm::Md5 => hash_reader_impl::<Md5>(reader),
89 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
90 }
91}
92
93#[cfg(target_os = "linux")]
96static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
97
98#[cfg(target_os = "linux")]
101fn open_noatime(path: &Path) -> io::Result<File> {
102 use std::os::unix::fs::OpenOptionsExt;
103 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
104 match fs::OpenOptions::new()
105 .read(true)
106 .custom_flags(libc::O_NOATIME)
107 .open(path)
108 {
109 Ok(f) => return Ok(f),
110 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
111 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
113 }
114 Err(e) => return Err(e), }
116 }
117 File::open(path)
118}
119
120#[cfg(not(target_os = "linux"))]
121fn open_noatime(path: &Path) -> io::Result<File> {
122 File::open(path)
123}
124
125#[cfg(target_os = "linux")]
127#[inline]
128fn fadvise_sequential(file: &File, len: u64) {
129 use std::os::unix::io::AsRawFd;
130 unsafe {
131 libc::posix_fadvise(file.as_raw_fd(), 0, len as i64, libc::POSIX_FADV_SEQUENTIAL);
132 }
133}
134
135#[cfg(not(target_os = "linux"))]
136#[inline]
137fn fadvise_sequential(_file: &File, _len: u64) {}
138
139pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
145 let file = open_noatime(path)?;
147 let metadata = file.metadata()?; let len = metadata.len();
149 let is_regular = metadata.file_type().is_file();
150
151 if is_regular && len == 0 {
152 return Ok(hash_bytes(algo, &[]));
153 }
154
155 if is_regular && len > 0 {
156 if len < MMAP_THRESHOLD {
158 return READ_BUF.with(|cell| {
159 let mut buf = cell.borrow_mut();
160 buf.clear();
161 buf.reserve(len as usize);
163 Read::read_to_end(&mut &file, &mut buf)?;
164 Ok(hash_bytes(algo, &buf))
165 });
166 }
167
168 fadvise_sequential(&file, len);
171 return hash_reader(algo, file);
172 }
173
174 hash_reader(algo, file)
176}
177
178pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
180 let stdin = io::stdin();
181 #[cfg(target_os = "linux")]
183 {
184 use std::os::unix::io::AsRawFd;
185 let fd = stdin.as_raw_fd();
186 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
187 if unsafe { libc::fstat(fd, &mut stat) } == 0
188 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
189 && stat.st_size > 0
190 {
191 unsafe {
192 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
193 }
194 }
195 }
196 hash_reader(algo, stdin.lock())
198}
199
/// Cheaply estimates the combined size of `paths` in bytes.
///
/// Only the first path is inspected; its length is multiplied, saturating,
/// by the number of paths. Returns 0 when `paths` is empty or the first
/// path's metadata cannot be read.
pub fn estimate_total_size(paths: &[&Path]) -> u64 {
    let count = paths.len() as u64;
    paths
        .first()
        .and_then(|first| fs::metadata(first).ok())
        .map_or(0, |meta| meta.len().saturating_mul(count))
}
214
215pub fn should_use_parallel(paths: &[&Path]) -> bool {
220 if paths.len() < 2 {
221 return false;
222 }
223 let total = estimate_total_size(paths);
224 let avg = total / paths.len() as u64;
225 avg >= 1024 * 1024
228}
229
230#[cfg(target_os = "linux")]
233pub fn readahead_files(paths: &[&Path]) {
234 use std::os::unix::io::AsRawFd;
235 for path in paths {
236 if let Ok(file) = open_noatime(path) {
237 if let Ok(meta) = file.metadata() {
238 let len = meta.len();
239 if meta.file_type().is_file() && len > 0 {
240 unsafe {
241 libc::posix_fadvise(
242 file.as_raw_fd(),
243 0,
244 len as i64,
245 libc::POSIX_FADV_WILLNEED,
246 );
247 }
248 }
249 }
250 }
251 }
252}
253
254#[cfg(not(target_os = "linux"))]
255pub fn readahead_files(_paths: &[&Path]) {
256 }
258
259pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
264 let hash = blake2b_simd::Params::new()
265 .hash_length(output_bytes)
266 .hash(data);
267 hex_encode(hash.as_bytes())
268}
269
270pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
273 STREAM_BUF.with(|cell| {
274 let mut buf = cell.borrow_mut();
275 let mut state = blake2b_simd::Params::new()
276 .hash_length(output_bytes)
277 .to_state();
278 loop {
279 let n = reader.read(&mut buf)?;
280 if n == 0 {
281 break;
282 }
283 state.update(&buf[..n]);
284 }
285 Ok(hex_encode(state.finalize().as_bytes()))
286 })
287}
288
289pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
292 let file = open_noatime(path)?;
294 let metadata = file.metadata()?;
295 let len = metadata.len();
296 let is_regular = metadata.file_type().is_file();
297
298 if is_regular && len == 0 {
299 return Ok(blake2b_hash_data(&[], output_bytes));
300 }
301
302 if is_regular && len > 0 {
303 if len < MMAP_THRESHOLD {
305 return READ_BUF.with(|cell| {
306 let mut buf = cell.borrow_mut();
307 buf.clear();
308 buf.reserve(len as usize);
309 Read::read_to_end(&mut &file, &mut buf)?;
310 Ok(blake2b_hash_data(&buf, output_bytes))
311 });
312 }
313
314 fadvise_sequential(&file, len);
316 return blake2b_hash_reader(file, output_bytes);
317 }
318
319 blake2b_hash_reader(file, output_bytes)
321}
322
323pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
326 let stdin = io::stdin();
327 #[cfg(target_os = "linux")]
328 {
329 use std::os::unix::io::AsRawFd;
330 let fd = stdin.as_raw_fd();
331 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
332 if unsafe { libc::fstat(fd, &mut stat) } == 0
333 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
334 && stat.st_size > 0
335 {
336 unsafe {
337 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
338 }
339 }
340 }
341 blake2b_hash_reader(stdin.lock(), output_bytes)
342}
343
/// Writes one newline-terminated checksum line: `hash`, a space, a mode
/// marker (`*` when `binary`, a space otherwise), then `filename`.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = if binary { "*" } else { " " };
    writeln!(out, "{} {}{}", hash, marker, filename)
}
354
/// Same layout as `print_hash`, but the record ends with a NUL byte
/// instead of a newline.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let marker = if binary { "*" } else { " " };
    write!(out, "{} {}{}\0", hash, marker, filename)
}
365
366pub fn print_hash_tag(
368 out: &mut impl Write,
369 algo: HashAlgorithm,
370 hash: &str,
371 filename: &str,
372) -> io::Result<()> {
373 writeln!(out, "{} ({}) = {}", algo.name(), filename, hash)
374}
375
376pub fn print_hash_tag_zero(
378 out: &mut impl Write,
379 algo: HashAlgorithm,
380 hash: &str,
381 filename: &str,
382) -> io::Result<()> {
383 write!(out, "{} ({}) = {}\0", algo.name(), filename, hash)
384}
385
/// Writes one newline-terminated tagged BLAKE2b checksum line.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` for any
/// other value.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => writeln!(out, "BLAKE2b ({}) = {}", filename, hash),
        _ => writeln!(out, "BLAKE2b-{} ({}) = {}", bits, filename, hash),
    }
}
401
/// Same layout as `print_hash_tag_b2sum`, but the record ends with a NUL
/// byte instead of a newline.
///
/// # Errors
///
/// Propagates any error from writing to `out`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => write!(out, "BLAKE2b ({}) = {}\0", filename, hash),
        _ => write!(out, "BLAKE2b-{} ({}) = {}\0", bits, filename, hash),
    }
}
415
/// Switches controlling how `check_file` reports its findings.
pub struct CheckOptions {
    /// Suppress the `OK` line for files whose checksum matches.
    pub quiet: bool,
    /// Suppress all per-file result lines and read-error messages.
    pub status_only: bool,
    /// Not consulted by `check_file` itself.
    /// NOTE(review): presumably interpreted by the caller when deciding
    /// the exit status — confirm.
    pub strict: bool,
    /// Write a warning to the error stream for each improperly formatted
    /// line.
    pub warn: bool,
    /// Silently skip listed files whose open/read fails with `NotFound`.
    pub ignore_missing: bool,
    /// Prefix for format warnings. When empty, warnings use the
    /// `line N: ...` form instead of `<prefix>: N: ...`.
    pub warn_prefix: String,
}
428
/// Tallies produced by one `check_file` run.
pub struct CheckResult {
    /// Files whose computed digest matched the expected one.
    pub ok: usize,
    /// Files whose computed digest differed from the expected one.
    pub mismatches: usize,
    /// Non-empty lines that could not be parsed as a checksum line.
    pub format_errors: usize,
    /// Files that could not be opened or read (and were not ignored).
    pub read_errors: usize,
    /// Missing files skipped because `ignore_missing` was set.
    pub ignored_missing: usize,
}
438
439pub fn check_file<R: BufRead>(
442 algo: HashAlgorithm,
443 reader: R,
444 opts: &CheckOptions,
445 out: &mut impl Write,
446 err_out: &mut impl Write,
447) -> io::Result<CheckResult> {
448 let quiet = opts.quiet;
449 let status_only = opts.status_only;
450 let warn = opts.warn;
451 let ignore_missing = opts.ignore_missing;
452 let mut ok_count = 0;
453 let mut mismatch_count = 0;
454 let mut format_errors = 0;
455 let mut read_errors = 0;
456 let mut ignored_missing_count = 0;
457 let mut line_num = 0;
458
459 for line_result in reader.lines() {
460 line_num += 1;
461 let line = line_result?;
462 let line = line.trim_end();
463
464 if line.is_empty() {
465 continue;
466 }
467
468 let (expected_hash, filename) = match parse_check_line(line) {
470 Some(v) => v,
471 None => {
472 format_errors += 1;
473 if warn {
474 out.flush()?;
475 if opts.warn_prefix.is_empty() {
476 writeln!(
477 err_out,
478 "line {}: improperly formatted {} checksum line",
479 line_num,
480 algo.name()
481 )?;
482 } else {
483 writeln!(
484 err_out,
485 "{}: {}: improperly formatted {} checksum line",
486 opts.warn_prefix,
487 line_num,
488 algo.name()
489 )?;
490 }
491 }
492 continue;
493 }
494 };
495
496 let actual = match hash_file(algo, Path::new(filename)) {
498 Ok(h) => h,
499 Err(e) => {
500 if ignore_missing && e.kind() == io::ErrorKind::NotFound {
501 ignored_missing_count += 1;
502 continue;
503 }
504 read_errors += 1;
505 if !status_only {
506 out.flush()?;
507 writeln!(err_out, "{}: {}", filename, e)?;
508 writeln!(out, "{}: FAILED open or read", filename)?;
509 }
510 continue;
511 }
512 };
513
514 if actual.eq_ignore_ascii_case(expected_hash) {
515 ok_count += 1;
516 if !quiet && !status_only {
517 writeln!(out, "{}: OK", filename)?;
518 }
519 } else {
520 mismatch_count += 1;
521 if !status_only {
522 writeln!(out, "{}: FAILED", filename)?;
523 }
524 }
525 }
526
527 Ok(CheckResult {
528 ok: ok_count,
529 mismatches: mismatch_count,
530 format_errors,
531 read_errors,
532 ignored_missing: ignored_missing_count,
533 })
534}
535
/// Splits one checksum line into `(hash, filename)`.
///
/// Two layouts are recognised:
///
/// * **Tagged**: `MD5 (name) = hash`, `SHA256 (name) = hash`,
///   `BLAKE2b (name) = hash` or `BLAKE2b-<digits> (name) = hash`. The split
///   is made at the *last* `") = "`, so a filename that itself contains
///   that sequence is kept intact.
/// * **Untagged**: `hash`, a space, a mode marker (a space or `*`), then
///   the filename. One leading backslash is dropped first; the filename is
///   returned as written, with no unescaping.
///
/// A line that starts with a known tag but has no `") = "` falls through
/// to the untagged rules.
///
/// Returns `None` when neither layout matches. No input makes this
/// function panic.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    let tagged = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // `BLAKE2b-<bits> (`: the tag carries a numeric length suffix.
            let after = line.strip_prefix("BLAKE2b-")?;
            let sp = after.find(" (")?;
            if after[..sp].bytes().all(|b| b.is_ascii_digit()) {
                Some(&after[sp + 2..])
            } else {
                None
            }
        });
    if let Some(rest) = tagged {
        // Search from the right: the hash cannot contain ") = ", but the
        // filename can.
        if let Some(paren_idx) = rest.rfind(") = ") {
            let filename = &rest[..paren_idx];
            let hash = &rest[paren_idx + 4..];
            return Some((hash, filename));
        }
    }

    let line = line.strip_prefix('\\').unwrap_or(line);

    // The hash has no spaces, so the separator is the first space, and it
    // must be followed by the one-byte mode marker. Checking the marker as
    // a byte means `sep + 2` is always in range and on a char boundary.
    let sep = line.find(' ')?;
    match line.as_bytes().get(sep + 1) {
        Some(b' ') | Some(b'*') => Some((&line[..sep], &line[sep + 2..])),
        _ => None,
    }
}
580
/// Parses a tagged checksum line `ALGO (filename) = hash` into
/// `(hash, filename, bits)`.
///
/// `ALGO` is everything before the first `" ("`; it is not checked against
/// known algorithm names. `bits` is the number after the last `-` in
/// `ALGO` (for example `256` for `BLAKE2b-256`), or `None` when there is
/// no dash or the suffix is not a number.
///
/// The filename/hash split is made at the *last* `") = "`, so a filename
/// that itself contains that sequence is kept intact.
///
/// Returns `None` when the line has no `" ("` or no `") = "` after it.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let paren_start = line.find(" (")?;
    let algo_part = &line[..paren_start];
    let rest = &line[paren_start + 2..];

    // Search from the right: the hash cannot contain ") = ", but the
    // filename can.
    let paren_end = rest.rfind(") = ")?;
    let filename = &rest[..paren_end];
    let hash = &rest[paren_end + 4..];

    let bits = algo_part
        .rfind('-')
        .and_then(|dash_pos| algo_part[dash_pos + 1..].parse::<usize>().ok());

    Some((hash, filename, bits))
}
601
/// Builds, at compile time, a table mapping every byte value to its two
/// lowercase ASCII hex digits (high nibble first).
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    // `for` loops are not usable in `const fn`, hence the manual counter.
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

/// Lookup table from a byte value to its two-character lowercase hex form.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Encodes `bytes` as a lowercase hexadecimal string, two characters per
/// input byte.
///
/// This uses only safe code. The output is allocated once at its final
/// size, and a `u8` index into the 256-entry table is always in range.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut hex = String::with_capacity(bytes.len() * 2);
    for &b in bytes {
        let [hi, lo] = HEX_TABLE[usize::from(b)];
        hex.push(char::from(hi));
        hex.push(char::from(lo));
    }
    hex
}