1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9#[cfg(not(target_os = "linux"))]
10use digest::Digest;
11#[cfg(not(target_os = "linux"))]
12use md5::Md5;
13
/// Checksum algorithms this module can compute.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    /// SHA-256.
    Sha256,
    /// MD5.
    Md5,
    /// BLAKE2b.
    Blake2b,
}

impl HashAlgorithm {
    /// Returns the label used for this algorithm in tagged output lines and
    /// in check-mode diagnostics.
    pub fn name(self) -> &'static str {
        match self {
            Self::Sha256 => "SHA256",
            Self::Md5 => "MD5",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
31
/// Hashes `data` in one shot with the digest type `D` and returns the result
/// as lowercase hex.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let mut hasher = D::new();
    hasher.update(data);
    hex_encode(&hasher.finalize())
}
39
/// Streams `reader` to EOF through the digest type `D` using the per-thread
/// read buffer, and returns the digest as lowercase hex.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|scratch| {
        let mut chunk = scratch.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut state = D::new();
        // `read_full` only returns 0 at end of input.
        let mut filled = read_full(&mut reader, &mut chunk)?;
        while filled != 0 {
            state.update(&chunk[..filled]);
            filled = read_full(&mut reader, &mut chunk)?;
        }
        Ok(hex_encode(&state.finalize()))
    })
}
57
/// Size in bytes (8 MiB) of the per-thread buffer used by the streaming hashers.
const HASH_READ_BUF: usize = 8 << 20;

thread_local! {
    /// Per-thread read buffer; starts empty and is grown by `ensure_stream_buf`.
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Zero-extends `buf` to `HASH_READ_BUF` bytes if it is currently shorter;
/// a buffer that is already at least that long is left untouched.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() >= HASH_READ_BUF {
        return;
    }
    buf.resize(HASH_READ_BUF, 0);
}
80
81#[cfg(target_os = "linux")]
86fn sha256_bytes(data: &[u8]) -> String {
87 if data.len() < TINY_FILE_LIMIT as usize {
92 use digest::Digest;
93 return hex_encode(&sha2::Sha256::digest(data));
94 }
95 let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
96 .expect("SHA256 hash failed");
97 hex_encode(&digest)
98}
99
/// Returns the lowercase hex SHA-256 of `data`, computed with `ring`
/// (builds that are neither Apple nor Linux).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    let digest = ring::digest::digest(&ring::digest::SHA256, data);
    hex_encode(digest.as_ref())
}
105
/// Returns the lowercase hex SHA-256 of `data`, computed with the `sha2`
/// crate (Apple builds).
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    use sha2::Sha256;
    hash_digest::<Sha256>(data)
}
111
112#[cfg(target_os = "linux")]
115fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
116 STREAM_BUF.with(|cell| {
117 let mut buf = cell.borrow_mut();
118 ensure_stream_buf(&mut buf);
119 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
120 .map_err(|e| io::Error::other(e))?;
121 loop {
122 let n = read_full(&mut reader, &mut buf)?;
123 if n == 0 {
124 break;
125 }
126 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
127 }
128 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
129 Ok(hex_encode(&digest))
130 })
131}
132
/// Streams `reader` to EOF through `ring`'s SHA-256 using the per-thread read
/// buffer and returns the digest as lowercase hex (neither Apple nor Linux).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|scratch| {
        let mut chunk = scratch.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
        // `read_full` only returns 0 at end of input.
        let mut filled = read_full(&mut reader, &mut chunk)?;
        while filled != 0 {
            ctx.update(&chunk[..filled]);
            filled = read_full(&mut reader, &mut chunk)?;
        }
        Ok(hex_encode(ctx.finish().as_ref()))
    })
}
150
/// Streams `reader` through the `sha2` SHA-256 implementation and returns the
/// digest as lowercase hex (Apple builds).
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    use sha2::Sha256;
    hash_reader_impl::<Sha256>(reader)
}
156
157pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
159 match algo {
160 HashAlgorithm::Sha256 => sha256_bytes(data),
161 HashAlgorithm::Md5 => md5_bytes(data),
162 HashAlgorithm::Blake2b => {
163 let hash = blake2b_simd::blake2b(data);
164 hex_encode(hash.as_bytes())
165 }
166 }
167}
168
169#[cfg(target_os = "linux")]
173fn md5_bytes(data: &[u8]) -> String {
174 if data.len() < TINY_FILE_LIMIT as usize {
178 use digest::Digest;
179 return hex_encode(&md5::Md5::digest(data));
180 }
181 let digest =
182 openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
183 hex_encode(&digest)
184}
185
/// Returns the lowercase hex MD5 of `data`, computed with the `md5` crate
/// (non-Linux builds).
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    hex_encode(&Md5::digest(data))
}
191
192pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
194 match algo {
195 HashAlgorithm::Sha256 => sha256_reader(reader),
196 HashAlgorithm::Md5 => md5_reader(reader),
197 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
198 }
199}
200
201#[cfg(target_os = "linux")]
203fn md5_reader(mut reader: impl Read) -> io::Result<String> {
204 STREAM_BUF.with(|cell| {
205 let mut buf = cell.borrow_mut();
206 ensure_stream_buf(&mut buf);
207 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
208 .map_err(|e| io::Error::other(e))?;
209 loop {
210 let n = read_full(&mut reader, &mut buf)?;
211 if n == 0 {
212 break;
213 }
214 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
215 }
216 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
217 Ok(hex_encode(&digest))
218 })
219}
220
/// Streams `reader` through the `md5` crate and returns the digest as
/// lowercase hex (non-Linux builds).
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    type Algo = Md5;
    hash_reader_impl::<Algo>(reader)
}
226
227#[cfg(target_os = "linux")]
230static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
231
232#[cfg(target_os = "linux")]
235fn open_noatime(path: &Path) -> io::Result<File> {
236 use std::os::unix::fs::OpenOptionsExt;
237 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
238 match std::fs::OpenOptions::new()
239 .read(true)
240 .custom_flags(libc::O_NOATIME)
241 .open(path)
242 {
243 Ok(f) => return Ok(f),
244 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
245 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
247 }
248 Err(e) => return Err(e), }
250 }
251 File::open(path)
252}
253
/// Opens `path` read-only. Non-Linux builds have no `O_NOATIME` handling, so
/// this is a plain open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
258
259#[cfg(target_os = "linux")]
262#[inline]
263fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
264 let file = open_noatime(path)?;
265 let fd = {
266 use std::os::unix::io::AsRawFd;
267 file.as_raw_fd()
268 };
269 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
270 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
271 return Err(io::Error::last_os_error());
272 }
273 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
274 let size = stat.st_size as u64;
275 Ok((file, size, is_regular))
276}
277
/// Opens `path` and returns `(file, size_in_bytes, is_regular_file)` taken
/// from the open handle's metadata (non-Linux builds).
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let (len, regular) = (meta.len(), meta.file_type().is_file());
    Ok((file, len, regular))
}
285
/// Minimum size (1 MiB) for which `posix_fadvise` hints are issued.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1 << 20;

/// Regular files below this size (16 MiB) are read into the per-thread
/// buffer; files at or above it are memory-mapped.
const SMALL_FILE_LIMIT: u64 = 16 << 20;

/// Regular files below this size (8 KiB) are read into a stack buffer.
const TINY_FILE_LIMIT: u64 = 8 << 10;

thread_local! {
    /// Per-thread buffer reused for whole-file reads of small files.
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 << 10));
}
310
311pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
314 let (file, file_size, is_regular) = open_and_stat(path)?;
315
316 if is_regular && file_size == 0 {
317 return Ok(hash_bytes(algo, &[]));
318 }
319
320 if file_size > 0 && is_regular {
321 if file_size < TINY_FILE_LIMIT {
323 return hash_file_tiny(algo, file, file_size as usize);
324 }
325 if file_size >= SMALL_FILE_LIMIT {
327 #[cfg(target_os = "linux")]
328 if file_size >= FADVISE_MIN_SIZE {
329 use std::os::unix::io::AsRawFd;
330 unsafe {
331 libc::posix_fadvise(
332 file.as_raw_fd(),
333 0,
334 file_size as i64,
335 libc::POSIX_FADV_SEQUENTIAL,
336 );
337 }
338 }
339 let mmap_result = if file_size >= 4 * 1024 * 1024 {
343 unsafe { memmap2::MmapOptions::new().populate().map(&file) }
344 } else {
345 unsafe { memmap2::MmapOptions::new().map(&file) }
346 };
347 if let Ok(mmap) = mmap_result {
348 #[cfg(target_os = "linux")]
349 {
350 let _ = mmap.advise(memmap2::Advice::Sequential);
351 if file_size >= 2 * 1024 * 1024 {
352 let _ = mmap.advise(memmap2::Advice::HugePage);
353 }
354 }
355 return Ok(hash_bytes(algo, &mmap));
356 }
357 }
358 if file_size < SMALL_FILE_LIMIT {
361 return hash_file_small(algo, file, file_size as usize);
362 }
363 }
364
365 #[cfg(target_os = "linux")]
367 if file_size >= FADVISE_MIN_SIZE {
368 use std::os::unix::io::AsRawFd;
369 unsafe {
370 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
371 }
372 }
373 hash_reader(algo, file)
374}
375
376#[inline]
380fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
381 let mut buf = [0u8; 8192];
382 let mut total = 0;
383 while total < size {
385 match file.read(&mut buf[total..size]) {
386 Ok(0) => break,
387 Ok(n) => total += n,
388 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
389 Err(e) => return Err(e),
390 }
391 }
392 Ok(hash_bytes(algo, &buf[..total]))
393}
394
395#[inline]
398fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
399 SMALL_FILE_BUF.with(|cell| {
400 let mut buf = cell.borrow_mut();
401 buf.clear();
403 buf.reserve(size);
404 unsafe {
407 buf.set_len(size);
408 }
409 let mut total = 0;
410 while total < size {
411 match file.read(&mut buf[total..size]) {
412 Ok(0) => break,
413 Ok(n) => total += n,
414 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
415 Err(e) => return Err(e),
416 }
417 }
418 Ok(hash_bytes(algo, &buf[..total]))
419 })
420}
421
422pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
424 let stdin = io::stdin();
425 #[cfg(target_os = "linux")]
427 {
428 use std::os::unix::io::AsRawFd;
429 let fd = stdin.as_raw_fd();
430 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
431 if unsafe { libc::fstat(fd, &mut stat) } == 0
432 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
433 && stat.st_size > 0
434 {
435 unsafe {
436 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
437 }
438 }
439 }
440 hash_reader(algo, stdin.lock())
442}
443
/// Returns `true` when more than one path is given.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
451
452#[cfg(target_os = "linux")]
457pub fn readahead_files(paths: &[&Path]) {
458 use std::os::unix::io::AsRawFd;
459 for path in paths {
460 if let Ok(file) = open_noatime(path) {
461 if let Ok(meta) = file.metadata() {
462 let len = meta.len();
463 if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
464 unsafe {
465 libc::posix_fadvise(
466 file.as_raw_fd(),
467 0,
468 len as i64,
469 libc::POSIX_FADV_WILLNEED,
470 );
471 }
472 }
473 }
474 }
475 }
476}
477
/// Does nothing: non-Linux builds issue no readahead hints.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {}
482
483pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
488 let hash = blake2b_simd::Params::new()
489 .hash_length(output_bytes)
490 .hash(data);
491 hex_encode(hash.as_bytes())
492}
493
494pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
497 STREAM_BUF.with(|cell| {
498 let mut buf = cell.borrow_mut();
499 ensure_stream_buf(&mut buf);
500 let mut state = blake2b_simd::Params::new()
501 .hash_length(output_bytes)
502 .to_state();
503 loop {
504 let n = read_full(&mut reader, &mut buf)?;
505 if n == 0 {
506 break;
507 }
508 state.update(&buf[..n]);
509 }
510 Ok(hex_encode(state.finalize().as_bytes()))
511 })
512}
513
514pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
518 let (file, file_size, is_regular) = open_and_stat(path)?;
519
520 if is_regular && file_size == 0 {
521 return Ok(blake2b_hash_data(&[], output_bytes));
522 }
523
524 if file_size > 0 && is_regular {
525 if file_size < TINY_FILE_LIMIT {
527 return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
528 }
529 if file_size >= SMALL_FILE_LIMIT {
531 #[cfg(target_os = "linux")]
532 if file_size >= FADVISE_MIN_SIZE {
533 use std::os::unix::io::AsRawFd;
534 unsafe {
535 libc::posix_fadvise(
536 file.as_raw_fd(),
537 0,
538 file_size as i64,
539 libc::POSIX_FADV_SEQUENTIAL,
540 );
541 }
542 }
543 let mmap_result = if file_size >= 4 * 1024 * 1024 {
545 unsafe { memmap2::MmapOptions::new().populate().map(&file) }
546 } else {
547 unsafe { memmap2::MmapOptions::new().map(&file) }
548 };
549 if let Ok(mmap) = mmap_result {
550 #[cfg(target_os = "linux")]
551 {
552 let _ = mmap.advise(memmap2::Advice::Sequential);
553 if file_size >= 2 * 1024 * 1024 {
554 let _ = mmap.advise(memmap2::Advice::HugePage);
555 }
556 }
557 return Ok(blake2b_hash_data(&mmap, output_bytes));
558 }
559 }
560 if file_size < SMALL_FILE_LIMIT {
562 return blake2b_hash_file_small(file, file_size as usize, output_bytes);
563 }
564 }
565
566 #[cfg(target_os = "linux")]
568 if file_size >= FADVISE_MIN_SIZE {
569 use std::os::unix::io::AsRawFd;
570 unsafe {
571 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
572 }
573 }
574 blake2b_hash_reader(file, output_bytes)
575}
576
577#[inline]
579fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
580 let mut buf = [0u8; 8192];
581 let mut total = 0;
582 while total < size {
583 match file.read(&mut buf[total..size]) {
584 Ok(0) => break,
585 Ok(n) => total += n,
586 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
587 Err(e) => return Err(e),
588 }
589 }
590 Ok(blake2b_hash_data(&buf[..total], output_bytes))
591}
592
593#[inline]
595fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
596 SMALL_FILE_BUF.with(|cell| {
597 let mut buf = cell.borrow_mut();
598 buf.clear();
599 buf.reserve(size);
600 unsafe {
602 buf.set_len(size);
603 }
604 let mut total = 0;
605 while total < size {
606 match file.read(&mut buf[total..size]) {
607 Ok(0) => break,
608 Ok(n) => total += n,
609 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
610 Err(e) => return Err(e),
611 }
612 }
613 Ok(blake2b_hash_data(&buf[..total], output_bytes))
614 })
615}
616
617pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
620 let stdin = io::stdin();
621 #[cfg(target_os = "linux")]
622 {
623 use std::os::unix::io::AsRawFd;
624 let fd = stdin.as_raw_fd();
625 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
626 if unsafe { libc::fstat(fd, &mut stat) } == 0
627 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
628 && stat.st_size > 0
629 {
630 unsafe {
631 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
632 }
633 }
634 }
635 blake2b_hash_reader(stdin.lock(), output_bytes)
636}
637
638enum FileContent {
641 Mmap(memmap2::Mmap),
642 Buf(Vec<u8>),
643}
644
645impl AsRef<[u8]> for FileContent {
646 fn as_ref(&self) -> &[u8] {
647 match self {
648 FileContent::Mmap(m) => m,
649 FileContent::Buf(v) => v,
650 }
651 }
652}
653
654fn open_file_content(path: &Path) -> io::Result<FileContent> {
658 let (file, size, is_regular) = open_and_stat(path)?;
659 if is_regular && size == 0 {
660 return Ok(FileContent::Buf(Vec::new()));
661 }
662 if is_regular && size > 0 {
663 if size < TINY_FILE_LIMIT {
667 let mut buf = vec![0u8; size as usize];
668 let mut total = 0;
669 let mut f = file;
670 while total < size as usize {
671 match f.read(&mut buf[total..]) {
672 Ok(0) => break,
673 Ok(n) => total += n,
674 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
675 Err(e) => return Err(e),
676 }
677 }
678 buf.truncate(total);
679 return Ok(FileContent::Buf(buf));
680 }
681 let mmap_result = if size >= 4 * 1024 * 1024 {
683 unsafe { memmap2::MmapOptions::new().populate().map(&file) }
684 } else {
685 unsafe { memmap2::MmapOptions::new().map(&file) }
686 };
687 if let Ok(mmap) = mmap_result {
688 #[cfg(target_os = "linux")]
689 {
690 let _ = mmap.advise(memmap2::Advice::Sequential);
691 if size >= 2 * 1024 * 1024 {
692 let _ = mmap.advise(memmap2::Advice::HugePage);
693 }
694 }
695 return Ok(FileContent::Mmap(mmap));
696 }
697 let mut buf = vec![0u8; size as usize];
699 let mut total = 0;
700 let mut f = file;
701 while total < size as usize {
702 match f.read(&mut buf[total..]) {
703 Ok(0) => break,
704 Ok(n) => total += n,
705 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
706 Err(e) => return Err(e),
707 }
708 }
709 buf.truncate(total);
710 return Ok(FileContent::Buf(buf));
711 }
712 let mut buf = Vec::new();
714 let mut f = file;
715 f.read_to_end(&mut buf)?;
716 Ok(FileContent::Buf(buf))
717}
718
719fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
724 let mut file = open_noatime(path)?;
725 let mut small_buf = [0u8; 4096];
728 match file.read(&mut small_buf) {
729 Ok(0) => return Ok(FileContent::Buf(Vec::new())),
730 Ok(n) if n < small_buf.len() => {
731 return Ok(FileContent::Buf(small_buf[..n].to_vec()));
733 }
734 Ok(n) => {
735 let mut buf = [0u8; 65536];
737 buf[..n].copy_from_slice(&small_buf[..n]);
738 let mut total = n;
739 loop {
740 match file.read(&mut buf[total..]) {
741 Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
742 Ok(n) => {
743 total += n;
744 if total >= buf.len() {
745 return open_file_content(path);
746 }
747 }
748 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
749 Err(e) => return Err(e),
750 }
751 }
752 }
753 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
754 let mut buf = [0u8; 65536];
755 let mut total = 0;
756 loop {
757 match file.read(&mut buf[total..]) {
758 Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
759 Ok(n) => {
760 total += n;
761 if total >= buf.len() {
762 return open_file_content(path);
763 }
764 }
765 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
766 Err(e) => return Err(e),
767 }
768 }
769 }
770 Err(e) => return Err(e),
771 }
772}
773
774pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
782 use blake2b_simd::many::{HashManyJob, hash_many};
783
784 let use_fast = paths.len() >= 20;
789
790 let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
791 paths.iter().map(|&path| open_file_content(path)).collect()
793 } else {
794 let num_threads = std::thread::available_parallelism()
795 .map(|n| n.get())
796 .unwrap_or(4)
797 .min(paths.len());
798 let chunk_size = (paths.len() + num_threads - 1) / num_threads;
799
800 std::thread::scope(|s| {
801 let handles: Vec<_> = paths
802 .chunks(chunk_size)
803 .map(|chunk| {
804 s.spawn(move || {
805 chunk
806 .iter()
807 .map(|&path| {
808 if use_fast {
809 open_file_content_fast(path)
810 } else {
811 open_file_content(path)
812 }
813 })
814 .collect::<Vec<_>>()
815 })
816 })
817 .collect();
818
819 handles
820 .into_iter()
821 .flat_map(|h| h.join().unwrap())
822 .collect()
823 })
824 };
825
826 let hash_results = {
828 let mut params = blake2b_simd::Params::new();
829 params.hash_length(output_bytes);
830
831 let ok_entries: Vec<(usize, &[u8])> = file_data
832 .iter()
833 .enumerate()
834 .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
835 .collect();
836
837 let mut jobs: Vec<HashManyJob> = ok_entries
838 .iter()
839 .map(|(_, data)| HashManyJob::new(¶ms, data))
840 .collect();
841
842 hash_many(jobs.iter_mut());
844
845 let mut hm: Vec<Option<String>> = vec![None; paths.len()];
847 for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
848 hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
849 }
850 hm
851 }; hash_results
855 .into_iter()
856 .zip(file_data)
857 .map(|(hash_opt, result)| match result {
858 Ok(_) => Ok(hash_opt.unwrap()),
859 Err(e) => Err(e),
860 })
861 .collect()
862}
863
864pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
870 if paths.len() <= 20 {
874 readahead_files_all(paths);
875 }
876
877 let use_fast = paths.len() >= 2;
879
880 let num_threads = std::thread::available_parallelism()
884 .map(|n| n.get())
885 .unwrap_or(4)
886 .min(paths.len());
887 let chunk_size = (paths.len() + num_threads - 1) / num_threads;
888
889 std::thread::scope(|s| {
890 let handles: Vec<_> = paths
891 .chunks(chunk_size)
892 .map(|chunk| {
893 s.spawn(move || {
894 chunk
895 .iter()
896 .map(|&path| {
897 if use_fast {
898 hash_file_nostat(algo, path)
899 } else {
900 hash_file(algo, path)
901 }
902 })
903 .collect::<Vec<_>>()
904 })
905 })
906 .collect();
907
908 handles
909 .into_iter()
910 .flat_map(|h| h.join().unwrap())
911 .collect()
912 })
913}
914
915pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
921 let mut file = open_noatime(path)?;
922 let mut small_buf = [0u8; 4096];
926 match file.read(&mut small_buf) {
927 Ok(0) => return Ok(hash_bytes(algo, &[])),
928 Ok(n) if n < small_buf.len() => {
929 return Ok(hash_bytes(algo, &small_buf[..n]));
931 }
932 Ok(n) => {
933 let mut buf = [0u8; 65536];
935 buf[..n].copy_from_slice(&small_buf[..n]);
936 let mut total = n;
937 loop {
938 match file.read(&mut buf[total..]) {
939 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
940 Ok(n) => {
941 total += n;
942 if total >= buf.len() {
943 return hash_file(algo, path);
944 }
945 }
946 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
947 Err(e) => return Err(e),
948 }
949 }
950 }
951 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
952 let mut buf = [0u8; 65536];
954 let mut total = 0;
955 loop {
956 match file.read(&mut buf[total..]) {
957 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
958 Ok(n) => {
959 total += n;
960 if total >= buf.len() {
961 return hash_file(algo, path);
962 }
963 }
964 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
965 Err(e) => return Err(e),
966 }
967 }
968 }
969 Err(e) => return Err(e),
970 }
971}
972
973#[cfg(target_os = "linux")]
976pub fn readahead_files_all(paths: &[&Path]) {
977 use std::os::unix::io::AsRawFd;
978 for path in paths {
979 if let Ok(file) = open_noatime(path) {
980 if let Ok(meta) = file.metadata() {
981 if meta.file_type().is_file() {
982 let len = meta.len();
983 unsafe {
984 libc::posix_fadvise(
985 file.as_raw_fd(),
986 0,
987 len as i64,
988 libc::POSIX_FADV_WILLNEED,
989 );
990 }
991 }
992 }
993 }
994 }
995}
996
/// Does nothing: non-Linux builds issue no readahead hints.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
999
/// Writes one checksum line: `hash`, a space, a mode character (`*` when
/// `binary`, otherwise a space), `filename`, and a newline.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let parts: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\n"];
    for part in parts {
        out.write_all(part)?;
    }
    Ok(())
}
1014
/// Writes one checksum line terminated by a NUL byte instead of a newline:
/// `hash`, a space, a mode character (`*` when `binary`, otherwise a space),
/// `filename`, `\0`.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let parts: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\0"];
    for part in parts {
        out.write_all(part)?;
    }
    Ok(())
}
1028
thread_local! {
    /// Per-thread scratch buffer in which an output line is assembled before
    /// being written.
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}

/// Assembles one checksum line in the per-thread buffer and writes it to
/// `out` with a single `write_all`.
///
/// Layout: an optional leading backslash (when `escaped`), `hash`, a space,
/// a mode character (`*` when `binary`, otherwise a space), `filename`, then
/// NUL when `zero` is set or a newline otherwise. `filename` is written as
/// given; no escaping is applied here.
#[inline]
pub fn write_hash_line(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
    zero: bool,
    escaped: bool,
) -> io::Result<()> {
    LINE_BUF.with(|cell| {
        let mut line = cell.borrow_mut();
        line.clear();
        if escaped {
            line.push(b'\\');
        }
        line.extend_from_slice(hash.as_bytes());
        line.extend_from_slice(&[b' ', if binary { b'*' } else { b' ' }]);
        line.extend_from_slice(filename.as_bytes());
        line.push(if zero { b'\0' } else { b'\n' });
        out.write_all(&line)
    })
}
1067
1068#[inline]
1071pub fn write_hash_tag_line(
1072 out: &mut impl Write,
1073 algo_name: &str,
1074 hash: &str,
1075 filename: &str,
1076 zero: bool,
1077) -> io::Result<()> {
1078 LINE_BUF.with(|cell| {
1079 let mut buf = cell.borrow_mut();
1080 buf.clear();
1081 let term = if zero { b'\0' } else { b'\n' };
1082 buf.extend_from_slice(algo_name.as_bytes());
1083 buf.extend_from_slice(b" (");
1084 buf.extend_from_slice(filename.as_bytes());
1085 buf.extend_from_slice(b") = ");
1086 buf.extend_from_slice(hash.as_bytes());
1087 buf.push(term);
1088 out.write_all(&buf)
1089 })
1090}
1091
1092pub fn print_hash_tag(
1094 out: &mut impl Write,
1095 algo: HashAlgorithm,
1096 hash: &str,
1097 filename: &str,
1098) -> io::Result<()> {
1099 out.write_all(algo.name().as_bytes())?;
1100 out.write_all(b" (")?;
1101 out.write_all(filename.as_bytes())?;
1102 out.write_all(b") = ")?;
1103 out.write_all(hash.as_bytes())?;
1104 out.write_all(b"\n")
1105}
1106
1107pub fn print_hash_tag_zero(
1109 out: &mut impl Write,
1110 algo: HashAlgorithm,
1111 hash: &str,
1112 filename: &str,
1113) -> io::Result<()> {
1114 out.write_all(algo.name().as_bytes())?;
1115 out.write_all(b" (")?;
1116 out.write_all(filename.as_bytes())?;
1117 out.write_all(b") = ")?;
1118 out.write_all(hash.as_bytes())?;
1119 out.write_all(b"\0")
1120}
1121
/// Writes one tagged BLAKE2b line followed by a newline.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` otherwise,
/// giving `BLAKE2b (filename) = hash` or `BLAKE2b-256 (filename) = hash`.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for part in tail {
        out.write_all(part)?;
    }
    Ok(())
}
1142
/// Writes one tagged BLAKE2b line terminated by a NUL byte.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` otherwise.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for part in tail {
        out.write_all(part)?;
    }
    Ok(())
}
1160
/// Switches controlling how `check_file` reports its findings.
pub struct CheckOptions {
    /// Do not print `OK` lines for files that verify.
    pub quiet: bool,
    /// Print no per-file result lines at all.
    pub status_only: bool,
    /// Not read by `check_file` itself; presumably interpreted by the caller
    /// when turning format errors into an exit status (TODO confirm).
    pub strict: bool,
    /// Emit a diagnostic for every improperly formatted checksum line.
    pub warn: bool,
    /// Silently skip listed files that do not exist.
    pub ignore_missing: bool,
    /// When non-empty, used as the prefix of format warnings in place of the
    /// word `line`.
    pub warn_prefix: String,
}
1173
/// Counters produced by one `check_file` run.
pub struct CheckResult {
    /// Files whose computed digest matched the expected one.
    pub ok: usize,
    /// Files whose computed digest differed from the expected one.
    pub mismatches: usize,
    /// Non-empty lines that could not be parsed as a checksum line.
    pub format_errors: usize,
    /// Files that could not be opened or read.
    pub read_errors: usize,
    /// Missing files skipped because `ignore_missing` was set.
    pub ignored_missing: usize,
}
1183
1184pub fn check_file<R: BufRead>(
1187 algo: HashAlgorithm,
1188 reader: R,
1189 opts: &CheckOptions,
1190 out: &mut impl Write,
1191 err_out: &mut impl Write,
1192) -> io::Result<CheckResult> {
1193 let quiet = opts.quiet;
1194 let status_only = opts.status_only;
1195 let warn = opts.warn;
1196 let ignore_missing = opts.ignore_missing;
1197 let mut ok_count = 0;
1198 let mut mismatch_count = 0;
1199 let mut format_errors = 0;
1200 let mut read_errors = 0;
1201 let mut ignored_missing_count = 0;
1202 let mut line_num = 0;
1203
1204 for line_result in reader.lines() {
1205 line_num += 1;
1206 let line = line_result?;
1207 let line = line.trim_end();
1208
1209 if line.is_empty() {
1210 continue;
1211 }
1212
1213 let (expected_hash, filename) = match parse_check_line(line) {
1215 Some(v) => v,
1216 None => {
1217 format_errors += 1;
1218 if warn {
1219 out.flush()?;
1220 if opts.warn_prefix.is_empty() {
1221 writeln!(
1222 err_out,
1223 "line {}: improperly formatted {} checksum line",
1224 line_num,
1225 algo.name()
1226 )?;
1227 } else {
1228 writeln!(
1229 err_out,
1230 "{}: {}: improperly formatted {} checksum line",
1231 opts.warn_prefix,
1232 line_num,
1233 algo.name()
1234 )?;
1235 }
1236 }
1237 continue;
1238 }
1239 };
1240
1241 let actual = match hash_file(algo, Path::new(filename)) {
1243 Ok(h) => h,
1244 Err(e) => {
1245 if ignore_missing && e.kind() == io::ErrorKind::NotFound {
1246 ignored_missing_count += 1;
1247 continue;
1248 }
1249 read_errors += 1;
1250 if !status_only {
1251 out.flush()?;
1252 writeln!(err_out, "{}: {}", filename, e)?;
1253 writeln!(out, "{}: FAILED open or read", filename)?;
1254 }
1255 continue;
1256 }
1257 };
1258
1259 if actual.eq_ignore_ascii_case(expected_hash) {
1260 ok_count += 1;
1261 if !quiet && !status_only {
1262 writeln!(out, "{}: OK", filename)?;
1263 }
1264 } else {
1265 mismatch_count += 1;
1266 if !status_only {
1267 writeln!(out, "{}: FAILED", filename)?;
1268 }
1269 }
1270 }
1271
1272 Ok(CheckResult {
1273 ok: ok_count,
1274 mismatches: mismatch_count,
1275 format_errors,
1276 read_errors,
1277 ignored_missing: ignored_missing_count,
1278 })
1279}
1280
/// Splits one checksum line into `(hash, filename)`.
///
/// Two layouts are recognised, each optionally preceded by a single
/// backslash (the marker for an escaped file name, which is stripped; the
/// name itself is returned as written, not unescaped):
///
/// * tagged: `MD5 (name) = hash`, `SHA256 (name) = hash`,
///   `BLAKE2b (name) = hash` or `BLAKE2b-<digits> (name) = hash`. The name
///   ends at the LAST `") = "`, so names that contain that sequence
///   themselves are kept intact;
/// * plain: `hash`, one space, a mode character (space or `*`), then the
///   name. The hash ends at the FIRST space, so names containing runs of
///   spaces or ` *` are kept intact.
///
/// Returns `None` when the line matches neither layout. The hash text is not
/// validated here.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // The escape marker can precede either layout, so drop it first.
    let line = line.strip_prefix('\\').unwrap_or(line);

    let tagged = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            let after = line.strip_prefix("BLAKE2b-")?;
            let (bits, rest) = after.split_once(" (")?;
            bits.bytes().all(|b| b.is_ascii_digit()).then_some(rest)
        });
    if let Some(rest) = tagged {
        if let Some((filename, hash)) = rest.rsplit_once(") = ") {
            return Some((hash, filename));
        }
        // A tag prefix without the closing marker: try the plain layout.
    }

    let (hash, rest) = line.split_once(' ')?;
    match rest.as_bytes().first() {
        // The mode character is ASCII, so slicing one byte off is safe.
        Some(b' ') | Some(b'*') => Some((hash, &rest[1..])),
        _ => None,
    }
}
1325
/// Splits a tagged checksum line, `ALGO (name) = hash`, into
/// `(hash, filename, bits)`.
///
/// `ALGO` is everything before the first `" ("`. The name ends at the LAST
/// `") = "`, so names that contain that sequence themselves are kept intact.
/// `bits` is the number after the last `-` in `ALGO` (as in `BLAKE2b-256`),
/// or `None` when there is no dash or the suffix is not a number.
///
/// Returns `None` when either delimiter is missing.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.rsplit_once(") = ")?;

    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());

    Some((hash, filename, bits))
}
1346
/// Reads from `reader` until `buf` is full or end of input and returns the
/// number of bytes read. A return value of 0 therefore means end of input
/// (or an empty `buf`).
///
/// Interrupted reads are retried on every read, including the first one.
///
/// # Errors
/// Returns the first non-`Interrupted` error from `reader`.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
1369
/// Builds the byte-to-hex lookup table at compile time: entry `i` holds the
/// two lowercase ASCII hex digits of `i`, high nibble first.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

/// Lookup table mapping every byte value to its two lowercase hex digits.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Encodes `bytes` as a lowercase hexadecimal string, two characters per
/// byte.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = vec![0u8; bytes.len() * 2];
    hex_encode_to_slice(bytes, &mut out);
    // Every byte written comes from the hex table, so `out` is ASCII and
    // therefore valid UTF-8; the check cannot fail.
    String::from_utf8(out).expect("hex digits are valid UTF-8")
}

/// Writes the lowercase hex encoding of `bytes` into the start of `out`.
/// Bytes of `out` beyond `2 * bytes.len()` are left untouched.
///
/// # Panics
/// Panics if `out` is shorter than `2 * bytes.len()`.
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    assert!(
        out.len() >= bytes.len() * 2,
        "hex output buffer too small"
    );
    // One length check up front; the zipped iteration needs no indexing
    // into `out` and no unsafe code.
    for (pair, &b) in out.chunks_exact_mut(2).zip(bytes) {
        pair.copy_from_slice(&HEX_TABLE[usize::from(b)]);
    }
}