1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[cfg(not(target_os = "linux"))]
11use digest::Digest;
12#[cfg(not(target_os = "linux"))]
13use md5::Md5;
14
/// Checksum algorithms this module knows how to compute.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    Sha256,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Returns the label used for this algorithm in tagged output and in
    /// diagnostics.
    pub fn name(self) -> &'static str {
        match self {
            Self::Blake2b => "BLAKE2b",
            Self::Md5 => "MD5",
            Self::Sha256 => "SHA256",
        }
    }
}
32
/// Hashes an in-memory slice with any `Digest` implementation and returns
/// the lowercase hex digest.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let mut hasher = D::new();
    hasher.update(data);
    let digest = hasher.finalize();
    hex_encode(&digest)
}
40
/// Streams `reader` through a `Digest` implementation using the per-thread
/// streaming buffer and returns the lowercase hex digest.
///
/// # Errors
/// Propagates any I/O error reported while reading.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|shared| {
        let mut chunk = shared.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut state = D::new();
        loop {
            match read_full(&mut reader, &mut chunk)? {
                // A zero-length fill means the reader is exhausted.
                0 => break,
                filled => state.update(&chunk[..filled]),
            }
        }
        Ok(hex_encode(&state.finalize()))
    })
}
58
/// Length (8 MiB) the shared streaming buffer is grown to before use.
const HASH_READ_BUF: usize = 8 << 20;

thread_local! {
    /// Per-thread scratch buffer shared by the streaming hashers. It starts
    /// empty and is sized by `ensure_stream_buf` on first use.
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Makes sure `buf` holds at least `HASH_READ_BUF` bytes, zero-filling any
/// newly added tail. A buffer that is already long enough is left untouched.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    let wanted = buf.len().max(HASH_READ_BUF);
    buf.resize(wanted, 0);
}
81
82#[cfg(target_os = "linux")]
87fn sha256_bytes(data: &[u8]) -> String {
88 if data.len() < TINY_FILE_LIMIT as usize {
93 use digest::Digest;
94 return hex_encode(&sha2::Sha256::digest(data));
95 }
96 let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
97 .expect("SHA256 hash failed");
98 hex_encode(&digest)
99}
100
/// SHA-256 of an in-memory slice via `ring`, returned as lowercase hex
/// (builds that are neither Apple nor Linux).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    let digest = ring::digest::digest(&ring::digest::SHA256, data);
    hex_encode(digest.as_ref())
}
106
/// SHA-256 of an in-memory slice via the `sha2` crate, returned as
/// lowercase hex (Apple builds).
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    use sha2::Sha256;

    hash_digest::<Sha256>(data)
}
112
113#[cfg(target_os = "linux")]
116fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
117 STREAM_BUF.with(|cell| {
118 let mut buf = cell.borrow_mut();
119 ensure_stream_buf(&mut buf);
120 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
121 .map_err(|e| io::Error::other(e))?;
122 loop {
123 let n = read_full(&mut reader, &mut buf)?;
124 if n == 0 {
125 break;
126 }
127 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
128 }
129 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
130 Ok(hex_encode(&digest))
131 })
132}
133
/// Streams `reader` through `ring`'s SHA-256 using the per-thread streaming
/// buffer and returns the lowercase hex digest (builds that are neither
/// Apple nor Linux).
///
/// # Errors
/// Propagates any I/O error reported while reading.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|shared| {
        let mut chunk = shared.borrow_mut();
        ensure_stream_buf(&mut chunk);
        let mut context = ring::digest::Context::new(&ring::digest::SHA256);
        loop {
            match read_full(&mut reader, &mut chunk)? {
                0 => break,
                filled => context.update(&chunk[..filled]),
            }
        }
        let digest = context.finish();
        Ok(hex_encode(digest.as_ref()))
    })
}
151
/// Streams `reader` through the `sha2` crate's SHA-256 and returns the
/// lowercase hex digest (Apple builds).
///
/// # Errors
/// Propagates any I/O error reported while reading.
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    use sha2::Sha256;

    hash_reader_impl::<Sha256>(reader)
}
157
158pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
160 match algo {
161 HashAlgorithm::Sha256 => sha256_bytes(data),
162 HashAlgorithm::Md5 => md5_bytes(data),
163 HashAlgorithm::Blake2b => {
164 let hash = blake2b_simd::blake2b(data);
165 hex_encode(hash.as_bytes())
166 }
167 }
168}
169
170#[cfg(target_os = "linux")]
174fn md5_bytes(data: &[u8]) -> String {
175 if data.len() < TINY_FILE_LIMIT as usize {
179 use digest::Digest;
180 return hex_encode(&md5::Md5::digest(data));
181 }
182 let digest =
183 openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
184 hex_encode(&digest)
185}
186
/// MD5 of an in-memory slice via the `md5` crate, returned as lowercase hex
/// (non-Linux builds).
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    let hex: String = hash_digest::<Md5>(data);
    hex
}
192
193pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
195 match algo {
196 HashAlgorithm::Sha256 => sha256_reader(reader),
197 HashAlgorithm::Md5 => md5_reader(reader),
198 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
199 }
200}
201
202#[cfg(target_os = "linux")]
204fn md5_reader(mut reader: impl Read) -> io::Result<String> {
205 STREAM_BUF.with(|cell| {
206 let mut buf = cell.borrow_mut();
207 ensure_stream_buf(&mut buf);
208 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
209 .map_err(|e| io::Error::other(e))?;
210 loop {
211 let n = read_full(&mut reader, &mut buf)?;
212 if n == 0 {
213 break;
214 }
215 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
216 }
217 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
218 Ok(hex_encode(&digest))
219 })
220}
221
/// Streams `reader` through the `md5` crate and returns the lowercase hex
/// digest (non-Linux builds).
///
/// # Errors
/// Propagates any I/O error reported while reading.
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    let outcome: io::Result<String> = hash_reader_impl::<Md5>(reader);
    outcome
}
227
228#[cfg(target_os = "linux")]
231static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
232
233#[cfg(target_os = "linux")]
236fn open_noatime(path: &Path) -> io::Result<File> {
237 use std::os::unix::fs::OpenOptionsExt;
238 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
239 match std::fs::OpenOptions::new()
240 .read(true)
241 .custom_flags(libc::O_NOATIME)
242 .open(path)
243 {
244 Ok(f) => return Ok(f),
245 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
246 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
248 }
249 Err(e) => return Err(e), }
251 }
252 File::open(path)
253}
254
/// Opens `path` read-only (non-Linux builds, where no special open flags
/// are used).
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    let handle = File::open(path)?;
    Ok(handle)
}
259
260#[cfg(target_os = "linux")]
263#[inline]
264fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
265 let file = open_noatime(path)?;
266 let fd = {
267 use std::os::unix::io::AsRawFd;
268 file.as_raw_fd()
269 };
270 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
271 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
272 return Err(io::Error::last_os_error());
273 }
274 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
275 let size = stat.st_size as u64;
276 Ok((file, size, is_regular))
277}
278
/// Opens `path` and reads its metadata from the open handle.
///
/// Returns the open file, its length, and whether it is a regular file.
///
/// # Errors
/// Returns the open error or the metadata error.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let (length, regular) = {
        let meta = file.metadata()?;
        (meta.len(), meta.file_type().is_file())
    };
    Ok((file, length, regular))
}
286
/// Minimum size (1 MiB) at which a `posix_fadvise` hint is issued before
/// streaming or read-ahead.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1 << 20;

/// Regular files below this size (16 MiB) are read in one go into the
/// per-thread small-file buffer.
const SMALL_FILE_LIMIT: u64 = 16 << 20;

/// Regular files below this size (8 KiB) are read into a stack buffer.
const TINY_FILE_LIMIT: u64 = 8 << 10;

thread_local! {
    /// Per-thread buffer reused by the small-file readers; starts with
    /// 64 KiB of capacity and grows on demand.
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 << 10));
}
311
312#[cfg(target_os = "linux")]
317fn hash_file_pipelined(algo: HashAlgorithm, mut file: File, file_size: u64) -> io::Result<String> {
318 use std::os::unix::io::AsRawFd;
319
320 const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; unsafe {
324 libc::posix_fadvise(
325 file.as_raw_fd(),
326 0,
327 file_size as i64,
328 libc::POSIX_FADV_SEQUENTIAL,
329 );
330 }
331
332 let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
336 let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
337
338 let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
340
341 let reader_handle = std::thread::spawn(move || -> io::Result<()> {
343 let mut own_buf = vec![0u8; PIPE_BUF_SIZE];
344 loop {
345 let mut buf = buf_rx
347 .try_recv()
348 .unwrap_or_else(|_| std::mem::take(&mut own_buf));
349 if buf.is_empty() {
350 buf = vec![0u8; PIPE_BUF_SIZE];
351 }
352
353 let mut total = 0;
354 while total < buf.len() {
355 match file.read(&mut buf[total..]) {
356 Ok(0) => break,
357 Ok(n) => total += n,
358 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
359 Err(e) => return Err(e),
360 }
361 }
362 if total == 0 {
363 break;
364 }
365 if tx.send((buf, total)).is_err() {
366 break;
367 }
368 }
369 Ok(())
370 });
371
372 let hash_result = match algo {
374 HashAlgorithm::Sha256 => {
375 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
376 .map_err(|e| io::Error::other(e))?;
377 while let Ok((buf, n)) = rx.recv() {
378 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
379 let _ = buf_tx.send(buf);
381 }
382 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
383 Ok(hex_encode(&digest))
384 }
385 HashAlgorithm::Md5 => {
386 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
387 .map_err(|e| io::Error::other(e))?;
388 while let Ok((buf, n)) = rx.recv() {
389 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
390 let _ = buf_tx.send(buf);
391 }
392 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
393 Ok(hex_encode(&digest))
394 }
395 HashAlgorithm::Blake2b => {
396 let mut state = blake2b_simd::Params::new().to_state();
397 while let Ok((buf, n)) = rx.recv() {
398 state.update(&buf[..n]);
399 let _ = buf_tx.send(buf);
400 }
401 Ok(hex_encode(state.finalize().as_bytes()))
402 }
403 };
404
405 match reader_handle.join() {
407 Ok(Ok(())) => {}
408 Ok(Err(e)) => {
409 if hash_result.is_ok() {
411 return Err(e);
412 }
413 }
414 Err(_) => {
415 return Err(io::Error::other("reader thread panicked"));
416 }
417 }
418
419 hash_result
420}
421
422pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
426 let (file, file_size, is_regular) = open_and_stat(path)?;
427
428 if is_regular && file_size == 0 {
429 return Ok(hash_bytes(algo, &[]));
430 }
431
432 if file_size > 0 && is_regular {
433 if file_size < TINY_FILE_LIMIT {
435 return hash_file_tiny(algo, file, file_size as usize);
436 }
437 if file_size >= SMALL_FILE_LIMIT {
439 #[cfg(target_os = "linux")]
440 {
441 return hash_file_pipelined(algo, file, file_size);
442 }
443 #[cfg(not(target_os = "linux"))]
445 {
446 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
447 if let Ok(mmap) = mmap_result {
448 return Ok(hash_bytes(algo, &mmap));
449 }
450 }
451 }
452 if file_size < SMALL_FILE_LIMIT {
455 return hash_file_small(algo, file, file_size as usize);
456 }
457 }
458
459 #[cfg(target_os = "linux")]
461 if file_size >= FADVISE_MIN_SIZE {
462 use std::os::unix::io::AsRawFd;
463 unsafe {
464 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
465 }
466 }
467 hash_reader(algo, file)
468}
469
470#[inline]
474fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
475 let mut buf = [0u8; 8192];
476 let mut total = 0;
477 while total < size {
479 match file.read(&mut buf[total..size]) {
480 Ok(0) => break,
481 Ok(n) => total += n,
482 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
483 Err(e) => return Err(e),
484 }
485 }
486 Ok(hash_bytes(algo, &buf[..total]))
487}
488
489#[inline]
492fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
493 SMALL_FILE_BUF.with(|cell| {
494 let mut buf = cell.borrow_mut();
495 buf.clear();
497 buf.reserve(size);
498 unsafe {
501 buf.set_len(size);
502 }
503 let mut total = 0;
504 while total < size {
505 match file.read(&mut buf[total..size]) {
506 Ok(0) => break,
507 Ok(n) => total += n,
508 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
509 Err(e) => return Err(e),
510 }
511 }
512 Ok(hash_bytes(algo, &buf[..total]))
513 })
514}
515
516pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
518 let stdin = io::stdin();
519 #[cfg(target_os = "linux")]
521 {
522 use std::os::unix::io::AsRawFd;
523 let fd = stdin.as_raw_fd();
524 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
525 if unsafe { libc::fstat(fd, &mut stat) } == 0
526 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
527 && stat.st_size > 0
528 {
529 unsafe {
530 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
531 }
532 }
533 }
534 hash_reader(algo, stdin.lock())
536}
537
/// Returns `true` when more than one path was given.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
545
546#[cfg(target_os = "linux")]
551pub fn readahead_files(paths: &[&Path]) {
552 use std::os::unix::io::AsRawFd;
553 for path in paths {
554 if let Ok(file) = open_noatime(path) {
555 if let Ok(meta) = file.metadata() {
556 let len = meta.len();
557 if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
558 unsafe {
559 libc::posix_fadvise(
560 file.as_raw_fd(),
561 0,
562 len as i64,
563 libc::POSIX_FADV_WILLNEED,
564 );
565 }
566 }
567 }
568 }
569 }
570}
571
/// No-op on non-Linux builds; exists so callers need no `cfg` of their own.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {}
576
577pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
582 let hash = blake2b_simd::Params::new()
583 .hash_length(output_bytes)
584 .hash(data);
585 hex_encode(hash.as_bytes())
586}
587
588pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
591 STREAM_BUF.with(|cell| {
592 let mut buf = cell.borrow_mut();
593 ensure_stream_buf(&mut buf);
594 let mut state = blake2b_simd::Params::new()
595 .hash_length(output_bytes)
596 .to_state();
597 loop {
598 let n = read_full(&mut reader, &mut buf)?;
599 if n == 0 {
600 break;
601 }
602 state.update(&buf[..n]);
603 }
604 Ok(hex_encode(state.finalize().as_bytes()))
605 })
606}
607
608pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
612 let (file, file_size, is_regular) = open_and_stat(path)?;
613
614 if is_regular && file_size == 0 {
615 return Ok(blake2b_hash_data(&[], output_bytes));
616 }
617
618 if file_size > 0 && is_regular {
619 if file_size < TINY_FILE_LIMIT {
621 return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
622 }
623 if file_size >= SMALL_FILE_LIMIT {
625 #[cfg(target_os = "linux")]
626 if file_size >= FADVISE_MIN_SIZE {
627 use std::os::unix::io::AsRawFd;
628 unsafe {
629 libc::posix_fadvise(
630 file.as_raw_fd(),
631 0,
632 file_size as i64,
633 libc::POSIX_FADV_SEQUENTIAL,
634 );
635 }
636 }
637 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
639 if let Ok(mmap) = mmap_result {
640 #[cfg(target_os = "linux")]
641 {
642 if file_size >= 2 * 1024 * 1024 {
643 let _ = mmap.advise(memmap2::Advice::HugePage);
644 }
645 let _ = mmap.advise(memmap2::Advice::Sequential);
646 let _ = mmap.advise(memmap2::Advice::WillNeed);
647 }
648 return Ok(blake2b_hash_data(&mmap, output_bytes));
649 }
650 }
651 if file_size < SMALL_FILE_LIMIT {
653 return blake2b_hash_file_small(file, file_size as usize, output_bytes);
654 }
655 }
656
657 #[cfg(target_os = "linux")]
659 if file_size >= FADVISE_MIN_SIZE {
660 use std::os::unix::io::AsRawFd;
661 unsafe {
662 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
663 }
664 }
665 blake2b_hash_reader(file, output_bytes)
666}
667
668#[inline]
670fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
671 let mut buf = [0u8; 8192];
672 let mut total = 0;
673 while total < size {
674 match file.read(&mut buf[total..size]) {
675 Ok(0) => break,
676 Ok(n) => total += n,
677 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
678 Err(e) => return Err(e),
679 }
680 }
681 Ok(blake2b_hash_data(&buf[..total], output_bytes))
682}
683
684#[inline]
686fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
687 SMALL_FILE_BUF.with(|cell| {
688 let mut buf = cell.borrow_mut();
689 buf.clear();
690 buf.reserve(size);
691 unsafe {
693 buf.set_len(size);
694 }
695 let mut total = 0;
696 while total < size {
697 match file.read(&mut buf[total..size]) {
698 Ok(0) => break,
699 Ok(n) => total += n,
700 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
701 Err(e) => return Err(e),
702 }
703 }
704 Ok(blake2b_hash_data(&buf[..total], output_bytes))
705 })
706}
707
708pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
711 let stdin = io::stdin();
712 #[cfg(target_os = "linux")]
713 {
714 use std::os::unix::io::AsRawFd;
715 let fd = stdin.as_raw_fd();
716 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
717 if unsafe { libc::fstat(fd, &mut stat) } == 0
718 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
719 && stat.st_size > 0
720 {
721 unsafe {
722 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
723 }
724 }
725 }
726 blake2b_hash_reader(stdin.lock(), output_bytes)
727}
728
729enum FileContent {
732 Mmap(memmap2::Mmap),
733 Buf(Vec<u8>),
734}
735
736impl AsRef<[u8]> for FileContent {
737 fn as_ref(&self) -> &[u8] {
738 match self {
739 FileContent::Mmap(m) => m,
740 FileContent::Buf(v) => v,
741 }
742 }
743}
744
745fn open_file_content(path: &Path) -> io::Result<FileContent> {
749 let (file, size, is_regular) = open_and_stat(path)?;
750 if is_regular && size == 0 {
751 return Ok(FileContent::Buf(Vec::new()));
752 }
753 if is_regular && size > 0 {
754 if size < TINY_FILE_LIMIT {
758 let mut buf = vec![0u8; size as usize];
759 let mut total = 0;
760 let mut f = file;
761 while total < size as usize {
762 match f.read(&mut buf[total..]) {
763 Ok(0) => break,
764 Ok(n) => total += n,
765 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
766 Err(e) => return Err(e),
767 }
768 }
769 buf.truncate(total);
770 return Ok(FileContent::Buf(buf));
771 }
772 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
774 if let Ok(mmap) = mmap_result {
775 #[cfg(target_os = "linux")]
776 {
777 if size >= 2 * 1024 * 1024 {
778 let _ = mmap.advise(memmap2::Advice::HugePage);
779 }
780 let _ = mmap.advise(memmap2::Advice::Sequential);
781 let _ = mmap.advise(memmap2::Advice::WillNeed);
782 }
783 return Ok(FileContent::Mmap(mmap));
784 }
785 let mut buf = vec![0u8; size as usize];
787 let mut total = 0;
788 let mut f = file;
789 while total < size as usize {
790 match f.read(&mut buf[total..]) {
791 Ok(0) => break,
792 Ok(n) => total += n,
793 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
794 Err(e) => return Err(e),
795 }
796 }
797 buf.truncate(total);
798 return Ok(FileContent::Buf(buf));
799 }
800 let mut buf = Vec::new();
802 let mut f = file;
803 f.read_to_end(&mut buf)?;
804 Ok(FileContent::Buf(buf))
805}
806
807fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
812 let mut file = open_noatime(path)?;
813 let mut small_buf = [0u8; 4096];
816 match file.read(&mut small_buf) {
817 Ok(0) => return Ok(FileContent::Buf(Vec::new())),
818 Ok(n) if n < small_buf.len() => {
819 return Ok(FileContent::Buf(small_buf[..n].to_vec()));
821 }
822 Ok(n) => {
823 let mut buf = [0u8; 65536];
825 buf[..n].copy_from_slice(&small_buf[..n]);
826 let mut total = n;
827 loop {
828 match file.read(&mut buf[total..]) {
829 Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
830 Ok(n) => {
831 total += n;
832 if total >= buf.len() {
833 return open_file_content(path);
834 }
835 }
836 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
837 Err(e) => return Err(e),
838 }
839 }
840 }
841 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
842 let mut buf = [0u8; 65536];
843 let mut total = 0;
844 loop {
845 match file.read(&mut buf[total..]) {
846 Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
847 Ok(n) => {
848 total += n;
849 if total >= buf.len() {
850 return open_file_content(path);
851 }
852 }
853 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
854 Err(e) => return Err(e),
855 }
856 }
857 }
858 Err(e) => return Err(e),
859 }
860}
861
862pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
870 use blake2b_simd::many::{HashManyJob, hash_many};
871
872 let use_fast = paths.len() >= 20;
877
878 let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
879 paths.iter().map(|&path| open_file_content(path)).collect()
881 } else {
882 let num_threads = std::thread::available_parallelism()
883 .map(|n| n.get())
884 .unwrap_or(4)
885 .min(paths.len());
886 let chunk_size = (paths.len() + num_threads - 1) / num_threads;
887
888 std::thread::scope(|s| {
889 let handles: Vec<_> = paths
890 .chunks(chunk_size)
891 .map(|chunk| {
892 s.spawn(move || {
893 chunk
894 .iter()
895 .map(|&path| {
896 if use_fast {
897 open_file_content_fast(path)
898 } else {
899 open_file_content(path)
900 }
901 })
902 .collect::<Vec<_>>()
903 })
904 })
905 .collect();
906
907 handles
908 .into_iter()
909 .flat_map(|h| h.join().unwrap())
910 .collect()
911 })
912 };
913
914 let hash_results = {
916 let mut params = blake2b_simd::Params::new();
917 params.hash_length(output_bytes);
918
919 let ok_entries: Vec<(usize, &[u8])> = file_data
920 .iter()
921 .enumerate()
922 .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
923 .collect();
924
925 let mut jobs: Vec<HashManyJob> = ok_entries
926 .iter()
927 .map(|(_, data)| HashManyJob::new(¶ms, data))
928 .collect();
929
930 hash_many(jobs.iter_mut());
932
933 let mut hm: Vec<Option<String>> = vec![None; paths.len()];
935 for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
936 hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
937 }
938 hm
939 }; hash_results
943 .into_iter()
944 .zip(file_data)
945 .map(|(hash_opt, result)| match result {
946 Ok(_) => Ok(hash_opt.unwrap()),
947 Err(e) => Err(e),
948 })
949 .collect()
950}
951
952pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
958 let n = paths.len();
959
960 let mut indexed: Vec<(usize, &Path, u64)> = paths
963 .iter()
964 .enumerate()
965 .map(|(i, &p)| {
966 let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
967 (i, p, size)
968 })
969 .collect();
970
971 indexed.sort_by(|a, b| b.2.cmp(&a.2));
974
975 #[cfg(target_os = "linux")]
977 {
978 use std::os::unix::io::AsRawFd;
979 for &(_, path, size) in indexed.iter().take(20) {
980 if size >= 1024 * 1024 {
981 if let Ok(file) = open_noatime(path) {
982 unsafe {
983 libc::posix_fadvise(
984 file.as_raw_fd(),
985 0,
986 size as i64,
987 libc::POSIX_FADV_WILLNEED,
988 );
989 }
990 }
991 }
992 }
993 }
994
995 let num_threads = std::thread::available_parallelism()
996 .map(|n| n.get())
997 .unwrap_or(4)
998 .min(n);
999
1000 let work_idx = AtomicUsize::new(0);
1002
1003 std::thread::scope(|s| {
1004 let work_idx = &work_idx;
1005 let indexed = &indexed;
1006
1007 let handles: Vec<_> = (0..num_threads)
1008 .map(|_| {
1009 s.spawn(move || {
1010 let mut local_results = Vec::new();
1011 loop {
1012 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1013 if idx >= indexed.len() {
1014 break;
1015 }
1016 let (orig_idx, path, _size) = indexed[idx];
1017 let result = hash_file(algo, path);
1018 local_results.push((orig_idx, result));
1019 }
1020 local_results
1021 })
1022 })
1023 .collect();
1024
1025 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1027 for handle in handles {
1028 for (orig_idx, result) in handle.join().unwrap() {
1029 results[orig_idx] = Some(result);
1030 }
1031 }
1032 results
1033 .into_iter()
1034 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1035 .collect()
1036 })
1037}
1038
1039pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
1045 let mut file = open_noatime(path)?;
1046 let mut small_buf = [0u8; 4096];
1050 match file.read(&mut small_buf) {
1051 Ok(0) => return Ok(hash_bytes(algo, &[])),
1052 Ok(n) if n < small_buf.len() => {
1053 return Ok(hash_bytes(algo, &small_buf[..n]));
1055 }
1056 Ok(n) => {
1057 let mut buf = [0u8; 65536];
1059 buf[..n].copy_from_slice(&small_buf[..n]);
1060 let mut total = n;
1061 loop {
1062 match file.read(&mut buf[total..]) {
1063 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1064 Ok(n) => {
1065 total += n;
1066 if total >= buf.len() {
1067 return hash_file(algo, path);
1068 }
1069 }
1070 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1071 Err(e) => return Err(e),
1072 }
1073 }
1074 }
1075 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1076 let mut buf = [0u8; 65536];
1078 let mut total = 0;
1079 loop {
1080 match file.read(&mut buf[total..]) {
1081 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1082 Ok(n) => {
1083 total += n;
1084 if total >= buf.len() {
1085 return hash_file(algo, path);
1086 }
1087 }
1088 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1089 Err(e) => return Err(e),
1090 }
1091 }
1092 }
1093 Err(e) => return Err(e),
1094 }
1095}
1096
1097#[cfg(target_os = "linux")]
1100pub fn readahead_files_all(paths: &[&Path]) {
1101 use std::os::unix::io::AsRawFd;
1102 for path in paths {
1103 if let Ok(file) = open_noatime(path) {
1104 if let Ok(meta) = file.metadata() {
1105 if meta.file_type().is_file() {
1106 let len = meta.len();
1107 unsafe {
1108 libc::posix_fadvise(
1109 file.as_raw_fd(),
1110 0,
1111 len as i64,
1112 libc::POSIX_FADV_WILLNEED,
1113 );
1114 }
1115 }
1116 }
1117 }
1118 }
1119}
1120
/// No-op on non-Linux builds; exists so callers need no `cfg` of their own.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {
}
1123
/// Writes one checksum line: the hash, a space, the mode marker (`*` for
/// binary, a space otherwise), the filename, and a newline.
///
/// # Errors
/// Returns the first write error.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\n"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
1138
/// Like `print_hash`, but terminates the record with a NUL byte instead of
/// a newline.
///
/// # Errors
/// Returns the first write error.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\0"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
1152
thread_local! {
    /// Per-thread scratch buffer used to assemble one output line so it can
    /// be written with a single `write_all`.
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}

/// Assembles one checksum line in the per-thread line buffer and writes it
/// with a single `write_all`.
///
/// Layout: an optional leading backslash (`escaped`), the hash, a space, the
/// mode marker (`*` for `binary`, a space otherwise), the filename, and a
/// terminator (NUL when `zero`, newline otherwise).
///
/// # Errors
/// Returns the write error, if any.
#[inline]
pub fn write_hash_line(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
    zero: bool,
    escaped: bool,
) -> io::Result<()> {
    LINE_BUF.with(|scratch| {
        let mut line = scratch.borrow_mut();
        line.clear();
        if escaped {
            line.push(b'\\');
        }
        line.extend_from_slice(hash.as_bytes());
        line.extend_from_slice(&[b' ', if binary { b'*' } else { b' ' }]);
        line.extend_from_slice(filename.as_bytes());
        line.push(if zero { b'\0' } else { b'\n' });
        out.write_all(&line)
    })
}
1191
1192#[inline]
1195pub fn write_hash_tag_line(
1196 out: &mut impl Write,
1197 algo_name: &str,
1198 hash: &str,
1199 filename: &str,
1200 zero: bool,
1201) -> io::Result<()> {
1202 LINE_BUF.with(|cell| {
1203 let mut buf = cell.borrow_mut();
1204 buf.clear();
1205 let term = if zero { b'\0' } else { b'\n' };
1206 buf.extend_from_slice(algo_name.as_bytes());
1207 buf.extend_from_slice(b" (");
1208 buf.extend_from_slice(filename.as_bytes());
1209 buf.extend_from_slice(b") = ");
1210 buf.extend_from_slice(hash.as_bytes());
1211 buf.push(term);
1212 out.write_all(&buf)
1213 })
1214}
1215
1216pub fn print_hash_tag(
1218 out: &mut impl Write,
1219 algo: HashAlgorithm,
1220 hash: &str,
1221 filename: &str,
1222) -> io::Result<()> {
1223 out.write_all(algo.name().as_bytes())?;
1224 out.write_all(b" (")?;
1225 out.write_all(filename.as_bytes())?;
1226 out.write_all(b") = ")?;
1227 out.write_all(hash.as_bytes())?;
1228 out.write_all(b"\n")
1229}
1230
1231pub fn print_hash_tag_zero(
1233 out: &mut impl Write,
1234 algo: HashAlgorithm,
1235 hash: &str,
1236 filename: &str,
1237) -> io::Result<()> {
1238 out.write_all(algo.name().as_bytes())?;
1239 out.write_all(b" (")?;
1240 out.write_all(filename.as_bytes())?;
1241 out.write_all(b") = ")?;
1242 out.write_all(hash.as_bytes())?;
1243 out.write_all(b"\0")
1244}
1245
/// Writes one tagged BLAKE2b line followed by a newline. The tag is
/// `BLAKE2b` for 512 bits and `BLAKE2b-<bits>` for any other length.
///
/// # Errors
/// Returns the first write error.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
1266
/// Like `print_hash_tag_b2sum`, but terminates the record with a NUL byte
/// instead of a newline.
///
/// # Errors
/// Returns the first write error.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
1284
/// Options that control how a checksum file is verified and reported.
#[derive(Debug, Clone)]
pub struct CheckOptions {
    /// Suppress the `OK` line for files that verify.
    pub quiet: bool,
    /// Print nothing about individual files.
    pub status_only: bool,
    /// Not read by `check_file` itself; presumably acted on by the caller.
    pub strict: bool,
    /// Emit a diagnostic for each improperly formatted line.
    pub warn: bool,
    /// Skip listed files that do not exist instead of counting a read error.
    pub ignore_missing: bool,
    /// Prefix for format warnings; when empty, warnings start with `line N:`.
    pub warn_prefix: String,
}
1297
/// Tallies produced by verifying one checksum file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckResult {
    /// Files whose computed hash matched the expected one.
    pub ok: usize,
    /// Files whose computed hash differed from the expected one.
    pub mismatches: usize,
    /// Non-empty lines that could not be parsed.
    pub format_errors: usize,
    /// Files that could not be opened or read.
    pub read_errors: usize,
    /// Missing files that were skipped because `ignore_missing` was set.
    pub ignored_missing: usize,
}
1307
1308pub fn check_file<R: BufRead>(
1311 algo: HashAlgorithm,
1312 reader: R,
1313 opts: &CheckOptions,
1314 out: &mut impl Write,
1315 err_out: &mut impl Write,
1316) -> io::Result<CheckResult> {
1317 let quiet = opts.quiet;
1318 let status_only = opts.status_only;
1319 let warn = opts.warn;
1320 let ignore_missing = opts.ignore_missing;
1321 let mut ok_count = 0;
1322 let mut mismatch_count = 0;
1323 let mut format_errors = 0;
1324 let mut read_errors = 0;
1325 let mut ignored_missing_count = 0;
1326 let mut line_num = 0;
1327
1328 for line_result in reader.lines() {
1329 line_num += 1;
1330 let line = line_result?;
1331 let line = line.trim_end();
1332
1333 if line.is_empty() {
1334 continue;
1335 }
1336
1337 let (expected_hash, filename) = match parse_check_line(line) {
1339 Some(v) => v,
1340 None => {
1341 format_errors += 1;
1342 if warn {
1343 out.flush()?;
1344 if opts.warn_prefix.is_empty() {
1345 writeln!(
1346 err_out,
1347 "line {}: improperly formatted {} checksum line",
1348 line_num,
1349 algo.name()
1350 )?;
1351 } else {
1352 writeln!(
1353 err_out,
1354 "{}: {}: improperly formatted {} checksum line",
1355 opts.warn_prefix,
1356 line_num,
1357 algo.name()
1358 )?;
1359 }
1360 }
1361 continue;
1362 }
1363 };
1364
1365 let actual = match hash_file(algo, Path::new(filename)) {
1367 Ok(h) => h,
1368 Err(e) => {
1369 if ignore_missing && e.kind() == io::ErrorKind::NotFound {
1370 ignored_missing_count += 1;
1371 continue;
1372 }
1373 read_errors += 1;
1374 if !status_only {
1375 out.flush()?;
1376 writeln!(err_out, "{}: {}", filename, e)?;
1377 writeln!(out, "{}: FAILED open or read", filename)?;
1378 }
1379 continue;
1380 }
1381 };
1382
1383 if actual.eq_ignore_ascii_case(expected_hash) {
1384 ok_count += 1;
1385 if !quiet && !status_only {
1386 writeln!(out, "{}: OK", filename)?;
1387 }
1388 } else {
1389 mismatch_count += 1;
1390 if !status_only {
1391 writeln!(out, "{}: FAILED", filename)?;
1392 }
1393 }
1394 }
1395
1396 Ok(CheckResult {
1397 ok: ok_count,
1398 mismatches: mismatch_count,
1399 format_errors,
1400 read_errors,
1401 ignored_missing: ignored_missing_count,
1402 })
1403}
1404
/// Parses one line of a checksum listing into `(hash, filename)`.
///
/// Two layouts are recognised:
/// * tagged: `MD5 (file) = hash`, `SHA256 (file) = hash`,
///   `BLAKE2b (file) = hash`, or `BLAKE2b-<digits> (file) = hash`;
/// * plain: `hash`, a space, a mode marker (a second space or `*`), then
///   the filename. One leading backslash is skipped first.
///
/// The plain layout is located by scanning for a space that is immediately
/// followed by a mode marker. The previous version sliced two bytes past
/// the first space without looking at what followed it, which panicked on a
/// trailing space or a multibyte character and silently dropped any other
/// character in the marker position. Such lines now yield `None`.
///
/// Returns `None` when the line matches neither layout.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Tagged layout: strip a known algorithm prefix up to and including " (".
    let tagged = ["MD5 (", "SHA256 (", "BLAKE2b ("]
        .iter()
        .find_map(|prefix| line.strip_prefix(*prefix))
        .or_else(|| {
            let after = line.strip_prefix("BLAKE2b-")?;
            let (bits, rest) = after.split_once(" (")?;
            bits.bytes().all(|b| b.is_ascii_digit()).then_some(rest)
        });
    if let Some((filename, hash)) = tagged.and_then(|rest| rest.split_once(") = ")) {
        return Some((hash, filename));
    }

    // Plain layout; a leading backslash is skipped.
    let line = line.strip_prefix('\\').unwrap_or(line);

    // Both separator bytes are ASCII, so `sep` and `sep + 2` are always
    // valid char boundaries and the slices below cannot panic.
    let sep = line
        .as_bytes()
        .windows(2)
        .position(|pair| pair[0] == b' ' && matches!(pair[1], b' ' | b'*'))?;
    Some((&line[..sep], &line[sep + 2..]))
}
1449
/// Parses a tagged checksum line of the form `ALGO (filename) = hash`.
///
/// Returns `(hash, filename, bits)`, where `bits` is the number after the
/// last `-` in the algorithm part when that suffix parses as an integer,
/// and `None` otherwise. Returns `None` when the line lacks ` (` or `) = `.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.split_once(") = ")?;

    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());

    Some((hash, filename, bits))
}
1470
/// Reads from `reader` until `buf` is full or EOF is reached and returns the
/// number of bytes placed in `buf`. A return value shorter than `buf.len()`
/// therefore means EOF.
///
/// `Interrupted` errors are retried on every read, including the first one;
/// the previous version returned the error if the very first read was
/// interrupted.
///
/// # Errors
/// Returns any read error other than `Interrupted`.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
1493
/// Builds the byte-to-hex lookup table at compile time: entry `i` holds the
/// two lowercase ASCII hex digits of `i`.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

/// Lookup table mapping every byte value to its two lowercase hex digits.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Encodes `bytes` as a lowercase hexadecimal string.
///
/// The output buffer is zero-initialised before it is filled; the previous
/// version set the length of an uninitialised vector first.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = vec![0u8; bytes.len() * 2];
    hex_encode_to_slice(bytes, &mut out);
    // SAFETY: `out` is exactly `2 * bytes.len()` long and every byte was
    // overwritten with an ASCII hex digit from `HEX_TABLE`, so it is valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}

/// Writes the lowercase hex encoding of `bytes` into the start of `out`.
/// Bytes of `out` beyond `2 * bytes.len()` are left untouched.
///
/// # Panics
/// Panics if `out` is shorter than `2 * bytes.len()`. The previous version
/// wrote through a raw pointer without this check.
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    assert!(
        out.len() / 2 >= bytes.len(),
        "hex output buffer too small: need {} bytes, have {}",
        bytes.len() * 2,
        out.len()
    );
    for (pair, &b) in out.chunks_exact_mut(2).zip(bytes) {
        pair.copy_from_slice(&HEX_TABLE[usize::from(b)]);
    }
}