1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[cfg(not(target_os = "linux"))]
11use digest::Digest;
12#[cfg(not(target_os = "linux"))]
13use md5::Md5;
14
/// Hash algorithms this module can compute.
#[derive(Debug, Clone, Copy)]
pub enum HashAlgorithm {
    /// SHA-256.
    Sha256,
    /// MD5.
    Md5,
    /// BLAKE2b.
    Blake2b,
}

impl HashAlgorithm {
    /// Returns the display name of the algorithm: `"SHA256"`, `"MD5"` or `"BLAKE2b"`.
    pub fn name(self) -> &'static str {
        match self {
            Self::Blake2b => "BLAKE2b",
            Self::Md5 => "MD5",
            Self::Sha256 => "SHA256",
        }
    }
}
32
/// Hashes `data` in one shot with the digest type `D` and passes the raw
/// digest through `hex_encode`.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let mut hasher = D::new();
    hasher.update(data);
    let digest = hasher.finalize();
    hex_encode(&digest)
}
40
/// Streams `reader` through a digest of type `D`, using the thread-local
/// `STREAM_BUF` as the read buffer, and returns the `hex_encode`d digest.
///
/// # Errors
/// Propagates any error returned by `read_full`.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut scratch = cell.borrow_mut();
        ensure_stream_buf(&mut scratch);

        let mut hasher = D::new();
        loop {
            // A zero-length fill marks the end of the input.
            match read_full(&mut reader, &mut scratch)? {
                0 => break,
                filled => hasher.update(&scratch[..filled]),
            }
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}
58
/// Minimum length (8 MiB) that `ensure_stream_buf` guarantees for a stream buffer.
const HASH_READ_BUF: usize = 8 << 20;

thread_local! {
    /// Per-thread read buffer shared by the streaming hashers. It starts empty
    /// and is grown by `ensure_stream_buf` on first use.
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Grows `buf` with zero bytes until it is at least `HASH_READ_BUF` long.
/// A buffer that is already long enough is left untouched.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() >= HASH_READ_BUF {
        return;
    }
    buf.resize(HASH_READ_BUF, 0);
}
81
82#[cfg(target_os = "linux")]
87fn sha256_bytes(data: &[u8]) -> String {
88 if data.len() < TINY_FILE_LIMIT as usize {
93 use digest::Digest;
94 return hex_encode(&sha2::Sha256::digest(data));
95 }
96 let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
97 .expect("SHA256 hash failed");
98 hex_encode(&digest)
99}
100
/// SHA-256 of `data` computed with `ring`, passed through `hex_encode`
/// (targets that are neither Apple nor Linux).
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_bytes(data: &[u8]) -> String {
    let digest = ring::digest::digest(&ring::digest::SHA256, data);
    hex_encode(digest.as_ref())
}
106
/// SHA-256 of `data` computed with the `sha2` crate, passed through
/// `hex_encode` (Apple targets).
#[cfg(target_vendor = "apple")]
fn sha256_bytes(data: &[u8]) -> String {
    type Sha256Impl = sha2::Sha256;
    hash_digest::<Sha256Impl>(data)
}
112
113#[cfg(target_os = "linux")]
116fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
117 STREAM_BUF.with(|cell| {
118 let mut buf = cell.borrow_mut();
119 ensure_stream_buf(&mut buf);
120 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
121 .map_err(|e| io::Error::other(e))?;
122 loop {
123 let n = read_full(&mut reader, &mut buf)?;
124 if n == 0 {
125 break;
126 }
127 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
128 }
129 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
130 Ok(hex_encode(&digest))
131 })
132}
133
/// Streams `reader` through a `ring` SHA-256 context (targets that are neither
/// Apple nor Linux), using the thread-local `STREAM_BUF` as the read buffer.
///
/// # Errors
/// Propagates any error returned by `read_full`.
#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut scratch = cell.borrow_mut();
        ensure_stream_buf(&mut scratch);

        let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
        loop {
            // A zero-length fill marks the end of the input.
            match read_full(&mut reader, &mut scratch)? {
                0 => break,
                filled => ctx.update(&scratch[..filled]),
            }
        }
        let digest = ctx.finish();
        Ok(hex_encode(digest.as_ref()))
    })
}
151
/// Streams `reader` through the `sha2` SHA-256 implementation (Apple targets)
/// by delegating to `hash_reader_impl`.
#[cfg(target_vendor = "apple")]
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    type Sha256Impl = sha2::Sha256;
    hash_reader_impl::<Sha256Impl>(reader)
}
157
158pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
160 match algo {
161 HashAlgorithm::Sha256 => sha256_bytes(data),
162 HashAlgorithm::Md5 => md5_bytes(data),
163 HashAlgorithm::Blake2b => {
164 let hash = blake2b_simd::blake2b(data);
165 hex_encode(hash.as_bytes())
166 }
167 }
168}
169
170#[cfg(target_os = "linux")]
175pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> usize {
176 match algo {
177 HashAlgorithm::Md5 => {
178 use digest::Digest;
179 let digest = md5::Md5::digest(data);
180 hex_encode_to_slice(&digest, out);
181 32
182 }
183 HashAlgorithm::Sha256 => {
184 use digest::Digest;
185 let digest = sha2::Sha256::digest(data);
186 hex_encode_to_slice(&digest, out);
187 64
188 }
189 HashAlgorithm::Blake2b => {
190 let hash = blake2b_simd::blake2b(data);
191 let bytes = hash.as_bytes();
192 hex_encode_to_slice(bytes, out);
193 bytes.len() * 2
194 }
195 }
196}
197
198#[cfg(target_os = "linux")]
203pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
204 use std::os::unix::ffi::OsStrExt;
205
206 let path_bytes = path.as_os_str().as_bytes();
207 let c_path = std::ffi::CString::new(path_bytes)
208 .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;
209
210 let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
211 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
212 flags |= libc::O_NOATIME;
213 }
214
215 let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
216 if fd < 0 {
217 let err = io::Error::last_os_error();
218 if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
219 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
220 let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
221 if fd2 < 0 {
222 return Err(io::Error::last_os_error());
223 }
224 return hash_from_raw_fd_to_buf(algo, fd2, out);
225 }
226 return Err(err);
227 }
228 hash_from_raw_fd_to_buf(algo, fd, out)
229}
230
231#[cfg(target_os = "linux")]
235fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
236 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
237 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
238 let err = io::Error::last_os_error();
239 unsafe {
240 libc::close(fd);
241 }
242 return Err(err);
243 }
244 let size = stat.st_size as u64;
245 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
246
247 if is_regular && size == 0 {
249 unsafe {
250 libc::close(fd);
251 }
252 return Ok(hash_bytes_to_buf(algo, &[], out));
253 }
254
255 if is_regular && size < TINY_FILE_LIMIT {
257 let mut buf = [0u8; 8192];
258 let mut total = 0usize;
259 while total < size as usize {
260 let n = unsafe {
261 libc::read(
262 fd,
263 buf[total..].as_mut_ptr() as *mut libc::c_void,
264 (size as usize) - total,
265 )
266 };
267 if n < 0 {
268 let err = io::Error::last_os_error();
269 if err.kind() == io::ErrorKind::Interrupted {
270 continue;
271 }
272 unsafe {
273 libc::close(fd);
274 }
275 return Err(err);
276 }
277 if n == 0 {
278 break;
279 }
280 total += n as usize;
281 }
282 unsafe {
283 libc::close(fd);
284 }
285 return Ok(hash_bytes_to_buf(algo, &buf[..total], out));
286 }
287
288 use std::os::unix::io::FromRawFd;
291 let file = unsafe { File::from_raw_fd(fd) };
292 let hash_str = if is_regular && size > 0 {
293 if size >= SMALL_FILE_LIMIT {
294 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
295 if let Ok(mmap) = mmap_result {
296 if size >= 2 * 1024 * 1024 {
297 let _ = mmap.advise(memmap2::Advice::HugePage);
298 }
299 let _ = mmap.advise(memmap2::Advice::Sequential);
300 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
301 let _ = mmap.advise(memmap2::Advice::WillNeed);
302 }
303 hash_bytes(algo, &mmap)
304 } else {
305 hash_file_small(algo, file, size as usize)?
306 }
307 } else {
308 hash_file_small(algo, file, size as usize)?
309 }
310 } else {
311 hash_reader(algo, file)?
312 };
313 let hex_bytes = hash_str.as_bytes();
314 out[..hex_bytes.len()].copy_from_slice(hex_bytes);
315 Ok(hex_bytes.len())
316}
317
318#[cfg(target_os = "linux")]
322fn md5_bytes(data: &[u8]) -> String {
323 if data.len() < TINY_FILE_LIMIT as usize {
327 use digest::Digest;
328 return hex_encode(&md5::Md5::digest(data));
329 }
330 let digest =
331 openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
332 hex_encode(&digest)
333}
334
/// MD5 of `data` computed with the `md5` crate by delegating to `hash_digest`
/// (non-Linux builds).
#[cfg(not(target_os = "linux"))]
fn md5_bytes(data: &[u8]) -> String {
    type Md5Impl = Md5;
    hash_digest::<Md5Impl>(data)
}
340
341pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
343 match algo {
344 HashAlgorithm::Sha256 => sha256_reader(reader),
345 HashAlgorithm::Md5 => md5_reader(reader),
346 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
347 }
348}
349
350#[cfg(target_os = "linux")]
352fn md5_reader(mut reader: impl Read) -> io::Result<String> {
353 STREAM_BUF.with(|cell| {
354 let mut buf = cell.borrow_mut();
355 ensure_stream_buf(&mut buf);
356 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
357 .map_err(|e| io::Error::other(e))?;
358 loop {
359 let n = read_full(&mut reader, &mut buf)?;
360 if n == 0 {
361 break;
362 }
363 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
364 }
365 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
366 Ok(hex_encode(&digest))
367 })
368}
369
/// Streams `reader` through the `md5` crate's hasher by delegating to
/// `hash_reader_impl` (non-Linux builds).
#[cfg(not(target_os = "linux"))]
fn md5_reader(reader: impl Read) -> io::Result<String> {
    type Md5Impl = Md5;
    hash_reader_impl::<Md5Impl>(reader)
}
375
376#[cfg(target_os = "linux")]
379static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
380
381#[cfg(target_os = "linux")]
384fn open_noatime(path: &Path) -> io::Result<File> {
385 use std::os::unix::fs::OpenOptionsExt;
386 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
387 match std::fs::OpenOptions::new()
388 .read(true)
389 .custom_flags(libc::O_NOATIME)
390 .open(path)
391 {
392 Ok(f) => return Ok(f),
393 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
394 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
396 }
397 Err(e) => return Err(e), }
399 }
400 File::open(path)
401}
402
/// Opens `path` read-only (non-Linux builds, where no `O_NOATIME` attempt is made).
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
407
408#[cfg(target_os = "linux")]
411#[inline]
412fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
413 let file = open_noatime(path)?;
414 let fd = {
415 use std::os::unix::io::AsRawFd;
416 file.as_raw_fd()
417 };
418 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
419 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
420 return Err(io::Error::last_os_error());
421 }
422 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
423 let size = stat.st_size as u64;
424 Ok((file, size, is_regular))
425}
426
/// Opens `path` via `open_noatime` and queries its metadata (non-Linux builds).
///
/// Returns `(file, size_in_bytes, is_regular_file)`.
#[cfg(not(target_os = "linux"))]
#[inline]
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let (size, is_regular) = {
        let meta = file.metadata()?;
        (meta.len(), meta.is_file())
    };
    Ok((file, size, is_regular))
}
434
/// Files of at least this many bytes (1 MiB) receive a `posix_fadvise` hint.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1 << 20;

/// Regular files below this size (16 MiB) are read into a buffer in one go;
/// larger ones take the mmap / pipelined paths.
const SMALL_FILE_LIMIT: u64 = 16 << 20;

/// Regular files below this size (8 KiB) are read into a stack buffer.
const TINY_FILE_LIMIT: u64 = 8 << 10;

thread_local! {
    /// Per-thread reusable buffer for the small-file paths, created with
    /// 64 KiB of capacity.
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 << 10));
}
459
460#[cfg(target_os = "linux")]
464fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
465 match unsafe { memmap2::MmapOptions::new().map(&file) } {
467 Ok(mmap) => {
468 if file_size >= 2 * 1024 * 1024 {
469 let _ = mmap.advise(memmap2::Advice::HugePage);
470 }
471 let _ = mmap.advise(memmap2::Advice::Sequential);
472 if file_size >= 4 * 1024 * 1024 {
473 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
474 let _ = mmap.advise(memmap2::Advice::WillNeed);
475 }
476 } else {
477 let _ = mmap.advise(memmap2::Advice::WillNeed);
478 }
479 Ok(hash_bytes(algo, &mmap))
480 }
481 Err(_) => hash_file_pipelined_read(algo, file, file_size),
482 }
483}
484
485#[cfg(target_os = "linux")]
489fn hash_file_pipelined_read(
490 algo: HashAlgorithm,
491 mut file: File,
492 file_size: u64,
493) -> io::Result<String> {
494 use std::os::unix::io::AsRawFd;
495
496 const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; unsafe {
499 libc::posix_fadvise(
500 file.as_raw_fd(),
501 0,
502 file_size as i64,
503 libc::POSIX_FADV_SEQUENTIAL,
504 );
505 }
506
507 let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
508 let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
509 let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
510
511 let reader_handle = std::thread::spawn(move || -> io::Result<()> {
512 while let Ok(mut buf) = buf_rx.recv() {
513 let mut total = 0;
514 while total < buf.len() {
515 match file.read(&mut buf[total..]) {
516 Ok(0) => break,
517 Ok(n) => total += n,
518 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
519 Err(e) => return Err(e),
520 }
521 }
522 if total == 0 {
523 break;
524 }
525 if tx.send((buf, total)).is_err() {
526 break;
527 }
528 }
529 Ok(())
530 });
531
532 let hash_result = match algo {
533 HashAlgorithm::Sha256 => {
534 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
535 .map_err(|e| io::Error::other(e))?;
536 while let Ok((buf, n)) = rx.recv() {
537 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
538 let _ = buf_tx.send(buf);
539 }
540 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
541 Ok(hex_encode(&digest))
542 }
543 HashAlgorithm::Md5 => {
544 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
545 .map_err(|e| io::Error::other(e))?;
546 while let Ok((buf, n)) = rx.recv() {
547 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
548 let _ = buf_tx.send(buf);
549 }
550 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
551 Ok(hex_encode(&digest))
552 }
553 HashAlgorithm::Blake2b => {
554 let mut state = blake2b_simd::Params::new().to_state();
555 while let Ok((buf, n)) = rx.recv() {
556 state.update(&buf[..n]);
557 let _ = buf_tx.send(buf);
558 }
559 Ok(hex_encode(state.finalize().as_bytes()))
560 }
561 };
562
563 match reader_handle.join() {
564 Ok(Ok(())) => {}
565 Ok(Err(e)) => {
566 if hash_result.is_ok() {
567 return Err(e);
568 }
569 }
570 Err(payload) => {
571 let msg = if let Some(s) = payload.downcast_ref::<&str>() {
572 format!("reader thread panicked: {}", s)
573 } else if let Some(s) = payload.downcast_ref::<String>() {
574 format!("reader thread panicked: {}", s)
575 } else {
576 "reader thread panicked".to_string()
577 };
578 return Err(io::Error::other(msg));
579 }
580 }
581
582 hash_result
583}
584
585pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
589 let (file, file_size, is_regular) = open_and_stat(path)?;
590
591 if is_regular && file_size == 0 {
592 return Ok(hash_bytes(algo, &[]));
593 }
594
595 if file_size > 0 && is_regular {
596 if file_size < TINY_FILE_LIMIT {
598 return hash_file_tiny(algo, file, file_size as usize);
599 }
600 if file_size >= SMALL_FILE_LIMIT {
602 #[cfg(target_os = "linux")]
603 {
604 return hash_file_pipelined(algo, file, file_size);
605 }
606 #[cfg(not(target_os = "linux"))]
608 {
609 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
610 if let Ok(mmap) = mmap_result {
611 return Ok(hash_bytes(algo, &mmap));
612 }
613 }
614 }
615 if file_size < SMALL_FILE_LIMIT {
618 return hash_file_small(algo, file, file_size as usize);
619 }
620 }
621
622 #[cfg(target_os = "linux")]
624 if file_size >= FADVISE_MIN_SIZE {
625 use std::os::unix::io::AsRawFd;
626 unsafe {
627 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
628 }
629 }
630 hash_reader(algo, file)
631}
632
633#[inline]
637fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
638 let mut buf = [0u8; 8192];
639 let mut total = 0;
640 while total < size {
642 match file.read(&mut buf[total..size]) {
643 Ok(0) => break,
644 Ok(n) => total += n,
645 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
646 Err(e) => return Err(e),
647 }
648 }
649 Ok(hash_bytes(algo, &buf[..total]))
650}
651
652#[inline]
655fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
656 SMALL_FILE_BUF.with(|cell| {
657 let mut buf = cell.borrow_mut();
658 buf.clear();
660 buf.reserve(size);
661 unsafe {
664 buf.set_len(size);
665 }
666 let mut total = 0;
667 while total < size {
668 match file.read(&mut buf[total..size]) {
669 Ok(0) => break,
670 Ok(n) => total += n,
671 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
672 Err(e) => return Err(e),
673 }
674 }
675 Ok(hash_bytes(algo, &buf[..total]))
676 })
677}
678
679pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
681 let stdin = io::stdin();
682 #[cfg(target_os = "linux")]
684 {
685 use std::os::unix::io::AsRawFd;
686 let fd = stdin.as_raw_fd();
687 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
688 if unsafe { libc::fstat(fd, &mut stat) } == 0
689 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
690 && stat.st_size > 0
691 {
692 unsafe {
693 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
694 }
695 }
696 }
697 hash_reader(algo, stdin.lock())
699}
700
/// Returns `true` when there are at least two paths to hash.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
708
709#[cfg(target_os = "linux")]
714pub fn readahead_files(paths: &[&Path]) {
715 use std::os::unix::io::AsRawFd;
716 for path in paths {
717 if let Ok(file) = open_noatime(path) {
718 if let Ok(meta) = file.metadata() {
719 let len = meta.len();
720 if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
721 unsafe {
722 libc::posix_fadvise(
723 file.as_raw_fd(),
724 0,
725 len as i64,
726 libc::POSIX_FADV_WILLNEED,
727 );
728 }
729 }
730 }
731 }
732 }
733}
734
/// No-op counterpart of the Linux `readahead_files`; on other targets no
/// read-ahead hint is issued.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files(_paths: &[&Path]) {}
739
740pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
745 let hash = blake2b_simd::Params::new()
746 .hash_length(output_bytes)
747 .hash(data);
748 hex_encode(hash.as_bytes())
749}
750
751pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
754 STREAM_BUF.with(|cell| {
755 let mut buf = cell.borrow_mut();
756 ensure_stream_buf(&mut buf);
757 let mut state = blake2b_simd::Params::new()
758 .hash_length(output_bytes)
759 .to_state();
760 loop {
761 let n = read_full(&mut reader, &mut buf)?;
762 if n == 0 {
763 break;
764 }
765 state.update(&buf[..n]);
766 }
767 Ok(hex_encode(state.finalize().as_bytes()))
768 })
769}
770
771pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
775 let (file, file_size, is_regular) = open_and_stat(path)?;
776
777 if is_regular && file_size == 0 {
778 return Ok(blake2b_hash_data(&[], output_bytes));
779 }
780
781 if file_size > 0 && is_regular {
782 if file_size < TINY_FILE_LIMIT {
784 return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
785 }
786 if file_size >= SMALL_FILE_LIMIT {
788 #[cfg(target_os = "linux")]
789 {
790 return blake2b_hash_file_pipelined(file, file_size, output_bytes);
791 }
792 #[cfg(not(target_os = "linux"))]
793 {
794 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
795 if let Ok(mmap) = mmap_result {
796 return Ok(blake2b_hash_data(&mmap, output_bytes));
797 }
798 }
799 }
800 if file_size < SMALL_FILE_LIMIT {
802 return blake2b_hash_file_small(file, file_size as usize, output_bytes);
803 }
804 }
805
806 #[cfg(target_os = "linux")]
808 if file_size >= FADVISE_MIN_SIZE {
809 use std::os::unix::io::AsRawFd;
810 unsafe {
811 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
812 }
813 }
814 blake2b_hash_reader(file, output_bytes)
815}
816
817#[inline]
819fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
820 let mut buf = [0u8; 8192];
821 let mut total = 0;
822 while total < size {
823 match file.read(&mut buf[total..size]) {
824 Ok(0) => break,
825 Ok(n) => total += n,
826 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
827 Err(e) => return Err(e),
828 }
829 }
830 Ok(blake2b_hash_data(&buf[..total], output_bytes))
831}
832
833#[inline]
835fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
836 SMALL_FILE_BUF.with(|cell| {
837 let mut buf = cell.borrow_mut();
838 buf.clear();
839 buf.reserve(size);
840 unsafe {
842 buf.set_len(size);
843 }
844 let mut total = 0;
845 while total < size {
846 match file.read(&mut buf[total..size]) {
847 Ok(0) => break,
848 Ok(n) => total += n,
849 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
850 Err(e) => return Err(e),
851 }
852 }
853 Ok(blake2b_hash_data(&buf[..total], output_bytes))
854 })
855}
856
857#[cfg(target_os = "linux")]
862fn blake2b_hash_file_pipelined(
863 file: File,
864 file_size: u64,
865 output_bytes: usize,
866) -> io::Result<String> {
867 match unsafe { memmap2::MmapOptions::new().map(&file) } {
871 Ok(mmap) => {
872 if file_size >= 2 * 1024 * 1024 {
875 let _ = mmap.advise(memmap2::Advice::HugePage);
876 }
877 let _ = mmap.advise(memmap2::Advice::Sequential);
878 if file_size >= 4 * 1024 * 1024 {
881 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
882 let _ = mmap.advise(memmap2::Advice::WillNeed);
883 }
884 } else {
885 let _ = mmap.advise(memmap2::Advice::WillNeed);
886 }
887 Ok(blake2b_hash_data(&mmap, output_bytes))
890 }
891 Err(_) => {
892 blake2b_hash_file_streamed(file, file_size, output_bytes)
895 }
896 }
897}
898
899#[cfg(target_os = "linux")]
903fn blake2b_hash_file_streamed(
904 mut file: File,
905 file_size: u64,
906 output_bytes: usize,
907) -> io::Result<String> {
908 use std::os::unix::io::AsRawFd;
909
910 const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; unsafe {
914 libc::posix_fadvise(
915 file.as_raw_fd(),
916 0,
917 file_size as i64,
918 libc::POSIX_FADV_SEQUENTIAL,
919 );
920 }
921
922 let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
924 let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
925 let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
926
927 let reader_handle = std::thread::spawn(move || -> io::Result<()> {
928 while let Ok(mut buf) = buf_rx.recv() {
930 let mut total = 0;
931 while total < buf.len() {
932 match file.read(&mut buf[total..]) {
933 Ok(0) => break,
934 Ok(n) => total += n,
935 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
936 Err(e) => return Err(e),
937 }
938 }
939 if total == 0 {
940 break;
941 }
942 if tx.send((buf, total)).is_err() {
943 break;
944 }
945 }
946 Ok(())
947 });
948
949 let mut state = blake2b_simd::Params::new()
950 .hash_length(output_bytes)
951 .to_state();
952 while let Ok((buf, n)) = rx.recv() {
953 state.update(&buf[..n]);
954 let _ = buf_tx.send(buf);
955 }
956 let hash_result = Ok(hex_encode(state.finalize().as_bytes()));
957
958 match reader_handle.join() {
959 Ok(Ok(())) => {}
960 Ok(Err(e)) => {
961 if hash_result.is_ok() {
962 return Err(e);
963 }
964 }
965 Err(payload) => {
966 let msg = if let Some(s) = payload.downcast_ref::<&str>() {
967 format!("reader thread panicked: {}", s)
968 } else if let Some(s) = payload.downcast_ref::<String>() {
969 format!("reader thread panicked: {}", s)
970 } else {
971 "reader thread panicked".to_string()
972 };
973 return Err(io::Error::other(msg));
974 }
975 }
976
977 hash_result
978}
979
980pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
983 let stdin = io::stdin();
984 #[cfg(target_os = "linux")]
985 {
986 use std::os::unix::io::AsRawFd;
987 let fd = stdin.as_raw_fd();
988 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
989 if unsafe { libc::fstat(fd, &mut stat) } == 0
990 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
991 && stat.st_size > 0
992 {
993 unsafe {
994 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
995 }
996 }
997 }
998 blake2b_hash_reader(stdin.lock(), output_bytes)
999}
1000
1001enum FileContent {
1004 Mmap(memmap2::Mmap),
1005 Buf(Vec<u8>),
1006}
1007
1008impl AsRef<[u8]> for FileContent {
1009 fn as_ref(&self) -> &[u8] {
1010 match self {
1011 FileContent::Mmap(m) => m,
1012 FileContent::Buf(v) => v,
1013 }
1014 }
1015}
1016
1017fn open_file_content(path: &Path) -> io::Result<FileContent> {
1021 let (file, size, is_regular) = open_and_stat(path)?;
1022 if is_regular && size == 0 {
1023 return Ok(FileContent::Buf(Vec::new()));
1024 }
1025 if is_regular && size > 0 {
1026 if size < TINY_FILE_LIMIT {
1030 let mut buf = vec![0u8; size as usize];
1031 let mut total = 0;
1032 let mut f = file;
1033 while total < size as usize {
1034 match f.read(&mut buf[total..]) {
1035 Ok(0) => break,
1036 Ok(n) => total += n,
1037 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1038 Err(e) => return Err(e),
1039 }
1040 }
1041 buf.truncate(total);
1042 return Ok(FileContent::Buf(buf));
1043 }
1044 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1046 if let Ok(mmap) = mmap_result {
1047 #[cfg(target_os = "linux")]
1048 {
1049 if size >= 2 * 1024 * 1024 {
1050 let _ = mmap.advise(memmap2::Advice::HugePage);
1051 }
1052 let _ = mmap.advise(memmap2::Advice::Sequential);
1053 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1054 let _ = mmap.advise(memmap2::Advice::WillNeed);
1055 }
1056 }
1057 return Ok(FileContent::Mmap(mmap));
1058 }
1059 let mut buf = vec![0u8; size as usize];
1061 let mut total = 0;
1062 let mut f = file;
1063 while total < size as usize {
1064 match f.read(&mut buf[total..]) {
1065 Ok(0) => break,
1066 Ok(n) => total += n,
1067 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1068 Err(e) => return Err(e),
1069 }
1070 }
1071 buf.truncate(total);
1072 return Ok(FileContent::Buf(buf));
1073 }
1074 let mut buf = Vec::new();
1076 let mut f = file;
1077 f.read_to_end(&mut buf)?;
1078 Ok(FileContent::Buf(buf))
1079}
1080
1081fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1085 let mut buf = Vec::with_capacity(prefix.len() + 65536);
1086 buf.extend_from_slice(prefix);
1087 file.read_to_end(&mut buf)?;
1088 Ok(FileContent::Buf(buf))
1089}
1090
1091fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1096 let mut file = open_noatime(path)?;
1097 let mut small_buf = [0u8; 4096];
1100 match file.read(&mut small_buf) {
1101 Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1102 Ok(n) if n < small_buf.len() => {
1103 return Ok(FileContent::Buf(small_buf[..n].to_vec()));
1105 }
1106 Ok(n) => {
1107 let mut buf = [0u8; 65536];
1109 buf[..n].copy_from_slice(&small_buf[..n]);
1110 let mut total = n;
1111 loop {
1112 match file.read(&mut buf[total..]) {
1113 Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
1114 Ok(n) => {
1115 total += n;
1116 if total >= buf.len() {
1117 return read_remaining_to_vec(&buf[..total], file);
1119 }
1120 }
1121 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1122 Err(e) => return Err(e),
1123 }
1124 }
1125 }
1126 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1127 let mut buf = [0u8; 65536];
1128 let mut total = 0;
1129 loop {
1130 match file.read(&mut buf[total..]) {
1131 Ok(0) => return Ok(FileContent::Buf(buf[..total].to_vec())),
1132 Ok(n) => {
1133 total += n;
1134 if total >= buf.len() {
1135 return read_remaining_to_vec(&buf[..total], file);
1137 }
1138 }
1139 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1140 Err(e) => return Err(e),
1141 }
1142 }
1143 }
1144 Err(e) => return Err(e),
1145 }
1146}
1147
1148pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
1156 use blake2b_simd::many::{HashManyJob, hash_many};
1157
1158 let use_fast = paths.len() >= 20;
1163
1164 let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
1165 paths.iter().map(|&path| open_file_content(path)).collect()
1167 } else {
1168 let num_threads = std::thread::available_parallelism()
1169 .map(|n| n.get())
1170 .unwrap_or(4)
1171 .min(paths.len());
1172 let chunk_size = (paths.len() + num_threads - 1) / num_threads;
1173
1174 std::thread::scope(|s| {
1175 let handles: Vec<_> = paths
1176 .chunks(chunk_size)
1177 .map(|chunk| {
1178 s.spawn(move || {
1179 chunk
1180 .iter()
1181 .map(|&path| {
1182 if use_fast {
1183 open_file_content_fast(path)
1184 } else {
1185 open_file_content(path)
1186 }
1187 })
1188 .collect::<Vec<_>>()
1189 })
1190 })
1191 .collect();
1192
1193 handles
1194 .into_iter()
1195 .flat_map(|h| h.join().unwrap())
1196 .collect()
1197 })
1198 };
1199
1200 let hash_results = {
1202 let mut params = blake2b_simd::Params::new();
1203 params.hash_length(output_bytes);
1204
1205 let ok_entries: Vec<(usize, &[u8])> = file_data
1206 .iter()
1207 .enumerate()
1208 .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
1209 .collect();
1210
1211 let mut jobs: Vec<HashManyJob> = ok_entries
1212 .iter()
1213 .map(|(_, data)| HashManyJob::new(¶ms, data))
1214 .collect();
1215
1216 hash_many(jobs.iter_mut());
1218
1219 let mut hm: Vec<Option<String>> = vec![None; paths.len()];
1221 for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
1222 hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
1223 }
1224 hm
1225 }; hash_results
1229 .into_iter()
1230 .zip(file_data)
1231 .map(|(hash_opt, result)| match result {
1232 Ok(_) => Ok(hash_opt.unwrap()),
1233 Err(e) => Err(e),
1234 })
1235 .collect()
1236}
1237
1238pub fn blake2b_hash_files_parallel(
1246 paths: &[&Path],
1247 output_bytes: usize,
1248) -> Vec<io::Result<String>> {
1249 let n = paths.len();
1250
1251 let sample_count = n.min(5);
1255 let mut sample_max: u64 = 0;
1256 let mut sample_total: u64 = 0;
1257 for &p in paths.iter().take(sample_count) {
1258 let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
1259 sample_total += size;
1260 sample_max = sample_max.max(size);
1261 }
1262 let estimated_total = if sample_count > 0 {
1263 sample_total * (n as u64) / (sample_count as u64)
1264 } else {
1265 0
1266 };
1267
1268 if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
1271 return blake2b_hash_files_many(paths, output_bytes);
1272 }
1273
1274 let mut indexed: Vec<(usize, &Path, u64)> = paths
1276 .iter()
1277 .enumerate()
1278 .map(|(i, &p)| {
1279 let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
1280 (i, p, size)
1281 })
1282 .collect();
1283
1284 indexed.sort_by(|a, b| b.2.cmp(&a.2));
1287
1288 #[cfg(target_os = "linux")]
1293 {
1294 use std::os::unix::io::AsRawFd;
1295 for &(_, path, size) in indexed.iter().take(20) {
1296 if size >= 1024 * 1024 {
1297 if let Ok(file) = open_noatime(path) {
1298 unsafe {
1299 libc::readahead(file.as_raw_fd(), 0, size as usize);
1300 }
1301 }
1302 }
1303 }
1304 }
1305
1306 let num_threads = std::thread::available_parallelism()
1307 .map(|n| n.get())
1308 .unwrap_or(4)
1309 .min(n);
1310
1311 let work_idx = AtomicUsize::new(0);
1313
1314 std::thread::scope(|s| {
1315 let work_idx = &work_idx;
1316 let indexed = &indexed;
1317
1318 let handles: Vec<_> = (0..num_threads)
1319 .map(|_| {
1320 s.spawn(move || {
1321 let mut local_results = Vec::new();
1322 loop {
1323 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1324 if idx >= indexed.len() {
1325 break;
1326 }
1327 let (orig_idx, path, _size) = indexed[idx];
1328 let result = blake2b_hash_file(path, output_bytes);
1329 local_results.push((orig_idx, result));
1330 }
1331 local_results
1332 })
1333 })
1334 .collect();
1335
1336 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1338 for handle in handles {
1339 for (orig_idx, result) in handle.join().unwrap() {
1340 results[orig_idx] = Some(result);
1341 }
1342 }
1343 results
1344 .into_iter()
1345 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1346 .collect()
1347 })
1348}
1349
1350pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1356 let n = paths.len();
1357
1358 let mut indexed: Vec<(usize, &Path, u64)> = paths
1361 .iter()
1362 .enumerate()
1363 .map(|(i, &p)| {
1364 let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
1365 (i, p, size)
1366 })
1367 .collect();
1368
1369 indexed.sort_by(|a, b| b.2.cmp(&a.2));
1372
1373 #[cfg(target_os = "linux")]
1378 {
1379 use std::os::unix::io::AsRawFd;
1380 for &(_, path, size) in indexed.iter().take(20) {
1381 if size >= 1024 * 1024 {
1382 if let Ok(file) = open_noatime(path) {
1383 unsafe {
1384 libc::readahead(file.as_raw_fd(), 0, size as usize);
1385 }
1386 }
1387 }
1388 }
1389 }
1390
1391 let num_threads = std::thread::available_parallelism()
1392 .map(|n| n.get())
1393 .unwrap_or(4)
1394 .min(n);
1395
1396 let work_idx = AtomicUsize::new(0);
1398
1399 std::thread::scope(|s| {
1400 let work_idx = &work_idx;
1401 let indexed = &indexed;
1402
1403 let handles: Vec<_> = (0..num_threads)
1404 .map(|_| {
1405 s.spawn(move || {
1406 let mut local_results = Vec::new();
1407 loop {
1408 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1409 if idx >= indexed.len() {
1410 break;
1411 }
1412 let (orig_idx, path, _size) = indexed[idx];
1413 let result = hash_file(algo, path);
1414 local_results.push((orig_idx, result));
1415 }
1416 local_results
1417 })
1418 })
1419 .collect();
1420
1421 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1423 for handle in handles {
1424 for (orig_idx, result) in handle.join().unwrap() {
1425 results[orig_idx] = Some(result);
1426 }
1427 }
1428 results
1429 .into_iter()
1430 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1431 .collect()
1432 })
1433}
1434
1435pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1441 let n = paths.len();
1442 if n == 0 {
1443 return Vec::new();
1444 }
1445 if n == 1 {
1446 return vec![hash_file_nostat(algo, paths[0])];
1447 }
1448
1449 #[cfg(target_os = "linux")]
1452 readahead_files_all(paths);
1453
1454 let num_threads = std::thread::available_parallelism()
1455 .map(|n| n.get())
1456 .unwrap_or(4)
1457 .min(n);
1458
1459 let work_idx = AtomicUsize::new(0);
1460
1461 std::thread::scope(|s| {
1462 let work_idx = &work_idx;
1463
1464 let handles: Vec<_> = (0..num_threads)
1465 .map(|_| {
1466 s.spawn(move || {
1467 let mut local_results = Vec::new();
1468 loop {
1469 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1470 if idx >= n {
1471 break;
1472 }
1473 let result = hash_file_nostat(algo, paths[idx]);
1474 local_results.push((idx, result));
1475 }
1476 local_results
1477 })
1478 })
1479 .collect();
1480
1481 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1482 for handle in handles {
1483 for (idx, result) in handle.join().unwrap() {
1484 results[idx] = Some(result);
1485 }
1486 }
1487 results
1488 .into_iter()
1489 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1490 .collect()
1491 })
1492}
1493
1494pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1503 let n = paths.len();
1504 if n == 0 {
1505 return Vec::new();
1506 }
1507
1508 #[cfg(target_os = "linux")]
1510 readahead_files_all(paths);
1511
1512 let use_fast = n >= 20;
1515
1516 let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
1517 paths
1519 .iter()
1520 .map(|&path| {
1521 if use_fast {
1522 open_file_content_fast(path)
1523 } else {
1524 open_file_content(path)
1525 }
1526 })
1527 .collect()
1528 } else {
1529 let num_threads = std::thread::available_parallelism()
1530 .map(|t| t.get())
1531 .unwrap_or(4)
1532 .min(n);
1533 let chunk_size = (n + num_threads - 1) / num_threads;
1534
1535 std::thread::scope(|s| {
1536 let handles: Vec<_> = paths
1537 .chunks(chunk_size)
1538 .map(|chunk| {
1539 s.spawn(move || {
1540 chunk
1541 .iter()
1542 .map(|&path| {
1543 if use_fast {
1544 open_file_content_fast(path)
1545 } else {
1546 open_file_content(path)
1547 }
1548 })
1549 .collect::<Vec<_>>()
1550 })
1551 })
1552 .collect();
1553
1554 handles
1555 .into_iter()
1556 .flat_map(|h| h.join().unwrap())
1557 .collect()
1558 })
1559 };
1560
1561 let num_hash_threads = std::thread::available_parallelism()
1564 .map(|t| t.get())
1565 .unwrap_or(4)
1566 .min(n);
1567 let work_idx = AtomicUsize::new(0);
1568
1569 std::thread::scope(|s| {
1570 let work_idx = &work_idx;
1571 let file_data = &file_data;
1572
1573 let handles: Vec<_> = (0..num_hash_threads)
1574 .map(|_| {
1575 s.spawn(move || {
1576 let mut local_results = Vec::new();
1577 loop {
1578 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1579 if idx >= n {
1580 break;
1581 }
1582 let result = match &file_data[idx] {
1583 Ok(content) => Ok(hash_bytes(algo, content.as_ref())),
1584 Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
1585 };
1586 local_results.push((idx, result));
1587 }
1588 local_results
1589 })
1590 })
1591 .collect();
1592
1593 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1594 for handle in handles {
1595 for (idx, result) in handle.join().unwrap() {
1596 results[idx] = Some(result);
1597 }
1598 }
1599 results
1600 .into_iter()
1601 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1602 .collect()
1603 })
1604}
1605
1606fn hash_stream_with_prefix(
1610 algo: HashAlgorithm,
1611 prefix: &[u8],
1612 mut file: File,
1613) -> io::Result<String> {
1614 match algo {
1615 HashAlgorithm::Sha256 => {
1616 #[cfg(target_os = "linux")]
1617 {
1618 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
1619 .map_err(|e| io::Error::other(e))?;
1620 hasher.update(prefix).map_err(|e| io::Error::other(e))?;
1621 STREAM_BUF.with(|cell| {
1622 let mut buf = cell.borrow_mut();
1623 ensure_stream_buf(&mut buf);
1624 loop {
1625 let n = read_full(&mut file, &mut buf)?;
1626 if n == 0 {
1627 break;
1628 }
1629 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
1630 }
1631 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
1632 Ok(hex_encode(&digest))
1633 })
1634 }
1635 #[cfg(not(target_os = "linux"))]
1636 {
1637 hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file)
1638 }
1639 }
1640 HashAlgorithm::Md5 => {
1641 #[cfg(target_os = "linux")]
1642 {
1643 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
1644 .map_err(|e| io::Error::other(e))?;
1645 hasher.update(prefix).map_err(|e| io::Error::other(e))?;
1646 STREAM_BUF.with(|cell| {
1647 let mut buf = cell.borrow_mut();
1648 ensure_stream_buf(&mut buf);
1649 loop {
1650 let n = read_full(&mut file, &mut buf)?;
1651 if n == 0 {
1652 break;
1653 }
1654 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
1655 }
1656 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
1657 Ok(hex_encode(&digest))
1658 })
1659 }
1660 #[cfg(not(target_os = "linux"))]
1661 {
1662 hash_stream_with_prefix_digest::<md5::Md5>(prefix, file)
1663 }
1664 }
1665 HashAlgorithm::Blake2b => {
1666 let mut state = blake2b_simd::Params::new().to_state();
1667 state.update(prefix);
1668 STREAM_BUF.with(|cell| {
1669 let mut buf = cell.borrow_mut();
1670 ensure_stream_buf(&mut buf);
1671 loop {
1672 let n = read_full(&mut file, &mut buf)?;
1673 if n == 0 {
1674 break;
1675 }
1676 state.update(&buf[..n]);
1677 }
1678 Ok(hex_encode(state.finalize().as_bytes()))
1679 })
1680 }
1681 }
1682}
1683
/// Hashes `prefix` followed by the unread remainder of `file` with any
/// `digest::Digest` implementation and returns the lowercase hex digest.
///
/// The file is streamed through the thread-local `STREAM_BUF`.
#[cfg(not(target_os = "linux"))]
fn hash_stream_with_prefix_digest<D: digest::Digest>(
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut scratch = cell.borrow_mut();
        ensure_stream_buf(&mut scratch);

        let mut hasher = D::new();
        hasher.update(prefix);
        loop {
            match read_full(&mut file, &mut scratch)? {
                // A zero-length fill means EOF.
                0 => break,
                filled => hasher.update(&scratch[..filled]),
            }
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}
1705
1706pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
1712 let mut file = open_noatime(path)?;
1713 let mut small_buf = [0u8; 4096];
1717 match file.read(&mut small_buf) {
1718 Ok(0) => return Ok(hash_bytes(algo, &[])),
1719 Ok(n) if n < small_buf.len() => {
1720 return Ok(hash_bytes(algo, &small_buf[..n]));
1722 }
1723 Ok(n) => {
1724 let mut buf = [0u8; 65536];
1726 buf[..n].copy_from_slice(&small_buf[..n]);
1727 let mut total = n;
1728 loop {
1729 match file.read(&mut buf[total..]) {
1730 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1731 Ok(n) => {
1732 total += n;
1733 if total >= buf.len() {
1734 return hash_stream_with_prefix(algo, &buf[..total], file);
1737 }
1738 }
1739 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1740 Err(e) => return Err(e),
1741 }
1742 }
1743 }
1744 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1745 let mut buf = [0u8; 65536];
1747 let mut total = 0;
1748 loop {
1749 match file.read(&mut buf[total..]) {
1750 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1751 Ok(n) => {
1752 total += n;
1753 if total >= buf.len() {
1754 return hash_stream_with_prefix(algo, &buf[..total], file);
1756 }
1757 }
1758 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1759 Err(e) => return Err(e),
1760 }
1761 }
1762 }
1763 Err(e) => return Err(e),
1764 }
1765}
1766
1767#[cfg(target_os = "linux")]
1778pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
1779 use std::os::unix::ffi::OsStrExt;
1780
1781 let path_bytes = path.as_os_str().as_bytes();
1782 let c_path = std::ffi::CString::new(path_bytes)
1783 .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;
1784
1785 let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
1787 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
1788 flags |= libc::O_NOATIME;
1789 }
1790
1791 let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
1792 if fd < 0 {
1793 let err = io::Error::last_os_error();
1794 if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
1795 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
1796 let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
1797 if fd2 < 0 {
1798 return Err(io::Error::last_os_error());
1799 }
1800 return hash_from_raw_fd(algo, fd2);
1801 }
1802 return Err(err);
1803 }
1804 hash_from_raw_fd(algo, fd)
1805}
1806
1807#[cfg(target_os = "linux")]
1811fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
1812 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1814 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
1815 let err = io::Error::last_os_error();
1816 unsafe {
1817 libc::close(fd);
1818 }
1819 return Err(err);
1820 }
1821 let size = stat.st_size as u64;
1822 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
1823
1824 if is_regular && size == 0 {
1826 unsafe {
1827 libc::close(fd);
1828 }
1829 return Ok(hash_bytes(algo, &[]));
1830 }
1831
1832 if is_regular && size < TINY_FILE_LIMIT {
1835 let mut buf = [0u8; 8192];
1836 let mut total = 0usize;
1837 while total < size as usize {
1838 let n = unsafe {
1839 libc::read(
1840 fd,
1841 buf[total..].as_mut_ptr() as *mut libc::c_void,
1842 (size as usize) - total,
1843 )
1844 };
1845 if n < 0 {
1846 let err = io::Error::last_os_error();
1847 if err.kind() == io::ErrorKind::Interrupted {
1848 continue;
1849 }
1850 unsafe {
1851 libc::close(fd);
1852 }
1853 return Err(err);
1854 }
1855 if n == 0 {
1856 break;
1857 }
1858 total += n as usize;
1859 }
1860 unsafe {
1861 libc::close(fd);
1862 }
1863 return Ok(hash_bytes(algo, &buf[..total]));
1864 }
1865
1866 use std::os::unix::io::FromRawFd;
1868 let file = unsafe { File::from_raw_fd(fd) };
1869
1870 if is_regular && size > 0 {
1871 if size >= SMALL_FILE_LIMIT {
1873 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1874 if let Ok(mmap) = mmap_result {
1875 if size >= 2 * 1024 * 1024 {
1876 let _ = mmap.advise(memmap2::Advice::HugePage);
1877 }
1878 let _ = mmap.advise(memmap2::Advice::Sequential);
1879 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1881 let _ = mmap.advise(memmap2::Advice::WillNeed);
1882 }
1883 return Ok(hash_bytes(algo, &mmap));
1884 }
1885 }
1886 return hash_file_small(algo, file, size as usize);
1888 }
1889
1890 hash_reader(algo, file)
1892}
1893
1894#[cfg(target_os = "linux")]
1897pub fn readahead_files_all(paths: &[&Path]) {
1898 use std::os::unix::io::AsRawFd;
1899 for path in paths {
1900 if let Ok(file) = open_noatime(path) {
1901 if let Ok(meta) = file.metadata() {
1902 if meta.file_type().is_file() {
1903 let len = meta.len();
1904 unsafe {
1905 libc::posix_fadvise(
1906 file.as_raw_fd(),
1907 0,
1908 len as i64,
1909 libc::POSIX_FADV_WILLNEED,
1910 );
1911 }
1912 }
1913 }
1914 }
1915 }
1916}
1917
/// No-op counterpart of the Linux `readahead_files_all`, so callers compile on
/// every platform. The paths are ignored.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
1920
/// Writes one checksum line to `out`: the hash, a space, a mode character
/// (`*` when `binary`, otherwise a space), the filename, and a newline.
///
/// # Errors
///
/// Returns the first write error from `out`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\n"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
1935
/// Same layout as `print_hash`, but the line ends with a NUL byte instead of
/// a newline.
///
/// # Errors
///
/// Returns the first write error from `out`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\0"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
1949
// Per-thread scratch buffer in which `write_hash_line` and
// `write_hash_tag_line` assemble a whole output line, so each line reaches the
// writer in a single `write_all` call. It starts with 256 bytes of capacity
// and is cleared, not reallocated, before each use.
thread_local! {
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
1959
1960#[inline]
1964pub fn write_hash_line(
1965 out: &mut impl Write,
1966 hash: &str,
1967 filename: &str,
1968 binary: bool,
1969 zero: bool,
1970 escaped: bool,
1971) -> io::Result<()> {
1972 LINE_BUF.with(|cell| {
1973 let mut buf = cell.borrow_mut();
1974 buf.clear();
1975 let mode = if binary { b'*' } else { b' ' };
1976 let term = if zero { b'\0' } else { b'\n' };
1977 if escaped {
1978 buf.push(b'\\');
1979 }
1980 buf.extend_from_slice(hash.as_bytes());
1981 buf.push(b' ');
1982 buf.push(mode);
1983 buf.extend_from_slice(filename.as_bytes());
1984 buf.push(term);
1985 out.write_all(&buf)
1986 })
1987}
1988
1989#[inline]
1992pub fn write_hash_tag_line(
1993 out: &mut impl Write,
1994 algo_name: &str,
1995 hash: &str,
1996 filename: &str,
1997 zero: bool,
1998) -> io::Result<()> {
1999 LINE_BUF.with(|cell| {
2000 let mut buf = cell.borrow_mut();
2001 buf.clear();
2002 let term = if zero { b'\0' } else { b'\n' };
2003 buf.extend_from_slice(algo_name.as_bytes());
2004 buf.extend_from_slice(b" (");
2005 buf.extend_from_slice(filename.as_bytes());
2006 buf.extend_from_slice(b") = ");
2007 buf.extend_from_slice(hash.as_bytes());
2008 buf.push(term);
2009 out.write_all(&buf)
2010 })
2011}
2012
2013pub fn print_hash_tag(
2015 out: &mut impl Write,
2016 algo: HashAlgorithm,
2017 hash: &str,
2018 filename: &str,
2019) -> io::Result<()> {
2020 out.write_all(algo.name().as_bytes())?;
2021 out.write_all(b" (")?;
2022 out.write_all(filename.as_bytes())?;
2023 out.write_all(b") = ")?;
2024 out.write_all(hash.as_bytes())?;
2025 out.write_all(b"\n")
2026}
2027
2028pub fn print_hash_tag_zero(
2030 out: &mut impl Write,
2031 algo: HashAlgorithm,
2032 hash: &str,
2033 filename: &str,
2034) -> io::Result<()> {
2035 out.write_all(algo.name().as_bytes())?;
2036 out.write_all(b" (")?;
2037 out.write_all(filename.as_bytes())?;
2038 out.write_all(b") = ")?;
2039 out.write_all(hash.as_bytes())?;
2040 out.write_all(b"\0")
2041}
2042
/// Writes one tagged BLAKE2b checksum line ending with a newline.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` otherwise,
/// followed by ` (filename) = hash`.
///
/// # Errors
///
/// Returns the first write error from `out`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        _ => write!(out, "BLAKE2b-{} (", bits)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2063
/// Writes one tagged BLAKE2b checksum line ending with a NUL byte.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` otherwise,
/// followed by ` (filename) = hash`.
///
/// # Errors
///
/// Returns the first write error from `out`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        _ => write!(out, "BLAKE2b-{} (", bits)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2081
/// Options controlling what `check_file` reports while verifying checksums.
pub struct CheckOptions {
    /// Suppress the per-file `OK` lines; failures are still printed.
    pub quiet: bool,
    /// Suppress all per-file result lines (`OK`, `FAILED`, and open/read
    /// failure messages). Format warnings enabled by `warn` are unaffected.
    pub status_only: bool,
    /// Not read by `check_file` itself; presumably the caller uses it to turn
    /// format errors into a failing exit status — TODO confirm.
    pub strict: bool,
    /// Write a warning to the error stream for each improperly formatted line.
    pub warn: bool,
    /// Skip files whose hashing fails with `ErrorKind::NotFound`, counting
    /// them separately rather than as read errors.
    pub ignore_missing: bool,
    /// Prefix for format warnings. When empty the warning starts with
    /// `line N:`; otherwise it starts with `<prefix>: N:`.
    pub warn_prefix: String,
}
2094
/// Counters accumulated by `check_file` over one checksum list.
pub struct CheckResult {
    /// Files whose computed hash matched the expected one.
    pub ok: usize,
    /// Files whose computed hash differed from the expected one.
    pub mismatches: usize,
    /// Non-empty lines that could not be parsed as a checksum line.
    pub format_errors: usize,
    /// Files that could not be opened or read (excluding ignored missing files).
    pub read_errors: usize,
    /// Missing files skipped because `ignore_missing` was set.
    pub ignored_missing: usize,
}
2104
2105pub fn check_file<R: BufRead>(
2108 algo: HashAlgorithm,
2109 reader: R,
2110 opts: &CheckOptions,
2111 out: &mut impl Write,
2112 err_out: &mut impl Write,
2113) -> io::Result<CheckResult> {
2114 let quiet = opts.quiet;
2115 let status_only = opts.status_only;
2116 let warn = opts.warn;
2117 let ignore_missing = opts.ignore_missing;
2118 let mut ok_count = 0;
2119 let mut mismatch_count = 0;
2120 let mut format_errors = 0;
2121 let mut read_errors = 0;
2122 let mut ignored_missing_count = 0;
2123 let mut line_num = 0;
2124
2125 for line_result in reader.lines() {
2126 line_num += 1;
2127 let line = line_result?;
2128 let line = line.trim_end();
2129
2130 if line.is_empty() {
2131 continue;
2132 }
2133
2134 let (expected_hash, filename) = match parse_check_line(line) {
2136 Some(v) => v,
2137 None => {
2138 format_errors += 1;
2139 if warn {
2140 out.flush()?;
2141 if opts.warn_prefix.is_empty() {
2142 writeln!(
2143 err_out,
2144 "line {}: improperly formatted {} checksum line",
2145 line_num,
2146 algo.name()
2147 )?;
2148 } else {
2149 writeln!(
2150 err_out,
2151 "{}: {}: improperly formatted {} checksum line",
2152 opts.warn_prefix,
2153 line_num,
2154 algo.name()
2155 )?;
2156 }
2157 }
2158 continue;
2159 }
2160 };
2161
2162 let actual = match hash_file(algo, Path::new(filename)) {
2164 Ok(h) => h,
2165 Err(e) => {
2166 if ignore_missing && e.kind() == io::ErrorKind::NotFound {
2167 ignored_missing_count += 1;
2168 continue;
2169 }
2170 read_errors += 1;
2171 if !status_only {
2172 out.flush()?;
2173 writeln!(err_out, "{}: {}", filename, e)?;
2174 writeln!(out, "{}: FAILED open or read", filename)?;
2175 }
2176 continue;
2177 }
2178 };
2179
2180 if actual.eq_ignore_ascii_case(expected_hash) {
2181 ok_count += 1;
2182 if !quiet && !status_only {
2183 writeln!(out, "{}: OK", filename)?;
2184 }
2185 } else {
2186 mismatch_count += 1;
2187 if !status_only {
2188 writeln!(out, "{}: FAILED", filename)?;
2189 }
2190 }
2191 }
2192
2193 Ok(CheckResult {
2194 ok: ok_count,
2195 mismatches: mismatch_count,
2196 format_errors,
2197 read_errors,
2198 ignored_missing: ignored_missing_count,
2199 })
2200}
2201
/// Parses one checksum line and returns `(hash, filename)`.
///
/// Two layouts are recognised:
///
/// * tagged: `ALGO (filename) = hash`, where `ALGO` is `MD5`, `SHA256`,
///   `BLAKE2b`, or `BLAKE2b-<digits>`;
/// * standard: `hash`, a space, a mode character (a second space or `*`),
///   then the filename, optionally preceded by a single backslash.
///
/// The tagged form splits at the last `") = "`, so a filename containing that
/// sequence stays intact. The standard form splits at the first separator, so
/// the filename keeps any later spaces or `*` characters.
///
/// A line with a known tag prefix but no `") = "` falls through to the
/// standard form. Returns `None` when neither layout matches.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // Tagged layout: strip a recognised "ALGO (" prefix.
    let tagged = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            // "BLAKE2b-<digits> (": anything but ASCII digits before " (" is rejected.
            let after = line.strip_prefix("BLAKE2b-")?;
            let (bits, rest) = after.split_once(" (")?;
            bits.bytes().all(|b| b.is_ascii_digit()).then_some(rest)
        });
    if let Some((filename, hash)) = tagged.and_then(|rest| rest.rsplit_once(") = ")) {
        return Some((hash, filename));
    }

    // Standard layout; a leading backslash marks an escaped filename.
    let line = line.strip_prefix('\\').unwrap_or(line);

    // Earliest position where a space is followed by a mode character. Both
    // bytes are ASCII, so `sep` and `sep + 2` are valid char boundaries.
    let sep = line
        .as_bytes()
        .windows(2)
        .position(|pair| pair[0] == b' ' && (pair[1] == b' ' || pair[1] == b'*'))?;
    Some((&line[..sep], &line[sep + 2..]))
}
2246
/// Parses a tagged checksum line of the form `ALGO (filename) = hash` and
/// returns `(hash, filename, bits)`.
///
/// The algorithm part is everything before the first `" ("`. `bits` is the
/// number after its last `-` (e.g. `256` for `BLAKE2b-256`), or `None` when
/// there is no dash or the suffix is not a number.
///
/// The filename ends at the last `") = "`, so a filename containing that
/// sequence stays intact.
///
/// Returns `None` when the line has no `" ("` or no `") = "` after it.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.rsplit_once(") = ")?;

    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());

    Some((hash, filename, bits))
}
2267
/// Reads from `reader` until `buf` is full or EOF is reached, and returns the
/// number of bytes placed in `buf`.
///
/// A return value smaller than `buf.len()` means EOF was hit; `0` means no
/// data was left (or `buf` is empty).
///
/// # Errors
///
/// Returns the first read error other than `ErrorKind::Interrupted`.
/// Interrupted reads are retried, including the very first one. Bytes already
/// read before an error are discarded.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2290
/// Builds the byte-to-hex lookup table at compile time: entry `b` holds the
/// two lowercase ASCII hex digits of `b`, high nibble first.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: [u8; 16] = *b"0123456789abcdef";

    let mut pairs = [[0u8; 2]; 256];
    let mut byte = 0usize;
    // `for` loops are not available in `const fn`, hence the manual counter.
    while byte < 256 {
        pairs[byte] = [DIGITS[byte / 16], DIGITS[byte % 16]];
        byte += 1;
    }
    pairs
}
2303
/// Lookup table from a byte value to its two lowercase ASCII hex digits,
/// computed at compile time by `generate_hex_table`.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();
2305
2306pub(crate) fn hex_encode(bytes: &[u8]) -> String {
2309 let len = bytes.len() * 2;
2310 let mut hex = String::with_capacity(len);
2311 unsafe {
2313 let buf = hex.as_mut_vec();
2314 buf.set_len(len);
2315 hex_encode_to_slice(bytes, buf);
2316 }
2317 hex
2318}
2319
2320#[inline]
2323fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
2324 unsafe {
2326 let ptr = out.as_mut_ptr();
2327 for (i, &b) in bytes.iter().enumerate() {
2328 let pair = *HEX_TABLE.get_unchecked(b as usize);
2329 *ptr.add(i * 2) = pair[0];
2330 *ptr.add(i * 2 + 1) = pair[1];
2331 }
2332 }
2333}