1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10use digest::Digest;
11use md5::Md5;
12use sha1::Sha1;
13
/// Supported checksum algorithms.
///
/// `PartialEq`/`Eq`/`Hash` are derived so callers can compare algorithms and
/// use them as map keys; the enum is `Copy` and carries no data.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    Sha1,
    Sha224,
    Sha256,
    Sha384,
    Sha512,
    Md5,
    Blake2b,
}

impl HashAlgorithm {
    /// Canonical uppercase display name for this algorithm.
    pub fn name(self) -> &'static str {
        match self {
            HashAlgorithm::Sha1 => "SHA1",
            HashAlgorithm::Sha224 => "SHA224",
            HashAlgorithm::Sha256 => "SHA256",
            HashAlgorithm::Sha384 => "SHA384",
            HashAlgorithm::Sha512 => "SHA512",
            HashAlgorithm::Md5 => "MD5",
            HashAlgorithm::Blake2b => "BLAKE2b",
        }
    }
}
39
40fn hash_digest<D: Digest>(data: &[u8]) -> String {
44 hex_encode(&D::digest(data))
45}
46
47fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
49 STREAM_BUF.with(|cell| {
50 let mut buf = cell.borrow_mut();
51 ensure_stream_buf(&mut buf);
52 let mut hasher = D::new();
53 loop {
54 let n = read_full(&mut reader, &mut buf)?;
55 if n == 0 {
56 break;
57 }
58 hasher.update(&buf[..n]);
59 }
60 Ok(hex_encode(&hasher.finalize()))
61 })
62}
63
/// Size (8 MiB) of the per-thread streaming read buffer used by the
/// `*_reader` functions.
const HASH_READ_BUF: usize = 8 * 1024 * 1024;

thread_local! {
    // Reusable per-thread read buffer shared by all streaming hashers;
    // starts empty and is grown lazily by `ensure_stream_buf`.
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}
77
78#[inline]
81fn ensure_stream_buf(buf: &mut Vec<u8>) {
82 if buf.len() < HASH_READ_BUF {
83 buf.resize(HASH_READ_BUF, 0);
84 }
85}
86
/// SHA-256 of an in-memory buffer, as lowercase hex.
fn sha256_bytes(data: &[u8]) -> String {
    hash_digest::<sha2::Sha256>(data)
}
93
/// SHA-256 of everything readable from `reader`, as lowercase hex.
fn sha256_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<sha2::Sha256>(reader)
}
98
/// SHA-1 of an in-memory buffer, as lowercase hex.
fn sha1_bytes(data: &[u8]) -> String {
    hash_digest::<Sha1>(data)
}
105
/// SHA-1 of everything readable from `reader`, as lowercase hex.
fn sha1_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Sha1>(reader)
}
110
111fn sha224_bytes(data: &[u8]) -> String {
115 hex_encode(&sha2::Sha224::digest(data))
116}
117
118fn sha224_reader(reader: impl Read) -> io::Result<String> {
120 STREAM_BUF.with(|cell| {
121 let mut buf = cell.borrow_mut();
122 ensure_stream_buf(&mut buf);
123 let mut hasher = <sha2::Sha224 as digest::Digest>::new();
124 let mut reader = reader;
125 loop {
126 let n = read_full(&mut reader, &mut buf)?;
127 if n == 0 {
128 break;
129 }
130 digest::Digest::update(&mut hasher, &buf[..n]);
131 }
132 Ok(hex_encode(&digest::Digest::finalize(hasher)))
133 })
134}
135
136fn sha384_bytes(data: &[u8]) -> String {
140 hex_encode(&sha2::Sha384::digest(data))
141}
142
143fn sha384_reader(reader: impl Read) -> io::Result<String> {
145 STREAM_BUF.with(|cell| {
146 let mut buf = cell.borrow_mut();
147 ensure_stream_buf(&mut buf);
148 let mut hasher = <sha2::Sha384 as digest::Digest>::new();
149 let mut reader = reader;
150 loop {
151 let n = read_full(&mut reader, &mut buf)?;
152 if n == 0 {
153 break;
154 }
155 digest::Digest::update(&mut hasher, &buf[..n]);
156 }
157 Ok(hex_encode(&digest::Digest::finalize(hasher)))
158 })
159}
160
161fn sha512_bytes(data: &[u8]) -> String {
165 hex_encode(&sha2::Sha512::digest(data))
166}
167
168fn sha512_reader(reader: impl Read) -> io::Result<String> {
170 STREAM_BUF.with(|cell| {
171 let mut buf = cell.borrow_mut();
172 ensure_stream_buf(&mut buf);
173 let mut hasher = <sha2::Sha512 as digest::Digest>::new();
174 let mut reader = reader;
175 loop {
176 let n = read_full(&mut reader, &mut buf)?;
177 if n == 0 {
178 break;
179 }
180 digest::Digest::update(&mut hasher, &buf[..n]);
181 }
182 Ok(hex_encode(&digest::Digest::finalize(hasher)))
183 })
184}
185
186pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
188 match algo {
189 HashAlgorithm::Sha1 => sha1_bytes(data),
190 HashAlgorithm::Sha224 => sha224_bytes(data),
191 HashAlgorithm::Sha256 => sha256_bytes(data),
192 HashAlgorithm::Sha384 => sha384_bytes(data),
193 HashAlgorithm::Sha512 => sha512_bytes(data),
194 HashAlgorithm::Md5 => md5_bytes(data),
195 HashAlgorithm::Blake2b => {
196 let hash = blake2b_simd::blake2b(data);
197 hex_encode(hash.as_bytes())
198 }
199 }
200}
201
202#[cfg(target_os = "linux")]
207pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> usize {
208 match algo {
209 HashAlgorithm::Md5 => {
210 let digest = md5::Md5::digest(data);
211 hex_encode_to_slice(&digest, out);
212 32
213 }
214 HashAlgorithm::Sha1 => {
215 let digest = sha1::Sha1::digest(data);
216 hex_encode_to_slice(&digest, out);
217 40
218 }
219 HashAlgorithm::Sha224 => {
220 let digest = sha2::Sha224::digest(data);
221 hex_encode_to_slice(&digest, out);
222 56
223 }
224 HashAlgorithm::Sha256 => {
225 let digest = sha2::Sha256::digest(data);
226 hex_encode_to_slice(&digest, out);
227 64
228 }
229 HashAlgorithm::Sha384 => {
230 let digest = sha2::Sha384::digest(data);
231 hex_encode_to_slice(&digest, out);
232 96
233 }
234 HashAlgorithm::Sha512 => {
235 let digest = sha2::Sha512::digest(data);
236 hex_encode_to_slice(&digest, out);
237 128
238 }
239 HashAlgorithm::Blake2b => {
240 let hash = blake2b_simd::blake2b(data);
241 let bytes = hash.as_bytes();
242 hex_encode_to_slice(bytes, out);
243 bytes.len() * 2
244 }
245 }
246}
247
#[cfg(target_os = "linux")]
/// Hash the file at `path` via a raw `libc::open`, writing the hex digest
/// into `out` and returning the number of bytes written.
///
/// Tries O_NOATIME while the process-wide flag says it is supported; on
/// EPERM (caller does not own the file) the flag is cleared and the open is
/// retried without it. Any other open failure is returned as `io::Error`.
pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
    use std::os::unix::ffi::OsStrExt;

    // libc::open needs a NUL-terminated path; interior NULs are invalid input.
    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // O_NOATIME requires file ownership; remember the EPERM and retry once.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            // Ownership of fd2 passes to the callee, which always closes it.
            return hash_from_raw_fd_to_buf(algo, fd2, out);
        }
        return Err(err);
    }
    hash_from_raw_fd_to_buf(algo, fd, out)
}
280
#[cfg(target_os = "linux")]
/// Hash the contents of raw descriptor `fd`, writing the hex digest into
/// `out` and returning its length. Takes ownership of `fd`: it is closed on
/// every path (explicitly, or via the `File` wrapper below).
fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    // Empty regular file: hash of the empty input, no reads required.
    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes_to_buf(algo, &[], out));
    }

    // Tiny regular file: single stack buffer, raw reads, no File wrapper.
    if is_regular && size < TINY_FILE_LIMIT {
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                // EINTR: retry the read.
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            // n == 0 is EOF (file may have shrunk since fstat).
            if n == 0 {
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes_to_buf(algo, &buf[..total], out));
    }

    // From here on, wrap the fd in a File so it is closed on every path.
    use std::os::unix::io::FromRawFd;
    let file = unsafe { File::from_raw_fd(fd) };
    let hash_str = if is_regular && size > 0 {
        if size >= SMALL_FILE_LIMIT {
            // Large file: prefer mmap with readahead advice.
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // PopulateRead may be unsupported; WillNeed is the fallback.
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                hash_bytes(algo, &mmap)
            } else {
                hash_file_small(algo, file, size as usize)?
            }
        } else {
            hash_file_small(algo, file, size as usize)?
        }
    } else {
        // Pipe, device, or unknown size: stream it.
        hash_reader(algo, file)?
    };
    // NOTE(review): assumes `out` is at least as long as the hex digest —
    // the copy panics otherwise; confirm callers size it per algorithm.
    let hex_bytes = hash_str.as_bytes();
    out[..hex_bytes.len()].copy_from_slice(hex_bytes);
    Ok(hex_bytes.len())
}
367
/// MD5 of an in-memory buffer, as lowercase hex.
fn md5_bytes(data: &[u8]) -> String {
    hash_digest::<Md5>(data)
}
374
375pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
377 match algo {
378 HashAlgorithm::Sha1 => sha1_reader(reader),
379 HashAlgorithm::Sha224 => sha224_reader(reader),
380 HashAlgorithm::Sha256 => sha256_reader(reader),
381 HashAlgorithm::Sha384 => sha384_reader(reader),
382 HashAlgorithm::Sha512 => sha512_reader(reader),
383 HashAlgorithm::Md5 => md5_reader(reader),
384 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
385 }
386}
387
/// MD5 of everything readable from `reader`, as lowercase hex.
fn md5_reader(reader: impl Read) -> io::Result<String> {
    hash_reader_impl::<Md5>(reader)
}
392
#[cfg(target_os = "linux")]
// Process-wide flag: cleared the first time an O_NOATIME open fails with
// EPERM (we don't own the file), so subsequent opens skip the doomed attempt.
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
397
398#[cfg(target_os = "linux")]
401fn open_noatime(path: &Path) -> io::Result<File> {
402 use std::os::unix::fs::OpenOptionsExt;
403 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
404 match std::fs::OpenOptions::new()
405 .read(true)
406 .custom_flags(libc::O_NOATIME)
407 .open(path)
408 {
409 Ok(f) => return Ok(f),
410 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
411 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
413 }
414 Err(e) => return Err(e), }
416 }
417 File::open(path)
418}
419
#[cfg(not(target_os = "linux"))]
/// Portable fallback: O_NOATIME is Linux-specific, so just open normally.
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}
424
#[cfg(target_os = "linux")]
#[inline]
/// Open `path` (honoring the O_NOATIME preference) and fstat it, returning
/// `(file, size_in_bytes, is_regular_file)`.
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let fd = {
        use std::os::unix::io::AsRawFd;
        file.as_raw_fd()
    };
    // fstat on the already-open fd avoids a separate stat()/open() race.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        return Err(io::Error::last_os_error());
    }
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
    let size = stat.st_size as u64;
    Ok((file, size, is_regular))
}
443
#[cfg(not(target_os = "linux"))]
#[inline]
/// Portable variant: one open plus one metadata query, same return shape as
/// the Linux version: `(file, size_in_bytes, is_regular_file)`.
fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
    let file = open_noatime(path)?;
    let meta = file.metadata()?;
    let size = meta.len();
    let is_regular = meta.file_type().is_file();
    Ok((file, size, is_regular))
}
451
#[cfg(target_os = "linux")]
// Only issue posix_fadvise hints for files at least this large (1 MiB).
const FADVISE_MIN_SIZE: u64 = 1024 * 1024;

// Files below this (16 MiB) are read whole into a thread-local buffer; at or
// above it the mmap / pipelined paths take over.
const SMALL_FILE_LIMIT: u64 = 16 * 1024 * 1024;

// Files below this (8 KiB) fit in a stack buffer (see `hash_file_tiny`).
const TINY_FILE_LIMIT: u64 = 8 * 1024;

thread_local! {
    // Reusable per-thread buffer for whole-file reads of "small" files.
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 * 1024));
}
476
#[cfg(target_os = "linux")]
/// Hash a large regular file: prefer mmap with madvise hints, falling back
/// to the threaded read pipeline when the map fails.
fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
    match unsafe { memmap2::MmapOptions::new().map(&file) } {
        Ok(mmap) => {
            // Request transparent huge pages for multi-MiB maps.
            if file_size >= 2 * 1024 * 1024 {
                let _ = mmap.advise(memmap2::Advice::HugePage);
            }
            let _ = mmap.advise(memmap2::Advice::Sequential);
            if file_size >= 4 * 1024 * 1024 {
                // PopulateRead may be unsupported by the kernel; WillNeed is
                // the best-effort fallback. All advice failures are ignored.
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            } else {
                let _ = mmap.advise(memmap2::Advice::WillNeed);
            }
            Ok(hash_bytes(algo, &mmap))
        }
        Err(_) => hash_file_pipelined_read(algo, file, file_size),
    }
}
501
#[cfg(target_os = "linux")]
/// mmap-less fallback for large files: a reader thread fills 4 MiB buffers
/// while this thread hashes them. Two bounded channels recycle the single
/// allocation back and forth (classic double-buffering with one buffer).
fn hash_file_pipelined_read(
    algo: HashAlgorithm,
    mut file: File,
    file_size: u64,
) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        );
    }

    // `tx/rx` carries filled buffers to the hasher; `buf_tx/buf_rx` returns
    // drained buffers to the reader. One buffer is seeded, so reading and
    // hashing overlap without unbounded memory use.
    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);

    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
        while let Ok(mut buf) = buf_rx.recv() {
            // Fill the buffer completely (retrying EINTR and short reads)
            // before handing it over, so the hasher sees large chunks.
            let mut total = 0;
            while total < buf.len() {
                match file.read(&mut buf[total..]) {
                    Ok(0) => break,
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            if total == 0 {
                break;
            }
            // send fails only if the hashing side hung up; stop reading then.
            if tx.send((buf, total)).is_err() {
                break;
            }
        }
        Ok(())
    });

    // Shared drain loop for every Digest-trait algorithm.
    macro_rules! hash_pipelined_digest {
        ($hasher:expr) => {{
            let mut hasher = $hasher;
            while let Ok((buf, n)) = rx.recv() {
                Digest::update(&mut hasher, &buf[..n]);
                let _ = buf_tx.send(buf);
            }
            Ok(hex_encode(&hasher.finalize()))
        }};
    }

    let hash_result: io::Result<String> = match algo {
        HashAlgorithm::Sha1 => hash_pipelined_digest!(Sha1::new()),
        HashAlgorithm::Sha224 => hash_pipelined_digest!(sha2::Sha224::new()),
        HashAlgorithm::Sha256 => hash_pipelined_digest!(sha2::Sha256::new()),
        HashAlgorithm::Sha384 => hash_pipelined_digest!(sha2::Sha384::new()),
        HashAlgorithm::Sha512 => hash_pipelined_digest!(sha2::Sha512::new()),
        HashAlgorithm::Md5 => hash_pipelined_digest!(Md5::new()),
        HashAlgorithm::Blake2b => {
            // blake2b_simd has its own state type outside the Digest trait.
            let mut state = blake2b_simd::Params::new().to_state();
            while let Ok((buf, n)) = rx.recv() {
                state.update(&buf[..n]);
                let _ = buf_tx.send(buf);
            }
            Ok(hex_encode(state.finalize().as_bytes()))
        }
    };

    // Prefer the reader's I/O error over a digest of truncated input, and
    // convert reader panics into io::Error instead of propagating unwind.
    match reader_handle.join() {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            if hash_result.is_ok() {
                return Err(e);
            }
        }
        Err(payload) => {
            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
                format!("reader thread panicked: {}", s)
            } else if let Some(s) = payload.downcast_ref::<String>() {
                format!("reader thread panicked: {}", s)
            } else {
                "reader thread panicked".to_string()
            };
            return Err(io::Error::other(msg));
        }
    }

    hash_result
}
598
/// Hash the file at `path` with `algo`, picking a strategy by size:
/// empty -> constant digest; tiny -> stack buffer; small -> thread-local
/// buffer; large -> mmap/pipeline (Linux) or mmap (elsewhere); non-regular
/// files are streamed.
pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(hash_bytes(algo, &[]));
    }

    if file_size > 0 && is_regular {
        if file_size < TINY_FILE_LIMIT {
            return hash_file_tiny(algo, file, file_size as usize);
        }
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return hash_file_pipelined(algo, file, file_size);
            }
            #[cfg(not(target_os = "linux"))]
            {
                // Portable large-file path: mmap if possible; a failed map
                // falls through to the streaming reader below.
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(hash_bytes(algo, &mmap));
                }
            }
        }
        if file_size < SMALL_FILE_LIMIT {
            return hash_file_small(algo, file, file_size as usize);
        }
    }

    // Streaming fallback (non-regular files, or failed mmap on non-Linux).
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    hash_reader(algo, file)
}
646
647#[inline]
651fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
652 let mut buf = [0u8; 8192];
653 let mut total = 0;
654 while total < size {
656 match file.read(&mut buf[total..size]) {
657 Ok(0) => break,
658 Ok(n) => total += n,
659 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
660 Err(e) => return Err(e),
661 }
662 }
663 Ok(hash_bytes(algo, &buf[..total]))
664}
665
666#[inline]
669fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
670 SMALL_FILE_BUF.with(|cell| {
671 let mut buf = cell.borrow_mut();
672 buf.clear();
674 buf.reserve(size);
675 unsafe {
678 buf.set_len(size);
679 }
680 let mut total = 0;
681 while total < size {
682 match file.read(&mut buf[total..size]) {
683 Ok(0) => break,
684 Ok(n) => total += n,
685 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
686 Err(e) => return Err(e),
687 }
688 }
689 Ok(hash_bytes(algo, &buf[..total]))
690 })
691}
692
/// Hash everything readable from stdin with `algo`.
pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
    let stdin = io::stdin();
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // If stdin is a redirected regular file, ask the kernel for
        // sequential readahead; purely best-effort, failures ignored.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    hash_reader(algo, stdin.lock())
}
714
/// Heuristic: parallel hashing is worthwhile once there is more than one
/// input path.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
722
#[cfg(target_os = "linux")]
/// Best-effort readahead hint (POSIX_FADV_WILLNEED) for each regular file of
/// at least FADVISE_MIN_SIZE. Open/metadata failures and small files are
/// silently skipped; nothing here affects correctness.
pub fn readahead_files(paths: &[&Path]) {
    use std::os::unix::io::AsRawFd;
    for path in paths {
        if let Ok(file) = open_noatime(path) {
            if let Ok(meta) = file.metadata() {
                let len = meta.len();
                if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
                    unsafe {
                        libc::posix_fadvise(
                            file.as_raw_fd(),
                            0,
                            len as i64,
                            libc::POSIX_FADV_WILLNEED,
                        );
                    }
                }
            }
        }
    }
}
748
#[cfg(not(target_os = "linux"))]
/// No-op on platforms without fadvise-style readahead hints.
pub fn readahead_files(_paths: &[&Path]) {}
753
754pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
759 let hash = blake2b_simd::Params::new()
760 .hash_length(output_bytes)
761 .hash(data);
762 hex_encode(hash.as_bytes())
763}
764
765pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
768 STREAM_BUF.with(|cell| {
769 let mut buf = cell.borrow_mut();
770 ensure_stream_buf(&mut buf);
771 let mut state = blake2b_simd::Params::new()
772 .hash_length(output_bytes)
773 .to_state();
774 loop {
775 let n = read_full(&mut reader, &mut buf)?;
776 if n == 0 {
777 break;
778 }
779 state.update(&buf[..n]);
780 }
781 Ok(hex_encode(state.finalize().as_bytes()))
782 })
783}
784
/// BLAKE2b-hash the file at `path` with a digest of `output_bytes` bytes,
/// using the same size-tiered strategy as `hash_file`.
pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
    let (file, file_size, is_regular) = open_and_stat(path)?;

    if is_regular && file_size == 0 {
        return Ok(blake2b_hash_data(&[], output_bytes));
    }

    if file_size > 0 && is_regular {
        if file_size < TINY_FILE_LIMIT {
            return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
        }
        if file_size >= SMALL_FILE_LIMIT {
            #[cfg(target_os = "linux")]
            {
                return blake2b_hash_file_pipelined(file, file_size, output_bytes);
            }
            #[cfg(not(target_os = "linux"))]
            {
                // Portable large-file path: mmap if possible; a failed map
                // falls through to the streaming reader below.
                let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
                if let Ok(mmap) = mmap_result {
                    return Ok(blake2b_hash_data(&mmap, output_bytes));
                }
            }
        }
        if file_size < SMALL_FILE_LIMIT {
            return blake2b_hash_file_small(file, file_size as usize, output_bytes);
        }
    }

    // Streaming fallback (non-regular files, or failed mmap on non-Linux).
    #[cfg(target_os = "linux")]
    if file_size >= FADVISE_MIN_SIZE {
        use std::os::unix::io::AsRawFd;
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }
    blake2b_hash_reader(file, output_bytes)
}
830
831#[inline]
833fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
834 let mut buf = [0u8; 8192];
835 let mut total = 0;
836 while total < size {
837 match file.read(&mut buf[total..size]) {
838 Ok(0) => break,
839 Ok(n) => total += n,
840 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
841 Err(e) => return Err(e),
842 }
843 }
844 Ok(blake2b_hash_data(&buf[..total], output_bytes))
845}
846
847#[inline]
849fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
850 SMALL_FILE_BUF.with(|cell| {
851 let mut buf = cell.borrow_mut();
852 buf.clear();
853 buf.reserve(size);
854 unsafe {
856 buf.set_len(size);
857 }
858 let mut total = 0;
859 while total < size {
860 match file.read(&mut buf[total..size]) {
861 Ok(0) => break,
862 Ok(n) => total += n,
863 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
864 Err(e) => return Err(e),
865 }
866 }
867 Ok(blake2b_hash_data(&buf[..total], output_bytes))
868 })
869}
870
#[cfg(target_os = "linux")]
/// Large-file BLAKE2b: mmap with madvise hints when possible, otherwise a
/// threaded streaming fallback.
fn blake2b_hash_file_pipelined(
    file: File,
    file_size: u64,
    output_bytes: usize,
) -> io::Result<String> {
    match unsafe { memmap2::MmapOptions::new().map(&file) } {
        Ok(mmap) => {
            // Request transparent huge pages for multi-MiB maps.
            if file_size >= 2 * 1024 * 1024 {
                let _ = mmap.advise(memmap2::Advice::HugePage);
            }
            let _ = mmap.advise(memmap2::Advice::Sequential);
            if file_size >= 4 * 1024 * 1024 {
                // PopulateRead may be unsupported; WillNeed is the fallback.
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
            } else {
                let _ = mmap.advise(memmap2::Advice::WillNeed);
            }
            Ok(blake2b_hash_data(&mmap, output_bytes))
        }
        Err(_) => {
            blake2b_hash_file_streamed(file, file_size, output_bytes)
        }
    }
}
912
#[cfg(target_os = "linux")]
/// mmap-less fallback for large BLAKE2b inputs: a reader thread fills 8 MiB
/// buffers while this thread hashes them; bounded channels recycle the
/// single allocation (same pipeline shape as `hash_file_pipelined_read`).
fn blake2b_hash_file_streamed(
    mut file: File,
    file_size: u64,
    output_bytes: usize,
) -> io::Result<String> {
    use std::os::unix::io::AsRawFd;

    const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; unsafe {
        libc::posix_fadvise(
            file.as_raw_fd(),
            0,
            file_size as i64,
            libc::POSIX_FADV_SEQUENTIAL,
        );
    }

    // `tx/rx` carries filled buffers to the hasher; `buf_tx/buf_rx` returns
    // drained buffers to the reader.
    let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
    let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
    let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);

    let reader_handle = std::thread::spawn(move || -> io::Result<()> {
        while let Ok(mut buf) = buf_rx.recv() {
            // Fill the buffer completely (retrying EINTR) before handing over.
            let mut total = 0;
            while total < buf.len() {
                match file.read(&mut buf[total..]) {
                    Ok(0) => break,
                    Ok(n) => total += n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                }
            }
            if total == 0 {
                break;
            }
            if tx.send((buf, total)).is_err() {
                break;
            }
        }
        Ok(())
    });

    let mut state = blake2b_simd::Params::new()
        .hash_length(output_bytes)
        .to_state();
    while let Ok((buf, n)) = rx.recv() {
        state.update(&buf[..n]);
        let _ = buf_tx.send(buf);
    }
    let hash_result = Ok(hex_encode(state.finalize().as_bytes()));

    // Prefer the reader's I/O error over a digest of truncated input, and
    // convert reader panics into io::Error instead of propagating unwind.
    match reader_handle.join() {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            if hash_result.is_ok() {
                return Err(e);
            }
        }
        Err(payload) => {
            let msg = if let Some(s) = payload.downcast_ref::<&str>() {
                format!("reader thread panicked: {}", s)
            } else if let Some(s) = payload.downcast_ref::<String>() {
                format!("reader thread panicked: {}", s)
            } else {
                "reader thread panicked".to_string()
            };
            return Err(io::Error::other(msg));
        }
    }

    hash_result
}
993
/// BLAKE2b-hash everything readable from stdin with an `output_bytes`-byte
/// digest.
pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
    let stdin = io::stdin();
    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        let fd = stdin.as_raw_fd();
        // If stdin is a redirected regular file, request sequential
        // readahead; best-effort, failures ignored.
        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
        if unsafe { libc::fstat(fd, &mut stat) } == 0
            && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
            && stat.st_size > 0
        {
            unsafe {
                libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }
    blake2b_hash_reader(stdin.lock(), output_bytes)
}
1014
/// Whole-file contents, either memory-mapped or owned in a heap buffer.
enum FileContent {
    // Zero-copy view of an on-disk file.
    Mmap(memmap2::Mmap),
    // Owned bytes (tiny files, non-regular files, or mmap fallback).
    Buf(Vec<u8>),
}
1021
1022impl AsRef<[u8]> for FileContent {
1023 fn as_ref(&self) -> &[u8] {
1024 match self {
1025 FileContent::Mmap(m) => m,
1026 FileContent::Buf(v) => v,
1027 }
1028 }
1029}
1030
1031fn open_file_content(path: &Path) -> io::Result<FileContent> {
1035 let (file, size, is_regular) = open_and_stat(path)?;
1036 if is_regular && size == 0 {
1037 return Ok(FileContent::Buf(Vec::new()));
1038 }
1039 if is_regular && size > 0 {
1040 if size < TINY_FILE_LIMIT {
1044 let mut buf = vec![0u8; size as usize];
1045 let mut total = 0;
1046 let mut f = file;
1047 while total < size as usize {
1048 match f.read(&mut buf[total..]) {
1049 Ok(0) => break,
1050 Ok(n) => total += n,
1051 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1052 Err(e) => return Err(e),
1053 }
1054 }
1055 buf.truncate(total);
1056 return Ok(FileContent::Buf(buf));
1057 }
1058 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1060 if let Ok(mmap) = mmap_result {
1061 #[cfg(target_os = "linux")]
1062 {
1063 if size >= 2 * 1024 * 1024 {
1064 let _ = mmap.advise(memmap2::Advice::HugePage);
1065 }
1066 let _ = mmap.advise(memmap2::Advice::Sequential);
1067 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1068 let _ = mmap.advise(memmap2::Advice::WillNeed);
1069 }
1070 }
1071 return Ok(FileContent::Mmap(mmap));
1072 }
1073 let mut buf = vec![0u8; size as usize];
1075 let mut total = 0;
1076 let mut f = file;
1077 while total < size as usize {
1078 match f.read(&mut buf[total..]) {
1079 Ok(0) => break,
1080 Ok(n) => total += n,
1081 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1082 Err(e) => return Err(e),
1083 }
1084 }
1085 buf.truncate(total);
1086 return Ok(FileContent::Buf(buf));
1087 }
1088 let mut buf = Vec::new();
1090 let mut f = file;
1091 f.read_to_end(&mut buf)?;
1092 Ok(FileContent::Buf(buf))
1093}
1094
1095fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1099 let mut buf = Vec::with_capacity(prefix.len() + 65536);
1100 buf.extend_from_slice(prefix);
1101 file.read_to_end(&mut buf)?;
1102 Ok(FileContent::Buf(buf))
1103}
1104
1105fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1110 let mut file = open_noatime(path)?;
1111 let mut small_buf = [0u8; 4096];
1114 match file.read(&mut small_buf) {
1115 Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1116 Ok(n) if n < small_buf.len() => {
1117 let mut vec = Vec::with_capacity(n);
1119 vec.extend_from_slice(&small_buf[..n]);
1120 return Ok(FileContent::Buf(vec));
1121 }
1122 Ok(n) => {
1123 let mut buf = vec![0u8; 65536];
1125 buf[..n].copy_from_slice(&small_buf[..n]);
1126 let mut total = n;
1127 loop {
1128 match file.read(&mut buf[total..]) {
1129 Ok(0) => {
1130 buf.truncate(total);
1131 return Ok(FileContent::Buf(buf));
1132 }
1133 Ok(n) => {
1134 total += n;
1135 if total >= buf.len() {
1136 return read_remaining_to_vec(&buf[..total], file);
1138 }
1139 }
1140 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1141 Err(e) => return Err(e),
1142 }
1143 }
1144 }
1145 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1146 let mut buf = vec![0u8; 65536];
1147 let mut total = 0;
1148 loop {
1149 match file.read(&mut buf[total..]) {
1150 Ok(0) => {
1151 buf.truncate(total);
1152 return Ok(FileContent::Buf(buf));
1153 }
1154 Ok(n) => {
1155 total += n;
1156 if total >= buf.len() {
1157 return read_remaining_to_vec(&buf[..total], file);
1159 }
1160 }
1161 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1162 Err(e) => return Err(e),
1163 }
1164 }
1165 }
1166 Err(e) => return Err(e),
1167 }
1168}
1169
1170pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
1178 use blake2b_simd::many::{HashManyJob, hash_many};
1179
1180 let use_fast = paths.len() >= 20;
1185
1186 let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
1187 paths.iter().map(|&path| open_file_content(path)).collect()
1189 } else {
1190 let num_threads = std::thread::available_parallelism()
1191 .map(|n| n.get())
1192 .unwrap_or(4)
1193 .min(paths.len());
1194 let chunk_size = (paths.len() + num_threads - 1) / num_threads;
1195
1196 std::thread::scope(|s| {
1197 let handles: Vec<_> = paths
1198 .chunks(chunk_size)
1199 .map(|chunk| {
1200 s.spawn(move || {
1201 chunk
1202 .iter()
1203 .map(|&path| {
1204 if use_fast {
1205 open_file_content_fast(path)
1206 } else {
1207 open_file_content(path)
1208 }
1209 })
1210 .collect::<Vec<_>>()
1211 })
1212 })
1213 .collect();
1214
1215 handles
1216 .into_iter()
1217 .flat_map(|h| h.join().unwrap())
1218 .collect()
1219 })
1220 };
1221
1222 let hash_results = {
1224 let mut params = blake2b_simd::Params::new();
1225 params.hash_length(output_bytes);
1226
1227 let ok_entries: Vec<(usize, &[u8])> = file_data
1228 .iter()
1229 .enumerate()
1230 .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
1231 .collect();
1232
1233 let mut jobs: Vec<HashManyJob> = ok_entries
1234 .iter()
1235 .map(|(_, data)| HashManyJob::new(¶ms, data))
1236 .collect();
1237
1238 hash_many(jobs.iter_mut());
1240
1241 let mut hm: Vec<Option<String>> = vec![None; paths.len()];
1243 for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
1244 hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
1245 }
1246 hm
1247 }; hash_results
1251 .into_iter()
1252 .zip(file_data)
1253 .map(|(hash_opt, result)| match result {
1254 Ok(_) => Ok(hash_opt.unwrap()),
1255 Err(e) => Err(e),
1256 })
1257 .collect()
1258}
1259
/// Hash many files with BLAKE2b across a thread pool. Batches estimated to
/// be small are delegated to the SIMD many-at-once path; otherwise files are
/// processed largest-first from a shared atomic work queue and results are
/// returned in the caller's original order.
pub fn blake2b_hash_files_parallel(
    paths: &[&Path],
    output_bytes: usize,
) -> Vec<io::Result<String>> {
    let n = paths.len();

    // Sample the first few files to estimate total workload cheaply.
    let sample_count = n.min(5);
    let mut sample_max: u64 = 0;
    let mut sample_total: u64 = 0;
    for &p in paths.iter().take(sample_count) {
        let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
        sample_total += size;
        sample_max = sample_max.max(size);
    }
    let estimated_total = if sample_count > 0 {
        sample_total * (n as u64) / (sample_count as u64)
    } else {
        0
    };

    // Mostly-small workload: the hash_many SIMD path handles it better.
    if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
        return blake2b_hash_files_many(paths, output_bytes);
    }

    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Largest first, so big files don't serialize at the tail of the queue.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    #[cfg(target_os = "linux")]
    {
        // Kick off kernel readahead for the biggest files before hashing.
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Shared atomic cursor: each worker claims the next un-hashed file.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = blake2b_hash_file(path, output_bytes);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Re-slot results into the caller's original path order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1371
/// Hash many files with `algo` across a thread pool: files are sorted
/// largest-first, pulled from a shared atomic work queue, and results are
/// returned in the caller's original order.
pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
    let n = paths.len();

    let mut indexed: Vec<(usize, &Path, u64)> = paths
        .iter()
        .enumerate()
        .map(|(i, &p)| {
            let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
            (i, p, size)
        })
        .collect();

    // Largest first, so big files don't serialize at the tail of the queue.
    indexed.sort_by(|a, b| b.2.cmp(&a.2));

    #[cfg(target_os = "linux")]
    {
        // Kick off kernel readahead for the biggest files before hashing.
        use std::os::unix::io::AsRawFd;
        for &(_, path, size) in indexed.iter().take(20) {
            if size >= 1024 * 1024 {
                if let Ok(file) = open_noatime(path) {
                    unsafe {
                        libc::readahead(file.as_raw_fd(), 0, size as usize);
                    }
                }
            }
        }
    }

    let num_threads = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4)
        .min(n);

    // Shared atomic cursor: each worker claims the next un-hashed file.
    let work_idx = AtomicUsize::new(0);

    std::thread::scope(|s| {
        let work_idx = &work_idx;
        let indexed = &indexed;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                s.spawn(move || {
                    let mut local_results = Vec::new();
                    loop {
                        let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                        if idx >= indexed.len() {
                            break;
                        }
                        let (orig_idx, path, _size) = indexed[idx];
                        let result = hash_file(algo, path);
                        local_results.push((orig_idx, result));
                    }
                    local_results
                })
            })
            .collect();

        // Re-slot results into the caller's original path order.
        let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
        for handle in handles {
            for (orig_idx, result) in handle.join().unwrap() {
                results[orig_idx] = Some(result);
            }
        }
        results
            .into_iter()
            .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
            .collect()
    })
}
1456
1457pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1463 let n = paths.len();
1464 if n == 0 {
1465 return Vec::new();
1466 }
1467 if n == 1 {
1468 return vec![hash_file_nostat(algo, paths[0])];
1469 }
1470
1471 #[cfg(target_os = "linux")]
1474 readahead_files_all(paths);
1475
1476 let num_threads = std::thread::available_parallelism()
1477 .map(|n| n.get())
1478 .unwrap_or(4)
1479 .min(n);
1480
1481 let work_idx = AtomicUsize::new(0);
1482
1483 std::thread::scope(|s| {
1484 let work_idx = &work_idx;
1485
1486 let handles: Vec<_> = (0..num_threads)
1487 .map(|_| {
1488 s.spawn(move || {
1489 let mut local_results = Vec::new();
1490 loop {
1491 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1492 if idx >= n {
1493 break;
1494 }
1495 let result = hash_file_nostat(algo, paths[idx]);
1496 local_results.push((idx, result));
1497 }
1498 local_results
1499 })
1500 })
1501 .collect();
1502
1503 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1504 for handle in handles {
1505 for (idx, result) in handle.join().unwrap() {
1506 results[idx] = Some(result);
1507 }
1508 }
1509 results
1510 .into_iter()
1511 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1512 .collect()
1513 })
1514}
1515
1516pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1525 let n = paths.len();
1526 if n == 0 {
1527 return Vec::new();
1528 }
1529
1530 #[cfg(target_os = "linux")]
1532 readahead_files_all(paths);
1533
1534 let use_fast = n >= 20;
1537
1538 let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
1539 paths
1541 .iter()
1542 .map(|&path| {
1543 if use_fast {
1544 open_file_content_fast(path)
1545 } else {
1546 open_file_content(path)
1547 }
1548 })
1549 .collect()
1550 } else {
1551 let num_threads = std::thread::available_parallelism()
1552 .map(|t| t.get())
1553 .unwrap_or(4)
1554 .min(n);
1555 let chunk_size = (n + num_threads - 1) / num_threads;
1556
1557 std::thread::scope(|s| {
1558 let handles: Vec<_> = paths
1559 .chunks(chunk_size)
1560 .map(|chunk| {
1561 s.spawn(move || {
1562 chunk
1563 .iter()
1564 .map(|&path| {
1565 if use_fast {
1566 open_file_content_fast(path)
1567 } else {
1568 open_file_content(path)
1569 }
1570 })
1571 .collect::<Vec<_>>()
1572 })
1573 })
1574 .collect();
1575
1576 handles
1577 .into_iter()
1578 .flat_map(|h| h.join().unwrap())
1579 .collect()
1580 })
1581 };
1582
1583 let num_hash_threads = std::thread::available_parallelism()
1586 .map(|t| t.get())
1587 .unwrap_or(4)
1588 .min(n);
1589 let work_idx = AtomicUsize::new(0);
1590
1591 std::thread::scope(|s| {
1592 let work_idx = &work_idx;
1593 let file_data = &file_data;
1594
1595 let handles: Vec<_> = (0..num_hash_threads)
1596 .map(|_| {
1597 s.spawn(move || {
1598 let mut local_results = Vec::new();
1599 loop {
1600 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1601 if idx >= n {
1602 break;
1603 }
1604 let result = match &file_data[idx] {
1605 Ok(content) => Ok(hash_bytes(algo, content.as_ref())),
1606 Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
1607 };
1608 local_results.push((idx, result));
1609 }
1610 local_results
1611 })
1612 })
1613 .collect();
1614
1615 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1616 for handle in handles {
1617 for (idx, result) in handle.join().unwrap() {
1618 results[idx] = Some(result);
1619 }
1620 }
1621 results
1622 .into_iter()
1623 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1624 .collect()
1625 })
1626}
1627
/// Hashes `prefix` followed by the remaining contents of `file`, streaming
/// the file through the shared thread-local STREAM_BUF.
///
/// Used when a caller has already consumed the start of a file into memory
/// (e.g. while probing its size) and needs the digest of the whole stream.
fn hash_stream_with_prefix(
    algo: HashAlgorithm,
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    match algo {
        HashAlgorithm::Sha1 => hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file),
        HashAlgorithm::Sha224 => hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file),
        HashAlgorithm::Sha256 => hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file),
        HashAlgorithm::Sha384 => hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file),
        HashAlgorithm::Sha512 => hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file),
        HashAlgorithm::Md5 => hash_stream_with_prefix_digest::<md5::Md5>(prefix, file),
        // BLAKE2b goes through blake2b_simd's streaming state rather than
        // the `digest` trait, so it gets its own read loop.
        HashAlgorithm::Blake2b => {
            let mut state = blake2b_simd::Params::new().to_state();
            state.update(prefix);
            STREAM_BUF.with(|cell| {
                let mut buf = cell.borrow_mut();
                ensure_stream_buf(&mut buf);
                loop {
                    let n = read_full(&mut file, &mut buf)?;
                    if n == 0 {
                        break;
                    }
                    state.update(&buf[..n]);
                }
                Ok(hex_encode(state.finalize().as_bytes()))
            })
        }
    }
}
1661
1662fn hash_stream_with_prefix_digest<D: digest::Digest>(
1664 prefix: &[u8],
1665 mut file: File,
1666) -> io::Result<String> {
1667 STREAM_BUF.with(|cell| {
1668 let mut buf = cell.borrow_mut();
1669 ensure_stream_buf(&mut buf);
1670 let mut hasher = D::new();
1671 hasher.update(prefix);
1672 loop {
1673 let n = read_full(&mut file, &mut buf)?;
1674 if n == 0 {
1675 break;
1676 }
1677 hasher.update(&buf[..n]);
1678 }
1679 Ok(hex_encode(&hasher.finalize()))
1680 })
1681}
1682
1683pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
1689 let mut file = open_noatime(path)?;
1690 let mut small_buf = [0u8; 4096];
1694 match file.read(&mut small_buf) {
1695 Ok(0) => return Ok(hash_bytes(algo, &[])),
1696 Ok(n) if n < small_buf.len() => {
1697 return Ok(hash_bytes(algo, &small_buf[..n]));
1699 }
1700 Ok(n) => {
1701 let mut buf = [0u8; 65536];
1703 buf[..n].copy_from_slice(&small_buf[..n]);
1704 let mut total = n;
1705 loop {
1706 match file.read(&mut buf[total..]) {
1707 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1708 Ok(n) => {
1709 total += n;
1710 if total >= buf.len() {
1711 return hash_stream_with_prefix(algo, &buf[..total], file);
1714 }
1715 }
1716 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1717 Err(e) => return Err(e),
1718 }
1719 }
1720 }
1721 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1722 let mut buf = [0u8; 65536];
1724 let mut total = 0;
1725 loop {
1726 match file.read(&mut buf[total..]) {
1727 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
1728 Ok(n) => {
1729 total += n;
1730 if total >= buf.len() {
1731 return hash_stream_with_prefix(algo, &buf[..total], file);
1733 }
1734 }
1735 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1736 Err(e) => return Err(e),
1737 }
1738 }
1739 }
1740 Err(e) => return Err(e),
1741 }
1742}
1743
/// Hashes a file via raw libc syscalls, opening with `O_NOATIME` while the
/// process-wide `NOATIME_SUPPORTED` flag is set.
///
/// On the first `EPERM` (the kernel rejects `O_NOATIME` for files the
/// caller does not own), the flag is cleared for the rest of the process
/// and the open is retried without it. The resulting fd is handed to
/// `hash_from_raw_fd`, which takes ownership and closes it.
///
/// Returns `InvalidInput` for paths containing an interior NUL byte.
#[cfg(target_os = "linux")]
pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
    use std::os::unix::ffi::OsStrExt;

    let path_bytes = path.as_os_str().as_bytes();
    let c_path = std::ffi::CString::new(path_bytes)
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;

    let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        flags |= libc::O_NOATIME;
    }

    let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
    if fd < 0 {
        let err = io::Error::last_os_error();
        // EPERM with O_NOATIME set: remember that O_NOATIME is unusable
        // and retry this open (and all future ones) without it.
        if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
            if fd2 < 0 {
                return Err(io::Error::last_os_error());
            }
            return hash_from_raw_fd(algo, fd2);
        }
        return Err(err);
    }
    hash_from_raw_fd(algo, fd)
}
1783
/// Hashes the contents of an already-open raw fd, taking ownership of it:
/// the fd is closed on every path, either explicitly via `libc::close` or
/// by the `File` wrapper created with `from_raw_fd`.
///
/// Size-based strategy for regular files: empty files hash the empty
/// slice with no reads; files under `TINY_FILE_LIMIT` use one stack
/// buffer; files at or above `SMALL_FILE_LIMIT` are mmap'd (with
/// sequential/populate advice) when mapping succeeds; everything else
/// goes through `hash_file_small`. Non-regular files are streamed via
/// `hash_reader`.
#[cfg(target_os = "linux")]
fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    if unsafe { libc::fstat(fd, &mut stat) } != 0 {
        let err = io::Error::last_os_error();
        unsafe {
            libc::close(fd);
        }
        return Err(err);
    }
    let size = stat.st_size as u64;
    let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;

    if is_regular && size == 0 {
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes(algo, &[]));
    }

    if is_regular && size < TINY_FILE_LIMIT {
        // NOTE(review): this path relies on TINY_FILE_LIMIT <= 8192 so the
        // whole file fits in `buf` — confirm against the constant's value.
        let mut buf = [0u8; 8192];
        let mut total = 0usize;
        // Raw read loop; retries EINTR, stops early if the file shrank.
        while total < size as usize {
            let n = unsafe {
                libc::read(
                    fd,
                    buf[total..].as_mut_ptr() as *mut libc::c_void,
                    (size as usize) - total,
                )
            };
            if n < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                unsafe {
                    libc::close(fd);
                }
                return Err(err);
            }
            if n == 0 {
                break;
            }
            total += n as usize;
        }
        unsafe {
            libc::close(fd);
        }
        return Ok(hash_bytes(algo, &buf[..total]));
    }

    // From here on the File owns the fd and closes it on drop.
    use std::os::unix::io::FromRawFd;
    let file = unsafe { File::from_raw_fd(fd) };

    if is_regular && size > 0 {
        if size >= SMALL_FILE_LIMIT {
            let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
            if let Ok(mmap) = mmap_result {
                if size >= 2 * 1024 * 1024 {
                    let _ = mmap.advise(memmap2::Advice::HugePage);
                }
                let _ = mmap.advise(memmap2::Advice::Sequential);
                // PopulateRead needs a newer kernel; fall back to WillNeed.
                if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                }
                return Ok(hash_bytes(algo, &mmap));
            }
        }
        return hash_file_small(algo, file, size as usize);
    }

    hash_reader(algo, file)
}
1870
1871#[cfg(target_os = "linux")]
1874pub fn readahead_files_all(paths: &[&Path]) {
1875 use std::os::unix::io::AsRawFd;
1876 for path in paths {
1877 if let Ok(file) = open_noatime(path) {
1878 if let Ok(meta) = file.metadata() {
1879 if meta.file_type().is_file() {
1880 let len = meta.len();
1881 unsafe {
1882 libc::posix_fadvise(
1883 file.as_raw_fd(),
1884 0,
1885 len as i64,
1886 libc::POSIX_FADV_WILLNEED,
1887 );
1888 }
1889 }
1890 }
1891 }
1892 }
1893}
1894
/// No-op stand-in for non-Linux targets so callers can invoke readahead
/// unconditionally.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
1897
/// Writes one GNU-format checksum line: `<hash> <mode><filename>\n`,
/// where the mode character is `*` for binary mode and a space otherwise.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let mode = if binary { '*' } else { ' ' };
    writeln!(out, "{} {}{}", hash, mode, filename)
}
1912
/// NUL-terminated variant of `print_hash`:
/// `<hash> <mode><filename>\0`, for `--zero`-style output.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let mode = if binary { '*' } else { ' ' };
    write!(out, "{} {}{}\0", hash, mode, filename)
}
1926
thread_local! {
    // Per-thread scratch buffer for assembling one output line, reused so
    // steady-state output does no per-line allocation and each line is
    // emitted with a single write_all (see write_hash_line and
    // write_hash_tag_line).
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
1936
1937#[inline]
1941pub fn write_hash_line(
1942 out: &mut impl Write,
1943 hash: &str,
1944 filename: &str,
1945 binary: bool,
1946 zero: bool,
1947 escaped: bool,
1948) -> io::Result<()> {
1949 LINE_BUF.with(|cell| {
1950 let mut buf = cell.borrow_mut();
1951 buf.clear();
1952 let mode = if binary { b'*' } else { b' ' };
1953 let term = if zero { b'\0' } else { b'\n' };
1954 if escaped {
1955 buf.push(b'\\');
1956 }
1957 buf.extend_from_slice(hash.as_bytes());
1958 buf.push(b' ');
1959 buf.push(mode);
1960 buf.extend_from_slice(filename.as_bytes());
1961 buf.push(term);
1962 out.write_all(&buf)
1963 })
1964}
1965
1966#[inline]
1969pub fn write_hash_tag_line(
1970 out: &mut impl Write,
1971 algo_name: &str,
1972 hash: &str,
1973 filename: &str,
1974 zero: bool,
1975) -> io::Result<()> {
1976 LINE_BUF.with(|cell| {
1977 let mut buf = cell.borrow_mut();
1978 buf.clear();
1979 let term = if zero { b'\0' } else { b'\n' };
1980 buf.extend_from_slice(algo_name.as_bytes());
1981 buf.extend_from_slice(b" (");
1982 buf.extend_from_slice(filename.as_bytes());
1983 buf.extend_from_slice(b") = ");
1984 buf.extend_from_slice(hash.as_bytes());
1985 buf.push(term);
1986 out.write_all(&buf)
1987 })
1988}
1989
1990pub fn print_hash_tag(
1992 out: &mut impl Write,
1993 algo: HashAlgorithm,
1994 hash: &str,
1995 filename: &str,
1996) -> io::Result<()> {
1997 out.write_all(algo.name().as_bytes())?;
1998 out.write_all(b" (")?;
1999 out.write_all(filename.as_bytes())?;
2000 out.write_all(b") = ")?;
2001 out.write_all(hash.as_bytes())?;
2002 out.write_all(b"\n")
2003}
2004
2005pub fn print_hash_tag_zero(
2007 out: &mut impl Write,
2008 algo: HashAlgorithm,
2009 hash: &str,
2010 filename: &str,
2011) -> io::Result<()> {
2012 out.write_all(algo.name().as_bytes())?;
2013 out.write_all(b" (")?;
2014 out.write_all(filename.as_bytes())?;
2015 out.write_all(b") = ")?;
2016 out.write_all(hash.as_bytes())?;
2017 out.write_all(b"\0")
2018}
2019
/// Prints a b2sum-style tag line. The full 512-bit width prints as plain
/// `BLAKE2b (...)`; any other width carries a `-<bits>` suffix.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        writeln!(out, "BLAKE2b ({}) = {}", filename, hash)
    } else {
        writeln!(out, "BLAKE2b-{} ({}) = {}", bits, filename, hash)
    }
}
2040
/// NUL-terminated variant of `print_hash_tag_b2sum`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    if bits == 512 {
        write!(out, "BLAKE2b ({}) = {}\0", filename, hash)
    } else {
        write!(out, "BLAKE2b-{} ({}) = {}\0", bits, filename, hash)
    }
}
2058
/// Behavior flags for checksum verification (see `check_file`).
pub struct CheckOptions {
    // Suppress per-file "OK" lines; failures are still printed.
    pub quiet: bool,
    // Print nothing at all; callers rely solely on the returned counts.
    pub status_only: bool,
    // NOTE(review): not read inside check_file — presumably consulted by
    // callers to decide exit status on format errors; confirm at call sites.
    pub strict: bool,
    // Report improperly formatted checksum lines on the error stream.
    pub warn: bool,
    // Silently skip (and count) files that do not exist.
    pub ignore_missing: bool,
    // Prefix (e.g. program or list-file name) for warning messages;
    // empty selects the bare "line N: ..." form.
    pub warn_prefix: String,
}
2071
/// Aggregate counts produced by `check_file`.
pub struct CheckResult {
    // Files whose computed digest matched the expected value.
    pub ok: usize,
    // Files that hashed successfully but did not match.
    pub mismatches: usize,
    // Lines that could not be parsed as checksum lines.
    pub format_errors: usize,
    // Files that could not be opened or read.
    pub read_errors: usize,
    // Missing files skipped because `ignore_missing` was set.
    pub ignored_missing: usize,
}
2081
/// Verifies checksums listed in `reader` against files on disk.
///
/// Each non-empty line is parsed with `parse_check_line` (GNU or BSD tag
/// format), the named file is hashed with `algo`, and the digest is
/// compared case-insensitively against the expected value.
///
/// Per-file status lines ("OK" / "FAILED" / "FAILED open or read") go to
/// `out`; format warnings and read-error details go to `err_out`, with
/// output suppressed according to `opts`. Returns the aggregated counts;
/// `Err` is only produced by I/O failures on `reader`, `out`, or
/// `err_out`, never by per-file hash failures.
pub fn check_file<R: BufRead>(
    algo: HashAlgorithm,
    reader: R,
    opts: &CheckOptions,
    out: &mut impl Write,
    err_out: &mut impl Write,
) -> io::Result<CheckResult> {
    let quiet = opts.quiet;
    let status_only = opts.status_only;
    let warn = opts.warn;
    let ignore_missing = opts.ignore_missing;
    let mut ok_count = 0;
    let mut mismatch_count = 0;
    let mut format_errors = 0;
    let mut read_errors = 0;
    let mut ignored_missing_count = 0;
    let mut line_num = 0;

    for line_result in reader.lines() {
        line_num += 1;
        let line = line_result?;
        let line = line.trim_end();

        // Blank lines are skipped entirely (not counted as format errors).
        if line.is_empty() {
            continue;
        }

        let (expected_hash, filename) = match parse_check_line(line) {
            Some(v) => v,
            None => {
                format_errors += 1;
                if warn {
                    // Flush `out` before writing the warning so the two
                    // streams interleave in order when they share a terminal.
                    out.flush()?;
                    if opts.warn_prefix.is_empty() {
                        writeln!(
                            err_out,
                            "line {}: improperly formatted {} checksum line",
                            line_num,
                            algo.name()
                        )?;
                    } else {
                        writeln!(
                            err_out,
                            "{}: {}: improperly formatted {} checksum line",
                            opts.warn_prefix,
                            line_num,
                            algo.name()
                        )?;
                    }
                }
                continue;
            }
        };

        let actual = match hash_file(algo, Path::new(filename)) {
            Ok(h) => h,
            Err(e) => {
                // Optionally skip files that simply do not exist.
                if ignore_missing && e.kind() == io::ErrorKind::NotFound {
                    ignored_missing_count += 1;
                    continue;
                }
                read_errors += 1;
                if !status_only {
                    out.flush()?;
                    writeln!(err_out, "{}: {}", filename, e)?;
                    writeln!(out, "{}: FAILED open or read", filename)?;
                }
                continue;
            }
        };

        // Case-insensitive compare lets upper-case checksum files verify
        // against the lower-case hex produced by this module.
        if actual.eq_ignore_ascii_case(expected_hash) {
            ok_count += 1;
            if !quiet && !status_only {
                writeln!(out, "{}: OK", filename)?;
            }
        } else {
            mismatch_count += 1;
            if !status_only {
                writeln!(out, "{}: FAILED", filename)?;
            }
        }
    }

    Ok(CheckResult {
        ok: ok_count,
        mismatches: mismatch_count,
        format_errors,
        read_errors,
        ignored_missing: ignored_missing_count,
    })
}
2178
/// Parses one line of a checksum file.
///
/// Two formats are recognized:
/// * BSD tag lines: `ALGO (filename) = hash`, including the
///   parameterized `BLAKE2b-<bits>` form;
/// * GNU lines: `hash <mode>filename`, where `<mode>` is a space (text)
///   or `*` (binary), optionally preceded by `\` marking an escaped
///   filename.
///
/// Returns `(hash, filename)` on success, `None` for a malformed line.
/// Offsets are validated with `strip_prefix` rather than raw byte
/// slicing, so lines that end right after the first space or continue
/// with a multi-byte character are rejected instead of panicking, and an
/// unknown mode character is treated as a format error.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    // BSD tag prefixes for the fixed-name algorithms, plus BLAKE2b-<bits>.
    let rest = line
        .strip_prefix("MD5 (")
        .or_else(|| line.strip_prefix("SHA1 ("))
        .or_else(|| line.strip_prefix("SHA224 ("))
        .or_else(|| line.strip_prefix("SHA256 ("))
        .or_else(|| line.strip_prefix("SHA384 ("))
        .or_else(|| line.strip_prefix("SHA512 ("))
        .or_else(|| line.strip_prefix("BLAKE2b ("))
        .or_else(|| {
            let after = line.strip_prefix("BLAKE2b-")?;
            let (bits, tail) = after.split_once(" (")?;
            if bits.bytes().all(|b| b.is_ascii_digit()) {
                Some(tail)
            } else {
                None
            }
        });
    if let Some(rest) = rest {
        if let Some(paren_idx) = rest.find(") = ") {
            let filename = &rest[..paren_idx];
            let hash = &rest[paren_idx + 4..];
            return Some((hash, filename));
        }
        // A tag prefix without ") = " falls through to the GNU parser.
    }

    // GNU style; a leading backslash flags an escaped filename.
    let line = line.strip_prefix('\\').unwrap_or(line);

    // The hash runs to the first space; exactly one mode character
    // (' ' = text, '*' = binary) separates it from the filename.
    let idx = line.find(' ')?;
    let hash = &line[..idx];
    let rest = &line[idx + 1..];
    let filename = rest.strip_prefix(' ').or_else(|| rest.strip_prefix('*'))?;
    Some((hash, filename))
}
2227
/// Parses a BSD tag-format line `ALGO[-BITS] (filename) = hash`.
///
/// Returns `(hash, filename, bits)`, where `bits` is extracted from a
/// trailing `-<number>` on the algorithm name (e.g. `BLAKE2b-256`) and is
/// `None` when there is no parseable numeric suffix.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.split_once(") = ")?;
    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());
    Some((hash, filename, bits))
}
2248
/// Reads from `reader` until `buf` is full or EOF is reached.
///
/// Returns the number of bytes written into `buf`; any value smaller than
/// `buf.len()` means EOF. Transient `Interrupted` errors are always
/// retried — including on the very first read, which the previous version
/// incorrectly propagated as a hard error via `?`.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2271
/// Builds a 256-entry table mapping each byte value to its two lowercase
/// ASCII hex digits. `while` (not `for`) because this runs in const context.
const fn generate_hex_table() -> [[u8; 2]; 256] {
    let hex = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut i = 0;
    while i < 256 {
        table[i] = [hex[i >> 4], hex[i & 0xf]];
        i += 1;
    }
    table
}

/// Byte -> lowercase hex digit pair, computed at compile time.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();

/// Encodes `bytes` as a lowercase hexadecimal string.
///
/// Previous version called `set_len` on the String's byte vector before
/// writing, exposing a `&mut [u8]` over uninitialized memory (undefined
/// behavior); this builds the buffer with safe pushes instead.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    let mut out = Vec::with_capacity(bytes.len() * 2);
    for &b in bytes {
        out.extend_from_slice(&HEX_TABLE[b as usize]);
    }
    // SAFETY: every byte pushed comes from HEX_TABLE, which contains only
    // ASCII hex digits, so `out` is valid UTF-8.
    unsafe { String::from_utf8_unchecked(out) }
}

/// Writes the hex encoding of `bytes` into `out`, two digits per input
/// byte. `out` should hold at least `2 * bytes.len()` bytes; if it is
/// shorter, encoding stops at `out`'s end (no out-of-bounds writes).
#[inline]
fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
    for (pair, &b) in out.chunks_exact_mut(2).zip(bytes) {
        pair.copy_from_slice(&HEX_TABLE[b as usize]);
    }
}