1use std::cell::RefCell;
2use std::fs::File;
3use std::io::{self, BufRead, Read, Write};
4use std::path::Path;
5
6use std::sync::atomic::AtomicUsize;
7#[cfg(target_os = "linux")]
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[cfg(not(target_os = "linux"))]
11use digest::Digest;
12#[cfg(not(target_os = "linux"))]
13use md5::Md5;
14#[cfg(not(target_os = "linux"))]
15use sha1::Sha1;
16
/// Digest algorithms this module knows how to compute.
#[derive(Clone, Copy, Debug)]
pub enum HashAlgorithm {
    /// SHA-1.
    Sha1,
    /// SHA-224.
    Sha224,
    /// SHA-256.
    Sha256,
    /// SHA-384.
    Sha384,
    /// SHA-512.
    Sha512,
    /// MD5.
    Md5,
    /// BLAKE2b.
    Blake2b,
}

impl HashAlgorithm {
    /// Returns the display label of the algorithm, e.g. `"SHA256"` or `"BLAKE2b"`.
    pub fn name(self) -> &'static str {
        match self {
            Self::Md5 => "MD5",
            Self::Sha1 => "SHA1",
            Self::Sha224 => "SHA224",
            Self::Sha256 => "SHA256",
            Self::Sha384 => "SHA384",
            Self::Sha512 => "SHA512",
            Self::Blake2b => "BLAKE2b",
        }
    }
}
42
/// One-shot helper: hashes `data` with the digest type `D` and returns the hex-encoded result.
#[cfg(not(target_os = "linux"))]
fn hash_digest<D: Digest>(data: &[u8]) -> String {
    let raw = D::digest(data);
    hex_encode(&raw)
}
50
/// Streams `reader` through the digest type `D` using the per-thread `STREAM_BUF` scratch
/// buffer and returns the hex-encoded digest.
///
/// # Errors
/// Propagates any error returned by `read_full`.
#[cfg(not(target_os = "linux"))]
fn hash_reader_impl<D: Digest>(mut reader: impl Read) -> io::Result<String> {
    STREAM_BUF.with(|slot| {
        let mut scratch = slot.borrow_mut();
        ensure_stream_buf(&mut scratch);
        let mut state = D::new();
        loop {
            // A zero-length read is treated as end of input.
            match read_full(&mut reader, &mut scratch)? {
                0 => break,
                filled => state.update(&scratch[..filled]),
            }
        }
        let raw = state.finalize();
        Ok(hex_encode(&raw))
    })
}
68
/// Length (8 MiB) that the streaming scratch buffer is grown to before use.
const HASH_READ_BUF: usize = 8 << 20;

thread_local! {
    /// Per-thread scratch buffer shared by the streaming hash functions; starts empty.
    static STREAM_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
}

/// Grows `buf` (zero-filling the new tail) so it is at least `HASH_READ_BUF` bytes long.
/// Buffers that are already long enough are left untouched.
#[inline]
fn ensure_stream_buf(buf: &mut Vec<u8>) {
    if buf.len() >= HASH_READ_BUF {
        return;
    }
    buf.resize(HASH_READ_BUF, 0);
}
91
92#[cfg(target_os = "linux")]
97fn sha256_bytes(data: &[u8]) -> String {
98 if data.len() < TINY_FILE_LIMIT as usize {
103 use digest::Digest;
104 return hex_encode(&sha2::Sha256::digest(data));
105 }
106 let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), data)
107 .expect("SHA256 hash failed");
108 hex_encode(&digest)
109}
110
111#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
113fn sha256_bytes(data: &[u8]) -> String {
114 hex_encode(ring::digest::digest(&ring::digest::SHA256, data).as_ref())
115}
116
117#[cfg(target_vendor = "apple")]
119fn sha256_bytes(data: &[u8]) -> String {
120 hash_digest::<sha2::Sha256>(data)
121}
122
123#[cfg(target_os = "linux")]
126fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
127 STREAM_BUF.with(|cell| {
128 let mut buf = cell.borrow_mut();
129 ensure_stream_buf(&mut buf);
130 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
131 .map_err(|e| io::Error::other(e))?;
132 loop {
133 let n = read_full(&mut reader, &mut buf)?;
134 if n == 0 {
135 break;
136 }
137 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
138 }
139 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
140 Ok(hex_encode(&digest))
141 })
142}
143
144#[cfg(all(not(target_vendor = "apple"), not(target_os = "linux")))]
146fn sha256_reader(mut reader: impl Read) -> io::Result<String> {
147 STREAM_BUF.with(|cell| {
148 let mut buf = cell.borrow_mut();
149 ensure_stream_buf(&mut buf);
150 let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
151 loop {
152 let n = read_full(&mut reader, &mut buf)?;
153 if n == 0 {
154 break;
155 }
156 ctx.update(&buf[..n]);
157 }
158 Ok(hex_encode(ctx.finish().as_ref()))
159 })
160}
161
162#[cfg(target_vendor = "apple")]
164fn sha256_reader(reader: impl Read) -> io::Result<String> {
165 hash_reader_impl::<sha2::Sha256>(reader)
166}
167
168#[cfg(target_os = "linux")]
172fn sha1_bytes(data: &[u8]) -> String {
173 if data.len() < TINY_FILE_LIMIT as usize {
174 use digest::Digest;
175 return hex_encode(&sha1::Sha1::digest(data));
176 }
177 let digest =
178 openssl::hash::hash(openssl::hash::MessageDigest::sha1(), data).expect("SHA1 hash failed");
179 hex_encode(&digest)
180}
181
182#[cfg(not(target_os = "linux"))]
184fn sha1_bytes(data: &[u8]) -> String {
185 hash_digest::<Sha1>(data)
186}
187
188#[cfg(target_os = "linux")]
190fn sha1_reader(mut reader: impl Read) -> io::Result<String> {
191 STREAM_BUF.with(|cell| {
192 let mut buf = cell.borrow_mut();
193 ensure_stream_buf(&mut buf);
194 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha1())
195 .map_err(|e| io::Error::other(e))?;
196 loop {
197 let n = read_full(&mut reader, &mut buf)?;
198 if n == 0 {
199 break;
200 }
201 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
202 }
203 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
204 Ok(hex_encode(&digest))
205 })
206}
207
208#[cfg(not(target_os = "linux"))]
210fn sha1_reader(reader: impl Read) -> io::Result<String> {
211 hash_reader_impl::<Sha1>(reader)
212}
213
214#[cfg(target_os = "linux")]
218fn sha224_bytes(data: &[u8]) -> String {
219 if data.len() < TINY_FILE_LIMIT as usize {
220 use digest::Digest;
221 return hex_encode(&sha2::Sha224::digest(data));
222 }
223 let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha224(), data)
224 .expect("SHA224 hash failed");
225 hex_encode(&digest)
226}
227
228#[cfg(not(target_os = "linux"))]
230fn sha224_bytes(data: &[u8]) -> String {
231 use digest::Digest;
232 hex_encode(&sha2::Sha224::digest(data))
233}
234
235#[cfg(target_os = "linux")]
237fn sha224_reader(mut reader: impl Read) -> io::Result<String> {
238 STREAM_BUF.with(|cell| {
239 let mut buf = cell.borrow_mut();
240 ensure_stream_buf(&mut buf);
241 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha224())
242 .map_err(|e| io::Error::other(e))?;
243 loop {
244 let n = read_full(&mut reader, &mut buf)?;
245 if n == 0 {
246 break;
247 }
248 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
249 }
250 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
251 Ok(hex_encode(&digest))
252 })
253}
254
255#[cfg(not(target_os = "linux"))]
257fn sha224_reader(reader: impl Read) -> io::Result<String> {
258 STREAM_BUF.with(|cell| {
259 let mut buf = cell.borrow_mut();
260 ensure_stream_buf(&mut buf);
261 let mut hasher = <sha2::Sha224 as digest::Digest>::new();
262 let mut reader = reader;
263 loop {
264 let n = read_full(&mut reader, &mut buf)?;
265 if n == 0 {
266 break;
267 }
268 digest::Digest::update(&mut hasher, &buf[..n]);
269 }
270 Ok(hex_encode(&digest::Digest::finalize(hasher)))
271 })
272}
273
274#[cfg(target_os = "linux")]
278fn sha384_bytes(data: &[u8]) -> String {
279 if data.len() < TINY_FILE_LIMIT as usize {
280 use digest::Digest;
281 return hex_encode(&sha2::Sha384::digest(data));
282 }
283 let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha384(), data)
284 .expect("SHA384 hash failed");
285 hex_encode(&digest)
286}
287
288#[cfg(not(target_os = "linux"))]
290fn sha384_bytes(data: &[u8]) -> String {
291 use digest::Digest;
292 hex_encode(&sha2::Sha384::digest(data))
293}
294
295#[cfg(target_os = "linux")]
297fn sha384_reader(mut reader: impl Read) -> io::Result<String> {
298 STREAM_BUF.with(|cell| {
299 let mut buf = cell.borrow_mut();
300 ensure_stream_buf(&mut buf);
301 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha384())
302 .map_err(|e| io::Error::other(e))?;
303 loop {
304 let n = read_full(&mut reader, &mut buf)?;
305 if n == 0 {
306 break;
307 }
308 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
309 }
310 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
311 Ok(hex_encode(&digest))
312 })
313}
314
315#[cfg(not(target_os = "linux"))]
317fn sha384_reader(reader: impl Read) -> io::Result<String> {
318 STREAM_BUF.with(|cell| {
319 let mut buf = cell.borrow_mut();
320 ensure_stream_buf(&mut buf);
321 let mut hasher = <sha2::Sha384 as digest::Digest>::new();
322 let mut reader = reader;
323 loop {
324 let n = read_full(&mut reader, &mut buf)?;
325 if n == 0 {
326 break;
327 }
328 digest::Digest::update(&mut hasher, &buf[..n]);
329 }
330 Ok(hex_encode(&digest::Digest::finalize(hasher)))
331 })
332}
333
334#[cfg(target_os = "linux")]
338fn sha512_bytes(data: &[u8]) -> String {
339 if data.len() < TINY_FILE_LIMIT as usize {
340 use digest::Digest;
341 return hex_encode(&sha2::Sha512::digest(data));
342 }
343 let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha512(), data)
344 .expect("SHA512 hash failed");
345 hex_encode(&digest)
346}
347
348#[cfg(not(target_os = "linux"))]
350fn sha512_bytes(data: &[u8]) -> String {
351 use digest::Digest;
352 hex_encode(&sha2::Sha512::digest(data))
353}
354
355#[cfg(target_os = "linux")]
357fn sha512_reader(mut reader: impl Read) -> io::Result<String> {
358 STREAM_BUF.with(|cell| {
359 let mut buf = cell.borrow_mut();
360 ensure_stream_buf(&mut buf);
361 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha512())
362 .map_err(|e| io::Error::other(e))?;
363 loop {
364 let n = read_full(&mut reader, &mut buf)?;
365 if n == 0 {
366 break;
367 }
368 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
369 }
370 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
371 Ok(hex_encode(&digest))
372 })
373}
374
375#[cfg(not(target_os = "linux"))]
377fn sha512_reader(reader: impl Read) -> io::Result<String> {
378 STREAM_BUF.with(|cell| {
379 let mut buf = cell.borrow_mut();
380 ensure_stream_buf(&mut buf);
381 let mut hasher = <sha2::Sha512 as digest::Digest>::new();
382 let mut reader = reader;
383 loop {
384 let n = read_full(&mut reader, &mut buf)?;
385 if n == 0 {
386 break;
387 }
388 digest::Digest::update(&mut hasher, &buf[..n]);
389 }
390 Ok(hex_encode(&digest::Digest::finalize(hasher)))
391 })
392}
393
394pub fn hash_bytes(algo: HashAlgorithm, data: &[u8]) -> String {
396 match algo {
397 HashAlgorithm::Sha1 => sha1_bytes(data),
398 HashAlgorithm::Sha224 => sha224_bytes(data),
399 HashAlgorithm::Sha256 => sha256_bytes(data),
400 HashAlgorithm::Sha384 => sha384_bytes(data),
401 HashAlgorithm::Sha512 => sha512_bytes(data),
402 HashAlgorithm::Md5 => md5_bytes(data),
403 HashAlgorithm::Blake2b => {
404 let hash = blake2b_simd::blake2b(data);
405 hex_encode(hash.as_bytes())
406 }
407 }
408}
409
410#[cfg(target_os = "linux")]
415pub fn hash_bytes_to_buf(algo: HashAlgorithm, data: &[u8], out: &mut [u8]) -> usize {
416 match algo {
417 HashAlgorithm::Md5 => {
418 use digest::Digest;
419 let digest = md5::Md5::digest(data);
420 hex_encode_to_slice(&digest, out);
421 32
422 }
423 HashAlgorithm::Sha1 => {
424 use digest::Digest;
425 let digest = sha1::Sha1::digest(data);
426 hex_encode_to_slice(&digest, out);
427 40
428 }
429 HashAlgorithm::Sha224 => {
430 use digest::Digest;
431 let digest = sha2::Sha224::digest(data);
432 hex_encode_to_slice(&digest, out);
433 56
434 }
435 HashAlgorithm::Sha256 => {
436 use digest::Digest;
437 let digest = sha2::Sha256::digest(data);
438 hex_encode_to_slice(&digest, out);
439 64
440 }
441 HashAlgorithm::Sha384 => {
442 use digest::Digest;
443 let digest = sha2::Sha384::digest(data);
444 hex_encode_to_slice(&digest, out);
445 96
446 }
447 HashAlgorithm::Sha512 => {
448 use digest::Digest;
449 let digest = sha2::Sha512::digest(data);
450 hex_encode_to_slice(&digest, out);
451 128
452 }
453 HashAlgorithm::Blake2b => {
454 let hash = blake2b_simd::blake2b(data);
455 let bytes = hash.as_bytes();
456 hex_encode_to_slice(bytes, out);
457 bytes.len() * 2
458 }
459 }
460}
461
462#[cfg(target_os = "linux")]
467pub fn hash_file_raw_to_buf(algo: HashAlgorithm, path: &Path, out: &mut [u8]) -> io::Result<usize> {
468 use std::os::unix::ffi::OsStrExt;
469
470 let path_bytes = path.as_os_str().as_bytes();
471 let c_path = std::ffi::CString::new(path_bytes)
472 .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;
473
474 let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
475 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
476 flags |= libc::O_NOATIME;
477 }
478
479 let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
480 if fd < 0 {
481 let err = io::Error::last_os_error();
482 if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
483 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
484 let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
485 if fd2 < 0 {
486 return Err(io::Error::last_os_error());
487 }
488 return hash_from_raw_fd_to_buf(algo, fd2, out);
489 }
490 return Err(err);
491 }
492 hash_from_raw_fd_to_buf(algo, fd, out)
493}
494
495#[cfg(target_os = "linux")]
499fn hash_from_raw_fd_to_buf(algo: HashAlgorithm, fd: i32, out: &mut [u8]) -> io::Result<usize> {
500 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
501 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
502 let err = io::Error::last_os_error();
503 unsafe {
504 libc::close(fd);
505 }
506 return Err(err);
507 }
508 let size = stat.st_size as u64;
509 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
510
511 if is_regular && size == 0 {
513 unsafe {
514 libc::close(fd);
515 }
516 return Ok(hash_bytes_to_buf(algo, &[], out));
517 }
518
519 if is_regular && size < TINY_FILE_LIMIT {
521 let mut buf = [0u8; 8192];
522 let mut total = 0usize;
523 while total < size as usize {
524 let n = unsafe {
525 libc::read(
526 fd,
527 buf[total..].as_mut_ptr() as *mut libc::c_void,
528 (size as usize) - total,
529 )
530 };
531 if n < 0 {
532 let err = io::Error::last_os_error();
533 if err.kind() == io::ErrorKind::Interrupted {
534 continue;
535 }
536 unsafe {
537 libc::close(fd);
538 }
539 return Err(err);
540 }
541 if n == 0 {
542 break;
543 }
544 total += n as usize;
545 }
546 unsafe {
547 libc::close(fd);
548 }
549 return Ok(hash_bytes_to_buf(algo, &buf[..total], out));
550 }
551
552 use std::os::unix::io::FromRawFd;
555 let file = unsafe { File::from_raw_fd(fd) };
556 let hash_str = if is_regular && size > 0 {
557 if size >= SMALL_FILE_LIMIT {
558 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
559 if let Ok(mmap) = mmap_result {
560 if size >= 2 * 1024 * 1024 {
561 let _ = mmap.advise(memmap2::Advice::HugePage);
562 }
563 let _ = mmap.advise(memmap2::Advice::Sequential);
564 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
565 let _ = mmap.advise(memmap2::Advice::WillNeed);
566 }
567 hash_bytes(algo, &mmap)
568 } else {
569 hash_file_small(algo, file, size as usize)?
570 }
571 } else {
572 hash_file_small(algo, file, size as usize)?
573 }
574 } else {
575 hash_reader(algo, file)?
576 };
577 let hex_bytes = hash_str.as_bytes();
578 out[..hex_bytes.len()].copy_from_slice(hex_bytes);
579 Ok(hex_bytes.len())
580}
581
582#[cfg(target_os = "linux")]
586fn md5_bytes(data: &[u8]) -> String {
587 if data.len() < TINY_FILE_LIMIT as usize {
591 use digest::Digest;
592 return hex_encode(&md5::Md5::digest(data));
593 }
594 let digest =
595 openssl::hash::hash(openssl::hash::MessageDigest::md5(), data).expect("MD5 hash failed");
596 hex_encode(&digest)
597}
598
599#[cfg(not(target_os = "linux"))]
601fn md5_bytes(data: &[u8]) -> String {
602 hash_digest::<Md5>(data)
603}
604
605pub fn hash_reader<R: Read>(algo: HashAlgorithm, reader: R) -> io::Result<String> {
607 match algo {
608 HashAlgorithm::Sha1 => sha1_reader(reader),
609 HashAlgorithm::Sha224 => sha224_reader(reader),
610 HashAlgorithm::Sha256 => sha256_reader(reader),
611 HashAlgorithm::Sha384 => sha384_reader(reader),
612 HashAlgorithm::Sha512 => sha512_reader(reader),
613 HashAlgorithm::Md5 => md5_reader(reader),
614 HashAlgorithm::Blake2b => blake2b_hash_reader(reader, 64),
615 }
616}
617
618#[cfg(target_os = "linux")]
620fn md5_reader(mut reader: impl Read) -> io::Result<String> {
621 STREAM_BUF.with(|cell| {
622 let mut buf = cell.borrow_mut();
623 ensure_stream_buf(&mut buf);
624 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
625 .map_err(|e| io::Error::other(e))?;
626 loop {
627 let n = read_full(&mut reader, &mut buf)?;
628 if n == 0 {
629 break;
630 }
631 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
632 }
633 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
634 Ok(hex_encode(&digest))
635 })
636}
637
638#[cfg(not(target_os = "linux"))]
640fn md5_reader(reader: impl Read) -> io::Result<String> {
641 hash_reader_impl::<Md5>(reader)
642}
643
644#[cfg(target_os = "linux")]
647static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
648
649#[cfg(target_os = "linux")]
652fn open_noatime(path: &Path) -> io::Result<File> {
653 use std::os::unix::fs::OpenOptionsExt;
654 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
655 match std::fs::OpenOptions::new()
656 .read(true)
657 .custom_flags(libc::O_NOATIME)
658 .open(path)
659 {
660 Ok(f) => return Ok(f),
661 Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
662 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
664 }
665 Err(e) => return Err(e), }
667 }
668 File::open(path)
669}
670
671#[cfg(not(target_os = "linux"))]
672fn open_noatime(path: &Path) -> io::Result<File> {
673 File::open(path)
674}
675
676#[cfg(target_os = "linux")]
679#[inline]
680fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
681 let file = open_noatime(path)?;
682 let fd = {
683 use std::os::unix::io::AsRawFd;
684 file.as_raw_fd()
685 };
686 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
687 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
688 return Err(io::Error::last_os_error());
689 }
690 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
691 let size = stat.st_size as u64;
692 Ok((file, size, is_regular))
693}
694
695#[cfg(not(target_os = "linux"))]
696#[inline]
697fn open_and_stat(path: &Path) -> io::Result<(File, u64, bool)> {
698 let file = open_noatime(path)?;
699 let metadata = file.metadata()?;
700 Ok((file, metadata.len(), metadata.file_type().is_file()))
701}
702
/// Minimum size (1 MiB) at which `posix_fadvise` hints are issued on Linux.
#[cfg(target_os = "linux")]
const FADVISE_MIN_SIZE: u64 = 1 << 20;

/// Size boundary (16 MiB): regular files at or above it take the mmap-based path, smaller
/// ones are read whole into a reusable buffer.
const SMALL_FILE_LIMIT: u64 = 16 << 20;

/// Size boundary (8 KiB): regular files below it are read into an 8 KiB stack buffer.
const TINY_FILE_LIMIT: u64 = 8 << 10;

thread_local! {
    /// Per-thread buffer reused for whole-file reads; created with 64 KiB of capacity.
    static SMALL_FILE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64 << 10));
}
727
728#[cfg(target_os = "linux")]
732fn hash_file_pipelined(algo: HashAlgorithm, file: File, file_size: u64) -> io::Result<String> {
733 match unsafe { memmap2::MmapOptions::new().map(&file) } {
735 Ok(mmap) => {
736 if file_size >= 2 * 1024 * 1024 {
737 let _ = mmap.advise(memmap2::Advice::HugePage);
738 }
739 let _ = mmap.advise(memmap2::Advice::Sequential);
740 if file_size >= 4 * 1024 * 1024 {
741 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
742 let _ = mmap.advise(memmap2::Advice::WillNeed);
743 }
744 } else {
745 let _ = mmap.advise(memmap2::Advice::WillNeed);
746 }
747 Ok(hash_bytes(algo, &mmap))
748 }
749 Err(_) => hash_file_pipelined_read(algo, file, file_size),
750 }
751}
752
753#[cfg(target_os = "linux")]
757fn hash_file_pipelined_read(
758 algo: HashAlgorithm,
759 mut file: File,
760 file_size: u64,
761) -> io::Result<String> {
762 use std::os::unix::io::AsRawFd;
763
764 const PIPE_BUF_SIZE: usize = 4 * 1024 * 1024; unsafe {
767 libc::posix_fadvise(
768 file.as_raw_fd(),
769 0,
770 file_size as i64,
771 libc::POSIX_FADV_SEQUENTIAL,
772 );
773 }
774
775 let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
776 let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
777 let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
778
779 let reader_handle = std::thread::spawn(move || -> io::Result<()> {
780 while let Ok(mut buf) = buf_rx.recv() {
781 let mut total = 0;
782 while total < buf.len() {
783 match file.read(&mut buf[total..]) {
784 Ok(0) => break,
785 Ok(n) => total += n,
786 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
787 Err(e) => return Err(e),
788 }
789 }
790 if total == 0 {
791 break;
792 }
793 if tx.send((buf, total)).is_err() {
794 break;
795 }
796 }
797 Ok(())
798 });
799
800 let hash_result = match algo {
801 HashAlgorithm::Sha1 => {
802 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha1())
803 .map_err(|e| io::Error::other(e))?;
804 while let Ok((buf, n)) = rx.recv() {
805 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
806 let _ = buf_tx.send(buf);
807 }
808 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
809 Ok(hex_encode(&digest))
810 }
811 HashAlgorithm::Sha224 | HashAlgorithm::Sha384 | HashAlgorithm::Sha512 => {
812 let md = match algo {
813 HashAlgorithm::Sha224 => openssl::hash::MessageDigest::sha224(),
814 HashAlgorithm::Sha384 => openssl::hash::MessageDigest::sha384(),
815 _ => openssl::hash::MessageDigest::sha512(),
816 };
817 let mut hasher = openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e))?;
818 while let Ok((buf, n)) = rx.recv() {
819 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
820 let _ = buf_tx.send(buf);
821 }
822 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
823 Ok(hex_encode(&digest))
824 }
825 HashAlgorithm::Sha256 => {
826 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::sha256())
827 .map_err(|e| io::Error::other(e))?;
828 while let Ok((buf, n)) = rx.recv() {
829 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
830 let _ = buf_tx.send(buf);
831 }
832 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
833 Ok(hex_encode(&digest))
834 }
835 HashAlgorithm::Md5 => {
836 let mut hasher = openssl::hash::Hasher::new(openssl::hash::MessageDigest::md5())
837 .map_err(|e| io::Error::other(e))?;
838 while let Ok((buf, n)) = rx.recv() {
839 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
840 let _ = buf_tx.send(buf);
841 }
842 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
843 Ok(hex_encode(&digest))
844 }
845 HashAlgorithm::Blake2b => {
846 let mut state = blake2b_simd::Params::new().to_state();
847 while let Ok((buf, n)) = rx.recv() {
848 state.update(&buf[..n]);
849 let _ = buf_tx.send(buf);
850 }
851 Ok(hex_encode(state.finalize().as_bytes()))
852 }
853 };
854
855 match reader_handle.join() {
856 Ok(Ok(())) => {}
857 Ok(Err(e)) => {
858 if hash_result.is_ok() {
859 return Err(e);
860 }
861 }
862 Err(payload) => {
863 let msg = if let Some(s) = payload.downcast_ref::<&str>() {
864 format!("reader thread panicked: {}", s)
865 } else if let Some(s) = payload.downcast_ref::<String>() {
866 format!("reader thread panicked: {}", s)
867 } else {
868 "reader thread panicked".to_string()
869 };
870 return Err(io::Error::other(msg));
871 }
872 }
873
874 hash_result
875}
876
877pub fn hash_file(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
881 let (file, file_size, is_regular) = open_and_stat(path)?;
882
883 if is_regular && file_size == 0 {
884 return Ok(hash_bytes(algo, &[]));
885 }
886
887 if file_size > 0 && is_regular {
888 if file_size < TINY_FILE_LIMIT {
890 return hash_file_tiny(algo, file, file_size as usize);
891 }
892 if file_size >= SMALL_FILE_LIMIT {
894 #[cfg(target_os = "linux")]
895 {
896 return hash_file_pipelined(algo, file, file_size);
897 }
898 #[cfg(not(target_os = "linux"))]
900 {
901 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
902 if let Ok(mmap) = mmap_result {
903 return Ok(hash_bytes(algo, &mmap));
904 }
905 }
906 }
907 if file_size < SMALL_FILE_LIMIT {
910 return hash_file_small(algo, file, file_size as usize);
911 }
912 }
913
914 #[cfg(target_os = "linux")]
916 if file_size >= FADVISE_MIN_SIZE {
917 use std::os::unix::io::AsRawFd;
918 unsafe {
919 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
920 }
921 }
922 hash_reader(algo, file)
923}
924
925#[inline]
929fn hash_file_tiny(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
930 let mut buf = [0u8; 8192];
931 let mut total = 0;
932 while total < size {
934 match file.read(&mut buf[total..size]) {
935 Ok(0) => break,
936 Ok(n) => total += n,
937 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
938 Err(e) => return Err(e),
939 }
940 }
941 Ok(hash_bytes(algo, &buf[..total]))
942}
943
944#[inline]
947fn hash_file_small(algo: HashAlgorithm, mut file: File, size: usize) -> io::Result<String> {
948 SMALL_FILE_BUF.with(|cell| {
949 let mut buf = cell.borrow_mut();
950 buf.clear();
952 buf.reserve(size);
953 unsafe {
956 buf.set_len(size);
957 }
958 let mut total = 0;
959 while total < size {
960 match file.read(&mut buf[total..size]) {
961 Ok(0) => break,
962 Ok(n) => total += n,
963 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
964 Err(e) => return Err(e),
965 }
966 }
967 Ok(hash_bytes(algo, &buf[..total]))
968 })
969}
970
971pub fn hash_stdin(algo: HashAlgorithm) -> io::Result<String> {
973 let stdin = io::stdin();
974 #[cfg(target_os = "linux")]
976 {
977 use std::os::unix::io::AsRawFd;
978 let fd = stdin.as_raw_fd();
979 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
980 if unsafe { libc::fstat(fd, &mut stat) } == 0
981 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
982 && stat.st_size > 0
983 {
984 unsafe {
985 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
986 }
987 }
988 }
989 hash_reader(algo, stdin.lock())
991}
992
/// Returns `true` when more than one path is supplied.
pub fn should_use_parallel(paths: &[&Path]) -> bool {
    paths.len() > 1
}
1000
1001#[cfg(target_os = "linux")]
1006pub fn readahead_files(paths: &[&Path]) {
1007 use std::os::unix::io::AsRawFd;
1008 for path in paths {
1009 if let Ok(file) = open_noatime(path) {
1010 if let Ok(meta) = file.metadata() {
1011 let len = meta.len();
1012 if meta.file_type().is_file() && len >= FADVISE_MIN_SIZE {
1013 unsafe {
1014 libc::posix_fadvise(
1015 file.as_raw_fd(),
1016 0,
1017 len as i64,
1018 libc::POSIX_FADV_WILLNEED,
1019 );
1020 }
1021 }
1022 }
1023 }
1024 }
1025}
1026
1027#[cfg(not(target_os = "linux"))]
1028pub fn readahead_files(_paths: &[&Path]) {
1029 }
1031
1032pub fn blake2b_hash_data(data: &[u8], output_bytes: usize) -> String {
1037 let hash = blake2b_simd::Params::new()
1038 .hash_length(output_bytes)
1039 .hash(data);
1040 hex_encode(hash.as_bytes())
1041}
1042
1043pub fn blake2b_hash_reader<R: Read>(mut reader: R, output_bytes: usize) -> io::Result<String> {
1046 STREAM_BUF.with(|cell| {
1047 let mut buf = cell.borrow_mut();
1048 ensure_stream_buf(&mut buf);
1049 let mut state = blake2b_simd::Params::new()
1050 .hash_length(output_bytes)
1051 .to_state();
1052 loop {
1053 let n = read_full(&mut reader, &mut buf)?;
1054 if n == 0 {
1055 break;
1056 }
1057 state.update(&buf[..n]);
1058 }
1059 Ok(hex_encode(state.finalize().as_bytes()))
1060 })
1061}
1062
1063pub fn blake2b_hash_file(path: &Path, output_bytes: usize) -> io::Result<String> {
1067 let (file, file_size, is_regular) = open_and_stat(path)?;
1068
1069 if is_regular && file_size == 0 {
1070 return Ok(blake2b_hash_data(&[], output_bytes));
1071 }
1072
1073 if file_size > 0 && is_regular {
1074 if file_size < TINY_FILE_LIMIT {
1076 return blake2b_hash_file_tiny(file, file_size as usize, output_bytes);
1077 }
1078 if file_size >= SMALL_FILE_LIMIT {
1080 #[cfg(target_os = "linux")]
1081 {
1082 return blake2b_hash_file_pipelined(file, file_size, output_bytes);
1083 }
1084 #[cfg(not(target_os = "linux"))]
1085 {
1086 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1087 if let Ok(mmap) = mmap_result {
1088 return Ok(blake2b_hash_data(&mmap, output_bytes));
1089 }
1090 }
1091 }
1092 if file_size < SMALL_FILE_LIMIT {
1094 return blake2b_hash_file_small(file, file_size as usize, output_bytes);
1095 }
1096 }
1097
1098 #[cfg(target_os = "linux")]
1100 if file_size >= FADVISE_MIN_SIZE {
1101 use std::os::unix::io::AsRawFd;
1102 unsafe {
1103 libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
1104 }
1105 }
1106 blake2b_hash_reader(file, output_bytes)
1107}
1108
1109#[inline]
1111fn blake2b_hash_file_tiny(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1112 let mut buf = [0u8; 8192];
1113 let mut total = 0;
1114 while total < size {
1115 match file.read(&mut buf[total..size]) {
1116 Ok(0) => break,
1117 Ok(n) => total += n,
1118 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1119 Err(e) => return Err(e),
1120 }
1121 }
1122 Ok(blake2b_hash_data(&buf[..total], output_bytes))
1123}
1124
1125#[inline]
1127fn blake2b_hash_file_small(mut file: File, size: usize, output_bytes: usize) -> io::Result<String> {
1128 SMALL_FILE_BUF.with(|cell| {
1129 let mut buf = cell.borrow_mut();
1130 buf.clear();
1131 buf.reserve(size);
1132 unsafe {
1134 buf.set_len(size);
1135 }
1136 let mut total = 0;
1137 while total < size {
1138 match file.read(&mut buf[total..size]) {
1139 Ok(0) => break,
1140 Ok(n) => total += n,
1141 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1142 Err(e) => return Err(e),
1143 }
1144 }
1145 Ok(blake2b_hash_data(&buf[..total], output_bytes))
1146 })
1147}
1148
1149#[cfg(target_os = "linux")]
1154fn blake2b_hash_file_pipelined(
1155 file: File,
1156 file_size: u64,
1157 output_bytes: usize,
1158) -> io::Result<String> {
1159 match unsafe { memmap2::MmapOptions::new().map(&file) } {
1163 Ok(mmap) => {
1164 if file_size >= 2 * 1024 * 1024 {
1167 let _ = mmap.advise(memmap2::Advice::HugePage);
1168 }
1169 let _ = mmap.advise(memmap2::Advice::Sequential);
1170 if file_size >= 4 * 1024 * 1024 {
1173 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1174 let _ = mmap.advise(memmap2::Advice::WillNeed);
1175 }
1176 } else {
1177 let _ = mmap.advise(memmap2::Advice::WillNeed);
1178 }
1179 Ok(blake2b_hash_data(&mmap, output_bytes))
1182 }
1183 Err(_) => {
1184 blake2b_hash_file_streamed(file, file_size, output_bytes)
1187 }
1188 }
1189}
1190
1191#[cfg(target_os = "linux")]
1195fn blake2b_hash_file_streamed(
1196 mut file: File,
1197 file_size: u64,
1198 output_bytes: usize,
1199) -> io::Result<String> {
1200 use std::os::unix::io::AsRawFd;
1201
1202 const PIPE_BUF_SIZE: usize = 8 * 1024 * 1024; unsafe {
1206 libc::posix_fadvise(
1207 file.as_raw_fd(),
1208 0,
1209 file_size as i64,
1210 libc::POSIX_FADV_SEQUENTIAL,
1211 );
1212 }
1213
1214 let (tx, rx) = std::sync::mpsc::sync_channel::<(Vec<u8>, usize)>(1);
1216 let (buf_tx, buf_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(1);
1217 let _ = buf_tx.send(vec![0u8; PIPE_BUF_SIZE]);
1218
1219 let reader_handle = std::thread::spawn(move || -> io::Result<()> {
1220 while let Ok(mut buf) = buf_rx.recv() {
1222 let mut total = 0;
1223 while total < buf.len() {
1224 match file.read(&mut buf[total..]) {
1225 Ok(0) => break,
1226 Ok(n) => total += n,
1227 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1228 Err(e) => return Err(e),
1229 }
1230 }
1231 if total == 0 {
1232 break;
1233 }
1234 if tx.send((buf, total)).is_err() {
1235 break;
1236 }
1237 }
1238 Ok(())
1239 });
1240
1241 let mut state = blake2b_simd::Params::new()
1242 .hash_length(output_bytes)
1243 .to_state();
1244 while let Ok((buf, n)) = rx.recv() {
1245 state.update(&buf[..n]);
1246 let _ = buf_tx.send(buf);
1247 }
1248 let hash_result = Ok(hex_encode(state.finalize().as_bytes()));
1249
1250 match reader_handle.join() {
1251 Ok(Ok(())) => {}
1252 Ok(Err(e)) => {
1253 if hash_result.is_ok() {
1254 return Err(e);
1255 }
1256 }
1257 Err(payload) => {
1258 let msg = if let Some(s) = payload.downcast_ref::<&str>() {
1259 format!("reader thread panicked: {}", s)
1260 } else if let Some(s) = payload.downcast_ref::<String>() {
1261 format!("reader thread panicked: {}", s)
1262 } else {
1263 "reader thread panicked".to_string()
1264 };
1265 return Err(io::Error::other(msg));
1266 }
1267 }
1268
1269 hash_result
1270}
1271
1272pub fn blake2b_hash_stdin(output_bytes: usize) -> io::Result<String> {
1275 let stdin = io::stdin();
1276 #[cfg(target_os = "linux")]
1277 {
1278 use std::os::unix::io::AsRawFd;
1279 let fd = stdin.as_raw_fd();
1280 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
1281 if unsafe { libc::fstat(fd, &mut stat) } == 0
1282 && (stat.st_mode & libc::S_IFMT) == libc::S_IFREG
1283 && stat.st_size > 0
1284 {
1285 unsafe {
1286 libc::posix_fadvise(fd, 0, stat.st_size, libc::POSIX_FADV_SEQUENTIAL);
1287 }
1288 }
1289 }
1290 blake2b_hash_reader(stdin.lock(), output_bytes)
1291}
1292
/// The bytes of a file held in memory, either memory-mapped or copied into an
/// owned buffer. Both variants are viewed as a byte slice through `AsRef<[u8]>`.
enum FileContent {
    /// File contents exposed through a `memmap2` memory map.
    Mmap(memmap2::Mmap),
    /// File contents copied into a heap-allocated vector.
    Buf(Vec<u8>),
}
1299
1300impl AsRef<[u8]> for FileContent {
1301 fn as_ref(&self) -> &[u8] {
1302 match self {
1303 FileContent::Mmap(m) => m,
1304 FileContent::Buf(v) => v,
1305 }
1306 }
1307}
1308
1309fn open_file_content(path: &Path) -> io::Result<FileContent> {
1313 let (file, size, is_regular) = open_and_stat(path)?;
1314 if is_regular && size == 0 {
1315 return Ok(FileContent::Buf(Vec::new()));
1316 }
1317 if is_regular && size > 0 {
1318 if size < TINY_FILE_LIMIT {
1322 let mut buf = vec![0u8; size as usize];
1323 let mut total = 0;
1324 let mut f = file;
1325 while total < size as usize {
1326 match f.read(&mut buf[total..]) {
1327 Ok(0) => break,
1328 Ok(n) => total += n,
1329 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1330 Err(e) => return Err(e),
1331 }
1332 }
1333 buf.truncate(total);
1334 return Ok(FileContent::Buf(buf));
1335 }
1336 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
1338 if let Ok(mmap) = mmap_result {
1339 #[cfg(target_os = "linux")]
1340 {
1341 if size >= 2 * 1024 * 1024 {
1342 let _ = mmap.advise(memmap2::Advice::HugePage);
1343 }
1344 let _ = mmap.advise(memmap2::Advice::Sequential);
1345 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
1346 let _ = mmap.advise(memmap2::Advice::WillNeed);
1347 }
1348 }
1349 return Ok(FileContent::Mmap(mmap));
1350 }
1351 let mut buf = vec![0u8; size as usize];
1353 let mut total = 0;
1354 let mut f = file;
1355 while total < size as usize {
1356 match f.read(&mut buf[total..]) {
1357 Ok(0) => break,
1358 Ok(n) => total += n,
1359 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1360 Err(e) => return Err(e),
1361 }
1362 }
1363 buf.truncate(total);
1364 return Ok(FileContent::Buf(buf));
1365 }
1366 let mut buf = Vec::new();
1368 let mut f = file;
1369 f.read_to_end(&mut buf)?;
1370 Ok(FileContent::Buf(buf))
1371}
1372
1373fn read_remaining_to_vec(prefix: &[u8], mut file: File) -> io::Result<FileContent> {
1377 let mut buf = Vec::with_capacity(prefix.len() + 65536);
1378 buf.extend_from_slice(prefix);
1379 file.read_to_end(&mut buf)?;
1380 Ok(FileContent::Buf(buf))
1381}
1382
1383fn open_file_content_fast(path: &Path) -> io::Result<FileContent> {
1388 let mut file = open_noatime(path)?;
1389 let mut small_buf = [0u8; 4096];
1392 match file.read(&mut small_buf) {
1393 Ok(0) => return Ok(FileContent::Buf(Vec::new())),
1394 Ok(n) if n < small_buf.len() => {
1395 let mut vec = Vec::with_capacity(n);
1397 vec.extend_from_slice(&small_buf[..n]);
1398 return Ok(FileContent::Buf(vec));
1399 }
1400 Ok(n) => {
1401 let mut buf = vec![0u8; 65536];
1403 buf[..n].copy_from_slice(&small_buf[..n]);
1404 let mut total = n;
1405 loop {
1406 match file.read(&mut buf[total..]) {
1407 Ok(0) => {
1408 buf.truncate(total);
1409 return Ok(FileContent::Buf(buf));
1410 }
1411 Ok(n) => {
1412 total += n;
1413 if total >= buf.len() {
1414 return read_remaining_to_vec(&buf[..total], file);
1416 }
1417 }
1418 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1419 Err(e) => return Err(e),
1420 }
1421 }
1422 }
1423 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
1424 let mut buf = vec![0u8; 65536];
1425 let mut total = 0;
1426 loop {
1427 match file.read(&mut buf[total..]) {
1428 Ok(0) => {
1429 buf.truncate(total);
1430 return Ok(FileContent::Buf(buf));
1431 }
1432 Ok(n) => {
1433 total += n;
1434 if total >= buf.len() {
1435 return read_remaining_to_vec(&buf[..total], file);
1437 }
1438 }
1439 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1440 Err(e) => return Err(e),
1441 }
1442 }
1443 }
1444 Err(e) => return Err(e),
1445 }
1446}
1447
1448pub fn blake2b_hash_files_many(paths: &[&Path], output_bytes: usize) -> Vec<io::Result<String>> {
1456 use blake2b_simd::many::{HashManyJob, hash_many};
1457
1458 let use_fast = paths.len() >= 20;
1463
1464 let file_data: Vec<io::Result<FileContent>> = if paths.len() <= 10 {
1465 paths.iter().map(|&path| open_file_content(path)).collect()
1467 } else {
1468 let num_threads = std::thread::available_parallelism()
1469 .map(|n| n.get())
1470 .unwrap_or(4)
1471 .min(paths.len());
1472 let chunk_size = (paths.len() + num_threads - 1) / num_threads;
1473
1474 std::thread::scope(|s| {
1475 let handles: Vec<_> = paths
1476 .chunks(chunk_size)
1477 .map(|chunk| {
1478 s.spawn(move || {
1479 chunk
1480 .iter()
1481 .map(|&path| {
1482 if use_fast {
1483 open_file_content_fast(path)
1484 } else {
1485 open_file_content(path)
1486 }
1487 })
1488 .collect::<Vec<_>>()
1489 })
1490 })
1491 .collect();
1492
1493 handles
1494 .into_iter()
1495 .flat_map(|h| h.join().unwrap())
1496 .collect()
1497 })
1498 };
1499
1500 let hash_results = {
1502 let mut params = blake2b_simd::Params::new();
1503 params.hash_length(output_bytes);
1504
1505 let ok_entries: Vec<(usize, &[u8])> = file_data
1506 .iter()
1507 .enumerate()
1508 .filter_map(|(i, r)| r.as_ref().ok().map(|c| (i, c.as_ref())))
1509 .collect();
1510
1511 let mut jobs: Vec<HashManyJob> = ok_entries
1512 .iter()
1513 .map(|(_, data)| HashManyJob::new(¶ms, data))
1514 .collect();
1515
1516 hash_many(jobs.iter_mut());
1518
1519 let mut hm: Vec<Option<String>> = vec![None; paths.len()];
1521 for (j, &(orig_i, _)) in ok_entries.iter().enumerate() {
1522 hm[orig_i] = Some(hex_encode(jobs[j].to_hash().as_bytes()));
1523 }
1524 hm
1525 }; hash_results
1529 .into_iter()
1530 .zip(file_data)
1531 .map(|(hash_opt, result)| match result {
1532 Ok(_) => Ok(hash_opt.unwrap()),
1533 Err(e) => Err(e),
1534 })
1535 .collect()
1536}
1537
1538pub fn blake2b_hash_files_parallel(
1546 paths: &[&Path],
1547 output_bytes: usize,
1548) -> Vec<io::Result<String>> {
1549 let n = paths.len();
1550
1551 let sample_count = n.min(5);
1555 let mut sample_max: u64 = 0;
1556 let mut sample_total: u64 = 0;
1557 for &p in paths.iter().take(sample_count) {
1558 let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
1559 sample_total += size;
1560 sample_max = sample_max.max(size);
1561 }
1562 let estimated_total = if sample_count > 0 {
1563 sample_total * (n as u64) / (sample_count as u64)
1564 } else {
1565 0
1566 };
1567
1568 if estimated_total < 1024 * 1024 && sample_max < SMALL_FILE_LIMIT {
1571 return blake2b_hash_files_many(paths, output_bytes);
1572 }
1573
1574 let mut indexed: Vec<(usize, &Path, u64)> = paths
1576 .iter()
1577 .enumerate()
1578 .map(|(i, &p)| {
1579 let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
1580 (i, p, size)
1581 })
1582 .collect();
1583
1584 indexed.sort_by(|a, b| b.2.cmp(&a.2));
1587
1588 #[cfg(target_os = "linux")]
1593 {
1594 use std::os::unix::io::AsRawFd;
1595 for &(_, path, size) in indexed.iter().take(20) {
1596 if size >= 1024 * 1024 {
1597 if let Ok(file) = open_noatime(path) {
1598 unsafe {
1599 libc::readahead(file.as_raw_fd(), 0, size as usize);
1600 }
1601 }
1602 }
1603 }
1604 }
1605
1606 let num_threads = std::thread::available_parallelism()
1607 .map(|n| n.get())
1608 .unwrap_or(4)
1609 .min(n);
1610
1611 let work_idx = AtomicUsize::new(0);
1613
1614 std::thread::scope(|s| {
1615 let work_idx = &work_idx;
1616 let indexed = &indexed;
1617
1618 let handles: Vec<_> = (0..num_threads)
1619 .map(|_| {
1620 s.spawn(move || {
1621 let mut local_results = Vec::new();
1622 loop {
1623 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1624 if idx >= indexed.len() {
1625 break;
1626 }
1627 let (orig_idx, path, _size) = indexed[idx];
1628 let result = blake2b_hash_file(path, output_bytes);
1629 local_results.push((orig_idx, result));
1630 }
1631 local_results
1632 })
1633 })
1634 .collect();
1635
1636 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1638 for handle in handles {
1639 for (orig_idx, result) in handle.join().unwrap() {
1640 results[orig_idx] = Some(result);
1641 }
1642 }
1643 results
1644 .into_iter()
1645 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1646 .collect()
1647 })
1648}
1649
1650pub fn hash_files_parallel(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1656 let n = paths.len();
1657
1658 let mut indexed: Vec<(usize, &Path, u64)> = paths
1661 .iter()
1662 .enumerate()
1663 .map(|(i, &p)| {
1664 let size = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
1665 (i, p, size)
1666 })
1667 .collect();
1668
1669 indexed.sort_by(|a, b| b.2.cmp(&a.2));
1672
1673 #[cfg(target_os = "linux")]
1678 {
1679 use std::os::unix::io::AsRawFd;
1680 for &(_, path, size) in indexed.iter().take(20) {
1681 if size >= 1024 * 1024 {
1682 if let Ok(file) = open_noatime(path) {
1683 unsafe {
1684 libc::readahead(file.as_raw_fd(), 0, size as usize);
1685 }
1686 }
1687 }
1688 }
1689 }
1690
1691 let num_threads = std::thread::available_parallelism()
1692 .map(|n| n.get())
1693 .unwrap_or(4)
1694 .min(n);
1695
1696 let work_idx = AtomicUsize::new(0);
1698
1699 std::thread::scope(|s| {
1700 let work_idx = &work_idx;
1701 let indexed = &indexed;
1702
1703 let handles: Vec<_> = (0..num_threads)
1704 .map(|_| {
1705 s.spawn(move || {
1706 let mut local_results = Vec::new();
1707 loop {
1708 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1709 if idx >= indexed.len() {
1710 break;
1711 }
1712 let (orig_idx, path, _size) = indexed[idx];
1713 let result = hash_file(algo, path);
1714 local_results.push((orig_idx, result));
1715 }
1716 local_results
1717 })
1718 })
1719 .collect();
1720
1721 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1723 for handle in handles {
1724 for (orig_idx, result) in handle.join().unwrap() {
1725 results[orig_idx] = Some(result);
1726 }
1727 }
1728 results
1729 .into_iter()
1730 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1731 .collect()
1732 })
1733}
1734
1735pub fn hash_files_parallel_fast(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1741 let n = paths.len();
1742 if n == 0 {
1743 return Vec::new();
1744 }
1745 if n == 1 {
1746 return vec![hash_file_nostat(algo, paths[0])];
1747 }
1748
1749 #[cfg(target_os = "linux")]
1752 readahead_files_all(paths);
1753
1754 let num_threads = std::thread::available_parallelism()
1755 .map(|n| n.get())
1756 .unwrap_or(4)
1757 .min(n);
1758
1759 let work_idx = AtomicUsize::new(0);
1760
1761 std::thread::scope(|s| {
1762 let work_idx = &work_idx;
1763
1764 let handles: Vec<_> = (0..num_threads)
1765 .map(|_| {
1766 s.spawn(move || {
1767 let mut local_results = Vec::new();
1768 loop {
1769 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1770 if idx >= n {
1771 break;
1772 }
1773 let result = hash_file_nostat(algo, paths[idx]);
1774 local_results.push((idx, result));
1775 }
1776 local_results
1777 })
1778 })
1779 .collect();
1780
1781 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1782 for handle in handles {
1783 for (idx, result) in handle.join().unwrap() {
1784 results[idx] = Some(result);
1785 }
1786 }
1787 results
1788 .into_iter()
1789 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1790 .collect()
1791 })
1792}
1793
1794pub fn hash_files_batch(paths: &[&Path], algo: HashAlgorithm) -> Vec<io::Result<String>> {
1803 let n = paths.len();
1804 if n == 0 {
1805 return Vec::new();
1806 }
1807
1808 #[cfg(target_os = "linux")]
1810 readahead_files_all(paths);
1811
1812 let use_fast = n >= 20;
1815
1816 let file_data: Vec<io::Result<FileContent>> = if n <= 10 {
1817 paths
1819 .iter()
1820 .map(|&path| {
1821 if use_fast {
1822 open_file_content_fast(path)
1823 } else {
1824 open_file_content(path)
1825 }
1826 })
1827 .collect()
1828 } else {
1829 let num_threads = std::thread::available_parallelism()
1830 .map(|t| t.get())
1831 .unwrap_or(4)
1832 .min(n);
1833 let chunk_size = (n + num_threads - 1) / num_threads;
1834
1835 std::thread::scope(|s| {
1836 let handles: Vec<_> = paths
1837 .chunks(chunk_size)
1838 .map(|chunk| {
1839 s.spawn(move || {
1840 chunk
1841 .iter()
1842 .map(|&path| {
1843 if use_fast {
1844 open_file_content_fast(path)
1845 } else {
1846 open_file_content(path)
1847 }
1848 })
1849 .collect::<Vec<_>>()
1850 })
1851 })
1852 .collect();
1853
1854 handles
1855 .into_iter()
1856 .flat_map(|h| h.join().unwrap())
1857 .collect()
1858 })
1859 };
1860
1861 let num_hash_threads = std::thread::available_parallelism()
1864 .map(|t| t.get())
1865 .unwrap_or(4)
1866 .min(n);
1867 let work_idx = AtomicUsize::new(0);
1868
1869 std::thread::scope(|s| {
1870 let work_idx = &work_idx;
1871 let file_data = &file_data;
1872
1873 let handles: Vec<_> = (0..num_hash_threads)
1874 .map(|_| {
1875 s.spawn(move || {
1876 let mut local_results = Vec::new();
1877 loop {
1878 let idx = work_idx.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1879 if idx >= n {
1880 break;
1881 }
1882 let result = match &file_data[idx] {
1883 Ok(content) => Ok(hash_bytes(algo, content.as_ref())),
1884 Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
1885 };
1886 local_results.push((idx, result));
1887 }
1888 local_results
1889 })
1890 })
1891 .collect();
1892
1893 let mut results: Vec<Option<io::Result<String>>> = (0..n).map(|_| None).collect();
1894 for handle in handles {
1895 for (idx, result) in handle.join().unwrap() {
1896 results[idx] = Some(result);
1897 }
1898 }
1899 results
1900 .into_iter()
1901 .map(|opt| opt.unwrap_or_else(|| Err(io::Error::other("missing result"))))
1902 .collect()
1903 })
1904}
1905
1906fn hash_stream_with_prefix(
1910 algo: HashAlgorithm,
1911 prefix: &[u8],
1912 mut file: File,
1913) -> io::Result<String> {
1914 match algo {
1915 HashAlgorithm::Sha1 => {
1916 #[cfg(target_os = "linux")]
1917 {
1918 hash_stream_with_prefix_openssl(openssl::hash::MessageDigest::sha1(), prefix, file)
1919 }
1920 #[cfg(not(target_os = "linux"))]
1921 {
1922 hash_stream_with_prefix_digest::<sha1::Sha1>(prefix, file)
1923 }
1924 }
1925 HashAlgorithm::Sha224 => {
1926 #[cfg(target_os = "linux")]
1927 {
1928 hash_stream_with_prefix_openssl(
1929 openssl::hash::MessageDigest::sha224(),
1930 prefix,
1931 file,
1932 )
1933 }
1934 #[cfg(not(target_os = "linux"))]
1935 {
1936 hash_stream_with_prefix_digest::<sha2::Sha224>(prefix, file)
1937 }
1938 }
1939 HashAlgorithm::Sha256 => {
1940 #[cfg(target_os = "linux")]
1941 {
1942 hash_stream_with_prefix_openssl(
1943 openssl::hash::MessageDigest::sha256(),
1944 prefix,
1945 file,
1946 )
1947 }
1948 #[cfg(not(target_os = "linux"))]
1949 {
1950 hash_stream_with_prefix_digest::<sha2::Sha256>(prefix, file)
1951 }
1952 }
1953 HashAlgorithm::Sha384 => {
1954 #[cfg(target_os = "linux")]
1955 {
1956 hash_stream_with_prefix_openssl(
1957 openssl::hash::MessageDigest::sha384(),
1958 prefix,
1959 file,
1960 )
1961 }
1962 #[cfg(not(target_os = "linux"))]
1963 {
1964 hash_stream_with_prefix_digest::<sha2::Sha384>(prefix, file)
1965 }
1966 }
1967 HashAlgorithm::Sha512 => {
1968 #[cfg(target_os = "linux")]
1969 {
1970 hash_stream_with_prefix_openssl(
1971 openssl::hash::MessageDigest::sha512(),
1972 prefix,
1973 file,
1974 )
1975 }
1976 #[cfg(not(target_os = "linux"))]
1977 {
1978 hash_stream_with_prefix_digest::<sha2::Sha512>(prefix, file)
1979 }
1980 }
1981 HashAlgorithm::Md5 => {
1982 #[cfg(target_os = "linux")]
1983 {
1984 hash_stream_with_prefix_openssl(openssl::hash::MessageDigest::md5(), prefix, file)
1985 }
1986 #[cfg(not(target_os = "linux"))]
1987 {
1988 hash_stream_with_prefix_digest::<md5::Md5>(prefix, file)
1989 }
1990 }
1991 HashAlgorithm::Blake2b => {
1992 let mut state = blake2b_simd::Params::new().to_state();
1993 state.update(prefix);
1994 STREAM_BUF.with(|cell| {
1995 let mut buf = cell.borrow_mut();
1996 ensure_stream_buf(&mut buf);
1997 loop {
1998 let n = read_full(&mut file, &mut buf)?;
1999 if n == 0 {
2000 break;
2001 }
2002 state.update(&buf[..n]);
2003 }
2004 Ok(hex_encode(state.finalize().as_bytes()))
2005 })
2006 }
2007 }
2008}
2009
2010#[cfg(target_os = "linux")]
2012fn hash_stream_with_prefix_openssl(
2013 md: openssl::hash::MessageDigest,
2014 prefix: &[u8],
2015 mut file: File,
2016) -> io::Result<String> {
2017 let mut hasher = openssl::hash::Hasher::new(md).map_err(|e| io::Error::other(e))?;
2018 hasher.update(prefix).map_err(|e| io::Error::other(e))?;
2019 STREAM_BUF.with(|cell| {
2020 let mut buf = cell.borrow_mut();
2021 ensure_stream_buf(&mut buf);
2022 loop {
2023 let n = read_full(&mut file, &mut buf)?;
2024 if n == 0 {
2025 break;
2026 }
2027 hasher.update(&buf[..n]).map_err(|e| io::Error::other(e))?;
2028 }
2029 let digest = hasher.finish().map_err(|e| io::Error::other(e))?;
2030 Ok(hex_encode(&digest))
2031 })
2032}
2033
/// Non-Linux: feeds `prefix` and then the rest of `file` through a
/// `digest::Digest` implementation `D`, returning the hex digest.
///
/// The file is read through the thread-local `STREAM_BUF`.
#[cfg(not(target_os = "linux"))]
fn hash_stream_with_prefix_digest<D: digest::Digest>(
    prefix: &[u8],
    mut file: File,
) -> io::Result<String> {
    STREAM_BUF.with(|cell| {
        let mut chunk = cell.borrow_mut();
        ensure_stream_buf(&mut chunk);

        let mut hasher = D::new();
        hasher.update(prefix);
        loop {
            match read_full(&mut file, &mut chunk)? {
                0 => break,
                n => {
                    hasher.update(&chunk[..n]);
                }
            }
        }
        Ok(hex_encode(&hasher.finalize()))
    })
}
2055
2056pub fn hash_file_nostat(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2062 let mut file = open_noatime(path)?;
2063 let mut small_buf = [0u8; 4096];
2067 match file.read(&mut small_buf) {
2068 Ok(0) => return Ok(hash_bytes(algo, &[])),
2069 Ok(n) if n < small_buf.len() => {
2070 return Ok(hash_bytes(algo, &small_buf[..n]));
2072 }
2073 Ok(n) => {
2074 let mut buf = [0u8; 65536];
2076 buf[..n].copy_from_slice(&small_buf[..n]);
2077 let mut total = n;
2078 loop {
2079 match file.read(&mut buf[total..]) {
2080 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
2081 Ok(n) => {
2082 total += n;
2083 if total >= buf.len() {
2084 return hash_stream_with_prefix(algo, &buf[..total], file);
2087 }
2088 }
2089 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2090 Err(e) => return Err(e),
2091 }
2092 }
2093 }
2094 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
2095 let mut buf = [0u8; 65536];
2097 let mut total = 0;
2098 loop {
2099 match file.read(&mut buf[total..]) {
2100 Ok(0) => return Ok(hash_bytes(algo, &buf[..total])),
2101 Ok(n) => {
2102 total += n;
2103 if total >= buf.len() {
2104 return hash_stream_with_prefix(algo, &buf[..total], file);
2106 }
2107 }
2108 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
2109 Err(e) => return Err(e),
2110 }
2111 }
2112 }
2113 Err(e) => return Err(e),
2114 }
2115}
2116
2117#[cfg(target_os = "linux")]
2128pub fn hash_file_raw(algo: HashAlgorithm, path: &Path) -> io::Result<String> {
2129 use std::os::unix::ffi::OsStrExt;
2130
2131 let path_bytes = path.as_os_str().as_bytes();
2132 let c_path = std::ffi::CString::new(path_bytes)
2133 .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains null byte"))?;
2134
2135 let mut flags = libc::O_RDONLY | libc::O_CLOEXEC;
2137 if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
2138 flags |= libc::O_NOATIME;
2139 }
2140
2141 let fd = unsafe { libc::open(c_path.as_ptr(), flags) };
2142 if fd < 0 {
2143 let err = io::Error::last_os_error();
2144 if err.raw_os_error() == Some(libc::EPERM) && flags & libc::O_NOATIME != 0 {
2145 NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
2146 let fd2 = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
2147 if fd2 < 0 {
2148 return Err(io::Error::last_os_error());
2149 }
2150 return hash_from_raw_fd(algo, fd2);
2151 }
2152 return Err(err);
2153 }
2154 hash_from_raw_fd(algo, fd)
2155}
2156
2157#[cfg(target_os = "linux")]
2161fn hash_from_raw_fd(algo: HashAlgorithm, fd: i32) -> io::Result<String> {
2162 let mut stat: libc::stat = unsafe { std::mem::zeroed() };
2164 if unsafe { libc::fstat(fd, &mut stat) } != 0 {
2165 let err = io::Error::last_os_error();
2166 unsafe {
2167 libc::close(fd);
2168 }
2169 return Err(err);
2170 }
2171 let size = stat.st_size as u64;
2172 let is_regular = (stat.st_mode & libc::S_IFMT) == libc::S_IFREG;
2173
2174 if is_regular && size == 0 {
2176 unsafe {
2177 libc::close(fd);
2178 }
2179 return Ok(hash_bytes(algo, &[]));
2180 }
2181
2182 if is_regular && size < TINY_FILE_LIMIT {
2185 let mut buf = [0u8; 8192];
2186 let mut total = 0usize;
2187 while total < size as usize {
2188 let n = unsafe {
2189 libc::read(
2190 fd,
2191 buf[total..].as_mut_ptr() as *mut libc::c_void,
2192 (size as usize) - total,
2193 )
2194 };
2195 if n < 0 {
2196 let err = io::Error::last_os_error();
2197 if err.kind() == io::ErrorKind::Interrupted {
2198 continue;
2199 }
2200 unsafe {
2201 libc::close(fd);
2202 }
2203 return Err(err);
2204 }
2205 if n == 0 {
2206 break;
2207 }
2208 total += n as usize;
2209 }
2210 unsafe {
2211 libc::close(fd);
2212 }
2213 return Ok(hash_bytes(algo, &buf[..total]));
2214 }
2215
2216 use std::os::unix::io::FromRawFd;
2218 let file = unsafe { File::from_raw_fd(fd) };
2219
2220 if is_regular && size > 0 {
2221 if size >= SMALL_FILE_LIMIT {
2223 let mmap_result = unsafe { memmap2::MmapOptions::new().map(&file) };
2224 if let Ok(mmap) = mmap_result {
2225 if size >= 2 * 1024 * 1024 {
2226 let _ = mmap.advise(memmap2::Advice::HugePage);
2227 }
2228 let _ = mmap.advise(memmap2::Advice::Sequential);
2229 if mmap.advise(memmap2::Advice::PopulateRead).is_err() {
2231 let _ = mmap.advise(memmap2::Advice::WillNeed);
2232 }
2233 return Ok(hash_bytes(algo, &mmap));
2234 }
2235 }
2236 return hash_file_small(algo, file, size as usize);
2238 }
2239
2240 hash_reader(algo, file)
2242}
2243
2244#[cfg(target_os = "linux")]
2247pub fn readahead_files_all(paths: &[&Path]) {
2248 use std::os::unix::io::AsRawFd;
2249 for path in paths {
2250 if let Ok(file) = open_noatime(path) {
2251 if let Ok(meta) = file.metadata() {
2252 if meta.file_type().is_file() {
2253 let len = meta.len();
2254 unsafe {
2255 libc::posix_fadvise(
2256 file.as_raw_fd(),
2257 0,
2258 len as i64,
2259 libc::POSIX_FADV_WILLNEED,
2260 );
2261 }
2262 }
2263 }
2264 }
2265 }
2266}
2267
/// Non-Linux stand-in for the Linux `readahead_files_all`: does nothing, so
/// callers can invoke it unconditionally.
#[cfg(not(target_os = "linux"))]
pub fn readahead_files_all(_paths: &[&Path]) {}
2270
/// Writes one checksum line: `<hash>`, a space, a mode character, `<filename>`
/// and a newline.
///
/// The mode character is `*` when `binary` is true and a space otherwise.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\n"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
2285
/// Writes one checksum line terminated by a NUL byte instead of a newline:
/// `<hash>`, a space, a mode character, `<filename>`, `\0`.
///
/// The mode character is `*` when `binary` is true and a space otherwise.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    binary: bool,
) -> io::Result<()> {
    let separator = [b' ', if binary { b'*' } else { b' ' }];
    let pieces: [&[u8]; 4] = [hash.as_bytes(), &separator, filename.as_bytes(), b"\0"];
    for piece in pieces {
        out.write_all(piece)?;
    }
    Ok(())
}
2299
thread_local! {
    /// Per-thread scratch buffer (initial capacity 256 bytes) in which
    /// `write_hash_line` and `write_hash_tag_line` assemble a complete output
    /// line, so each line reaches the writer in a single `write_all` call.
    static LINE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(256));
}
2309
2310#[inline]
2314pub fn write_hash_line(
2315 out: &mut impl Write,
2316 hash: &str,
2317 filename: &str,
2318 binary: bool,
2319 zero: bool,
2320 escaped: bool,
2321) -> io::Result<()> {
2322 LINE_BUF.with(|cell| {
2323 let mut buf = cell.borrow_mut();
2324 buf.clear();
2325 let mode = if binary { b'*' } else { b' ' };
2326 let term = if zero { b'\0' } else { b'\n' };
2327 if escaped {
2328 buf.push(b'\\');
2329 }
2330 buf.extend_from_slice(hash.as_bytes());
2331 buf.push(b' ');
2332 buf.push(mode);
2333 buf.extend_from_slice(filename.as_bytes());
2334 buf.push(term);
2335 out.write_all(&buf)
2336 })
2337}
2338
2339#[inline]
2342pub fn write_hash_tag_line(
2343 out: &mut impl Write,
2344 algo_name: &str,
2345 hash: &str,
2346 filename: &str,
2347 zero: bool,
2348) -> io::Result<()> {
2349 LINE_BUF.with(|cell| {
2350 let mut buf = cell.borrow_mut();
2351 buf.clear();
2352 let term = if zero { b'\0' } else { b'\n' };
2353 buf.extend_from_slice(algo_name.as_bytes());
2354 buf.extend_from_slice(b" (");
2355 buf.extend_from_slice(filename.as_bytes());
2356 buf.extend_from_slice(b") = ");
2357 buf.extend_from_slice(hash.as_bytes());
2358 buf.push(term);
2359 out.write_all(&buf)
2360 })
2361}
2362
2363pub fn print_hash_tag(
2365 out: &mut impl Write,
2366 algo: HashAlgorithm,
2367 hash: &str,
2368 filename: &str,
2369) -> io::Result<()> {
2370 out.write_all(algo.name().as_bytes())?;
2371 out.write_all(b" (")?;
2372 out.write_all(filename.as_bytes())?;
2373 out.write_all(b") = ")?;
2374 out.write_all(hash.as_bytes())?;
2375 out.write_all(b"\n")
2376}
2377
2378pub fn print_hash_tag_zero(
2380 out: &mut impl Write,
2381 algo: HashAlgorithm,
2382 hash: &str,
2383 filename: &str,
2384) -> io::Result<()> {
2385 out.write_all(algo.name().as_bytes())?;
2386 out.write_all(b" (")?;
2387 out.write_all(filename.as_bytes())?;
2388 out.write_all(b") = ")?;
2389 out.write_all(hash.as_bytes())?;
2390 out.write_all(b"\0")
2391}
2392
/// Writes one tagged BLAKE2b checksum line followed by a newline.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` otherwise,
/// giving `BLAKE2b[-<bits>] (<filename>) = <hash>`.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash_tag_b2sum(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\n"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2413
/// Writes one tagged BLAKE2b checksum line followed by a NUL byte instead of
/// a newline.
///
/// The tag is `BLAKE2b` when `bits` is 512 and `BLAKE2b-<bits>` otherwise,
/// giving `BLAKE2b[-<bits>] (<filename>) = <hash>`.
///
/// # Errors
/// Returns the first write error from `out`.
pub fn print_hash_tag_b2sum_zero(
    out: &mut impl Write,
    hash: &str,
    filename: &str,
    bits: usize,
) -> io::Result<()> {
    match bits {
        512 => out.write_all(b"BLAKE2b (")?,
        other => write!(out, "BLAKE2b-{} (", other)?,
    }
    let tail: [&[u8]; 4] = [filename.as_bytes(), b") = ", hash.as_bytes(), b"\0"];
    for piece in tail {
        out.write_all(piece)?;
    }
    Ok(())
}
2431
/// Options controlling how `check_file` verifies a checksum list and what it
/// reports.
pub struct CheckOptions {
    /// Suppress the per-file `OK` lines (failures are still printed).
    pub quiet: bool,
    /// Suppress all per-file output (`OK`, `FAILED`, and read-error lines).
    pub status_only: bool,
    /// NOTE(review): not read by `check_file` itself; presumably consulted by
    /// the caller when deciding the exit status — confirm.
    pub strict: bool,
    /// Emit a warning on the error writer for each improperly formatted line.
    pub warn: bool,
    /// Silently skip entries whose file fails to hash with `NotFound`.
    pub ignore_missing: bool,
    /// When non-empty, format warnings are written as
    /// `<warn_prefix>: <line>: ...` instead of `line <line>: ...`.
    pub warn_prefix: String,
}
2444
/// Tallies produced by `check_file` for one checksum list.
pub struct CheckResult {
    /// Entries whose computed hash matched the expected one.
    pub ok: usize,
    /// Entries whose computed hash differed from the expected one.
    pub mismatches: usize,
    /// Non-empty lines that could not be parsed as a checksum entry.
    pub format_errors: usize,
    /// Entries whose file could not be hashed (other than ignored missing files).
    pub read_errors: usize,
    /// Entries skipped because the file was not found and `ignore_missing` was set.
    pub ignored_missing: usize,
}
2454
2455pub fn check_file<R: BufRead>(
2458 algo: HashAlgorithm,
2459 reader: R,
2460 opts: &CheckOptions,
2461 out: &mut impl Write,
2462 err_out: &mut impl Write,
2463) -> io::Result<CheckResult> {
2464 let quiet = opts.quiet;
2465 let status_only = opts.status_only;
2466 let warn = opts.warn;
2467 let ignore_missing = opts.ignore_missing;
2468 let mut ok_count = 0;
2469 let mut mismatch_count = 0;
2470 let mut format_errors = 0;
2471 let mut read_errors = 0;
2472 let mut ignored_missing_count = 0;
2473 let mut line_num = 0;
2474
2475 for line_result in reader.lines() {
2476 line_num += 1;
2477 let line = line_result?;
2478 let line = line.trim_end();
2479
2480 if line.is_empty() {
2481 continue;
2482 }
2483
2484 let (expected_hash, filename) = match parse_check_line(line) {
2486 Some(v) => v,
2487 None => {
2488 format_errors += 1;
2489 if warn {
2490 out.flush()?;
2491 if opts.warn_prefix.is_empty() {
2492 writeln!(
2493 err_out,
2494 "line {}: improperly formatted {} checksum line",
2495 line_num,
2496 algo.name()
2497 )?;
2498 } else {
2499 writeln!(
2500 err_out,
2501 "{}: {}: improperly formatted {} checksum line",
2502 opts.warn_prefix,
2503 line_num,
2504 algo.name()
2505 )?;
2506 }
2507 }
2508 continue;
2509 }
2510 };
2511
2512 let actual = match hash_file(algo, Path::new(filename)) {
2514 Ok(h) => h,
2515 Err(e) => {
2516 if ignore_missing && e.kind() == io::ErrorKind::NotFound {
2517 ignored_missing_count += 1;
2518 continue;
2519 }
2520 read_errors += 1;
2521 if !status_only {
2522 out.flush()?;
2523 writeln!(err_out, "{}: {}", filename, e)?;
2524 writeln!(out, "{}: FAILED open or read", filename)?;
2525 }
2526 continue;
2527 }
2528 };
2529
2530 if actual.eq_ignore_ascii_case(expected_hash) {
2531 ok_count += 1;
2532 if !quiet && !status_only {
2533 writeln!(out, "{}: OK", filename)?;
2534 }
2535 } else {
2536 mismatch_count += 1;
2537 if !status_only {
2538 writeln!(out, "{}: FAILED", filename)?;
2539 }
2540 }
2541 }
2542
2543 Ok(CheckResult {
2544 ok: ok_count,
2545 mismatches: mismatch_count,
2546 format_errors,
2547 read_errors,
2548 ignored_missing: ignored_missing_count,
2549 })
2550}
2551
/// Parses one line of a checksum file into `(hash, filename)`.
///
/// Two layouts are recognised:
///
/// * Tagged: `ALGO (filename) = hash`, where `ALGO` is one of `MD5`, `SHA1`,
///   `SHA224`, `SHA256`, `SHA384`, `SHA512`, `BLAKE2b` or `BLAKE2b-<digits>`.
///   The filename ends at the LAST `") = "`, so filenames that themselves
///   contain `") = "` are handled.
/// * Plain: `hash`, one space, a mode character (space or `*`), `filename`.
///   A single leading backslash (escaped-filename marker) is skipped.
///
/// A line that starts with a known tag but has no `") = "` is retried as a
/// plain line. Returns `None` when neither layout matches; in particular a
/// hash followed by a space and anything other than a mode character is
/// rejected rather than mis-sliced.
pub fn parse_check_line(line: &str) -> Option<(&str, &str)> {
    const TAG_PREFIXES: [&str; 7] = [
        "MD5 (",
        "SHA1 (",
        "SHA224 (",
        "SHA256 (",
        "SHA384 (",
        "SHA512 (",
        "BLAKE2b (",
    ];

    let tagged_rest = TAG_PREFIXES
        .iter()
        .find_map(|prefix| line.strip_prefix(*prefix))
        .or_else(|| {
            // `BLAKE2b-<bits> (` with an all-digit bit count.
            let after = line.strip_prefix("BLAKE2b-")?;
            let (bits, rest) = after.split_once(" (")?;
            bits.bytes().all(|b| b.is_ascii_digit()).then_some(rest)
        });
    if let Some((filename, hash)) = tagged_rest.and_then(|rest| rest.rsplit_once(") = ")) {
        return Some((hash, filename));
    }

    let line = line.strip_prefix('\\').unwrap_or(line);

    // The hash never contains a space, so the first space ends it; exactly
    // one mode character (space = text, `*` = binary) must follow.
    let (hash, rest) = line.split_once(' ')?;
    let filename = rest
        .strip_prefix(' ')
        .or_else(|| rest.strip_prefix('*'))?;
    Some((hash, filename))
}
2600
/// Parses a tagged checksum line of the form `ALGO (filename) = hash` into
/// `(hash, filename, bits)`.
///
/// The algorithm part ends at the first `" ("` and the filename ends at the
/// LAST `") = "`, so filenames that themselves contain `") = "` are split
/// correctly. `bits` is the number after the final `-` of the algorithm part
/// (e.g. `256` for `BLAKE2b-256`), or `None` when there is no dash or the
/// suffix is not a number.
///
/// Returns `None` if either delimiter is missing.
pub fn parse_check_line_tag(line: &str) -> Option<(&str, &str, Option<usize>)> {
    let (algo_part, rest) = line.split_once(" (")?;
    let (filename, hash) = rest.rsplit_once(") = ")?;

    let bits = algo_part
        .rsplit_once('-')
        .and_then(|(_, suffix)| suffix.parse::<usize>().ok());

    Some((hash, filename, bits))
}
2621
/// Reads from `reader` until `buf` is completely filled or EOF is reached,
/// and returns the number of bytes read.
///
/// A return value smaller than `buf.len()` means EOF was hit; `0` means no
/// data was available at all. `Interrupted` errors are retried on every read,
/// including the first one, so a signal arriving before any data is read
/// does not surface as an error.
///
/// # Errors
/// Returns any non-`Interrupted` I/O error from `reader`.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2644
/// Builds, at compile time, a lookup table mapping every byte value to its
/// two lowercase hexadecimal ASCII digits (high nibble first).
const fn generate_hex_table() -> [[u8; 2]; 256] {
    const DIGITS: [u8; 16] = *b"0123456789abcdef";
    const ENTRIES: usize = 256;

    let mut table = [[0u8; 2]; ENTRIES];
    let mut byte = 0usize;
    // `for` loops are not available in `const fn`, hence the manual counter.
    while byte < ENTRIES {
        let high = DIGITS[byte / 16];
        let low = DIGITS[byte % 16];
        table[byte] = [high, low];
        byte += 1;
    }
    table
}
2657
/// Byte-to-hex lookup table computed at compile time: entry `b` holds the two
/// lowercase ASCII hex digits of byte value `b`.
const HEX_TABLE: [[u8; 2]; 256] = generate_hex_table();
2659
2660pub(crate) fn hex_encode(bytes: &[u8]) -> String {
2663 let len = bytes.len() * 2;
2664 let mut hex = String::with_capacity(len);
2665 unsafe {
2667 let buf = hex.as_mut_vec();
2668 buf.set_len(len);
2669 hex_encode_to_slice(bytes, buf);
2670 }
2671 hex
2672}
2673
2674#[inline]
2677fn hex_encode_to_slice(bytes: &[u8], out: &mut [u8]) {
2678 unsafe {
2680 let ptr = out.as_mut_ptr();
2681 for (i, &b) in bytes.iter().enumerate() {
2682 let pair = *HEX_TABLE.get_unchecked(b as usize);
2683 *ptr.add(i * 2) = pair[0];
2684 *ptr.add(i * 2 + 1) = pair[1];
2685 }
2686 }
2687}