1use crate::lazyfile::LazyFile;
2use smallvec::SmallVec;
3use std::cmp::{min, Ordering};
4use std::convert::TryInto;
5use std::io;
6use std::io::{Read, Seek, SeekFrom};
7use std::path::Path;
8
9#[derive(Debug, PartialOrd, Eq, PartialEq, Ord)]
11struct HashedRange {
12 size: u64,
13 hash: [u8; 20],
14}
15
16impl HashedRange {
17 pub fn from_file(file: &mut LazyFile<'_>, start: u64, size: u64) -> Result<Self, io::Error> {
18 let fd = file.fd()?;
19 fd.seek(SeekFrom::Start(start))?;
20 let mut hasher = blake3::Hasher::new();
21 let mut to_read = size as usize;
22 let mut data = vec![0; to_read];
23 loop {
24 match fd.read(&mut data[0..to_read]) {
25 Ok(0) => break,
26 Ok(n) => {
27 debug_assert!(n <= to_read);
28 hasher.update(&data[0..n]);
29
30 to_read -= n;
31 if to_read == 0 {
32 break;
33 }
34 },
35 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {},
36 Err(e) => return Err(e),
37 }
38 }
39 Ok(Self {
40 hash: hasher.finalize().as_bytes()[0..20].try_into().unwrap(),
41 size,
42 })
43 }
44}
45
46#[derive(Debug)]
47pub struct Hasher {
48 ranges: SmallVec<[Option<HashedRange>; 1]>,
49}
50
51struct HashIter<'a> {
53 pub index: usize,
54 pub start_offset: u64,
55 pub end_offset: u64,
56 next_buffer_size: u64,
57 a_file: LazyFile<'a>,
58 b_file: LazyFile<'a>,
59}
60
61impl<'h> HashIter<'h> {
62 pub fn new(size: u64, a_path: &'h Path, b_path: &'h Path) -> Self {
63 HashIter {
64 index: 0,
65 start_offset: 0,
66 end_offset: size,
67 next_buffer_size: 2048,
68 a_file: LazyFile::new(a_path),
69 b_file: LazyFile::new(b_path),
70 }
71 }
72
73 pub fn next<'a,'b>(&mut self, a_hash: &'a mut Hasher, b_hash: &'b mut Hasher) -> Result<Option<(&'a HashedRange, &'b HashedRange)>, io::Error> {
75 if self.start_offset >= self.end_offset {
76 return Ok(None);
77 }
78
79 let i = self.index;
80 let (a_none, b_none, size) = {
81 let a = a_hash.ranges.get(i);
82 let b = b_hash.ranges.get(i);
83
84 let failed = a.map_or(false, |a| a.is_none()) || b.map_or(false, |b| b.is_none());
85 if failed {
86 return Err(io::Error::other("cmp i/o"));
87 }
88
89 let size = a
91 .and_then(|a| a.as_ref().map(|a| a.size))
92 .or(b.and_then(|b| b.as_ref().map(|b| b.size)))
93 .unwrap_or(min(self.end_offset - self.start_offset, self.next_buffer_size));
94 (a.is_none(), b.is_none(), size)
95 };
96
97 if a_none {
99 a_hash.push(HashedRange::from_file(&mut self.a_file, self.start_offset, size));
100 }
101 if b_none {
102 b_hash.push(HashedRange::from_file(&mut self.b_file, self.start_offset, size));
103 }
104
105 self.index += 1;
106 self.start_offset += size;
107 self.next_buffer_size = min(size * 16, 128 * 1024 * 1024);
112
113 match (a_hash.ranges.get(i), b_hash.ranges.get(i)) {
114 (Some(Some(a)), Some(Some(b))) => Ok(Some((a, b))),
115 _ => Err(io::Error::other("cmp i/o")),
116 }
117 }
118}
119
120impl Hasher {
121 #[inline]
122 pub fn new() -> Self {
123 Self { ranges: SmallVec::new() }
124 }
125
126 #[inline]
127 fn push(&mut self, range: Result<HashedRange, io::Error>) {
128 let r = match range {
129 Ok(r) => Some(r),
130 Err(err) => {
131 eprintln!("Can't compare files: {err}");
132 None
133 },
134 };
135 self.ranges.push(r);
136 }
137
138 #[inline]
140 pub fn compare(&mut self, other: &mut Self, size: u64, self_path: &Path, other_path: &Path) -> Result<Ordering, io::Error> {
141 let mut iter = HashIter::new(size, self_path, other_path);
142
143 while let Some((a, b)) = iter.next(self, other)? {
144 let ord = a.cmp(b);
145 if ord != Ordering::Equal {
146 return Ok(ord);
147 }
148 }
149 Ok(Ordering::Equal)
150 }
151}
152
153#[cfg(test)]
154mod test {
155 use super::*;
156 use std::fs;
157
158 #[test]
159 fn range_hash() {
160 let tmp = tempdir::TempDir::new("hashtest").expect("tmp");
161 let path = &tmp.path().join("a");
162 fs::write(path, "aaa\n").expect("write");
163 let mut file = LazyFile::new(path);
164 let hashed = HashedRange::from_file(&mut file, 0, 4).expect("hash");
165
166 assert_eq!(4, hashed.size);
167 assert_eq!([22, 179, 164, 66, 194, 34, 185, 88, 69, 62, 115, 203, 129, 138, 81, 160, 96, 190, 209, 11], hashed.hash);
168
169 let hashed = HashedRange::from_file(&mut file, 1, 2).expect("hash2");
170 assert_eq!(2, hashed.size);
171 }
172}