1use crate::file_hasher::FileHasher;
2use std::cmp::Ordering;
3use std::fs;
4use std::io::{self, Read};
5use std::path::{Path, PathBuf};
6
/// Where a file was found relative to the two directories being compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// The file is present only in the first directory.
    OnlyInDir1,
    /// The file is present only in the second directory.
    OnlyInDir2,
    /// The file is present in both directories.
    InBoth,
}
17
/// Compares two files, identified by path, by metadata and by content.
pub struct FileComparer<'a> {
    /// Path of the first file.
    path1: &'a Path,
    /// Path of the second file.
    path2: &'a Path,
    /// Size in bytes of each read buffer used for the chunked content comparison.
    /// A value of `0` selects the memory-mapped comparison instead.
    pub buffer_size: usize,
    /// Optional pair of hashers (first file, second file). When set, content is
    /// compared through the hashes they return rather than by reading the bytes here.
    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
}
25
26impl<'a> FileComparer<'a> {
27 pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
28 pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
29
30 pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
31 Self {
32 path1,
33 path2,
34 buffer_size: Self::DEFAULT_BUFFER_SIZE,
35 hashers: None,
36 }
37 }
38
39 pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
40 let m1 = fs::metadata(self.path1)?;
41 let m2 = fs::metadata(self.path2)?;
42 Ok((m1, m2))
43 }
44
45 pub(crate) fn compare_contents(&self) -> io::Result<bool> {
46 if let Some((hasher1, hasher2)) = self.hashers {
47 let (hash1, hash2) = rayon::join(
48 || hasher1.get_hash(self.path1),
49 || hasher2.get_hash(self.path2),
50 );
51 return Ok(hash1? == hash2?);
52 }
53
54 let mut f1 = fs::File::open(self.path1)?;
55 let mut f2 = fs::File::open(self.path2)?;
56 if self.buffer_size == 0 {
57 let len1 = f1.metadata()?.len();
58 let len2 = f2.metadata()?.len();
59 if len1 != len2 {
60 return Ok(false);
61 }
62 if len1 == 0 {
63 return Ok(true);
64 }
65 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
66 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
67 return Ok(mmap1[..] == mmap2[..]);
68 }
69
70 let mut buf1 = vec![0u8; self.buffer_size];
71 let mut buf2 = vec![0u8; self.buffer_size];
72 loop {
73 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
77 let n1 = n1?;
78 let n2 = n2?;
79 if n1 != n2 || buf1[..n1] != buf2[..n2] {
80 return Ok(false);
81 }
82 if n1 == 0 {
83 return Ok(true);
84 }
85 }
86 }
87}
88
/// Outcome of comparing one file between the two directories.
#[derive(Debug, Clone)]
pub struct FileComparisonResult {
    /// Path identifying the file (presumably relative to the compared directory
    /// roots; confirm against the caller).
    pub relative_path: PathBuf,
    /// Whether the file is in the first directory, the second, or both.
    pub classification: Classification,
    /// Ordering of the first file's modification time relative to the second's;
    /// `None` until `update` has filled it in.
    pub modified_time_comparison: Option<Ordering>,
    /// Ordering of the first file's size relative to the second's;
    /// `None` until `update` has filled it in.
    pub size_comparison: Option<Ordering>,
    /// Result of the content comparison; `None` when no content comparison
    /// has been performed.
    pub is_content_same: Option<bool>,
}
103
104impl FileComparisonResult {
105 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
106 Self {
107 relative_path,
108 classification,
109 modified_time_comparison: None,
110 size_comparison: None,
111 is_content_same: None,
112 }
113 }
114
115 pub fn update(
116 &mut self,
117 comparer: &FileComparer,
118 should_compare_content: bool,
119 ) -> anyhow::Result<()> {
120 let (m1, m2) = comparer.metadata()?;
121 let t1 = m1.modified()?;
122 let t2 = m2.modified()?;
123 self.modified_time_comparison = Some(t1.cmp(&t2));
124
125 let s1 = m1.len();
126 let s2 = m2.len();
127 self.size_comparison = Some(s1.cmp(&s2));
128
129 if should_compare_content && s1 == s2 {
130 log::trace!("Comparing content: {:?}", self.relative_path);
131 self.is_content_same = Some(comparer.compare_contents()?);
132 }
133 Ok(())
134 }
135
136 pub fn is_identical(&self) -> bool {
139 self.classification == Classification::InBoth
140 && self.modified_time_comparison == Some(Ordering::Equal)
141 && self.size_comparison == Some(Ordering::Equal)
142 && self.is_content_same != Some(false)
143 }
144
145 pub fn to_symbol_string(&self) -> String {
146 String::from_iter([
147 match self.classification {
148 Classification::OnlyInDir1 => '>',
149 Classification::OnlyInDir2 => '<',
150 Classification::InBoth => '=',
151 },
152 match self.modified_time_comparison {
153 None => '?',
154 Some(Ordering::Greater) => '>',
155 Some(Ordering::Less) => '<',
156 Some(Ordering::Equal) => '=',
157 },
158 match self.size_comparison {
159 None => '?',
160 Some(Ordering::Greater) => '>',
161 Some(Ordering::Less) => '<',
162 Some(Ordering::Equal) => {
163 if self.is_content_same == Some(false) {
164 '!'
165 } else {
166 '='
167 }
168 }
169 },
170 ])
171 }
172
173 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
174 let mut parts = Vec::new();
175 match self.classification {
176 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
177 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
178 Classification::InBoth => {}
179 }
180 let mut has_equals = false;
181 match self.modified_time_comparison {
182 Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
183 Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
184 Some(Ordering::Equal) => has_equals = true,
185 None => {}
186 }
187 match self.size_comparison {
188 Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
189 Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
190 Some(Ordering::Equal) => has_equals = true,
191 None => {}
192 }
193 match self.is_content_same {
194 Some(false) => parts.push("Contents differ".to_string()),
195 Some(true) => has_equals = true,
196 None => {}
197 }
198
199 if parts.is_empty() {
200 if !has_equals {
201 return "Unknown".to_string();
202 }
203 return "Identical".to_string();
204 }
205 parts.join(", ")
206 }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes the two contents to files in separate temporary directories and
    /// asserts that every comparison strategy (buffered, memory-mapped, hashed)
    /// reports `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let f1_path = dir1.path().join("file");
        let f2_path = dir2.path().join("file");
        fs::write(&f1_path, content1)?;
        fs::write(&f2_path, content2)?;

        let mut comparer = FileComparer::new(&f1_path, &f2_path);

        // 8192 exercises the buffered path, 0 the memory-mapped path.
        for buffer_size in [8192, 0] {
            comparer.buffer_size = buffer_size;
            assert_eq!(comparer.compare_contents()?, expected);
        }

        // With hashers configured, the hash-based path takes precedence.
        let hasher1 = FileHasher::new(dir1.path().to_path_buf());
        let hasher2 = FileHasher::new(dir2.path().to_path_buf());
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    /// A freshly created result has no comparisons and is reported as unknown.
    #[test]
    fn test_comparison_result_empty() {
        let fresh = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!fresh.is_identical());
        assert_eq!(fresh.to_string("dir1", "dir2"), "Unknown");
        assert_eq!(fresh.to_symbol_string(), "=??");
    }

    /// Equal time and size with no content comparison still counts as identical.
    #[test]
    fn test_comparison_result_contents_skipped() {
        let skipped = FileComparisonResult {
            modified_time_comparison: Some(Ordering::Equal),
            size_comparison: Some(Ordering::Equal),
            ..FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth)
        };
        assert!(skipped.is_identical());
        assert_eq!(skipped.to_string("dir1", "dir2"), "Identical");
        assert_eq!(skipped.to_symbol_string(), "===");
    }
}