// compare_dir/file_comparer.rs
use crate::file_hasher::FileHasher;
use std::cmp::Ordering;
use std::fs;
use std::io::{self, Read};
use std::path::{Path, PathBuf};
6
/// Records in which of the two compared directories a file is present.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// The file is present only in the first directory.
    OnlyInDir1,
    /// The file is present only in the second directory.
    OnlyInDir2,
    /// The file is present in both directories.
    InBoth,
}
17
/// Compares a pair of files identified by two borrowed paths.
pub struct FileComparer<'a> {
    /// Path of the first file.
    path1: &'a Path,
    /// Path of the second file.
    path2: &'a Path,
    /// Size in bytes of each of the two read buffers used by `compare_contents`.
    /// A value of `0` makes `compare_contents` compare memory-mapped views of
    /// the files instead of reading them through buffers.
    pub buffer_size: usize,
    /// Optional pair of hashers (first file, second file). When set,
    /// `compare_contents` compares the values returned by `get_hash` for each
    /// path and does not read the file bytes itself.
    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
}
25
26impl<'a> FileComparer<'a> {
27 pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
28 pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
29
30 pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
31 Self {
32 path1,
33 path2,
34 buffer_size: Self::DEFAULT_BUFFER_SIZE,
35 hashers: None,
36 }
37 }
38
39 pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
40 let m1 = fs::metadata(self.path1)?;
41 let m2 = fs::metadata(self.path2)?;
42 Ok((m1, m2))
43 }
44
45 pub(crate) fn compare_contents(&self) -> io::Result<bool> {
46 if let Some((hasher1, hasher2)) = self.hashers {
47 let hash1 = hasher1.get_hash(self.path1)?;
48 let hash2 = hasher2.get_hash(self.path2)?;
49 return Ok(hash1 == hash2);
50 }
51
52 let mut f1 = fs::File::open(self.path1)?;
53 let mut f2 = fs::File::open(self.path2)?;
54
55 if self.buffer_size == 0 {
56 let len1 = f1.metadata()?.len();
57 let len2 = f2.metadata()?.len();
58 if len1 != len2 {
59 return Ok(false);
60 }
61 if len1 == 0 {
62 return Ok(true);
63 }
64
65 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
66 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
67 return Ok(mmap1[..] == mmap2[..]);
68 }
69
70 let mut buf1 = vec![0u8; self.buffer_size];
71 let mut buf2 = vec![0u8; self.buffer_size];
72
73 loop {
74 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
78 let n1 = n1?;
79 let n2 = n2?;
80
81 if n1 != n2 || buf1[..n1] != buf2[..n2] {
82 return Ok(false);
83 }
84
85 if n1 == 0 {
86 return Ok(true);
87 }
88 }
89 }
90}
91
/// Outcome of comparing one file between the two directories.
#[derive(Debug, Clone)]
pub struct FileComparisonResult {
    /// Path of the file; presumably relative to the compared directory roots
    /// (the name suggests so, this file does not establish it).
    pub relative_path: PathBuf,
    /// In which of the two directories the file is present.
    pub classification: Classification,
    /// Ordering of the first file's modification time relative to the
    /// second's. `None` until `update` has stored a value.
    pub modified_time_comparison: Option<Ordering>,
    /// Ordering of the first file's size relative to the second's.
    /// `None` until `update` has stored a value.
    pub size_comparison: Option<Ordering>,
    /// Result of the content comparison. `update` only stores a value when
    /// content comparison was requested and both sizes were equal.
    pub is_content_same: Option<bool>,
}
106
107impl FileComparisonResult {
108 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
109 Self {
110 relative_path,
111 classification,
112 modified_time_comparison: None,
113 size_comparison: None,
114 is_content_same: None,
115 }
116 }
117
118 pub fn update(
119 &mut self,
120 comparer: &FileComparer,
121 should_compare_content: bool,
122 ) -> anyhow::Result<()> {
123 let (m1, m2) = comparer.metadata()?;
124 let t1 = m1.modified()?;
125 let t2 = m2.modified()?;
126 self.modified_time_comparison = Some(t1.cmp(&t2));
127
128 let s1 = m1.len();
129 let s2 = m2.len();
130 self.size_comparison = Some(s1.cmp(&s2));
131
132 if should_compare_content && s1 == s2 {
133 log::trace!("Comparing content: {:?}", self.relative_path);
134 self.is_content_same = Some(comparer.compare_contents()?);
135 }
136 Ok(())
137 }
138
139 pub fn is_identical(&self) -> bool {
142 self.classification == Classification::InBoth
143 && self.modified_time_comparison == Some(Ordering::Equal)
144 && self.size_comparison == Some(Ordering::Equal)
145 && self.is_content_same != Some(false)
146 }
147
148 pub fn to_symbol_string(&self) -> String {
149 String::from_iter([
150 match self.classification {
151 Classification::OnlyInDir1 => '>',
152 Classification::OnlyInDir2 => '<',
153 Classification::InBoth => '=',
154 },
155 match self.modified_time_comparison {
156 None => ' ',
157 Some(Ordering::Greater) => '>',
158 Some(Ordering::Less) => '<',
159 Some(Ordering::Equal) => '=',
160 },
161 match self.size_comparison {
162 None => ' ',
163 Some(Ordering::Greater) => '>',
164 Some(Ordering::Less) => '<',
165 Some(Ordering::Equal) => {
166 if self.is_content_same == Some(false) {
167 '!'
168 } else {
169 '='
170 }
171 }
172 },
173 ])
174 }
175
176 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
177 let mut parts = Vec::new();
178 match self.classification {
179 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
180 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
181 Classification::InBoth => {}
182 }
183 match self.modified_time_comparison {
184 Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
185 Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
186 Some(Ordering::Equal) | None => {}
187 }
188 match self.size_comparison {
189 Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
190 Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
191 Some(Ordering::Equal) | None => {}
192 }
193 if self.is_content_same == Some(false) {
194 parts.push("Contents differ".to_string());
195 }
196
197 if parts.is_empty() {
198 "Identical".to_string()
199 } else {
200 parts.join(", ")
201 }
202 }
203}
204
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Buffer size used for the buffered comparison strategy in these tests.
    const TEST_BUFFER_SIZE: usize = 8192;

    /// Writes the two contents to temporary files and asserts that every
    /// comparison strategy (buffered, memory-mapped, hash-based) yields
    /// `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let mut f1 = NamedTempFile::new()?;
        let mut f2 = NamedTempFile::new()?;
        f1.write_all(content1)?;
        f2.write_all(content2)?;
        f1.as_file().sync_all()?;
        f2.as_file().sync_all()?;

        // Buffered strategy.
        let mut comparer = FileComparer::new(f1.path(), f2.path());
        comparer.buffer_size = TEST_BUFFER_SIZE;
        assert_eq!(comparer.compare_contents()?, expected);

        // Memory-mapped strategy (selected by a zero buffer size).
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // Hash-based strategy.
        let dir1 = f1.path().parent().unwrap();
        let dir2 = f2.path().parent().unwrap();

        let hasher1 = FileHasher::new(dir1.to_path_buf());
        let hasher2 = FileHasher::new(dir2.to_path_buf());

        let mut comparer_hash = FileComparer::new(f1.path(), f2.path());
        comparer_hash.hashers = Some((&hasher1, &hasher2));

        assert_eq!(comparer_hash.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    // The cases below use contents larger than the read buffer so that the
    // buffered strategy has to iterate over several chunks.

    #[test]
    fn test_compare_contents_identical_multiple_chunks() -> io::Result<()> {
        let content = vec![0xABu8; TEST_BUFFER_SIZE * 3 + 17];
        check_compare(&content, &content, true)
    }

    #[test]
    fn test_compare_contents_different_in_last_chunk() -> io::Result<()> {
        let content1 = vec![0xABu8; TEST_BUFFER_SIZE * 3 + 17];
        let mut content2 = content1.clone();
        *content2.last_mut().expect("content is non-empty") ^= 0xFF;
        check_compare(&content1, &content2, false)
    }

    #[test]
    fn test_compare_contents_exact_buffer_multiple() -> io::Result<()> {
        let content = vec![7u8; TEST_BUFFER_SIZE * 2];
        check_compare(&content, &content, true)
    }
}