compare_dir/
file_comparer.rs1use std::cmp::Ordering;
2use std::fs;
3use std::io::{self, Read};
4use std::path::{Path, PathBuf};
5
/// Where a relative path was found when comparing two directories.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// The entry is present only in the first directory
    /// (rendered as "Only in <dir1>" by `FileComparisonResult::to_string`).
    OnlyInDir1,
    /// The entry is present only in the second directory
    /// (rendered as "Only in <dir2>" by `FileComparisonResult::to_string`).
    OnlyInDir2,
    /// The entry is present in both directories.
    InBoth,
}
16
/// Compares two files identified by borrowed paths.
///
/// The comparer does not own the paths; both must outlive it (`'a`).
#[derive(Debug, Clone)]
pub struct FileComparer<'a> {
    /// Path of the first file.
    path1: &'a Path,
    /// Path of the second file.
    path2: &'a Path,
    /// Size in bytes of each read buffer used by `compare_contents`.
    ///
    /// A value of `0` makes `compare_contents` memory-map both files instead
    /// of reading them through buffers.
    pub buffer_size: usize,
}
23
24impl<'a> FileComparer<'a> {
25 pub const DEFAULT_BUFFER_SIZE: usize = 64 * 1024;
26
27 pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
28 Self {
29 path1,
30 path2,
31 buffer_size: Self::DEFAULT_BUFFER_SIZE,
32 }
33 }
34
35 pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
36 let m1 = fs::metadata(self.path1)?;
37 let m2 = fs::metadata(self.path2)?;
38 Ok((m1, m2))
39 }
40
41 pub(crate) fn compare_contents(&self) -> io::Result<bool> {
42 let mut f1 = fs::File::open(self.path1)?;
43 let mut f2 = fs::File::open(self.path2)?;
44
45 if self.buffer_size == 0 {
46 let len1 = f1.metadata()?.len();
47 let len2 = f2.metadata()?.len();
48 if len1 != len2 {
49 return Ok(false);
50 }
51 if len1 == 0 {
52 return Ok(true);
53 }
54
55 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
56 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
57 return Ok(mmap1[..] == mmap2[..]);
58 }
59
60 let mut buf1 = vec![0u8; self.buffer_size];
61 let mut buf2 = vec![0u8; self.buffer_size];
62
63 loop {
64 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
68 let n1 = n1?;
69 let n2 = n2?;
70
71 if n1 != n2 || buf1[..n1] != buf2[..n2] {
72 return Ok(false);
73 }
74
75 if n1 == 0 {
76 return Ok(true);
77 }
78 }
79 }
80}
81
82#[derive(Debug, Clone)]
84pub struct FileComparisonResult {
85 pub relative_path: PathBuf,
87 pub classification: Classification,
89 pub modified_time_comparison: Option<Ordering>,
91 pub size_comparison: Option<Ordering>,
93 pub is_content_same: Option<bool>,
95}
96
97impl FileComparisonResult {
98 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
99 Self {
100 relative_path,
101 classification,
102 modified_time_comparison: None,
103 size_comparison: None,
104 is_content_same: None,
105 }
106 }
107
108 pub(crate) fn update(&mut self, comparer: &FileComparer) -> anyhow::Result<()> {
109 let (m1, m2) = comparer.metadata()?;
110 let t1 = m1.modified()?;
111 let t2 = m2.modified()?;
112 self.modified_time_comparison = Some(t1.cmp(&t2));
113
114 let s1 = m1.len();
115 let s2 = m2.len();
116 self.size_comparison = Some(s1.cmp(&s2));
117
118 if s1 == s2 {
119 log::info!("Comparing content: {:?}", self.relative_path);
120 self.is_content_same = Some(comparer.compare_contents()?);
121 }
122 Ok(())
123 }
124
125 pub fn is_identical(&self) -> bool {
126 self.classification == Classification::InBoth
127 && self.modified_time_comparison == Some(Ordering::Equal)
128 && self.size_comparison == Some(Ordering::Equal)
129 && self.is_content_same == Some(true)
130 }
131
132 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
133 let mut parts = Vec::new();
134 match self.classification {
135 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
136 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
137 Classification::InBoth => {}
138 }
139
140 if let Some(comp) = &self.modified_time_comparison {
141 match comp {
142 Ordering::Greater => parts.push(format!("{} is newer", dir1_name)),
143 Ordering::Less => parts.push(format!("{} is newer", dir2_name)),
144 Ordering::Equal => {}
145 }
146 }
147
148 if let Some(comp) = &self.size_comparison {
149 match comp {
150 Ordering::Greater => parts.push(format!("Size of {} is larger", dir1_name)),
151 Ordering::Less => parts.push(format!("Size of {} is larger", dir2_name)),
152 Ordering::Equal => {}
153 }
154 }
155
156 if let Some(same) = self.is_content_same
157 && !same
158 {
159 parts.push("Content differ".to_string());
160 }
161
162 format!("{}: {}", self.relative_path.display(), parts.join(", "))
163 }
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Buffer sizes covering both code paths of `compare_contents`:
    /// 8192 uses the buffered reader, 0 selects the memory-mapped path.
    const BOTH_MODES: [usize; 2] = [8192, 0];

    /// Writes `content1` and `content2` to two temporary files and asserts
    /// that `compare_contents` returns `expected` for every buffer size in
    /// `buffer_sizes`.
    fn assert_comparison(
        content1: &[u8],
        content2: &[u8],
        buffer_sizes: &[usize],
        expected: bool,
    ) -> io::Result<()> {
        let mut f1 = NamedTempFile::new()?;
        let mut f2 = NamedTempFile::new()?;
        f1.write_all(content1)?;
        f2.write_all(content2)?;
        let mut comparer = FileComparer::new(f1.path(), f2.path());

        for &buffer_size in buffer_sizes {
            comparer.buffer_size = buffer_size;
            assert_eq!(
                comparer.compare_contents()?,
                expected,
                "buffer_size = {}",
                buffer_size
            );
        }
        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        assert_comparison(b"hello world", b"hello world", &BOTH_MODES, true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        assert_comparison(b"hello world", b"hello rust", &BOTH_MODES, false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        assert_comparison(b"hello world", b"hello", &BOTH_MODES, false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        assert_comparison(b"", b"", &BOTH_MODES, true)
    }

    /// A 4-byte buffer forces several loop iterations over the 11-byte
    /// fixtures, including a final partial chunk.
    #[test]
    fn test_compare_contents_multiple_chunks() -> io::Result<()> {
        assert_comparison(b"hello world", b"hello world", &[4], true)?;
        // Same length; the only difference is in the last (partial) chunk.
        assert_comparison(b"hello world", b"hello worle", &[4], false)?;
        // One file ends exactly on a chunk boundary, the other continues.
        assert_comparison(b"hello wo", b"hello world", &[4], false)
    }
}