Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{Classification, FileComparer, FileComparisonResult, FileHasher, ProgressReporter};
2
3use std::cmp::Ordering;
4use std::collections::HashMap;
5use std::path::{Path, PathBuf};
6use std::sync::mpsc;
7use walkdir::WalkDir;
8
/// Progress events sent over the channel by the comparison code and consumed by
/// `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, after any hashers have been set up and right before the two
    /// directory listings start being merged.
    StartOfComparison,
    /// Sent once both directory walks are exhausted; carries the number of
    /// results that were assigned an index. It is sent before waiting for
    /// spawned comparison tasks, so some results may still arrive after it.
    TotalFiles(usize),
    /// The outcome for one file, tagged with the zero-based index it was
    /// assigned while merging the two listings.
    Result(usize, FileComparisonResult),
}
15
/// Running totals for a directory comparison, printed at the end of a run.
///
/// All counters start at zero (`Default`). `Debug`, `Clone`, `PartialEq` and `Eq`
/// are derived so a summary can be logged and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories whose copy in the first directory is newer.
    pub dir1_newer: usize,
    /// Files in both directories whose copy in the second directory is newer.
    pub dir2_newer: usize,
    /// Files in both directories that are not newer on either side but differ in size.
    pub same_time_diff_size: usize,
    /// Files in both directories that are not newer on either side, have equal
    /// size, and were found to differ in content.
    pub same_time_size_diff_content: usize,
}
26
27impl ComparisonSummary {
28    pub fn update(&mut self, result: &FileComparisonResult) {
29        match result.classification {
30            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
31            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
32            Classification::InBoth => {
33                self.in_both += 1;
34                match result.modified_time_comparison {
35                    Some(Ordering::Greater) => self.dir1_newer += 1,
36                    Some(Ordering::Less) => self.dir2_newer += 1,
37                    _ => {
38                        if result.size_comparison != Some(Ordering::Equal) {
39                            self.same_time_diff_size += 1;
40                        } else if result.is_content_same == Some(false) {
41                            self.same_time_size_diff_content += 1;
42                        }
43                    }
44                }
45            }
46        }
47    }
48
49    pub fn print(
50        &self,
51        mut writer: impl std::io::Write,
52        dir1_name: &str,
53        dir2_name: &str,
54    ) -> std::io::Result<()> {
55        writeln!(writer, "Files in both: {}", self.in_both)?;
56        writeln!(writer, "Files only in {}: {}", dir1_name, self.only_in_dir1)?;
57        writeln!(writer, "Files only in {}: {}", dir2_name, self.only_in_dir2)?;
58        writeln!(
59            writer,
60            "Files in both ({} is newer): {}",
61            dir1_name, self.dir1_newer
62        )?;
63        writeln!(
64            writer,
65            "Files in both ({} is newer): {}",
66            dir2_name, self.dir2_newer
67        )?;
68        writeln!(
69            writer,
70            "Files in both (same time, different size): {}",
71            self.same_time_diff_size
72        )?;
73        writeln!(
74            writer,
75            "Files in both (same time and size, different content): {}",
76            self.same_time_size_diff_content
77        )?;
78        Ok(())
79    }
80}
81
/// Methods for comparing files.
///
/// Selects how a file that exists in both directories is compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time.
    ///
    /// The content comparison is skipped in this mode.
    Size,
    /// Compare by hash (BLAKE3).
    ///
    /// One hasher is created per directory and its cache is saved after the run.
    Hash,
    /// Compare by hash, without using the cached hashes.
    ///
    /// Same as `Hash`, but both hash caches are cleared before comparing.
    Rehash,
    /// Compare byte-by-byte.
    Full,
}
94
95/// A tool for comparing the contents of two directories.
96pub struct DirectoryComparer {
97    dir1: PathBuf,
98    dir2: PathBuf,
99    pub is_symbols_format: bool,
100    pub buffer_size: usize,
101    pub comparison_method: FileComparisonMethod,
102}
103
104impl DirectoryComparer {
105    /// Creates a new `DirectoryComparer` for the two given directories.
106    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
107        Self {
108            dir1,
109            dir2,
110            is_symbols_format: false,
111            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
112            comparison_method: FileComparisonMethod::Hash,
113        }
114    }
115
116    /// Sets the maximum number of threads for parallel processing.
117    /// This initializes the global Rayon thread pool.
118    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
119        rayon::ThreadPoolBuilder::new()
120            .num_threads(parallel)
121            .build_global()
122            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
123        Ok(())
124    }
125
126    /// Executes the directory comparison and prints results to stdout.
127    /// This is a convenience method for CLI usage.
128    pub fn run(&self) -> anyhow::Result<()> {
129        let progress = ProgressReporter::new();
130        progress.set_message("Scanning directories...");
131        let start_time = std::time::Instant::now();
132        let mut summary = ComparisonSummary::default();
133        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
134        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
135        let (tx, rx) = mpsc::channel();
136        std::thread::scope(|scope| {
137            scope.spawn(move || {
138                if let Err(e) = self.compare_streaming_ordered(tx) {
139                    log::error!("Error during comparison: {}", e);
140                }
141            });
142
143            // Receive results and update summary/UI
144            while let Ok(event) = rx.recv() {
145                match event {
146                    CompareProgress::StartOfComparison => {
147                        progress.set_message("Comparing files...");
148                    }
149                    CompareProgress::TotalFiles(total_files) => {
150                        progress.set_length(total_files as u64);
151                        progress.set_message("");
152                    }
153                    CompareProgress::Result(_, result) => {
154                        summary.update(&result);
155                        if self.is_symbols_format {
156                            progress.suspend(|| {
157                                println!(
158                                    "{} {}",
159                                    result.to_symbol_string(),
160                                    result.relative_path.display()
161                                );
162                            })
163                        } else if !result.is_identical() {
164                            progress.suspend(|| {
165                                println!(
166                                    "{}: {}",
167                                    result.relative_path.display(),
168                                    result.to_string(dir1_str, dir2_str)
169                                );
170                            });
171                        }
172                        progress.inc(1);
173                    }
174                }
175            }
176        });
177        progress.finish();
178        eprintln!("\n--- Comparison Summary ---");
179        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
180        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
181        Ok(())
182    }
183
184    /// Performs the directory comparison and streams results via a channel.
185    ///
186    /// # Arguments
187    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
188    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
189        let (tx_unordered, rx_unordered) = mpsc::channel();
190        std::thread::scope(|scope| {
191            scope.spawn(move || {
192                if let Err(e) = self.compare_streaming(tx_unordered) {
193                    log::error!("Error during unordered comparison: {}", e);
194                }
195            });
196
197            let mut buffer = HashMap::new();
198            let mut next_index = 0;
199            for event in rx_unordered {
200                if let CompareProgress::Result(i, _) = &event {
201                    let index = *i;
202                    if index == next_index {
203                        tx.send(event)?;
204                        next_index += 1;
205                        while let Some(buffered) = buffer.remove(&next_index) {
206                            tx.send(buffered)?;
207                            next_index += 1;
208                        }
209                    } else {
210                        buffer.insert(index, event);
211                    }
212                } else {
213                    tx.send(event)?;
214                }
215            }
216            Ok::<(), anyhow::Error>(())
217        })?;
218
219        Ok(())
220    }
221
222    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
223        log::info!("Scanning directory: {:?}", self.dir1);
224        let mut it1 = WalkDir::new(&self.dir1).sort_by_file_name().into_iter();
225        log::info!("Scanning directory: {:?}", self.dir2);
226        let mut it2 = WalkDir::new(&self.dir2).sort_by_file_name().into_iter();
227        let mut next1 = Self::get_next_file(&mut it1, &self.dir1);
228        let mut next2 = Self::get_next_file(&mut it2, &self.dir2);
229        let mut index = 0;
230        let hashers = if self.comparison_method == FileComparisonMethod::Hash
231            || self.comparison_method == FileComparisonMethod::Rehash
232        {
233            let (h1, h2) = rayon::join(
234                || FileHasher::new(self.dir1.clone()),
235                || FileHasher::new(self.dir2.clone()),
236            );
237            if self.comparison_method == FileComparisonMethod::Rehash {
238                h1.clear_cache()?;
239                h2.clear_cache()?;
240            }
241            Some((h1, h2))
242        } else {
243            None
244        };
245        tx.send(CompareProgress::StartOfComparison)?;
246        rayon::scope(|scope| {
247            loop {
248                let cmp = match (&next1, &next2) {
249                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
250                    (Some(_), None) => Ordering::Less,
251                    (None, Some(_)) => Ordering::Greater,
252                    (None, None) => break,
253                };
254                match cmp {
255                    Ordering::Less => {
256                        let (rel1, _) = next1.take().unwrap();
257                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
258                        tx.send(CompareProgress::Result(index, result))?;
259                        index += 1;
260                        next1 = Self::get_next_file(&mut it1, &self.dir1);
261                    }
262                    Ordering::Greater => {
263                        let (rel2, _) = next2.take().unwrap();
264                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
265                        tx.send(CompareProgress::Result(index, result))?;
266                        index += 1;
267                        next2 = Self::get_next_file(&mut it2, &self.dir2);
268                    }
269                    Ordering::Equal => {
270                        let (rel_path, path1) = next1.take().unwrap();
271                        let (_, path2) = next2.take().unwrap();
272                        let buffer_size = self.buffer_size;
273                        let tx_clone = tx.clone();
274                        let i = index;
275                        let should_compare = self.comparison_method != FileComparisonMethod::Size;
276                        let hashers_ref = hashers.as_ref();
277                        scope.spawn(move |_| {
278                            let mut comparer = FileComparer::new(&path1, &path2);
279                            comparer.buffer_size = buffer_size;
280                            if let Some((h1, h2)) = hashers_ref {
281                                comparer.hashers = Some((h1, h2));
282                            }
283                            let mut result =
284                                FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
285                            if let Err(error) = result.update(&comparer, should_compare) {
286                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
287                            }
288                            if tx_clone.send(CompareProgress::Result(i, result)).is_err() {
289                                log::error!(
290                                    "Receiver dropped, stopping comparison of {:?}",
291                                    rel_path
292                                );
293                            }
294                        });
295                        index += 1;
296                        next1 = Self::get_next_file(&mut it1, &self.dir1);
297                        next2 = Self::get_next_file(&mut it2, &self.dir2);
298                    }
299                }
300            }
301            tx.send(CompareProgress::TotalFiles(index))
302        })?;
303        if let Some((h1, h2)) = hashers {
304            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
305            r1?;
306            r2?;
307        }
308        Ok(())
309    }
310
311    fn get_next_file(it: &mut walkdir::IntoIter, dir: &Path) -> Option<(PathBuf, PathBuf)> {
312        for entry in it {
313            match entry {
314                Ok(entry) => {
315                    if entry.file_type().is_file() {
316                        let rel_path = entry.path().strip_prefix(dir).unwrap();
317                        return Some((rel_path.to_path_buf(), entry.path().to_path_buf()));
318                    }
319                }
320                Err(error) => {
321                    log::error!("Error while walking directory: {}", error);
322                }
323            }
324        }
325        None
326    }
327}
328
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Runs the ordered comparison to completion and keeps only the per-file
    /// results, in the order they were received.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn test_comparison_summary() {
        let only1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_in_dir1 =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_dir1.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only1, &only2, &newer_in_dir1] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical on both sides.
        fs::write(dir1.path().join("same.txt"), b"same content")?;
        fs::write(dir2.path().join("same.txt"), b"same content")?;

        // Present on one side only.
        fs::write(dir1.path().join("only1.txt"), b"only in dir1")?;
        fs::write(dir2.path().join("only2.txt"), b"only in dir2")?;

        // Present on both sides with different length and content.
        fs::write(dir1.path().join("diff.txt"), b"content 1")?;
        fs::write(dir2.path().join("diff.txt"), b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str(), Some("diff.txt"));
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str(), Some("only1.txt"));
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str(), Some("only2.txt"));
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.relative_path.to_str(), Some("same.txt"));
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn test_directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different content: size mode must not look at the content.
        fs::write(dir1.path().join("file.txt"), b"content 1")?;
        fs::write(dir2.path().join("file.txt"), b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;

        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str(), Some("file.txt"));
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}