// compare_dir/dir_comparer.rs

use crate::{
    Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, ProgressReporter,
};

use std::cmp::Ordering;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::mpsc;

10#[derive(Debug, Clone)]
11enum CompareProgress {
12    StartOfComparison,
13    FileDone,
14    TotalFiles(usize),
15    Result(usize, FileComparisonResult),
16}
17
/// Running totals of the comparison results, grouped by outcome.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct ComparisonSummary {
    /// Entries present in both directories.
    pub in_both: usize,
    /// Entries present only in the first directory.
    pub only_in_dir1: usize,
    /// Entries present only in the second directory.
    pub only_in_dir2: usize,
    /// Entries in both directories where the first directory's copy is newer.
    pub dir1_newer: usize,
    /// Entries in both directories where the second directory's copy is newer.
    pub dir2_newer: usize,
    /// Entries in both directories with the same time but a different size.
    pub same_time_diff_size: usize,
    /// Entries in both directories with the same time and size but different content.
    pub same_time_size_diff_content: usize,
}

29impl ComparisonSummary {
30    pub fn update(&mut self, result: &FileComparisonResult) {
31        match result.classification {
32            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
33            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
34            Classification::InBoth => {
35                self.in_both += 1;
36                match result.modified_time_comparison {
37                    Some(Ordering::Greater) => self.dir1_newer += 1,
38                    Some(Ordering::Less) => self.dir2_newer += 1,
39                    _ => {
40                        if result.size_comparison != Some(Ordering::Equal) {
41                            self.same_time_diff_size += 1;
42                        } else if result.is_content_same == Some(false) {
43                            self.same_time_size_diff_content += 1;
44                        }
45                    }
46                }
47            }
48        }
49    }
50
51    pub fn print(
52        &self,
53        mut writer: impl std::io::Write,
54        dir1_name: &str,
55        dir2_name: &str,
56    ) -> std::io::Result<()> {
57        writeln!(writer, "Files in both: {}", self.in_both)?;
58        writeln!(writer, "Files only in {}: {}", dir1_name, self.only_in_dir1)?;
59        writeln!(writer, "Files only in {}: {}", dir2_name, self.only_in_dir2)?;
60        writeln!(
61            writer,
62            "Files in both ({} is newer): {}",
63            dir1_name, self.dir1_newer
64        )?;
65        writeln!(
66            writer,
67            "Files in both ({} is newer): {}",
68            dir2_name, self.dir2_newer
69        )?;
70        writeln!(
71            writer,
72            "Files in both (same time, different size): {}",
73            self.same_time_diff_size
74        )?;
75        writeln!(
76            writer,
77            "Files in both (same time and size, different content): {}",
78            self.same_time_size_diff_content
79        )?;
80        Ok(())
81    }
82}
83
/// Methods for comparing files.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time.
    Size,
    /// Compare by hash (BLAKE3).
    Hash,
    /// Compare by hash, without using the cached hashes.
    Rehash,
    /// Compare byte-by-byte.
    Full,
}

97/// A tool for comparing the contents of two directories.
98pub struct DirectoryComparer {
99    dir1: PathBuf,
100    dir2: PathBuf,
101    pub is_symbols_format: bool,
102    pub buffer_size: usize,
103    pub comparison_method: FileComparisonMethod,
104}
105
106impl DirectoryComparer {
107    /// Creates a new `DirectoryComparer` for the two given directories.
108    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
109        Self {
110            dir1,
111            dir2,
112            is_symbols_format: false,
113            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
114            comparison_method: FileComparisonMethod::Hash,
115        }
116    }
117
118    /// Sets the maximum number of threads for parallel processing.
119    /// This initializes the global Rayon thread pool.
120    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
121        rayon::ThreadPoolBuilder::new()
122            .num_threads(parallel)
123            .build_global()
124            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
125        Ok(())
126    }
127
128    /// Executes the directory comparison and prints results to stdout.
129    /// This is a convenience method for CLI usage.
130    pub fn run(&self) -> anyhow::Result<()> {
131        if self.dir1.is_file() {
132            return self.run_file_comparer();
133        }
134
135        let progress = ProgressReporter::new();
136        progress.set_message("Scanning directories...");
137        let start_time = std::time::Instant::now();
138        let mut summary = ComparisonSummary::default();
139        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
140        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
141        let (tx, rx) = mpsc::channel();
142        std::thread::scope(|scope| {
143            scope.spawn(move || {
144                if let Err(e) = self.compare_streaming_ordered(tx) {
145                    log::error!("Error during comparison: {}", e);
146                }
147            });
148
149            // Receive results and update summary/UI
150            while let Ok(event) = rx.recv() {
151                match event {
152                    CompareProgress::StartOfComparison => {
153                        progress.set_message("Comparing files...");
154                    }
155                    CompareProgress::TotalFiles(total_files) => {
156                        progress.set_length(total_files as u64);
157                        progress.set_message("");
158                    }
159                    CompareProgress::Result(_, result) => {
160                        summary.update(&result);
161                        if self.is_symbols_format {
162                            progress.suspend(|| {
163                                println!(
164                                    "{} {}",
165                                    result.to_symbol_string(),
166                                    result.relative_path.display()
167                                );
168                            })
169                        } else if !result.is_identical() {
170                            progress.suspend(|| {
171                                println!(
172                                    "{}: {}",
173                                    result.relative_path.display(),
174                                    result.to_string(dir1_str, dir2_str)
175                                );
176                            });
177                        }
178                    }
179                    CompareProgress::FileDone => progress.inc(1),
180                }
181            }
182        });
183        progress.finish();
184        eprintln!("\n--- Comparison Summary ---");
185        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
186        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
187        Ok(())
188    }
189
190    /// Performs the directory comparison and streams results via a channel.
191    ///
192    /// # Arguments
193    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
194    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
195        let (tx_unordered, rx_unordered) = mpsc::channel();
196        std::thread::scope(|scope| {
197            scope.spawn(move || {
198                if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
199                    log::error!("Error during unordered comparison: {}", e);
200                }
201            });
202
203            let mut buffer = HashMap::new();
204            let mut next_index = 0;
205            for event in rx_unordered {
206                if let CompareProgress::Result(i, _) = &event {
207                    let index = *i;
208                    if index == next_index {
209                        tx.send(event)?;
210                        next_index += 1;
211                        while let Some(buffered) = buffer.remove(&next_index) {
212                            tx.send(buffered)?;
213                            next_index += 1;
214                        }
215                    } else {
216                        buffer.insert(index, event);
217                    }
218                } else {
219                    tx.send(event)?;
220                }
221            }
222            Ok::<(), anyhow::Error>(())
223        })?;
224        Ok(())
225    }
226
227    fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
228        let mut it1 = FileIterator::new(self.dir1.clone());
229        let mut it2 = FileIterator::new(self.dir2.clone());
230        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
231        if let Some((h1, h2)) = &hashers {
232            it1.hasher = Some(h1);
233            it2.hasher = Some(h2);
234            if self.comparison_method == FileComparisonMethod::Rehash {
235                h1.clear_cache()?;
236                h2.clear_cache()?;
237            }
238        }
239
240        let mut cur1 = it1.next();
241        let mut cur2 = it2.next();
242        let mut index = 0;
243        tx.send(CompareProgress::StartOfComparison)?;
244        rayon::scope(|scope| {
245            loop {
246                let cmp = match (&cur1, &cur2) {
247                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
248                    (Some(_), None) => Ordering::Less,
249                    (None, Some(_)) => Ordering::Greater,
250                    (None, None) => break,
251                };
252                match cmp {
253                    Ordering::Less => {
254                        let (rel1, _) = cur1.take().unwrap();
255                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
256                        tx.send(CompareProgress::Result(index, result))?;
257                        tx.send(CompareProgress::FileDone)?;
258                        index += 1;
259                        cur1 = it1.next();
260                    }
261                    Ordering::Greater => {
262                        let (rel2, _) = cur2.take().unwrap();
263                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
264                        tx.send(CompareProgress::Result(index, result))?;
265                        tx.send(CompareProgress::FileDone)?;
266                        index += 1;
267                        cur2 = it2.next();
268                    }
269                    Ordering::Equal => {
270                        let (rel_path, path1) = cur1.take().unwrap();
271                        let (_, path2) = cur2.take().unwrap();
272                        let buffer_size = self.buffer_size;
273                        let tx_clone = tx.clone();
274                        let i = index;
275                        let should_compare = self.comparison_method != FileComparisonMethod::Size;
276                        let hashers_ref = hashers.as_ref();
277                        scope.spawn(move |_| {
278                            let mut comparer = FileComparer::new(&path1, &path2);
279                            comparer.buffer_size = buffer_size;
280                            if let Some((h1, h2)) = hashers_ref {
281                                comparer.hashers = Some((h1, h2));
282                            }
283                            let mut result =
284                                FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
285                            if let Err(error) = result.update(&comparer, should_compare) {
286                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
287                            }
288                            if tx_clone.send(CompareProgress::Result(i, result)).is_err()
289                                || tx_clone.send(CompareProgress::FileDone).is_err()
290                            {
291                                log::error!("Send failed during comparison of {:?}", rel_path);
292                            }
293                        });
294                        index += 1;
295                        cur1 = it1.next();
296                        cur2 = it2.next();
297                    }
298                }
299            }
300            tx.send(CompareProgress::TotalFiles(index))
301        })?;
302        Self::save_hashers(hashers)?;
303        Ok(())
304    }
305
306    fn get_hashers(
307        &self,
308        dir1: &Path,
309        dir2: &Path,
310    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
311        if self.comparison_method == FileComparisonMethod::Hash
312            || self.comparison_method == FileComparisonMethod::Rehash
313        {
314            let (h1, h2) = rayon::join(
315                || FileHasher::new(dir1.to_path_buf()),
316                || FileHasher::new(dir2.to_path_buf()),
317            );
318            return Ok(Some((h1, h2)));
319        }
320        Ok(None)
321    }
322
323    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
324        if let Some((h1, h2)) = hashers {
325            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
326            r1?;
327            r2?;
328        }
329        Ok(())
330    }
331
332    fn run_file_comparer(&self) -> anyhow::Result<()> {
333        assert!(self.dir1.is_file());
334        let file1 = &self.dir1;
335        let dir1 = file1.parent().unwrap();
336        let file1_name = file1.file_name().unwrap();
337        let (dir2, file2) = if self.dir2.is_file() {
338            (self.dir2.parent().unwrap(), self.dir2.clone())
339        } else {
340            (self.dir2.as_path(), self.dir2.join(file1_name))
341        };
342
343        let mut comparer = FileComparer::new(file1, &file2);
344        comparer.buffer_size = self.buffer_size;
345        let hashers = self.get_hashers(dir1, dir2)?;
346        if let Some((h1, h2)) = &hashers {
347            if self.comparison_method == FileComparisonMethod::Rehash {
348                h1.remove_cache_entry(file1)?;
349                h2.remove_cache_entry(&file2)?;
350            }
351            comparer.hashers = Some((h1, h2));
352        }
353        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
354        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
355        result.update(&comparer, should_compare_content)?;
356        let file1_str = file1.to_str().unwrap_or("file1");
357        if self.is_symbols_format {
358            println!("{} {}", result.to_symbol_string(), file1_str);
359        } else {
360            let file2_str = file2.to_str().unwrap_or("file2");
361            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
362        }
363        Self::save_hashers(hashers)?;
364        Ok(())
365    }
366}
367
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Runs the ordered comparison to completion and returns its results.
    ///
    /// Also checks the ordering guarantee: the index carried by each
    /// `Result` event must match the number of results received before it.
    fn collect_ordered_results(
        comparer: &DirectoryComparer,
    ) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;

        let mut results = Vec::new();
        while let Ok(event) = rx.recv() {
            if let CompareProgress::Result(index, result) = event {
                assert_eq!(index, results.len(), "results must arrive in index order");
                results.push(result);
            }
        }
        Ok(results)
    }

    #[test]
    fn test_comparison_summary() {
        let mut summary = ComparisonSummary::default();
        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        res3.modified_time_comparison = Some(Ordering::Greater);

        summary.update(&res1);
        summary.update(&res2);
        summary.update(&res3);

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Create files in dir1
        fs::write(dir1.path().join("same.txt"), b"same content")?;
        fs::write(dir1.path().join("only1.txt"), b"only in dir1")?;

        // Create files in dir2
        fs::write(dir2.path().join("same.txt"), b"same content")?;
        fs::write(dir2.path().join("only2.txt"), b"only in dir2")?;

        // Create a different file
        fs::write(dir1.path().join("diff.txt"), b"content 1")?;
        fs::write(dir2.path().join("diff.txt"), b"content 222")?; // different length and content

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_ordered_results(&comparer)?;

        // Sort by name so the assertions below do not depend on walk order.
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        // diff.txt
        assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(results[0].classification, Classification::InBoth);
        assert!(
            results[0].is_content_same == Some(false)
                || results[0].size_comparison != Some(Ordering::Equal)
        );

        // only1.txt
        assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(results[1].classification, Classification::OnlyInDir1);

        // only2.txt
        assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(results[2].classification, Classification::OnlyInDir2);

        // same.txt
        assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(results[3].classification, Classification::InBoth);
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn test_directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        fs::write(dir1.path().join("file.txt"), b"content 1")?;
        fs::write(dir2.path().join("file.txt"), b"content 2")?; // same length, different content

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_ordered_results(&comparer)?;

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(results[0].classification, Classification::InBoth);
        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
        // Size mode must not look at the content at all.
        assert_eq!(results[0].is_content_same, None);

        Ok(())
    }
}