// compare_dir/dir_comparer.rs

1use crate::{
2    Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, ProgressReporter,
3};
4use globset::GlobSet;
5
6use std::cmp::Ordering;
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::mpsc;
10
11#[derive(Debug, Clone)]
12enum CompareProgress {
13    StartOfComparison,
14    FileDone,
15    TotalFiles(usize),
16    Result(usize, FileComparisonResult),
17}
18
19#[derive(Default)]
20struct ComparisonSummary {
21    pub in_both: usize,
22    pub only_in_dir1: usize,
23    pub only_in_dir2: usize,
24    pub dir1_newer: usize,
25    pub dir2_newer: usize,
26    pub same_time_diff_size: usize,
27    pub same_time_size_diff_content: usize,
28}
29
30impl ComparisonSummary {
31    pub fn update(&mut self, result: &FileComparisonResult) {
32        match result.classification {
33            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
34            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
35            Classification::InBoth => {
36                self.in_both += 1;
37                match result.modified_time_comparison {
38                    Some(Ordering::Greater) => self.dir1_newer += 1,
39                    Some(Ordering::Less) => self.dir2_newer += 1,
40                    _ => {
41                        if result.size_comparison != Some(Ordering::Equal) {
42                            self.same_time_diff_size += 1;
43                        } else if result.is_content_same == Some(false) {
44                            self.same_time_size_diff_content += 1;
45                        }
46                    }
47                }
48            }
49        }
50    }
51
52    pub fn print(
53        &self,
54        mut writer: impl std::io::Write,
55        dir1_name: &str,
56        dir2_name: &str,
57    ) -> std::io::Result<()> {
58        writeln!(writer, "Files in both: {}", self.in_both)?;
59        writeln!(writer, "Files only in {}: {}", dir1_name, self.only_in_dir1)?;
60        writeln!(writer, "Files only in {}: {}", dir2_name, self.only_in_dir2)?;
61        writeln!(
62            writer,
63            "Files in both ({} is newer): {}",
64            dir1_name, self.dir1_newer
65        )?;
66        writeln!(
67            writer,
68            "Files in both ({} is newer): {}",
69            dir2_name, self.dir2_newer
70        )?;
71        writeln!(
72            writer,
73            "Files in both (same time, different size): {}",
74            self.same_time_diff_size
75        )?;
76        writeln!(
77            writer,
78            "Files in both (same time and size, different content): {}",
79            self.same_time_size_diff_content
80        )?;
81        Ok(())
82    }
83}
84
/// Strategy used to decide whether two files present on both sides are the same.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Look at size and modification time only; contents are not compared.
    Size,
    /// Compare content hashes (BLAKE3).
    Hash,
    /// Compare content hashes, ignoring any hashes that were cached earlier.
    Rehash,
    /// Compare the contents byte by byte.
    Full,
}
97
98/// A tool for comparing the contents of two directories.
99pub struct DirectoryComparer {
100    dir1: PathBuf,
101    dir2: PathBuf,
102    pub is_symbols_format: bool,
103    pub buffer_size: usize,
104    pub comparison_method: FileComparisonMethod,
105    pub exclude: Option<GlobSet>,
106}
107
108impl DirectoryComparer {
109    /// Creates a new `DirectoryComparer` for the two given directories.
110    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
111        Self {
112            dir1,
113            dir2,
114            is_symbols_format: false,
115            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
116            comparison_method: FileComparisonMethod::Hash,
117            exclude: None,
118        }
119    }
120
121    /// Sets the maximum number of threads for parallel processing.
122    /// This initializes the global Rayon thread pool.
123    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
124        rayon::ThreadPoolBuilder::new()
125            .num_threads(parallel)
126            .build_global()
127            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
128        Ok(())
129    }
130
131    /// Executes the directory comparison and prints results to stdout.
132    /// This is a convenience method for CLI usage.
133    pub fn run(&self) -> anyhow::Result<()> {
134        if self.dir1.is_file() {
135            return self.run_file_comparer();
136        }
137
138        let progress = ProgressReporter::new();
139        progress.set_message("Scanning directories...");
140        let start_time = std::time::Instant::now();
141        let mut summary = ComparisonSummary::default();
142        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
143        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
144        let (tx, rx) = mpsc::channel();
145        std::thread::scope(|scope| {
146            scope.spawn(move || {
147                if let Err(e) = self.compare_streaming_ordered(tx) {
148                    log::error!("Error during comparison: {}", e);
149                }
150            });
151
152            // Receive results and update summary/UI
153            while let Ok(event) = rx.recv() {
154                match event {
155                    CompareProgress::StartOfComparison => {
156                        progress.set_message("Comparing files...");
157                    }
158                    CompareProgress::TotalFiles(total_files) => {
159                        progress.set_length(total_files as u64);
160                        progress.set_message("");
161                    }
162                    CompareProgress::Result(_, result) => {
163                        summary.update(&result);
164                        if self.is_symbols_format {
165                            progress.suspend(|| {
166                                println!(
167                                    "{} {}",
168                                    result.to_symbol_string(),
169                                    result.relative_path.display()
170                                );
171                            })
172                        } else if !result.is_identical() {
173                            progress.suspend(|| {
174                                println!(
175                                    "{}: {}",
176                                    result.relative_path.display(),
177                                    result.to_string(dir1_str, dir2_str)
178                                );
179                            });
180                        }
181                    }
182                    CompareProgress::FileDone => progress.inc(1),
183                }
184            }
185        });
186        progress.finish();
187        eprintln!("\n--- Comparison Summary ---");
188        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
189        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
190        Ok(())
191    }
192
193    /// Performs the directory comparison and streams results via a channel.
194    ///
195    /// # Arguments
196    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
197    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
198        let (tx_unordered, rx_unordered) = mpsc::channel();
199        std::thread::scope(|scope| {
200            scope.spawn(move || {
201                if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
202                    log::error!("Error during unordered comparison: {}", e);
203                }
204            });
205
206            let mut buffer = HashMap::new();
207            let mut next_index = 0;
208            for event in rx_unordered {
209                if let CompareProgress::Result(i, _) = &event {
210                    let index = *i;
211                    if index == next_index {
212                        tx.send(event)?;
213                        next_index += 1;
214                        while let Some(buffered) = buffer.remove(&next_index) {
215                            tx.send(buffered)?;
216                            next_index += 1;
217                        }
218                    } else {
219                        buffer.insert(index, event);
220                    }
221                } else {
222                    tx.send(event)?;
223                }
224            }
225            Ok::<(), anyhow::Error>(())
226        })?;
227        Ok(())
228    }
229
230    fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
231        let mut it1 = FileIterator::new(self.dir1.clone());
232        let mut it2 = FileIterator::new(self.dir2.clone());
233        it1.exclude = self.exclude.as_ref();
234        it2.exclude = self.exclude.as_ref();
235        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
236        if let Some((h1, h2)) = &hashers {
237            it1.hasher = Some(h1);
238            it2.hasher = Some(h2);
239            if self.comparison_method == FileComparisonMethod::Rehash {
240                h1.clear_cache()?;
241                h2.clear_cache()?;
242            }
243        }
244
245        let mut cur1 = it1.next();
246        let mut cur2 = it2.next();
247        let mut index = 0;
248        tx.send(CompareProgress::StartOfComparison)?;
249        rayon::scope(|scope| {
250            loop {
251                let cmp = match (&cur1, &cur2) {
252                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
253                    (Some(_), None) => Ordering::Less,
254                    (None, Some(_)) => Ordering::Greater,
255                    (None, None) => break,
256                };
257                match cmp {
258                    Ordering::Less => {
259                        let (rel1, _) = cur1.take().unwrap();
260                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
261                        tx.send(CompareProgress::Result(index, result))?;
262                        tx.send(CompareProgress::FileDone)?;
263                        index += 1;
264                        cur1 = it1.next();
265                    }
266                    Ordering::Greater => {
267                        let (rel2, _) = cur2.take().unwrap();
268                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
269                        tx.send(CompareProgress::Result(index, result))?;
270                        tx.send(CompareProgress::FileDone)?;
271                        index += 1;
272                        cur2 = it2.next();
273                    }
274                    Ordering::Equal => {
275                        let (rel_path, path1) = cur1.take().unwrap();
276                        let (_, path2) = cur2.take().unwrap();
277                        let buffer_size = self.buffer_size;
278                        let tx_clone = tx.clone();
279                        let i = index;
280                        let should_compare = self.comparison_method != FileComparisonMethod::Size;
281                        let hashers_ref = hashers.as_ref();
282                        scope.spawn(move |_| {
283                            let mut comparer = FileComparer::new(&path1, &path2);
284                            comparer.buffer_size = buffer_size;
285                            if let Some((h1, h2)) = hashers_ref {
286                                comparer.hashers = Some((h1, h2));
287                            }
288                            let mut result =
289                                FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
290                            if let Err(error) = result.update(&comparer, should_compare) {
291                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
292                            }
293                            if tx_clone.send(CompareProgress::Result(i, result)).is_err()
294                                || tx_clone.send(CompareProgress::FileDone).is_err()
295                            {
296                                log::error!("Send failed during comparison of {:?}", rel_path);
297                            }
298                        });
299                        index += 1;
300                        cur1 = it1.next();
301                        cur2 = it2.next();
302                    }
303                }
304            }
305            tx.send(CompareProgress::TotalFiles(index))
306        })?;
307        Self::save_hashers(hashers)?;
308        Ok(())
309    }
310
311    fn get_hashers(
312        &self,
313        dir1: &Path,
314        dir2: &Path,
315    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
316        if self.comparison_method == FileComparisonMethod::Hash
317            || self.comparison_method == FileComparisonMethod::Rehash
318        {
319            let (h1, h2) = rayon::join(
320                || FileHasher::new(dir1.to_path_buf()),
321                || FileHasher::new(dir2.to_path_buf()),
322            );
323            return Ok(Some((h1, h2)));
324        }
325        Ok(None)
326    }
327
328    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
329        if let Some((h1, h2)) = hashers {
330            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
331            r1?;
332            r2?;
333        }
334        Ok(())
335    }
336
337    fn run_file_comparer(&self) -> anyhow::Result<()> {
338        assert!(self.dir1.is_file());
339        let file1 = &self.dir1;
340        let dir1 = file1.parent().unwrap();
341        let file1_name = file1.file_name().unwrap();
342        let (dir2, file2) = if self.dir2.is_file() {
343            (self.dir2.parent().unwrap(), self.dir2.clone())
344        } else {
345            (self.dir2.as_path(), self.dir2.join(file1_name))
346        };
347
348        let mut comparer = FileComparer::new(file1, &file2);
349        comparer.buffer_size = self.buffer_size;
350        let hashers = self.get_hashers(dir1, dir2)?;
351        if let Some((h1, h2)) = &hashers {
352            if self.comparison_method == FileComparisonMethod::Rehash {
353                h1.remove_cache_entry(file1)?;
354                h2.remove_cache_entry(&file2)?;
355            }
356            comparer.hashers = Some((h1, h2));
357        }
358        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
359        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
360        result.update(&comparer, should_compare_content)?;
361        let file1_str = file1.to_str().unwrap_or("file1");
362        if self.is_symbols_format {
363            println!("{} {}", result.to_symbol_string(), file1_str);
364        } else {
365            let file2_str = file2.to_str().unwrap_or("file2");
366            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
367        }
368        Self::save_hashers(hashers)?;
369        Ok(())
370    }
371}
372
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Creates `name` inside `dir` and fills it with `contents`.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> anyhow::Result<()> {
        let mut file = fs::File::create(dir.join(name))?;
        file.write_all(contents)?;
        Ok(())
    }

    /// Runs the ordered comparison to completion and returns only the per-file results.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    /// One entry of each basic kind lands in the matching counter.
    #[test]
    fn test_comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut left_newer = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        left_newer.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &left_newer] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    /// Default (hash) mode over two small directories classifies every entry.
    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Files in dir1
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;

        // Files in dir2
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;

        // A file that differs in both length and content
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        let expected = [
            ("diff.txt", Classification::InBoth),
            ("only1.txt", Classification::OnlyInDir1),
            ("only2.txt", Classification::OnlyInDir2),
            ("same.txt", Classification::InBoth),
        ];
        assert_eq!(results.len(), 4);
        for (result, (name, classification)) in results.iter().zip(expected) {
            assert_eq!(result.relative_path.to_str().unwrap(), name);
            assert_eq!(result.classification, classification);
        }

        // diff.txt must be flagged as different one way or the other.
        let diff = &results[0];
        assert!(diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal));

        // same.txt has equal sizes.
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    /// In `Size` mode, contents are never compared, so equal-length files
    /// with different bytes report no content verdict.
    #[test]
    fn test_directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different content
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}