Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3    Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8    cmp::Ordering,
9    io::{self, stdout},
10    path::{Path, PathBuf},
11    sync::{Arc, mpsc},
12    time,
13};
14
15#[derive(Debug, Clone)]
16enum CompareProgress {
17    StartOfComparison,
18    FileDone,
19    TotalFiles(usize),
20    Result(usize, FileComparisonResult),
21    Error,
22}
23
/// Strategy used to decide whether two files with the same relative path match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Look at size and modification time only; file contents are never read.
    Size,
    /// Compare content hashes (BLAKE3).
    Hash,
    /// Compare content hashes, ignoring any previously cached hashes.
    Rehash,
    /// Compare the contents byte-by-byte.
    Full,
}
36
37/// A tool for comparing the contents of two directories.
38pub struct DirectoryComparer {
39    dir1: PathBuf,
40    dir2: PathBuf,
41    pub is_symbols_format: bool,
42    pub buffer_size: usize,
43    pub comparison_method: FileComparisonMethod,
44    pub exclude: Option<GlobSet>,
45    pub progress: Option<Arc<ProgressBuilder>>,
46    pub jobs: usize,
47}
48
49impl DirectoryComparer {
50    pub const DEFAULT_JOBS: usize = 8;
51
52    /// Creates a new `DirectoryComparer` for the two given directories.
53    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54        Self {
55            dir1,
56            dir2,
57            is_symbols_format: false,
58            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59            comparison_method: FileComparisonMethod::Hash,
60            exclude: None,
61            progress: None,
62            jobs: Self::DEFAULT_JOBS,
63        }
64    }
65
66    /// Executes the directory comparison and prints results to stdout.
67    /// This is a convenience method for CLI usage.
68    pub fn run(&self) -> anyhow::Result<()> {
69        if self.dir1.is_file() {
70            return self.run_file_comparer();
71        }
72
73        let progress = self
74            .progress
75            .as_ref()
76            .map(|progress| progress.add_spinner())
77            .unwrap_or_else(Progress::none);
78        progress.set_message("Scanning directories...");
79        let start_time = std::time::Instant::now();
80        let mut summary = ComparisonSummary::default();
81        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
82        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
83        let (tx, rx) = mpsc::channel();
84        std::thread::scope(|scope| {
85            scope.spawn(move || {
86                if let Err(e) = self.compare_streaming_ordered(tx) {
87                    log::error!("Error during comparison: {}", e);
88                }
89            });
90
91            // Receive results and update summary/UI
92            while let Ok(event) = rx.recv() {
93                match event {
94                    CompareProgress::StartOfComparison => {
95                        progress.set_message("Comparing files...");
96                    }
97                    CompareProgress::TotalFiles(total_files) => {
98                        progress.set_length(total_files as u64);
99                        progress.set_message("");
100                    }
101                    CompareProgress::Result(_, result) => {
102                        summary.update(&result);
103                        if self.is_symbols_format {
104                            progress.suspend_for(stdout(), || {
105                                println!(
106                                    "{} {}",
107                                    result.to_symbol_string(),
108                                    result.relative_path.display()
109                                );
110                            })
111                        } else if !result.is_identical() {
112                            progress.suspend_for(stdout(), || {
113                                println!(
114                                    "{}: {}",
115                                    result.relative_path.display(),
116                                    result.to_string(dir1_str, dir2_str)
117                                );
118                            });
119                        }
120                    }
121                    CompareProgress::FileDone => progress.inc(1),
122                    CompareProgress::Error => summary.num_errors += 1,
123                }
124            }
125        });
126        progress.finish();
127        eprintln!("\n--- Comparison Summary ---");
128        summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
129        Ok(())
130    }
131
132    /// Performs the directory comparison and streams results via a channel.
133    ///
134    /// # Arguments
135    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
136    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
137        crate::sort_stream(
138            tx,
139            |tx_unordered| self.compare_streaming(tx_unordered),
140            |event| match event {
141                CompareProgress::Result(i, _) => Some(*i),
142                _ => None,
143            },
144        )
145    }
146
147    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
148        let mut it1 = FileIterator::new(self.dir1.clone());
149        let mut it2 = FileIterator::new(self.dir2.clone());
150        it1.exclude = self.exclude.as_ref();
151        it2.exclude = self.exclude.as_ref();
152        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
153        if let Some((h1, h2)) = &hashers {
154            it1.hasher = Some(h1);
155            it2.hasher = Some(h2);
156            if self.comparison_method == FileComparisonMethod::Rehash {
157                h1.clear_cache()?;
158                h2.clear_cache()?;
159            }
160        }
161        let hashers_ref = hashers.as_ref();
162        std::thread::scope(|global_scope| {
163            let it1_rx = it1.spawn_in_scope(global_scope);
164            let it2_rx = it2.spawn_in_scope(global_scope);
165            let pool = crate::build_thread_pool(self.jobs)?;
166            pool.scope(move |scope| {
167                let mut cur1 = it1_rx.recv().ok();
168                let mut cur2 = it2_rx.recv().ok();
169                let mut index = 0;
170                tx.send(CompareProgress::StartOfComparison)?;
171                loop {
172                    let cmp = match (&cur1, &cur2) {
173                        (Some(p1), Some(p2)) => {
174                            let rel1 = crate::strip_prefix(p1, &self.dir1).unwrap();
175                            let rel2 = crate::strip_prefix(p2, &self.dir2).unwrap();
176                            rel1.cmp(rel2)
177                        }
178                        (Some(_), None) => Ordering::Less,
179                        (None, Some(_)) => Ordering::Greater,
180                        (None, None) => break,
181                    };
182                    match cmp {
183                        Ordering::Less => {
184                            let path1 = cur1.take().unwrap();
185                            let rel1 = crate::strip_prefix(&path1, &self.dir1).unwrap();
186                            let result =
187                                FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
188                            tx.send(CompareProgress::Result(index, result))?;
189                            tx.send(CompareProgress::FileDone)?;
190                            index += 1;
191                            cur1 = it1_rx.recv().ok();
192                        }
193                        Ordering::Greater => {
194                            let path2 = cur2.take().unwrap();
195                            let rel2 = crate::strip_prefix(&path2, &self.dir2).unwrap();
196                            let result =
197                                FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
198                            tx.send(CompareProgress::Result(index, result))?;
199                            tx.send(CompareProgress::FileDone)?;
200                            index += 1;
201                            cur2 = it2_rx.recv().ok();
202                        }
203                        Ordering::Equal => {
204                            let path1 = cur1.take().unwrap();
205                            let path2 = cur2.take().unwrap();
206                            let buffer_size = self.buffer_size;
207                            let tx_clone = tx.clone();
208                            let i = index;
209                            let should_compare =
210                                self.comparison_method != FileComparisonMethod::Size;
211                            scope.spawn(move |_| {
212                                let mut comparer = FileComparer::new(&path1, &path2);
213                                comparer.buffer_size = buffer_size;
214                                if let Some((h1, h2)) = hashers_ref {
215                                    comparer.hashers = Some((h1, h2));
216                                }
217                                let rel_path = crate::strip_prefix(&path1, &self.dir1).unwrap();
218                                let mut result = FileComparisonResult::new(
219                                    rel_path.into(),
220                                    Classification::InBoth,
221                                );
222                                let event = match result.update(&comparer, should_compare) {
223                                    Ok(_) => CompareProgress::Result(i, result),
224                                    Err(error) => {
225                                        log::error!(
226                                            "Error comparing {:?}: {}",
227                                            result.relative_path,
228                                            error
229                                        );
230                                        CompareProgress::Error
231                                    }
232                                };
233                                if tx_clone.send(event).is_err()
234                                    || tx_clone.send(CompareProgress::FileDone).is_err()
235                                {
236                                    log::error!("Send failed");
237                                }
238                            });
239                            index += 1;
240                            cur1 = it1_rx.recv().ok();
241                            cur2 = it2_rx.recv().ok();
242                        }
243                    }
244                }
245                tx.send(CompareProgress::TotalFiles(index))
246            })?;
247            Ok::<(), anyhow::Error>(())
248        })?;
249
250        Self::save_hashers(hashers)?;
251        Ok(())
252    }
253
254    fn get_hashers(
255        &self,
256        dir1: &Path,
257        dir2: &Path,
258    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
259        if self.comparison_method == FileComparisonMethod::Hash
260            || self.comparison_method == FileComparisonMethod::Rehash
261        {
262            let (h1_res, h2_res) =
263                rayon::join(|| FileHasher::new(&[dir1]), || FileHasher::new(&[dir2]));
264            let mut h1 = h1_res?;
265            let mut h2 = h2_res?;
266            h1.buffer_size = self.buffer_size;
267            h2.buffer_size = self.buffer_size;
268            if let Some(progress) = self.progress.as_ref() {
269                h1.progress = Some(Arc::clone(progress));
270                h2.progress = Some(Arc::clone(progress));
271            }
272            return Ok(Some((h1, h2)));
273        }
274        Ok(None)
275    }
276
277    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
278        if let Some((h1, h2)) = hashers {
279            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
280            r1?;
281            r2?;
282        }
283        Ok(())
284    }
285
286    fn run_file_comparer(&self) -> anyhow::Result<()> {
287        assert!(self.dir1.is_file());
288        let file1 = &self.dir1;
289        let dir1 = file1.parent().unwrap();
290        let file1_name = file1.file_name().unwrap();
291        let (dir2, file2) = if self.dir2.is_file() {
292            (self.dir2.parent().unwrap(), self.dir2.clone())
293        } else {
294            (self.dir2.as_path(), self.dir2.join(file1_name))
295        };
296
297        let mut comparer = FileComparer::new(file1, &file2);
298        comparer.buffer_size = self.buffer_size;
299        let hashers = self.get_hashers(dir1, dir2)?;
300        if let Some((h1, h2)) = &hashers {
301            if self.comparison_method == FileComparisonMethod::Rehash {
302                h1.remove_cache_entry(file1)?;
303                h2.remove_cache_entry(&file2)?;
304            }
305            comparer.hashers = Some((h1, h2));
306        }
307        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
308        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
309        result.update(&comparer, should_compare_content)?;
310        let file1_str = file1.to_str().unwrap_or("file1");
311        if self.is_symbols_format {
312            println!("{} {}", result.to_symbol_string(), file1_str);
313        } else {
314            let file2_str = file2.to_str().unwrap_or("file2");
315            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
316        }
317        Self::save_hashers(hashers)?;
318        Ok(())
319    }
320}
321
/// Running totals accumulated over all comparison results of one run.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present in both directories.
    pub in_both: usize,
    /// Entries present only in the first directory.
    pub only_in_dir1: usize,
    /// Entries present only in the second directory.
    pub only_in_dir2: usize,
    /// Paired entries whose first-directory copy has the later modification time.
    pub dir1_newer: usize,
    /// Paired entries whose second-directory copy has the later modification time.
    pub dir2_newer: usize,
    /// Paired entries whose first-directory copy is larger.
    pub dir1_larger: usize,
    /// Paired entries whose second-directory copy is larger.
    pub dir2_larger: usize,
    /// Paired entries of equal size whose contents differ.
    pub diff_content: usize,
    /// Paired entries for which at least one comparison produced no answer.
    pub not_comparable: usize,
    /// Comparisons that failed with an error.
    pub num_errors: usize,
}
335
336impl ComparisonSummary {
337    pub fn update(&mut self, result: &FileComparisonResult) {
338        match result.classification {
339            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
340            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
341            Classification::InBoth => {
342                self.in_both += 1;
343                let mut is_not_comparable = false;
344                match result.modified_time_comparison {
345                    Some(Ordering::Greater) => self.dir1_newer += 1,
346                    Some(Ordering::Less) => self.dir2_newer += 1,
347                    Some(Ordering::Equal) => {}
348                    None => is_not_comparable = true,
349                }
350                match result.size_comparison {
351                    Some(Ordering::Greater) => self.dir1_larger += 1,
352                    Some(Ordering::Less) => self.dir2_larger += 1,
353                    Some(Ordering::Equal) => match result.is_content_same {
354                        Some(false) => self.diff_content += 1,
355                        Some(true) => {}
356                        None => is_not_comparable = true,
357                    },
358                    None => is_not_comparable = true,
359                }
360                if is_not_comparable {
361                    self.not_comparable += 1;
362                }
363            }
364        }
365    }
366
367    pub fn print(
368        &self,
369        mut writer: impl std::io::Write,
370        start_time: &time::Instant,
371        dir1_name: &str,
372        dir2_name: &str,
373    ) -> std::io::Result<()> {
374        let values = [
375            ("Elapsed:", 0),
376            ("Files in both:", self.in_both),
377            ("Only in left:", self.only_in_dir1),
378            ("Only in right:", self.only_in_dir2),
379            ("Left is newer:", self.dir1_newer),
380            ("Right is newer:", self.dir2_newer),
381            ("Left is larger:", self.dir1_larger),
382            ("Right is larger:", self.dir2_larger),
383            ("Different content:", self.diff_content),
384            ("Not comparable:", self.not_comparable),
385            ("Errors:", self.num_errors),
386        ];
387        let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
388        formatter.write_value(&mut writer, "Left:", dir1_name)?;
389        formatter.write_value(&mut writer, "Right:", dir2_name)?;
390        formatter.write_value(
391            &mut writer,
392            values[0].0,
393            FormattedDuration(start_time.elapsed()),
394        )?;
395        formatter.write_values(&mut writer, &values[1..])?;
396        Ok(())
397    }
398}
399
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Runs the ordered comparison to completion and returns the payload of every `Result`
    /// event, in arrival order.
    fn collect_results(
        comparer: &DirectoryComparer,
    ) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        // The sender was moved into the call above, so the channel is closed by now and the
        // iterator ends once the buffered events are drained.
        let results = rx
            .iter()
            .filter_map(|event| {
                if let CompareProgress::Result(_, result) = event {
                    Some(result)
                } else {
                    None
                }
            })
            .collect();
        Ok(results)
    }

    /// One result of each classification bumps the matching counters.
    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut paired = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        paired.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &paired] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    /// End-to-end comparison of two small trees with the default settings.
    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Present in both trees with identical content.
        fs::write(dir1.path().join("same.txt"), b"same content")?;
        fs::write(dir2.path().join("same.txt"), b"same content")?;

        // Present in one tree only.
        fs::write(dir1.path().join("only1.txt"), b"only in dir1")?;
        fs::write(dir2.path().join("only2.txt"), b"only in dir2")?;

        // Present in both trees, differing in length and content.
        fs::write(dir1.path().join("diff.txt"), b"content 1")?;
        fs::write(dir2.path().join("diff.txt"), b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    /// In size mode, files of equal length are reported without a content verdict.
    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different content.
        fs::write(dir1.path().join("file.txt"), b"content 1")?;
        fs::write(dir2.path().join("file.txt"), b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}