Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3    Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8    cmp::Ordering,
9    io::stdout,
10    path::{Path, PathBuf},
11    sync::{Arc, mpsc},
12};
13
14#[derive(Debug, Clone)]
15enum CompareProgress {
16    StartOfComparison,
17    FileDone,
18    TotalFiles(usize),
19    Result(usize, FileComparisonResult),
20    Error,
21}
22
/// How two files that exist on both sides are compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileComparisonMethod {
    /// Only size and modification time are compared.
    Size,
    /// Files are compared by their BLAKE3 hash.
    Hash,
    /// Files are compared by hash, ignoring any previously cached hashes.
    Rehash,
    /// Files are compared byte-by-byte.
    Full,
}
35
36/// A tool for comparing the contents of two directories.
37pub struct DirectoryComparer {
38    dir1: PathBuf,
39    dir2: PathBuf,
40    pub is_symbols_format: bool,
41    pub buffer_size: usize,
42    pub comparison_method: FileComparisonMethod,
43    pub exclude: Option<GlobSet>,
44    pub progress: Option<Arc<ProgressBuilder>>,
45    pub jobs: usize,
46}
47
48impl DirectoryComparer {
49    pub const DEFAULT_JOBS: usize = 8;
50
51    /// Creates a new `DirectoryComparer` for the two given directories.
52    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
53        Self {
54            dir1,
55            dir2,
56            is_symbols_format: false,
57            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
58            comparison_method: FileComparisonMethod::Hash,
59            exclude: None,
60            progress: None,
61            jobs: Self::DEFAULT_JOBS,
62        }
63    }
64
65    /// Executes the directory comparison and prints results to stdout.
66    /// This is a convenience method for CLI usage.
67    pub fn run(&self) -> anyhow::Result<()> {
68        if self.dir1.is_file() {
69            return self.run_file_comparer();
70        }
71
72        let progress = self
73            .progress
74            .as_ref()
75            .map(|progress| progress.add_spinner())
76            .unwrap_or_else(Progress::none);
77        progress.set_message("Scanning directories...");
78        let start_time = std::time::Instant::now();
79        let mut summary = ComparisonSummary::default();
80        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
81        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
82        let (tx, rx) = mpsc::channel();
83        std::thread::scope(|scope| {
84            scope.spawn(move || {
85                if let Err(e) = self.compare_streaming_ordered(tx) {
86                    log::error!("Error during comparison: {}", e);
87                }
88            });
89
90            // Receive results and update summary/UI
91            while let Ok(event) = rx.recv() {
92                match event {
93                    CompareProgress::StartOfComparison => {
94                        progress.set_message("Comparing files...");
95                    }
96                    CompareProgress::TotalFiles(total_files) => {
97                        progress.set_length(total_files as u64);
98                        progress.set_message("");
99                    }
100                    CompareProgress::Result(_, result) => {
101                        summary.update(&result);
102                        if self.is_symbols_format {
103                            progress.suspend_for(stdout(), || {
104                                println!(
105                                    "{} {}",
106                                    result.to_symbol_string(),
107                                    result.relative_path.display()
108                                );
109                            })
110                        } else if !result.is_identical() {
111                            progress.suspend_for(stdout(), || {
112                                println!(
113                                    "{}: {}",
114                                    result.relative_path.display(),
115                                    result.to_string(dir1_str, dir2_str)
116                                );
117                            });
118                        }
119                    }
120                    CompareProgress::FileDone => progress.inc(1),
121                    CompareProgress::Error => summary.num_errors += 1,
122                }
123            }
124        });
125        progress.finish();
126        eprintln!("\n--- Comparison Summary ---");
127        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
128        eprintln!(
129            "Comparison finished in {}.",
130            FormattedDuration(start_time.elapsed())
131        );
132        Ok(())
133    }
134
135    /// Performs the directory comparison and streams results via a channel.
136    ///
137    /// # Arguments
138    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
139    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
140        crate::sort_stream(
141            tx,
142            |tx_unordered| self.compare_streaming(tx_unordered),
143            |event| match event {
144                CompareProgress::Result(i, _) => Some(*i),
145                _ => None,
146            },
147        )
148    }
149
150    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
151        let mut it1 = FileIterator::new(self.dir1.clone());
152        let mut it2 = FileIterator::new(self.dir2.clone());
153        it1.exclude = self.exclude.as_ref();
154        it2.exclude = self.exclude.as_ref();
155        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
156        if let Some((h1, h2)) = &hashers {
157            it1.hasher = Some(h1);
158            it2.hasher = Some(h2);
159            if self.comparison_method == FileComparisonMethod::Rehash {
160                h1.clear_cache()?;
161                h2.clear_cache()?;
162            }
163        }
164        let hashers_ref = hashers.as_ref();
165        std::thread::scope(|global_scope| {
166            let it1_rx = it1.spawn_in_scope(global_scope);
167            let it2_rx = it2.spawn_in_scope(global_scope);
168            let pool = crate::build_thread_pool(self.jobs)?;
169            pool.scope(move |scope| {
170                let mut cur1 = it1_rx.recv().ok();
171                let mut cur2 = it2_rx.recv().ok();
172                let mut index = 0;
173                tx.send(CompareProgress::StartOfComparison)?;
174                loop {
175                    let cmp = match (&cur1, &cur2) {
176                        (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
177                        (Some(_), None) => Ordering::Less,
178                        (None, Some(_)) => Ordering::Greater,
179                        (None, None) => break,
180                    };
181                    match cmp {
182                        Ordering::Less => {
183                            let (rel1, _) = cur1.take().unwrap();
184                            let result =
185                                FileComparisonResult::new(rel1, Classification::OnlyInDir1);
186                            tx.send(CompareProgress::Result(index, result))?;
187                            tx.send(CompareProgress::FileDone)?;
188                            index += 1;
189                            cur1 = it1_rx.recv().ok();
190                        }
191                        Ordering::Greater => {
192                            let (rel2, _) = cur2.take().unwrap();
193                            let result =
194                                FileComparisonResult::new(rel2, Classification::OnlyInDir2);
195                            tx.send(CompareProgress::Result(index, result))?;
196                            tx.send(CompareProgress::FileDone)?;
197                            index += 1;
198                            cur2 = it2_rx.recv().ok();
199                        }
200                        Ordering::Equal => {
201                            let (rel_path, path1) = cur1.take().unwrap();
202                            let (_, path2) = cur2.take().unwrap();
203                            let buffer_size = self.buffer_size;
204                            let tx_clone = tx.clone();
205                            let i = index;
206                            let should_compare =
207                                self.comparison_method != FileComparisonMethod::Size;
208                            scope.spawn(move |_| {
209                                let mut comparer = FileComparer::new(&path1, &path2);
210                                comparer.buffer_size = buffer_size;
211                                if let Some((h1, h2)) = hashers_ref {
212                                    comparer.hashers = Some((h1, h2));
213                                }
214                                let mut result = FileComparisonResult::new(
215                                    rel_path.clone(),
216                                    Classification::InBoth,
217                                );
218                                let event = match result.update(&comparer, should_compare) {
219                                    Ok(_) => CompareProgress::Result(i, result),
220                                    Err(error) => {
221                                        log::error!(
222                                            "Error during comparison of {:?}: {}",
223                                            rel_path,
224                                            error
225                                        );
226                                        CompareProgress::Error
227                                    }
228                                };
229                                if tx_clone.send(event).is_err()
230                                    || tx_clone.send(CompareProgress::FileDone).is_err()
231                                {
232                                    log::error!("Send failed during comparison of {:?}", rel_path);
233                                }
234                            });
235                            index += 1;
236                            cur1 = it1_rx.recv().ok();
237                            cur2 = it2_rx.recv().ok();
238                        }
239                    }
240                }
241                tx.send(CompareProgress::TotalFiles(index))
242            })?;
243            Ok::<(), anyhow::Error>(())
244        })?;
245
246        Self::save_hashers(hashers)?;
247        Ok(())
248    }
249
250    fn get_hashers(
251        &self,
252        dir1: &Path,
253        dir2: &Path,
254    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
255        if self.comparison_method == FileComparisonMethod::Hash
256            || self.comparison_method == FileComparisonMethod::Rehash
257        {
258            let (h1_res, h2_res) =
259                rayon::join(|| FileHasher::new(&[dir1]), || FileHasher::new(&[dir2]));
260            let mut h1 = h1_res?;
261            let mut h2 = h2_res?;
262            h1.buffer_size = self.buffer_size;
263            h2.buffer_size = self.buffer_size;
264            if let Some(progress) = self.progress.as_ref() {
265                h1.progress = Some(Arc::clone(progress));
266                h2.progress = Some(Arc::clone(progress));
267            }
268            return Ok(Some((h1, h2)));
269        }
270        Ok(None)
271    }
272
273    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
274        if let Some((h1, h2)) = hashers {
275            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
276            r1?;
277            r2?;
278        }
279        Ok(())
280    }
281
282    fn run_file_comparer(&self) -> anyhow::Result<()> {
283        assert!(self.dir1.is_file());
284        let file1 = &self.dir1;
285        let dir1 = file1.parent().unwrap();
286        let file1_name = file1.file_name().unwrap();
287        let (dir2, file2) = if self.dir2.is_file() {
288            (self.dir2.parent().unwrap(), self.dir2.clone())
289        } else {
290            (self.dir2.as_path(), self.dir2.join(file1_name))
291        };
292
293        let mut comparer = FileComparer::new(file1, &file2);
294        comparer.buffer_size = self.buffer_size;
295        let hashers = self.get_hashers(dir1, dir2)?;
296        if let Some((h1, h2)) = &hashers {
297            if self.comparison_method == FileComparisonMethod::Rehash {
298                h1.remove_cache_entry(file1)?;
299                h2.remove_cache_entry(&file2)?;
300            }
301            comparer.hashers = Some((h1, h2));
302        }
303        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
304        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
305        result.update(&comparer, should_compare_content)?;
306        let file1_str = file1.to_str().unwrap_or("file1");
307        if self.is_symbols_format {
308            println!("{} {}", result.to_symbol_string(), file1_str);
309        } else {
310            let file2_str = file2.to_str().unwrap_or("file2");
311            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
312        }
313        Self::save_hashers(hashers)?;
314        Ok(())
315    }
316}
317
/// Counters accumulated over all comparison results of one run.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the first side.
    pub only_in_dir1: usize,
    /// Entries present only on the second side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose first-side copy has the later modification time.
    pub dir1_newer: usize,
    /// Entries on both sides whose second-side copy has the later modification time.
    pub dir2_newer: usize,
    /// Entries on both sides whose first-side copy is larger.
    pub dir1_larger: usize,
    /// Entries on both sides whose second-side copy is larger.
    pub dir2_larger: usize,
    /// Entries on both sides with equal size but different content.
    pub diff_content: usize,
    /// Entries on both sides for which a time, size or content verdict is missing.
    pub not_comparable: usize,
    /// Number of comparisons that reported an error.
    pub num_errors: usize,
}
331
332impl ComparisonSummary {
333    pub fn update(&mut self, result: &FileComparisonResult) {
334        match result.classification {
335            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
336            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
337            Classification::InBoth => {
338                self.in_both += 1;
339                let mut is_not_comparable = false;
340                match result.modified_time_comparison {
341                    Some(Ordering::Greater) => self.dir1_newer += 1,
342                    Some(Ordering::Less) => self.dir2_newer += 1,
343                    Some(Ordering::Equal) => {}
344                    None => is_not_comparable = true,
345                }
346                match result.size_comparison {
347                    Some(Ordering::Greater) => self.dir1_larger += 1,
348                    Some(Ordering::Less) => self.dir2_larger += 1,
349                    Some(Ordering::Equal) => match result.is_content_same {
350                        Some(false) => self.diff_content += 1,
351                        Some(true) => {}
352                        None => is_not_comparable = true,
353                    },
354                    None => is_not_comparable = true,
355                }
356                if is_not_comparable {
357                    self.not_comparable += 1;
358                }
359            }
360        }
361    }
362
363    pub fn print(
364        &self,
365        mut writer: impl std::io::Write,
366        dir1_name: &str,
367        dir2_name: &str,
368    ) -> std::io::Result<()> {
369        let values = [
370            ("Files in both:", self.in_both),
371            ("Only in left:", self.only_in_dir1),
372            ("Only in right:", self.only_in_dir2),
373            ("Left is newer:", self.dir1_newer),
374            ("Right is newer:", self.dir2_newer),
375            ("Left is larger:", self.dir1_larger),
376            ("Right is larger:", self.dir2_larger),
377            ("Different content:", self.diff_content),
378            ("Not comparable:", self.not_comparable),
379            ("Errors:", self.num_errors),
380        ];
381        let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
382        formatter.write_value(&mut writer, "Left:", dir1_name)?;
383        formatter.write_value(&mut writer, "Right:", dir2_name)?;
384        formatter.write_values(&mut writer, values)?;
385        Ok(())
386    }
387}
388
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Creates `name` inside `dir` and fills it with `contents`.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> anyhow::Result<()> {
        let mut file = fs::File::create(dir.join(name))?;
        file.write_all(contents)?;
        Ok(())
    }

    /// Runs the ordered comparison and returns the results it emitted, in arrival order.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut both = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        both.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &both] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Left side: one shared file, one left-only file, one differing file.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir1.path(), "diff.txt", b"content 1")?;

        // Right side: the shared file, one right-only file, and the differing
        // file with different length and content.
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different content: size mode must not look at the content.
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}