Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3    Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8    cmp::Ordering,
9    io::stdout,
10    path::{Path, PathBuf},
11    sync::{Arc, mpsc},
12};
13
14#[derive(Debug, Clone)]
15enum CompareProgress {
16    StartOfComparison,
17    FileDone,
18    TotalFiles(usize),
19    Result(usize, FileComparisonResult),
20}
21
/// Strategies available for deciding whether two files are the same.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileComparisonMethod {
    /// Look at size and modification time only; file content is not compared.
    Size,
    /// Compare content by hash (BLAKE3).
    Hash,
    /// Compare content by hash, discarding previously cached hashes first.
    Rehash,
    /// Compare content byte-by-byte.
    Full,
}
34
35/// A tool for comparing the contents of two directories.
36pub struct DirectoryComparer {
37    dir1: PathBuf,
38    dir2: PathBuf,
39    pub is_symbols_format: bool,
40    pub buffer_size: usize,
41    pub comparison_method: FileComparisonMethod,
42    pub exclude: Option<GlobSet>,
43    pub progress: Option<Arc<ProgressBuilder>>,
44    pub jobs: usize,
45}
46
47impl DirectoryComparer {
48    pub const DEFAULT_JOBS: usize = 8;
49
50    /// Creates a new `DirectoryComparer` for the two given directories.
51    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
52        Self {
53            dir1,
54            dir2,
55            is_symbols_format: false,
56            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
57            comparison_method: FileComparisonMethod::Hash,
58            exclude: None,
59            progress: None,
60            jobs: Self::DEFAULT_JOBS,
61        }
62    }
63
64    /// Executes the directory comparison and prints results to stdout.
65    /// This is a convenience method for CLI usage.
66    pub fn run(&self) -> anyhow::Result<()> {
67        if self.dir1.is_file() {
68            return self.run_file_comparer();
69        }
70
71        let progress = self
72            .progress
73            .as_ref()
74            .map(|progress| progress.add_spinner())
75            .unwrap_or_else(Progress::none);
76        progress.set_message("Scanning directories...");
77        let start_time = std::time::Instant::now();
78        let mut summary = ComparisonSummary::default();
79        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
80        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
81        let (tx, rx) = mpsc::channel();
82        std::thread::scope(|scope| {
83            scope.spawn(move || {
84                if let Err(e) = self.compare_streaming_ordered(tx) {
85                    log::error!("Error during comparison: {}", e);
86                }
87            });
88
89            // Receive results and update summary/UI
90            while let Ok(event) = rx.recv() {
91                match event {
92                    CompareProgress::StartOfComparison => {
93                        progress.set_message("Comparing files...");
94                    }
95                    CompareProgress::TotalFiles(total_files) => {
96                        progress.set_length(total_files as u64);
97                        progress.set_message("");
98                    }
99                    CompareProgress::Result(_, result) => {
100                        summary.update(&result);
101                        if self.is_symbols_format {
102                            progress.suspend_for(stdout(), || {
103                                println!(
104                                    "{} {}",
105                                    result.to_symbol_string(),
106                                    result.relative_path.display()
107                                );
108                            })
109                        } else if !result.is_identical() {
110                            progress.suspend_for(stdout(), || {
111                                println!(
112                                    "{}: {}",
113                                    result.relative_path.display(),
114                                    result.to_string(dir1_str, dir2_str)
115                                );
116                            });
117                        }
118                    }
119                    CompareProgress::FileDone => progress.inc(1),
120                }
121            }
122        });
123        progress.finish();
124        eprintln!("\n--- Comparison Summary ---");
125        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
126        eprintln!(
127            "Comparison finished in {}.",
128            FormattedDuration(start_time.elapsed())
129        );
130        Ok(())
131    }
132
133    /// Performs the directory comparison and streams results via a channel.
134    ///
135    /// # Arguments
136    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
137    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
138        crate::sort_stream(
139            tx,
140            |tx_unordered| self.compare_streaming(tx_unordered),
141            |event| match event {
142                CompareProgress::Result(i, _) => Some(*i),
143                _ => None,
144            },
145        )
146    }
147
148    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
149        let mut it1 = FileIterator::new(self.dir1.clone());
150        let mut it2 = FileIterator::new(self.dir2.clone());
151        it1.exclude = self.exclude.as_ref();
152        it2.exclude = self.exclude.as_ref();
153        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
154        if let Some((h1, h2)) = &hashers {
155            it1.hasher = Some(h1);
156            it2.hasher = Some(h2);
157            if self.comparison_method == FileComparisonMethod::Rehash {
158                h1.clear_cache()?;
159                h2.clear_cache()?;
160            }
161        }
162        let hashers_ref = hashers.as_ref();
163        std::thread::scope(|global_scope| {
164            let it1_rx = it1.spawn_in_scope(global_scope);
165            let it2_rx = it2.spawn_in_scope(global_scope);
166            let pool = crate::build_thread_pool(self.jobs)?;
167            pool.scope(move |scope| {
168                let mut cur1 = it1_rx.recv().ok();
169                let mut cur2 = it2_rx.recv().ok();
170                let mut index = 0;
171                tx.send(CompareProgress::StartOfComparison)?;
172                loop {
173                    let cmp = match (&cur1, &cur2) {
174                        (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
175                        (Some(_), None) => Ordering::Less,
176                        (None, Some(_)) => Ordering::Greater,
177                        (None, None) => break,
178                    };
179                    match cmp {
180                        Ordering::Less => {
181                            let (rel1, _) = cur1.take().unwrap();
182                            let result =
183                                FileComparisonResult::new(rel1, Classification::OnlyInDir1);
184                            tx.send(CompareProgress::Result(index, result))?;
185                            tx.send(CompareProgress::FileDone)?;
186                            index += 1;
187                            cur1 = it1_rx.recv().ok();
188                        }
189                        Ordering::Greater => {
190                            let (rel2, _) = cur2.take().unwrap();
191                            let result =
192                                FileComparisonResult::new(rel2, Classification::OnlyInDir2);
193                            tx.send(CompareProgress::Result(index, result))?;
194                            tx.send(CompareProgress::FileDone)?;
195                            index += 1;
196                            cur2 = it2_rx.recv().ok();
197                        }
198                        Ordering::Equal => {
199                            let (rel_path, path1) = cur1.take().unwrap();
200                            let (_, path2) = cur2.take().unwrap();
201                            let buffer_size = self.buffer_size;
202                            let tx_clone = tx.clone();
203                            let i = index;
204                            let should_compare =
205                                self.comparison_method != FileComparisonMethod::Size;
206                            scope.spawn(move |_| {
207                                let mut comparer = FileComparer::new(&path1, &path2);
208                                comparer.buffer_size = buffer_size;
209                                if let Some((h1, h2)) = hashers_ref {
210                                    comparer.hashers = Some((h1, h2));
211                                }
212                                let mut result = FileComparisonResult::new(
213                                    rel_path.clone(),
214                                    Classification::InBoth,
215                                );
216                                if let Err(error) = result.update(&comparer, should_compare) {
217                                    log::error!(
218                                        "Error during comparison of {:?}: {}",
219                                        rel_path,
220                                        error
221                                    );
222                                }
223                                if tx_clone.send(CompareProgress::Result(i, result)).is_err()
224                                    || tx_clone.send(CompareProgress::FileDone).is_err()
225                                {
226                                    log::error!("Send failed during comparison of {:?}", rel_path);
227                                }
228                            });
229                            index += 1;
230                            cur1 = it1_rx.recv().ok();
231                            cur2 = it2_rx.recv().ok();
232                        }
233                    }
234                }
235                tx.send(CompareProgress::TotalFiles(index))
236            })?;
237            Ok::<(), anyhow::Error>(())
238        })?;
239
240        Self::save_hashers(hashers)?;
241        Ok(())
242    }
243
244    fn get_hashers(
245        &self,
246        dir1: &Path,
247        dir2: &Path,
248    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
249        if self.comparison_method == FileComparisonMethod::Hash
250            || self.comparison_method == FileComparisonMethod::Rehash
251        {
252            let (h1_res, h2_res) =
253                rayon::join(|| FileHasher::new(&[dir1]), || FileHasher::new(&[dir2]));
254            let mut h1 = h1_res?;
255            let mut h2 = h2_res?;
256            h1.buffer_size = self.buffer_size;
257            h2.buffer_size = self.buffer_size;
258            if let Some(progress) = self.progress.as_ref() {
259                h1.progress = Some(Arc::clone(progress));
260                h2.progress = Some(Arc::clone(progress));
261            }
262            return Ok(Some((h1, h2)));
263        }
264        Ok(None)
265    }
266
267    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
268        if let Some((h1, h2)) = hashers {
269            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
270            r1?;
271            r2?;
272        }
273        Ok(())
274    }
275
276    fn run_file_comparer(&self) -> anyhow::Result<()> {
277        assert!(self.dir1.is_file());
278        let file1 = &self.dir1;
279        let dir1 = file1.parent().unwrap();
280        let file1_name = file1.file_name().unwrap();
281        let (dir2, file2) = if self.dir2.is_file() {
282            (self.dir2.parent().unwrap(), self.dir2.clone())
283        } else {
284            (self.dir2.as_path(), self.dir2.join(file1_name))
285        };
286
287        let mut comparer = FileComparer::new(file1, &file2);
288        comparer.buffer_size = self.buffer_size;
289        let hashers = self.get_hashers(dir1, dir2)?;
290        if let Some((h1, h2)) = &hashers {
291            if self.comparison_method == FileComparisonMethod::Rehash {
292                h1.remove_cache_entry(file1)?;
293                h2.remove_cache_entry(&file2)?;
294            }
295            comparer.hashers = Some((h1, h2));
296        }
297        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
298        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
299        result.update(&comparer, should_compare_content)?;
300        let file1_str = file1.to_str().unwrap_or("file1");
301        if self.is_symbols_format {
302            println!("{} {}", result.to_symbol_string(), file1_str);
303        } else {
304            let file2_str = file2.to_str().unwrap_or("file2");
305            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
306        }
307        Self::save_hashers(hashers)?;
308        Ok(())
309    }
310}
311
/// Running totals collected from the individual comparison results.
#[derive(Default)]
struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both whose modification time compares as greater in the first directory.
    pub dir1_newer: usize,
    /// Files in both whose modification time compares as greater in the second directory.
    pub dir2_newer: usize,
    /// Files in both whose size compares as greater in the first directory.
    pub dir1_larger: usize,
    /// Files in both whose size compares as greater in the second directory.
    pub dir2_larger: usize,
    /// Files in both with equal size but different content.
    pub diff_content: usize,
    /// Files in both for which a time, size or content comparison is missing.
    pub not_comparable: usize,
}
324
325impl ComparisonSummary {
326    pub fn update(&mut self, result: &FileComparisonResult) {
327        match result.classification {
328            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
329            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
330            Classification::InBoth => {
331                self.in_both += 1;
332                let mut is_not_comparable = false;
333                match result.modified_time_comparison {
334                    Some(Ordering::Greater) => self.dir1_newer += 1,
335                    Some(Ordering::Less) => self.dir2_newer += 1,
336                    Some(Ordering::Equal) => {}
337                    None => is_not_comparable = true,
338                }
339                match result.size_comparison {
340                    Some(Ordering::Greater) => self.dir1_larger += 1,
341                    Some(Ordering::Less) => self.dir2_larger += 1,
342                    Some(Ordering::Equal) => match result.is_content_same {
343                        Some(false) => self.diff_content += 1,
344                        Some(true) => {}
345                        None => is_not_comparable = true,
346                    },
347                    None => is_not_comparable = true,
348                }
349                if is_not_comparable {
350                    self.not_comparable += 1;
351                }
352            }
353        }
354    }
355
356    pub fn print(
357        &self,
358        mut writer: impl std::io::Write,
359        dir1_name: &str,
360        dir2_name: &str,
361    ) -> std::io::Result<()> {
362        let values = [
363            ("Files in both:", self.in_both),
364            ("Only in left:", self.only_in_dir1),
365            ("Only in right:", self.only_in_dir2),
366            ("Left is newer:", self.dir1_newer),
367            ("Right is newer:", self.dir2_newer),
368            ("Left is larger:", self.dir1_larger),
369            ("Right is larger:", self.dir2_larger),
370            ("Different content:", self.diff_content),
371            ("Not comparable:", self.not_comparable),
372        ];
373        let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
374        formatter.write_value(&mut writer, "Left:", dir1_name)?;
375        formatter.write_value(&mut writer, "Right:", dir2_name)?;
376        formatter.write_values(&mut writer, values)?;
377        Ok(())
378    }
379}
380
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates the file `name` inside `dir` with the given content.
    fn write_file(dir: &Path, name: &str, content: &[u8]) -> std::io::Result<()> {
        fs::write(dir.join(name), content)
    }

    /// Runs the ordered comparison and returns the payload of every `Result`
    /// event in the order it was received.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut in_both = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        in_both.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &in_both] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical file on both sides, plus one file unique to each side.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;

        // Same name, different length and content.
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        let names: Vec<&str> = results
            .iter()
            .map(|result| result.relative_path.to_str().unwrap())
            .collect();
        assert_eq!(names, ["diff.txt", "only1.txt", "only2.txt", "same.txt"]);

        // diff.txt
        let diff = &results[0];
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        // only1.txt
        assert_eq!(results[1].classification, Classification::OnlyInDir1);

        // only2.txt
        assert_eq!(results[2].classification, Classification::OnlyInDir2);

        // same.txt
        let same = &results[3];
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different content.
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        // Size mode never looks at the content.
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}
512}