Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, Progress,
3    ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::cmp::Ordering;
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, mpsc};
10
/// Events sent over the channel from the comparison code to the consumer
/// loop in `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the merge loop over the two file listings starts.
    StartOfComparison,
    /// Sent after each `Result`; the consumer advances the progress bar by one.
    FileDone,
    /// Number of results produced in total; sent after the merge loop has ended.
    TotalFiles(usize),
    /// One comparison result together with its sequence index, which
    /// `compare_streaming_ordered` hands to `crate::sort_stream` as the key.
    Result(usize, FileComparisonResult),
}
18
/// Methods for comparing files.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time.
    ///
    /// Content comparison is switched off when this method is selected.
    Size,
    /// Compare by hash (BLAKE3).
    Hash,
    /// Compare by hash, without using the cached hashes.
    ///
    /// The hash cache is cleared before a directory comparison; for a
    /// single-file comparison only the entries of the two files are removed.
    Rehash,
    /// Compare byte-by-byte.
    Full,
}
31
/// A tool for comparing the contents of two directories.
///
/// The public fields are options that can be adjusted after construction
/// with [`DirectoryComparer::new`] and before calling `run`.
pub struct DirectoryComparer {
    /// First ("left") path. `run` switches to single-file mode when this is a file.
    dir1: PathBuf,
    /// Second ("right") path.
    dir2: PathBuf,
    /// When `true`, `run` prints every result as a symbol string followed by
    /// the path; when `false`, only non-identical results are printed.
    pub is_symbols_format: bool,
    /// Buffer size forwarded to the file comparers and hashers.
    pub buffer_size: usize,
    /// How files present on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both file iterators as their `exclude` filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; without it a no-op progress is used.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the comparison workers.
    pub jobs: usize,
}
43
44impl DirectoryComparer {
    /// Value that `new` assigns to the `jobs` field.
    pub const DEFAULT_JOBS: usize = 8;
46
47    /// Creates a new `DirectoryComparer` for the two given directories.
48    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
49        Self {
50            dir1,
51            dir2,
52            is_symbols_format: false,
53            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
54            comparison_method: FileComparisonMethod::Hash,
55            exclude: None,
56            progress: None,
57            jobs: Self::DEFAULT_JOBS,
58        }
59    }
60
61    /// Executes the directory comparison and prints results to stdout.
62    /// This is a convenience method for CLI usage.
63    pub fn run(&self) -> anyhow::Result<()> {
64        if self.dir1.is_file() {
65            return self.run_file_comparer();
66        }
67
68        let progress = self
69            .progress
70            .as_ref()
71            .map(|progress| progress.add_spinner())
72            .unwrap_or_else(Progress::none);
73        progress.set_message("Scanning directories...");
74        let start_time = std::time::Instant::now();
75        let mut summary = ComparisonSummary::default();
76        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
77        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
78        let (tx, rx) = mpsc::channel();
79        std::thread::scope(|scope| {
80            scope.spawn(move || {
81                if let Err(e) = self.compare_streaming_ordered(tx) {
82                    log::error!("Error during comparison: {}", e);
83                }
84            });
85
86            // Receive results and update summary/UI
87            while let Ok(event) = rx.recv() {
88                match event {
89                    CompareProgress::StartOfComparison => {
90                        progress.set_message("Comparing files...");
91                    }
92                    CompareProgress::TotalFiles(total_files) => {
93                        progress.set_length(total_files as u64);
94                        progress.set_message("");
95                    }
96                    CompareProgress::Result(_, result) => {
97                        summary.update(&result);
98                        if self.is_symbols_format {
99                            progress.suspend(|| {
100                                println!(
101                                    "{} {}",
102                                    result.to_symbol_string(),
103                                    result.relative_path.display()
104                                );
105                            })
106                        } else if !result.is_identical() {
107                            progress.suspend(|| {
108                                println!(
109                                    "{}: {}",
110                                    result.relative_path.display(),
111                                    result.to_string(dir1_str, dir2_str)
112                                );
113                            });
114                        }
115                    }
116                    CompareProgress::FileDone => progress.inc(1),
117                }
118            }
119        });
120        progress.finish();
121        eprintln!("\n--- Comparison Summary ---");
122        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
123        eprintln!(
124            "Comparison finished in {}.",
125            FormattedDuration(start_time.elapsed())
126        );
127        Ok(())
128    }
129
130    /// Performs the directory comparison and streams results via a channel.
131    ///
132    /// # Arguments
133    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
134    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
135        crate::sort_stream(
136            tx,
137            |tx_unordered| self.compare_streaming(tx_unordered),
138            |event| match event {
139                CompareProgress::Result(i, _) => Some(*i),
140                _ => None,
141            },
142        )
143    }
144
    /// Walks both directories concurrently, merges the two listings by
    /// relative path and sends one `Result` + `FileDone` pair per path to `tx`.
    ///
    /// Paths found on one side only are reported immediately from the merge
    /// loop; paths found on both sides are compared on the thread pool, so
    /// their results may be sent out of index order. `StartOfComparison` is
    /// sent before the merge loop and `TotalFiles` after it.
    ///
    /// # Errors
    /// Fails when the hashers cannot be set up or saved, the thread pool
    /// cannot be built, or a send from the merge loop fails.
    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
        let mut it1 = FileIterator::new(self.dir1.clone());
        let mut it2 = FileIterator::new(self.dir2.clone());
        it1.exclude = self.exclude.as_ref();
        it2.exclude = self.exclude.as_ref();
        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
        if let Some((h1, h2)) = &hashers {
            it1.hasher = Some(h1);
            it2.hasher = Some(h2);
            // Rehash mode starts from empty caches.
            if self.comparison_method == FileComparisonMethod::Rehash {
                h1.clear_cache()?;
                h2.clear_cache()?;
            }
        }
        let hashers_ref = hashers.as_ref();
        std::thread::scope(|global_scope| {
            // Each iterator runs in the outer scope and delivers its entries
            // through a receiver.
            let it1_rx = it1.spawn_in_scope(global_scope);
            let it2_rx = it2.spawn_in_scope(global_scope);
            let pool = crate::build_thread_pool(self.jobs)?;
            pool.scope(move |scope| {
                // `None` means the corresponding iterator is exhausted.
                let mut cur1 = it1_rx.recv().ok();
                let mut cur2 = it2_rx.recv().ok();
                let mut index = 0;
                tx.send(CompareProgress::StartOfComparison)?;
                // Merge step; presumably relies on both iterators yielding
                // entries in ascending relative-path order — TODO confirm
                // against `FileIterator`.
                loop {
                    let cmp = match (&cur1, &cur2) {
                        (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
                        (Some(_), None) => Ordering::Less,
                        (None, Some(_)) => Ordering::Greater,
                        (None, None) => break,
                    };
                    match cmp {
                        // Path exists only on the dir1 side.
                        Ordering::Less => {
                            let (rel1, _) = cur1.take().unwrap();
                            let result =
                                FileComparisonResult::new(rel1, Classification::OnlyInDir1);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::FileDone)?;
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                        }
                        // Path exists only on the dir2 side.
                        Ordering::Greater => {
                            let (rel2, _) = cur2.take().unwrap();
                            let result =
                                FileComparisonResult::new(rel2, Classification::OnlyInDir2);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::FileDone)?;
                            index += 1;
                            cur2 = it2_rx.recv().ok();
                        }
                        // Path exists on both sides: compare on the pool.
                        Ordering::Equal => {
                            let (rel_path, path1) = cur1.take().unwrap();
                            let (_, path2) = cur2.take().unwrap();
                            let buffer_size = self.buffer_size;
                            let tx_clone = tx.clone();
                            // Capture the index now; the task may run after
                            // `index` has advanced.
                            let i = index;
                            let should_compare =
                                self.comparison_method != FileComparisonMethod::Size;
                            scope.spawn(move |_| {
                                let mut comparer = FileComparer::new(&path1, &path2);
                                comparer.buffer_size = buffer_size;
                                if let Some((h1, h2)) = hashers_ref {
                                    comparer.hashers = Some((h1, h2));
                                }
                                let mut result = FileComparisonResult::new(
                                    rel_path.clone(),
                                    Classification::InBoth,
                                );
                                // A failed comparison is logged; the result is
                                // still sent in whatever state `update` left it.
                                if let Err(error) = result.update(&comparer, should_compare) {
                                    log::error!(
                                        "Error during comparison of {:?}: {}",
                                        rel_path,
                                        error
                                    );
                                }
                                if tx_clone.send(CompareProgress::Result(i, result)).is_err()
                                    || tx_clone.send(CompareProgress::FileDone).is_err()
                                {
                                    log::error!("Send failed during comparison of {:?}", rel_path);
                                }
                            });
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                            cur2 = it2_rx.recv().ok();
                        }
                    }
                }
                // `index` now equals the number of results issued.
                tx.send(CompareProgress::TotalFiles(index))
            })?;
            Ok::<(), anyhow::Error>(())
        })?;

        // Only reached when the comparison above succeeded.
        Self::save_hashers(hashers)?;
        Ok(())
    }
240
241    fn get_hashers(
242        &self,
243        dir1: &Path,
244        dir2: &Path,
245    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
246        if self.comparison_method == FileComparisonMethod::Hash
247            || self.comparison_method == FileComparisonMethod::Rehash
248        {
249            let (mut h1, mut h2) = rayon::join(
250                || FileHasher::new(dir1.to_path_buf()),
251                || FileHasher::new(dir2.to_path_buf()),
252            );
253            h1.buffer_size = self.buffer_size;
254            h2.buffer_size = self.buffer_size;
255            if let Some(progress) = self.progress.as_ref() {
256                h1.progress = Some(Arc::clone(progress));
257                h2.progress = Some(Arc::clone(progress));
258            }
259            return Ok(Some((h1, h2)));
260        }
261        Ok(None)
262    }
263
264    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
265        if let Some((h1, h2)) = hashers {
266            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
267            r1?;
268            r2?;
269        }
270        Ok(())
271    }
272
273    fn run_file_comparer(&self) -> anyhow::Result<()> {
274        assert!(self.dir1.is_file());
275        let file1 = &self.dir1;
276        let dir1 = file1.parent().unwrap();
277        let file1_name = file1.file_name().unwrap();
278        let (dir2, file2) = if self.dir2.is_file() {
279            (self.dir2.parent().unwrap(), self.dir2.clone())
280        } else {
281            (self.dir2.as_path(), self.dir2.join(file1_name))
282        };
283
284        let mut comparer = FileComparer::new(file1, &file2);
285        comparer.buffer_size = self.buffer_size;
286        let hashers = self.get_hashers(dir1, dir2)?;
287        if let Some((h1, h2)) = &hashers {
288            if self.comparison_method == FileComparisonMethod::Rehash {
289                h1.remove_cache_entry(file1)?;
290                h2.remove_cache_entry(&file2)?;
291            }
292            comparer.hashers = Some((h1, h2));
293        }
294        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
295        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
296        result.update(&comparer, should_compare_content)?;
297        let file1_str = file1.to_str().unwrap_or("file1");
298        if self.is_symbols_format {
299            println!("{} {}", result.to_symbol_string(), file1_str);
300        } else {
301            let file2_str = file2.to_str().unwrap_or("file2");
302            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
303        }
304        Self::save_hashers(hashers)?;
305        Ok(())
306    }
307}
308
/// Running totals over all comparison results, filled by `update` and
/// rendered by `print`.
#[derive(Default)]
struct ComparisonSummary {
    /// Results classified as present in both directories.
    pub in_both: usize,
    /// Results classified as present only in the first directory.
    pub only_in_dir1: usize,
    /// Results classified as present only in the second directory.
    pub only_in_dir2: usize,
    /// In-both results whose modified-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// In-both results whose modified-time comparison is `Less`.
    pub dir2_newer: usize,
    /// In-both results whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// In-both results whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// In-both results of equal size whose content was found to differ.
    pub diff_content: usize,
    /// In-both results lacking a modified-time comparison, a size comparison,
    /// or (for equal sizes) a content comparison.
    pub not_comparable: usize,
}
321
322impl ComparisonSummary {
323    pub fn update(&mut self, result: &FileComparisonResult) {
324        match result.classification {
325            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
326            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
327            Classification::InBoth => {
328                self.in_both += 1;
329                let mut is_not_comparable = false;
330                match result.modified_time_comparison {
331                    Some(Ordering::Greater) => self.dir1_newer += 1,
332                    Some(Ordering::Less) => self.dir2_newer += 1,
333                    Some(Ordering::Equal) => {}
334                    None => is_not_comparable = true,
335                }
336                match result.size_comparison {
337                    Some(Ordering::Greater) => self.dir1_larger += 1,
338                    Some(Ordering::Less) => self.dir2_larger += 1,
339                    Some(Ordering::Equal) => match result.is_content_same {
340                        Some(false) => self.diff_content += 1,
341                        Some(true) => {}
342                        None => is_not_comparable = true,
343                    },
344                    None => is_not_comparable = true,
345                }
346                if is_not_comparable {
347                    self.not_comparable += 1;
348                }
349            }
350        }
351    }
352
353    pub fn print(
354        &self,
355        mut writer: impl std::io::Write,
356        dir1_name: &str,
357        dir2_name: &str,
358    ) -> std::io::Result<()> {
359        let values = [
360            ("Files in both:", self.in_both),
361            ("Only in left:", self.only_in_dir1),
362            ("Only in right:", self.only_in_dir2),
363            ("Left is newer:", self.dir1_newer),
364            ("Right is newer:", self.dir2_newer),
365            ("Left is larger:", self.dir1_larger),
366            ("Right is larger:", self.dir2_larger),
367            ("Different content:", self.diff_content),
368            ("Not comparable:", self.not_comparable),
369        ];
370        let max_len = values.iter().map(|(s, _)| s.len()).max().unwrap();
371        writeln!(writer, "{:width$} {}", "Left:", dir1_name, width = max_len)?;
372        writeln!(writer, "{:width$} {}", "Right:", dir2_name, width = max_len)?;
373        for (label, value) in values {
374            writeln!(writer, "{:width$} {}", label, value, width = max_len)?;
375        }
376        Ok(())
377    }
378}
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383    use std::fs;
384    use std::io::Write;
385
386    #[test]
387    fn comparison_summary() {
388        let mut summary = ComparisonSummary::default();
389        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
390        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
391        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
392        res3.modified_time_comparison = Some(Ordering::Greater);
393
394        summary.update(&res1);
395        summary.update(&res2);
396        summary.update(&res3);
397
398        assert_eq!(summary.only_in_dir1, 1);
399        assert_eq!(summary.only_in_dir2, 1);
400        assert_eq!(summary.in_both, 1);
401        assert_eq!(summary.dir1_newer, 1);
402    }
403
404    #[test]
405    fn directory_comparer_integration() -> anyhow::Result<()> {
406        let dir1 = tempfile::tempdir()?;
407        let dir2 = tempfile::tempdir()?;
408
409        // Create files in dir1
410        let file1_path = dir1.path().join("same.txt");
411        let mut file1 = fs::File::create(&file1_path)?;
412        file1.write_all(b"same content")?;
413
414        let only1_path = dir1.path().join("only1.txt");
415        let mut only1 = fs::File::create(&only1_path)?;
416        only1.write_all(b"only in dir1")?;
417
418        // Create files in dir2
419        let file2_path = dir2.path().join("same.txt");
420        let mut file2 = fs::File::create(&file2_path)?;
421        file2.write_all(b"same content")?;
422
423        let only2_path = dir2.path().join("only2.txt");
424        let mut only2 = fs::File::create(&only2_path)?;
425        only2.write_all(b"only in dir2")?;
426
427        // Create a different file
428        let diff1_path = dir1.path().join("diff.txt");
429        let mut diff1 = fs::File::create(&diff1_path)?;
430        diff1.write_all(b"content 1")?;
431
432        let diff2_path = dir2.path().join("diff.txt");
433        let mut diff2 = fs::File::create(&diff2_path)?;
434        diff2.write_all(b"content 222")?; // different length and content
435
436        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
437        let (tx, rx) = mpsc::channel();
438
439        comparer.compare_streaming_ordered(tx)?;
440
441        let mut results = Vec::new();
442        while let Ok(res) = rx.recv() {
443            if let CompareProgress::Result(_, r) = res {
444                results.push(r);
445            }
446        }
447
448        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
449
450        assert_eq!(results.len(), 4);
451
452        // diff.txt
453        assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
454        assert_eq!(results[0].classification, Classification::InBoth);
455        assert!(
456            results[0].is_content_same == Some(false)
457                || results[0].size_comparison != Some(Ordering::Equal)
458        );
459
460        // only1.txt
461        assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
462        assert_eq!(results[1].classification, Classification::OnlyInDir1);
463
464        // only2.txt
465        assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
466        assert_eq!(results[2].classification, Classification::OnlyInDir2);
467
468        // same.txt
469        assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
470        assert_eq!(results[3].classification, Classification::InBoth);
471        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));
472
473        Ok(())
474    }
475
476    #[test]
477    fn directory_comparer_size_mode() -> anyhow::Result<()> {
478        let dir1 = tempfile::tempdir()?;
479        let dir2 = tempfile::tempdir()?;
480
481        let file1_path = dir1.path().join("file.txt");
482        let mut file1 = fs::File::create(&file1_path)?;
483        file1.write_all(b"content 1")?;
484
485        let file2_path = dir2.path().join("file.txt");
486        let mut file2 = fs::File::create(&file2_path)?;
487        file2.write_all(b"content 2")?; // same length, different content
488
489        let mut comparer =
490            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
491        comparer.comparison_method = FileComparisonMethod::Size;
492        let (tx, rx) = mpsc::channel();
493
494        comparer.compare_streaming_ordered(tx)?;
495
496        let mut results = Vec::new();
497        while let Ok(res) = rx.recv() {
498            if let CompareProgress::Result(_, r) = res {
499                results.push(r);
500            }
501        }
502
503        assert_eq!(results.len(), 1);
504        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
505        assert_eq!(results[0].classification, Classification::InBoth);
506        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
507        assert_eq!(results[0].is_content_same, None);
508
509        Ok(())
510    }
511}