// compare_dir/dir_comparer.rs

1use crate::{
2    Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, Progress,
3    ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::cmp::Ordering;
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, mpsc};
11
12#[derive(Debug, Clone)]
13enum CompareProgress {
14    StartOfComparison,
15    FileDone,
16    TotalFiles(usize),
17    Result(usize, FileComparisonResult),
18}
19
/// Strategies available for deciding whether two files are equal.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Look at size and modification time only; file content is not compared.
    Size,
    /// Compare content by hash (BLAKE3).
    Hash,
    /// Compare content by hash, discarding previously cached hashes first.
    Rehash,
    /// Compare content byte-by-byte.
    Full,
}
32
33/// A tool for comparing the contents of two directories.
34pub struct DirectoryComparer {
35    dir1: PathBuf,
36    dir2: PathBuf,
37    pub is_symbols_format: bool,
38    pub buffer_size: usize,
39    pub comparison_method: FileComparisonMethod,
40    pub exclude: Option<GlobSet>,
41    pub progress: Option<Arc<ProgressBuilder>>,
42    pub jobs: usize,
43}
44
45impl DirectoryComparer {
46    pub const DEFAULT_JOBS: usize = 8;
47
48    /// Creates a new `DirectoryComparer` for the two given directories.
49    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
50        Self {
51            dir1,
52            dir2,
53            is_symbols_format: false,
54            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
55            comparison_method: FileComparisonMethod::Hash,
56            exclude: None,
57            progress: None,
58            jobs: Self::DEFAULT_JOBS,
59        }
60    }
61
62    /// Executes the directory comparison and prints results to stdout.
63    /// This is a convenience method for CLI usage.
64    pub fn run(&self) -> anyhow::Result<()> {
65        if self.dir1.is_file() {
66            return self.run_file_comparer();
67        }
68
69        let progress = self
70            .progress
71            .as_ref()
72            .map(|progress| progress.add_spinner())
73            .unwrap_or_else(Progress::none);
74        progress.set_message("Scanning directories...");
75        let start_time = std::time::Instant::now();
76        let mut summary = ComparisonSummary::default();
77        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
78        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
79        let (tx, rx) = mpsc::channel();
80        std::thread::scope(|scope| {
81            scope.spawn(move || {
82                if let Err(e) = self.compare_streaming_ordered(tx) {
83                    log::error!("Error during comparison: {}", e);
84                }
85            });
86
87            // Receive results and update summary/UI
88            while let Ok(event) = rx.recv() {
89                match event {
90                    CompareProgress::StartOfComparison => {
91                        progress.set_message("Comparing files...");
92                    }
93                    CompareProgress::TotalFiles(total_files) => {
94                        progress.set_length(total_files as u64);
95                        progress.set_message("");
96                    }
97                    CompareProgress::Result(_, result) => {
98                        summary.update(&result);
99                        if self.is_symbols_format {
100                            progress.suspend(|| {
101                                println!(
102                                    "{} {}",
103                                    result.to_symbol_string(),
104                                    result.relative_path.display()
105                                );
106                            })
107                        } else if !result.is_identical() {
108                            progress.suspend(|| {
109                                println!(
110                                    "{}: {}",
111                                    result.relative_path.display(),
112                                    result.to_string(dir1_str, dir2_str)
113                                );
114                            });
115                        }
116                    }
117                    CompareProgress::FileDone => progress.inc(1),
118                }
119            }
120        });
121        progress.finish();
122        eprintln!("\n--- Comparison Summary ---");
123        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
124        eprintln!(
125            "Comparison finished in {}.",
126            FormattedDuration(start_time.elapsed())
127        );
128        Ok(())
129    }
130
131    /// Performs the directory comparison and streams results via a channel.
132    ///
133    /// # Arguments
134    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
135    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
136        let (tx_unordered, rx_unordered) = mpsc::channel();
137        std::thread::scope(|scope| {
138            scope.spawn(move || {
139                if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
140                    log::error!("Error during unordered comparison: {}", e);
141                }
142            });
143
144            let mut buffer = HashMap::new();
145            let mut next_index = 0;
146            for event in rx_unordered {
147                if let CompareProgress::Result(i, _) = &event {
148                    let index = *i;
149                    if index == next_index {
150                        tx.send(event)?;
151                        next_index += 1;
152                        while let Some(buffered) = buffer.remove(&next_index) {
153                            tx.send(buffered)?;
154                            next_index += 1;
155                        }
156                    } else {
157                        buffer.insert(index, event);
158                    }
159                } else {
160                    tx.send(event)?;
161                }
162            }
163            Ok::<(), anyhow::Error>(())
164        })?;
165        Ok(())
166    }
167
168    fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
169        let mut it1 = FileIterator::new(self.dir1.clone());
170        let mut it2 = FileIterator::new(self.dir2.clone());
171        it1.exclude = self.exclude.as_ref();
172        it2.exclude = self.exclude.as_ref();
173        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
174        if let Some((h1, h2)) = &hashers {
175            it1.hasher = Some(h1);
176            it2.hasher = Some(h2);
177            if self.comparison_method == FileComparisonMethod::Rehash {
178                h1.clear_cache()?;
179                h2.clear_cache()?;
180            }
181        }
182        let hashers_ref = hashers.as_ref();
183        std::thread::scope(|global_scope| {
184            let it1_rx = it1.spawn_in_scope(global_scope);
185            let it2_rx = it2.spawn_in_scope(global_scope);
186            let pool = crate::build_thread_pool(self.jobs)?;
187            pool.scope(move |scope| {
188                let mut cur1 = it1_rx.recv().ok();
189                let mut cur2 = it2_rx.recv().ok();
190                let mut index = 0;
191                tx.send(CompareProgress::StartOfComparison)?;
192                loop {
193                    let cmp = match (&cur1, &cur2) {
194                        (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
195                        (Some(_), None) => Ordering::Less,
196                        (None, Some(_)) => Ordering::Greater,
197                        (None, None) => break,
198                    };
199                    match cmp {
200                        Ordering::Less => {
201                            let (rel1, _) = cur1.take().unwrap();
202                            let result =
203                                FileComparisonResult::new(rel1, Classification::OnlyInDir1);
204                            tx.send(CompareProgress::Result(index, result))?;
205                            tx.send(CompareProgress::FileDone)?;
206                            index += 1;
207                            cur1 = it1_rx.recv().ok();
208                        }
209                        Ordering::Greater => {
210                            let (rel2, _) = cur2.take().unwrap();
211                            let result =
212                                FileComparisonResult::new(rel2, Classification::OnlyInDir2);
213                            tx.send(CompareProgress::Result(index, result))?;
214                            tx.send(CompareProgress::FileDone)?;
215                            index += 1;
216                            cur2 = it2_rx.recv().ok();
217                        }
218                        Ordering::Equal => {
219                            let (rel_path, path1) = cur1.take().unwrap();
220                            let (_, path2) = cur2.take().unwrap();
221                            let buffer_size = self.buffer_size;
222                            let tx_clone = tx.clone();
223                            let i = index;
224                            let should_compare =
225                                self.comparison_method != FileComparisonMethod::Size;
226                            scope.spawn(move |_| {
227                                let mut comparer = FileComparer::new(&path1, &path2);
228                                comparer.buffer_size = buffer_size;
229                                if let Some((h1, h2)) = hashers_ref {
230                                    comparer.hashers = Some((h1, h2));
231                                }
232                                let mut result = FileComparisonResult::new(
233                                    rel_path.clone(),
234                                    Classification::InBoth,
235                                );
236                                if let Err(error) = result.update(&comparer, should_compare) {
237                                    log::error!(
238                                        "Error during comparison of {:?}: {}",
239                                        rel_path,
240                                        error
241                                    );
242                                }
243                                if tx_clone.send(CompareProgress::Result(i, result)).is_err()
244                                    || tx_clone.send(CompareProgress::FileDone).is_err()
245                                {
246                                    log::error!("Send failed during comparison of {:?}", rel_path);
247                                }
248                            });
249                            index += 1;
250                            cur1 = it1_rx.recv().ok();
251                            cur2 = it2_rx.recv().ok();
252                        }
253                    }
254                }
255                tx.send(CompareProgress::TotalFiles(index))
256            })?;
257            Ok::<(), anyhow::Error>(())
258        })?;
259
260        Self::save_hashers(hashers)?;
261        Ok(())
262    }
263
264    fn get_hashers(
265        &self,
266        dir1: &Path,
267        dir2: &Path,
268    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
269        if self.comparison_method == FileComparisonMethod::Hash
270            || self.comparison_method == FileComparisonMethod::Rehash
271        {
272            let (mut h1, mut h2) = rayon::join(
273                || FileHasher::new(dir1.to_path_buf()),
274                || FileHasher::new(dir2.to_path_buf()),
275            );
276            h1.buffer_size = self.buffer_size;
277            h2.buffer_size = self.buffer_size;
278            if let Some(progress) = self.progress.as_ref() {
279                h1.progress = Some(Arc::clone(progress));
280                h2.progress = Some(Arc::clone(progress));
281            }
282            return Ok(Some((h1, h2)));
283        }
284        Ok(None)
285    }
286
287    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
288        if let Some((h1, h2)) = hashers {
289            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
290            r1?;
291            r2?;
292        }
293        Ok(())
294    }
295
296    fn run_file_comparer(&self) -> anyhow::Result<()> {
297        assert!(self.dir1.is_file());
298        let file1 = &self.dir1;
299        let dir1 = file1.parent().unwrap();
300        let file1_name = file1.file_name().unwrap();
301        let (dir2, file2) = if self.dir2.is_file() {
302            (self.dir2.parent().unwrap(), self.dir2.clone())
303        } else {
304            (self.dir2.as_path(), self.dir2.join(file1_name))
305        };
306
307        let mut comparer = FileComparer::new(file1, &file2);
308        comparer.buffer_size = self.buffer_size;
309        let hashers = self.get_hashers(dir1, dir2)?;
310        if let Some((h1, h2)) = &hashers {
311            if self.comparison_method == FileComparisonMethod::Rehash {
312                h1.remove_cache_entry(file1)?;
313                h2.remove_cache_entry(&file2)?;
314            }
315            comparer.hashers = Some((h1, h2));
316        }
317        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
318        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
319        result.update(&comparer, should_compare_content)?;
320        let file1_str = file1.to_str().unwrap_or("file1");
321        if self.is_symbols_format {
322            println!("{} {}", result.to_symbol_string(), file1_str);
323        } else {
324            let file2_str = file2.to_str().unwrap_or("file2");
325            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
326        }
327        Self::save_hashers(hashers)?;
328        Ok(())
329    }
330}
331
/// Running totals accumulated from individual comparison results.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries classified as present in both directories.
    pub in_both: usize,
    /// Entries classified as present only in the first directory.
    pub only_in_dir1: usize,
    /// Entries classified as present only in the second directory.
    pub only_in_dir2: usize,
    /// Entries in both whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Entries in both whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Entries in both whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Entries in both whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Entries in both with equal size whose content is known to differ.
    pub diff_content: usize,
    /// Entries in both for which a time, size, or (for equal sizes) content
    /// comparison is missing.
    pub not_comparable: usize,
}
344
345impl ComparisonSummary {
346    pub fn update(&mut self, result: &FileComparisonResult) {
347        match result.classification {
348            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
349            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
350            Classification::InBoth => {
351                self.in_both += 1;
352                let mut is_not_comparable = false;
353                match result.modified_time_comparison {
354                    Some(Ordering::Greater) => self.dir1_newer += 1,
355                    Some(Ordering::Less) => self.dir2_newer += 1,
356                    Some(Ordering::Equal) => {}
357                    None => is_not_comparable = true,
358                }
359                match result.size_comparison {
360                    Some(Ordering::Greater) => self.dir1_larger += 1,
361                    Some(Ordering::Less) => self.dir2_larger += 1,
362                    Some(Ordering::Equal) => match result.is_content_same {
363                        Some(false) => self.diff_content += 1,
364                        Some(true) => {}
365                        None => is_not_comparable = true,
366                    },
367                    None => is_not_comparable = true,
368                }
369                if is_not_comparable {
370                    self.not_comparable += 1;
371                }
372            }
373        }
374    }
375
376    pub fn print(
377        &self,
378        mut writer: impl std::io::Write,
379        dir1_name: &str,
380        dir2_name: &str,
381    ) -> std::io::Result<()> {
382        let values = [
383            ("Files in both:", self.in_both),
384            ("Only in left:", self.only_in_dir1),
385            ("Only in right:", self.only_in_dir2),
386            ("Left is newer:", self.dir1_newer),
387            ("Right is newer:", self.dir2_newer),
388            ("Left is larger:", self.dir1_larger),
389            ("Right is larger:", self.dir2_larger),
390            ("Different content:", self.diff_content),
391            ("Not comparable:", self.not_comparable),
392        ];
393        let max_len = values.iter().map(|(s, _)| s.len()).max().unwrap();
394        writeln!(writer, "{:width$} {}", "Left:", dir1_name, width = max_len)?;
395        writeln!(writer, "{:width$} {}", "Right:", dir2_name, width = max_len)?;
396        for (label, value) in values {
397            writeln!(writer, "{:width$} {}", label, value, width = max_len)?;
398        }
399        Ok(())
400    }
401}
402
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates `name` inside `dir` holding exactly `contents`.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> std::io::Result<()> {
        fs::write(dir.join(name), contents)
    }

    /// Runs the ordered comparison to completion and returns the payload of
    /// every `Result` event in the order it was received.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| {
                if let CompareProgress::Result(_, result) = event {
                    Some(result)
                } else {
                    None
                }
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut in_both = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        in_both.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &in_both] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical on both sides.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "same.txt", b"same content")?;

        // Present on one side only.
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;

        // Present on both sides with different length and content.
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different content.
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}