Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileItem,
3    FileIterator, OutputFormat, Progress, ProgressBuilder, ProgressValue,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8    cmp::Ordering,
9    io::{self, stdout},
10    path::{Path, PathBuf},
11    sync::{Arc, mpsc},
12    time,
13};
14
/// Events sent over the channel from the comparison code to whoever consumes the
/// results (the UI loop in `DirectoryComparer::run`, or a test).
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the merge loop over the two directories starts.
    StartOfComparison,
    /// An increment of completed work, sent after each file has been handled.
    Progress(ProgressValue),
    /// The accumulated amount of work, sent after every file has been scheduled.
    Total(ProgressValue),
    /// The outcome for one relative path. The `usize` is the position of the path in
    /// the merge order and is the key used to put results back in order.
    Result(usize, FileComparisonResult),
    /// Comparing one pair of files failed; the details are logged by the sender.
    Error,
}
23
/// Methods for comparing files.
///
/// `Hash` and `Rehash` are the two methods for which `DirectoryComparer` creates
/// `FileHasher`s; every method except `Size` asks for the file content to be compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time.
    Size,
    /// Compare by hash (BLAKE3).
    Hash,
    /// Compare by hash, without using the cached hashes.
    /// The cached hashes are cleared (or the affected entries removed) before comparing.
    Rehash,
    /// Compare byte-by-byte.
    Full,
}
36
/// A tool for comparing the contents of two directories.
///
/// When `dir1` is a regular file, `run` compares a single pair of files instead of
/// walking directories.
pub struct DirectoryComparer {
    /// Left-hand side of the comparison (a directory, or a single file).
    dir1: PathBuf,
    /// Right-hand side of the comparison (a directory, or a single file).
    dir2: PathBuf,
    /// How results are printed; `run` accepts only `Default` and `Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size handed to the file comparers and hashers.
    pub buffer_size: usize,
    /// How files that exist on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both directory iterators as their exclude filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; `run` adds its spinner to it and the hashers share it.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the file-comparison pool.
    pub jobs: usize,
}
48
49impl DirectoryComparer {
    /// Value of the `jobs` field on a comparer created by `new`.
    pub const DEFAULT_JOBS: usize = 8;
51
52    /// Creates a new `DirectoryComparer` for the two given directories.
53    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54        Self {
55            dir1,
56            dir2,
57            output_format: OutputFormat::Default,
58            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59            comparison_method: FileComparisonMethod::Hash,
60            exclude: None,
61            progress: None,
62            jobs: Self::DEFAULT_JOBS,
63        }
64    }
65
66    /// Executes the directory comparison and prints results to stdout.
67    /// This is a convenience method for CLI usage.
68    pub fn run(&self) -> anyhow::Result<()> {
69        match self.output_format {
70            OutputFormat::Default | OutputFormat::Symbol => {}
71            _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72        }
73        if self.dir1.is_file() {
74            return self.run_file_comparer();
75        }
76
77        let mut progress = self
78            .progress
79            .as_ref()
80            .map(|progress| progress.add_spinner())
81            .unwrap_or_else(Progress::none);
82        progress.set_message("Scanning directories...");
83        let start_time = std::time::Instant::now();
84        let mut summary = ComparisonSummary::default();
85        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87        let (tx, rx) = mpsc::channel();
88        std::thread::scope(|scope| {
89            scope.spawn(move || {
90                if let Err(e) = self.compare_streaming_ordered(tx) {
91                    log::error!("Error during comparison: {}", e);
92                }
93            });
94
95            // Receive results and update summary/UI
96            while let Ok(event) = rx.recv() {
97                match event {
98                    CompareProgress::StartOfComparison => {
99                        progress.set_message("Comparing files...");
100                    }
101                    CompareProgress::Total(total) => {
102                        progress.set_length(total);
103                        progress.set_message("");
104                    }
105                    CompareProgress::Result(_, result) => {
106                        summary.update(&result);
107                        progress.suspend_for(stdout(), || {
108                            result.print(self.output_format, dir1_str, dir2_str)
109                        });
110                    }
111                    CompareProgress::Progress(value) => progress.inc(value),
112                    CompareProgress::Error => summary.num_errors += 1,
113                }
114            }
115        });
116        progress.finish();
117        eprintln!("\n--- Comparison Summary ---");
118        summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
119        Ok(())
120    }
121
122    /// Performs the directory comparison and streams results via a channel.
123    ///
124    /// # Arguments
125    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
126    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
127        crate::sort_stream(
128            tx,
129            |tx_unordered| self.compare_streaming(tx_unordered),
130            |event| match event {
131                CompareProgress::Result(i, _) => Some(*i),
132                _ => None,
133            },
134        )
135    }
136
    /// Walks both directories, merges the two file streams by relative path and sends
    /// one `CompareProgress::Result` per path (plus progress events) to `tx`.
    ///
    /// Paths found on one side only are reported directly from the merge loop. Paths
    /// found on both sides are compared on the thread pool, so their results may be
    /// sent out of index order. The hash caches are saved once everything has finished.
    ///
    /// # Errors
    /// Fails when the hashers or the thread pool cannot be set up, when sending from
    /// the merge loop fails, or when saving the hash caches fails. A failure to
    /// compare an individual pair is logged and sent as `CompareProgress::Error`.
    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
        let mut it1 = FileIterator::new(&self.dir1);
        let mut it2 = FileIterator::new(&self.dir2);
        it1.exclude = self.exclude.as_ref();
        it2.exclude = self.exclude.as_ref();
        // `None` unless the comparison method is `Hash` or `Rehash`.
        let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
        if let Some((h1, h2)) = &mut hashers {
            it1.cache = Some(h1.cache()?);
            it2.cache = Some(h2.cache()?);
            if self.comparison_method == FileComparisonMethod::Rehash {
                h1.clear_cache()?;
                h2.clear_cache()?;
            }
        }
        // Shared, read-only view of the hashers for the pool tasks below.
        let hashers_ref = hashers.as_ref();
        std::thread::scope(|global_scope| {
            // Each iterator hands its files over through a receiver.
            let it1_rx = it1.spawn_in_scope(global_scope);
            let it2_rx = it2.spawn_in_scope(global_scope);
            let pool = crate::build_thread_pool(self.jobs)?;
            pool.scope(move |scope| {
                // `None` means the corresponding side is exhausted.
                let mut cur1 = it1_rx.recv().ok();
                let mut cur2 = it2_rx.recv().ok();
                // Position of the next path in the merge order.
                let mut index = 0;
                let mut total = ProgressValue::default();
                tx.send(CompareProgress::StartOfComparison)?;
                loop {
                    // Decide which side to advance.
                    // NOTE(review): this merge assumes both iterators yield files in
                    // ascending relative-path order — confirm against `FileIterator`.
                    let cmp = match (&cur1, &cur2) {
                        (Some(f1), Some(f2)) => {
                            let rel1 = f1.relative_path(&self.dir1);
                            let rel2 = f2.relative_path(&self.dir2);
                            rel1.cmp(rel2)
                        }
                        (Some(_), None) => Ordering::Less,
                        (None, Some(_)) => Ordering::Greater,
                        (None, None) => break,
                    };
                    match cmp {
                        // The path exists only on the dir1 side.
                        Ordering::Less => {
                            let file1 = cur1.take().unwrap();
                            let rel1 = file1.relative_path(&self.dir1);
                            let size = file1.size();
                            total += ProgressValue::with_size(size);
                            let result =
                                FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                        }
                        // The path exists only on the dir2 side.
                        Ordering::Greater => {
                            let file2 = cur2.take().unwrap();
                            let rel2 = file2.relative_path(&self.dir2);
                            let size = file2.size();
                            total += ProgressValue::with_size(size);
                            let result =
                                FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
                            index += 1;
                            cur2 = it2_rx.recv().ok();
                        }
                        // The path exists on both sides: compare the pair on the pool.
                        Ordering::Equal => {
                            let file1 = cur1.take().unwrap();
                            let file2 = cur2.take().unwrap();
                            let buffer_size = self.buffer_size;
                            let tx_clone = tx.clone();
                            let i = index;
                            let should_compare =
                                self.comparison_method != FileComparisonMethod::Size;
                            // Progress for a pair is measured by the dir1 file's size.
                            let size = file1.size();
                            total += ProgressValue::with_size(size);
                            scope.spawn(move |_| {
                                let mut comparer = FileComparer::new(&file1, &file2);
                                comparer.buffer_size = buffer_size;
                                if let Some((h1, h2)) = hashers_ref {
                                    comparer.hashers = Some((h1, h2));
                                }
                                let rel_path = file1.relative_path(&self.dir1);
                                let mut result = FileComparisonResult::new(
                                    rel_path.into(),
                                    Classification::InBoth,
                                );
                                // NOTE(review): on failure no `Result` is ever sent for
                                // index `i`; confirm that `crate::sort_stream` copes
                                // with such a gap in the sequence.
                                let event = match result.update(&comparer, should_compare) {
                                    Ok(_) => CompareProgress::Result(i, result),
                                    Err(error) => {
                                        log::error!(
                                            "Error comparing '{}': {}",
                                            result.relative_path.display(),
                                            error
                                        );
                                        CompareProgress::Error
                                    }
                                };
                                // A task cannot return an error, so a failed send is
                                // only logged.
                                if tx_clone.send(event).is_err()
                                    || tx_clone
                                        .send(CompareProgress::Progress(ProgressValue::with_size(
                                            size,
                                        )))
                                        .is_err()
                                {
                                    log::error!("Send failed");
                                }
                            });
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                            cur2 = it2_rx.recv().ok();
                        }
                    }
                }
                // Sent once every file has been scheduled; pool tasks may still be
                // running at this point.
                tx.send(CompareProgress::Total(total))
            })?;
            Ok::<(), anyhow::Error>(())
        })?;

        Self::save_hashers(hashers)?;
        Ok(())
    }
254
255    fn get_hashers(
256        &self,
257        dir1: &Path,
258        dir2: &Path,
259    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
260        if self.comparison_method == FileComparisonMethod::Hash
261            || self.comparison_method == FileComparisonMethod::Rehash
262        {
263            let (h1_res, h2_res) = rayon::join(
264                || FileHasher::new_with_cache(&[dir1]),
265                || FileHasher::new_with_cache(&[dir2]),
266            );
267            let mut h1 = h1_res?;
268            let mut h2 = h2_res?;
269            h1.buffer_size = self.buffer_size;
270            h2.buffer_size = self.buffer_size;
271            if let Some(progress) = self.progress.as_ref() {
272                h1.progress = Some(Arc::clone(progress));
273                h2.progress = Some(Arc::clone(progress));
274            }
275            return Ok(Some((h1, h2)));
276        }
277        Ok(None)
278    }
279
280    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
281        if let Some((h1, h2)) = hashers {
282            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
283            r1?;
284            r2?;
285        }
286        Ok(())
287    }
288
289    fn run_file_comparer(&self) -> anyhow::Result<()> {
290        assert!(self.dir1.is_file());
291        let file1_path = &self.dir1;
292        let dir1 = file1_path.parent().unwrap();
293        let file1_name = file1_path.file_name().unwrap();
294        let (dir2, file2_path) = if self.dir2.is_file() {
295            (self.dir2.parent().unwrap(), self.dir2.clone())
296        } else {
297            (self.dir2.as_path(), self.dir2.join(file1_name))
298        };
299        let file1 = FileItem::try_from(file1_path.as_path())?;
300        let file2 = FileItem::try_from(file2_path.as_path())?;
301        let mut comparer = FileComparer::new(&file1, &file2);
302        comparer.buffer_size = self.buffer_size;
303        let mut hashers = self.get_hashers(dir1, dir2)?;
304        if let Some((h1, h2)) = &mut hashers {
305            if self.comparison_method == FileComparisonMethod::Rehash {
306                h1.remove_cache_entry(file1_path)?;
307                h2.remove_cache_entry(&file2_path)?;
308            }
309            comparer.hashers = Some((h1, h2));
310        }
311        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
312        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
313        result.update(&comparer, should_compare_content)?;
314        let file1_str = file1_path.to_str().unwrap_or("file1");
315        match self.output_format {
316            OutputFormat::Symbol => {
317                println!("{} {}", result.to_symbol_string(), file1_str);
318            }
319            OutputFormat::Default => {
320                let file2_str = file2_path.to_str().unwrap_or("file2");
321                println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
322            }
323            _ => unreachable!(),
324        }
325        Self::save_hashers(hashers)?;
326        Ok(())
327    }
328}
329
/// Running tallies of comparison outcomes, printed as the final summary.
#[derive(Default)]
struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Files in both whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Files in both whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Files in both whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Files of equal size whose content was found to differ.
    pub diff_content: usize,
    /// Files in both for which a comparison (time, size or content) has no result.
    pub not_comparable: usize,
    /// Number of errors reported during the comparison.
    pub num_errors: usize,
}
343
344impl ComparisonSummary {
345    pub fn update(&mut self, result: &FileComparisonResult) {
346        match result.classification {
347            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
348            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
349            Classification::InBoth => {
350                self.in_both += 1;
351                let mut is_not_comparable = false;
352                match result.modified_time_comparison {
353                    Some(Ordering::Greater) => self.dir1_newer += 1,
354                    Some(Ordering::Less) => self.dir2_newer += 1,
355                    Some(Ordering::Equal) => {}
356                    None => is_not_comparable = true,
357                }
358                match result.size_comparison {
359                    Some(Ordering::Greater) => self.dir1_larger += 1,
360                    Some(Ordering::Less) => self.dir2_larger += 1,
361                    Some(Ordering::Equal) => match result.is_content_same {
362                        Some(false) => self.diff_content += 1,
363                        Some(true) => {}
364                        None => is_not_comparable = true,
365                    },
366                    None => is_not_comparable = true,
367                }
368                if is_not_comparable {
369                    self.not_comparable += 1;
370                }
371            }
372        }
373    }
374
375    pub fn print(
376        &self,
377        mut writer: impl std::io::Write,
378        start_time: &time::Instant,
379        dir1_name: &str,
380        dir2_name: &str,
381    ) -> std::io::Result<()> {
382        let values = [
383            ("Elapsed:", 0),
384            ("Files in both:", self.in_both),
385            ("Only in left:", self.only_in_dir1),
386            ("Only in right:", self.only_in_dir2),
387            ("Left is newer:", self.dir1_newer),
388            ("Right is newer:", self.dir2_newer),
389            ("Left is larger:", self.dir1_larger),
390            ("Right is larger:", self.dir2_larger),
391            ("Different content:", self.diff_content),
392            ("Not comparable:", self.not_comparable),
393            ("Errors:", self.num_errors),
394        ];
395        let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
396        formatter.write_value(&mut writer, "Left:", dir1_name)?;
397        formatter.write_value(&mut writer, "Right:", dir2_name)?;
398        formatter.write_value(
399            &mut writer,
400            values[0].0,
401            FormattedDuration(start_time.elapsed()),
402        )?;
403        formatter.write_values(&mut writer, &values[1..])?;
404        Ok(())
405    }
406}
407
408#[cfg(test)]
409mod tests {
410    use super::*;
411    use std::fs;
412    use std::io::Write;
413
414    #[test]
415    fn comparison_summary() {
416        let mut summary = ComparisonSummary::default();
417        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
418        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
419        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
420        res3.modified_time_comparison = Some(Ordering::Greater);
421
422        summary.update(&res1);
423        summary.update(&res2);
424        summary.update(&res3);
425
426        assert_eq!(summary.only_in_dir1, 1);
427        assert_eq!(summary.only_in_dir2, 1);
428        assert_eq!(summary.in_both, 1);
429        assert_eq!(summary.dir1_newer, 1);
430    }
431
    /// End-to-end check with the default comparison method: identical files, files
    /// present on one side only, files of different size, and files of equal size
    /// but different content.
    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Create files in dir1
        let file1_path = dir1.path().join("same.txt");
        fs::write(file1_path, b"same content")?;

        let only1_path = dir1.path().join("only1.txt");
        fs::write(only1_path, b"only in dir1")?;

        // Create files in dir2
        let file2_path = dir2.path().join("same.txt");
        fs::write(file2_path, b"same content")?;

        let only2_path = dir2.path().join("only2.txt");
        fs::write(only2_path, b"only in dir2")?;

        // Create a different file
        let diff1_path = dir1.path().join("diff.txt");
        fs::write(diff1_path, b"content 1")?;
        let diff2_path = dir2.path().join("diff.txt");
        fs::write(diff2_path, b"content 222")?; // different length and content

        // Same size but different content.
        let diffc1_path = dir1.path().join("diffc.txt");
        fs::write(diffc1_path, b"content 111")?;
        let diffc2_path = dir2.path().join("diffc.txt");
        fs::write(diffc2_path, b"content 222")?; // same length, different content

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        // The sender has been dropped by now, so this loop ends once the channel is drained.
        let mut results = Vec::new();
        while let Ok(res) = rx.recv() {
            if let CompareProgress::Result(_, r) = res {
                results.push(r);
            }
        }
        // Sort by path so that the indices used below are stable.
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
        assert_eq!(results.len(), 5);

        // diff.txt
        let diff_result = &results[0];
        assert_eq!(diff_result.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff_result.classification, Classification::InBoth);
        assert_eq!(diff_result.size_comparison, Some(Ordering::Less));
        // Sizes differ, so there is no content verdict.
        assert_eq!(diff_result.is_content_same, None);

        // diffc.txt
        let diffc_result = &results[1];
        assert_eq!(diffc_result.relative_path.to_str().unwrap(), "diffc.txt");
        assert_eq!(diffc_result.classification, Classification::InBoth);
        assert_eq!(diffc_result.size_comparison, Some(Ordering::Equal));
        assert_eq!(diffc_result.is_content_same, Some(false));

        // only1.txt
        let only1_result = &results[2];
        assert_eq!(only1_result.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1_result.classification, Classification::OnlyInDir1);

        // only2.txt
        let only2_result = &results[3];
        assert_eq!(only2_result.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2_result.classification, Classification::OnlyInDir2);

        // same.txt
        let same_result = &results[4];
        assert_eq!(same_result.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same_result.classification, Classification::InBoth);
        assert_eq!(same_result.size_comparison, Some(Ordering::Equal));

        Ok(())
    }
507
508    #[test]
509    fn directory_comparer_size_mode() -> anyhow::Result<()> {
510        let dir1 = tempfile::tempdir()?;
511        let dir2 = tempfile::tempdir()?;
512
513        let file1_path = dir1.path().join("file.txt");
514        let mut file1 = fs::File::create(&file1_path)?;
515        file1.write_all(b"content 1")?;
516
517        let file2_path = dir2.path().join("file.txt");
518        let mut file2 = fs::File::create(&file2_path)?;
519        file2.write_all(b"content 2")?; // same length, different content
520
521        let mut comparer =
522            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
523        comparer.comparison_method = FileComparisonMethod::Size;
524        let (tx, rx) = mpsc::channel();
525
526        comparer.compare_streaming_ordered(tx)?;
527
528        let mut results = Vec::new();
529        while let Ok(res) = rx.recv() {
530            if let CompareProgress::Result(_, r) = res {
531                results.push(r);
532            }
533        }
534
535        assert_eq!(results.len(), 1);
536        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
537        assert_eq!(results[0].classification, Classification::InBoth);
538        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
539        assert_eq!(results[0].is_content_same, None);
540
541        Ok(())
542    }
543}