Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileItem,
3    FileIterator, OutputFormat, Progress, ProgressBuilder, ProgressValue,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8    cmp::Ordering,
9    io::{self, stdout},
10    path::{Path, PathBuf},
11    sync::{Arc, mpsc},
12    time,
13};
14
15#[derive(Debug, Clone)]
/// Events sent over the channel by the comparison code and consumed by `run`.
16enum CompareProgress {
    /// Sent once, right before the merge loop over the two file streams starts.
17    StartOfComparison,
    /// Work finished for one entry (built from the entry's size); `run` adds it
    /// to the progress display.
18    Progress(ProgressValue),
    /// Accumulated value over all entries, sent after the merge loop ends;
    /// `run` uses it as the progress length.
19    Total(ProgressValue),
    /// Outcome for one entry, tagged with the sequence index assigned in the
    /// merge loop. `compare_streaming_ordered` hands this index to
    /// `crate::sort_stream` as the key.
20    Result(usize, FileComparisonResult),
    /// Comparing a pair of files failed. Carries neither index nor path; the
    /// failure itself is logged where it happens.
21    Error,
22}
23
24/// Methods for comparing files.
///
/// `DirectoryComparer` requests a content comparison for every method except
/// `Size`, and creates `FileHasher`s only for `Hash` and `Rehash`.
25#[derive(Debug, Clone, Copy, PartialEq, Eq)]
26pub enum FileComparisonMethod {
27    /// Compare only size and modification time.
28    Size,
29    /// Compare by hash (BLAKE3).
30    Hash,
31    /// Compare by hash, without using the cached hashes.
    /// In directory mode the hashers' caches are cleared before comparing; in
    /// single-file mode only the cache entries of the two files are removed.
32    Rehash,
33    /// Compare byte-by-byte.
34    Full,
35}
36
37/// A tool for comparing the contents of two directories.
38pub struct DirectoryComparer {
    /// Left-hand path. If it is a file, `run` compares a single pair of files
    /// instead of walking directories.
39    dir1: PathBuf,
    /// Right-hand path (a directory, or a file in single-file mode).
40    dir2: PathBuf,
    /// Output style; `run` accepts only `OutputFormat::Default` and
    /// `OutputFormat::Symbol`.
41    pub output_format: OutputFormat,
    /// Buffer size copied into every `FileComparer` and `FileHasher` created here.
42    pub buffer_size: usize,
    /// How files present on both sides are compared.
43    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both directory iterators as their `exclude`.
44    pub exclude: Option<GlobSet>,
    /// Optional progress builder; used for the spinner in `run` and shared with
    /// the hashers.
45    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the comparison pool.
46    pub jobs: usize,
47}
48
49impl DirectoryComparer {
50    pub const DEFAULT_JOBS: usize = 8; // Value that `new` assigns to `jobs`.
51
52    /// Creates a new `DirectoryComparer` for the two given directories.
53    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54        Self {
55            dir1,
56            dir2,
57            output_format: OutputFormat::Default,
58            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59            comparison_method: FileComparisonMethod::Hash,
60            exclude: None,
61            progress: None,
62            jobs: Self::DEFAULT_JOBS,
63        }
64    }
65
66    /// Executes the directory comparison and prints results to stdout.
67    /// This is a convenience method for CLI usage.
68    pub fn run(&self) -> anyhow::Result<()> {
69        match self.output_format {
70            OutputFormat::Default | OutputFormat::Symbol => {}
71            _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72        }
73        if self.dir1.is_file() {
74            return self.run_file_comparer();
75        }
76
77        let mut progress = self
78            .progress
79            .as_ref()
80            .map(|progress| progress.add_spinner())
81            .unwrap_or_else(Progress::none);
82        progress.set_message("Scanning directories...");
83        let start_time = std::time::Instant::now();
84        let mut summary = ComparisonSummary::default();
85        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87        let (tx, rx) = mpsc::channel();
88        std::thread::scope(|scope| {
89            scope.spawn(move || {
90                if let Err(e) = self.compare_streaming_ordered(tx) {
91                    log::error!("Error during comparison: {}", e);
92                }
93            });
94
95            // Receive results and update summary/UI
96            while let Ok(event) = rx.recv() {
97                match event {
98                    CompareProgress::StartOfComparison => {
99                        progress.set_message("Comparing files...");
100                    }
101                    CompareProgress::Total(total) => {
102                        progress.set_length(total);
103                        progress.set_message("");
104                    }
105                    CompareProgress::Result(_, result) => {
106                        summary.update(&result);
107                        match self.output_format {
108                            OutputFormat::Symbol => progress.suspend_for(stdout(), || {
109                                println!(
110                                    "{} {}",
111                                    result.to_symbol_string(),
112                                    result.relative_path.display()
113                                );
114                            }),
115                            OutputFormat::Default => {
116                                if !result.is_identical() {
117                                    progress.suspend_for(stdout(), || {
118                                        println!(
119                                            "{}: {}",
120                                            result.relative_path.display(),
121                                            result.to_string(dir1_str, dir2_str)
122                                        );
123                                    });
124                                }
125                            }
126                            _ => unreachable!(),
127                        }
128                    }
129                    CompareProgress::Progress(value) => progress.inc(value),
130                    CompareProgress::Error => summary.num_errors += 1,
131                }
132            }
133        });
134        progress.finish();
135        eprintln!("\n--- Comparison Summary ---");
136        summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
137        Ok(())
138    }
139
140    /// Performs the directory comparison and streams results via a channel.
141    ///
142    /// # Arguments
143    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
144    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
145        crate::sort_stream(
146            tx,
147            |tx_unordered| self.compare_streaming(tx_unordered),
148            |event| match event {
149                CompareProgress::Result(i, _) => Some(*i),
150                _ => None,
151            },
152        )
153    }
154
155    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
        // Walks both directories, pairs entries by relative path and sends
        // `CompareProgress` events on `tx`. Every `Result` carries the sequence
        // index assigned in the loop below; pairs compared on the pool are sent
        // when their task finishes, so they may arrive out of index order.
        // NOTE(review): the pairing only works if both `FileIterator`s yield
        // entries in ascending relative-path order — confirm.
156        let mut it1 = FileIterator::new(&self.dir1);
157        let mut it2 = FileIterator::new(&self.dir2);
        // Both sides use the same exclusion globs.
158        it1.exclude = self.exclude.as_ref();
159        it2.exclude = self.exclude.as_ref();
        // `get_hashers` returns `Some` only for the `Hash` and `Rehash` methods.
160        let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
161        if let Some((h1, h2)) = &mut hashers {
            // Give each iterator its hasher's cache.
162            it1.cache = Some(h1.cache()?);
163            it2.cache = Some(h2.cache()?);
            // `Rehash` must not rely on cached hashes, so the caches are cleared.
164            if self.comparison_method == FileComparisonMethod::Rehash {
165                h1.clear_cache()?;
166                h2.clear_cache()?;
167            }
168        }
        // Shared (immutable) view of the hashers for the pool tasks.
169        let hashers_ref = hashers.as_ref();
170        std::thread::scope(|global_scope| {
            // Start both iterators in this scope; entries arrive on the receivers.
171            let it1_rx = it1.spawn_in_scope(global_scope);
172            let it2_rx = it2.spawn_in_scope(global_scope);
173            let pool = crate::build_thread_pool(self.jobs)?;
174            pool.scope(move |scope| {
                // `None` means the corresponding stream is exhausted.
175                let mut cur1 = it1_rx.recv().ok();
176                let mut cur2 = it2_rx.recv().ok();
                // Sequence number of the next result.
177                let mut index = 0;
                // Accumulates the value reported as `Total` at the end.
178                let mut total = ProgressValue::default();
179                tx.send(CompareProgress::StartOfComparison)?;
180                loop {
                    // Decide which side is "behind": Less = entry only on side 1,
                    // Greater = entry only on side 2, Equal = present on both.
181                    let cmp = match (&cur1, &cur2) {
182                        (Some(f1), Some(f2)) => {
183                            let rel1 = f1.relative_path(&self.dir1);
184                            let rel2 = f2.relative_path(&self.dir2);
185                            rel1.cmp(rel2)
186                        }
                        // One stream is exhausted: everything left on the other
                        // side exists only there.
187                        (Some(_), None) => Ordering::Less,
188                        (None, Some(_)) => Ordering::Greater,
189                        (None, None) => break,
190                    };
191                    match cmp {
192                        Ordering::Less => {
                            // Only in dir1: reported directly from this thread.
193                            let file1 = cur1.take().unwrap();
194                            let rel1 = file1.relative_path(&self.dir1);
195                            let size = file1.size();
196                            total += ProgressValue::with_size(size);
197                            let result =
198                                FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
199                            tx.send(CompareProgress::Result(index, result))?;
200                            tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
201                            index += 1;
202                            cur1 = it1_rx.recv().ok();
203                        }
204                        Ordering::Greater => {
                            // Only in dir2: mirror image of the branch above.
205                            let file2 = cur2.take().unwrap();
206                            let rel2 = file2.relative_path(&self.dir2);
207                            let size = file2.size();
208                            total += ProgressValue::with_size(size);
209                            let result =
210                                FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
211                            tx.send(CompareProgress::Result(index, result))?;
212                            tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
213                            index += 1;
214                            cur2 = it2_rx.recv().ok();
215                        }
216                        Ordering::Equal => {
                            // Present on both sides: the comparison runs as a pool task.
217                            let file1 = cur1.take().unwrap();
218                            let file2 = cur2.take().unwrap();
219                            let buffer_size = self.buffer_size;
220                            let tx_clone = tx.clone();
                            // The index is fixed now, before the task runs.
221                            let i = index;
222                            let should_compare =
223                                self.comparison_method != FileComparisonMethod::Size;
                            // Progress for a pair is counted with the size of the dir1 file.
224                            let size = file1.size();
225                            total += ProgressValue::with_size(size);
226                            scope.spawn(move |_| {
227                                let mut comparer = FileComparer::new(&file1, &file2);
228                                comparer.buffer_size = buffer_size;
229                                if let Some((h1, h2)) = hashers_ref {
230                                    comparer.hashers = Some((h1, h2));
231                                }
232                                let rel_path = file1.relative_path(&self.dir1);
233                                let mut result = FileComparisonResult::new(
234                                    rel_path.into(),
235                                    Classification::InBoth,
236                                );
                                // A failed comparison is logged and reported as `Error`.
                                // NOTE(review): `Error` carries no index, so index `i` is
                                // never delivered for a failed pair — confirm that
                                // `sort_stream` tolerates the gap.
237                                let event = match result.update(&comparer, should_compare) {
238                                    Ok(_) => CompareProgress::Result(i, result),
239                                    Err(error) => {
240                                        log::error!(
241                                            "Error comparing '{}': {}",
242                                            result.relative_path.display(),
243                                            error
244                                        );
245                                        CompareProgress::Error
246                                    }
247                                };
                                // Send the outcome, then the progress; a failed send is
                                // only logged (the progress send is skipped if the first fails).
248                                if tx_clone.send(event).is_err()
249                                    || tx_clone
250                                        .send(CompareProgress::Progress(ProgressValue::with_size(
251                                            size,
252                                        )))
253                                        .is_err()
254                                {
255                                    log::error!("Send failed");
256                                }
257                            });
258                            index += 1;
259                            cur1 = it1_rx.recv().ok();
260                            cur2 = it2_rx.recv().ok();
261                        }
262                    }
263                }
                // Both streams are exhausted: report the accumulated total.
264                tx.send(CompareProgress::Total(total))
265            })?;
266            Ok::<(), anyhow::Error>(())
267        })?;
268
        // Reached only when everything above succeeded.
269        Self::save_hashers(hashers)?;
270        Ok(())
271    }
272
273    fn get_hashers(
274        &self,
275        dir1: &Path,
276        dir2: &Path,
277    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
278        if self.comparison_method == FileComparisonMethod::Hash
279            || self.comparison_method == FileComparisonMethod::Rehash
280        {
281            let (h1_res, h2_res) = rayon::join(
282                || FileHasher::new_with_cache(&[dir1]),
283                || FileHasher::new_with_cache(&[dir2]),
284            );
285            let mut h1 = h1_res?;
286            let mut h2 = h2_res?;
287            h1.buffer_size = self.buffer_size;
288            h2.buffer_size = self.buffer_size;
289            if let Some(progress) = self.progress.as_ref() {
290                h1.progress = Some(Arc::clone(progress));
291                h2.progress = Some(Arc::clone(progress));
292            }
293            return Ok(Some((h1, h2)));
294        }
295        Ok(None)
296    }
297
298    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
299        if let Some((h1, h2)) = hashers {
300            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
301            r1?;
302            r2?;
303        }
304        Ok(())
305    }
306
307    fn run_file_comparer(&self) -> anyhow::Result<()> {
308        assert!(self.dir1.is_file());
309        let file1_path = &self.dir1;
310        let dir1 = file1_path.parent().unwrap();
311        let file1_name = file1_path.file_name().unwrap();
312        let (dir2, file2_path) = if self.dir2.is_file() {
313            (self.dir2.parent().unwrap(), self.dir2.clone())
314        } else {
315            (self.dir2.as_path(), self.dir2.join(file1_name))
316        };
317        let file1 = FileItem::try_from(file1_path.as_path())?;
318        let file2 = FileItem::try_from(file2_path.as_path())?;
319        let mut comparer = FileComparer::new(&file1, &file2);
320        comparer.buffer_size = self.buffer_size;
321        let mut hashers = self.get_hashers(dir1, dir2)?;
322        if let Some((h1, h2)) = &mut hashers {
323            if self.comparison_method == FileComparisonMethod::Rehash {
324                h1.remove_cache_entry(file1_path)?;
325                h2.remove_cache_entry(&file2_path)?;
326            }
327            comparer.hashers = Some((h1, h2));
328        }
329        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
330        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
331        result.update(&comparer, should_compare_content)?;
332        let file1_str = file1_path.to_str().unwrap_or("file1");
333        match self.output_format {
334            OutputFormat::Symbol => {
335                println!("{} {}", result.to_symbol_string(), file1_str);
336            }
337            OutputFormat::Default => {
338                let file2_str = file2_path.to_str().unwrap_or("file2");
339                println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
340            }
341            _ => unreachable!(),
342        }
343        Self::save_hashers(hashers)?;
344        Ok(())
345    }
346}
347
348#[derive(Default)]
/// Counters accumulated over all comparison results and printed by `print`.
349struct ComparisonSummary {
    /// Entries classified as present in both directories.
350    pub in_both: usize,
    /// Entries found only in the first directory.
351    pub only_in_dir1: usize,
    /// Entries found only in the second directory.
352    pub only_in_dir2: usize,
    /// Entries in both whose modification-time comparison is `Greater`.
353    pub dir1_newer: usize,
    /// Entries in both whose modification-time comparison is `Less`.
354    pub dir2_newer: usize,
    /// Entries in both whose size comparison is `Greater`.
355    pub dir1_larger: usize,
    /// Entries in both whose size comparison is `Less`.
356    pub dir2_larger: usize,
    /// Entries in both with equal size whose content is known to differ.
357    pub diff_content: usize,
    /// Entries in both for which the time comparison, the size comparison, or
    /// (for equal sizes) the content comparison is missing. Counted once per entry.
358    pub not_comparable: usize,
    /// Number of `CompareProgress::Error` events seen by `run`.
359    pub num_errors: usize,
360}
361
362impl ComparisonSummary {
363    pub fn update(&mut self, result: &FileComparisonResult) {
364        match result.classification {
365            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
366            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
367            Classification::InBoth => {
368                self.in_both += 1;
369                let mut is_not_comparable = false;
370                match result.modified_time_comparison {
371                    Some(Ordering::Greater) => self.dir1_newer += 1,
372                    Some(Ordering::Less) => self.dir2_newer += 1,
373                    Some(Ordering::Equal) => {}
374                    None => is_not_comparable = true,
375                }
376                match result.size_comparison {
377                    Some(Ordering::Greater) => self.dir1_larger += 1,
378                    Some(Ordering::Less) => self.dir2_larger += 1,
379                    Some(Ordering::Equal) => match result.is_content_same {
380                        Some(false) => self.diff_content += 1,
381                        Some(true) => {}
382                        None => is_not_comparable = true,
383                    },
384                    None => is_not_comparable = true,
385                }
386                if is_not_comparable {
387                    self.not_comparable += 1;
388                }
389            }
390        }
391    }
392
393    pub fn print(
394        &self,
395        mut writer: impl std::io::Write,
396        start_time: &time::Instant,
397        dir1_name: &str,
398        dir2_name: &str,
399    ) -> std::io::Result<()> {
400        let values = [
401            ("Elapsed:", 0),
402            ("Files in both:", self.in_both),
403            ("Only in left:", self.only_in_dir1),
404            ("Only in right:", self.only_in_dir2),
405            ("Left is newer:", self.dir1_newer),
406            ("Right is newer:", self.dir2_newer),
407            ("Left is larger:", self.dir1_larger),
408            ("Right is larger:", self.dir2_larger),
409            ("Different content:", self.diff_content),
410            ("Not comparable:", self.not_comparable),
411            ("Errors:", self.num_errors),
412        ];
413        let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
414        formatter.write_value(&mut writer, "Left:", dir1_name)?;
415        formatter.write_value(&mut writer, "Right:", dir2_name)?;
416        formatter.write_value(
417            &mut writer,
418            values[0].0,
419            FormattedDuration(start_time.elapsed()),
420        )?;
421        formatter.write_values(&mut writer, &values[1..])?;
422        Ok(())
423    }
424}
425
426#[cfg(test)]
427mod tests {
428    use super::*;
429    use std::fs;
430    use std::io::Write;
431
432    #[test]
433    fn comparison_summary() {
434        let mut summary = ComparisonSummary::default();
435        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
436        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
437        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
438        res3.modified_time_comparison = Some(Ordering::Greater);
439
440        summary.update(&res1);
441        summary.update(&res2);
442        summary.update(&res3);
443
444        assert_eq!(summary.only_in_dir1, 1);
445        assert_eq!(summary.only_in_dir2, 1);
446        assert_eq!(summary.in_both, 1);
447        assert_eq!(summary.dir1_newer, 1);
448    }
449
450    #[test]
451    fn directory_comparer_integration() -> anyhow::Result<()> {
452        let dir1 = tempfile::tempdir()?;
453        let dir2 = tempfile::tempdir()?;
454
455        // Create files in dir1
456        let file1_path = dir1.path().join("same.txt");
457        fs::write(file1_path, b"same content")?;
458
459        let only1_path = dir1.path().join("only1.txt");
460        fs::write(only1_path, b"only in dir1")?;
461
462        // Create files in dir2
463        let file2_path = dir2.path().join("same.txt");
464        fs::write(file2_path, b"same content")?;
465
466        let only2_path = dir2.path().join("only2.txt");
467        fs::write(only2_path, b"only in dir2")?;
468
469        // Create a different file
470        let diff1_path = dir1.path().join("diff.txt");
471        fs::write(diff1_path, b"content 1")?;
472        let diff2_path = dir2.path().join("diff.txt");
473        fs::write(diff2_path, b"content 222")?; // different length and content
474
475        // Same size but different content.
476        let diffc1_path = dir1.path().join("diffc.txt");
477        fs::write(diffc1_path, b"content 111")?;
478        let diffc2_path = dir2.path().join("diffc.txt");
479        fs::write(diffc2_path, b"content 222")?; // different length and content
480
481        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
482        let (tx, rx) = mpsc::channel();
483        comparer.compare_streaming_ordered(tx)?;
484        let mut results = Vec::new();
485        while let Ok(res) = rx.recv() {
486            if let CompareProgress::Result(_, r) = res {
487                results.push(r);
488            }
489        }
490        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
491        assert_eq!(results.len(), 5);
492
493        // diff.txt
494        let diff_result = &results[0];
495        assert_eq!(diff_result.relative_path.to_str().unwrap(), "diff.txt");
496        assert_eq!(diff_result.classification, Classification::InBoth);
497        assert_eq!(diff_result.size_comparison, Some(Ordering::Less));
498        assert_eq!(diff_result.is_content_same, None);
499
500        // diff2.txt
501        let diffc_result = &results[1];
502        assert_eq!(diffc_result.relative_path.to_str().unwrap(), "diffc.txt");
503        assert_eq!(diffc_result.classification, Classification::InBoth);
504        assert_eq!(diffc_result.size_comparison, Some(Ordering::Equal));
505        assert_eq!(diffc_result.is_content_same, Some(false));
506
507        // only1.txt
508        let only1_result = &results[2];
509        assert_eq!(only1_result.relative_path.to_str().unwrap(), "only1.txt");
510        assert_eq!(only1_result.classification, Classification::OnlyInDir1);
511
512        // only2.txt
513        let only2_result = &results[3];
514        assert_eq!(only2_result.relative_path.to_str().unwrap(), "only2.txt");
515        assert_eq!(only2_result.classification, Classification::OnlyInDir2);
516
517        // same.txt
518        let same_result = &results[4];
519        assert_eq!(same_result.relative_path.to_str().unwrap(), "same.txt");
520        assert_eq!(same_result.classification, Classification::InBoth);
521        assert_eq!(same_result.size_comparison, Some(Ordering::Equal));
522
523        Ok(())
524    }
525
526    #[test]
527    fn directory_comparer_size_mode() -> anyhow::Result<()> {
528        let dir1 = tempfile::tempdir()?;
529        let dir2 = tempfile::tempdir()?;
530
531        let file1_path = dir1.path().join("file.txt");
532        let mut file1 = fs::File::create(&file1_path)?;
533        file1.write_all(b"content 1")?;
534
535        let file2_path = dir2.path().join("file.txt");
536        let mut file2 = fs::File::create(&file2_path)?;
537        file2.write_all(b"content 2")?; // same length, different content
538
539        let mut comparer =
540            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
541        comparer.comparison_method = FileComparisonMethod::Size;
542        let (tx, rx) = mpsc::channel();
543
544        comparer.compare_streaming_ordered(tx)?;
545
546        let mut results = Vec::new();
547        while let Ok(res) = rx.recv() {
548            if let CompareProgress::Result(_, r) = res {
549                results.push(r);
550            }
551        }
552
553        assert_eq!(results.len(), 1);
554        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
555        assert_eq!(results[0].classification, Classification::InBoth);
556        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
557        assert_eq!(results[0].is_content_same, None);
558
559        Ok(())
560    }
561}