//! `compare_dir/dir_comparer.rs`: a tool for comparing the contents of two directories.

1use crate::{
2    Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3    OutputFormat, Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8    cmp::Ordering,
9    io::{self, stdout},
10    path::{Path, PathBuf},
11    sync::{Arc, mpsc},
12    time,
13};
14
/// Events sent from the comparison worker to the consumer of the channel.
///
/// `DirectoryComparer::run` consumes these to drive the progress display and
/// to build the final summary.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the first pair of entries is examined.
    StartOfComparison,
    /// Sent after each entry has been handled, whether it produced a
    /// `Result` or an `Error`; the consumer advances the progress bar by one.
    FileDone,
    /// Sent once the walk over both trees is finished; carries the number of
    /// entries that were enumerated.
    TotalFiles(usize),
    /// The outcome for one entry. The first field is the entry's sequence
    /// number, which `compare_streaming_ordered` uses as the ordering key.
    Result(usize, FileComparisonResult),
    /// Comparing one entry failed; the details are logged by the sender and
    /// the consumer only counts the failure.
    Error,
}
23
/// Methods for comparing files that exist on both sides.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time; file contents are not
    /// compared.
    Size,
    /// Compare by hash (BLAKE3), using `FileHasher` instances created with
    /// their hash cache.
    Hash,
    /// Compare by hash, without using the cached hashes: the cache (or the
    /// affected cache entries) is discarded before comparing.
    Rehash,
    /// Compare byte-by-byte. No hashers are created for this method.
    Full,
}
36
/// A tool for comparing the contents of two directories.
///
/// The public fields are configuration knobs; set them after calling
/// [`DirectoryComparer::new`] and before calling [`DirectoryComparer::run`].
pub struct DirectoryComparer {
    /// Left-hand side of the comparison. `run` also accepts a regular file
    /// here, in which case a single pair of files is compared.
    dir1: PathBuf,
    /// Right-hand side of the comparison (a directory, or a file when `dir1`
    /// is a file).
    dir2: PathBuf,
    /// How results are printed. `run` accepts only `OutputFormat::Default`
    /// and `OutputFormat::Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size handed to the file comparer and to the hashers
    /// (presumably in bytes; confirm against `FileComparer`).
    pub buffer_size: usize,
    /// How files present on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both directory iterators as their
    /// exclusion filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress reporting; when `None`, `run` uses a no-op progress.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` (presumably the number of
    /// worker threads; confirm against that helper).
    pub jobs: usize,
}
48
49impl DirectoryComparer {
    /// Value that `new` assigns to the `jobs` field.
    pub const DEFAULT_JOBS: usize = 8;
51
52    /// Creates a new `DirectoryComparer` for the two given directories.
53    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54        Self {
55            dir1,
56            dir2,
57            output_format: OutputFormat::Default,
58            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59            comparison_method: FileComparisonMethod::Hash,
60            exclude: None,
61            progress: None,
62            jobs: Self::DEFAULT_JOBS,
63        }
64    }
65
66    /// Executes the directory comparison and prints results to stdout.
67    /// This is a convenience method for CLI usage.
68    pub fn run(&self) -> anyhow::Result<()> {
69        match self.output_format {
70            OutputFormat::Default | OutputFormat::Symbol => {}
71            _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72        }
73        if self.dir1.is_file() {
74            return self.run_file_comparer();
75        }
76
77        let progress = self
78            .progress
79            .as_ref()
80            .map(|progress| progress.add_spinner())
81            .unwrap_or_else(Progress::none);
82        progress.set_message("Scanning directories...");
83        let start_time = std::time::Instant::now();
84        let mut summary = ComparisonSummary::default();
85        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87        let (tx, rx) = mpsc::channel();
88        std::thread::scope(|scope| {
89            scope.spawn(move || {
90                if let Err(e) = self.compare_streaming_ordered(tx) {
91                    log::error!("Error during comparison: {}", e);
92                }
93            });
94
95            // Receive results and update summary/UI
96            while let Ok(event) = rx.recv() {
97                match event {
98                    CompareProgress::StartOfComparison => {
99                        progress.set_message("Comparing files...");
100                    }
101                    CompareProgress::TotalFiles(total_files) => {
102                        progress.set_length(total_files as u64);
103                        progress.set_message("");
104                    }
105                    CompareProgress::Result(_, result) => {
106                        summary.update(&result);
107                        match self.output_format {
108                            OutputFormat::Symbol => progress.suspend_for(stdout(), || {
109                                println!(
110                                    "{} {}",
111                                    result.to_symbol_string(),
112                                    result.relative_path.display()
113                                );
114                            }),
115                            OutputFormat::Default => {
116                                if !result.is_identical() {
117                                    progress.suspend_for(stdout(), || {
118                                        println!(
119                                            "{}: {}",
120                                            result.relative_path.display(),
121                                            result.to_string(dir1_str, dir2_str)
122                                        );
123                                    });
124                                }
125                            }
126                            OutputFormat::Yaml => unreachable!(),
127                        }
128                    }
129                    CompareProgress::FileDone => progress.inc(1),
130                    CompareProgress::Error => summary.num_errors += 1,
131                }
132            }
133        });
134        progress.finish();
135        eprintln!("\n--- Comparison Summary ---");
136        summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
137        Ok(())
138    }
139
140    /// Performs the directory comparison and streams results via a channel.
141    ///
142    /// # Arguments
143    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
144    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
145        crate::sort_stream(
146            tx,
147            |tx_unordered| self.compare_streaming(tx_unordered),
148            |event| match event {
149                CompareProgress::Result(i, _) => Some(*i),
150                _ => None,
151            },
152        )
153    }
154
    /// Walks both directories and sends one `CompareProgress` event stream
    /// describing every entry found, in no guaranteed order.
    ///
    /// The two path streams are merged by comparing relative paths: an entry
    /// that sorts before the other side's current entry exists on one side
    /// only and is reported immediately; entries with equal relative paths
    /// are compared on the thread pool, so their results can arrive out of
    /// order. Every `Result` carries a sequence number assigned in merge order.
    ///
    /// NOTE(review): the merge is only correct if both `FileIterator`s yield
    /// paths in the same order as `Ord` on the relative paths; confirm there.
    ///
    /// # Errors
    /// Returns an error if the hashers or their caches cannot be prepared,
    /// the thread pool cannot be built, the receiver of `tx` has gone away,
    /// or the hash caches cannot be saved.
    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
        let mut it1 = FileIterator::new(&self.dir1);
        let mut it2 = FileIterator::new(&self.dir2);
        it1.exclude = self.exclude.as_ref();
        it2.exclude = self.exclude.as_ref();
        // Hashers exist only for the hash-based comparison methods.
        let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
        if let Some((h1, h2)) = &mut hashers {
            it1.cache = Some(h1.cache()?);
            it2.cache = Some(h2.cache()?);
            // Rehash mode: discard whatever was cached before comparing.
            if self.comparison_method == FileComparisonMethod::Rehash {
                h1.clear_cache()?;
                h2.clear_cache()?;
            }
        }
        let hashers_ref = hashers.as_ref();
        std::thread::scope(|global_scope| {
            // Each iterator delivers its paths through a receiver.
            let it1_rx = it1.spawn_in_scope(global_scope);
            let it2_rx = it2.spawn_in_scope(global_scope);
            let pool = crate::build_thread_pool(self.jobs)?;
            pool.scope(move |scope| {
                // `None` means the corresponding side is exhausted.
                let mut cur1 = it1_rx.recv().ok();
                let mut cur2 = it2_rx.recv().ok();
                // Sequence number of the next entry; also the final entry count.
                let mut index = 0;
                tx.send(CompareProgress::StartOfComparison)?;
                loop {
                    // Decide which side's current entry comes first.
                    let cmp = match (&cur1, &cur2) {
                        (Some(p1), Some(p2)) => {
                            let rel1 = crate::strip_prefix(p1, &self.dir1).unwrap();
                            let rel2 = crate::strip_prefix(p2, &self.dir2).unwrap();
                            rel1.cmp(rel2)
                        }
                        // One side exhausted: everything left is unique to the other.
                        (Some(_), None) => Ordering::Less,
                        (None, Some(_)) => Ordering::Greater,
                        (None, None) => break,
                    };
                    match cmp {
                        Ordering::Less => {
                            // Present only on the left; advance the left side.
                            let path1 = cur1.take().unwrap();
                            let rel1 = crate::strip_prefix(&path1, &self.dir1).unwrap();
                            let result =
                                FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::FileDone)?;
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                        }
                        Ordering::Greater => {
                            // Present only on the right; advance the right side.
                            let path2 = cur2.take().unwrap();
                            let rel2 = crate::strip_prefix(&path2, &self.dir2).unwrap();
                            let result =
                                FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::FileDone)?;
                            index += 1;
                            cur2 = it2_rx.recv().ok();
                        }
                        Ordering::Equal => {
                            // Present on both sides: compare on the pool and
                            // advance both sides without waiting for the result.
                            let path1 = cur1.take().unwrap();
                            let path2 = cur2.take().unwrap();
                            let buffer_size = self.buffer_size;
                            let tx_clone = tx.clone();
                            let i = index;
                            // `Size` mode skips the content comparison.
                            let should_compare =
                                self.comparison_method != FileComparisonMethod::Size;
                            scope.spawn(move |_| {
                                let mut comparer = FileComparer::new(&path1, &path2);
                                comparer.buffer_size = buffer_size;
                                if let Some((h1, h2)) = hashers_ref {
                                    comparer.hashers = Some((h1, h2));
                                }
                                let rel_path = crate::strip_prefix(&path1, &self.dir1).unwrap();
                                let mut result = FileComparisonResult::new(
                                    rel_path.into(),
                                    Classification::InBoth,
                                );
                                // NOTE(review): on failure, sequence number `i` is
                                // consumed without a `Result` event; confirm that
                                // `sort_stream` tolerates gaps in the sequence.
                                let event = match result.update(&comparer, should_compare) {
                                    Ok(_) => CompareProgress::Result(i, result),
                                    Err(error) => {
                                        log::error!(
                                            "Error comparing {:?}: {}",
                                            result.relative_path,
                                            error
                                        );
                                        CompareProgress::Error
                                    }
                                };
                                // A task cannot propagate the error, so a closed
                                // channel is only logged here.
                                if tx_clone.send(event).is_err()
                                    || tx_clone.send(CompareProgress::FileDone).is_err()
                                {
                                    log::error!("Send failed");
                                }
                            });
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                            cur2 = it2_rx.recv().ok();
                        }
                    }
                }
                // Sent when the merge loop ends, which may be before the
                // spawned comparisons have reported their results.
                tx.send(CompareProgress::TotalFiles(index))
            })?;
            Ok::<(), anyhow::Error>(())
        })?;

        // Reached only when everything above succeeded.
        Self::save_hashers(hashers)?;
        Ok(())
    }
261
262    fn get_hashers(
263        &self,
264        dir1: &Path,
265        dir2: &Path,
266    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
267        if self.comparison_method == FileComparisonMethod::Hash
268            || self.comparison_method == FileComparisonMethod::Rehash
269        {
270            let (h1_res, h2_res) = rayon::join(
271                || FileHasher::new_with_cache(&[dir1]),
272                || FileHasher::new_with_cache(&[dir2]),
273            );
274            let mut h1 = h1_res?;
275            let mut h2 = h2_res?;
276            h1.buffer_size = self.buffer_size;
277            h2.buffer_size = self.buffer_size;
278            if let Some(progress) = self.progress.as_ref() {
279                h1.progress = Some(Arc::clone(progress));
280                h2.progress = Some(Arc::clone(progress));
281            }
282            return Ok(Some((h1, h2)));
283        }
284        Ok(None)
285    }
286
287    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
288        if let Some((h1, h2)) = hashers {
289            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
290            r1?;
291            r2?;
292        }
293        Ok(())
294    }
295
296    fn run_file_comparer(&self) -> anyhow::Result<()> {
297        assert!(self.dir1.is_file());
298        let file1 = &self.dir1;
299        let dir1 = file1.parent().unwrap();
300        let file1_name = file1.file_name().unwrap();
301        let (dir2, file2) = if self.dir2.is_file() {
302            (self.dir2.parent().unwrap(), self.dir2.clone())
303        } else {
304            (self.dir2.as_path(), self.dir2.join(file1_name))
305        };
306
307        let mut comparer = FileComparer::new(file1, &file2);
308        comparer.buffer_size = self.buffer_size;
309        let mut hashers = self.get_hashers(dir1, dir2)?;
310        if let Some((h1, h2)) = &mut hashers {
311            if self.comparison_method == FileComparisonMethod::Rehash {
312                h1.remove_cache_entry(file1)?;
313                h2.remove_cache_entry(&file2)?;
314            }
315            comparer.hashers = Some((h1, h2));
316        }
317        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
318        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
319        result.update(&comparer, should_compare_content)?;
320        let file1_str = file1.to_str().unwrap_or("file1");
321        match self.output_format {
322            OutputFormat::Symbol => {
323                println!("{} {}", result.to_symbol_string(), file1_str);
324            }
325            OutputFormat::Default => {
326                let file2_str = file2.to_str().unwrap_or("file2");
327                println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
328            }
329            OutputFormat::Yaml => unreachable!(),
330        }
331        Self::save_hashers(hashers)?;
332        Ok(())
333    }
334}
335
/// Running totals over all comparison results, printed at the end of a run.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the left side.
    pub only_in_dir1: usize,
    /// Entries present only on the right side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Entries on both sides whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Entries on both sides whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Entries on both sides whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Entries of equal size whose contents were found to differ.
    pub diff_content: usize,
    /// Entries on both sides for which the modification time, the size, or
    /// (for equal sizes) the content comparison is unavailable.
    pub not_comparable: usize,
    /// Comparisons that failed; incremented by the caller, not by `update`.
    pub num_errors: usize,
}
349
350impl ComparisonSummary {
351    pub fn update(&mut self, result: &FileComparisonResult) {
352        match result.classification {
353            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
354            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
355            Classification::InBoth => {
356                self.in_both += 1;
357                let mut is_not_comparable = false;
358                match result.modified_time_comparison {
359                    Some(Ordering::Greater) => self.dir1_newer += 1,
360                    Some(Ordering::Less) => self.dir2_newer += 1,
361                    Some(Ordering::Equal) => {}
362                    None => is_not_comparable = true,
363                }
364                match result.size_comparison {
365                    Some(Ordering::Greater) => self.dir1_larger += 1,
366                    Some(Ordering::Less) => self.dir2_larger += 1,
367                    Some(Ordering::Equal) => match result.is_content_same {
368                        Some(false) => self.diff_content += 1,
369                        Some(true) => {}
370                        None => is_not_comparable = true,
371                    },
372                    None => is_not_comparable = true,
373                }
374                if is_not_comparable {
375                    self.not_comparable += 1;
376                }
377            }
378        }
379    }
380
381    pub fn print(
382        &self,
383        mut writer: impl std::io::Write,
384        start_time: &time::Instant,
385        dir1_name: &str,
386        dir2_name: &str,
387    ) -> std::io::Result<()> {
388        let values = [
389            ("Elapsed:", 0),
390            ("Files in both:", self.in_both),
391            ("Only in left:", self.only_in_dir1),
392            ("Only in right:", self.only_in_dir2),
393            ("Left is newer:", self.dir1_newer),
394            ("Right is newer:", self.dir2_newer),
395            ("Left is larger:", self.dir1_larger),
396            ("Right is larger:", self.dir2_larger),
397            ("Different content:", self.diff_content),
398            ("Not comparable:", self.not_comparable),
399            ("Errors:", self.num_errors),
400        ];
401        let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
402        formatter.write_value(&mut writer, "Left:", dir1_name)?;
403        formatter.write_value(&mut writer, "Right:", dir2_name)?;
404        formatter.write_value(
405            &mut writer,
406            values[0].0,
407            FormattedDuration(start_time.elapsed()),
408        )?;
409        formatter.write_values(&mut writer, &values[1..])?;
410        Ok(())
411    }
412}
413
414#[cfg(test)]
415mod tests {
416    use super::*;
417    use std::fs;
418    use std::io::Write;
419
420    #[test]
421    fn comparison_summary() {
422        let mut summary = ComparisonSummary::default();
423        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
424        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
425        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
426        res3.modified_time_comparison = Some(Ordering::Greater);
427
428        summary.update(&res1);
429        summary.update(&res2);
430        summary.update(&res3);
431
432        assert_eq!(summary.only_in_dir1, 1);
433        assert_eq!(summary.only_in_dir2, 1);
434        assert_eq!(summary.in_both, 1);
435        assert_eq!(summary.dir1_newer, 1);
436    }
437
438    #[test]
439    fn directory_comparer_integration() -> anyhow::Result<()> {
440        let dir1 = tempfile::tempdir()?;
441        let dir2 = tempfile::tempdir()?;
442
443        // Create files in dir1
444        let file1_path = dir1.path().join("same.txt");
445        let mut file1 = fs::File::create(&file1_path)?;
446        file1.write_all(b"same content")?;
447
448        let only1_path = dir1.path().join("only1.txt");
449        let mut only1 = fs::File::create(&only1_path)?;
450        only1.write_all(b"only in dir1")?;
451
452        // Create files in dir2
453        let file2_path = dir2.path().join("same.txt");
454        let mut file2 = fs::File::create(&file2_path)?;
455        file2.write_all(b"same content")?;
456
457        let only2_path = dir2.path().join("only2.txt");
458        let mut only2 = fs::File::create(&only2_path)?;
459        only2.write_all(b"only in dir2")?;
460
461        // Create a different file
462        let diff1_path = dir1.path().join("diff.txt");
463        let mut diff1 = fs::File::create(&diff1_path)?;
464        diff1.write_all(b"content 1")?;
465
466        let diff2_path = dir2.path().join("diff.txt");
467        let mut diff2 = fs::File::create(&diff2_path)?;
468        diff2.write_all(b"content 222")?; // different length and content
469
470        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
471        let (tx, rx) = mpsc::channel();
472
473        comparer.compare_streaming_ordered(tx)?;
474
475        let mut results = Vec::new();
476        while let Ok(res) = rx.recv() {
477            if let CompareProgress::Result(_, r) = res {
478                results.push(r);
479            }
480        }
481
482        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
483
484        assert_eq!(results.len(), 4);
485
486        // diff.txt
487        assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
488        assert_eq!(results[0].classification, Classification::InBoth);
489        assert!(
490            results[0].is_content_same == Some(false)
491                || results[0].size_comparison != Some(Ordering::Equal)
492        );
493
494        // only1.txt
495        assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
496        assert_eq!(results[1].classification, Classification::OnlyInDir1);
497
498        // only2.txt
499        assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
500        assert_eq!(results[2].classification, Classification::OnlyInDir2);
501
502        // same.txt
503        assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
504        assert_eq!(results[3].classification, Classification::InBoth);
505        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));
506
507        Ok(())
508    }
509
510    #[test]
511    fn directory_comparer_size_mode() -> anyhow::Result<()> {
512        let dir1 = tempfile::tempdir()?;
513        let dir2 = tempfile::tempdir()?;
514
515        let file1_path = dir1.path().join("file.txt");
516        let mut file1 = fs::File::create(&file1_path)?;
517        file1.write_all(b"content 1")?;
518
519        let file2_path = dir2.path().join("file.txt");
520        let mut file2 = fs::File::create(&file2_path)?;
521        file2.write_all(b"content 2")?; // same length, different content
522
523        let mut comparer =
524            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
525        comparer.comparison_method = FileComparisonMethod::Size;
526        let (tx, rx) = mpsc::channel();
527
528        comparer.compare_streaming_ordered(tx)?;
529
530        let mut results = Vec::new();
531        while let Ok(res) = rx.recv() {
532            if let CompareProgress::Result(_, r) = res {
533                results.push(r);
534            }
535        }
536
537        assert_eq!(results.len(), 1);
538        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
539        assert_eq!(results[0].classification, Classification::InBoth);
540        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
541        assert_eq!(results[0].is_content_same, None);
542
543        Ok(())
544    }
545}