Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3    OutputFormat, Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use simple_path::SimplePath;
8use std::{
9    cmp::Ordering,
10    io::{self, stdout},
11    path::{Path, PathBuf},
12    sync::{Arc, mpsc},
13    time,
14};
15
/// Events sent over the channel while a directory comparison is running.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the merge loop over the two directory listings starts.
    StartOfComparison,
    /// Sent after each entry has been handled (it follows that entry's `Result` or `Error`).
    FileDone,
    /// Sent after the merge loop with the number of entries that were enumerated.
    TotalFiles(usize),
    /// Outcome for one entry. The `usize` is the entry's position in the merged
    /// listing and is used as the ordering key by `compare_streaming_ordered`.
    Result(usize, FileComparisonResult),
    /// A pair of files could not be compared; the cause is logged where the event is sent.
    Error,
}
24
/// Methods for comparing files.
///
/// The method decides whether `FileHasher`s are created and whether file
/// content is compared at all.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time.
    ///
    /// No hashers are created and the content comparison is skipped.
    Size,
    /// Compare by hash (BLAKE3).
    ///
    /// Hashers are created with their cache enabled.
    Hash,
    /// Compare by hash, without using the cached hashes.
    ///
    /// For a directory comparison the hash caches are cleared first; for a
    /// single-file comparison only the cache entries of the two files are removed.
    Rehash,
    /// Compare byte-by-byte.
    ///
    /// No hashers are created.
    Full,
}
37
/// A tool for comparing the contents of two directories.
///
/// Create it with [`DirectoryComparer::new`], adjust the public fields as
/// needed, then call [`DirectoryComparer::run`].
pub struct DirectoryComparer {
    /// First path (reported as "Left" in the summary). If it is a regular
    /// file, `run` performs a single-file comparison instead.
    dir1: PathBuf,
    /// Second path (reported as "Right" in the summary). In single-file mode
    /// it may be a file, or a directory that is searched for a file with the
    /// same name as `dir1`.
    dir2: PathBuf,
    /// How results are printed; `run` accepts only `Default` and `Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size handed to each `FileComparer` and `FileHasher`.
    pub buffer_size: usize,
    /// How files that exist on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set passed to both directory iterators as their `exclude` filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; used for the spinner in `run` and shared with the hashers.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the comparison workers.
    pub jobs: usize,
}
49
50impl DirectoryComparer {
    /// Value of `jobs` that [`DirectoryComparer::new`] assigns by default.
    pub const DEFAULT_JOBS: usize = 8;
52
53    /// Creates a new `DirectoryComparer` for the two given directories.
54    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
55        Self {
56            dir1,
57            dir2,
58            output_format: OutputFormat::Default,
59            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
60            comparison_method: FileComparisonMethod::Hash,
61            exclude: None,
62            progress: None,
63            jobs: Self::DEFAULT_JOBS,
64        }
65    }
66
67    /// Executes the directory comparison and prints results to stdout.
68    /// This is a convenience method for CLI usage.
69    pub fn run(&self) -> anyhow::Result<()> {
70        match self.output_format {
71            OutputFormat::Default | OutputFormat::Symbol => {}
72            _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
73        }
74        if self.dir1.is_file() {
75            return self.run_file_comparer();
76        }
77
78        let progress = self
79            .progress
80            .as_ref()
81            .map(|progress| progress.add_spinner())
82            .unwrap_or_else(Progress::none);
83        progress.set_message("Scanning directories...");
84        let start_time = std::time::Instant::now();
85        let mut summary = ComparisonSummary::default();
86        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
87        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
88        let (tx, rx) = mpsc::channel();
89        std::thread::scope(|scope| {
90            scope.spawn(move || {
91                if let Err(e) = self.compare_streaming_ordered(tx) {
92                    log::error!("Error during comparison: {}", e);
93                }
94            });
95
96            // Receive results and update summary/UI
97            while let Ok(event) = rx.recv() {
98                match event {
99                    CompareProgress::StartOfComparison => {
100                        progress.set_message("Comparing files...");
101                    }
102                    CompareProgress::TotalFiles(total_files) => {
103                        progress.set_length(total_files as u64);
104                        progress.set_message("");
105                    }
106                    CompareProgress::Result(_, result) => {
107                        summary.update(&result);
108                        match self.output_format {
109                            OutputFormat::Symbol => progress.suspend_for(stdout(), || {
110                                println!(
111                                    "{} {}",
112                                    result.to_symbol_string(),
113                                    result.relative_path.display()
114                                );
115                            }),
116                            OutputFormat::Default => {
117                                if !result.is_identical() {
118                                    progress.suspend_for(stdout(), || {
119                                        println!(
120                                            "{}: {}",
121                                            result.relative_path.display(),
122                                            result.to_string(dir1_str, dir2_str)
123                                        );
124                                    });
125                                }
126                            }
127                            OutputFormat::Yaml => unreachable!(),
128                        }
129                    }
130                    CompareProgress::FileDone => progress.inc(1),
131                    CompareProgress::Error => summary.num_errors += 1,
132                }
133            }
134        });
135        progress.finish();
136        eprintln!("\n--- Comparison Summary ---");
137        summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
138        Ok(())
139    }
140
141    /// Performs the directory comparison and streams results via a channel.
142    ///
143    /// # Arguments
144    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
145    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
146        crate::sort_stream(
147            tx,
148            |tx_unordered| self.compare_streaming(tx_unordered),
149            |event| match event {
150                CompareProgress::Result(i, _) => Some(*i),
151                _ => None,
152            },
153        )
154    }
155
    /// Walks both directory trees in lockstep and sends a `CompareProgress`
    /// event for every entry, in no guaranteed order.
    ///
    /// Entries found on only one side are reported directly from the merging
    /// closure. Entries found on both sides are compared by tasks spawned on
    /// the thread pool, so their events may arrive out of order; each `Result`
    /// carries the entry's merge index so a consumer can restore the order.
    ///
    /// When hashers are in use, their caches are saved after the walk finishes.
    ///
    /// # Errors
    /// Returns an error if the hashers or their caches cannot be set up, the
    /// thread pool cannot be built, sending on `tx` fails in the merge loop,
    /// or saving the hash caches fails. Failures while comparing an individual
    /// pair are logged and reported as `CompareProgress::Error` instead.
    ///
    /// # Panics
    /// Panics if a path yielded by an iterator cannot be stripped of its
    /// directory prefix.
    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
        let mut it1 = FileIterator::new(&self.dir1);
        let mut it2 = FileIterator::new(&self.dir2);
        it1.exclude = self.exclude.as_ref();
        it2.exclude = self.exclude.as_ref();
        let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
        if let Some((h1, h2)) = &mut hashers {
            // Give each iterator access to the cache of the hasher for its side.
            it1.cache = Some(h1.cache()?);
            it2.cache = Some(h2.cache()?);
            if self.comparison_method == FileComparisonMethod::Rehash {
                // Rehash must not reuse previously stored hashes.
                h1.clear_cache()?;
                h2.clear_cache()?;
            }
        }
        // Shared view of the hashers for the worker tasks; `hashers` itself is
        // consumed by `save_hashers` after the scope ends.
        let hashers_ref = hashers.as_ref();
        std::thread::scope(|global_scope| {
            let it1_rx = it1.spawn_in_scope(global_scope);
            let it2_rx = it2.spawn_in_scope(global_scope);
            let pool = crate::build_thread_pool(self.jobs)?;
            pool.scope(move |scope| {
                // `cur1`/`cur2` hold the next unprocessed path from each side;
                // `None` means that side is exhausted (its channel closed).
                let mut cur1 = it1_rx.recv().ok();
                let mut cur2 = it2_rx.recv().ok();
                // Position of the current entry in the merged listing.
                let mut index = 0;
                tx.send(CompareProgress::StartOfComparison)?;
                loop {
                    // Decide which side to advance by comparing relative paths.
                    // NOTE(review): this merge assumes both iterators yield
                    // paths in an order consistent with this comparison — confirm.
                    let cmp = match (&cur1, &cur2) {
                        (Some(p1), Some(p2)) => {
                            let rel1 = SimplePath::strip_prefix(p1, &self.dir1).unwrap();
                            let rel2 = SimplePath::strip_prefix(p2, &self.dir2).unwrap();
                            rel1.cmp(rel2)
                        }
                        (Some(_), None) => Ordering::Less,
                        (None, Some(_)) => Ordering::Greater,
                        (None, None) => break,
                    };
                    match cmp {
                        // Path exists only on the first side.
                        Ordering::Less => {
                            let path1 = cur1.take().unwrap();
                            let rel1 = SimplePath::strip_prefix(&path1, &self.dir1).unwrap();
                            let result =
                                FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::FileDone)?;
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                        }
                        // Path exists only on the second side.
                        Ordering::Greater => {
                            let path2 = cur2.take().unwrap();
                            let rel2 = SimplePath::strip_prefix(&path2, &self.dir2).unwrap();
                            let result =
                                FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::FileDone)?;
                            index += 1;
                            cur2 = it2_rx.recv().ok();
                        }
                        // Same relative path on both sides: compare on the pool.
                        Ordering::Equal => {
                            let path1 = cur1.take().unwrap();
                            let path2 = cur2.take().unwrap();
                            let buffer_size = self.buffer_size;
                            let tx_clone = tx.clone();
                            let i = index;
                            // Size mode skips the content comparison.
                            let should_compare =
                                self.comparison_method != FileComparisonMethod::Size;
                            scope.spawn(move |_| {
                                let mut comparer = FileComparer::new(&path1, &path2);
                                comparer.buffer_size = buffer_size;
                                if let Some((h1, h2)) = hashers_ref {
                                    comparer.hashers = Some((h1, h2));
                                }
                                let rel_path =
                                    SimplePath::strip_prefix(&path1, &self.dir1).unwrap();
                                let mut result = FileComparisonResult::new(
                                    rel_path.into(),
                                    Classification::InBoth,
                                );
                                // A failed comparison is logged and reported as
                                // `Error`, which carries no index.
                                let event = match result.update(&comparer, should_compare) {
                                    Ok(_) => CompareProgress::Result(i, result),
                                    Err(error) => {
                                        log::error!(
                                            "Error comparing {:?}: {}",
                                            result.relative_path,
                                            error
                                        );
                                        CompareProgress::Error
                                    }
                                };
                                // A task cannot propagate an error with `?`, so a
                                // failed send is only logged.
                                if tx_clone.send(event).is_err()
                                    || tx_clone.send(CompareProgress::FileDone).is_err()
                                {
                                    log::error!("Send failed");
                                }
                            });
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                            cur2 = it2_rx.recv().ok();
                        }
                    }
                }
                // Sent once enumeration is complete; spawned comparisons may
                // still be running at this point.
                tx.send(CompareProgress::TotalFiles(index))
            })?;
            Ok::<(), anyhow::Error>(())
        })?;

        Self::save_hashers(hashers)?;
        Ok(())
    }
263
264    fn get_hashers(
265        &self,
266        dir1: &Path,
267        dir2: &Path,
268    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
269        if self.comparison_method == FileComparisonMethod::Hash
270            || self.comparison_method == FileComparisonMethod::Rehash
271        {
272            let (h1_res, h2_res) = rayon::join(
273                || FileHasher::new_with_cache(&[dir1]),
274                || FileHasher::new_with_cache(&[dir2]),
275            );
276            let mut h1 = h1_res?;
277            let mut h2 = h2_res?;
278            h1.buffer_size = self.buffer_size;
279            h2.buffer_size = self.buffer_size;
280            if let Some(progress) = self.progress.as_ref() {
281                h1.progress = Some(Arc::clone(progress));
282                h2.progress = Some(Arc::clone(progress));
283            }
284            return Ok(Some((h1, h2)));
285        }
286        Ok(None)
287    }
288
289    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
290        if let Some((h1, h2)) = hashers {
291            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
292            r1?;
293            r2?;
294        }
295        Ok(())
296    }
297
298    fn run_file_comparer(&self) -> anyhow::Result<()> {
299        assert!(self.dir1.is_file());
300        let file1 = &self.dir1;
301        let dir1 = file1.parent().unwrap();
302        let file1_name = file1.file_name().unwrap();
303        let (dir2, file2) = if self.dir2.is_file() {
304            (self.dir2.parent().unwrap(), self.dir2.clone())
305        } else {
306            (self.dir2.as_path(), self.dir2.join(file1_name))
307        };
308
309        let mut comparer = FileComparer::new(file1, &file2);
310        comparer.buffer_size = self.buffer_size;
311        let mut hashers = self.get_hashers(dir1, dir2)?;
312        if let Some((h1, h2)) = &mut hashers {
313            if self.comparison_method == FileComparisonMethod::Rehash {
314                h1.remove_cache_entry(file1)?;
315                h2.remove_cache_entry(&file2)?;
316            }
317            comparer.hashers = Some((h1, h2));
318        }
319        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
320        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
321        result.update(&comparer, should_compare_content)?;
322        let file1_str = file1.to_str().unwrap_or("file1");
323        match self.output_format {
324            OutputFormat::Symbol => {
325                println!("{} {}", result.to_symbol_string(), file1_str);
326            }
327            OutputFormat::Default => {
328                let file2_str = file2.to_str().unwrap_or("file2");
329                println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
330            }
331            OutputFormat::Yaml => unreachable!(),
332        }
333        Self::save_hashers(hashers)?;
334        Ok(())
335    }
336}
337
/// Running totals for a directory comparison, printed at the end of `run`.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the first ("left") side.
    pub only_in_dir1: usize,
    /// Entries present only on the second ("right") side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Entries on both sides whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Entries on both sides whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Entries on both sides whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Entries with equal size whose content was found to differ.
    pub diff_content: usize,
    /// Entries on both sides where the modification time, the size, or (for
    /// equal sizes) the content comparison is unknown.
    pub not_comparable: usize,
    /// Number of `CompareProgress::Error` events received.
    pub num_errors: usize,
}
351
352impl ComparisonSummary {
353    pub fn update(&mut self, result: &FileComparisonResult) {
354        match result.classification {
355            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
356            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
357            Classification::InBoth => {
358                self.in_both += 1;
359                let mut is_not_comparable = false;
360                match result.modified_time_comparison {
361                    Some(Ordering::Greater) => self.dir1_newer += 1,
362                    Some(Ordering::Less) => self.dir2_newer += 1,
363                    Some(Ordering::Equal) => {}
364                    None => is_not_comparable = true,
365                }
366                match result.size_comparison {
367                    Some(Ordering::Greater) => self.dir1_larger += 1,
368                    Some(Ordering::Less) => self.dir2_larger += 1,
369                    Some(Ordering::Equal) => match result.is_content_same {
370                        Some(false) => self.diff_content += 1,
371                        Some(true) => {}
372                        None => is_not_comparable = true,
373                    },
374                    None => is_not_comparable = true,
375                }
376                if is_not_comparable {
377                    self.not_comparable += 1;
378                }
379            }
380        }
381    }
382
383    pub fn print(
384        &self,
385        mut writer: impl std::io::Write,
386        start_time: &time::Instant,
387        dir1_name: &str,
388        dir2_name: &str,
389    ) -> std::io::Result<()> {
390        let values = [
391            ("Elapsed:", 0),
392            ("Files in both:", self.in_both),
393            ("Only in left:", self.only_in_dir1),
394            ("Only in right:", self.only_in_dir2),
395            ("Left is newer:", self.dir1_newer),
396            ("Right is newer:", self.dir2_newer),
397            ("Left is larger:", self.dir1_larger),
398            ("Right is larger:", self.dir2_larger),
399            ("Different content:", self.diff_content),
400            ("Not comparable:", self.not_comparable),
401            ("Errors:", self.num_errors),
402        ];
403        let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
404        formatter.write_value(&mut writer, "Left:", dir1_name)?;
405        formatter.write_value(&mut writer, "Right:", dir2_name)?;
406        formatter.write_value(
407            &mut writer,
408            values[0].0,
409            FormattedDuration(start_time.elapsed()),
410        )?;
411        formatter.write_values(&mut writer, &values[1..])?;
412        Ok(())
413    }
414}
415
416#[cfg(test)]
417mod tests {
418    use super::*;
419    use std::fs;
420    use std::io::Write;
421
422    #[test]
423    fn comparison_summary() {
424        let mut summary = ComparisonSummary::default();
425        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
426        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
427        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
428        res3.modified_time_comparison = Some(Ordering::Greater);
429
430        summary.update(&res1);
431        summary.update(&res2);
432        summary.update(&res3);
433
434        assert_eq!(summary.only_in_dir1, 1);
435        assert_eq!(summary.only_in_dir2, 1);
436        assert_eq!(summary.in_both, 1);
437        assert_eq!(summary.dir1_newer, 1);
438    }
439
440    #[test]
441    fn directory_comparer_integration() -> anyhow::Result<()> {
442        let dir1 = tempfile::tempdir()?;
443        let dir2 = tempfile::tempdir()?;
444
445        // Create files in dir1
446        let file1_path = dir1.path().join("same.txt");
447        let mut file1 = fs::File::create(&file1_path)?;
448        file1.write_all(b"same content")?;
449
450        let only1_path = dir1.path().join("only1.txt");
451        let mut only1 = fs::File::create(&only1_path)?;
452        only1.write_all(b"only in dir1")?;
453
454        // Create files in dir2
455        let file2_path = dir2.path().join("same.txt");
456        let mut file2 = fs::File::create(&file2_path)?;
457        file2.write_all(b"same content")?;
458
459        let only2_path = dir2.path().join("only2.txt");
460        let mut only2 = fs::File::create(&only2_path)?;
461        only2.write_all(b"only in dir2")?;
462
463        // Create a different file
464        let diff1_path = dir1.path().join("diff.txt");
465        let mut diff1 = fs::File::create(&diff1_path)?;
466        diff1.write_all(b"content 1")?;
467
468        let diff2_path = dir2.path().join("diff.txt");
469        let mut diff2 = fs::File::create(&diff2_path)?;
470        diff2.write_all(b"content 222")?; // different length and content
471
472        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
473        let (tx, rx) = mpsc::channel();
474
475        comparer.compare_streaming_ordered(tx)?;
476
477        let mut results = Vec::new();
478        while let Ok(res) = rx.recv() {
479            if let CompareProgress::Result(_, r) = res {
480                results.push(r);
481            }
482        }
483
484        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
485
486        assert_eq!(results.len(), 4);
487
488        // diff.txt
489        assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
490        assert_eq!(results[0].classification, Classification::InBoth);
491        assert!(
492            results[0].is_content_same == Some(false)
493                || results[0].size_comparison != Some(Ordering::Equal)
494        );
495
496        // only1.txt
497        assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
498        assert_eq!(results[1].classification, Classification::OnlyInDir1);
499
500        // only2.txt
501        assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
502        assert_eq!(results[2].classification, Classification::OnlyInDir2);
503
504        // same.txt
505        assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
506        assert_eq!(results[3].classification, Classification::InBoth);
507        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));
508
509        Ok(())
510    }
511
512    #[test]
513    fn directory_comparer_size_mode() -> anyhow::Result<()> {
514        let dir1 = tempfile::tempdir()?;
515        let dir2 = tempfile::tempdir()?;
516
517        let file1_path = dir1.path().join("file.txt");
518        let mut file1 = fs::File::create(&file1_path)?;
519        file1.write_all(b"content 1")?;
520
521        let file2_path = dir2.path().join("file.txt");
522        let mut file2 = fs::File::create(&file2_path)?;
523        file2.write_all(b"content 2")?; // same length, different content
524
525        let mut comparer =
526            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
527        comparer.comparison_method = FileComparisonMethod::Size;
528        let (tx, rx) = mpsc::channel();
529
530        comparer.compare_streaming_ordered(tx)?;
531
532        let mut results = Vec::new();
533        while let Ok(res) = rx.recv() {
534            if let CompareProgress::Result(_, r) = res {
535                results.push(r);
536            }
537        }
538
539        assert_eq!(results.len(), 1);
540        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
541        assert_eq!(results[0].classification, Classification::InBoth);
542        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
543        assert_eq!(results[0].is_content_same, None);
544
545        Ok(())
546    }
547}