Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, ProgressReporter,
3};
4use globset::GlobSet;
5
6use std::cmp::Ordering;
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::mpsc;
10
11#[derive(Debug, Clone)]
12enum CompareProgress {
13    StartOfComparison,
14    FileDone,
15    TotalFiles(usize),
16    Result(usize, FileComparisonResult),
17}
18
/// Strategies for deciding whether two files that exist on both sides are the same.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time; file contents are not compared.
    Size,
    /// Compare by hash (BLAKE3); the hashers' caches are used.
    Hash,
    /// Compare by hash, discarding cached hashes before comparing.
    Rehash,
    /// Compare byte-by-byte; no hashers are created.
    Full,
}
31
32/// A tool for comparing the contents of two directories.
33pub struct DirectoryComparer {
34    dir1: PathBuf,
35    dir2: PathBuf,
36    pub is_symbols_format: bool,
37    pub buffer_size: usize,
38    pub comparison_method: FileComparisonMethod,
39    pub exclude: Option<GlobSet>,
40}
41
42impl DirectoryComparer {
43    /// Creates a new `DirectoryComparer` for the two given directories.
44    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
45        Self {
46            dir1,
47            dir2,
48            is_symbols_format: false,
49            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
50            comparison_method: FileComparisonMethod::Hash,
51            exclude: None,
52        }
53    }
54
55    /// Sets the maximum number of threads for parallel processing.
56    /// This initializes the global Rayon thread pool.
57    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
58        rayon::ThreadPoolBuilder::new()
59            .num_threads(parallel)
60            .build_global()
61            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
62        Ok(())
63    }
64
65    /// Executes the directory comparison and prints results to stdout.
66    /// This is a convenience method for CLI usage.
67    pub fn run(&self) -> anyhow::Result<()> {
68        if self.dir1.is_file() {
69            return self.run_file_comparer();
70        }
71
72        let progress = ProgressReporter::new();
73        progress.set_message("Scanning directories...");
74        let start_time = std::time::Instant::now();
75        let mut summary = ComparisonSummary::default();
76        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
77        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
78        let (tx, rx) = mpsc::channel();
79        std::thread::scope(|scope| {
80            scope.spawn(move || {
81                if let Err(e) = self.compare_streaming_ordered(tx) {
82                    log::error!("Error during comparison: {}", e);
83                }
84            });
85
86            // Receive results and update summary/UI
87            while let Ok(event) = rx.recv() {
88                match event {
89                    CompareProgress::StartOfComparison => {
90                        progress.set_message("Comparing files...");
91                    }
92                    CompareProgress::TotalFiles(total_files) => {
93                        progress.set_length(total_files as u64);
94                        progress.set_message("");
95                    }
96                    CompareProgress::Result(_, result) => {
97                        summary.update(&result);
98                        if self.is_symbols_format {
99                            progress.suspend(|| {
100                                println!(
101                                    "{} {}",
102                                    result.to_symbol_string(),
103                                    result.relative_path.display()
104                                );
105                            })
106                        } else if !result.is_identical() {
107                            progress.suspend(|| {
108                                println!(
109                                    "{}: {}",
110                                    result.relative_path.display(),
111                                    result.to_string(dir1_str, dir2_str)
112                                );
113                            });
114                        }
115                    }
116                    CompareProgress::FileDone => progress.inc(1),
117                }
118            }
119        });
120        progress.finish();
121        eprintln!("\n--- Comparison Summary ---");
122        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
123        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
124        Ok(())
125    }
126
127    /// Performs the directory comparison and streams results via a channel.
128    ///
129    /// # Arguments
130    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
131    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
132        let (tx_unordered, rx_unordered) = mpsc::channel();
133        std::thread::scope(|scope| {
134            scope.spawn(move || {
135                if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
136                    log::error!("Error during unordered comparison: {}", e);
137                }
138            });
139
140            let mut buffer = HashMap::new();
141            let mut next_index = 0;
142            for event in rx_unordered {
143                if let CompareProgress::Result(i, _) = &event {
144                    let index = *i;
145                    if index == next_index {
146                        tx.send(event)?;
147                        next_index += 1;
148                        while let Some(buffered) = buffer.remove(&next_index) {
149                            tx.send(buffered)?;
150                            next_index += 1;
151                        }
152                    } else {
153                        buffer.insert(index, event);
154                    }
155                } else {
156                    tx.send(event)?;
157                }
158            }
159            Ok::<(), anyhow::Error>(())
160        })?;
161        Ok(())
162    }
163
164    fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
165        let mut it1 = FileIterator::new(self.dir1.clone());
166        let mut it2 = FileIterator::new(self.dir2.clone());
167        it1.exclude = self.exclude.as_ref();
168        it2.exclude = self.exclude.as_ref();
169        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
170        if let Some((h1, h2)) = &hashers {
171            it1.hasher = Some(h1);
172            it2.hasher = Some(h2);
173            if self.comparison_method == FileComparisonMethod::Rehash {
174                h1.clear_cache()?;
175                h2.clear_cache()?;
176            }
177        }
178
179        let mut cur1 = it1.next();
180        let mut cur2 = it2.next();
181        let mut index = 0;
182        tx.send(CompareProgress::StartOfComparison)?;
183        rayon::scope(|scope| {
184            loop {
185                let cmp = match (&cur1, &cur2) {
186                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
187                    (Some(_), None) => Ordering::Less,
188                    (None, Some(_)) => Ordering::Greater,
189                    (None, None) => break,
190                };
191                match cmp {
192                    Ordering::Less => {
193                        let (rel1, _) = cur1.take().unwrap();
194                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
195                        tx.send(CompareProgress::Result(index, result))?;
196                        tx.send(CompareProgress::FileDone)?;
197                        index += 1;
198                        cur1 = it1.next();
199                    }
200                    Ordering::Greater => {
201                        let (rel2, _) = cur2.take().unwrap();
202                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
203                        tx.send(CompareProgress::Result(index, result))?;
204                        tx.send(CompareProgress::FileDone)?;
205                        index += 1;
206                        cur2 = it2.next();
207                    }
208                    Ordering::Equal => {
209                        let (rel_path, path1) = cur1.take().unwrap();
210                        let (_, path2) = cur2.take().unwrap();
211                        let buffer_size = self.buffer_size;
212                        let tx_clone = tx.clone();
213                        let i = index;
214                        let should_compare = self.comparison_method != FileComparisonMethod::Size;
215                        let hashers_ref = hashers.as_ref();
216                        scope.spawn(move |_| {
217                            let mut comparer = FileComparer::new(&path1, &path2);
218                            comparer.buffer_size = buffer_size;
219                            if let Some((h1, h2)) = hashers_ref {
220                                comparer.hashers = Some((h1, h2));
221                            }
222                            let mut result =
223                                FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
224                            if let Err(error) = result.update(&comparer, should_compare) {
225                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
226                            }
227                            if tx_clone.send(CompareProgress::Result(i, result)).is_err()
228                                || tx_clone.send(CompareProgress::FileDone).is_err()
229                            {
230                                log::error!("Send failed during comparison of {:?}", rel_path);
231                            }
232                        });
233                        index += 1;
234                        cur1 = it1.next();
235                        cur2 = it2.next();
236                    }
237                }
238            }
239            tx.send(CompareProgress::TotalFiles(index))
240        })?;
241        Self::save_hashers(hashers)?;
242        Ok(())
243    }
244
245    fn get_hashers(
246        &self,
247        dir1: &Path,
248        dir2: &Path,
249    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
250        if self.comparison_method == FileComparisonMethod::Hash
251            || self.comparison_method == FileComparisonMethod::Rehash
252        {
253            let (mut h1, mut h2) = rayon::join(
254                || FileHasher::new(dir1.to_path_buf()),
255                || FileHasher::new(dir2.to_path_buf()),
256            );
257            h1.buffer_size = self.buffer_size;
258            h2.buffer_size = self.buffer_size;
259            return Ok(Some((h1, h2)));
260        }
261        Ok(None)
262    }
263
264    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
265        if let Some((h1, h2)) = hashers {
266            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
267            r1?;
268            r2?;
269        }
270        Ok(())
271    }
272
273    fn run_file_comparer(&self) -> anyhow::Result<()> {
274        assert!(self.dir1.is_file());
275        let file1 = &self.dir1;
276        let dir1 = file1.parent().unwrap();
277        let file1_name = file1.file_name().unwrap();
278        let (dir2, file2) = if self.dir2.is_file() {
279            (self.dir2.parent().unwrap(), self.dir2.clone())
280        } else {
281            (self.dir2.as_path(), self.dir2.join(file1_name))
282        };
283
284        let mut comparer = FileComparer::new(file1, &file2);
285        comparer.buffer_size = self.buffer_size;
286        let hashers = self.get_hashers(dir1, dir2)?;
287        if let Some((h1, h2)) = &hashers {
288            if self.comparison_method == FileComparisonMethod::Rehash {
289                h1.remove_cache_entry(file1)?;
290                h2.remove_cache_entry(&file2)?;
291            }
292            comparer.hashers = Some((h1, h2));
293        }
294        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
295        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
296        result.update(&comparer, should_compare_content)?;
297        let file1_str = file1.to_str().unwrap_or("file1");
298        if self.is_symbols_format {
299            println!("{} {}", result.to_symbol_string(), file1_str);
300        } else {
301            let file2_str = file2.to_str().unwrap_or("file2");
302            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
303        }
304        Self::save_hashers(hashers)?;
305        Ok(())
306    }
307}
308
/// Running totals of comparison outcomes, one counter per category.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the left side.
    pub only_in_dir1: usize,
    /// Entries present only on the right side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose left copy has the later modification time.
    pub dir1_newer: usize,
    /// Entries on both sides whose right copy has the later modification time.
    pub dir2_newer: usize,
    /// Entries on both sides whose left copy is larger.
    pub dir1_larger: usize,
    /// Entries on both sides whose right copy is larger.
    pub dir2_larger: usize,
    /// Entries of equal size whose contents differ.
    pub diff_content: usize,
    /// Entries on both sides for which at least one comparison has no outcome.
    pub not_comparable: usize,
}
321
322impl ComparisonSummary {
323    pub fn update(&mut self, result: &FileComparisonResult) {
324        match result.classification {
325            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
326            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
327            Classification::InBoth => {
328                self.in_both += 1;
329                let mut is_not_comparable = false;
330                match result.modified_time_comparison {
331                    Some(Ordering::Greater) => self.dir1_newer += 1,
332                    Some(Ordering::Less) => self.dir2_newer += 1,
333                    Some(Ordering::Equal) => {}
334                    None => is_not_comparable = true,
335                }
336                match result.size_comparison {
337                    Some(Ordering::Greater) => self.dir1_larger += 1,
338                    Some(Ordering::Less) => self.dir2_larger += 1,
339                    Some(Ordering::Equal) => match result.is_content_same {
340                        Some(false) => self.diff_content += 1,
341                        Some(true) => {}
342                        None => is_not_comparable = true,
343                    },
344                    None => is_not_comparable = true,
345                }
346                if is_not_comparable {
347                    self.not_comparable += 1;
348                }
349            }
350        }
351    }
352
353    pub fn print(
354        &self,
355        mut writer: impl std::io::Write,
356        dir1_name: &str,
357        dir2_name: &str,
358    ) -> std::io::Result<()> {
359        let values = [
360            ("Files in both:", self.in_both),
361            ("Only in left:", self.only_in_dir1),
362            ("Only in right:", self.only_in_dir2),
363            ("Left is newer:", self.dir1_newer),
364            ("Right is newer:", self.dir2_newer),
365            ("Left is larger:", self.dir1_larger),
366            ("Right is larger:", self.dir2_larger),
367            ("Different content:", self.diff_content),
368            ("Not comparable:", self.not_comparable),
369        ];
370        let max_len = values.iter().map(|(s, _)| s.len()).max().unwrap();
371        writeln!(writer, "{:width$} {}", "Left:", dir1_name, width = max_len)?;
372        writeln!(writer, "{:width$} {}", "Right:", dir2_name, width = max_len)?;
373        for (label, value) in values {
374            writeln!(writer, "{:width$} {}", label, value, width = max_len)?;
375        }
376        Ok(())
377    }
378}
379
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Runs the ordered streaming comparison to completion and returns only
    /// the per-file results, in the order they were delivered.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        // The sender has been dropped by now, so the iterator ends after the
        // last queued event.
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                CompareProgress::StartOfComparison
                | CompareProgress::FileDone
                | CompareProgress::TotalFiles(_) => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_left = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_left.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &newer_left] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let left = tempfile::tempdir()?;
        let right = tempfile::tempdir()?;

        // Identical on both sides, plus one file unique to each side.
        fs::write(left.path().join("same.txt"), b"same content")?;
        fs::write(left.path().join("only1.txt"), b"only in dir1")?;
        fs::write(right.path().join("same.txt"), b"same content")?;
        fs::write(right.path().join("only2.txt"), b"only in dir2")?;

        // Same name, different length and content.
        fs::write(left.path().join("diff.txt"), b"content 1")?;
        fs::write(right.path().join("diff.txt"), b"content 222")?;

        let comparer =
            DirectoryComparer::new(left.path().to_path_buf(), right.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        let expected = [
            ("diff.txt", Classification::InBoth),
            ("only1.txt", Classification::OnlyInDir1),
            ("only2.txt", Classification::OnlyInDir2),
            ("same.txt", Classification::InBoth),
        ];
        assert_eq!(results.len(), expected.len());
        for (result, (name, classification)) in results.iter().zip(expected) {
            assert_eq!(result.relative_path.to_str(), Some(name));
            assert_eq!(result.classification, classification);
        }

        // diff.txt must be reported as differing, by content or by size.
        let diff = &results[0];
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        // same.txt has the same size on both sides.
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let left = tempfile::tempdir()?;
        let right = tempfile::tempdir()?;

        // Same length, different content: size mode must not look inside.
        fs::write(left.path().join("file.txt"), b"content 1")?;
        fs::write(right.path().join("file.txt"), b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(left.path().to_path_buf(), right.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str(), Some("file.txt"));
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}