Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{
2    Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, Progress,
3    ProgressBuilder,
4};
5use globset::GlobSet;
6
7use std::cmp::Ordering;
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, mpsc};
11
/// Events sent over the internal channels while a directory comparison runs.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the merge loop over the two directory listings starts.
    StartOfComparison,
    /// Sent after every `Result`, whether the file exists in one directory or in both.
    FileDone,
    /// Number of results produced; sent once both directory listings are exhausted.
    TotalFiles(usize),
    /// Comparison result for one relative path, tagged with its zero-based sequence
    /// index so the consumer can restore the listing order.
    Result(usize, FileComparisonResult),
}
19
/// Methods for comparing files.
///
/// The method decides whether file contents are looked at and whether
/// hashers (with their caches) are created for the two sides.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Compare only size and modification time.
    ///
    /// Content comparison is skipped and no hashers are created.
    Size,
    /// Compare by hash (BLAKE3).
    ///
    /// One hasher per side is created for the comparison.
    Hash,
    /// Compare by hash, without using the cached hashes.
    ///
    /// The hash cache is cleared (or, for a single file pair, the entries of
    /// the two files are removed) before comparing.
    Rehash,
    /// Compare byte-by-byte.
    ///
    /// Content is compared, but no hashers are created.
    Full,
}
32
/// A tool for comparing the contents of two directories.
///
/// The public fields are plain options; set them after calling
/// `DirectoryComparer::new` and before running the comparison.
pub struct DirectoryComparer {
    // Left-hand side; if this is a file, `run` compares a single file pair instead.
    dir1: PathBuf,
    // Right-hand side; a directory, or a file when comparing a single file pair.
    dir2: PathBuf,
    /// When `true`, every result is printed as a symbol string followed by its
    /// path; otherwise only non-identical entries are printed (directory mode).
    pub is_symbols_format: bool,
    /// Buffer size handed to the file comparer and to the hashers.
    pub buffer_size: usize,
    /// How files present on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set passed to both directory iterators as their exclude filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; used for the spinner in `run` and shared with the hashers.
    pub progress: Option<Arc<ProgressBuilder>>,
}
43
44impl DirectoryComparer {
45    /// Creates a new `DirectoryComparer` for the two given directories.
46    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
47        Self {
48            dir1,
49            dir2,
50            is_symbols_format: false,
51            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
52            comparison_method: FileComparisonMethod::Hash,
53            exclude: None,
54            progress: None,
55        }
56    }
57
58    /// Sets the maximum number of threads for parallel processing.
59    /// This initializes the global Rayon thread pool.
60    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
61        rayon::ThreadPoolBuilder::new()
62            .num_threads(parallel)
63            .build_global()
64            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
65        Ok(())
66    }
67
68    /// Executes the directory comparison and prints results to stdout.
69    /// This is a convenience method for CLI usage.
70    pub fn run(&self) -> anyhow::Result<()> {
71        if self.dir1.is_file() {
72            return self.run_file_comparer();
73        }
74
75        let progress = self
76            .progress
77            .as_ref()
78            .map(|progress| progress.add_spinner())
79            .unwrap_or_else(Progress::none);
80        progress.set_message("Scanning directories...");
81        let start_time = std::time::Instant::now();
82        let mut summary = ComparisonSummary::default();
83        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
84        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
85        let (tx, rx) = mpsc::channel();
86        std::thread::scope(|scope| {
87            scope.spawn(move || {
88                if let Err(e) = self.compare_streaming_ordered(tx) {
89                    log::error!("Error during comparison: {}", e);
90                }
91            });
92
93            // Receive results and update summary/UI
94            while let Ok(event) = rx.recv() {
95                match event {
96                    CompareProgress::StartOfComparison => {
97                        progress.set_message("Comparing files...");
98                    }
99                    CompareProgress::TotalFiles(total_files) => {
100                        progress.set_length(total_files as u64);
101                        progress.set_message("");
102                    }
103                    CompareProgress::Result(_, result) => {
104                        summary.update(&result);
105                        if self.is_symbols_format {
106                            progress.suspend(|| {
107                                println!(
108                                    "{} {}",
109                                    result.to_symbol_string(),
110                                    result.relative_path.display()
111                                );
112                            })
113                        } else if !result.is_identical() {
114                            progress.suspend(|| {
115                                println!(
116                                    "{}: {}",
117                                    result.relative_path.display(),
118                                    result.to_string(dir1_str, dir2_str)
119                                );
120                            });
121                        }
122                    }
123                    CompareProgress::FileDone => progress.inc(1),
124                }
125            }
126        });
127        progress.finish();
128        eprintln!("\n--- Comparison Summary ---");
129        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
130        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
131        Ok(())
132    }
133
134    /// Performs the directory comparison and streams results via a channel.
135    ///
136    /// # Arguments
137    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
138    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
139        let (tx_unordered, rx_unordered) = mpsc::channel();
140        std::thread::scope(|scope| {
141            scope.spawn(move || {
142                if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
143                    log::error!("Error during unordered comparison: {}", e);
144                }
145            });
146
147            let mut buffer = HashMap::new();
148            let mut next_index = 0;
149            for event in rx_unordered {
150                if let CompareProgress::Result(i, _) = &event {
151                    let index = *i;
152                    if index == next_index {
153                        tx.send(event)?;
154                        next_index += 1;
155                        while let Some(buffered) = buffer.remove(&next_index) {
156                            tx.send(buffered)?;
157                            next_index += 1;
158                        }
159                    } else {
160                        buffer.insert(index, event);
161                    }
162                } else {
163                    tx.send(event)?;
164                }
165            }
166            Ok::<(), anyhow::Error>(())
167        })?;
168        Ok(())
169    }
170
    /// Walks both directories in lockstep and sends comparison events to `tx`.
    ///
    /// Entries that exist on only one side are reported immediately from this
    /// thread. Entries that exist on both sides are compared in tasks spawned
    /// on a Rayon scope, so their `Result` events may arrive out of index
    /// order. Each `Result` is followed by a `FileDone`, and `TotalFiles` is
    /// sent once both listings are exhausted.
    ///
    /// # Errors
    /// Returns an error if the hashers cannot be created, cleared or saved, or
    /// if sending from this thread fails. Failures inside spawned tasks are
    /// only logged.
    fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
        let mut it1 = FileIterator::new(self.dir1.clone());
        let mut it2 = FileIterator::new(self.dir2.clone());
        it1.exclude = self.exclude.as_ref();
        it2.exclude = self.exclude.as_ref();
        // `None` unless the comparison method is hash-based.
        let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
        if let Some((h1, h2)) = &hashers {
            it1.hasher = Some(h1);
            it2.hasher = Some(h2);
            if self.comparison_method == FileComparisonMethod::Rehash {
                h1.clear_cache()?;
                h2.clear_cache()?;
            }
        }

        let mut cur1 = it1.next();
        let mut cur2 = it2.next();
        // Sequence number attached to each result; also the running total.
        let mut index = 0;
        tx.send(CompareProgress::StartOfComparison)?;
        rayon::scope(|scope| {
            loop {
                // Merge step: decide which side holds the smaller relative path.
                // NOTE(review): this relies on both iterators yielding paths in
                // the order defined by their `cmp` — confirm against `FileIterator`.
                let cmp = match (&cur1, &cur2) {
                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
                    (Some(_), None) => Ordering::Less,
                    (None, Some(_)) => Ordering::Greater,
                    (None, None) => break,
                };
                match cmp {
                    Ordering::Less => {
                        // Path exists only on the left side.
                        let (rel1, _) = cur1.take().unwrap();
                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur1 = it1.next();
                    }
                    Ordering::Greater => {
                        // Path exists only on the right side.
                        let (rel2, _) = cur2.take().unwrap();
                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur2 = it2.next();
                    }
                    Ordering::Equal => {
                        // Path exists on both sides: compare in a spawned task.
                        let (rel_path, path1) = cur1.take().unwrap();
                        let (_, path2) = cur2.take().unwrap();
                        let buffer_size = self.buffer_size;
                        let tx_clone = tx.clone();
                        // Capture the index now; `index` keeps advancing below.
                        let i = index;
                        let should_compare = self.comparison_method != FileComparisonMethod::Size;
                        let hashers_ref = hashers.as_ref();
                        scope.spawn(move |_| {
                            let mut comparer = FileComparer::new(&path1, &path2);
                            comparer.buffer_size = buffer_size;
                            if let Some((h1, h2)) = hashers_ref {
                                comparer.hashers = Some((h1, h2));
                            }
                            let mut result =
                                FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
                            // A failed comparison is logged; the result is still sent.
                            if let Err(error) = result.update(&comparer, should_compare) {
                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
                            }
                            if tx_clone.send(CompareProgress::Result(i, result)).is_err()
                                || tx_clone.send(CompareProgress::FileDone).is_err()
                            {
                                log::error!("Send failed during comparison of {:?}", rel_path);
                            }
                        });
                        index += 1;
                        cur1 = it1.next();
                        cur2 = it2.next();
                    }
                }
            }
            // Sent when the listing is done; spawned comparisons may still be running.
            tx.send(CompareProgress::TotalFiles(index))
        })?;
        // Not reached if an error was returned above, so the caches are then not saved.
        Self::save_hashers(hashers)?;
        Ok(())
    }
251
252    fn get_hashers(
253        &self,
254        dir1: &Path,
255        dir2: &Path,
256    ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
257        if self.comparison_method == FileComparisonMethod::Hash
258            || self.comparison_method == FileComparisonMethod::Rehash
259        {
260            let (mut h1, mut h2) = rayon::join(
261                || FileHasher::new(dir1.to_path_buf()),
262                || FileHasher::new(dir2.to_path_buf()),
263            );
264            h1.buffer_size = self.buffer_size;
265            h2.buffer_size = self.buffer_size;
266            if let Some(progress) = self.progress.as_ref() {
267                h1.progress = Some(Arc::clone(progress));
268                h2.progress = Some(Arc::clone(progress));
269            }
270            return Ok(Some((h1, h2)));
271        }
272        Ok(None)
273    }
274
275    fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
276        if let Some((h1, h2)) = hashers {
277            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
278            r1?;
279            r2?;
280        }
281        Ok(())
282    }
283
284    fn run_file_comparer(&self) -> anyhow::Result<()> {
285        assert!(self.dir1.is_file());
286        let file1 = &self.dir1;
287        let dir1 = file1.parent().unwrap();
288        let file1_name = file1.file_name().unwrap();
289        let (dir2, file2) = if self.dir2.is_file() {
290            (self.dir2.parent().unwrap(), self.dir2.clone())
291        } else {
292            (self.dir2.as_path(), self.dir2.join(file1_name))
293        };
294
295        let mut comparer = FileComparer::new(file1, &file2);
296        comparer.buffer_size = self.buffer_size;
297        let hashers = self.get_hashers(dir1, dir2)?;
298        if let Some((h1, h2)) = &hashers {
299            if self.comparison_method == FileComparisonMethod::Rehash {
300                h1.remove_cache_entry(file1)?;
301                h2.remove_cache_entry(&file2)?;
302            }
303            comparer.hashers = Some((h1, h2));
304        }
305        let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
306        let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
307        result.update(&comparer, should_compare_content)?;
308        let file1_str = file1.to_str().unwrap_or("file1");
309        if self.is_symbols_format {
310            println!("{} {}", result.to_symbol_string(), file1_str);
311        } else {
312            let file2_str = file2.to_str().unwrap_or("file2");
313            println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
314        }
315        Self::save_hashers(hashers)?;
316        Ok(())
317    }
318}
319
/// Running totals over all comparison results, printed at the end of a run.
///
/// The counters are not mutually exclusive: a file present on both sides
/// increments `in_both` and may also increment a time counter, a size or
/// content counter, and `not_comparable`.
#[derive(Default)]
struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first (left) directory.
    pub only_in_dir1: usize,
    /// Files present only in the second (right) directory.
    pub only_in_dir2: usize,
    /// Files in both whose modification time compared as greater on the left.
    pub dir1_newer: usize,
    /// Files in both whose modification time compared as greater on the right.
    pub dir2_newer: usize,
    /// Files in both whose size compared as greater on the left.
    pub dir1_larger: usize,
    /// Files in both whose size compared as greater on the right.
    pub dir2_larger: usize,
    /// Files in both with equal size whose content was found to differ.
    pub diff_content: usize,
    /// Files in both for which the time, size or (for equal sizes) content
    /// comparison is missing; counted at most once per file.
    pub not_comparable: usize,
}
332
333impl ComparisonSummary {
334    pub fn update(&mut self, result: &FileComparisonResult) {
335        match result.classification {
336            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
337            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
338            Classification::InBoth => {
339                self.in_both += 1;
340                let mut is_not_comparable = false;
341                match result.modified_time_comparison {
342                    Some(Ordering::Greater) => self.dir1_newer += 1,
343                    Some(Ordering::Less) => self.dir2_newer += 1,
344                    Some(Ordering::Equal) => {}
345                    None => is_not_comparable = true,
346                }
347                match result.size_comparison {
348                    Some(Ordering::Greater) => self.dir1_larger += 1,
349                    Some(Ordering::Less) => self.dir2_larger += 1,
350                    Some(Ordering::Equal) => match result.is_content_same {
351                        Some(false) => self.diff_content += 1,
352                        Some(true) => {}
353                        None => is_not_comparable = true,
354                    },
355                    None => is_not_comparable = true,
356                }
357                if is_not_comparable {
358                    self.not_comparable += 1;
359                }
360            }
361        }
362    }
363
364    pub fn print(
365        &self,
366        mut writer: impl std::io::Write,
367        dir1_name: &str,
368        dir2_name: &str,
369    ) -> std::io::Result<()> {
370        let values = [
371            ("Files in both:", self.in_both),
372            ("Only in left:", self.only_in_dir1),
373            ("Only in right:", self.only_in_dir2),
374            ("Left is newer:", self.dir1_newer),
375            ("Right is newer:", self.dir2_newer),
376            ("Left is larger:", self.dir1_larger),
377            ("Right is larger:", self.dir2_larger),
378            ("Different content:", self.diff_content),
379            ("Not comparable:", self.not_comparable),
380        ];
381        let max_len = values.iter().map(|(s, _)| s.len()).max().unwrap();
382        writeln!(writer, "{:width$} {}", "Left:", dir1_name, width = max_len)?;
383        writeln!(writer, "{:width$} {}", "Right:", dir2_name, width = max_len)?;
384        for (label, value) in values {
385            writeln!(writer, "{:width$} {}", label, value, width = max_len)?;
386        }
387        Ok(())
388    }
389}
390
391#[cfg(test)]
392mod tests {
393    use super::*;
394    use std::fs;
395    use std::io::Write;
396
397    #[test]
398    fn comparison_summary() {
399        let mut summary = ComparisonSummary::default();
400        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
401        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
402        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
403        res3.modified_time_comparison = Some(Ordering::Greater);
404
405        summary.update(&res1);
406        summary.update(&res2);
407        summary.update(&res3);
408
409        assert_eq!(summary.only_in_dir1, 1);
410        assert_eq!(summary.only_in_dir2, 1);
411        assert_eq!(summary.in_both, 1);
412        assert_eq!(summary.dir1_newer, 1);
413    }
414
415    #[test]
416    fn directory_comparer_integration() -> anyhow::Result<()> {
417        let dir1 = tempfile::tempdir()?;
418        let dir2 = tempfile::tempdir()?;
419
420        // Create files in dir1
421        let file1_path = dir1.path().join("same.txt");
422        let mut file1 = fs::File::create(&file1_path)?;
423        file1.write_all(b"same content")?;
424
425        let only1_path = dir1.path().join("only1.txt");
426        let mut only1 = fs::File::create(&only1_path)?;
427        only1.write_all(b"only in dir1")?;
428
429        // Create files in dir2
430        let file2_path = dir2.path().join("same.txt");
431        let mut file2 = fs::File::create(&file2_path)?;
432        file2.write_all(b"same content")?;
433
434        let only2_path = dir2.path().join("only2.txt");
435        let mut only2 = fs::File::create(&only2_path)?;
436        only2.write_all(b"only in dir2")?;
437
438        // Create a different file
439        let diff1_path = dir1.path().join("diff.txt");
440        let mut diff1 = fs::File::create(&diff1_path)?;
441        diff1.write_all(b"content 1")?;
442
443        let diff2_path = dir2.path().join("diff.txt");
444        let mut diff2 = fs::File::create(&diff2_path)?;
445        diff2.write_all(b"content 222")?; // different length and content
446
447        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
448        let (tx, rx) = mpsc::channel();
449
450        comparer.compare_streaming_ordered(tx)?;
451
452        let mut results = Vec::new();
453        while let Ok(res) = rx.recv() {
454            if let CompareProgress::Result(_, r) = res {
455                results.push(r);
456            }
457        }
458
459        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
460
461        assert_eq!(results.len(), 4);
462
463        // diff.txt
464        assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
465        assert_eq!(results[0].classification, Classification::InBoth);
466        assert!(
467            results[0].is_content_same == Some(false)
468                || results[0].size_comparison != Some(Ordering::Equal)
469        );
470
471        // only1.txt
472        assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
473        assert_eq!(results[1].classification, Classification::OnlyInDir1);
474
475        // only2.txt
476        assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
477        assert_eq!(results[2].classification, Classification::OnlyInDir2);
478
479        // same.txt
480        assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
481        assert_eq!(results[3].classification, Classification::InBoth);
482        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));
483
484        Ok(())
485    }
486
487    #[test]
488    fn directory_comparer_size_mode() -> anyhow::Result<()> {
489        let dir1 = tempfile::tempdir()?;
490        let dir2 = tempfile::tempdir()?;
491
492        let file1_path = dir1.path().join("file.txt");
493        let mut file1 = fs::File::create(&file1_path)?;
494        file1.write_all(b"content 1")?;
495
496        let file2_path = dir2.path().join("file.txt");
497        let mut file2 = fs::File::create(&file2_path)?;
498        file2.write_all(b"content 2")?; // same length, different content
499
500        let mut comparer =
501            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
502        comparer.comparison_method = FileComparisonMethod::Size;
503        let (tx, rx) = mpsc::channel();
504
505        comparer.compare_streaming_ordered(tx)?;
506
507        let mut results = Vec::new();
508        while let Ok(res) = rx.recv() {
509            if let CompareProgress::Result(_, r) = res {
510                results.push(r);
511            }
512        }
513
514        assert_eq!(results.len(), 1);
515        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
516        assert_eq!(results[0].classification, Classification::InBoth);
517        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
518        assert_eq!(results[0].is_content_same, None);
519
520        Ok(())
521    }
522}