Skip to main content

compare_dir/
dir_comparer.rs

1use crate::{Classification, FileComparer, FileComparisonResult, FileHasher};
2use indicatif::{ProgressBar, ProgressStyle};
3
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::path::{Path, PathBuf};
7use std::sync::mpsc;
8use walkdir::WalkDir;
9
10#[derive(Debug, Clone)]
11enum CompareProgress {
12    StartOfComparison,
13    TotalFiles(usize),
14    Result(usize, FileComparisonResult),
15}
16
/// Running totals describing how the compared files were classified.
#[derive(Default)]
struct ComparisonSummary {
    /// Files that exist in both directories.
    pub in_both: usize,
    /// Files that exist only in the first directory.
    pub only_in_dir1: usize,
    /// Files that exist only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories whose copy in the first directory is newer.
    pub dir1_newer: usize,
    /// Files in both directories whose copy in the second directory is newer.
    pub dir2_newer: usize,
    /// Files in both directories that are not newer on either side but differ in size.
    pub same_time_diff_size: usize,
    /// Files in both directories that are not newer on either side, have the
    /// same size, and were found to differ in content.
    pub same_time_size_diff_content: usize,
}
27
28impl ComparisonSummary {
29    pub fn update(&mut self, result: &FileComparisonResult) {
30        match result.classification {
31            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
32            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
33            Classification::InBoth => {
34                self.in_both += 1;
35                match result.modified_time_comparison {
36                    Some(Ordering::Greater) => self.dir1_newer += 1,
37                    Some(Ordering::Less) => self.dir2_newer += 1,
38                    _ => {
39                        if result.size_comparison != Some(Ordering::Equal) {
40                            self.same_time_diff_size += 1;
41                        } else if result.is_content_same == Some(false) {
42                            self.same_time_size_diff_content += 1;
43                        }
44                    }
45                }
46            }
47        }
48    }
49
50    pub fn print(
51        &self,
52        mut writer: impl std::io::Write,
53        dir1_name: &str,
54        dir2_name: &str,
55    ) -> std::io::Result<()> {
56        writeln!(writer, "Files in both: {}", self.in_both)?;
57        writeln!(writer, "Files only in {}: {}", dir1_name, self.only_in_dir1)?;
58        writeln!(writer, "Files only in {}: {}", dir2_name, self.only_in_dir2)?;
59        writeln!(
60            writer,
61            "Files in both ({} is newer): {}",
62            dir1_name, self.dir1_newer
63        )?;
64        writeln!(
65            writer,
66            "Files in both ({} is newer): {}",
67            dir2_name, self.dir2_newer
68        )?;
69        writeln!(
70            writer,
71            "Files in both (same time, different size): {}",
72            self.same_time_diff_size
73        )?;
74        writeln!(
75            writer,
76            "Files in both (same time and size, different content): {}",
77            self.same_time_size_diff_content
78        )?;
79        Ok(())
80    }
81}
82
/// Strategy used to decide whether two files found in both directories match.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileComparisonMethod {
    /// Look at size and modification time only; content is not compared.
    Size,
    /// Compare content by hash (BLAKE3).
    Hash,
    /// Compare content by hash, clearing the cached hashes first.
    Rehash,
    /// Compare content byte-by-byte.
    Full,
}
95
96/// A tool for comparing the contents of two directories.
97pub struct DirectoryComparer {
98    dir1: PathBuf,
99    dir2: PathBuf,
100    pub is_symbols_format: bool,
101    pub buffer_size: usize,
102    pub comparison_method: FileComparisonMethod,
103}
104
105impl DirectoryComparer {
106    /// Creates a new `DirectoryComparer` for the two given directories.
107    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
108        Self {
109            dir1,
110            dir2,
111            is_symbols_format: false,
112            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
113            comparison_method: FileComparisonMethod::Hash,
114        }
115    }
116
117    /// Sets the maximum number of threads for parallel processing.
118    /// This initializes the global Rayon thread pool.
119    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
120        rayon::ThreadPoolBuilder::new()
121            .num_threads(parallel)
122            .build_global()
123            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
124        Ok(())
125    }
126
127    /// Executes the directory comparison and prints results to stdout.
128    /// This is a convenience method for CLI usage.
129    pub fn run(&self) -> anyhow::Result<()> {
130        let progress = ProgressBar::new_spinner();
131        progress.enable_steady_tick(std::time::Duration::from_millis(120));
132        progress.set_style(
133            ProgressStyle::with_template("[{elapsed_precise}] {spinner:.green} {msg}").unwrap(),
134        );
135        progress.set_message("Scanning directories...");
136        let start_time = std::time::Instant::now();
137        let mut summary = ComparisonSummary::default();
138        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
139        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
140        let (tx, rx) = mpsc::channel();
141        std::thread::scope(|scope| {
142            scope.spawn(move || {
143                if let Err(e) = self.compare_streaming_ordered(tx) {
144                    log::error!("Error during comparison: {}", e);
145                }
146            });
147
148            // Receive results and update summary/UI
149            while let Ok(event) = rx.recv() {
150                match event {
151                    CompareProgress::StartOfComparison => {
152                        progress.set_message("Comparing files...");
153                    }
154                    CompareProgress::TotalFiles(total_files) => {
155                        progress.set_length(total_files as u64);
156                        progress.set_style(
157                            ProgressStyle::with_template(
158                                "[{elapsed_precise}] {bar:40.cyan/blue} {percent}% {pos:>7}/{len:7} {msg}",
159                            )
160                            .unwrap(),
161                        );
162                        progress.set_message("");
163                    }
164                    CompareProgress::Result(_, result) => {
165                        summary.update(&result);
166                        if self.is_symbols_format {
167                            progress.suspend(|| {
168                                println!(
169                                    "{} {}",
170                                    result.to_symbol_string(),
171                                    result.relative_path.display()
172                                );
173                            })
174                        } else if !result.is_identical() {
175                            progress.suspend(|| {
176                                println!(
177                                    "{}: {}",
178                                    result.relative_path.display(),
179                                    result.to_string(dir1_str, dir2_str)
180                                );
181                            });
182                        }
183                        progress.inc(1);
184                    }
185                }
186            }
187        });
188        progress.finish();
189        eprintln!("\n--- Comparison Summary ---");
190        summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
191        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
192        Ok(())
193    }
194
195    /// Performs the directory comparison and streams results via a channel.
196    ///
197    /// # Arguments
198    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
199    fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
200        let (tx_unordered, rx_unordered) = mpsc::channel();
201        std::thread::scope(|scope| {
202            scope.spawn(move || {
203                if let Err(e) = self.compare_streaming(tx_unordered) {
204                    log::error!("Error during unordered comparison: {}", e);
205                }
206            });
207
208            let mut buffer = HashMap::new();
209            let mut next_index = 0;
210            for event in rx_unordered {
211                if let CompareProgress::Result(i, _) = &event {
212                    let index = *i;
213                    if index == next_index {
214                        tx.send(event)?;
215                        next_index += 1;
216                        while let Some(buffered) = buffer.remove(&next_index) {
217                            tx.send(buffered)?;
218                            next_index += 1;
219                        }
220                    } else {
221                        buffer.insert(index, event);
222                    }
223                } else {
224                    tx.send(event)?;
225                }
226            }
227            Ok::<(), anyhow::Error>(())
228        })?;
229
230        Ok(())
231    }
232
233    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
234        log::info!("Scanning directory: {:?}", self.dir1);
235        let mut it1 = WalkDir::new(&self.dir1).sort_by_file_name().into_iter();
236        log::info!("Scanning directory: {:?}", self.dir2);
237        let mut it2 = WalkDir::new(&self.dir2).sort_by_file_name().into_iter();
238        let mut next1 = Self::get_next_file(&mut it1, &self.dir1);
239        let mut next2 = Self::get_next_file(&mut it2, &self.dir2);
240        let mut index = 0;
241        let hashers = if self.comparison_method == FileComparisonMethod::Hash
242            || self.comparison_method == FileComparisonMethod::Rehash
243        {
244            let (h1, h2) = rayon::join(
245                || FileHasher::new(self.dir1.clone()),
246                || FileHasher::new(self.dir2.clone()),
247            );
248            if self.comparison_method == FileComparisonMethod::Rehash {
249                h1.clear_cache()?;
250                h2.clear_cache()?;
251            }
252            Some((h1, h2))
253        } else {
254            None
255        };
256        tx.send(CompareProgress::StartOfComparison)?;
257        rayon::scope(|scope| {
258            loop {
259                let cmp = match (&next1, &next2) {
260                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
261                    (Some(_), None) => Ordering::Less,
262                    (None, Some(_)) => Ordering::Greater,
263                    (None, None) => break,
264                };
265                match cmp {
266                    Ordering::Less => {
267                        let (rel1, _) = next1.take().unwrap();
268                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
269                        tx.send(CompareProgress::Result(index, result))?;
270                        index += 1;
271                        next1 = Self::get_next_file(&mut it1, &self.dir1);
272                    }
273                    Ordering::Greater => {
274                        let (rel2, _) = next2.take().unwrap();
275                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
276                        tx.send(CompareProgress::Result(index, result))?;
277                        index += 1;
278                        next2 = Self::get_next_file(&mut it2, &self.dir2);
279                    }
280                    Ordering::Equal => {
281                        let (rel_path, path1) = next1.take().unwrap();
282                        let (_, path2) = next2.take().unwrap();
283                        let buffer_size = self.buffer_size;
284                        let tx_clone = tx.clone();
285                        let i = index;
286                        let should_compare = self.comparison_method != FileComparisonMethod::Size;
287                        let hashers_ref = hashers.as_ref();
288                        scope.spawn(move |_| {
289                            let mut comparer = FileComparer::new(&path1, &path2);
290                            comparer.buffer_size = buffer_size;
291                            if let Some((h1, h2)) = hashers_ref {
292                                comparer.hashers = Some((h1, h2));
293                            }
294                            let mut result =
295                                FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
296                            if let Err(error) = result.update(&comparer, should_compare) {
297                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
298                            }
299                            if tx_clone.send(CompareProgress::Result(i, result)).is_err() {
300                                log::error!(
301                                    "Receiver dropped, stopping comparison of {:?}",
302                                    rel_path
303                                );
304                            }
305                        });
306                        index += 1;
307                        next1 = Self::get_next_file(&mut it1, &self.dir1);
308                        next2 = Self::get_next_file(&mut it2, &self.dir2);
309                    }
310                }
311            }
312            tx.send(CompareProgress::TotalFiles(index))
313        })?;
314        if let Some((h1, h2)) = hashers {
315            let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
316            r1?;
317            r2?;
318        }
319        Ok(())
320    }
321
322    fn get_next_file(it: &mut walkdir::IntoIter, dir: &Path) -> Option<(PathBuf, PathBuf)> {
323        for entry in it {
324            match entry {
325                Ok(entry) => {
326                    if entry.file_type().is_file() {
327                        let rel_path = entry.path().strip_prefix(dir).unwrap();
328                        return Some((rel_path.to_path_buf(), entry.path().to_path_buf()));
329                    }
330                }
331                Err(error) => {
332                    log::error!("Error while walking directory: {}", error);
333                }
334            }
335        }
336        None
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Creates (or truncates) the file at `path` and writes `contents` to it.
    fn write_file(path: &Path, contents: &[u8]) -> std::io::Result<()> {
        fs::File::create(path)?.write_all(contents)
    }

    /// Runs the ordered comparison to completion and returns the per-file
    /// results in the order they were delivered, dropping all other events.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    /// One-sided files and a "dir1 is newer" file land in their counters.
    #[test]
    fn test_comparison_summary() {
        let only_first = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_second = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_in_first =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_first.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_first, &only_second, &newer_in_first] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    /// End-to-end run over two temporary trees with an identical file, a
    /// differing file and one file unique to each side.
    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let (root1, root2) = (dir1.path(), dir2.path());

        // Files in dir1
        write_file(&root1.join("same.txt"), b"same content")?;
        write_file(&root1.join("only1.txt"), b"only in dir1")?;

        // Files in dir2
        write_file(&root2.join("same.txt"), b"same content")?;
        write_file(&root2.join("only2.txt"), b"only in dir2")?;

        // A file that differs in both length and content
        write_file(&root1.join("diff.txt"), b"content 1")?;
        write_file(&root2.join("diff.txt"), b"content 222")?;

        let comparer = DirectoryComparer::new(root1.to_path_buf(), root2.to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let names: Vec<&str> = results
            .iter()
            .map(|result| result.relative_path.to_str().unwrap())
            .collect();

        // diff.txt
        assert_eq!(names[0], "diff.txt");
        assert_eq!(results[0].classification, Classification::InBoth);
        assert!(
            results[0].is_content_same == Some(false)
                || results[0].size_comparison != Some(Ordering::Equal)
        );

        // only1.txt
        assert_eq!(names[1], "only1.txt");
        assert_eq!(results[1].classification, Classification::OnlyInDir1);

        // only2.txt
        assert_eq!(names[2], "only2.txt");
        assert_eq!(results[2].classification, Classification::OnlyInDir2);

        // same.txt
        assert_eq!(names[3], "same.txt");
        assert_eq!(results[3].classification, Classification::InBoth);
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    /// In `Size` mode equally sized files are reported without a content verdict.
    #[test]
    fn test_directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different content
        write_file(&dir1.path().join("file.txt"), b"content 1")?;
        write_file(&dir2.path().join("file.txt"), b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}
471}