Skip to main content

compare_dir/
dir_comparer.rs

1use crate::file_comparer::{Classification, FileComparer, FileComparisonResult};
2use indicatif::{ProgressBar, ProgressStyle};
3
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::path::{Path, PathBuf};
7use std::sync::mpsc;
8use walkdir::WalkDir;
9
10#[derive(Debug, Clone)]
11enum CompareProgress {
12    StartOfComparison,
13    TotalFiles(usize),
14    Result(usize, FileComparisonResult),
15}
16
/// Running counters describing the outcome of a directory comparison.
///
/// Every counter starts at zero (see the derived `Default`).
#[derive(Default)]
struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories whose copy in the first directory is newer.
    pub dir1_newer: usize,
    /// Files in both directories whose copy in the second directory is newer.
    pub dir2_newer: usize,
    /// Files in both directories that are not newer on either side but differ in size.
    pub same_time_diff_size: usize,
    /// Files in both directories with equal size but different content.
    pub same_time_size_diff_content: usize,
}
27
28impl ComparisonSummary {
29    pub fn update(&mut self, result: &FileComparisonResult) {
30        match result.classification {
31            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
32            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
33            Classification::InBoth => {
34                self.in_both += 1;
35                match result.modified_time_comparison {
36                    Some(Ordering::Greater) => self.dir1_newer += 1,
37                    Some(Ordering::Less) => self.dir2_newer += 1,
38                    _ => {
39                        if result.size_comparison != Some(Ordering::Equal) {
40                            self.same_time_diff_size += 1;
41                        } else if result.is_content_same == Some(false) {
42                            self.same_time_size_diff_content += 1;
43                        }
44                    }
45                }
46            }
47        }
48    }
49
50    pub fn print(
51        &self,
52        mut writer: impl std::io::Write,
53        dir1_name: &str,
54        dir2_name: &str,
55    ) -> std::io::Result<()> {
56        writeln!(writer, "Files in both: {}", self.in_both)?;
57        writeln!(writer, "Files only in {}: {}", dir1_name, self.only_in_dir1)?;
58        writeln!(writer, "Files only in {}: {}", dir2_name, self.only_in_dir2)?;
59        writeln!(
60            writer,
61            "Files in both ({} is newer): {}",
62            dir1_name, self.dir1_newer
63        )?;
64        writeln!(
65            writer,
66            "Files in both ({} is newer): {}",
67            dir2_name, self.dir2_newer
68        )?;
69        writeln!(
70            writer,
71            "Files in both (same time, different size): {}",
72            self.same_time_diff_size
73        )?;
74        writeln!(
75            writer,
76            "Files in both (same time and size, different content): {}",
77            self.same_time_size_diff_content
78        )?;
79        Ok(())
80    }
81}
82
/// Compares the files found under two directory trees.
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
    /// When `true`, every file is printed with a symbol prefix; otherwise only
    /// files that are not identical are printed.
    pub is_symbols_format: bool,
    /// Buffer size handed to each `FileComparer`.
    pub buffer_size: usize,
}
90
91impl DirectoryComparer {
92    /// Creates a new `DirectoryComparer` for the two given directories.
93    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
94        Self {
95            dir1,
96            dir2,
97            is_symbols_format: false,
98            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
99        }
100    }
101
102    /// Sets the maximum number of threads for parallel processing.
103    /// This initializes the global Rayon thread pool.
104    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
105        rayon::ThreadPoolBuilder::new()
106            .num_threads(parallel)
107            .build_global()
108            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
109        Ok(())
110    }
111
112    /// Executes the directory comparison and prints results to stdout.
113    /// This is a convenience method for CLI usage.
114    pub fn run(&self) -> anyhow::Result<()> {
115        let progress = ProgressBar::new_spinner();
116        progress.enable_steady_tick(std::time::Duration::from_millis(120));
117        progress.set_style(
118            ProgressStyle::with_template("[{elapsed_precise}] {spinner:.green} {msg}").unwrap(),
119        );
120        progress.set_message("Scanning directories...");
121
122        let start_time = std::time::Instant::now();
123        let mut summary = ComparisonSummary::default();
124        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
125        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
126
127        let (tx, rx) = mpsc::channel();
128
129        std::thread::scope(|scope| {
130            scope.spawn(move || {
131                if let Err(e) = self.compare_streaming(tx) {
132                    log::error!("Error during comparison: {}", e);
133                }
134            });
135
136            // Receive results and update summary/UI
137            while let Ok(event) = rx.recv() {
138                match event {
139                    CompareProgress::StartOfComparison => {
140                        progress.set_message("Comparing files...");
141                    }
142                    CompareProgress::TotalFiles(total_files) => {
143                        progress.set_length(total_files as u64);
144                        progress.set_style(
145                            ProgressStyle::with_template(
146                                "[{elapsed_precise}] {bar:40.cyan/blue} {percent}% {pos:>7}/{len:7} {msg}",
147                            )
148                            .unwrap(),
149                        );
150                        progress.set_message("");
151                    }
152                    CompareProgress::Result(_, result) => {
153                        summary.update(&result);
154                        if self.is_symbols_format {
155                            progress.suspend(|| {
156                                println!(
157                                    "{} {}",
158                                    result.to_symbol_string(),
159                                    result.relative_path.display()
160                                );
161                            })
162                        } else if !result.is_identical() {
163                            progress.suspend(|| {
164                                println!(
165                                    "{}: {}",
166                                    result.relative_path.display(),
167                                    result.to_string(dir1_str, dir2_str)
168                                );
169                            });
170                        }
171                        progress.inc(1);
172                    }
173                }
174            }
175        });
176
177        progress.finish();
178
179        eprintln!("\n--- Comparison Summary ---");
180        let _ = summary.print(&mut std::io::stderr(), dir1_str, dir2_str);
181        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
182        Ok(())
183    }
184
185    fn get_next_file(it: &mut walkdir::IntoIter, dir: &Path) -> Option<(PathBuf, PathBuf)> {
186        for entry in it.filter_map(|e| e.ok()) {
187            if entry.file_type().is_file()
188                && let Ok(rel_path) = entry.path().strip_prefix(dir)
189            {
190                return Some((rel_path.to_path_buf(), entry.path().to_path_buf()));
191            }
192        }
193        None
194    }
195
196    /// Performs the directory comparison and streams results via a channel.
197    ///
198    /// # Arguments
199    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
200    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
201        let (tx_unordered, rx_unordered) = mpsc::channel();
202        std::thread::scope(|scope| {
203            scope.spawn(move || {
204                if let Err(e) = self.compare_unordered_streaming(tx_unordered) {
205                    log::error!("Error during unordered comparison: {}", e);
206                }
207            });
208
209            let mut buffer = HashMap::new();
210            let mut next_index = 0;
211
212            for event in rx_unordered {
213                if let CompareProgress::Result(i, _) = &event {
214                    let index = *i;
215                    if index == next_index {
216                        tx.send(event)?;
217                        next_index += 1;
218                        while let Some(buffered) = buffer.remove(&next_index) {
219                            tx.send(buffered)?;
220                            next_index += 1;
221                        }
222                    } else {
223                        buffer.insert(index, event);
224                    }
225                } else {
226                    tx.send(event)?;
227                }
228            }
229            Ok::<(), anyhow::Error>(())
230        })?;
231
232        Ok(())
233    }
234
235    fn compare_unordered_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
236        log::info!("Scanning directory: {:?}", self.dir1);
237        let mut it1 = WalkDir::new(&self.dir1).sort_by_file_name().into_iter();
238        log::info!("Scanning directory: {:?}", self.dir2);
239        let mut it2 = WalkDir::new(&self.dir2).sort_by_file_name().into_iter();
240        let mut next1 = Self::get_next_file(&mut it1, &self.dir1);
241        let mut next2 = Self::get_next_file(&mut it2, &self.dir2);
242        let mut index = 0;
243        tx.send(CompareProgress::StartOfComparison)?;
244        rayon::scope(|scope| {
245            loop {
246                let cmp = match (&next1, &next2) {
247                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
248                    (Some(_), None) => Ordering::Less,
249                    (None, Some(_)) => Ordering::Greater,
250                    (None, None) => break,
251                };
252
253                match cmp {
254                    Ordering::Less => {
255                        let (rel1, _) = next1.take().unwrap();
256                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
257                        tx.send(CompareProgress::Result(index, result))?;
258                        index += 1;
259                        next1 = Self::get_next_file(&mut it1, &self.dir1);
260                    }
261                    Ordering::Greater => {
262                        let (rel2, _) = next2.take().unwrap();
263                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
264                        tx.send(CompareProgress::Result(index, result))?;
265                        index += 1;
266                        next2 = Self::get_next_file(&mut it2, &self.dir2);
267                    }
268                    Ordering::Equal => {
269                        let (rel_path, path1) = next1.take().unwrap();
270                        let (_, path2) = next2.take().unwrap();
271
272                        let mut result =
273                            FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
274                        let buffer_size = self.buffer_size;
275                        let tx_clone = tx.clone();
276                        let i = index;
277                        scope.spawn(move |_| {
278                            let mut comparer = FileComparer::new(&path1, &path2);
279                            comparer.buffer_size = buffer_size;
280                            if let Err(error) = result.update(&comparer) {
281                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
282                            }
283                            if tx_clone.send(CompareProgress::Result(i, result)).is_err() {
284                                log::error!(
285                                    "Receiver dropped, stopping comparison of {:?}",
286                                    rel_path
287                                );
288                            }
289                        });
290                        index += 1;
291                        next1 = Self::get_next_file(&mut it1, &self.dir1);
292                        next2 = Self::get_next_file(&mut it2, &self.dir2);
293                    }
294                }
295            }
296
297            tx.send(CompareProgress::TotalFiles(index))
298        })?;
299
300        Ok(())
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates (or truncates) `dir/name` and fills it with `contents`.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> std::io::Result<()> {
        fs::write(dir.join(name), contents)
    }

    /// One result of each kind must land in the matching summary counters.
    #[test]
    fn test_comparison_summary() {
        let only_first = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_second = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_in_first =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_first.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_first, &only_second, &newer_in_first] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    /// End-to-end run over two temporary directories holding one identical
    /// file, one differing file and one file unique to each side.
    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let first = tempfile::tempdir()?;
        let second = tempfile::tempdir()?;

        // Files in the first directory.
        write_file(first.path(), "same.txt", b"same content")?;
        write_file(first.path(), "only1.txt", b"only in dir1")?;

        // Files in the second directory.
        write_file(second.path(), "same.txt", b"same content")?;
        write_file(second.path(), "only2.txt", b"only in dir2")?;

        // Same name on both sides, different length and content.
        write_file(first.path(), "diff.txt", b"content 1")?;
        write_file(second.path(), "diff.txt", b"content 222")?;

        let comparer =
            DirectoryComparer::new(first.path().to_path_buf(), second.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming(tx)?;

        // The sender was consumed above, so the iterator ends once the
        // queued events are drained.
        let mut results: Vec<FileComparisonResult> = rx
            .iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                CompareProgress::StartOfComparison | CompareProgress::TotalFiles(_) => None,
            })
            .collect();

        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let expected = [
            ("diff.txt", Classification::InBoth),
            ("only1.txt", Classification::OnlyInDir1),
            ("only2.txt", Classification::OnlyInDir2),
            ("same.txt", Classification::InBoth),
        ];
        for (result, (name, classification)) in results.iter().zip(expected) {
            assert_eq!(result.relative_path.to_str().unwrap(), name);
            assert_eq!(result.classification, classification);
        }

        // diff.txt must be flagged through either its content or its size.
        let diff = &results[0];
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        // same.txt has the same size on both sides.
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }
}