Skip to main content

compare_dir/
dir_comparer.rs

1use crate::file_comparer::{Classification, FileComparer, FileComparisonResult};
2use indicatif::{ProgressBar, ProgressStyle};
3
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::path::{Path, PathBuf};
7use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
8use std::sync::mpsc;
9use walkdir::WalkDir;
10
/// Running totals for a directory comparison.
///
/// Each counter is incremented by `ComparisonSummary::update` as results arrive;
/// all counters start at zero via `Default`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories whose modification-time comparison is `Greater`
    /// (reported as the first directory being newer).
    pub dir1_newer: usize,
    /// Files in both directories whose modification-time comparison is `Less`
    /// (reported as the second directory being newer).
    pub dir2_newer: usize,
    /// Files in both directories that fall in neither "newer" bucket and whose
    /// size comparison is not `Equal`.
    pub same_time_diff_size: usize,
    /// Files in both directories that fall in neither "newer" bucket, have equal
    /// size, and whose content was found to differ.
    pub same_time_size_diff_content: usize,
}
21
22impl ComparisonSummary {
23    pub fn update(&mut self, result: &FileComparisonResult) {
24        match result.classification {
25            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
26            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
27            Classification::InBoth => {
28                self.in_both += 1;
29                match result.modified_time_comparison {
30                    Some(Ordering::Greater) => self.dir1_newer += 1,
31                    Some(Ordering::Less) => self.dir2_newer += 1,
32                    _ => {
33                        if result.size_comparison != Some(Ordering::Equal) {
34                            self.same_time_diff_size += 1;
35                        } else if result.is_content_same == Some(false) {
36                            self.same_time_size_diff_content += 1;
37                        }
38                    }
39                }
40            }
41        }
42    }
43
44    pub fn print(&self, dir1_name: &str, dir2_name: &str) {
45        println!("Files in both: {}", self.in_both);
46        println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
47        println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
48        println!(
49            "Files in both ({} is newer): {}",
50            dir1_name, self.dir1_newer
51        );
52        println!(
53            "Files in both ({} is newer): {}",
54            dir2_name, self.dir2_newer
55        );
56        println!(
57            "Files in both (same time, different size): {}",
58            self.same_time_diff_size
59        );
60        println!(
61            "Files in both (same time and size, different content): {}",
62            self.same_time_size_diff_content
63        );
64    }
65}
66
/// A tool for comparing the contents of two directories.
#[derive(Debug)]
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
    /// Number of results produced by the scan; stays `0` until the directory
    /// walk has finished, at which point the final count is stored.
    total_files: AtomicUsize,
    /// Buffer size handed to each `FileComparer` used for files present in both
    /// directories.
    pub buffer_size: usize,
}
74
75impl DirectoryComparer {
76    /// Creates a new `DirectoryComparer` for the two given directories.
77    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
78        Self {
79            dir1,
80            dir2,
81            total_files: AtomicUsize::new(0),
82            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
83        }
84    }
85
86    /// Sets the maximum number of threads for parallel processing.
87    /// This initializes the global Rayon thread pool.
88    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
89        rayon::ThreadPoolBuilder::new()
90            .num_threads(parallel)
91            .build_global()
92            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
93        Ok(())
94    }
95
96    /// Executes the directory comparison and prints results to stdout.
97    /// This is a convenience method for CLI usage.
98    pub fn run(&self) -> anyhow::Result<()> {
99        let progress = ProgressBar::new_spinner();
100        progress.enable_steady_tick(std::time::Duration::from_millis(120));
101        progress.set_style(
102            ProgressStyle::with_template("[{elapsed_precise}] {spinner:.green} {msg}").unwrap(),
103        );
104        progress.set_message("Scanning directories...");
105
106        let start_time = std::time::Instant::now();
107        let mut summary = ComparisonSummary::default();
108        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
109        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
110
111        let (tx, rx) = mpsc::channel();
112
113        std::thread::scope(|scope| {
114            scope.spawn(move || {
115                if let Err(e) = self.compare_streaming(tx) {
116                    log::error!("Error during comparison: {}", e);
117                }
118            });
119
120            // Receive results and update summary/UI
121            let mut is_first = true;
122            let mut length_set = false;
123            while let Ok(result) = rx.recv() {
124                if is_first {
125                    is_first = false;
126                    progress.set_message("Comparing files...");
127                }
128                if !length_set {
129                    let total_files = self.total_files.load(AtomicOrdering::Relaxed);
130                    if total_files > 0 {
131                        progress.set_length(total_files as u64);
132                        progress.set_style(
133                            ProgressStyle::with_template(
134                                "[{elapsed_precise}] {bar:40.cyan/blue} {percent}% {pos:>7}/{len:7} {msg}",
135                            )
136                            .unwrap(),
137                        );
138                        progress.set_message("");
139                        length_set = true;
140                    }
141                }
142                summary.update(&result);
143                if !result.is_identical() {
144                    progress.suspend(|| {
145                        println!("{}", result.to_string(dir1_str, dir2_str));
146                    });
147                }
148                progress.inc(1);
149            }
150        });
151
152        progress.finish();
153
154        eprintln!("\n--- Comparison Summary ---");
155        summary.print(dir1_str, dir2_str);
156        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
157        Ok(())
158    }
159
160    fn get_next_file(it: &mut walkdir::IntoIter, dir: &Path) -> Option<(PathBuf, PathBuf)> {
161        for entry in it.filter_map(|e| e.ok()) {
162            if entry.file_type().is_file()
163                && let Ok(rel_path) = entry.path().strip_prefix(dir)
164            {
165                return Some((rel_path.to_path_buf(), entry.path().to_path_buf()));
166            }
167        }
168        None
169    }
170
171    /// Performs the directory comparison and streams results via a channel.
172    ///
173    /// # Arguments
174    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
175    pub(crate) fn compare_streaming(
176        &self,
177        tx: mpsc::Sender<FileComparisonResult>,
178    ) -> anyhow::Result<()> {
179        let (tx_unordered, rx_unordered) = mpsc::channel();
180
181        std::thread::scope(|scope| {
182            scope.spawn(move || {
183                if let Err(e) = self.compare_unordered_streaming(tx_unordered) {
184                    log::error!("Error during unordered comparison: {}", e);
185                }
186            });
187
188            let mut buffer = HashMap::new();
189            let mut next_index = 0;
190
191            for (i, result) in rx_unordered {
192                if i == next_index {
193                    if tx.send(result).is_err() {
194                        break; // Main receiver disconnected
195                    }
196                    next_index += 1;
197                    while let Some(result) = buffer.remove(&next_index) {
198                        if tx.send(result).is_err() {
199                            break;
200                        }
201                        next_index += 1;
202                    }
203                } else {
204                    buffer.insert(i, result);
205                }
206            }
207        });
208
209        Ok(())
210    }
211
212    fn compare_unordered_streaming(
213        &self,
214        tx: mpsc::Sender<(usize, FileComparisonResult)>,
215    ) -> anyhow::Result<()> {
216        log::info!("Scanning directory: {:?}", self.dir1);
217        let mut it1 = WalkDir::new(&self.dir1).sort_by_file_name().into_iter();
218        log::info!("Scanning directory: {:?}", self.dir2);
219        let mut it2 = WalkDir::new(&self.dir2).sort_by_file_name().into_iter();
220
221        let mut next1 = Self::get_next_file(&mut it1, &self.dir1);
222        let mut next2 = Self::get_next_file(&mut it2, &self.dir2);
223
224        let mut index = 0;
225
226        rayon::scope(|scope| {
227            loop {
228                match (&next1, &next2) {
229                    (Some((rel1, path1)), Some((rel2, path2))) => match rel1.cmp(rel2) {
230                        Ordering::Less => {
231                            let result =
232                                FileComparisonResult::new(rel1.clone(), Classification::OnlyInDir1);
233                            if tx.send((index, result)).is_err() {
234                                break;
235                            }
236                            index += 1;
237                            next1 = Self::get_next_file(&mut it1, &self.dir1);
238                        }
239                        Ordering::Greater => {
240                            let result =
241                                FileComparisonResult::new(rel2.clone(), Classification::OnlyInDir2);
242                            if tx.send((index, result)).is_err() {
243                                break;
244                            }
245                            index += 1;
246                            next2 = Self::get_next_file(&mut it2, &self.dir2);
247                        }
248                        Ordering::Equal => {
249                            let rel_path = rel1.clone();
250                            let path1_clone = path1.clone();
251                            let path2_clone = path2.clone();
252                            let mut result =
253                                FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
254                            let buffer_size = self.buffer_size;
255                            let tx_clone = tx.clone();
256                            let i = index;
257                            scope.spawn(move |_| {
258                                let mut comparer = FileComparer::new(&path1_clone, &path2_clone);
259                                comparer.buffer_size = buffer_size;
260                                if let Err(error) = result.update(&comparer) {
261                                    log::error!(
262                                        "Error during comparison of {:?}: {}",
263                                        rel_path,
264                                        error
265                                    );
266                                }
267                                if tx_clone.send((i, result)).is_err() {
268                                    log::error!(
269                                        "Receiver dropped, stopping comparison of {:?}",
270                                        rel_path
271                                    );
272                                }
273                            });
274                            index += 1;
275                            next1 = Self::get_next_file(&mut it1, &self.dir1);
276                            next2 = Self::get_next_file(&mut it2, &self.dir2);
277                        }
278                    },
279                    (Some((rel1, _)), None) => {
280                        let result =
281                            FileComparisonResult::new(rel1.clone(), Classification::OnlyInDir1);
282                        if tx.send((index, result)).is_err() {
283                            break;
284                        }
285                        index += 1;
286                        next1 = Self::get_next_file(&mut it1, &self.dir1);
287                    }
288                    (None, Some((rel2, _))) => {
289                        let result =
290                            FileComparisonResult::new(rel2.clone(), Classification::OnlyInDir2);
291                        if tx.send((index, result)).is_err() {
292                            break;
293                        }
294                        index += 1;
295                        next2 = Self::get_next_file(&mut it2, &self.dir2);
296                    }
297                    (None, None) => {
298                        break;
299                    }
300                }
301            }
302
303            self.total_files.store(index, AtomicOrdering::Relaxed);
304        });
305
306        Ok(())
307    }
308}
309
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// One result per classification lands in the matching counter.
    #[test]
    fn test_comparison_summary() {
        let mut newer_in_dir1 =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_dir1.modified_time_comparison = Some(Ordering::Greater);

        let inputs = [
            FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1),
            FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2),
            newer_in_dir1,
        ];

        let mut summary = ComparisonSummary::default();
        for input in &inputs {
            summary.update(input);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    /// Compares two temporary directories holding an identical file, a differing
    /// file and one file unique to each side.
    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical on both sides.
        fs::write(dir1.path().join("same.txt"), b"same content")?;
        fs::write(dir2.path().join("same.txt"), b"same content")?;

        // Present on one side only.
        fs::write(dir1.path().join("only1.txt"), b"only in dir1")?;
        fs::write(dir2.path().join("only2.txt"), b"only in dir2")?;

        // Present on both sides with different length and content.
        fs::write(dir1.path().join("diff.txt"), b"content 1")?;
        fs::write(dir2.path().join("diff.txt"), b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        // The sender is consumed here, so the receiver drains and then ends.
        comparer.compare_streaming(tx)?;

        let mut results: Vec<_> = rx.iter().collect();
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);
        let [diff, only1, only2, same] = &results[..] else {
            unreachable!("length asserted above");
        };

        // diff.txt
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        // only1.txt
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        // only2.txt
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        // same.txt
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }
}
403}