// compare_dir/dir_comparer.rs

1use crate::file_comparer::{Classification, FileComparer, FileComparisonResult};
2use indicatif::{ProgressBar, ProgressStyle};
3
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::path::{Path, PathBuf};
7use std::sync::mpsc;
8use walkdir::WalkDir;
9
10#[derive(Debug, Clone)]
11pub enum CompareProgress {
12    StartOfComparison,
13    TotalFiles(usize),
14    Result(usize, FileComparisonResult),
15}
16
/// Running totals collected while comparison results are processed.
///
/// All counters start at zero (`Default`) and are advanced by
/// `ComparisonSummary::update`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ComparisonSummary {
    /// Files found in both directories.
    pub in_both: usize,
    /// Files found only in the first directory.
    pub only_in_dir1: usize,
    /// Files found only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories where the first directory's copy is newer.
    pub dir1_newer: usize,
    /// Files in both directories where the second directory's copy is newer.
    pub dir2_newer: usize,
    /// Files in both directories reported as same time but different size.
    pub same_time_diff_size: usize,
    /// Files in both directories reported as same time and size but different content.
    pub same_time_size_diff_content: usize,
}
27
28impl ComparisonSummary {
29    pub fn update(&mut self, result: &FileComparisonResult) {
30        match result.classification {
31            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
32            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
33            Classification::InBoth => {
34                self.in_both += 1;
35                match result.modified_time_comparison {
36                    Some(Ordering::Greater) => self.dir1_newer += 1,
37                    Some(Ordering::Less) => self.dir2_newer += 1,
38                    _ => {
39                        if result.size_comparison != Some(Ordering::Equal) {
40                            self.same_time_diff_size += 1;
41                        } else if result.is_content_same == Some(false) {
42                            self.same_time_size_diff_content += 1;
43                        }
44                    }
45                }
46            }
47        }
48    }
49
50    pub fn print(&self, dir1_name: &str, dir2_name: &str) {
51        println!("Files in both: {}", self.in_both);
52        println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
53        println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
54        println!(
55            "Files in both ({} is newer): {}",
56            dir1_name, self.dir1_newer
57        );
58        println!(
59            "Files in both ({} is newer): {}",
60            dir2_name, self.dir2_newer
61        );
62        println!(
63            "Files in both (same time, different size): {}",
64            self.same_time_diff_size
65        );
66        println!(
67            "Files in both (same time and size, different content): {}",
68            self.same_time_size_diff_content
69        );
70    }
71}
72
/// A tool for comparing the contents of two directories.
#[derive(Debug, Clone)]
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
    /// Buffer size handed to each per-file comparer.
    pub buffer_size: usize,
}
79
80impl DirectoryComparer {
81    /// Creates a new `DirectoryComparer` for the two given directories.
82    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
83        Self {
84            dir1,
85            dir2,
86            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
87        }
88    }
89
90    /// Sets the maximum number of threads for parallel processing.
91    /// This initializes the global Rayon thread pool.
92    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
93        rayon::ThreadPoolBuilder::new()
94            .num_threads(parallel)
95            .build_global()
96            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
97        Ok(())
98    }
99
100    /// Executes the directory comparison and prints results to stdout.
101    /// This is a convenience method for CLI usage.
102    pub fn run(&self) -> anyhow::Result<()> {
103        let progress = ProgressBar::new_spinner();
104        progress.enable_steady_tick(std::time::Duration::from_millis(120));
105        progress.set_style(
106            ProgressStyle::with_template("[{elapsed_precise}] {spinner:.green} {msg}").unwrap(),
107        );
108        progress.set_message("Scanning directories...");
109
110        let start_time = std::time::Instant::now();
111        let mut summary = ComparisonSummary::default();
112        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
113        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
114
115        let (tx, rx) = mpsc::channel();
116
117        std::thread::scope(|scope| {
118            scope.spawn(move || {
119                if let Err(e) = self.compare_streaming(tx) {
120                    log::error!("Error during comparison: {}", e);
121                }
122            });
123
124            // Receive results and update summary/UI
125            while let Ok(event) = rx.recv() {
126                match event {
127                    CompareProgress::StartOfComparison => {
128                        progress.set_message("Comparing files...");
129                    }
130                    CompareProgress::TotalFiles(total_files) => {
131                        progress.set_length(total_files as u64);
132                        progress.set_style(
133                            ProgressStyle::with_template(
134                                "[{elapsed_precise}] {bar:40.cyan/blue} {percent}% {pos:>7}/{len:7} {msg}",
135                            )
136                            .unwrap(),
137                        );
138                        progress.set_message("");
139                    }
140                    CompareProgress::Result(_, result) => {
141                        summary.update(&result);
142                        if !result.is_identical() {
143                            progress.suspend(|| {
144                                println!("{}", result.to_string(dir1_str, dir2_str));
145                            });
146                        }
147                        progress.inc(1);
148                    }
149                }
150            }
151        });
152
153        progress.finish();
154
155        eprintln!("\n--- Comparison Summary ---");
156        summary.print(dir1_str, dir2_str);
157        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
158        Ok(())
159    }
160
161    fn get_next_file(it: &mut walkdir::IntoIter, dir: &Path) -> Option<(PathBuf, PathBuf)> {
162        for entry in it.filter_map(|e| e.ok()) {
163            if entry.file_type().is_file()
164                && let Ok(rel_path) = entry.path().strip_prefix(dir)
165            {
166                return Some((rel_path.to_path_buf(), entry.path().to_path_buf()));
167            }
168        }
169        None
170    }
171
172    /// Performs the directory comparison and streams results via a channel.
173    ///
174    /// # Arguments
175    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
176    pub(crate) fn compare_streaming(
177        &self,
178        tx: mpsc::Sender<CompareProgress>,
179    ) -> anyhow::Result<()> {
180        let (tx_unordered, rx_unordered) = mpsc::channel();
181        std::thread::scope(|scope| {
182            scope.spawn(move || {
183                if let Err(e) = self.compare_unordered_streaming(tx_unordered) {
184                    log::error!("Error during unordered comparison: {}", e);
185                }
186            });
187
188            let mut buffer = HashMap::new();
189            let mut next_index = 0;
190
191            for event in rx_unordered {
192                if let CompareProgress::Result(i, _) = &event {
193                    let index = *i;
194                    if index == next_index {
195                        tx.send(event)?;
196                        next_index += 1;
197                        while let Some(buffered) = buffer.remove(&next_index) {
198                            tx.send(buffered)?;
199                            next_index += 1;
200                        }
201                    } else {
202                        buffer.insert(index, event);
203                    }
204                } else {
205                    tx.send(event)?;
206                }
207            }
208            Ok::<(), anyhow::Error>(())
209        })?;
210
211        Ok(())
212    }
213
214    fn compare_unordered_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
215        log::info!("Scanning directory: {:?}", self.dir1);
216        let mut it1 = WalkDir::new(&self.dir1).sort_by_file_name().into_iter();
217        log::info!("Scanning directory: {:?}", self.dir2);
218        let mut it2 = WalkDir::new(&self.dir2).sort_by_file_name().into_iter();
219        let mut next1 = Self::get_next_file(&mut it1, &self.dir1);
220        let mut next2 = Self::get_next_file(&mut it2, &self.dir2);
221        let mut index = 0;
222        tx.send(CompareProgress::StartOfComparison)?;
223        rayon::scope(|scope| {
224            loop {
225                let cmp = match (&next1, &next2) {
226                    (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
227                    (Some(_), None) => Ordering::Less,
228                    (None, Some(_)) => Ordering::Greater,
229                    (None, None) => break,
230                };
231
232                match cmp {
233                    Ordering::Less => {
234                        let (rel1, _) = next1.take().unwrap();
235                        let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
236                        tx.send(CompareProgress::Result(index, result))?;
237                        index += 1;
238                        next1 = Self::get_next_file(&mut it1, &self.dir1);
239                    }
240                    Ordering::Greater => {
241                        let (rel2, _) = next2.take().unwrap();
242                        let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
243                        tx.send(CompareProgress::Result(index, result))?;
244                        index += 1;
245                        next2 = Self::get_next_file(&mut it2, &self.dir2);
246                    }
247                    Ordering::Equal => {
248                        let (rel_path, path1) = next1.take().unwrap();
249                        let (_, path2) = next2.take().unwrap();
250
251                        let mut result =
252                            FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
253                        let buffer_size = self.buffer_size;
254                        let tx_clone = tx.clone();
255                        let i = index;
256                        scope.spawn(move |_| {
257                            let mut comparer = FileComparer::new(&path1, &path2);
258                            comparer.buffer_size = buffer_size;
259                            if let Err(error) = result.update(&comparer) {
260                                log::error!("Error during comparison of {:?}: {}", rel_path, error);
261                            }
262                            if tx_clone.send(CompareProgress::Result(i, result)).is_err() {
263                                log::error!(
264                                    "Receiver dropped, stopping comparison of {:?}",
265                                    rel_path
266                                );
267                            }
268                        });
269                        index += 1;
270                        next1 = Self::get_next_file(&mut it1, &self.dir1);
271                        next2 = Self::get_next_file(&mut it2, &self.dir2);
272                    }
273                }
274            }
275
276            tx.send(CompareProgress::TotalFiles(index))
277        })?;
278
279        Ok(())
280    }
281}
282
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// One file per top-level classification, with the shared file newer in dir1.
    #[test]
    fn test_comparison_summary() {
        let mut summary = ComparisonSummary::default();
        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        res3.modified_time_comparison = Some(Ordering::Greater);

        summary.update(&res1);
        summary.update(&res2);
        summary.update(&res3);

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
        assert_eq!(summary.dir2_newer, 0);
        assert_eq!(summary.same_time_diff_size, 0);
        assert_eq!(summary.same_time_size_diff_content, 0);
    }

    /// Covers the sub-classification of files whose modification times match.
    #[test]
    fn test_comparison_summary_same_time() {
        let mut summary = ComparisonSummary::default();

        let mut diff_size = FileComparisonResult::new(PathBuf::from("d"), Classification::InBoth);
        diff_size.modified_time_comparison = Some(Ordering::Equal);
        diff_size.size_comparison = Some(Ordering::Less);

        let mut diff_content =
            FileComparisonResult::new(PathBuf::from("e"), Classification::InBoth);
        diff_content.modified_time_comparison = Some(Ordering::Equal);
        diff_content.size_comparison = Some(Ordering::Equal);
        diff_content.is_content_same = Some(false);

        let mut identical = FileComparisonResult::new(PathBuf::from("f"), Classification::InBoth);
        identical.modified_time_comparison = Some(Ordering::Equal);
        identical.size_comparison = Some(Ordering::Equal);
        identical.is_content_same = Some(true);

        summary.update(&diff_size);
        summary.update(&diff_content);
        summary.update(&identical);

        assert_eq!(summary.in_both, 3);
        assert_eq!(summary.same_time_diff_size, 1);
        assert_eq!(summary.same_time_size_diff_content, 1);
        assert_eq!(summary.dir1_newer, 0);
        assert_eq!(summary.dir2_newer, 0);
    }

    /// End-to-end run over two temporary trees, checking both the
    /// classification of every file and the ordering of the streamed results.
    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical file on both sides
        fs::write(dir1.path().join("same.txt"), b"same content")?;
        fs::write(dir2.path().join("same.txt"), b"same content")?;

        // Files that exist on one side only
        fs::write(dir1.path().join("only1.txt"), b"only in dir1")?;
        fs::write(dir2.path().join("only2.txt"), b"only in dir2")?;

        // Same name, different length and content
        fs::write(dir1.path().join("diff.txt"), b"content 1")?;
        fs::write(dir2.path().join("diff.txt"), b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming(tx)?;

        // The sender was consumed above, so the channel drains and closes.
        let mut indices = Vec::new();
        let mut results = Vec::new();
        for event in rx {
            if let CompareProgress::Result(index, result) = event {
                indices.push(index);
                results.push(result);
            }
        }

        // `compare_streaming` must deliver results in sequence order, which is
        // the path-sorted order of the merged trees.
        assert_eq!(indices, vec![0, 1, 2, 3]);
        assert_eq!(results.len(), 4);

        // diff.txt
        assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(results[0].classification, Classification::InBoth);
        assert!(
            results[0].is_content_same == Some(false)
                || results[0].size_comparison != Some(Ordering::Equal)
        );

        // only1.txt
        assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(results[1].classification, Classification::OnlyInDir1);

        // only2.txt
        assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(results[2].classification, Classification::OnlyInDir2);

        // same.txt
        assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(results[3].classification, Classification::InBoth);
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }
}