Skip to main content

compare_dir/
lib.rs

1use indicatif::{ProgressBar, ProgressStyle};
2use log::info;
3use rayon::prelude::*;
4use std::collections::HashMap;
5use std::fs;
6use std::io::{self, Read};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex, mpsc};
9use walkdir::WalkDir;
10
/// Where a file was found relative to the two directories being compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Present in the first directory, absent from the second.
    OnlyInDir1,
    /// Present in the second directory, absent from the first.
    OnlyInDir2,
    /// Present in both directories.
    InBoth,
}
21
/// The result of ordering a value from the first directory against the
/// matching value from the second directory (e.g. file size or modified time).
///
/// The enum carries no data, so it is `Copy` like [`Classification`]; callers
/// can pass and match it by value without borrowing or cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparison {
    /// The value in the first directory is greater.
    Dir1Greater,
    /// The value in the second directory is greater.
    Dir2Greater,
    /// The values are equal (or could not be ordered; see [`Comparison::from_values`]).
    Same,
}

impl Comparison {
    /// Classifies `v1` (first directory) against `v2` (second directory).
    ///
    /// Returns [`Comparison::Dir1Greater`] when `v1 > v2`,
    /// [`Comparison::Dir2Greater`] when `v2 > v1`, and [`Comparison::Same`]
    /// otherwise. Because only `PartialOrd` is required, values that are
    /// incomparable (for example a floating-point NaN) also yield `Same`.
    pub fn from_values<T: PartialOrd>(v1: T, v2: T) -> Self {
        if v1 > v2 {
            Comparison::Dir1Greater
        } else if v2 > v1 {
            Comparison::Dir2Greater
        } else {
            // Neither side is strictly greater: equal or incomparable.
            Comparison::Same
        }
    }
}
44
45/// Detailed result of comparing a single file.
46#[derive(Debug, Clone)]
47pub struct FileComparisonResult {
48    /// The path relative to the root of the directories.
49    pub relative_path: PathBuf,
50    /// Whether the file exists in one or both directories.
51    pub classification: Classification,
52    /// Comparison of the last modified time, if applicable.
53    pub modified_time_comparison: Option<Comparison>,
54    /// Comparison of the file size, if applicable.
55    pub size_comparison: Option<Comparison>,
56    /// Whether the content is byte-for-byte identical, if applicable.
57    pub is_content_same: Option<bool>,
58}
59
60impl FileComparisonResult {
61    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
62        Self {
63            relative_path,
64            classification,
65            modified_time_comparison: None,
66            size_comparison: None,
67            is_content_same: None,
68        }
69    }
70
71    pub fn is_identical(&self) -> bool {
72        self.classification == Classification::InBoth
73            && self.modified_time_comparison == Some(Comparison::Same)
74            && self.size_comparison == Some(Comparison::Same)
75            && self.is_content_same == Some(true)
76    }
77
78    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
79        let mut parts = Vec::new();
80        match self.classification {
81            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
82            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
83            Classification::InBoth => {}
84        }
85
86        if let Some(comp) = &self.modified_time_comparison {
87            match comp {
88                Comparison::Dir1Greater => parts.push(format!("{} is newer", dir1_name)),
89                Comparison::Dir2Greater => parts.push(format!("{} is newer", dir2_name)),
90                Comparison::Same => {}
91            }
92        }
93
94        if let Some(comp) = &self.size_comparison {
95            match comp {
96                Comparison::Dir1Greater => parts.push(format!("Size of {} is larger", dir1_name)),
97                Comparison::Dir2Greater => parts.push(format!("Size of {} is larger", dir2_name)),
98                Comparison::Same => {}
99            }
100        }
101
102        if let Some(same) = self.is_content_same
103            && !same
104        {
105            parts.push("Content differ".to_string());
106        }
107
108        format!("{}: {}", self.relative_path.display(), parts.join(", "))
109    }
110}
111
/// Running totals collected while comparing two directories.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories whose copy in the first directory is newer.
    pub dir1_newer: usize,
    /// Files in both directories whose copy in the second directory is newer.
    pub dir2_newer: usize,
    /// Files in both directories with the same modified time but different sizes.
    pub same_time_diff_size: usize,
    /// Files in both directories with the same modified time and size but
    /// different content.
    pub same_time_size_diff_content: usize,
}
122
123impl ComparisonSummary {
124    pub fn update(&mut self, result: &FileComparisonResult) {
125        match result.classification {
126            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
127            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
128            Classification::InBoth => {
129                self.in_both += 1;
130                match result.modified_time_comparison {
131                    Some(Comparison::Dir1Greater) => self.dir1_newer += 1,
132                    Some(Comparison::Dir2Greater) => self.dir2_newer += 1,
133                    _ => {
134                        if result.size_comparison != Some(Comparison::Same) {
135                            self.same_time_diff_size += 1;
136                        } else if result.is_content_same == Some(false) {
137                            self.same_time_size_diff_content += 1;
138                        }
139                    }
140                }
141            }
142        }
143    }
144
145    pub fn print(&self, dir1_name: &str, dir2_name: &str) {
146        println!("Files in both: {}", self.in_both);
147        println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
148        println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
149        println!(
150            "Files in both ({} is newer): {}",
151            dir1_name, self.dir1_newer
152        );
153        println!(
154            "Files in both ({} is newer): {}",
155            dir2_name, self.dir2_newer
156        );
157        println!(
158            "Files in both (same time, different size): {}",
159            self.same_time_diff_size
160        );
161        println!(
162            "Files in both (same time and size, different content): {}",
163            self.same_time_size_diff_content
164        );
165    }
166}
167
/// Compares the files found under two directory trees.
pub struct DirectoryComparer {
    // Root of the first tree (reported as "dir1").
    dir1: PathBuf,
    // Root of the second tree (reported as "dir2").
    dir2: PathBuf,
}
173
174impl DirectoryComparer {
175    /// Creates a new `DirectoryComparer` for the two given directories.
176    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
177        Self { dir1, dir2 }
178    }
179
180    /// Sets the maximum number of threads for parallel processing.
181    /// This initializes the global Rayon thread pool.
182    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
183        rayon::ThreadPoolBuilder::new()
184            .num_threads(parallel)
185            .build_global()
186            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
187        Ok(())
188    }
189
190    /// Executes the directory comparison and prints results to stdout.
191    /// This is a convenience method for CLI usage.
192    pub fn run(&self) -> anyhow::Result<()> {
193        let pb_holder: Arc<Mutex<Option<ProgressBar>>> = Arc::new(Mutex::new(None));
194
195        let start_time = std::time::Instant::now();
196        let mut summary = ComparisonSummary::default();
197        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
198        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
199
200        let (tx, rx) = mpsc::channel();
201        let pb_holder_c = pb_holder.clone();
202
203        std::thread::scope(|s| {
204            s.spawn(move || {
205                let on_total = move |total: usize| {
206                    let pb = ProgressBar::new(total as u64);
207                    pb.set_style(
208                        ProgressStyle::with_template(
209                            "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
210                        )
211                        .unwrap()
212                        .progress_chars("##-"),
213                    );
214                    *pb_holder_c.lock().unwrap() = Some(pb);
215                };
216
217                if let Err(e) = self.compare_streaming(on_total, tx) {
218                    eprintln!("Error during comparison: {}", e);
219                }
220            });
221
222            // Receive results and update summary/UI
223            while let Ok(result) = rx.recv() {
224                summary.update(&result);
225                if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
226                    if !result.is_identical() {
227                        pb.suspend(|| {
228                            println!("{}", result.to_string(dir1_str, dir2_str));
229                        });
230                    }
231                    pb.inc(1);
232                } else if !result.is_identical() {
233                    println!("{}", result.to_string(dir1_str, dir2_str));
234                }
235            }
236        });
237
238        if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
239            pb.finish_and_clear();
240        }
241
242        eprintln!("\n--- Comparison Summary ---");
243        summary.print(dir1_str, dir2_str);
244        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
245        Ok(())
246    }
247
248    fn get_files(dir: &Path) -> anyhow::Result<HashMap<PathBuf, PathBuf>> {
249        let mut files = HashMap::new();
250        for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
251            if entry.file_type().is_file() {
252                let rel_path = entry.path().strip_prefix(dir)?.to_path_buf();
253                files.insert(rel_path, entry.path().to_path_buf());
254            }
255        }
256        Ok(files)
257    }
258
259    /// Performs the directory comparison and streams results via a channel.
260    ///
261    /// # Arguments
262    /// * `on_total` - A callback triggered with the total number of files to be compared.
263    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
264    fn compare_streaming<F>(
265        &self,
266        on_total: F,
267        tx: mpsc::Sender<FileComparisonResult>,
268    ) -> anyhow::Result<()>
269    where
270        F: FnOnce(usize),
271    {
272        let (dir1_files, dir2_files) = rayon::join(
273            || {
274                info!("Scanning directory: {:?}", self.dir1);
275                Self::get_files(&self.dir1)
276            },
277            || {
278                info!("Scanning directory: {:?}", self.dir2);
279                Self::get_files(&self.dir2)
280            },
281        );
282        let dir1_files = dir1_files?;
283        let dir2_files = dir2_files?;
284
285        let mut all_rel_paths: Vec<_> = dir1_files.keys().chain(dir2_files.keys()).collect();
286        all_rel_paths.sort();
287        all_rel_paths.dedup();
288
289        on_total(all_rel_paths.len());
290
291        all_rel_paths.into_par_iter().for_each(|rel_path| {
292            let in_dir1 = dir1_files.get(rel_path);
293            let in_dir2 = dir2_files.get(rel_path);
294
295            let result = match (in_dir1, in_dir2) {
296                (Some(_), None) => {
297                    FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir1)
298                }
299                (None, Some(_)) => {
300                    FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir2)
301                }
302                (Some(p1), Some(p2)) => {
303                    let mut result =
304                        FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
305                    let m1 = fs::metadata(p1).ok();
306                    let m2 = fs::metadata(p2).ok();
307
308                    if let (Some(m1), Some(m2)) = (m1, m2) {
309                        let t1 = m1.modified().ok();
310                        let t2 = m2.modified().ok();
311                        if let (Some(t1), Some(t2)) = (t1, t2) {
312                            result.modified_time_comparison = Some(Comparison::from_values(t1, t2));
313                        }
314
315                        let s1 = m1.len();
316                        let s2 = m2.len();
317                        result.size_comparison = Some(Comparison::from_values(s1, s2));
318
319                        if s1 == s2 {
320                            info!("Comparing content: {:?}", rel_path);
321                            result.is_content_same =
322                                Some(compare_contents(p1, p2).unwrap_or(false));
323                        }
324                    }
325                    result
326                }
327                (None, None) => unreachable!(),
328            };
329            let _ = tx.send(result);
330        });
331
332        Ok(())
333    }
334}
335
/// Reports whether the files at `p1` and `p2` hold exactly the same bytes.
///
/// The files are read in fixed-size chunks that are compared pairwise. Each
/// chunk is filled completely (or up to end of file) before it is compared,
/// so a `read` call that returns fewer bytes than requested can no longer
/// make two identical files look different.
///
/// # Errors
/// Returns any I/O error raised while opening or reading either file, except
/// `ErrorKind::Interrupted`, which is retried.
fn compare_contents(p1: &Path, p2: &Path) -> io::Result<bool> {
    // Reads until `buf` is full or the reader hits end of file, and returns
    // the number of bytes placed in `buf`.
    fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(filled)
    }

    let mut f1 = fs::File::open(p1)?;
    let mut f2 = fs::File::open(p2)?;

    let mut buf1 = [0u8; 8192];
    let mut buf2 = [0u8; 8192];

    loop {
        let n1 = read_full(&mut f1, &mut buf1)?;
        let n2 = read_full(&mut f2, &mut buf2)?;

        // Slices of different lengths compare unequal, so this also covers
        // one file ending before the other.
        if buf1[..n1] != buf2[..n2] {
            return Ok(false);
        }

        // A chunk that is not full means both files ended here, still equal.
        if n1 < buf1.len() {
            return Ok(true);
        }
    }
}
356
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a named temporary file that holds exactly `bytes`.
    fn temp_file_with(bytes: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(bytes)?;
        Ok(file)
    }

    /// Creates `dir/name` with `bytes` as its content.
    fn write_file(dir: &Path, name: &str, bytes: &[u8]) -> io::Result<()> {
        let mut file = fs::File::create(dir.join(name))?;
        file.write_all(bytes)
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello world")?;
        assert!(compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello rust")?;
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello")?;
        // Callers only compare equal-sized files, but unequal sizes must
        // still be reported as different.
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_comparison_summary() {
        let only_first = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_second =
            FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_in_first =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_first.modified_time_comparison = Some(Comparison::Dir1Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_first, &only_second, &newer_in_first] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // First directory: a shared file and a file that exists only here.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;

        // Second directory: the shared file and a file that exists only here.
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;

        // A file on both sides that differs in length and content.
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming(|_| {}, tx)?;

        // The sender was consumed above, so this drains and then stops.
        let mut results: Vec<FileComparisonResult> = rx.iter().collect();
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let names: Vec<&str> = results
            .iter()
            .map(|r| r.relative_path.to_str().unwrap())
            .collect();
        assert_eq!(names, ["diff.txt", "only1.txt", "only2.txt", "same.txt"]);

        let classes: Vec<Classification> = results.iter().map(|r| r.classification).collect();
        assert_eq!(
            classes,
            [
                Classification::InBoth,
                Classification::OnlyInDir1,
                Classification::OnlyInDir2,
                Classification::InBoth,
            ]
        );

        // diff.txt
        let diff = &results[0];
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Comparison::Same)
        );

        // same.txt
        let same = &results[3];
        assert_eq!(same.size_comparison, Some(Comparison::Same));

        Ok(())
    }
}