Skip to main content

compare_dir/
lib.rs

1use indicatif::{ProgressBar, ProgressStyle};
2use std::sync::{Arc, Mutex, mpsc};
3use std::path::{Path, PathBuf};
4use std::fs;
5use std::io::{self, Read};
6use rayon::prelude::*;
7use walkdir::WalkDir;
8use std::collections::HashMap;
9use log::info;
10
/// Where a relative path was found when the two directory trees were scanned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// The path is present under the first directory and absent from the second.
    OnlyInDir1,
    /// The path is present under the second directory and absent from the first.
    OnlyInDir2,
    /// The path is present under both directories.
    InBoth,
}
21
/// The result of comparing two values (e.g., size or modified time), one
/// taken from each directory.
///
/// The enum is a plain tag, so it is `Copy` like [`Classification`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparison {
    /// The value in the first directory is greater.
    Dir1Greater,
    /// The value in the second directory is greater.
    Dir2Greater,
    /// Neither value is greater than the other.
    Same,
}

impl Comparison {
    /// Orders `v1` (first directory) against `v2` (second directory).
    ///
    /// Returns [`Comparison::Dir1Greater`] when `v1 > v2`,
    /// [`Comparison::Dir2Greater`] when `v1 < v2`, and [`Comparison::Same`]
    /// otherwise. Because only `PartialOrd` is required, values that cannot
    /// be ordered against each other (such as a floating-point NaN) also
    /// yield [`Comparison::Same`].
    pub fn from_values<T: PartialOrd>(v1: T, v2: T) -> Self {
        use std::cmp::Ordering;

        match v1.partial_cmp(&v2) {
            Some(Ordering::Greater) => Comparison::Dir1Greater,
            Some(Ordering::Less) => Comparison::Dir2Greater,
            // Equal, or no ordering exists between the two values.
            Some(Ordering::Equal) | None => Comparison::Same,
        }
    }
}
44
45/// Detailed result of comparing a single file.
46#[derive(Debug, Clone)]
47pub struct FileComparisonResult {
48    /// The path relative to the root of the directories.
49    pub relative_path: PathBuf,
50    /// Whether the file exists in one or both directories.
51    pub classification: Classification,
52    /// Comparison of the last modified time, if applicable.
53    pub modified_time_comparison: Option<Comparison>,
54    /// Comparison of the file size, if applicable.
55    pub size_comparison: Option<Comparison>,
56    /// Whether the content is byte-for-byte identical, if applicable.
57    pub is_content_same: Option<bool>,
58}
59
60impl FileComparisonResult {
61    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
62        Self {
63            relative_path,
64            classification,
65            modified_time_comparison: None,
66            size_comparison: None,
67            is_content_same: None,
68        }
69    }
70
71    pub fn is_identical(&self) -> bool {
72        self.classification == Classification::InBoth
73            && self.modified_time_comparison == Some(Comparison::Same)
74            && self.size_comparison == Some(Comparison::Same)
75            && self.is_content_same == Some(true)
76    }
77
78    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
79        let mut parts = Vec::new();
80        match self.classification {
81            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
82            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
83            Classification::InBoth => {}
84        }
85
86        if let Some(comp) = &self.modified_time_comparison {
87            match comp {
88                Comparison::Dir1Greater => parts.push(format!("{} is newer", dir1_name)),
89                Comparison::Dir2Greater => parts.push(format!("{} is newer", dir2_name)),
90                Comparison::Same => {}
91            }
92        }
93
94        if let Some(comp) = &self.size_comparison {
95            match comp {
96                Comparison::Dir1Greater => parts.push(format!("Size of {} is larger", dir1_name)),
97                Comparison::Dir2Greater => parts.push(format!("Size of {} is larger", dir2_name)),
98                Comparison::Same => {}
99            }
100        }
101
102        if let Some(same) = self.is_content_same {
103            if !same {
104                parts.push("Content differ".to_string());
105            }
106        }
107
108        format!("{}: {}", self.relative_path.display(), parts.join(", "))
109    }
110}
111
/// Running tallies of comparison outcomes, one count per reported category.
///
/// `Default` yields a summary with every counter at zero.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ComparisonSummary {
    /// Files found in both directories.
    pub in_both: usize,
    /// Files found only in the first directory.
    pub only_in_dir1: usize,
    /// Files found only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories where the first directory's copy is newer.
    pub dir1_newer: usize,
    /// Files in both directories where the second directory's copy is newer.
    pub dir2_newer: usize,
    /// Files in both directories reported as "same time, different size".
    pub same_time_diff_size: usize,
    /// Files in both directories reported as "same time and size, different content".
    pub same_time_size_diff_content: usize,
}
135
136impl ComparisonSummary {
137    pub fn update(&mut self, result: &FileComparisonResult) {
138        match result.classification {
139            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
140            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
141            Classification::InBoth => {
142                self.in_both += 1;
143                match result.modified_time_comparison {
144                    Some(Comparison::Dir1Greater) => self.dir1_newer += 1,
145                    Some(Comparison::Dir2Greater) => self.dir2_newer += 1,
146                    _ => {
147                        if result.size_comparison != Some(Comparison::Same) {
148                            self.same_time_diff_size += 1;
149                        } else if result.is_content_same == Some(false) {
150                            self.same_time_size_diff_content += 1;
151                        }
152                    }
153                }
154            }
155        }
156    }
157
158    pub fn print(&self, dir1_name: &str, dir2_name: &str) {
159        println!("Files in both: {}", self.in_both);
160        println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
161        println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
162        println!("Files in both ({} is newer): {}", dir1_name, self.dir1_newer);
163        println!("Files in both ({} is newer): {}", dir2_name, self.dir2_newer);
164        println!(
165            "Files in both (same time, different size): {}",
166            self.same_time_diff_size
167        );
168        println!(
169            "Files in both (same time and size, different content): {}",
170            self.same_time_size_diff_content
171        );
172    }
173}
174
/// A tool for comparing the contents of two directories.
///
/// Holds the two root paths; files are matched by their path relative to
/// each root.
#[derive(Debug, Clone)]
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
}
180
181impl DirectoryComparer {
182    /// Creates a new `DirectoryComparer` for the two given directories.
183    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
184        Self { dir1, dir2 }
185    }
186
187    /// Sets the maximum number of threads for parallel processing.
188    /// This initializes the global Rayon thread pool.
189    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
190        rayon::ThreadPoolBuilder::new()
191            .num_threads(parallel)
192            .build_global()
193            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
194        Ok(())
195    }
196
197    /// Executes the directory comparison and prints results to stdout.
198    /// This is a convenience method for CLI usage.
199    pub fn run(dir1: PathBuf, dir2: PathBuf) -> anyhow::Result<()> {
200
201        let pb_holder: Arc<Mutex<Option<ProgressBar>>> = Arc::new(Mutex::new(None));
202
203        let start_time = std::time::Instant::now();
204        let mut summary = ComparisonSummary::default();
205        let dir1_str = dir1.to_str().unwrap_or("dir1");
206        let dir2_str = dir2.to_str().unwrap_or("dir2");
207
208        let (tx, rx) = mpsc::channel();
209
210        // Run comparison in a separate thread or use rayon::spawn
211        let dir1_c = dir1.clone();
212        let dir2_c = dir2.clone();
213        let pb_holder_c = pb_holder.clone();
214
215        std::thread::spawn(move || {
216            let comparer = Self::new(dir1_c, dir2_c);
217            let on_total = move |total: usize| {
218                let pb = ProgressBar::new(total as u64);
219                pb.set_style(
220                    ProgressStyle::with_template(
221                        "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
222                    )
223                    .unwrap()
224                    .progress_chars("##-"),
225                );
226                *pb_holder_c.lock().unwrap() = Some(pb);
227            };
228
229            if let Err(e) = comparer.compare_streaming(on_total, tx) {
230                eprintln!("Error during comparison: {}", e);
231            }
232        });
233
234        // Receive results and update summary/UI
235        while let Ok(result) = rx.recv() {
236            summary.update(&result);
237            if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
238                if !result.is_identical() {
239                    pb.suspend(|| {
240                        println!("{}", result.to_string(dir1_str, dir2_str));
241                    });
242                }
243                pb.inc(1);
244            } else {
245                if !result.is_identical() {
246                    println!("{}", result.to_string(dir1_str, dir2_str));
247                }
248            }
249        }
250
251        if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
252            pb.finish_and_clear();
253        }
254
255        eprintln!("\n--- Comparison Summary ---");
256        summary.print(dir1_str, dir2_str);
257        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
258        Ok(())
259    }
260
261    fn get_files(dir: &Path) -> anyhow::Result<HashMap<PathBuf, PathBuf>> {
262        let mut files = HashMap::new();
263        for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
264            if entry.file_type().is_file() {
265                let rel_path = entry.path().strip_prefix(dir)?.to_path_buf();
266                files.insert(rel_path, entry.path().to_path_buf());
267            }
268        }
269        Ok(files)
270    }
271
272    /// Performs the directory comparison and streams results via a channel.
273    ///
274    /// # Arguments
275    /// * `on_total` - A callback triggered with the total number of files to be compared.
276    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
277    pub fn compare_streaming<F>(&self, on_total: F, tx: mpsc::Sender<FileComparisonResult>) -> anyhow::Result<()>
278    where
279        F: FnOnce(usize),
280    {
281        let (dir1_files, dir2_files) = rayon::join(
282            || {
283                info!("Scanning directory: {:?}", self.dir1);
284                Self::get_files(&self.dir1)
285            },
286            || {
287                info!("Scanning directory: {:?}", self.dir2);
288                Self::get_files(&self.dir2)
289            },
290        );
291        let dir1_files = dir1_files?;
292        let dir2_files = dir2_files?;
293
294        let mut all_rel_paths: Vec<_> = dir1_files.keys().chain(dir2_files.keys()).collect();
295        all_rel_paths.sort();
296        all_rel_paths.dedup();
297
298        on_total(all_rel_paths.len());
299
300        all_rel_paths.into_par_iter().for_each(|rel_path| {
301            let in_dir1 = dir1_files.get(rel_path);
302            let in_dir2 = dir2_files.get(rel_path);
303
304            let result = match (in_dir1, in_dir2) {
305                (Some(_), None) => {
306                    FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir1)
307                }
308                (None, Some(_)) => {
309                    FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir2)
310                }
311                (Some(p1), Some(p2)) => {
312                    let mut result = FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
313                    let m1 = fs::metadata(p1).ok();
314                    let m2 = fs::metadata(p2).ok();
315
316                    if let (Some(m1), Some(m2)) = (m1, m2) {
317                        let t1 = m1.modified().ok();
318                        let t2 = m2.modified().ok();
319                        if let (Some(t1), Some(t2)) = (t1, t2) {
320                            result.modified_time_comparison = Some(Comparison::from_values(t1, t2));
321                        }
322
323                        let s1 = m1.len();
324                        let s2 = m2.len();
325                        result.size_comparison = Some(Comparison::from_values(s1, s2));
326
327                        if s1 == s2 {
328                            info!("Comparing content: {:?}", rel_path);
329                            result.is_content_same = Some(compare_contents(p1, p2).unwrap_or(false));
330                        }
331                    }
332                    result
333                }
334                (None, None) => unreachable!(),
335            };
336            let _ = tx.send(result);
337        });
338
339        Ok(())
340    }
341}
342
/// Reports whether the files at `p1` and `p2` hold exactly the same bytes.
///
/// The files are read in fixed-size chunks and compared chunk by chunk, so
/// neither file is loaded into memory as a whole. Each chunk is filled
/// completely (or up to end of file) before it is compared, because a single
/// `read` call may return fewer bytes than requested; comparing raw `read`
/// results could report identical files as different.
///
/// # Errors
/// Returns any I/O error from opening or reading either file. Reads that
/// fail with `ErrorKind::Interrupted` are retried.
fn compare_contents(p1: &Path, p2: &Path) -> io::Result<bool> {
    // Reads until `buf` is full or the reader reaches end of file, and
    // returns the number of bytes placed in `buf`.
    fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(filled)
    }

    let mut f1 = fs::File::open(p1)?;
    let mut f2 = fs::File::open(p2)?;

    let mut buf1 = [0u8; 8192];
    let mut buf2 = [0u8; 8192];

    loop {
        let n1 = read_full(&mut f1, &mut buf1)?;
        let n2 = read_full(&mut f2, &mut buf2)?;

        if n1 != n2 || buf1[..n1] != buf2[..n2] {
            return Ok(false);
        }

        // A chunk that is not full means both files ended here, with equal data.
        if n1 < buf1.len() {
            return Ok(true);
        }
    }
}
363
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file whose content is exactly `bytes`.
    fn temp_file_with(bytes: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(bytes)?;
        Ok(file)
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello world")?;
        assert!(compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello rust")?;
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello")?;
        // The comparer only calls compare_contents on equal-sized files, but
        // unequal sizes must still be reported as a mismatch.
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_comparison_summary() {
        // One result per category exercised below.
        let newer_in_dir1 = {
            let mut both = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
            both.modified_time_comparison = Some(Comparison::Dir1Greater);
            both
        };
        let inputs = [
            FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1),
            FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2),
            newer_in_dir1,
        ];

        let mut summary = ComparisonSummary::default();
        for input in &inputs {
            summary.update(input);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Writes `bytes` to a new file called `name` directly under `root`.
        let write_file = |root: &Path, name: &str, bytes: &[u8]| -> io::Result<()> {
            fs::File::create(root.join(name))?.write_all(bytes)
        };

        // Files in dir1.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;

        // Files in dir2.
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;

        // A file that exists on both sides with different length and content.
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming(|_| {}, tx)?;

        // The sender was moved into compare_streaming, so the channel is
        // closed by now and the iterator ends after the last result.
        let mut results: Vec<FileComparisonResult> = rx.iter().collect();
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(diff.is_content_same == Some(false) || diff.size_comparison != Some(Comparison::Same));

        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Comparison::Same));

        Ok(())
    }
}