Skip to main content

folder_compare/
lib.rs

/*!
A library that recursively compares the files in two folders and returns lists of files: one with new files, one with changed files, and one with unchanged files.

`folder_compare` also takes a list of `String`s that act as exclude patterns; they are compiled into a `RegexSet`.

Overall, the functionality is comparable to `diff -rq folder1 folder2 -X excludepatterns.pat` on Unix-like systems.

To recognize changed files, file contents are hashed with [`FxHasher`].

[`FxHasher`]: https://github.com/cbreeden/fxhash
*/
12use std::path::{Path, PathBuf};
13use walkdir::WalkDir;
14use regex::RegexSet;
15use std::hash::Hasher;
16use std::fs::File;
17use fxhash::FxHasher;
18use std::io::Read;
19
/// Outcome of comparing two directory trees with [`FolderCompare::new`].
///
/// Every path stored here refers to a file found while walking the first
/// directory handed to `new`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FolderCompare {
    /// Files found in both directories whose contents were detected as changed.
    pub changed_files: Vec<PathBuf>,
    /// Files found in the first directory that have no counterpart file in the second.
    pub new_files: Vec<PathBuf>,
    /// Files found in both directories whose contents were detected as equal.
    pub unchanged_files: Vec<PathBuf>,
}
25
26impl FolderCompare {
27    /// Instantiates an object of FolderCompare and does the comparison between two `Path` directories and delivers itself consisting of
28    /// two lists of `PathBuf` containing changed and new (only existing in first Directory) files.
29    /// It takes a `Vec<&str>` as argument for excluding specific substrings in the path (e.g. file extensions like .txt).
30    ///
31    ///
32    /// # Example
33    ///
34    /// The following code recursively iterates over two directories and returns lists of changed and new files
35    ///
36    ///```
37    /// use std::path::Path;
38    /// use folder_compare;
39    /// use folder_compare::FolderCompare;
40    ///
41    ///
42    /// let excluded = vec![".doc".to_string(), ".txt".to_string()];
43    ///
44    /// let result = FolderCompare::new(Path::new("/tmp/a"), Path::new("/tmp/b"), &excluded).unwrap();
45    ///
46    /// let changed_files = result.changed_files;
47    /// let new_files = result.new_files;
48    /// let unchanged_files = result.unchanged_files;
49    ///```
50    ///
51    pub fn new(path1: &Path, path2: &Path, excluded: &Vec<String>) -> Result<Self, Error> {
52
53        let mut final_object = FolderCompare {
54            changed_files: vec![],
55            new_files: vec![],
56            unchanged_files: vec![]
57        };
58
59        let mut walker = WalkDir::new(path1).into_iter();
60        let set = RegexSet::new(excluded)?;
61
62        loop {
63            let entry = match walker.next() {
64                None => break,
65                Some(Err(_)) => continue,
66                Some(Ok(entry)) => entry,
67            };
68            if !entry.file_type().is_file() {
69                continue;
70            }
71
72            if entry.path_is_symlink() {
73                continue;
74            }
75
76            if set.matches(entry.path().to_str().unwrap()).matched_any() {
77                continue;
78            }
79
80            let path_without_prefix = entry.path().strip_prefix(path1)?;
81            let file_in_second_path = path2.join(path_without_prefix);
82            if !file_in_second_path.is_file() {
83                final_object.new_files.push(entry.path().to_path_buf());
84                continue;
85            }
86
87            let second_file = file_in_second_path.to_path_buf().clone();
88
89            let buffer = &mut vec![];
90            File::open(entry.path())?.read_to_end(buffer)?;
91            let mut hasher = FxHasher::default();
92            hasher.write(buffer);
93            let buffer2 = &mut vec![];
94            File::open(second_file)?.read_to_end(buffer2)?;
95            let mut hasher2 = FxHasher::default();
96            hasher2.write(buffer2);
97
98            if hasher.finish() == hasher2.finish() {
99                final_object.unchanged_files.push(entry.into_path());
100            } else {
101                final_object.changed_files.push(entry.into_path());
102            }
103        }
104
105
106        Ok(final_object)
107    }
108}
109
110/// Wrapper for possible errors
111#[derive(Debug)]
112pub enum Error {
113    Io(std::io::Error),
114    Regex(regex::Error),
115    StripPrefix(std::path::StripPrefixError),
116}
117
118impl From<std::io::Error> for Error {
119    fn from(e: std::io::Error) -> Error {
120        Error::Io(e)
121    }
122}
123
124impl From<regex::Error> for Error {
125    fn from(e: regex::Error) -> Error {
126        Error::Regex(e)
127    }
128}
129
130impl From<std::path::StripPrefixError> for Error {
131    fn from(e: std::path::StripPrefixError) -> Error {
132        Error::StripPrefix(e)
133    }
134}