1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*!
A library to recursively compare files in two folders and return two lists of files: one with new files and one with changed files.

`folder_compare` also takes a list of Strings acting as exclude patterns using `RegexSet`.

Overall, the functionality is comparable to `diff -rq folder1 folder2 -X excludepatterns.pat` on Unix-like systems.

For recognizing changed files, hashing with [`FxHasher`] is used.

[`FxHasher`]: https://github.com/cbreeden/fxhash
*/
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
use regex::RegexSet;
use std::hash::Hasher;
use std::fs::File;
use fxhash::FxHasher;
use std::io::Read;

/// Result of comparing a first directory tree against a second one.
///
/// Both lists hold paths as they were found while walking the first directory.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct FolderCompare {
    /// Files that exist in both directories but whose content hashes differ.
    pub changed_files: Vec<PathBuf>,
    /// Files that exist in the first directory but have no regular-file counterpart in the second.
    pub new_files: Vec<PathBuf>,
}

impl FolderCompare {
    /// Instantiates an object of FolderCompare and does the comparison between two `Path` directories and delivers itself consisting of
    /// two lists of `PathBuf` containing changed and new (only existing in first Directory) files.
    /// It takes a `Vec<&str>` as argument for excluding specific substrings in the path (e.g. file extensions like .txt).
    ///
    ///
    /// # Example
    ///
    /// The following code recursively iterates over two directories and returns lists of changed and new files
    ///
    ///```
    /// use std::path::Path;
    /// use folder_compare;
    /// use folder_compare::FolderCompare;
    ///
    ///
    /// let excluded = vec![".doc".to_string(), ".txt".to_string()];
    ///
    /// let result = FolderCompare::new(Path::new("/tmp/a"), Path::new("/tmp/b"), &excluded).unwrap();
    ///
    /// let changed_files = result.changed_files;
    /// let new_filed = result.new_files;
    ///```
    ///
    pub fn new(path1: &Path, path2: &Path, excluded: &Vec<String>) -> Result<Self, Error> {

        let mut final_object = FolderCompare {
            changed_files: vec![],
            new_files: vec![]
        };

        let mut walker = WalkDir::new(path1).into_iter();
        let set = RegexSet::new(excluded)?;

        loop {
            let entry = match walker.next() {
                None => break,
                Some(Err(_)) => continue,
                Some(Ok(entry)) => entry,
            };
            if !entry.file_type().is_file() {
                continue;
            }

            if entry.path_is_symlink() {
                continue;
            }

            if set.matches(entry.path().to_str().unwrap()).matched_any() {
                continue;
            }

            let path_without_prefix = entry.path().strip_prefix(path1)?;
            let file_in_second_path = path2.join(path_without_prefix);
            if !file_in_second_path.is_file() {
                final_object.new_files.push(entry.path().to_path_buf());
                continue;
            }

            let second_file = file_in_second_path.to_path_buf().clone();

            let buffer = &mut vec![];
            File::open(entry.path())?.read_to_end(buffer)?;
            let mut hasher = FxHasher::default();
            hasher.write(buffer);
            let buffer2 = &mut vec![];
            File::open(second_file)?.read_to_end(buffer2)?;
            let mut hasher2 = FxHasher::default();
            hasher2.write(buffer2);

            if hasher.finish() == hasher2.finish() {
                continue;
            }
            final_object.changed_files.push(entry.into_path());
        }


        Ok(final_object)
    }
}

/// Wrapper for the errors that can occur while comparing two folders.
///
/// Each variant carries the underlying error unchanged; it is also exposed through
/// `std::error::Error::source`.
#[derive(Debug)]
pub enum Error {
    /// An I/O operation failed, e.g. opening or reading one of the compared files.
    Io(std::io::Error),
    /// An exclude pattern could not be compiled as a regular expression.
    Regex(regex::Error),
    /// A path did not start with the prefix that was to be stripped from it.
    StripPrefix(std::path::StripPrefixError),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Io(err) => write!(f, "I/O error: {}", err),
            Error::Regex(err) => write!(f, "invalid exclude pattern: {}", err),
            Error::StripPrefix(err) => write!(f, "path prefix error: {}", err),
        }
    }
}

impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(err) => Some(err),
            Error::Regex(err) => Some(err),
            Error::StripPrefix(err) => Some(err),
        }
    }
}

impl From<std::io::Error> for Error {
    fn from(e: std::io::Error) -> Error {
        Error::Io(e)
    }
}

impl From<regex::Error> for Error {
    fn from(e: regex::Error) -> Error {
        Error::Regex(e)
    }
}

impl From<std::path::StripPrefixError> for Error {
    fn from(e: std::path::StripPrefixError) -> Error {
        Error::StripPrefix(e)
    }
}