1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
//! This is a binary crate. You _can_ use it as a library, but I wouldn't recommend it.
//!
//! A collection of functions and structs to find duplicate files.
//!
//! # Example
//!
//! Find, display, and report all the duplicate files at the given paths:
//!
//! ```no_run
//! let counter = yadf::Config::builder()
//!     .paths(&["path/to/somewhere", "another/path"]) // required
//!     .minimum_file_size(64) // optional
//!     .maximum_file_size(1024 * 8) // optional
//!     .regex(None) // optional
//!     .glob(None) // optional
//!     .build()
//!     .scan::<yadf::HighwayHasher>();
//! println!("{}", counter.duplicates().display::<yadf::Fdupes>());
//! eprintln!("{}", yadf::Report::from(&counter));
//! ```

mod bag;
mod fs;
mod macros;
mod report;

pub use bag::{Duplicates, Fdupes, Machine, TreeBag};
pub use fs::wrapper::DirEntry;
pub use globset;
#[cfg(any(test, feature = "build-bin"))]
pub use hashers::{HighwayHasher, SeaHasher, XxHasher};
pub use regex;
pub use report::Report;
use std::path::Path;

/// Meta trait for the Hasher, Default and Write traits
/// Meta trait combining the [`core::hash::Hasher`], [`std::io::Write`],
/// and [`Default`] traits required by the scanning functions.
pub trait Hasher: core::hash::Hasher + std::io::Write + core::default::Default {}

/// Blanket implementation: any type satisfying all three bounds is a [`Hasher`].
impl<T: core::hash::Hasher + std::io::Write + core::default::Default> Hasher for T {}

/// Search configuration
///
/// # Example
///
/// ```no_run
/// let counter = yadf::Config::builder()
///     .paths(&["path/to/somewhere", "another/path"]) // required
///     .minimum_file_size(64) // optional
///     .maximum_file_size(1024 * 8) // optional
///     .regex(None) // optional
///     .glob(None) // optional
///     .build()
///     .scan::<yadf::HighwayHasher>();
/// ```
///
/// see the docs for the [ConfigBuilder](struct.ConfigBuilder.html)
#[derive(Debug, Default, typed_builder::TypedBuilder)]
#[builder(doc)]
pub struct Config<'a, P>
where
    P: AsRef<Path>,
{
    /// Root paths that will be walked looking for duplicate files (required).
    #[builder(setter(doc = "Paths that will be checked for duplicate files"))]
    paths: &'a [P],
    /// Files smaller than this are skipped; `None` means no lower bound.
    // NOTE(review): whether the bound is inclusive is decided in `fs` — confirm there.
    #[builder(default, setter(into, doc = "Minimum file size"))]
    minimum_file_size: Option<u64>,
    /// Files larger than this are skipped; `None` means no upper bound.
    #[builder(default, setter(into, doc = "Maximum file size"))]
    maximum_file_size: Option<u64>,
    /// Optional regex filter applied to file names during the scan.
    #[builder(default, setter(into, doc = "File name must match this regex"))]
    regex: Option<regex::Regex>,
    /// Optional glob filter; compiled into a matcher at scan time (see `scan`).
    #[builder(default, setter(into, doc = "File name must match this glob"))]
    glob: Option<globset::Glob>,
}

impl<P> Config<'_, P>
where
    P: AsRef<Path>,
{
    /// Attempts a complete scan according to this configuration.
    ///
    /// Runs a cheap first pass (size/partial-hash grouping), then a full
    /// checksumming pass over the surviving candidates, returning the
    /// resulting bag of entries keyed by hash.
    pub fn scan<H: Hasher>(self) -> TreeBag<u64, DirEntry> {
        // Compile the glob once, up front, before handing it to the walker.
        let matcher = self.glob.map(|glob| glob.compile_matcher());
        let coarse = fs::find_dupes_partial::<H, P>(
            self.paths,
            self.minimum_file_size,
            self.maximum_file_size,
            self.regex,
            matcher,
        );
        // Guard with log_enabled! so the summing work is skipped when
        // nobody is listening at Info level.
        if log::log_enabled!(log::Level::Info) {
            let scanned: usize = coarse.values().map(|group| group.len()).sum();
            log::info!("scanned {} files", scanned);
            let candidates: usize = coarse.duplicates().iter().map(|group| group.len()).sum();
            log::info!("found {} possible duplicates after initial scan", candidates);
            if log::log_enabled!(log::Level::Debug) {
                log::debug!("{:?}", coarse);
            }
        }
        let confirmed = fs::dedupe::<H>(coarse);
        if log::log_enabled!(log::Level::Info) {
            log::info!(
                "found {} duplicates in {} groups after checksumming",
                confirmed
                    .duplicates()
                    .iter()
                    .map(|group| group.len())
                    .sum::<usize>(),
                confirmed.duplicates().iter().count(),
            );
            if log::log_enabled!(log::Level::Debug) {
                log::debug!("{:?}", confirmed);
            }
        }
        confirmed
    }
}

#[cfg(any(test, feature = "build-bin"))]
mod hashers {
    //! Newtype wrappers around third-party hashers so the crate-local
    //! macro can implement both `Hasher` and `io::Write` for them.
    //! Only compiled for tests or the binary ("build-bin" feature).

    /// Hasher struct implementing Hasher, Default and Write
    #[derive(Default)]
    #[repr(transparent)]
    pub struct HighwayHasher(highway::HighwayHasher);
    /// Hasher struct implementing Hasher, Default and Write
    #[derive(Default)]
    #[repr(transparent)]
    pub struct SeaHasher(seahash::SeaHasher);
    /// Hasher struct implementing Hasher, Default and Write
    #[derive(Default)]
    #[repr(transparent)]
    pub struct XxHasher(twox_hash::XxHash64);

    // Generate the Hasher + Write forwarding impls for each newtype
    // (macro defined in the crate's `macros` module).
    super::newtype_impl_hasher_and_write!(HighwayHasher);
    super::newtype_impl_hasher_and_write!(SeaHasher);
    super::newtype_impl_hasher_and_write!(XxHasher);
}