1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
//! A collection of functions and structs to find duplicate files.
//!
//! # Example
//!
//! Find, display, and report all the duplicate files at the given paths:
//!
//! ```no_run
//! let counter = yadf::Config::builder().paths(&["."]).build().find_dupes::<yadf::SeaHasher>();
//! println!("{}", counter.duplicates().display::<yadf::Fdupes>());
//! eprintln!("{}", yadf::Report::from(&counter));
//! ```

mod bag;
pub mod fs;
mod macros;
mod report;

pub use bag::{Fdupes, Machine, TreeBag};
pub use fs::wrapper::DirEntry;
#[cfg(any(test, feature = "build-bin"))]
pub use hashers::{HighwayHasher, SeaHasher, XxHasher};
pub use report::Report;
use std::hash::Hasher;
use std::path::Path;

/// Search configuration.
///
/// Instances are assembled through the builder derived by
/// `typed_builder::TypedBuilder`. Every field except `paths` is marked
/// `#[builder(default)]`, so it may be omitted and then takes its `Default`
/// value (`None`). The configuration is consumed by `find_dupes`, which
/// forwards each field unchanged to `fs::find_dupes_partial`.
#[derive(Debug, Default, typed_builder::TypedBuilder)]
pub struct Config<'a, P>
where
    P: AsRef<Path>,
{
    /// Borrowed slice of path-like values handed to the scan.
    paths: &'a [P],
    /// Optional lower bound forwarded to the scan.
    /// NOTE(review): presumably the minimum file size in bytes — confirm in `fs`.
    #[builder(default)]
    min: Option<u64>,
    /// Optional upper bound forwarded to the scan.
    /// NOTE(review): presumably the maximum file size in bytes — confirm in `fs`.
    #[builder(default)]
    max: Option<u64>,
    /// Optional regular expression forwarded to the scan.
    /// NOTE(review): presumably used to filter candidate files — confirm in `fs`.
    #[builder(default)]
    regex: Option<regex::Regex>,
    /// Optional glob matcher forwarded to the scan.
    /// NOTE(review): presumably used to filter candidate files — confirm in `fs`.
    #[builder(default)]
    glob: Option<globset::GlobMatcher>,
}

impl<P> Config<'_, P>
where
    P: AsRef<Path>,
{
    /// Attempts a complete scan of every file, within the given size
    /// constraints, at the given paths.
    ///
    /// The work happens in two stages:
    ///
    /// 1. `fs::find_dupes_partial` builds an initial bag of candidates from
    ///    the configured paths and filters.
    /// 2. `fs::dedupe` refines that bag using the hasher `H`; its result is
    ///    returned to the caller.
    ///
    /// Summary counts are logged after each stage when the `Info` level is
    /// enabled, and the whole bag is dumped when `Debug` is enabled as well.
    ///
    /// `H` is the hasher implementation used by both stages; it has to be
    /// default-constructible and usable as an `std::io::Write` sink.
    pub fn find_dupes<H>(self) -> TreeBag<u64, DirEntry>
    where
        H: Hasher + Default + std::io::Write,
    {
        // Take the configuration apart once so each piece is moved exactly
        // where it is needed.
        let Self {
            paths,
            min,
            max,
            regex,
            glob,
        } = self;

        // Stage one: initial scan.
        let candidates = fs::find_dupes_partial::<H, P>(paths, min, max, regex, glob);
        if log::log_enabled!(log::Level::Info) {
            let scanned: usize = candidates.values().map(|bucket| bucket.len()).sum();
            log::info!("scanned {} files", scanned);

            let possible: usize = candidates
                .duplicates()
                .iter()
                .map(|bucket| bucket.len())
                .sum();
            log::info!("found {} possible duplicates after initial scan", possible);

            if log::log_enabled!(log::Level::Debug) {
                log::debug!("{:?}", candidates);
            }
        }

        // Stage two: refine the candidates with the hasher.
        let confirmed = fs::dedupe::<H>(candidates);
        if log::log_enabled!(log::Level::Info) {
            let files: usize = confirmed
                .duplicates()
                .iter()
                .map(|bucket| bucket.len())
                .sum();
            let groups = confirmed.duplicates().iter().count();
            log::info!(
                "found {} duplicates in {} groups after checksumming",
                files,
                groups,
            );

            if log::log_enabled!(log::Level::Debug) {
                log::debug!("{:?}", confirmed);
            }
        }

        confirmed
    }
}

#[cfg(any(test, feature = "build-bin"))]
mod hashers {
    //! Newtype wrappers around third-party hashers.
    //!
    //! Each wrapper is paired with an invocation of the crate's
    //! `newtype_impl_hasher_and_write!` macro.
    //! NOTE(review): judging by its name and by the bounds on `find_dupes`,
    //! the macro implements `Hasher` and `io::Write` by delegating to the
    //! wrapped value — confirm in `macros`.

    /// Wrapper around `highway::HighwayHasher`.
    #[repr(transparent)]
    #[derive(Default)]
    pub struct HighwayHasher(highway::HighwayHasher);
    super::newtype_impl_hasher_and_write!(HighwayHasher);

    /// Wrapper around `seahash::SeaHasher`.
    #[repr(transparent)]
    #[derive(Default)]
    pub struct SeaHasher(seahash::SeaHasher);
    super::newtype_impl_hasher_and_write!(SeaHasher);

    /// Wrapper around `twox_hash::XxHash64`.
    #[repr(transparent)]
    #[derive(Default)]
    pub struct XxHasher(twox_hash::XxHash64);
    super::newtype_impl_hasher_and_write!(XxHasher);
}