1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
mod bag;
pub mod fs;
mod macros;
mod report;
pub use bag::{Fdupes, Machine, TreeBag};
pub use fs::wrapper::DirEntry;
#[cfg(any(test, feature = "build-bin"))]
pub use hashers::{HighwayHasher, SeaHasher, XxHasher};
pub use report::Report;
use std::hash::Hasher;
use std::path::Path;
/// Settings for one duplicate-file search.
///
/// Instances are assembled through the builder generated by
/// `typed_builder::TypedBuilder`. Only `paths` lacks a `#[builder(default)]`
/// marker; every other field falls back to `None` when it is not set.
#[derive(Debug, Default, typed_builder::TypedBuilder)]
pub struct Config<'a, P: AsRef<Path>> {
    /// Borrowed slice of path-like values handed to the scanner.
    paths: &'a [P],
    /// Optional lower bound forwarded to `fs::find_dupes_partial`.
    /// Presumably a minimum file size in bytes (TODO confirm against `fs`).
    #[builder(default)]
    min: Option<u64>,
    /// Optional upper bound forwarded to `fs::find_dupes_partial`.
    /// Presumably a maximum file size in bytes (TODO confirm against `fs`).
    #[builder(default)]
    max: Option<u64>,
    /// Optional regular expression forwarded to `fs::find_dupes_partial`.
    /// Presumably used to filter candidate paths (TODO confirm against `fs`).
    #[builder(default)]
    regex: Option<regex::Regex>,
    /// Optional glob matcher forwarded to `fs::find_dupes_partial`.
    /// Presumably used to filter candidate paths (TODO confirm against `fs`).
    #[builder(default)]
    glob: Option<globset::GlobMatcher>,
}
impl<P: AsRef<Path>> Config<'_, P> {
    /// Consumes the configuration and runs the duplicate search in two stages.
    ///
    /// 1. `fs::find_dupes_partial` performs the initial scan over `paths`
    ///    using the configured `min`, `max`, `regex` and `glob` settings.
    /// 2. `fs::dedupe` narrows that result down; the log output refers to
    ///    this stage as "checksumming".
    ///
    /// `H` is the hasher type passed to both stages. It must implement
    /// [`Hasher`], [`Default`] and [`std::io::Write`].
    ///
    /// When the `Info` log level is enabled, summary counts are logged after
    /// each stage; when `Debug` is enabled as well, the whole bag is dumped
    /// with its `Debug` representation.
    ///
    /// Returns the bag produced by `fs::dedupe`.
    pub fn find_dupes<H>(self) -> TreeBag<u64, DirEntry>
    where
        H: Hasher + Default + std::io::Write,
    {
        let Self {
            paths,
            min,
            max,
            regex,
            glob,
        } = self;

        // Stage one: initial scan producing candidate groups.
        let candidates = fs::find_dupes_partial::<H, P>(paths, min, max, regex, glob);

        // The counts below walk the whole bag, so skip them entirely unless
        // the messages would actually be emitted.
        if log::log_enabled!(log::Level::Info) {
            let scanned: usize = candidates.values().map(|bucket| bucket.len()).sum();
            log::info!("scanned {} files", scanned);

            let possible: usize = candidates
                .duplicates()
                .iter()
                .map(|bucket| bucket.len())
                .sum();
            log::info!("found {} possible duplicates after initial scan", possible);

            if log::log_enabled!(log::Level::Debug) {
                log::debug!("{:?}", candidates);
            }
        }

        // Stage two: confirm the candidates.
        let confirmed = fs::dedupe::<H>(candidates);

        if log::log_enabled!(log::Level::Info) {
            let files: usize = confirmed
                .duplicates()
                .iter()
                .map(|group| group.len())
                .sum();
            let groups = confirmed.duplicates().iter().count();
            log::info!(
                "found {} duplicates in {} groups after checksumming",
                files,
                groups,
            );

            if log::log_enabled!(log::Level::Debug) {
                log::debug!("{:?}", confirmed);
            }
        }

        confirmed
    }
}
/// Newtype wrappers around third-party hashers.
///
/// The module is compiled only for tests or when the `build-bin` feature is
/// enabled. Each wrapper is passed to `newtype_impl_hasher_and_write!`, which
/// is defined elsewhere in the crate; judging by its name it presumably
/// implements `Hasher` and `Write` for the wrapper (TODO confirm in `macros`).
#[cfg(any(test, feature = "build-bin"))]
mod hashers {
    /// Transparent wrapper around `highway::HighwayHasher`.
    #[derive(Default)]
    #[repr(transparent)]
    pub struct HighwayHasher(highway::HighwayHasher);
    super::newtype_impl_hasher_and_write!(HighwayHasher);

    /// Transparent wrapper around `seahash::SeaHasher`.
    #[derive(Default)]
    #[repr(transparent)]
    pub struct SeaHasher(seahash::SeaHasher);
    super::newtype_impl_hasher_and_write!(SeaHasher);

    /// Transparent wrapper around `twox_hash::XxHash64`.
    #[derive(Default)]
    #[repr(transparent)]
    pub struct XxHasher(twox_hash::XxHash64);
    super::newtype_impl_hasher_and_write!(XxHasher);
}