1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
mod bag;
mod fs;
pub mod path;
pub use bag::{Factor, Fdupes, Machine, Replicates, TreeBag};
pub use globset;
pub use regex;
use std::hash::Hasher;
use std::path::Path;
/// The [`TreeBag`] specialisation returned by `Yadf::scan`: `u64` keys mapped to this
/// crate's own [`path::Path`] values.
///
/// Note that `path::Path` here is the type from the crate's `path` module, not
/// `std::path::Path` (which this file imports under the bare name `Path`).
// NOTE(review): the u64 key is presumably a file hash/checksum — confirm against the `bag`/`fs` modules.
pub type FileCounter = TreeBag<u64, path::Path>;
/// The [`Replicates`] specialisation using the same key and value types as [`FileCounter`].
// NOTE(review): presumably the type produced by `FileCounter::duplicates()` — confirm in the `bag` module.
pub type FileReplicates<'a> = Replicates<'a, u64, path::Path>;
/// Search configuration for a duplicate-file scan.
///
/// Instances are assembled through the builder generated by
/// `typed_builder::TypedBuilder` and then consumed by `scan`, which forwards
/// every field to the `fs` module.
///
/// `paths` borrows the caller's slice for `'a`; `P` is any type that can be
/// viewed as a `std::path::Path`.
#[derive(Debug, Default, typed_builder::TypedBuilder)]
#[builder(doc)]
pub struct Yadf<'a, P>
where
P: AsRef<Path>,
{
// The only field without `default`, so the builder requires it to be set.
#[builder(setter(doc = "Paths that will be checked for duplicate files"))]
paths: &'a [P],
// Every field below is optional: `default` leaves it as `None` when the setter
// is not called, and `setter(into)` makes the setter accept any `Into<Option<_>>`
// value, so callers may pass either the bare value or an `Option`.
#[builder(default, setter(into, doc = "Minimum file size"))]
minimum_file_size: Option<u64>,
#[builder(default, setter(into, doc = "Maximum file size"))]
maximum_file_size: Option<u64>,
#[builder(default, setter(into, doc = "Maximum recursion depth"))]
max_depth: Option<usize>,
#[builder(default, setter(into, doc = "File name must match this regex"))]
regex: Option<regex::Regex>,
// Stored as an uncompiled `Glob`; `scan` compiles it into a matcher before use.
#[builder(default, setter(into, doc = "File name must match this glob"))]
glob: Option<globset::Glob>,
}
impl<P> Yadf<'_, P>
where
    P: AsRef<Path>,
{
    /// Consumes the configuration, runs the search and returns the resulting
    /// [`FileCounter`].
    ///
    /// The work is done in two passes, both parameterised by the hasher type `H`:
    ///
    /// 1. `fs::find_dupes_partial` receives the configured paths, size bounds,
    ///    name regex, compiled glob matcher and depth limit, and yields a bag
    ///    of candidates (logged as "possible duplicates after initial scan").
    /// 2. `fs::dedupe` refines that bag (logged as "after checksumming"); its
    ///    output is what this method returns.
    ///
    /// When the `Info` log level is enabled, summary counts are emitted after
    /// each pass, followed by a `trace!` dump of the bag. The counts are only
    /// computed inside that guard, so disabled logging costs nothing extra.
    pub fn scan<H>(self) -> FileCounter
    where
        H: Hasher + Default,
    {
        // Take the configuration apart once instead of reaching into `self`
        // field by field.
        let Self {
            paths,
            minimum_file_size: min_size,
            maximum_file_size: max_size,
            max_depth: depth_limit,
            regex: name_regex,
            glob: name_glob,
        } = self;

        // The glob is stored uncompiled; turn it into a matcher up front.
        let name_matcher = name_glob.map(|pattern| pattern.compile_matcher());

        let candidates = fs::find_dupes_partial::<H, P>(
            paths,
            min_size,
            max_size,
            name_regex,
            name_matcher,
            depth_limit,
        );

        if log::log_enabled!(log::Level::Info) {
            let scanned: usize = candidates.0.values().map(|bucket| bucket.len()).sum();
            log::info!("scanned {} files", scanned);

            let possible: usize = candidates
                .duplicates()
                .iter()
                .map(|group| group.len())
                .sum();
            log::info!("found {} possible duplicates after initial scan", possible);

            log::trace!("{:?}", candidates);
        }

        let confirmed = fs::dedupe::<H>(candidates);

        if log::log_enabled!(log::Level::Info) {
            let duplicate_files: usize = confirmed
                .duplicates()
                .iter()
                .map(|group| group.len())
                .sum();
            let duplicate_groups = confirmed.duplicates().iter().count();
            log::info!(
                "found {} duplicates in {} groups after checksumming",
                duplicate_files,
                duplicate_groups
            );

            log::trace!("{:?}", confirmed);
        }

        confirmed
    }
}