1#![deny(unsafe_code)]
26#![warn(rust_2018_idioms)]
27
28mod bag;
29mod ext;
30mod fs;
31mod path;
32
33pub use bag::{Factor, Fdupes, Machine, TreeBag};
34pub use globset;
35pub use path::Path;
36pub use regex;
37use std::hash::Hasher;
38use std::rc::Rc;
39
40pub type FileCounter = TreeBag<u64, Path>;
41pub type FileReplicates<'a> = bag::Replicates<'a, u64, Path>;
42
43#[derive(Debug, typed_builder::TypedBuilder)]
62#[builder(doc)]
63pub struct Yadf<P: AsRef<std::path::Path>> {
64 #[builder(setter(into, doc = "Paths that will be checked for duplicate files"))]
65 paths: Rc<[P]>,
66 #[builder(default, setter(into, doc = "Minimum file size"))]
67 minimum_file_size: Option<u64>,
68 #[builder(default, setter(into, doc = "Maximum file size"))]
69 maximum_file_size: Option<u64>,
70 #[builder(default, setter(into, doc = "Maximum recursion depth"))]
71 max_depth: Option<usize>,
72 #[builder(default, setter(into, doc = "File name must match this regex"))]
73 regex: Option<regex::Regex>,
74 #[builder(default, setter(into, doc = "File name must match this glob"))]
75 glob: Option<globset::Glob>,
76 #[cfg(unix)]
77 #[builder(default, setter(doc = "Treat hard links as duplicates"))]
78 hard_links: bool,
79}
80
81impl<P> Yadf<P>
82where
83 P: AsRef<std::path::Path>,
84{
85 pub fn scan<H>(self) -> FileCounter
87 where
88 H: Hasher + Default,
89 {
90 #[cfg(unix)]
91 let file_filter = fs::filter::FileFilter::new(
92 self.minimum_file_size,
93 self.maximum_file_size,
94 self.regex,
95 self.glob.map(|g| g.compile_matcher()),
96 self.hard_links,
97 );
98 #[cfg(not(unix))]
99 let file_filter = fs::filter::FileFilter::new(
100 self.minimum_file_size,
101 self.maximum_file_size,
102 self.regex,
103 self.glob.map(|g| g.compile_matcher()),
104 );
105 let bag = fs::find_dupes_partial::<H, _>(&self.paths, self.max_depth, file_filter);
106 if log::log_enabled!(log::Level::Info) {
107 log::info!(
108 "scanned {} files",
109 bag.as_inner().values().map(Vec::len).sum::<usize>()
110 );
111 log::info!(
112 "found {} possible duplicates after initial scan",
113 bag.duplicates().iter().map(Vec::len).sum::<usize>()
114 );
115 log::trace!("{:?}", bag);
116 }
117 let bag = fs::dedupe::<H>(bag);
118 if log::log_enabled!(log::Level::Info) {
119 log::info!(
120 "found {} duplicates in {} groups after checksumming",
121 bag.duplicates().iter().map(Vec::len).sum::<usize>(),
122 bag.duplicates().iter().count(),
123 );
124 log::trace!("{:?}", bag);
125 }
126 bag
127 }
128}