1pub mod cli;
2pub mod delete;
3pub mod hash;
4pub mod report;
5pub mod scan;
6
7use std::collections::HashSet;
8use std::path::Path;
9
10use indicatif::{ProgressBar, ProgressStyle};
11use rayon::prelude::*;
12
13use cli::{Args, MediaFilter};
14use scan::{HashedFile, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
15
16fn make_progress_bar(len: u64, msg: &str, quiet: bool) -> ProgressBar {
17 if quiet {
18 return ProgressBar::hidden();
19 }
20 let pb = ProgressBar::new(len);
21 pb.set_style(
22 ProgressStyle::default_bar()
23 .template("{msg} [{bar:40}] {pos}/{len} ({eta})")
24 .expect("valid template")
25 .progress_chars("=> "),
26 );
27 pb.set_message(msg.to_string());
28 pb
29}
30
31fn hash_images(files: &[std::path::PathBuf], directory: &Path, args: &Args) -> Vec<HashedFile> {
32 let pb = make_progress_bar(files.len() as u64, "Hashing images", args.quiet);
33
34 let results: Vec<_> = files
35 .par_iter()
36 .filter_map(|f| {
37 let result = hash::compute_image_hash(f);
38 pb.inc(1);
39 match result {
40 Ok(h) => {
41 let rel = f.strip_prefix(directory).unwrap_or(f);
42 let key = rel.to_string_lossy().to_string();
43 if args.verbose {
44 eprintln!(" {} -> {:?}", key, h);
45 }
46 Some(HashedFile {
47 relative_path: key,
48 hash: h,
49 })
50 }
51 Err(e) => {
52 eprintln!(" Warning: skipping {}: {e}", f.display());
53 None
54 }
55 }
56 })
57 .collect();
58
59 pb.finish_and_clear();
60 results
61}
62
63fn hash_videos(
64 files: &[std::path::PathBuf],
65 directory: &Path,
66 ffmpeg: &Path,
67 args: &Args,
68) -> Vec<HashedFile> {
69 let pb = make_progress_bar(files.len() as u64, "Hashing videos", args.quiet);
70
71 let results: Vec<_> = files
72 .par_iter()
73 .filter_map(|f| {
74 let result = hash::extract_video_frame_hash(f, ffmpeg);
75 pb.inc(1);
76 match result {
77 Ok(h) => {
78 let rel = f.strip_prefix(directory).unwrap_or(f);
79 let key = rel.to_string_lossy().to_string();
80 if args.verbose {
81 eprintln!(" {} -> {:?}", key, h);
82 }
83 Some(HashedFile {
84 relative_path: key,
85 hash: h,
86 })
87 }
88 Err(e) => {
89 eprintln!(" Warning: skipping {}: {e}", f.display());
90 None
91 }
92 }
93 })
94 .collect();
95
96 pb.finish_and_clear();
97 results
98}
99
100fn compare_hashes(
101 hashes: &[HashedFile],
102 threshold: u32,
103 label: &str,
104 args: &Args,
105) -> Vec<scan::DuplicateGroup> {
106 let total_pairs = (hashes.len() * hashes.len().saturating_sub(1)) / 2;
107 let pb = make_progress_bar(
108 total_pairs as u64,
109 &format!("Comparing {label}"),
110 args.quiet,
111 );
112
113 let mut duplicates = std::collections::HashMap::new();
114 for h in hashes {
115 duplicates
116 .entry(h.relative_path.clone())
117 .or_insert_with(Vec::new);
118 }
119
120 for i in 0..hashes.len() {
121 for j in (i + 1)..hashes.len() {
122 let distance = hashes[i].hash.dist(&hashes[j].hash);
123 if args.verbose {
124 eprintln!(
125 " {} <-> {}: distance={}",
126 hashes[i].relative_path, hashes[j].relative_path, distance
127 );
128 }
129 if distance <= threshold {
130 duplicates
131 .entry(hashes[i].relative_path.clone())
132 .or_default()
133 .push(hashes[j].relative_path.clone());
134 duplicates
135 .entry(hashes[j].relative_path.clone())
136 .or_default()
137 .push(hashes[i].relative_path.clone());
138 }
139 pb.inc(1);
140 }
141 }
142
143 pb.finish_and_clear();
144 scan::build_duplicate_groups(&duplicates)
145}
146
147fn process_media(
148 directory: &Path,
149 extensions: &HashSet<&str>,
150 label: &str,
151 hash_fn: impl Fn(&[std::path::PathBuf], &Path, &Args) -> Vec<HashedFile>,
152 args: &Args,
153 all_groups: &mut Vec<scan::DuplicateGroup>,
154) -> eyre::Result<()> {
155 let files = scan::collect_files(directory, extensions)?;
156 if files.is_empty() {
157 if !args.json {
158 println!("No {label}s found.");
159 }
160 return Ok(());
161 }
162
163 if !args.quiet && !args.json {
164 eprintln!("Scanning {} {label}(s)...", files.len());
165 }
166
167 let hashes = hash_fn(&files, directory, args);
168 let groups = compare_hashes(&hashes, args.threshold, label, args);
169
170 if !args.json {
171 if groups.is_empty() {
172 println!("No duplicate {label}s found.");
173 } else {
174 println!("{}", report::format_table(&groups, args.dry_run, label));
175 }
176 }
177
178 all_groups.extend(groups);
179 Ok(())
180}
181
182pub fn run(args: &Args) -> eyre::Result<bool> {
183 let directory = &args.directory;
184 let mut total_deleted = 0usize;
185 let mut all_groups: Vec<scan::DuplicateGroup> = Vec::new();
186 let mut empty_files_rel: Vec<String> = Vec::new();
187
188 if args.delete_empty {
189 let empty = delete::find_empty_files(directory)?;
190 if !empty.is_empty() {
191 empty_files_rel = empty
192 .iter()
193 .map(|p| {
194 p.strip_prefix(directory)
195 .unwrap_or(p)
196 .to_string_lossy()
197 .to_string()
198 })
199 .collect();
200
201 if !args.json {
202 println!(
203 "{}",
204 report::format_empty_table(&empty_files_rel, args.dry_run)
205 );
206 }
207
208 if !args.dry_run {
209 total_deleted += delete::delete_files(&empty, directory, "empty", args.yes)?;
210 }
211 }
212 }
213
214 if !matches!(args.only, Some(MediaFilter::Videos)) {
215 let image_exts: HashSet<&str> = IMAGE_EXTENSIONS.iter().copied().collect();
216 process_media(
217 directory,
218 &image_exts,
219 "image",
220 hash_images,
221 args,
222 &mut all_groups,
223 )?;
224 }
225
226 if !matches!(args.only, Some(MediaFilter::Images)) {
227 match hash::find_ffmpeg() {
228 Ok(ffmpeg) => {
229 let video_exts: HashSet<&str> = VIDEO_EXTENSIONS.iter().copied().collect();
230 process_media(
231 directory,
232 &video_exts,
233 "video",
234 |files, dir, a| hash_videos(files, dir, &ffmpeg, a),
235 args,
236 &mut all_groups,
237 )?;
238 }
239 Err(_) => {
240 if !args.quiet && !args.json {
241 eprintln!("Warning: ffmpeg not found, skipping video processing");
242 }
243 }
244 }
245 }
246
247 let found_duplicates = !all_groups.is_empty();
248
249 if args.json {
250 println!(
251 "{}",
252 report::format_json(&all_groups, &empty_files_rel, args.dry_run)
253 );
254 }
255
256 if !args.dry_run && found_duplicates {
257 let to_delete = report::resolve_deletions(&all_groups, directory);
258 total_deleted += delete::delete_files(&to_delete, directory, "duplicate", args.yes)?;
259 }
260
261 if !args.json {
262 if args.dry_run && found_duplicates {
263 let total: usize = all_groups.iter().map(|g| g.duplicates.len()).sum();
264 println!("\n[dry run] {} file(s) would be deleted.", total);
265 } else if total_deleted > 0 {
266 eprintln!("\nRemoved {total_deleted} duplicate(s) total.");
267 }
268 }
269
270 Ok(found_duplicates)
271}