Skip to main content

dublette/
lib.rs

1pub mod cli;
2pub mod delete;
3pub mod hash;
4pub mod report;
5pub mod scan;
6
7use std::collections::HashSet;
8use std::path::Path;
9
10use indicatif::{ProgressBar, ProgressStyle};
11use rayon::prelude::*;
12
13use cli::{Args, MediaFilter};
14use scan::{HashedFile, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
15
16fn make_progress_bar(len: u64, msg: &str, quiet: bool) -> ProgressBar {
17    if quiet {
18        return ProgressBar::hidden();
19    }
20    let pb = ProgressBar::new(len);
21    pb.set_style(
22        ProgressStyle::default_bar()
23            .template("{msg} [{bar:40}] {pos}/{len} ({eta})")
24            .expect("valid template")
25            .progress_chars("=> "),
26    );
27    pb.set_message(msg.to_string());
28    pb
29}
30
31fn hash_images(files: &[std::path::PathBuf], directory: &Path, args: &Args) -> Vec<HashedFile> {
32    let pb = make_progress_bar(files.len() as u64, "Hashing images", args.quiet);
33
34    let results: Vec<_> = files
35        .par_iter()
36        .filter_map(|f| {
37            let result = hash::compute_image_hash(f);
38            pb.inc(1);
39            match result {
40                Ok(h) => {
41                    let rel = f.strip_prefix(directory).unwrap_or(f);
42                    let key = rel.to_string_lossy().to_string();
43                    if args.verbose {
44                        eprintln!("  {} -> {:?}", key, h);
45                    }
46                    Some(HashedFile {
47                        relative_path: key,
48                        hash: h,
49                    })
50                }
51                Err(e) => {
52                    eprintln!("  Warning: skipping {}: {e}", f.display());
53                    None
54                }
55            }
56        })
57        .collect();
58
59    pb.finish_and_clear();
60    results
61}
62
63fn hash_videos(
64    files: &[std::path::PathBuf],
65    directory: &Path,
66    ffmpeg: &Path,
67    args: &Args,
68) -> Vec<HashedFile> {
69    let pb = make_progress_bar(files.len() as u64, "Hashing videos", args.quiet);
70
71    let results: Vec<_> = files
72        .par_iter()
73        .filter_map(|f| {
74            let result = hash::extract_video_frame_hash(f, ffmpeg);
75            pb.inc(1);
76            match result {
77                Ok(h) => {
78                    let rel = f.strip_prefix(directory).unwrap_or(f);
79                    let key = rel.to_string_lossy().to_string();
80                    if args.verbose {
81                        eprintln!("  {} -> {:?}", key, h);
82                    }
83                    Some(HashedFile {
84                        relative_path: key,
85                        hash: h,
86                    })
87                }
88                Err(e) => {
89                    eprintln!("  Warning: skipping {}: {e}", f.display());
90                    None
91                }
92            }
93        })
94        .collect();
95
96    pb.finish_and_clear();
97    results
98}
99
100fn compare_hashes(
101    hashes: &[HashedFile],
102    threshold: u32,
103    label: &str,
104    args: &Args,
105) -> Vec<scan::DuplicateGroup> {
106    let total_pairs = (hashes.len() * hashes.len().saturating_sub(1)) / 2;
107    let pb = make_progress_bar(
108        total_pairs as u64,
109        &format!("Comparing {label}"),
110        args.quiet,
111    );
112
113    let mut duplicates = std::collections::HashMap::new();
114    for h in hashes {
115        duplicates
116            .entry(h.relative_path.clone())
117            .or_insert_with(Vec::new);
118    }
119
120    for i in 0..hashes.len() {
121        for j in (i + 1)..hashes.len() {
122            let distance = hashes[i].hash.dist(&hashes[j].hash);
123            if args.verbose {
124                eprintln!(
125                    "  {} <-> {}: distance={}",
126                    hashes[i].relative_path, hashes[j].relative_path, distance
127                );
128            }
129            if distance <= threshold {
130                duplicates
131                    .entry(hashes[i].relative_path.clone())
132                    .or_default()
133                    .push(hashes[j].relative_path.clone());
134                duplicates
135                    .entry(hashes[j].relative_path.clone())
136                    .or_default()
137                    .push(hashes[i].relative_path.clone());
138            }
139            pb.inc(1);
140        }
141    }
142
143    pb.finish_and_clear();
144    scan::build_duplicate_groups(&duplicates)
145}
146
147fn process_media(
148    directory: &Path,
149    extensions: &HashSet<&str>,
150    label: &str,
151    hash_fn: impl Fn(&[std::path::PathBuf], &Path, &Args) -> Vec<HashedFile>,
152    args: &Args,
153    all_groups: &mut Vec<scan::DuplicateGroup>,
154) -> eyre::Result<()> {
155    let files = scan::collect_files(directory, extensions)?;
156    if files.is_empty() {
157        if !args.json {
158            println!("No {label}s found.");
159        }
160        return Ok(());
161    }
162
163    if !args.quiet && !args.json {
164        eprintln!("Scanning {} {label}(s)...", files.len());
165    }
166
167    let hashes = hash_fn(&files, directory, args);
168    let groups = compare_hashes(&hashes, args.threshold, label, args);
169
170    if !args.json {
171        if groups.is_empty() {
172            println!("No duplicate {label}s found.");
173        } else {
174            println!("{}", report::format_table(&groups, args.dry_run, label));
175        }
176    }
177
178    all_groups.extend(groups);
179    Ok(())
180}
181
182pub fn run(args: &Args) -> eyre::Result<bool> {
183    let directory = &args.directory;
184    let mut total_deleted = 0usize;
185    let mut all_groups: Vec<scan::DuplicateGroup> = Vec::new();
186    let mut empty_files_rel: Vec<String> = Vec::new();
187
188    if args.delete_empty {
189        let empty = delete::find_empty_files(directory)?;
190        if !empty.is_empty() {
191            empty_files_rel = empty
192                .iter()
193                .map(|p| {
194                    p.strip_prefix(directory)
195                        .unwrap_or(p)
196                        .to_string_lossy()
197                        .to_string()
198                })
199                .collect();
200
201            if !args.json {
202                println!(
203                    "{}",
204                    report::format_empty_table(&empty_files_rel, args.dry_run)
205                );
206            }
207
208            if !args.dry_run {
209                total_deleted += delete::delete_files(&empty, directory, "empty", args.yes)?;
210            }
211        }
212    }
213
214    if !matches!(args.only, Some(MediaFilter::Videos)) {
215        let image_exts: HashSet<&str> = IMAGE_EXTENSIONS.iter().copied().collect();
216        process_media(
217            directory,
218            &image_exts,
219            "image",
220            hash_images,
221            args,
222            &mut all_groups,
223        )?;
224    }
225
226    if !matches!(args.only, Some(MediaFilter::Images)) {
227        match hash::find_ffmpeg() {
228            Ok(ffmpeg) => {
229                let video_exts: HashSet<&str> = VIDEO_EXTENSIONS.iter().copied().collect();
230                process_media(
231                    directory,
232                    &video_exts,
233                    "video",
234                    |files, dir, a| hash_videos(files, dir, &ffmpeg, a),
235                    args,
236                    &mut all_groups,
237                )?;
238            }
239            Err(_) => {
240                if !args.quiet && !args.json {
241                    eprintln!("Warning: ffmpeg not found, skipping video processing");
242                }
243            }
244        }
245    }
246
247    let found_duplicates = !all_groups.is_empty();
248
249    if args.json {
250        println!(
251            "{}",
252            report::format_json(&all_groups, &empty_files_rel, args.dry_run)
253        );
254    }
255
256    if !args.dry_run && found_duplicates {
257        let to_delete = report::resolve_deletions(&all_groups, directory);
258        total_deleted += delete::delete_files(&to_delete, directory, "duplicate", args.yes)?;
259    }
260
261    if !args.json {
262        if args.dry_run && found_duplicates {
263            let total: usize = all_groups.iter().map(|g| g.duplicates.len()).sum();
264            println!("\n[dry run] {} file(s) would be deleted.", total);
265        } else if total_deleted > 0 {
266            eprintln!("\nRemoved {total_deleted} duplicate(s) total.");
267        }
268    }
269
270    Ok(found_duplicates)
271}