use crate::extract::Candidate;
use anyhow::{Context, Result};
use image::imageops::FilterType;
use rayon::prelude::*;
use std::path::{Path, PathBuf};
/// Drops candidates whose image is a near-duplicate of the most recently kept one.
///
/// Every candidate's file is fingerprinted (in parallel), then the candidates
/// are scanned in slice order; a candidate survives only when its fingerprint
/// differs from the last survivor's by more than `threshold`.
///
/// When `verbose` is set, one keep/drop line per candidate is written to stderr.
///
/// # Errors
///
/// Fails if any candidate's fingerprint cannot be computed.
pub fn dedup_candidates(
    candidates: &[Candidate],
    threshold: f32,
    verbose: bool,
) -> Result<Vec<Candidate>> {
    // One accessor shared by the fingerprinting pass and the dedup pass.
    fn locate(candidate: &Candidate) -> &Path {
        candidate.path.as_path()
    }

    let prints = fingerprints_par(candidates.iter().map(locate))?;
    let survivors = run_dedup(candidates, &prints, threshold, verbose, locate);
    Ok(survivors)
}
/// Drops paths whose image is a near-duplicate of the most recently kept one.
///
/// Each path is fingerprinted (in parallel), then the paths are scanned in
/// slice order; a path survives only when its fingerprint differs from the
/// last survivor's by more than `threshold`.
///
/// When `verbose` is set, one keep/drop line per path is written to stderr.
///
/// # Errors
///
/// Fails if any path's fingerprint cannot be computed.
pub fn dedup_paths(paths: &[PathBuf], threshold: f32, verbose: bool) -> Result<Vec<PathBuf>> {
    let prints = fingerprints_par(paths.iter().map(PathBuf::as_path))?;
    let survivors = run_dedup(paths, &prints, threshold, verbose, PathBuf::as_path);
    Ok(survivors)
}
fn fingerprints_par<'a>(paths: impl Iterator<Item = &'a Path>) -> Result<Vec<Vec<u8>>> {
const THUMB: u32 = 256;
let paths: Vec<&Path> = paths.collect();
paths
.par_iter()
.map(|p| {
fingerprint(p, THUMB).with_context(|| format!("fingerprint {}", p.display()))
})
.collect()
}
/// Walks `items` alongside their `fingerprints` and keeps each item whose
/// fingerprint differs from the last *kept* item's by more than `threshold`.
///
/// The first item is always kept. Comparison is against the most recent
/// survivor, not the immediately preceding item, so a run of slowly drifting
/// frames is measured against the frame that started the run.
///
/// `path_of` is used only to obtain a file name for the `verbose` log lines
/// written to stderr.
///
/// Items and fingerprints are paired by position; iteration stops at the
/// shorter of the two slices.
fn run_dedup<T: Clone>(
    items: &[T],
    fingerprints: &[Vec<u8>],
    threshold: f32,
    verbose: bool,
    path_of: impl Fn(&T) -> &Path,
) -> Vec<T> {
    let mut survivors: Vec<T> = Vec::new();
    // Fingerprint of the most recently kept item; `None` until the first keep.
    let mut reference: Option<&[u8]> = None;

    for (entry, current) in items.iter().zip(fingerprints) {
        let distance = reference.map_or(f32::INFINITY, |prev| mean_abs_diff(prev, current));
        // The explicit `is_none` guard keeps the first item even when the
        // threshold is infinite or NaN, where `INFINITY > threshold` is false.
        let is_new = reference.is_none() || distance > threshold;

        if verbose {
            let label = path_of(entry)
                .file_name()
                .and_then(|os| os.to_str())
                .unwrap_or("?");
            let verdict = if is_new { "keep" } else { "drop" };
            eprintln!("[dedup] {} diff={:.2} {}", label, distance, verdict);
        }

        if !is_new {
            continue;
        }
        survivors.push(entry.clone());
        reference = Some(current.as_slice());
    }

    survivors
}
/// Builds a grayscale thumbnail fingerprint for the image at `path`.
///
/// The image is decoded, resized to exactly `size` x `size` pixels (aspect
/// ratio is not preserved) with a triangle filter, converted to 8-bit luma,
/// and returned as the raw pixel buffer.
///
/// # Errors
///
/// Fails if the file cannot be opened or decoded as an image.
fn fingerprint(path: &Path, size: u32) -> Result<Vec<u8>> {
    let decoded = image::open(path)?;
    let thumbnail = decoded.resize_exact(size, size, FilterType::Triangle);
    let gray = thumbnail.to_luma8();
    Ok(gray.into_raw())
}
/// Mean absolute per-byte difference between two buffers.
///
/// Both slices are expected to have the same length (checked in debug
/// builds only); the sum is taken over the overlapping prefix and divided by
/// `a.len()`.
///
/// Returns `0.0` for empty input rather than dividing zero by zero, so the
/// result is never NaN.
fn mean_abs_diff(a: &[u8], b: &[u8]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    if a.is_empty() {
        return 0.0;
    }
    // Accumulate in u64: a u32 sum overflows once the buffers exceed roughly
    // 16.8 million bytes of maximal difference.
    let total: u64 = a
        .iter()
        .zip(b)
        .map(|(&x, &y)| u64::from(x.abs_diff(y)))
        .sum();
    total as f32 / a.len() as f32
}