#![allow(dead_code)]
#![allow(clippy::cast_precision_loss)]
/// Tag identifying which perceptual hashing algorithm produced a hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgo {
    /// Reported as `"dhash"` by [`HashAlgo::name`].
    Dhash,
    /// Reported as `"phash"` by [`HashAlgo::name`].
    Phash,
    /// Reported as `"ahash"` by [`HashAlgo::name`].
    Ahash,
}

impl HashAlgo {
    /// Returns the width, in bits, of a hash produced by this algorithm.
    ///
    /// Every variant currently reports 64.
    #[must_use]
    pub const fn hash_bits(self) -> u32 {
        // Listing the variants explicitly forces a decision here whenever a
        // new algorithm is added.
        match self {
            Self::Dhash | Self::Phash | Self::Ahash => 64,
        }
    }

    /// Returns the lowercase textual name of the algorithm.
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            Self::Dhash => "dhash",
            Self::Phash => "phash",
            Self::Ahash => "ahash",
        }
    }
}
/// A 64-bit perceptual hash together with the algorithm tag it was built with.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PerceptualHash {
    /// Raw hash bits.
    pub bits: u64,
    /// Algorithm tag attached to this hash.
    pub algo: HashAlgo,
}

impl PerceptualHash {
    /// Wraps raw hash bits and an algorithm tag.
    #[must_use]
    pub const fn new(bits: u64, algo: HashAlgo) -> Self {
        Self { bits, algo }
    }

    /// Counts the bit positions at which `self` and `other` differ.
    ///
    /// Only `bits` is compared; the `algo` tags are not consulted.
    #[must_use]
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        let differing = self.bits ^ other.bits;
        differing.count_ones()
    }

    /// Returns the fraction of matching bits: `1.0` when all 64 bits agree,
    /// `0.0` when all 64 differ.
    #[must_use]
    pub fn similarity(&self, other: &Self) -> f32 {
        let mismatch_ratio = self.hamming_distance(other) as f32 / 64.0;
        1.0 - mismatch_ratio
    }

    /// Renders the hash bits as 16 lowercase, zero-padded hex digits.
    #[must_use]
    pub fn to_hex(self) -> String {
        let bits = self.bits;
        format!("{bits:016x}")
    }
}

/// Formats as `<algorithm name>:<16 hex digits>`.
impl std::fmt::Display for PerceptualHash {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { bits, algo } = *self;
        // Same text as `name()` followed by `to_hex()`, written without an
        // intermediate `String`.
        write!(f, "{}:{:016x}", algo.name(), bits)
    }
}
/// Point-samples an interleaved 8-bit image down (or up) to an
/// `out_w` x `out_h` grayscale thumbnail, returned in row-major order.
///
/// The pixel layout is inferred from the buffer length relative to
/// `src_w * src_h`:
///
/// * fewer than 3 bytes per pixel: treated as tightly packed 1-byte gray;
/// * 3 bytes per pixel: treated as RGB;
/// * 4 or more bytes per pixel: treated as RGBA-style data with a 4-byte
///   stride; only the first three channels are used.
///
/// Color samples are converted with the `0.299 R + 0.587 G + 0.114 B`
/// weighting and truncated to `u8`.
///
/// The function never indexes out of bounds: samples that fall beyond the end
/// of `pixels` read as `0`, and a source with a zero dimension (or whose pixel
/// count overflows `usize`) yields an all-zero thumbnail.
fn resize_to_gray(
    pixels: &[u8],
    src_w: usize,
    src_h: usize,
    out_w: usize,
    out_h: usize,
) -> Vec<u8> {
    let pixel_count = match src_w.checked_mul(src_h) {
        Some(count) if count > 0 => count,
        // Nothing to sample from; also avoids the `src_h - 1` underflow below.
        _ => return vec![0; out_w * out_h],
    };

    // Bytes between the starts of two consecutive pixels.
    let stride = match pixels.len() / pixel_count {
        0..=2 => 1,
        3 => 3,
        // RGBA and friends: step over the extra channel(s) instead of
        // walking the buffer as if it were packed RGB.
        _ => 4,
    };

    // Checked read so a truncated buffer degrades to black instead of panicking.
    let sample = |idx: usize| pixels.get(idx).copied().unwrap_or(0);

    let x_ratio = src_w as f32 / out_w as f32;
    let y_ratio = src_h as f32 / out_h as f32;

    let mut out = Vec::with_capacity(out_w * out_h);
    for ny in 0..out_h {
        let sy = ((ny as f32 * y_ratio) as usize).min(src_h - 1);
        for nx in 0..out_w {
            let sx = ((nx as f32 * x_ratio) as usize).min(src_w - 1);
            let base = (sy * src_w + sx) * stride;
            let gray = if stride >= 3 {
                let r = f32::from(sample(base));
                let g = f32::from(sample(base + 1));
                let b = f32::from(sample(base + 2));
                (0.299 * r + 0.587 * g + 0.114 * b) as u8
            } else {
                sample(base)
            };
            out.push(gray);
        }
    }
    out
}
/// Computes a 64-bit difference hash of an interleaved 8-bit image.
///
/// The image is reduced to a 9x8 grayscale thumbnail with `resize_to_gray`.
/// For each of the 8 rows, every pair of horizontally adjacent samples
/// contributes one bit, set when the left sample is strictly brighter than the
/// right one. Bits are assigned in row-major order starting from bit 0.
///
/// Returns an all-zero hash when `pixels` is empty, when either dimension is
/// zero, or when `pixels` holds fewer than `width * height` bytes (too small
/// for even a single-channel image of the stated size).
#[must_use]
pub fn compute_dhash(pixels: &[u8], width: usize, height: usize) -> PerceptualHash {
    const THUMB_W: usize = 9;
    const THUMB_H: usize = 8;

    // Saturating so absurd dimensions are rejected rather than overflowing.
    let undersized = pixels.len() < width.saturating_mul(height);
    if pixels.is_empty() || width == 0 || height == 0 || undersized {
        return PerceptualHash::new(0, HashAlgo::Dhash);
    }

    let thumb = resize_to_gray(pixels, width, height, THUMB_W, THUMB_H);

    let mut hash = 0u64;
    for (row, line) in thumb.chunks_exact(THUMB_W).take(THUMB_H).enumerate() {
        // 9 samples per row give 8 adjacent pairs, hence 8 bits per row.
        for (col, pair) in line.windows(2).enumerate() {
            if pair[0] > pair[1] {
                hash |= 1u64 << (row * (THUMB_W - 1) + col);
            }
        }
    }
    PerceptualHash::new(hash, HashAlgo::Dhash)
}
/// Computes a 64-bit average hash of an interleaved 8-bit image.
///
/// The image is reduced to an 8x8 grayscale thumbnail with `resize_to_gray`.
/// Bit `i` of the result is set when thumbnail sample `i` (row-major) is
/// greater than or equal to the mean of all 64 samples, so a perfectly uniform
/// image hashes to all ones.
///
/// Returns an all-zero hash when `pixels` is empty, when either dimension is
/// zero, or when `pixels` holds fewer than `width * height` bytes (too small
/// for even a single-channel image of the stated size).
#[must_use]
pub fn compute_ahash(pixels: &[u8], width: usize, height: usize) -> PerceptualHash {
    const SIDE: usize = 8;

    // Saturating so absurd dimensions are rejected rather than overflowing.
    let undersized = pixels.len() < width.saturating_mul(height);
    if pixels.is_empty() || width == 0 || height == 0 || undersized {
        return PerceptualHash::new(0, HashAlgo::Ahash);
    }

    let thumb = resize_to_gray(pixels, width, height, SIDE, SIDE);
    let mean = thumb.iter().map(|&p| f32::from(p)).sum::<f32>() / 64.0;

    let hash = thumb
        .iter()
        .enumerate()
        .filter(|&(_, &px)| f32::from(px) >= mean)
        .fold(0u64, |acc, (i, _)| acc | (1u64 << i));

    PerceptualHash::new(hash, HashAlgo::Ahash)
}
/// Finds near-duplicate items by comparing perceptual hashes against a
/// similarity threshold.
pub struct PerceptualDeduplicator {
    /// Minimum similarity (as returned by `PerceptualHash::similarity`) at
    /// which two hashes are considered duplicates.
    pub threshold: f32,
    /// Algorithm tag stored alongside the threshold. The comparison methods
    /// on this type do not consult it.
    pub algo: HashAlgo,
}

impl PerceptualDeduplicator {
    /// Creates a deduplicator with the given threshold and the `Dhash` tag.
    #[must_use]
    pub fn new(threshold: f32) -> Self {
        Self::with_algo(threshold, HashAlgo::Dhash)
    }

    /// Creates a deduplicator with the given threshold and algorithm tag.
    #[must_use]
    pub fn with_algo(threshold: f32, algo: HashAlgo) -> Self {
        Self { threshold, algo }
    }

    /// Returns `true` when the similarity of the two hashes is at least
    /// `self.threshold`.
    #[must_use]
    pub fn is_duplicate(&self, hash_a: &PerceptualHash, hash_b: &PerceptualHash) -> bool {
        hash_a.similarity(hash_b) >= self.threshold
    }

    /// Compares every unordered pair of hashes and returns the index pairs
    /// `(i, j)` with `i < j` that are duplicates, in ascending order of `i`
    /// and then `j`.
    ///
    /// This is a quadratic all-pairs scan.
    #[must_use]
    pub fn find_duplicates(&self, hashes: &[PerceptualHash]) -> Vec<(usize, usize)> {
        let mut pairs = Vec::new();
        for (i, first) in hashes.iter().enumerate() {
            for (j, second) in hashes.iter().enumerate().skip(i + 1) {
                if self.is_duplicate(first, second) {
                    pairs.push((i, j));
                }
            }
        }
        pairs
    }

    /// Groups hashes into clusters of transitively connected duplicates.
    ///
    /// Two indices end up in the same cluster when they are linked by a chain
    /// of pairs reported by [`Self::find_duplicates`]. Only clusters with more
    /// than one member are returned.
    ///
    /// The output is deterministic: members within a cluster are in ascending
    /// index order, and clusters are ordered by their smallest member.
    #[must_use]
    pub fn find_clusters(&self, hashes: &[PerceptualHash]) -> Vec<Vec<usize>> {
        /// Returns the representative of `node`'s set, pointing every node on
        /// the walked path directly at it. Iterative, so long parent chains
        /// cannot exhaust the stack.
        fn find_root(parent: &mut [usize], mut node: usize) -> usize {
            let mut root = node;
            while parent[root] != root {
                root = parent[root];
            }
            while parent[node] != root {
                let next = parent[node];
                parent[node] = root;
                node = next;
            }
            root
        }

        let n = hashes.len();
        let mut parent: Vec<usize> = (0..n).collect();

        for (a, b) in self.find_duplicates(hashes) {
            let root_a = find_root(&mut parent, a);
            let root_b = find_root(&mut parent, b);
            if root_a != root_b {
                // Keep the smaller index as representative so each set's root
                // is always its smallest member.
                let (keep, absorb) = if root_a < root_b {
                    (root_a, root_b)
                } else {
                    (root_b, root_a)
                };
                parent[absorb] = keep;
            }
        }

        // An ordered map keyed by root yields clusters in a stable order,
        // unlike a hash map whose iteration order varies between runs.
        let mut clusters: std::collections::BTreeMap<usize, Vec<usize>> =
            std::collections::BTreeMap::new();
        for i in 0..n {
            let root = find_root(&mut parent, i);
            clusters.entry(root).or_default().push(i);
        }
        clusters.into_values().filter(|c| c.len() > 1).collect()
    }
}
/// Unit tests for the hash types, the hash functions and the deduplicator.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- HashAlgo -------------------------------------------------------

    #[test]
    fn test_hash_algo_bits() {
        assert_eq!(HashAlgo::Dhash.hash_bits(), 64);
        assert_eq!(HashAlgo::Phash.hash_bits(), 64);
        assert_eq!(HashAlgo::Ahash.hash_bits(), 64);
    }

    #[test]
    fn test_hash_algo_name() {
        assert_eq!(HashAlgo::Dhash.name(), "dhash");
        assert_eq!(HashAlgo::Phash.name(), "phash");
        assert_eq!(HashAlgo::Ahash.name(), "ahash");
    }

    // ---- PerceptualHash -------------------------------------------------

    #[test]
    fn test_hamming_distance_same() {
        let h = PerceptualHash::new(0xDEAD_BEEF_DEAD_BEEF, HashAlgo::Dhash);
        assert_eq!(h.hamming_distance(&h), 0);
    }

    #[test]
    fn test_hamming_distance_all_different() {
        let h1 = PerceptualHash::new(0x0000_0000_0000_0000, HashAlgo::Dhash);
        let h2 = PerceptualHash::new(0xFFFF_FFFF_FFFF_FFFF, HashAlgo::Dhash);
        assert_eq!(h1.hamming_distance(&h2), 64);
    }

    #[test]
    fn test_similarity_identical() {
        let h = PerceptualHash::new(0xABCD_EF01_2345_6789, HashAlgo::Ahash);
        // Zero differing bits gives exactly 1.0 - 0.0, so exact equality holds.
        assert_eq!(h.similarity(&h), 1.0);
    }

    #[test]
    fn test_similarity_maximally_different() {
        let h1 = PerceptualHash::new(0, HashAlgo::Dhash);
        let h2 = PerceptualHash::new(u64::MAX, HashAlgo::Dhash);
        assert!((h1.similarity(&h2) - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_similarity_range() {
        let h1 = PerceptualHash::new(0b1010_1010, HashAlgo::Dhash);
        let h2 = PerceptualHash::new(0b0101_0101, HashAlgo::Dhash);
        let sim = h1.similarity(&h2);
        assert!((0.0..=1.0).contains(&sim));
    }

    #[test]
    fn test_display() {
        let h = PerceptualHash::new(0, HashAlgo::Dhash);
        let s = format!("{h}");
        assert!(s.starts_with("dhash:"));
    }

    #[test]
    fn test_to_hex_length() {
        let h = PerceptualHash::new(0xFFFF_FFFF_FFFF_FFFF, HashAlgo::Phash);
        assert_eq!(h.to_hex().len(), 16);
    }

    // ---- compute_dhash --------------------------------------------------

    #[test]
    fn test_compute_dhash_empty() {
        let h = compute_dhash(&[], 0, 0);
        assert_eq!(h.bits, 0);
        assert_eq!(h.algo, HashAlgo::Dhash);
    }

    #[test]
    fn test_compute_dhash_uniform_gray() {
        // No sample is strictly brighter than its right neighbour.
        let pixels = vec![128u8; 64 * 64];
        let h = compute_dhash(&pixels, 64, 64);
        assert_eq!(h.bits, 0);
    }

    #[test]
    fn test_compute_dhash_deterministic() {
        let pixels: Vec<u8> = (0..32 * 32).map(|i| (i % 256) as u8).collect();
        let h1 = compute_dhash(&pixels, 32, 32);
        let h2 = compute_dhash(&pixels, 32, 32);
        assert_eq!(h1.bits, h2.bits);
    }

    #[test]
    fn test_compute_dhash_64_bits() {
        // Pixel (x, y) holds 16 * y + x, so every row strictly brightens from
        // left to right and no "left > right" bit may be set.
        let rising: Vec<u8> = (0..16 * 16).map(|i| (i % 256) as u8).collect();
        assert_eq!(compute_dhash(&rising, 16, 16).bits, 0);

        // Inverting the image makes every row strictly darken, which must set
        // all 64 comparison bits and so exercises the full hash width.
        let falling: Vec<u8> = rising.iter().map(|&p| 255 - p).collect();
        assert_eq!(compute_dhash(&falling, 16, 16).bits, u64::MAX);
    }

    // ---- compute_ahash --------------------------------------------------

    #[test]
    fn test_compute_ahash_empty() {
        let h = compute_ahash(&[], 0, 0);
        assert_eq!(h.bits, 0);
        assert_eq!(h.algo, HashAlgo::Ahash);
    }

    #[test]
    fn test_compute_ahash_deterministic() {
        let pixels: Vec<u8> = (0..64 * 64).map(|i| (i % 200) as u8).collect();
        let h1 = compute_ahash(&pixels, 64, 64);
        let h2 = compute_ahash(&pixels, 64, 64);
        assert_eq!(h1.bits, h2.bits);
    }

    #[test]
    fn test_compute_ahash_uniform_produces_all_ones() {
        // Every sample equals the mean, and the comparison is `>=`.
        let pixels = vec![100u8; 64 * 64];
        let h = compute_ahash(&pixels, 64, 64);
        assert_eq!(h.bits, u64::MAX);
    }

    // ---- PerceptualDeduplicator -----------------------------------------

    #[test]
    fn test_deduplicator_new() {
        let d = PerceptualDeduplicator::new(0.9);
        assert!((d.threshold - 0.9).abs() < f32::EPSILON);
        assert_eq!(d.algo, HashAlgo::Dhash);
    }

    #[test]
    fn test_is_duplicate_identical() {
        let d = PerceptualDeduplicator::new(0.9);
        let h = PerceptualHash::new(0xABCD, HashAlgo::Dhash);
        assert!(d.is_duplicate(&h, &h));
    }

    #[test]
    fn test_is_duplicate_maximally_different() {
        let d = PerceptualDeduplicator::new(0.5);
        let h1 = PerceptualHash::new(0, HashAlgo::Dhash);
        let h2 = PerceptualHash::new(u64::MAX, HashAlgo::Dhash);
        assert!(!d.is_duplicate(&h1, &h2));
    }

    #[test]
    fn test_find_duplicates_empty() {
        let d = PerceptualDeduplicator::new(0.9);
        let pairs = d.find_duplicates(&[]);
        assert!(pairs.is_empty());
    }

    #[test]
    fn test_find_duplicates_all_same() {
        let d = PerceptualDeduplicator::new(1.0);
        let hashes = vec![
            PerceptualHash::new(42, HashAlgo::Dhash),
            PerceptualHash::new(42, HashAlgo::Dhash),
            PerceptualHash::new(42, HashAlgo::Dhash),
        ];
        // Three identical hashes form the pairs (0,1), (0,2) and (1,2).
        let pairs = d.find_duplicates(&hashes);
        assert_eq!(pairs.len(), 3);
    }

    #[test]
    fn test_find_duplicates_none() {
        let d = PerceptualDeduplicator::new(1.0);
        let hashes = vec![
            PerceptualHash::new(0x0000, HashAlgo::Dhash),
            PerceptualHash::new(0xFFFF_FFFF_FFFF_FFFF, HashAlgo::Dhash),
        ];
        let pairs = d.find_duplicates(&hashes);
        assert!(pairs.is_empty());
    }

    #[test]
    fn test_find_clusters_basic() {
        let d = PerceptualDeduplicator::new(1.0);
        let v = 42u64;
        let hashes = vec![
            PerceptualHash::new(v, HashAlgo::Dhash),
            PerceptualHash::new(v, HashAlgo::Dhash),
            PerceptualHash::new(u64::MAX, HashAlgo::Dhash),
        ];
        // The third hash matches nothing, so it must not appear as a
        // singleton cluster.
        let clusters = d.find_clusters(&hashes);
        assert_eq!(clusters.len(), 1);
        let mut c = clusters[0].clone();
        c.sort_unstable();
        assert_eq!(c, vec![0, 1]);
    }
}