use crate::core::fingerprint::ImageFingerprint;
use subtle::ConstantTimeEq;
const BLOCK_DISTANCE_THRESHOLD: u32 = 32;
#[inline]
pub fn hash_similarity(distance: u32) -> f32 {
if distance >= 64 {
0.0
} else {
1.0 - (distance as f32 / 64.0)
}
}
#[inline]
pub fn hamming_distance(a: u64, b: u64) -> u32 {
(a ^ b).count_ones()
}
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(clippy::derive_partial_eq_without_eq)] pub struct Similarity {
pub score: f32,
pub exact_match: bool,
pub perceptual_distance: u32,
}
impl Similarity {
#[inline]
#[must_use]
pub fn perfect() -> Self {
Self {
score: 1.0,
exact_match: true,
perceptual_distance: 0,
}
}
}
#[must_use]
pub fn compute_similarity(a: &ImageFingerprint, b: &ImageFingerprint) -> Similarity {
compute_similarity_with_threshold(a, b, BLOCK_DISTANCE_THRESHOLD)
}
#[must_use]
pub fn compute_similarity_with_threshold(
a: &ImageFingerprint,
b: &ImageFingerprint,
block_threshold: u32,
) -> Similarity {
compute_similarity_with_weights(a, b, 0.4, 0.6, block_threshold)
}
#[must_use]
pub fn compute_similarity_with_weights(
a: &ImageFingerprint,
b: &ImageFingerprint,
global_weight: f32,
block_weight: f32,
block_threshold: u32,
) -> Similarity {
let exact_match = a.exact.ct_eq(&b.exact).into();
let global_distance = hamming_distance(a.global_hash, b.global_hash);
let global_similarity = hash_similarity(global_distance);
let block_similarity =
compute_block_similarity_with_threshold(&a.block_hashes, &b.block_hashes, block_threshold);
let combined_score =
(global_weight * global_similarity + block_weight * block_similarity).clamp(0.0, 1.0);
if exact_match {
Similarity {
score: 1.0,
exact_match: true,
perceptual_distance: 0,
}
} else {
Similarity {
score: combined_score,
exact_match: false,
perceptual_distance: global_distance,
}
}
}
#[must_use]
#[allow(dead_code)] pub fn compute_block_similarity(a: &[u64; 16], b: &[u64; 16]) -> f32 {
compute_block_similarity_with_threshold(a, b, BLOCK_DISTANCE_THRESHOLD)
}
pub fn compute_block_similarity_with_threshold(
a: &[u64; 16],
b: &[u64; 16],
max_distance: u32,
) -> f32 {
let mut total_similarity = 0.0f32;
let mut valid_comparisons = 0u32;
for i in 0..16 {
let distance = hamming_distance(a[i], b[i]);
if distance <= max_distance {
total_similarity += hash_similarity(distance);
valid_comparisons += 1;
}
}
if valid_comparisons == 0 {
0.0
} else {
total_similarity / valid_comparisons as f32
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_hash_similarity_distance_zero() {
assert_eq!(hash_similarity(0), 1.0);
}
#[test]
fn test_hash_similarity_distance_32() {
assert_eq!(hash_similarity(32), 0.5);
}
#[test]
fn test_hash_similarity_distance_64() {
assert_eq!(hash_similarity(64), 0.0);
}
#[test]
fn test_hash_similarity_distance_greater_than_64() {
assert_eq!(hash_similarity(100), 0.0);
}
#[test]
fn test_hash_similarity_distance_16() {
assert_eq!(hash_similarity(16), 0.75);
}
#[test]
fn test_hamming_distance_identical() {
assert_eq!(hamming_distance(0, 0), 0);
assert_eq!(hamming_distance(u64::MAX, u64::MAX), 0);
assert_eq!(hamming_distance(0x1234567890ABCDEF, 0x1234567890ABCDEF), 0);
}
#[test]
fn test_hamming_distance_opposite() {
assert_eq!(hamming_distance(0, u64::MAX), 64);
assert_eq!(hamming_distance(u64::MAX, 0), 64);
}
#[test]
fn test_hamming_distance_single_bit() {
assert_eq!(hamming_distance(0, 1), 1);
assert_eq!(hamming_distance(0, 2), 1);
assert_eq!(hamming_distance(0, 4), 1);
}
#[test]
fn test_hamming_distance_known_values() {
assert_eq!(hamming_distance(0b10101010, 0b01010101), 8);
assert_eq!(hamming_distance(0xFF00FF00, 0x00FF00FF), 32);
}
#[test]
fn test_hamming_distance_symmetric() {
let a: u64 = 0x1234567890ABCDEF;
let b: u64 = 0xFEDCBA0987654321;
assert_eq!(hamming_distance(a, b), hamming_distance(b, a));
}
#[test]
fn test_compute_block_similarity_identical() {
let blocks_a = [1u64; 16];
let blocks_b = [1u64; 16];
let sim = compute_block_similarity(&blocks_a, &blocks_b);
assert!((sim - 1.0).abs() < 1e-6);
}
#[test]
fn test_compute_block_similarity_all_different() {
let blocks_a = [0u64; 16];
let blocks_b = [u64::MAX; 16];
let sim = compute_block_similarity(&blocks_a, &blocks_b);
assert_eq!(sim, 0.0);
}
#[test]
fn test_compute_block_similarity_partial_match() {
let mut blocks_a = [0u64; 16];
let mut blocks_b = [0u64; 16];
for i in 0..8 {
blocks_a[i] = 0x1234567890ABCDEF;
blocks_b[i] = 0x1234567890ABCDEF;
}
let sim = compute_block_similarity(&blocks_a, &blocks_b);
assert!(
(sim - 1.0).abs() < 1e-5,
"Expected 1.0 for half matching blocks (others are identical 0s), got {}",
sim
);
}
#[test]
fn test_compute_block_similarity_all_above_threshold() {
let blocks_a = [0u64; 16];
let mut blocks_b = [0u64; 16];
for i in 0..16 {
blocks_b[i] = blocks_a[i] ^ u64::MAX;
}
let sim = compute_block_similarity(&blocks_a, &blocks_b);
assert_eq!(sim, 0.0);
}
#[test]
fn test_compute_similarity_identical_fingerprints() {
let fp = ImageFingerprint::new([1u8; 32], 0x1234567890ABCDEF, [0xABCDEF; 16]);
let sim = compute_similarity(&fp, &fp);
assert_eq!(sim.score, 1.0);
assert!(sim.exact_match);
assert_eq!(sim.perceptual_distance, 0);
}
#[test]
fn test_compute_similarity_different_fingerprints() {
let fp1 = ImageFingerprint::new([1u8; 32], 0x0000000000000000, [0u64; 16]);
let fp2 = ImageFingerprint::new([2u8; 32], 0xFFFFFFFFFFFFFFFF, [u64::MAX; 16]);
let sim = compute_similarity(&fp1, &fp2);
assert!(!sim.exact_match);
assert!(sim.score < 0.1);
assert_eq!(sim.perceptual_distance, 64);
}
#[test]
fn test_compute_similarity_same_exact_hash_different_blocks() {
let fp1 = ImageFingerprint::new([1u8; 32], 0x1234567890ABCDEF, [0u64; 16]);
let fp2 = ImageFingerprint::new([1u8; 32], 0x1234567890ABCDEF, [u64::MAX; 16]);
let sim = compute_similarity(&fp1, &fp2);
assert!(sim.exact_match);
assert_eq!(sim.score, 1.0);
}
#[test]
fn test_compute_similarity_similar_global_different_blocks() {
let fp1 = ImageFingerprint::new([1u8; 32], 0x0000000000000000, [0u64; 16]);
let fp2 = ImageFingerprint::new([2u8; 32], 0x0000000000000001, [u64::MAX; 16]);
let sim = compute_similarity(&fp1, &fp2);
assert!(!sim.exact_match);
assert!(sim.perceptual_distance == 1);
}
#[test]
fn test_similarity_perfect() {
let sim = Similarity::perfect();
assert_eq!(sim.score, 1.0);
assert!(sim.exact_match);
assert_eq!(sim.perceptual_distance, 0);
}
#[test]
fn test_similarity_clone_copy() {
let sim = Similarity {
score: 0.75,
exact_match: false,
perceptual_distance: 16,
};
let sim2 = sim;
assert_eq!(sim.score, sim2.score);
assert_eq!(sim.exact_match, sim2.exact_match);
assert_eq!(sim.perceptual_distance, sim2.perceptual_distance);
}
#[test]
fn test_similarity_partial_eq() {
let sim1 = Similarity {
score: 0.75,
exact_match: false,
perceptual_distance: 16,
};
let sim2 = Similarity {
score: 0.75,
exact_match: false,
perceptual_distance: 16,
};
let sim3 = Similarity {
score: 0.80,
exact_match: false,
perceptual_distance: 12,
};
assert_eq!(sim1, sim2);
assert_ne!(sim1, sim3);
}
#[test]
fn test_weighted_combination_formula() {
let global_hash1 = 0x0000000000000000;
let global_hash2 = 0x0000000000000000;
let mut blocks1 = [0u64; 16];
let mut blocks2 = [0u64; 16];
for i in 0..16 {
blocks1[i] = 0xAAAAAAAAAAAAAAAA;
blocks2[i] = 0xAAAAAAAAAAAAAAAA;
}
let fp1 = ImageFingerprint::new([1u8; 32], global_hash1, blocks1);
let fp2 = ImageFingerprint::new([2u8; 32], global_hash2, blocks2);
let sim = compute_similarity(&fp1, &fp2);
assert!(!sim.exact_match);
assert_eq!(sim.perceptual_distance, 0);
assert!((sim.score - 1.0).abs() < 1e-5);
}
}