use crate::core::similarity::Similarity;
use crate::hash::algorithms::HashAlgorithm;
/// Default for [`MultiHashConfig::ahash_weight`]: the factor applied to the
/// aHash similarity score when the three per-algorithm scores are combined.
pub const DEFAULT_AHASH_WEIGHT: f32 = 0.10;
/// Default for [`MultiHashConfig::phash_weight`]: the factor applied to the
/// pHash similarity score. It is the largest of the three algorithm weights.
pub const DEFAULT_PHASH_WEIGHT: f32 = 0.60;
/// Default for [`MultiHashConfig::dhash_weight`]: the factor applied to the
/// dHash similarity score. The three algorithm defaults nominally sum to 1.0.
pub const DEFAULT_DHASH_WEIGHT: f32 = 0.30;
/// Default for [`MultiHashConfig::global_weight`], forwarded unchanged to the
/// per-algorithm scorer.
// NOTE(review): presumably the share of a per-algorithm score taken from the
// whole-image hash; the scorer is defined outside this file -- confirm.
pub const DEFAULT_GLOBAL_WEIGHT: f32 = 0.40;
/// Default for [`MultiHashConfig::block_weight`], forwarded unchanged to the
/// per-algorithm scorer. Nominally sums to 1.0 with `DEFAULT_GLOBAL_WEIGHT`.
// NOTE(review): presumably the share of a per-algorithm score taken from the
// block hashes -- confirm against the scorer.
pub const DEFAULT_BLOCK_WEIGHT: f32 = 0.60;
/// Default for [`MultiHashConfig::block_distance_threshold`], forwarded
/// unchanged to the per-algorithm scorer.
// NOTE(review): looks like a per-block Hamming-distance cutoff (in bits of a
// 64-bit block hash) -- confirm against the scorer.
pub const DEFAULT_BLOCK_DISTANCE_THRESHOLD: u32 = 32;
/// Tunable parameters for [`MultiHashFingerprint::compare_with_config`].
///
/// The values are used exactly as given; nothing in this file validates or
/// normalises them. The combined score is clamped to `[0.0, 1.0]` after the
/// weights have been applied.
///
/// With the `serde` feature enabled the struct can be (de)serialised, and
/// `deny_unknown_fields` is set on the derive.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(deny_unknown_fields))]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct MultiHashConfig {
/// Factor applied to the aHash similarity score.
pub ahash_weight: f32,
/// Factor applied to the pHash similarity score.
pub phash_weight: f32,
/// Factor applied to the dHash similarity score.
pub dhash_weight: f32,
/// Passed to the per-algorithm scorer for every algorithm.
// NOTE(review): presumably weights the global-hash component -- confirm.
pub global_weight: f32,
/// Passed to the per-algorithm scorer for every algorithm.
// NOTE(review): presumably weights the block-hash component -- confirm.
pub block_weight: f32,
/// Passed to the per-algorithm scorer for every algorithm.
// NOTE(review): presumably a per-block Hamming-distance cutoff -- confirm.
pub block_distance_threshold: u32,
}
impl Default for MultiHashConfig {
fn default() -> Self {
Self {
ahash_weight: DEFAULT_AHASH_WEIGHT,
phash_weight: DEFAULT_PHASH_WEIGHT,
dhash_weight: DEFAULT_DHASH_WEIGHT,
global_weight: DEFAULT_GLOBAL_WEIGHT,
block_weight: DEFAULT_BLOCK_WEIGHT,
block_distance_threshold: DEFAULT_BLOCK_DISTANCE_THRESHOLD,
}
}
}
/// Fingerprint of a single image: a 32-byte exact hash, one 64-bit global
/// hash and sixteen 64-bit block hashes.
///
/// The type is `#[repr(C)]` and derives `bytemuck::Pod`, so slices of it can
/// be reinterpreted as raw bytes. Its size is pinned to 168 bytes by a
/// compile-time assertion in this file; do not reorder or resize the fields.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(deny_unknown_fields))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, bytemuck::Pod, bytemuck::Zeroable)]
#[repr(C)]
pub struct ImageFingerprint {
// Two fingerprints with equal `exact` bytes are treated as similar without
// any perceptual comparison (see `is_similar`).
// NOTE(review): presumably a digest of the image content -- confirm at the
// call sites of `new`.
pub(crate) exact: [u8; 32],
// 64-bit hash whose bitwise (Hamming) distance `distance` reports.
pub(crate) global_hash: u64,
// Sixteen 64-bit hashes.
// NOTE(review): presumably one hash per image region -- confirm with the
// code that produces them.
pub(crate) block_hashes: [u64; 16],
}
impl ImageFingerprint {
#[inline]
pub(crate) fn new(exact: [u8; 32], global_hash: u64, block_hashes: [u64; 16]) -> Self {
Self {
exact,
global_hash,
block_hashes,
}
}
#[inline]
#[must_use]
pub fn exact_hash(&self) -> &[u8; 32] {
&self.exact
}
#[inline]
#[must_use]
pub const fn format_version() -> u32 {
crate::FORMAT_VERSION
}
#[inline]
#[must_use]
pub fn global_hash(&self) -> u64 {
self.global_hash
}
#[inline]
#[must_use]
pub fn block_hashes(&self) -> &[u64; 16] {
&self.block_hashes
}
#[inline]
#[must_use]
pub fn distance(&self, other: &ImageFingerprint) -> u32 {
(self.global_hash ^ other.global_hash).count_ones()
}
#[doc(alias = "compare")]
#[doc(alias = "match")]
#[must_use]
pub fn is_similar(&self, other: &ImageFingerprint, threshold: f32) -> bool {
debug_assert!(
(0.0..=1.0).contains(&threshold),
"threshold must be in range [0.0, 1.0], got {threshold}"
);
if self.exact == other.exact {
return true;
}
let clamped_threshold = threshold.clamp(0.0, 1.0);
let sim = crate::core::similarity::compute_similarity(self, other);
sim.score >= clamped_threshold
}
}
impl core::fmt::Display for ImageFingerprint {
    /// Renders `<exact>:<global>:<block0>,<block1>,...` where `<exact>` is 64
    /// lowercase hex digits and every hash is 16 zero-padded lowercase hex
    /// digits.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Exact hash, two hex digits per byte.
        self.exact
            .iter()
            .try_for_each(|byte| write!(f, "{:02x}", byte))?;

        write!(f, ":{:016x}:", self.global_hash)?;

        // Comma-separated block hashes: the first one carries no separator.
        if let Some((head, tail)) = self.block_hashes.split_first() {
            write!(f, "{:016x}", head)?;
            for h in tail {
                write!(f, ",")?;
                write!(f, "{:016x}", h)?;
            }
        }
        Ok(())
    }
}
/// Fingerprint of one image under three hash algorithms (aHash, pHash and
/// dHash) together with a 32-byte exact hash.
///
/// The type is `#[repr(C)]` and derives `bytemuck::Pod`, so slices of it can
/// be reinterpreted as raw bytes. Its size is pinned to 536 bytes by a
/// compile-time assertion in this file; do not reorder or resize the fields.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(deny_unknown_fields))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, bytemuck::Pod, bytemuck::Zeroable)]
#[repr(C)]
pub struct MultiHashFingerprint {
// Equal `exact` bytes short-circuit comparison to a perfect match.
// NOTE(review): presumably a digest of the image content -- confirm at the
// call sites of `new`.
pub(crate) exact: [u8; 32],
// Fingerprint selected by `HashAlgorithm::AHash`.
pub(crate) ahash: ImageFingerprint,
// Fingerprint selected by `HashAlgorithm::PHash`.
pub(crate) phash: ImageFingerprint,
// Fingerprint selected by `HashAlgorithm::DHash`.
pub(crate) dhash: ImageFingerprint,
}
// Compile-time layout guard: the build fails if either fingerprint type
// changes size, because both are reinterpreted as raw bytes via `bytemuck`.
const _: () = {
    const IMAGE_FINGERPRINT_BYTES: usize = core::mem::size_of::<ImageFingerprint>();
    const MULTI_HASH_FINGERPRINT_BYTES: usize = core::mem::size_of::<MultiHashFingerprint>();

    // 32 exact bytes + one u64 global hash + sixteen u64 block hashes.
    assert!(
        IMAGE_FINGERPRINT_BYTES == 168,
        "ImageFingerprint binary layout drifted"
    );
    // 32 exact bytes + three embedded `ImageFingerprint`s.
    assert!(
        MULTI_HASH_FINGERPRINT_BYTES == 536,
        "MultiHashFingerprint binary layout drifted"
    );
};
impl MultiHashFingerprint {
    /// Bundles the exact hash with the three per-algorithm fingerprints.
    pub(crate) fn new(
        exact: [u8; 32],
        ahash: ImageFingerprint,
        phash: ImageFingerprint,
        dhash: ImageFingerprint,
    ) -> Self {
        Self {
            exact,
            ahash,
            phash,
            dhash,
        }
    }

    /// Borrows the 32-byte exact hash.
    #[inline]
    #[must_use]
    pub fn exact_hash(&self) -> &[u8; 32] {
        &self.exact
    }

    /// Returns the crate-wide `FORMAT_VERSION` constant.
    #[inline]
    #[must_use]
    pub const fn format_version() -> u32 {
        crate::FORMAT_VERSION
    }

    /// Borrows the aHash fingerprint.
    #[inline]
    #[must_use]
    pub fn ahash(&self) -> &ImageFingerprint {
        &self.ahash
    }

    /// Borrows the pHash fingerprint.
    #[inline]
    #[must_use]
    pub fn phash(&self) -> &ImageFingerprint {
        &self.phash
    }

    /// Borrows the dHash fingerprint.
    #[inline]
    #[must_use]
    pub fn dhash(&self) -> &ImageFingerprint {
        &self.dhash
    }

    /// Borrows the fingerprint produced by `algorithm`.
    #[must_use]
    pub fn get(&self, algorithm: HashAlgorithm) -> &ImageFingerprint {
        match algorithm {
            HashAlgorithm::AHash => &self.ahash,
            HashAlgorithm::PHash => &self.phash,
            HashAlgorithm::DHash => &self.dhash,
        }
    }

    /// Compares two fingerprints using [`MultiHashConfig::default`].
    ///
    /// The defaults are taken from the config type rather than repeated here
    /// as literals, so this method cannot drift from the `DEFAULT_*`
    /// constants.
    #[must_use]
    pub fn compare(&self, other: &MultiHashFingerprint) -> Similarity {
        self.compare_with_config(other, &MultiHashConfig::default())
    }

    /// Compares two fingerprints using the default weights but a custom
    /// `block_threshold` in place of the default block distance threshold.
    #[must_use]
    pub fn compare_with_threshold(
        &self,
        other: &MultiHashFingerprint,
        block_threshold: u32,
    ) -> Similarity {
        let cfg = MultiHashConfig {
            block_distance_threshold: block_threshold,
            ..MultiHashConfig::default()
        };
        self.compare_with_config(other, &cfg)
    }

    /// Compares two fingerprints under an explicit configuration.
    ///
    /// * If the exact hashes are equal the result is a perfect match:
    ///   `score == 1.0`, `exact_match == true`, `perceptual_distance == 0`.
    ///   The configuration is not consulted in that case.
    /// * Otherwise each algorithm is scored by `compute_score_only` with the
    ///   config's `global_weight`, `block_weight` and
    ///   `block_distance_threshold`. `score` is
    ///   `ahash * ahash_weight + phash * phash_weight + dhash * dhash_weight`,
    ///   clamped to `[0.0, 1.0]`. The sum is *not* divided by the weight
    ///   total, so weights that do not sum to 1 scale the score before the
    ///   clamp.
    /// * `perceptual_distance` is the weighted mean of the three global-hash
    ///   Hamming distances, truncated toward zero and capped at 64 (the
    ///   width of a global hash). It is 0 when the algorithm weights do not
    ///   sum to a positive number.
    #[must_use]
    pub fn compare_with_config(
        &self,
        other: &MultiHashFingerprint,
        config: &MultiHashConfig,
    ) -> Similarity {
        use crate::core::similarity::{compute_score_only, hamming_distance};
        use subtle::ConstantTimeEq;

        // The exact hashes are compared through `subtle::ConstantTimeEq`.
        let exact_match = bool::from(self.exact.ct_eq(&other.exact));
        if exact_match {
            return Similarity {
                score: 1.0,
                exact_match: true,
                perceptual_distance: 0,
            };
        }

        // Every algorithm is scored with the same global/block settings.
        let score_pair = |mine: &ImageFingerprint, theirs: &ImageFingerprint| {
            compute_score_only(
                mine,
                theirs,
                config.global_weight,
                config.block_weight,
                config.block_distance_threshold,
            )
        };
        let ahash_sim = score_pair(&self.ahash, &other.ahash);
        let phash_sim = score_pair(&self.phash, &other.phash);
        let dhash_sim = score_pair(&self.dhash, &other.dhash);

        let weighted_score = ahash_sim * config.ahash_weight
            + phash_sim * config.phash_weight
            + dhash_sim * config.dhash_weight;

        let ahash_dist = hamming_distance(self.ahash.global_hash, other.ahash.global_hash);
        let phash_dist = hamming_distance(self.phash.global_hash, other.phash.global_hash);
        let dhash_dist = hamming_distance(self.dhash.global_hash, other.dhash.global_hash);

        // The casts are intentional: distances are tiny integers, and the
        // float -> u32 conversion saturates (negative and NaN become 0).
        #[allow(
            clippy::cast_precision_loss,
            clippy::cast_possible_truncation,
            clippy::cast_sign_loss
        )]
        let avg_distance = {
            let weight_sum = config.ahash_weight + config.phash_weight + config.dhash_weight;
            let raw = (ahash_dist as f32 * config.ahash_weight)
                + (phash_dist as f32 * config.phash_weight)
                + (dhash_dist as f32 * config.dhash_weight);
            if weight_sum > 0.0 {
                // With mixed-sign weights the "mean" can leave the range of
                // its inputs; a distance between 64-bit hashes never
                // legitimately exceeds 64, so cap it there.
                ((raw / weight_sum) as u32).min(u64::BITS)
            } else {
                // Also reached when `weight_sum` is NaN.
                0
            }
        };

        Similarity {
            score: weighted_score.clamp(0.0, 1.0),
            exact_match: false,
            perceptual_distance: avg_distance,
        }
    }

    /// Reports whether `other` scores at least `threshold` against `self`
    /// under the default configuration (see [`Self::compare`]).
    ///
    /// `threshold` is clamped to `[0.0, 1.0]` before the comparison.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics when `threshold` lies
    /// outside `[0.0, 1.0]` (this includes NaN).
    #[must_use]
    pub fn is_similar(&self, other: &MultiHashFingerprint, threshold: f32) -> bool {
        debug_assert!(
            (0.0..=1.0).contains(&threshold),
            "threshold must be in range [0.0, 1.0], got {threshold}"
        );
        self.compare(other).score >= threshold.clamp(0.0, 1.0)
    }
}
impl core::fmt::Display for MultiHashFingerprint {
    /// Renders `<exact>|<ahash>|<phash>|<dhash>` where `<exact>` is 64
    /// lowercase hex digits and each hash is the algorithm's global hash as
    /// 16 zero-padded lowercase hex digits. Block hashes are not printed.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        self.exact
            .iter()
            .try_for_each(|byte| write!(f, "{:02x}", byte))?;

        let (a, p, d) = (
            self.ahash.global_hash,
            self.phash.global_hash,
            self.dhash.global_hash,
        );
        write!(f, "|{:016x}|{:016x}|{:016x}", a, p, d)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `ImageFingerprint` with an all-zero exact hash, the given
    /// global hash, and all sixteen block hashes set to `blocks_word`.
    fn fp(global: u64, blocks_word: u64) -> ImageFingerprint {
        ImageFingerprint::new([0u8; 32], global, [blocks_word; 16])
    }

    /// Builds a `MultiHashFingerprint` whose three per-algorithm fingerprints
    /// share `exact` and use their global hash for every block hash as well.
    fn multi(exact: [u8; 32], a_global: u64, p_global: u64, d_global: u64) -> MultiHashFingerprint {
        MultiHashFingerprint::new(
            exact,
            ImageFingerprint::new(exact, a_global, [a_global; 16]),
            ImageFingerprint::new(exact, p_global, [p_global; 16]),
            ImageFingerprint::new(exact, d_global, [d_global; 16]),
        )
    }

    // `compare` must be the same as comparing under the default config.
    #[test]
    fn multi_hash_config_default_matches_compare() {
        let a = multi([1u8; 32], 0xAAAA, 0xBBBB, 0xCCCC);
        let b = multi([2u8; 32], 0xAAAA, 0xBBB0, 0xCCC0);
        let default_score = a.compare(&b).score;
        let cfg_score = a.compare_with_config(&b, &MultiHashConfig::default()).score;
        assert!(
            (default_score - cfg_score).abs() < 1e-6,
            "{default_score} vs {cfg_score}"
        );
    }

    // With only pHash weighted, maximally different aHash/dHash must not
    // pull the score down.
    #[test]
    fn multi_hash_config_phash_only_ignores_other_algorithms() {
        let a = multi([1u8; 32], 0x0000_0000, 0x1234_5678, 0x0000_0000);
        let b = multi([2u8; 32], u64::MAX, 0x1234_5678, u64::MAX);
        let default_score = a.compare(&b).score;
        let phash_only = MultiHashConfig {
            ahash_weight: 0.0,
            phash_weight: 1.0,
            dhash_weight: 0.0,
            ..MultiHashConfig::default()
        };
        let phash_score = a.compare_with_config(&b, &phash_only).score;
        assert!((phash_score - 1.0).abs() < 1e-6, "got {phash_score}");
        assert!(
            default_score < phash_score,
            "{default_score} >= {phash_score}"
        );
    }

    // An exact-hash match short-circuits before any weight is consulted.
    #[test]
    fn multi_hash_config_exact_match_is_always_one() {
        let a = multi([7u8; 32], 0xAAAA, 0xBBBB, 0xCCCC);
        let weird = MultiHashConfig {
            ahash_weight: 0.0,
            phash_weight: 0.0,
            dhash_weight: 0.0,
            global_weight: 0.0,
            block_weight: 0.0,
            block_distance_threshold: 0,
        };
        let s = a.compare_with_config(&a, &weird);
        assert!(s.exact_match);
        assert_eq!(s.score, 1.0);
    }

    // Oversized weights must not push the score outside [0, 1].
    #[test]
    fn multi_hash_config_score_clamped_to_unit_interval() {
        let a = multi([1u8; 32], 0, 0, 0);
        let b = multi([2u8; 32], 0, 0, 0);
        let cfg = MultiHashConfig {
            ahash_weight: 5.0,
            phash_weight: 5.0,
            dhash_weight: 5.0,
            global_weight: 10.0,
            block_weight: 10.0,
            block_distance_threshold: 32,
        };
        let s = a.compare_with_config(&b, &cfg);
        assert!(s.score <= 1.0 && s.score >= 0.0, "got {}", s.score);
    }

    // Exercises the `fp` helper for real: accessors return what `new` was
    // given, and `distance` counts differing bits of the global hash only.
    #[test]
    fn image_fingerprint_accessors_and_distance() {
        let a = fp(0b1011, 0xABCD);
        assert_eq!(*a.exact_hash(), [0u8; 32]);
        assert_eq!(a.global_hash(), 0b1011);
        assert_eq!(*a.block_hashes(), [0xABCD; 16]);

        // 0b1011 ^ 0b0001 == 0b1010 -> two differing bits. The block hashes
        // differ too but must not influence `distance`.
        let b = fp(0b0001, 0x1234);
        assert_eq!(a.distance(&b), 2);
        assert_eq!(b.distance(&a), 2);
        assert_eq!(a.distance(&a), 0);
        assert_eq!(fp(0, 0).distance(&fp(u64::MAX, 0)), 64);
    }

    #[test]
    fn format_version_is_one() {
        assert_eq!(crate::FORMAT_VERSION, 1);
        assert_eq!(ImageFingerprint::format_version(), 1);
        assert_eq!(MultiHashFingerprint::format_version(), 1);
    }

    // Runtime mirror of the compile-time layout guard, plus alignment.
    #[test]
    fn image_fingerprint_layout_is_stable() {
        assert_eq!(core::mem::size_of::<ImageFingerprint>(), 168);
        assert_eq!(core::mem::align_of::<ImageFingerprint>(), 8);
    }

    #[test]
    fn multi_hash_fingerprint_layout_is_stable() {
        assert_eq!(core::mem::size_of::<MultiHashFingerprint>(), 536);
        assert_eq!(core::mem::align_of::<MultiHashFingerprint>(), 8);
    }

    // Typed slice -> bytes -> typed slice must be lossless.
    #[test]
    fn image_fingerprint_cast_slice_roundtrips() {
        let fps = vec![
            ImageFingerprint::new([1u8; 32], 0xAAAA_BBBB_CCCC_DDDD, [0x1234; 16]),
            ImageFingerprint::new([2u8; 32], 0xDEAD_BEEF_CAFE_BABE, [0xFEDC; 16]),
            ImageFingerprint::new([3u8; 32], 0, [0; 16]),
        ];
        let bytes: &[u8] = bytemuck::cast_slice(&fps);
        assert_eq!(bytes.len(), 3 * 168);
        let back: &[ImageFingerprint] = bytemuck::cast_slice(bytes);
        assert_eq!(back.len(), fps.len());
        assert_eq!(back, &fps[..]);
    }

    #[test]
    fn multi_hash_fingerprint_cast_slice_roundtrips() {
        let fps = vec![
            multi([1u8; 32], 0x1111, 0x2222, 0x3333),
            multi([2u8; 32], 0xAAAA, 0xBBBB, 0xCCCC),
        ];
        let bytes: &[u8] = bytemuck::cast_slice(&fps);
        assert_eq!(bytes.len(), 2 * 536);
        let back: &[MultiHashFingerprint] = bytemuck::cast_slice(bytes);
        assert_eq!(back.len(), fps.len());
        assert_eq!(back, &fps[..]);
    }

    // The all-zero bit pattern is a usable value.
    #[test]
    fn fingerprint_zeroed_is_valid() {
        let z: MultiHashFingerprint = bytemuck::Zeroable::zeroed();
        assert_eq!(*z.exact_hash(), [0u8; 32]);
        assert_eq!(z.ahash().global_hash(), 0);
    }

    #[test]
    fn image_fingerprint_display() {
        let fp = ImageFingerprint::new([0xABu8; 32], 0x1234_5678_9ABC_DEF0, [0xFF; 16]);
        let s = format!("{}", fp);
        assert!(s.starts_with("abababab"));
        assert!(s.contains(":123456789abcdef0:"));
        assert!(s.contains("00000000000000ff"));
    }

    #[test]
    fn multi_hash_fingerprint_display() {
        let m = multi([0x01u8; 32], 0xAAAA, 0xBBBB, 0xCCCC);
        let s = format!("{}", m);
        assert!(s.starts_with("01010101"));
        assert!(s.contains("|000000000000aaaa|"));
        assert!(s.contains("|000000000000bbbb|"));
        assert!(s.ends_with("000000000000cccc"));
    }

    // Same global hash, opposite block hashes: not a perfect match, but
    // still similar at a low threshold.
    #[test]
    fn is_similar_uses_block_hashes() {
        let fp1 = ImageFingerprint::new([1u8; 32], 0x1234, [0u64; 16]);
        let fp2 = ImageFingerprint::new([2u8; 32], 0x1234, [u64::MAX; 16]);
        assert!(!fp1.is_similar(&fp2, 1.0));
        assert!(fp1.is_similar(&fp2, 0.3));
    }

    // The distance is a weighted mean, so scaling the weights up must not
    // push it past the 64-bit hash width.
    #[test]
    fn perceptual_distance_bounded_with_inflated_weights() {
        let a = multi([1u8; 32], 0, 0xFFFF_FFFF_FFFF_FFFF, 0);
        let b = multi([2u8; 32], 0, 0, 0);
        let cfg = MultiHashConfig {
            ahash_weight: 0.0,
            phash_weight: 5.0,
            dhash_weight: 0.0,
            ..MultiHashConfig::default()
        };
        let s = a.compare_with_config(&b, &cfg);
        assert!(s.perceptual_distance <= 64, "got {}", s.perceptual_distance);
    }

    // A zero weight total must yield 0 rather than divide by zero.
    #[test]
    fn perceptual_distance_zero_weights() {
        let a = multi([1u8; 32], 0xFFFF, 0xFFFF, 0xFFFF);
        let b = multi([2u8; 32], 0, 0, 0);
        let cfg = MultiHashConfig {
            ahash_weight: 0.0,
            phash_weight: 0.0,
            dhash_weight: 0.0,
            ..MultiHashConfig::default()
        };
        let s = a.compare_with_config(&b, &cfg);
        assert_eq!(s.perceptual_distance, 0);
    }
}