#![allow(dead_code)]
use std::collections::HashMap;
/// Kind of signature recorded for an asset.
///
/// The kind selects how two signatures are compared; see
/// [`SignatureType::supports_exact_match`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SignatureType {
    /// Perceptual signature of visual content.
    PerceptualVisual,
    /// Perceptual signature of audio content.
    PerceptualAudio,
    /// Cryptographic signature; the only kind that supports exact matching.
    Cryptographic,
    /// Neural embedding; classified as perceptual by [`SignatureType::is_perceptual`].
    NeuralEmbedding,
    /// Thumbnail signature; reported as neither perceptual nor exact-match.
    Thumbnail,
}

impl SignatureType {
    /// Returns `true` for `PerceptualVisual`, `PerceptualAudio` and
    /// `NeuralEmbedding`, and `false` for every other kind.
    #[must_use]
    pub const fn is_perceptual(self) -> bool {
        // Spelled out exhaustively so adding a variant forces a decision here.
        match self {
            Self::PerceptualVisual | Self::PerceptualAudio | Self::NeuralEmbedding => true,
            Self::Cryptographic | Self::Thumbnail => false,
        }
    }

    /// Returns `true` only for `Cryptographic`, the kind compared byte-for-byte.
    #[must_use]
    pub const fn supports_exact_match(self) -> bool {
        match self {
            Self::Cryptographic => true,
            Self::PerceptualVisual
            | Self::PerceptualAudio
            | Self::NeuralEmbedding
            | Self::Thumbnail => false,
        }
    }

    /// Returns the fixed kebab-case name of this kind.
    #[must_use]
    pub const fn label(self) -> &'static str {
        match self {
            Self::Thumbnail => "thumbnail",
            Self::NeuralEmbedding => "neural-embedding",
            Self::Cryptographic => "cryptographic",
            Self::PerceptualAudio => "perceptual-audio",
            Self::PerceptualVisual => "perceptual-visual",
        }
    }
}
/// One signature recorded for a single asset.
#[derive(Debug, Clone)]
pub struct ContentSignature {
    /// Identifier of the asset this signature belongs to.
    pub asset_id: String,
    /// Kind of signature; selects the comparison rule used by
    /// [`ContentSignature::matches`].
    pub sig_type: SignatureType,
    /// Raw signature bytes.
    pub data: Vec<u8>,
    /// Caller-supplied confidence value. It is stored as given; nothing in
    /// this type validates it or consults it when matching.
    pub confidence: f64,
}

impl ContentSignature {
    /// Builds a signature from its parts, converting `asset_id` into an owned
    /// `String`. No validation is performed on any argument.
    #[must_use]
    pub fn new(
        asset_id: impl Into<String>,
        sig_type: SignatureType,
        data: Vec<u8>,
        confidence: f64,
    ) -> Self {
        let asset_id = asset_id.into();
        Self {
            asset_id,
            sig_type,
            data,
            confidence,
        }
    }

    /// Reports whether `other` is considered the same content as `self`.
    ///
    /// Signatures of different kinds or different byte lengths never match.
    /// For kinds where [`SignatureType::supports_exact_match`] is `true` the
    /// bytes must be identical and `tolerance` is ignored. For every other
    /// kind the signatures match when the number of byte positions holding
    /// different values is at most `tolerance`.
    ///
    /// Note that the distance is counted per differing *byte*, not per
    /// differing bit. Asset ids and confidence play no part in the comparison.
    #[must_use]
    pub fn matches(&self, other: &Self, tolerance: u32) -> bool {
        let comparable =
            self.sig_type == other.sig_type && self.data.len() == other.data.len();
        if !comparable {
            return false;
        }

        if self.sig_type.supports_exact_match() {
            self.data == other.data
        } else {
            // Lengths are equal here, so `zip` visits every byte position.
            let mut mismatches: u32 = 0;
            for (lhs, rhs) in self.data.iter().zip(other.data.iter()) {
                if lhs != rhs {
                    mismatches += 1;
                }
            }
            mismatches <= tolerance
        }
    }

    /// Returns the number of bytes in the signature data.
    #[must_use]
    pub fn data_len(&self) -> usize {
        let Self { data, .. } = self;
        data.len()
    }
}
/// In-memory store of content signatures, grouped by asset id.
#[derive(Debug, Default)]
pub struct SignatureDatabase {
    /// Signatures keyed by the `asset_id` they were stored under, in
    /// insertion order within each asset.
    entries: HashMap<String, Vec<ContentSignature>>,
}

impl SignatureDatabase {
    /// Creates an empty database.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `sig` to the list kept for `sig.asset_id`, creating the list
    /// on first use. Duplicate signatures are not detected or merged.
    pub fn store(&mut self, sig: ContentSignature) {
        // The id is cloned because it is needed both as the map key and
        // inside the stored signature.
        let key = sig.asset_id.clone();
        self.entries.entry(key).or_default().push(sig);
    }

    /// Returns every signature stored for `asset_id`, or an empty slice when
    /// the asset is unknown.
    #[must_use]
    pub fn lookup(&self, asset_id: &str) -> &[ContentSignature] {
        match self.entries.get(asset_id) {
            Some(sigs) => sigs.as_slice(),
            None => &[],
        }
    }

    /// Returns the total number of signatures stored across all assets.
    ///
    /// Despite the name, this does not perform any matching.
    #[must_use]
    pub fn match_count(&self) -> usize {
        self.entries.values().map(Vec::len).sum()
    }

    /// Finds the assets holding at least one signature that `query` matches
    /// within `tolerance` (see `ContentSignature::matches`).
    ///
    /// Each element of the result is `(asset_id, number_of_matching_signatures)`.
    /// The asset whose id equals `query.asset_id` is never reported. The
    /// result is sorted by asset id in ascending order so that it does not
    /// depend on the hash map's arbitrary iteration order.
    #[must_use]
    pub fn find_matches(&self, query: &ContentSignature, tolerance: u32) -> Vec<(String, usize)> {
        let mut hits: Vec<(String, usize)> = self
            .entries
            .iter()
            // Cheap id check first, so the query's own asset is never scanned.
            .filter(|(id, _)| id.as_str() != query.asset_id.as_str())
            .filter_map(|(id, sigs)| {
                let count = sigs.iter().filter(|s| query.matches(s, tolerance)).count();
                if count > 0 {
                    Some((id.clone(), count))
                } else {
                    None
                }
            })
            .collect();
        // Ids are unique map keys, so an unstable sort is still deterministic.
        hits.sort_unstable_by(|a, b| a.0.cmp(&b.0));
        hits
    }

    /// Removes `asset_id` and returns the signatures that were stored for it;
    /// returns an empty vector when the asset is unknown.
    pub fn remove_asset(&mut self, asset_id: &str) -> Vec<ContentSignature> {
        self.entries.remove(asset_id).unwrap_or_default()
    }

    /// Returns the number of distinct asset ids currently stored.
    #[must_use]
    pub fn asset_count(&self) -> usize {
        self.entries.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a signature with confidence fixed at 1.0 so each test only
    /// varies the id, kind and bytes.
    fn make_sig(asset_id: &str, sig_type: SignatureType, data: Vec<u8>) -> ContentSignature {
        let confidence = 1.0;
        ContentSignature::new(asset_id, sig_type, data, confidence)
    }

    // --- SignatureType ---

    #[test]
    fn test_sig_type_is_perceptual_visual() {
        let kind = SignatureType::PerceptualVisual;
        assert!(kind.is_perceptual());
    }

    #[test]
    fn test_sig_type_is_perceptual_audio() {
        let kind = SignatureType::PerceptualAudio;
        assert!(kind.is_perceptual());
    }

    #[test]
    fn test_sig_type_not_perceptual_crypto() {
        let kind = SignatureType::Cryptographic;
        assert!(!kind.is_perceptual());
    }

    #[test]
    fn test_sig_type_supports_exact_match() {
        let exact = SignatureType::Cryptographic;
        let fuzzy = SignatureType::PerceptualVisual;
        assert!(exact.supports_exact_match());
        assert!(!fuzzy.supports_exact_match());
    }

    #[test]
    fn test_sig_type_label_nonempty() {
        let every_kind = [
            SignatureType::PerceptualVisual,
            SignatureType::PerceptualAudio,
            SignatureType::Cryptographic,
            SignatureType::NeuralEmbedding,
            SignatureType::Thumbnail,
        ];
        for kind in every_kind {
            assert!(!kind.label().is_empty());
        }
    }

    // --- ContentSignature::matches ---

    #[test]
    fn test_signature_exact_match_identical() {
        let bytes = vec![1, 2, 3, 4];
        let left = make_sig("a1", SignatureType::Cryptographic, bytes.clone());
        let right = make_sig("a2", SignatureType::Cryptographic, bytes);
        assert!(left.matches(&right, 0));
    }

    #[test]
    fn test_signature_exact_match_different() {
        let left = make_sig("a1", SignatureType::Cryptographic, vec![1, 2, 3, 4]);
        let right = make_sig("a2", SignatureType::Cryptographic, vec![1, 2, 3, 5]);
        assert!(!left.matches(&right, 0));
    }

    #[test]
    fn test_signature_perceptual_within_tolerance() {
        // Exactly one byte position differs, which a tolerance of 1 allows.
        let left = make_sig("a1", SignatureType::PerceptualVisual, vec![0; 4]);
        let right = make_sig("a2", SignatureType::PerceptualVisual, vec![1, 0, 0, 0]);
        assert!(left.matches(&right, 1));
    }

    #[test]
    fn test_signature_perceptual_exceeds_tolerance() {
        // Two byte positions differ, one more than the tolerance allows.
        let left = make_sig("a1", SignatureType::PerceptualVisual, vec![0; 4]);
        let right = make_sig("a2", SignatureType::PerceptualVisual, vec![1, 1, 0, 0]);
        assert!(!left.matches(&right, 1));
    }

    #[test]
    fn test_signature_type_mismatch() {
        // Identical bytes and a generous tolerance must not bridge different kinds.
        let left = make_sig("a1", SignatureType::PerceptualVisual, vec![0, 0, 0, 0]);
        let right = make_sig("a2", SignatureType::Cryptographic, vec![0, 0, 0, 0]);
        assert!(!left.matches(&right, 10));
    }

    // --- SignatureDatabase ---

    #[test]
    fn test_database_store_and_lookup() {
        let mut database = SignatureDatabase::new();
        let stored = make_sig(
            "asset1",
            SignatureType::Cryptographic,
            vec![0xAB, 0xAB, 0xAB, 0xAB],
        );
        database.store(stored);
        let found = database.lookup("asset1");
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_database_lookup_missing() {
        let database = SignatureDatabase::new();
        let found = database.lookup("nonexistent");
        assert!(found.is_empty());
    }

    #[test]
    fn test_database_match_count() {
        let mut database = SignatureDatabase::new();
        let inputs = [
            ("a", SignatureType::Cryptographic),
            ("a", SignatureType::PerceptualVisual),
            ("b", SignatureType::Cryptographic),
        ];
        for (id, kind) in inputs {
            database.store(make_sig(id, kind, vec![1, 1, 1, 1]));
        }
        assert_eq!(database.match_count(), 3);
    }

    #[test]
    fn test_database_find_matches() {
        let mut database = SignatureDatabase::new();
        database.store(make_sig(
            "other",
            SignatureType::PerceptualVisual,
            vec![0; 4],
        ));
        let probe = make_sig("query", SignatureType::PerceptualVisual, vec![0, 0, 0, 1]);
        let found = database.find_matches(&probe, 1);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].0, "other");
    }

    #[test]
    fn test_database_remove_asset() {
        let mut database = SignatureDatabase::new();
        database.store(make_sig("x", SignatureType::Cryptographic, vec![0, 0, 0, 0]));
        assert_eq!(database.asset_count(), 1);
        let taken = database.remove_asset("x");
        assert_eq!(taken.len(), 1);
        assert_eq!(database.asset_count(), 0);
    }
}