#![allow(dead_code)]
use std::collections::HashMap;
const BLOCK_SIZE: usize = 32;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FrameFingerprint {
pub hash: [u8; BLOCK_SIZE],
pub frame_number: u64,
}
impl FrameFingerprint {
pub fn new(hash: [u8; BLOCK_SIZE], frame_number: u64) -> Self {
Self { hash, frame_number }
}
pub fn from_bytes(data: &[u8], frame_number: u64) -> Self {
let mut hash = [0u8; BLOCK_SIZE];
let len = data.len().min(BLOCK_SIZE);
hash[..len].copy_from_slice(&data[..len]);
Self { hash, frame_number }
}
pub fn hamming_distance(&self, other: &Self) -> u32 {
self.hash
.iter()
.zip(other.hash.iter())
.map(|(a, b)| (a ^ b).count_ones())
.sum()
}
pub fn is_similar(&self, other: &Self, threshold: u32) -> bool {
self.hamming_distance(other) <= threshold
}
}
#[derive(Debug, Clone)]
pub struct ClipFingerprint {
pub clip_id: String,
pub frames: Vec<FrameFingerprint>,
pub duration_ms: u64,
pub sample_fps: f64,
}
impl ClipFingerprint {
pub fn new(clip_id: &str, duration_ms: u64, sample_fps: f64) -> Self {
Self {
clip_id: clip_id.to_string(),
frames: Vec::new(),
duration_ms,
sample_fps,
}
}
pub fn add_frame(&mut self, fp: FrameFingerprint) {
self.frames.push(fp);
}
pub fn frame_count(&self) -> usize {
self.frames.len()
}
#[allow(clippy::cast_precision_loss)]
pub fn average_distance(&self, other: &Self) -> f64 {
let count = self.frames.len().min(other.frames.len());
if count == 0 {
return f64::MAX;
}
let total: u64 = self
.frames
.iter()
.zip(other.frames.iter())
.map(|(a, b)| u64::from(a.hamming_distance(b)))
.sum();
total as f64 / count as f64
}
pub fn is_duplicate(&self, other: &Self, distance_threshold: f64) -> bool {
self.average_distance(other) <= distance_threshold
}
#[allow(clippy::cast_precision_loss)]
pub fn find_best_offset(&self, other: &Self) -> (i64, f64) {
if self.frames.is_empty() || other.frames.is_empty() {
return (0, f64::MAX);
}
let max_offset = self.frames.len().max(other.frames.len()) as i64;
let mut best_offset: i64 = 0;
let mut best_distance = f64::MAX;
for offset in -max_offset..=max_offset {
let mut total_dist = 0u64;
let mut count = 0u64;
for (i, fp_a) in self.frames.iter().enumerate() {
let j = i as i64 + offset;
if j >= 0 && (j as usize) < other.frames.len() {
total_dist += u64::from(fp_a.hamming_distance(&other.frames[j as usize]));
count += 1;
}
}
if count > 0 {
let avg = total_dist as f64 / count as f64;
if avg < best_distance {
best_distance = avg;
best_offset = offset;
}
}
}
(best_offset, best_distance)
}
}
#[derive(Debug, Clone)]
pub struct SimilarityResult {
pub clip_a: String,
pub clip_b: String,
pub distance: f64,
pub similarity: f64,
pub is_duplicate: bool,
}
#[derive(Debug)]
pub struct FingerprintDb {
entries: HashMap<String, ClipFingerprint>,
duplicate_threshold: f64,
}
impl FingerprintDb {
pub fn new(duplicate_threshold: f64) -> Self {
Self {
entries: HashMap::new(),
duplicate_threshold,
}
}
pub fn with_default_threshold() -> Self {
Self::new(8.0)
}
pub fn insert(&mut self, fp: ClipFingerprint) {
self.entries.insert(fp.clip_id.clone(), fp);
}
pub fn len(&self) -> usize {
self.entries.len()
}
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
pub fn get(&self, clip_id: &str) -> Option<&ClipFingerprint> {
self.entries.get(clip_id)
}
#[allow(clippy::cast_precision_loss)]
pub fn find_similar(&self, query: &ClipFingerprint) -> Vec<SimilarityResult> {
let max_bits = (BLOCK_SIZE * 8) as f64;
self.entries
.values()
.filter(|stored| stored.clip_id != query.clip_id)
.map(|stored| {
let distance = query.average_distance(stored);
let similarity = (1.0 - distance / max_bits).max(0.0);
let is_dup = distance <= self.duplicate_threshold;
SimilarityResult {
clip_a: query.clip_id.clone(),
clip_b: stored.clip_id.clone(),
distance,
similarity,
is_duplicate: is_dup,
}
})
.collect()
}
pub fn remove(&mut self, clip_id: &str) -> bool {
self.entries.remove(clip_id).is_some()
}
}
pub fn compute_phash(luminance: &[u8], width: usize, height: usize) -> [u8; BLOCK_SIZE] {
let mut hash = [0u8; BLOCK_SIZE];
if luminance.is_empty() || width == 0 || height == 0 {
return hash;
}
#[allow(clippy::cast_precision_loss)]
let mean: f64 = luminance.iter().map(|&v| f64::from(v)).sum::<f64>() / luminance.len() as f64;
for (i, &pixel) in luminance.iter().enumerate() {
if i / 8 >= BLOCK_SIZE {
break;
}
if f64::from(pixel) > mean {
hash[i / 8] |= 1 << (i % 8);
}
}
hash
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_frame_fingerprint_new() {
let hash = [0u8; BLOCK_SIZE];
let fp = FrameFingerprint::new(hash, 42);
assert_eq!(fp.frame_number, 42);
assert_eq!(fp.hash, [0u8; BLOCK_SIZE]);
}
#[test]
fn test_frame_fingerprint_from_bytes() {
let data = vec![1, 2, 3, 4, 5];
let fp = FrameFingerprint::from_bytes(&data, 0);
assert_eq!(fp.hash[0], 1);
assert_eq!(fp.hash[4], 5);
assert_eq!(fp.hash[5], 0);
}
#[test]
fn test_hamming_distance_identical() {
let fp_a = FrameFingerprint::new([0xFF; BLOCK_SIZE], 0);
let fp_b = FrameFingerprint::new([0xFF; BLOCK_SIZE], 1);
assert_eq!(fp_a.hamming_distance(&fp_b), 0);
}
#[test]
fn test_hamming_distance_different() {
let fp_a = FrameFingerprint::new([0x00; BLOCK_SIZE], 0);
let fp_b = FrameFingerprint::new([0xFF; BLOCK_SIZE], 1);
assert_eq!(fp_a.hamming_distance(&fp_b), (BLOCK_SIZE * 8) as u32);
}
#[test]
fn test_is_similar() {
let fp_a = FrameFingerprint::new([0x00; BLOCK_SIZE], 0);
let mut hash_b = [0x00; BLOCK_SIZE];
hash_b[0] = 0x01; let fp_b = FrameFingerprint::new(hash_b, 1);
assert!(fp_a.is_similar(&fp_b, 2));
assert!(!fp_a.is_similar(&fp_b, 0));
}
#[test]
fn test_clip_fingerprint_new() {
let cf = ClipFingerprint::new("clip1", 5000, 2.0);
assert_eq!(cf.clip_id, "clip1");
assert_eq!(cf.duration_ms, 5000);
assert_eq!(cf.frame_count(), 0);
}
#[test]
fn test_clip_fingerprint_add_frame() {
let mut cf = ClipFingerprint::new("clip1", 5000, 2.0);
cf.add_frame(FrameFingerprint::new([0; BLOCK_SIZE], 0));
cf.add_frame(FrameFingerprint::new([0; BLOCK_SIZE], 1));
assert_eq!(cf.frame_count(), 2);
}
#[test]
fn test_clip_average_distance_identical() {
let mut cf_a = ClipFingerprint::new("a", 1000, 1.0);
let mut cf_b = ClipFingerprint::new("b", 1000, 1.0);
for i in 0..5 {
let hash = [i as u8; BLOCK_SIZE];
cf_a.add_frame(FrameFingerprint::new(hash, i));
cf_b.add_frame(FrameFingerprint::new(hash, i));
}
assert!((cf_a.average_distance(&cf_b) - 0.0).abs() < f64::EPSILON);
}
#[test]
fn test_clip_is_duplicate() {
let mut cf_a = ClipFingerprint::new("a", 1000, 1.0);
let mut cf_b = ClipFingerprint::new("b", 1000, 1.0);
let hash = [0xAA; BLOCK_SIZE];
cf_a.add_frame(FrameFingerprint::new(hash, 0));
cf_b.add_frame(FrameFingerprint::new(hash, 0));
assert!(cf_a.is_duplicate(&cf_b, 1.0));
}
#[test]
fn test_fingerprint_db_insert_get() {
let mut db = FingerprintDb::with_default_threshold();
let cf = ClipFingerprint::new("c1", 2000, 1.0);
db.insert(cf);
assert_eq!(db.len(), 1);
assert!(db.get("c1").is_some());
assert!(db.get("c2").is_none());
}
#[test]
fn test_fingerprint_db_remove() {
let mut db = FingerprintDb::with_default_threshold();
db.insert(ClipFingerprint::new("c1", 1000, 1.0));
assert!(db.remove("c1"));
assert!(!db.remove("c1"));
assert!(db.is_empty());
}
#[test]
fn test_fingerprint_db_find_similar() {
let mut db = FingerprintDb::new(10.0);
let mut cf1 = ClipFingerprint::new("stored", 1000, 1.0);
cf1.add_frame(FrameFingerprint::new([0x00; BLOCK_SIZE], 0));
db.insert(cf1);
let mut query = ClipFingerprint::new("query", 1000, 1.0);
query.add_frame(FrameFingerprint::new([0x00; BLOCK_SIZE], 0));
let results = db.find_similar(&query);
assert_eq!(results.len(), 1);
assert!(results[0].is_duplicate);
assert!((results[0].distance - 0.0).abs() < f64::EPSILON);
}
#[test]
fn test_compute_phash_uniform() {
let data = vec![128u8; 64];
let hash = compute_phash(&data, 8, 8);
assert_eq!(hash, [0u8; BLOCK_SIZE]);
}
#[test]
fn test_compute_phash_half_bright() {
let mut data = vec![0u8; 256];
for v in data.iter_mut().take(128) {
*v = 255;
}
let hash = compute_phash(&data, 16, 16);
assert_ne!(hash, [0u8; BLOCK_SIZE]);
}
#[test]
fn test_compute_phash_empty() {
let hash = compute_phash(&[], 0, 0);
assert_eq!(hash, [0u8; BLOCK_SIZE]);
}
}