use std::collections::VecDeque;
use crate::storage::embedding::cosine_similarity;
use uuid::Uuid;
/// Sliding-window detector for near-duplicate frames, keyed by embedding similarity.
///
/// The deduplicator remembers the embeddings of the most recently added frames and
/// reports whether a new embedding is at least `threshold` similar to any of them.
#[derive(Debug, Clone)]
pub struct FrameDeduplicator {
    /// Minimum similarity score (inclusive) at which two embeddings count as duplicates.
    threshold: f32,
    /// Remembered `(frame id, embedding)` pairs; the oldest entry sits at the front.
    recent_embeddings: VecDeque<(Uuid, Vec<f32>)>,
    /// Capacity bound applied to `recent_embeddings` when new entries are added.
    window_size: usize,
}
impl FrameDeduplicator {
    /// Creates an empty deduplicator.
    ///
    /// * `threshold` - minimum similarity (inclusive) for an embedding to be reported
    ///   as a duplicate by [`is_duplicate`](Self::is_duplicate).
    /// * `window_size` - maximum number of recent embeddings retained; storage for that
    ///   many entries is reserved up front. A value of `0` disables retention entirely.
    pub fn new(threshold: f32, window_size: usize) -> Self {
        Self {
            threshold,
            recent_embeddings: VecDeque::with_capacity(window_size),
            window_size,
        }
    }

    /// Looks for the remembered embedding most similar to `embedding`.
    ///
    /// Stored embeddings whose length differs from `embedding` are skipped. Among the
    /// remaining ones, only those whose `cosine_similarity` score is `>= threshold`
    /// qualify; the highest-scoring one is returned as `(frame id, score)`. When several
    /// entries share the highest score, the one added earliest wins.
    ///
    /// Returns `None` if nothing in the window reaches the threshold.
    pub fn is_duplicate(&self, embedding: &[f32]) -> Option<(Uuid, f32)> {
        self.recent_embeddings
            .iter()
            .filter(|(_, stored)| stored.len() == embedding.len())
            .map(|(id, stored)| (*id, cosine_similarity(embedding, stored)))
            .filter(|&(_, sim)| sim >= self.threshold)
            // A plain `max_by` would return the *last* of equal maxima; the fold keeps
            // the first one so that ties resolve to the oldest matching frame.
            .fold(None, |best, candidate| match best {
                Some((_, best_sim)) if candidate.1 <= best_sim => best,
                _ => Some(candidate),
            })
    }

    /// Remembers `embedding` under `id`, evicting the oldest entry if the window is full.
    ///
    /// With a `window_size` of `0` the call is a no-op: nothing is ever retained.
    pub fn add(&mut self, id: Uuid, embedding: Vec<f32>) {
        // A zero-sized window can hold nothing. Without this guard the push below
        // would keep one entry resident, exceeding the configured bound.
        if self.window_size == 0 {
            return;
        }
        if self.recent_embeddings.len() >= self.window_size {
            self.recent_embeddings.pop_front();
        }
        self.recent_embeddings.push_back((id, embedding));
    }

    /// Number of embeddings currently held in the window.
    pub fn window_len(&self) -> usize {
        self.recent_embeddings.len()
    }

    /// The similarity threshold this deduplicator was created with.
    pub fn threshold(&self) -> f32 {
        self.threshold
    }

    /// Forgets every remembered embedding; the threshold and window size are kept.
    pub fn clear(&mut self) {
        self.recent_embeddings.clear();
    }
}