use crate::spatiotemporal::diversity::types::ScoredEpisode;
use std::cmp::Ordering;
#[derive(Debug, Clone)]
pub struct DiversityMaximizer {
lambda: f32,
}
impl DiversityMaximizer {
#[must_use]
pub fn new(lambda: f32) -> Self {
assert!(
(0.0..=1.0).contains(&lambda),
"Lambda must be in range [0.0, 1.0], got {lambda}"
);
Self { lambda }
}
#[must_use]
pub fn lambda(&self) -> f32 {
self.lambda
}
#[must_use]
pub fn maximize_diversity(
&self,
candidates: Vec<ScoredEpisode>,
limit: usize,
) -> Vec<ScoredEpisode> {
if candidates.is_empty() || limit == 0 {
return Vec::new();
}
if candidates.len() <= limit {
return candidates;
}
let mut selected: Vec<ScoredEpisode> = Vec::with_capacity(limit);
let mut remaining = candidates;
while selected.len() < limit && !remaining.is_empty() {
let best_idx = self.find_max_mmr_index(&remaining, &selected);
let best_episode = remaining.remove(best_idx);
selected.push(best_episode);
}
selected
}
fn find_max_mmr_index(
&self,
candidates: &[ScoredEpisode],
selected: &[ScoredEpisode],
) -> usize {
candidates
.iter()
.enumerate()
.map(|(idx, episode)| {
let mmr_score = self.calculate_mmr_score(episode, selected);
(idx, mmr_score)
})
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal))
.map_or(0, |(idx, _)| idx)
}
fn calculate_mmr_score(&self, episode: &ScoredEpisode, selected: &[ScoredEpisode]) -> f32 {
let relevance = episode.relevance_score();
if selected.is_empty() {
return self.lambda * relevance;
}
let max_similarity = selected
.iter()
.map(|s| Self::calculate_similarity(episode, s))
.max_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal))
.unwrap_or(0.0);
self.lambda * relevance - (1.0 - self.lambda) * max_similarity
}
#[must_use]
pub fn calculate_similarity(episode1: &ScoredEpisode, episode2: &ScoredEpisode) -> f32 {
let emb1 = episode1.embedding();
let emb2 = episode2.embedding();
if emb1.len() != emb2.len() {
return 0.0;
}
if emb1.is_empty() {
return 0.0;
}
let dot_product: f32 = emb1.iter().zip(emb2.iter()).map(|(a, b)| a * b).sum();
let magnitude1: f32 = emb1.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude2: f32 = emb2.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude1 == 0.0 || magnitude2 == 0.0 {
return 0.0;
}
let similarity = dot_product / (magnitude1 * magnitude2);
similarity.clamp(0.0, 1.0)
}
#[must_use]
pub fn calculate_diversity_score(&self, selected: &[ScoredEpisode]) -> f32 {
if selected.is_empty() {
return 0.0;
}
if selected.len() == 1 {
return 1.0; }
let n = selected.len();
let mut total_dissimilarity = 0.0_f32;
let mut pair_count = 0;
for i in 0..n {
for j in 0..n {
if i != j {
let similarity = Self::calculate_similarity(&selected[i], &selected[j]);
let dissimilarity = 1.0 - similarity;
total_dissimilarity += dissimilarity;
pair_count += 1;
}
}
}
if pair_count > 0 {
total_dissimilarity / pair_count as f32
} else {
0.0
}
}
}
impl Default for DiversityMaximizer {
fn default() -> Self {
Self::new(0.7)
}
}