use std::collections::HashMap;
use crate::engagement::{ContentEngagementScore, EngagementComponents};
use crate::error::AnalyticsError;
/// Five-dimensional engagement feature vector for one piece of content.
///
/// Every numeric field is a component of an engagement score widened to
/// `f64`; no rescaling happens at construction time.
#[derive(Debug, Clone, PartialEq)]
pub struct EngagementFeatures {
    /// Identifier of the content these features describe.
    pub content_id: String,
    /// Copied from the `watch_time_score` component.
    pub watch_time_ratio: f64,
    /// Copied from the `completion_score` component.
    pub completion_rate: f64,
    /// Copied from the `rewatch_score` component.
    pub rewatch_rate: f64,
    /// Copied from the `social_score` component.
    pub social_score: f64,
    /// Copied from the `seek_forward_penalty` component.
    pub seek_penalty: f64,
}

impl EngagementFeatures {
    /// Builds features from a complete engagement score.
    ///
    /// The identifier is cloned from `score.content_id`; the numeric values
    /// come from `score.components`.
    pub fn from_score(score: &ContentEngagementScore) -> Self {
        Self::from_components(score.content_id.clone(), &score.components)
    }

    /// Builds features from bare components plus an explicit identifier.
    pub fn from_components(
        content_id: impl Into<String>,
        components: &EngagementComponents,
    ) -> Self {
        Self {
            content_id: content_id.into(),
            watch_time_ratio: f64::from(components.watch_time_score),
            completion_rate: f64::from(components.completion_score),
            rewatch_rate: f64::from(components.rewatch_score),
            social_score: f64::from(components.social_score),
            seek_penalty: f64::from(components.seek_forward_penalty),
        }
    }

    /// Returns the numeric features as a fixed-size array, in the order
    /// watch time, completion, rewatch, social, seek penalty.
    pub fn as_array(&self) -> [f64; 5] {
        // Only the `Copy` fields are bound, so nothing is moved out of `self`.
        let Self {
            watch_time_ratio,
            completion_rate,
            rewatch_rate,
            social_score,
            seek_penalty,
            ..
        } = *self;
        [
            watch_time_ratio,
            completion_rate,
            rewatch_rate,
            social_score,
            seek_penalty,
        ]
    }

    /// Euclidean (L2) length of the feature vector.
    pub fn l2_norm(&self) -> f64 {
        let squared_sum: f64 = self
            .as_array()
            .iter()
            .map(|component| component * component)
            .sum();
        squared_sum.sqrt()
    }
}
/// Selects how two engagement feature vectors are compared by `similarity`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SimilarityMetric {
/// Cosine of the angle between the two vectors; this is the `Default` variant.
#[default]
Cosine,
/// Euclidean distance with its sign flipped, so closer vectors score higher.
NegativeEuclidean,
/// Pearson correlation computed across the five feature dimensions.
Pearson,
}
/// Cosine similarity of two five-dimensional vectors.
///
/// Returns `0.0` when either vector has a norm below `f64::EPSILON`, which
/// avoids dividing by (almost) zero.
fn cosine_similarity(a: &[f64; 5], b: &[f64; 5]) -> f64 {
    let magnitude = |v: &[f64; 5]| v.iter().map(|x| x * x).sum::<f64>().sqrt();
    let (mag_a, mag_b) = (magnitude(a), magnitude(b));

    // Degenerate (near-zero) vectors have no meaningful direction.
    if mag_a < f64::EPSILON || mag_b < f64::EPSILON {
        return 0.0;
    }

    let inner: f64 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    inner / (mag_a * mag_b)
}
/// Negated Euclidean distance between two five-dimensional vectors.
///
/// Identical vectors score `0` (the maximum); the score decreases as the
/// vectors move apart.
fn negative_euclidean(a: &[f64; 5], b: &[f64; 5]) -> f64 {
    let squared_gap: f64 = a
        .iter()
        .zip(b)
        .map(|(x, y)| {
            let delta = x - y;
            delta * delta
        })
        .sum();
    -squared_gap.sqrt()
}
/// Pearson correlation between two five-dimensional vectors.
///
/// Returns `0.0` when the product of the two (unnormalised) variances has a
/// square root below `f64::EPSILON`, e.g. when either vector is constant.
fn pearson_correlation(a: &[f64; 5], b: &[f64; 5]) -> f64 {
    let count = a.len() as f64;
    let centre = |v: &[f64; 5]| v.iter().sum::<f64>() / count;
    let (mu_a, mu_b) = (centre(a), centre(b));

    // Accumulate covariance and both variances in a single pass.
    let (cross, spread_a, spread_b) = a.iter().zip(b).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(cross, spread_a, spread_b), (&x, &y)| {
            let (dx, dy) = (x - mu_a, y - mu_b);
            (cross + dx * dy, spread_a + dx * dx, spread_b + dy * dy)
        },
    );

    let scale = (spread_a * spread_b).sqrt();
    if scale < f64::EPSILON {
        return 0.0;
    }
    cross / scale
}
pub fn similarity(a: &EngagementFeatures, b: &EngagementFeatures, metric: SimilarityMetric) -> f64 {
let va = a.as_array();
let vb = b.as_array();
match metric {
SimilarityMetric::Cosine => cosine_similarity(&va, &vb),
SimilarityMetric::NegativeEuclidean => negative_euclidean(&va, &vb),
SimilarityMetric::Pearson => pearson_correlation(&va, &vb),
}
}
/// Per-dimension min-max scaler for [`EngagementFeatures`].
#[derive(Debug, Clone)]
pub struct FeatureNormaliser {
    /// Smallest value seen in each dimension while fitting.
    min: [f64; 5],
    /// Largest value seen in each dimension while fitting.
    max: [f64; 5],
}

impl FeatureNormaliser {
    /// Learns the per-dimension minimum and maximum from `features`.
    ///
    /// NaN values never satisfy either comparison, so they leave the learned
    /// bounds untouched.
    ///
    /// # Errors
    ///
    /// Returns [`AnalyticsError::InsufficientData`] when `features` is empty.
    pub fn fit(features: &[EngagementFeatures]) -> Result<Self, AnalyticsError> {
        if features.is_empty() {
            return Err(AnalyticsError::InsufficientData(
                "cannot fit normaliser on empty feature set".to_string(),
            ));
        }

        let mut min = [f64::MAX; 5];
        let mut max = [f64::MIN; 5];
        for sample in features {
            let values = sample.as_array();
            let bounds = min.iter_mut().zip(max.iter_mut());
            for ((lo, hi), value) in bounds.zip(values.iter().copied()) {
                if value < *lo {
                    *lo = value;
                }
                if value > *hi {
                    *hi = value;
                }
            }
        }

        Ok(Self { min, max })
    }

    /// Rescales `features` with the bounds learned by [`FeatureNormaliser::fit`].
    ///
    /// Each dimension is mapped through `(v - min) / (max - min)`. A dimension
    /// whose fitted range is below `f64::EPSILON` maps to `0.5`. Values are
    /// not clamped, so inputs outside the fitted range land outside `[0, 1]`.
    pub fn transform(&self, features: &EngagementFeatures) -> EngagementFeatures {
        let raw = features.as_array();
        let rescale = |dim: usize| -> f64 {
            let (lo, hi) = (self.min[dim], self.max[dim]);
            let span = hi - lo;
            if span < f64::EPSILON {
                0.5
            } else {
                (raw[dim] - lo) / span
            }
        };

        EngagementFeatures {
            content_id: features.content_id.clone(),
            watch_time_ratio: rescale(0),
            completion_rate: rescale(1),
            rewatch_rate: rescale(2),
            social_score: rescale(3),
            seek_penalty: rescale(4),
        }
    }
}
/// One ranked entry produced by `ContentRecommender::recommend`.
#[derive(Debug, Clone, PartialEq)]
pub struct RecommendationResult {
/// Identifier of the recommended (candidate) content.
pub content_id: String,
/// Value returned by `similarity` for the query/candidate pair; larger means more similar.
pub similarity_score: f64,
}
/// In-memory index of engagement features that ranks content by similarity
/// to a query item.
pub struct ContentRecommender {
    /// Features keyed by their own `content_id`.
    index: HashMap<String, EngagementFeatures>,
    /// Metric applied to every query/candidate pair.
    metric: SimilarityMetric,
}

impl ContentRecommender {
    /// Builds a recommender over `features`, compared with `metric`.
    ///
    /// When several features share a `content_id`, the last one yielded by
    /// `features` replaces the earlier ones.
    ///
    /// # Errors
    ///
    /// Returns [`AnalyticsError::InsufficientData`] when `features` yields no
    /// items.
    pub fn new(
        features: impl IntoIterator<Item = EngagementFeatures>,
        metric: SimilarityMetric,
    ) -> Result<Self, AnalyticsError> {
        let index: HashMap<String, EngagementFeatures> = features
            .into_iter()
            .map(|f| (f.content_id.clone(), f))
            .collect();
        if index.is_empty() {
            return Err(AnalyticsError::InsufficientData(
                "recommender index is empty".to_string(),
            ));
        }
        Ok(Self { index, metric })
    }

    /// Returns up to `top_n` items most similar to `query_content_id`.
    ///
    /// The query item itself is never returned. Results are ordered by
    /// descending similarity. Candidates whose score is NaN are placed after
    /// all others, and equal scores are ordered by ascending `content_id`, so
    /// the output does not depend on `HashMap` iteration order.
    ///
    /// # Errors
    ///
    /// Returns [`AnalyticsError::InvalidInput`] when `query_content_id` is not
    /// present in the index.
    pub fn recommend(
        &self,
        query_content_id: &str,
        top_n: usize,
    ) -> Result<Vec<RecommendationResult>, AnalyticsError> {
        let query = self.index.get(query_content_id).ok_or_else(|| {
            AnalyticsError::InvalidInput(format!(
                "content '{}' not found in recommender index",
                query_content_id
            ))
        })?;

        let mut scored: Vec<RecommendationResult> = self
            .index
            .values()
            .filter(|candidate| candidate.content_id != query_content_id)
            .map(|candidate| RecommendationResult {
                content_id: candidate.content_id.clone(),
                similarity_score: similarity(query, candidate, self.metric),
            })
            .collect();

        // The comparator must be a total order: treating NaN as "equal to
        // everything" is not transitive, which makes the sort result
        // unspecified (and may panic in newer standard libraries). Keys:
        // 1. non-NaN before NaN, 2. higher score first, 3. content id.
        // Ids are unique map keys, so no two entries compare equal and an
        // unstable sort is still fully deterministic.
        scored.sort_unstable_by(|a, b| {
            a.similarity_score
                .is_nan()
                .cmp(&b.similarity_score.is_nan())
                .then_with(|| {
                    b.similarity_score
                        .partial_cmp(&a.similarity_score)
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
                .then_with(|| a.content_id.cmp(&b.content_id))
        });
        scored.truncate(top_n);
        Ok(scored)
    }

    /// Number of distinct content items held in the index.
    pub fn index_size(&self) -> usize {
        self.index.len()
    }
}
/// One-shot helper: indexes `scores` and returns up to `top_n` items most
/// similar to `query_content_id` under `metric`.
///
/// # Errors
///
/// Propagates the errors of `ContentRecommender::new` (no scores supplied)
/// and `ContentRecommender::recommend` (query id not among the scores).
pub fn recommend_similar(
    query_content_id: &str,
    scores: &[ContentEngagementScore],
    top_n: usize,
    metric: SimilarityMetric,
) -> Result<Vec<RecommendationResult>, AnalyticsError> {
    // Feed the converted features straight into the index.
    let features = scores.iter().map(EngagementFeatures::from_score);
    ContentRecommender::new(features, metric)?.recommend(query_content_id, top_n)
}
// Unit tests for the similarity metrics, the feature normaliser and the
// recommender.
#[cfg(test)]
mod tests {
use super::*;
// Builds a `ContentEngagementScore` from five raw component values. The
// aggregate `score` is (watch + completion + rewatch + social - seek) / 4.0.
fn make_score(
id: &str,
watch: f32,
completion: f32,
rewatch: f32,
social: f32,
seek: f32,
) -> ContentEngagementScore {
ContentEngagementScore {
content_id: id.to_string(),
score: (watch + completion + rewatch + social - seek) / 4.0,
components: EngagementComponents {
watch_time_score: watch,
completion_score: completion,
rewatch_score: rewatch,
social_score: social,
seek_forward_penalty: seek,
},
}
}
// Cosine similarity of a vector with itself must be 1 (within 1e-9).
#[test]
fn cosine_identical_vectors_returns_one() {
let a = EngagementFeatures {
content_id: "a".to_string(),
watch_time_ratio: 0.8,
completion_rate: 0.7,
rewatch_rate: 0.2,
social_score: 0.5,
seek_penalty: 0.1,
};
let score = similarity(&a, &a, SimilarityMetric::Cosine);
assert!((score - 1.0).abs() < 1e-9, "cosine of identical={score}");
}
// The negated Euclidean distance from a point to itself must be 0 (within 1e-9).
#[test]
fn euclidean_same_point_returns_zero() {
let a = EngagementFeatures {
content_id: "a".to_string(),
watch_time_ratio: 0.5,
completion_rate: 0.5,
rewatch_rate: 0.5,
social_score: 0.5,
seek_penalty: 0.5,
};
let score = similarity(&a, &a, SimilarityMetric::NegativeEuclidean);
assert!(score.abs() < 1e-9, "negative_euclidean same point={score}");
}
// The query id itself must never appear among its own recommendations.
#[test]
fn recommender_excludes_query_item() {
let scores: Vec<ContentEngagementScore> = vec![
make_score("a", 0.9, 0.9, 0.3, 0.5, 0.1),
make_score("b", 0.8, 0.8, 0.2, 0.5, 0.1),
make_score("c", 0.1, 0.1, 0.0, 0.5, 0.9),
];
let recs = recommend_similar("a", &scores, 5, SimilarityMetric::Cosine)
.expect("recommendations should succeed");
assert!(
recs.iter().all(|r| r.content_id != "a"),
"query item must not appear in results"
);
}
// A candidate close to the query must rank ahead of a clearly different one.
#[test]
fn similar_content_ranks_higher_than_dissimilar() {
let scores = vec![
make_score("query", 0.9, 0.9, 0.3, 0.5, 0.05),
make_score("similar", 0.85, 0.88, 0.28, 0.5, 0.06),
make_score("dissimilar", 0.1, 0.05, 0.0, 0.5, 0.95),
];
let recs = recommend_similar("query", &scores, 2, SimilarityMetric::Cosine).expect("recs");
assert_eq!(
recs[0].content_id, "similar",
"most similar should rank first"
);
}
// Ten items (c0..c9) are indexed and three are requested.
// NOTE: only the upper bound is asserted; with nine candidates available
// the result length could be pinned to exactly 3.
#[test]
fn recommender_returns_at_most_top_n() {
let scores: Vec<_> = (0..10u32)
.map(|i| make_score(&format!("c{i}"), 0.5 + i as f32 * 0.02, 0.5, 0.1, 0.5, 0.05))
.collect();
let recs = recommend_similar("c0", &scores, 3, SimilarityMetric::Cosine).expect("recs");
assert!(recs.len() <= 3, "should return at most 3 results");
}
// Asking for an id that was never indexed must yield an error.
#[test]
fn recommender_error_on_unknown_query_id() {
let scores = vec![make_score("known", 0.5, 0.5, 0.2, 0.5, 0.1)];
let result = recommend_similar("unknown_id", &scores, 3, SimilarityMetric::Cosine);
assert!(result.is_err(), "should error on unknown query ID");
}
// Fits on two samples and checks that the smaller value maps to 0 and the
// larger to 1.
// NOTE: only `watch_time_ratio` is asserted; the other four dimensions are
// not checked here.
#[test]
fn feature_normaliser_maps_to_unit_interval() {
let f1 = EngagementFeatures {
content_id: "f1".to_string(),
watch_time_ratio: 0.2,
completion_rate: 0.3,
rewatch_rate: 0.1,
social_score: 0.4,
seek_penalty: 0.5,
};
let f2 = EngagementFeatures {
content_id: "f2".to_string(),
watch_time_ratio: 0.8,
completion_rate: 0.9,
rewatch_rate: 0.7,
social_score: 0.6,
seek_penalty: 0.2,
};
let normaliser =
FeatureNormaliser::fit(&[f1.clone(), f2.clone()]).expect("fit should succeed");
let n1 = normaliser.transform(&f1);
let n2 = normaliser.transform(&f2);
assert!((n1.watch_time_ratio).abs() < 1e-9);
assert!((n2.watch_time_ratio - 1.0).abs() < 1e-9);
}
// NOTE: despite the name, the two vectors are not identical: every
// component of `b` is the matching component of `a` plus 0.1. The test
// asserts that such a shifted copy has a Pearson score above 0.9.
#[test]
fn pearson_identical_vectors_returns_one_or_nan_handled() {
let a = EngagementFeatures {
content_id: "a".to_string(),
watch_time_ratio: 0.7,
completion_rate: 0.6,
rewatch_rate: 0.3,
social_score: 0.5,
seek_penalty: 0.1,
};
let b = EngagementFeatures {
content_id: "b".to_string(),
watch_time_ratio: 0.8,
completion_rate: 0.7,
rewatch_rate: 0.4,
social_score: 0.6,
seek_penalty: 0.2,
};
let score = similarity(&a, &b, SimilarityMetric::Pearson);
assert!(score > 0.9, "pearson score={score}");
}
// Five features with distinct ids (c0..c4) must produce an index of size 5.
#[test]
fn recommender_index_size_matches_input() {
let scores: Vec<_> = (0..5u32)
.map(|i| make_score(&format!("c{i}"), 0.5, 0.5, 0.1, 0.5, 0.1))
.collect();
let features: Vec<EngagementFeatures> =
scores.iter().map(EngagementFeatures::from_score).collect();
let recommender =
ContentRecommender::new(features, SimilarityMetric::Cosine).expect("recommender");
assert_eq!(recommender.index_size(), 5);
}
}