use crate::error::{RecommendError, RecommendResult};
use crate::{ContentMetadata, Recommendation, RecommendationReason, RecommendationRequest};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
/// Keeps one feature vector per content id and answers similarity queries
/// over them using `super::distance::cosine_similarity`.
pub struct ContentSimilarityCalculator {
/// Feature vector registered for each content id (one entry per id).
feature_vectors: HashMap<Uuid, super::vector::ContentVector>,
}
impl ContentSimilarityCalculator {
    /// Creates a calculator with no registered content.
    #[must_use]
    pub fn new() -> Self {
        Self {
            feature_vectors: HashMap::new(),
        }
    }

    /// Registers `vector` as the feature vector of `content_id`.
    ///
    /// A vector already stored under the same id is replaced.
    pub fn add_content(&mut self, content_id: Uuid, vector: super::vector::ContentVector) {
        self.feature_vectors.insert(content_id, vector);
    }

    /// Returns the cosine similarity between two registered pieces of content.
    ///
    /// # Errors
    ///
    /// Returns [`RecommendError::ContentNotFound`] with the offending id when
    /// either id has no registered vector. `content_a` is checked first.
    pub fn calculate_similarity(&self, content_a: Uuid, content_b: Uuid) -> RecommendResult<f32> {
        let vec_a = self
            .feature_vectors
            .get(&content_a)
            .ok_or(RecommendError::ContentNotFound(content_a))?;
        let vec_b = self
            .feature_vectors
            .get(&content_b)
            .ok_or(RecommendError::ContentNotFound(content_b))?;
        Ok(super::distance::cosine_similarity(vec_a, vec_b))
    }

    /// Returns up to `limit` other content ids, most similar to `content_id` first.
    ///
    /// `content_id` itself is never part of the result. Entries with equal
    /// similarity are ordered by id so the output does not depend on hash-map
    /// iteration order, and NaN similarities are placed after every real score.
    ///
    /// # Errors
    ///
    /// Returns [`RecommendError::ContentNotFound`] when `content_id` has no
    /// registered vector.
    pub fn find_similar(
        &self,
        content_id: Uuid,
        limit: usize,
    ) -> RecommendResult<Vec<(Uuid, f32)>> {
        let vec = self
            .feature_vectors
            .get(&content_id)
            .ok_or(RecommendError::ContentNotFound(content_id))?;

        // Nothing to compute when no results were requested; the lookup above
        // still reports an unknown id.
        if limit == 0 {
            return Ok(Vec::new());
        }

        let mut similarities: Vec<(Uuid, f32)> = self
            .feature_vectors
            .iter()
            .filter(|(id, _)| **id != content_id)
            .map(|(id, other_vec)| (*id, super::distance::cosine_similarity(vec, other_vec)))
            .collect();

        // Ids are unique map keys, so the comparator is a strict total order and
        // an unstable sort yields a fully determined result.
        similarities.sort_unstable_by(Self::by_descending_score);
        similarities.truncate(limit);
        Ok(similarities)
    }

    /// Orders scored entries by descending similarity, NaN last, ties by ascending id.
    fn by_descending_score(a: &(Uuid, f32), b: &(Uuid, f32)) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        let by_score = match (a.1.is_nan(), b.1.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Neither side is NaN here, so `partial_cmp` always yields `Some`.
            (false, false) => b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal),
        };
        by_score.then_with(|| a.0.cmp(&b.0))
    }
}
impl Default for ContentSimilarityCalculator {
fn default() -> Self {
Self::new()
}
}
/// Produces recommendations for content similar to a given base item by
/// pairing a [`ContentSimilarityCalculator`] with per-content metadata.
pub struct ContentRecommender {
/// Feature vectors and similarity queries for all registered content.
similarity_calculator: ContentSimilarityCalculator,
/// Metadata attached to each returned recommendation, keyed by content id.
content_metadata: HashMap<Uuid, ContentMetadata>,
}
impl ContentRecommender {
    /// Creates a recommender with no registered content.
    #[must_use]
    pub fn new() -> Self {
        Self {
            similarity_calculator: ContentSimilarityCalculator::new(),
            content_metadata: HashMap::new(),
        }
    }

    /// Registers a piece of content.
    ///
    /// `features` is converted with `ContentVector::from_features` and stored in
    /// the similarity calculator; `metadata` is stored alongside it. Registering
    /// an id again replaces both entries.
    pub fn add_content(
        &mut self,
        content_id: Uuid,
        metadata: ContentMetadata,
        features: super::features::ContentFeatures,
    ) {
        let vector = super::vector::ContentVector::from_features(&features);
        self.similarity_calculator.add_content(content_id, vector);
        self.content_metadata.insert(content_id, metadata);
    }

    /// Recommends at most `request.limit` items similar to `request.content_id`.
    ///
    /// Returns an empty list when the request names no base content. Each
    /// recommendation carries the similarity as its score, a single
    /// `SimilarToLiked` reason pointing at the base content, and a 1-based
    /// `rank` that is contiguous over the returned items.
    ///
    /// # Errors
    ///
    /// Returns [`RecommendError::ContentNotFound`] when the base content has not
    /// been registered.
    pub fn recommend(
        &self,
        request: &RecommendationRequest,
    ) -> RecommendResult<Vec<Recommendation>> {
        let base_content = match request.content_id {
            Some(id) => id,
            None => return Ok(Vec::new()),
        };

        // Ask for twice as many candidates as needed so that entries without
        // metadata can be skipped; saturate so a huge limit cannot overflow.
        let candidate_count = request.limit.saturating_mul(2);
        let similar = self
            .similarity_calculator
            .find_similar(base_content, candidate_count)?;

        let recommendations: Vec<Recommendation> = similar
            .into_iter()
            .filter_map(|(content_id, similarity)| {
                self.content_metadata
                    .get(&content_id)
                    .map(|metadata| (content_id, similarity, metadata))
            })
            .take(request.limit)
            // Number only the survivors so skipped candidates leave no rank gaps.
            .enumerate()
            .map(|(idx, (content_id, similarity, metadata))| Recommendation {
                content_id,
                score: similarity,
                rank: idx + 1,
                reasons: vec![RecommendationReason::SimilarToLiked {
                    content_id: base_content,
                    similarity,
                }],
                metadata: metadata.clone(),
                explanation: None,
            })
            .collect();
        Ok(recommendations)
    }
}
impl Default for ContentRecommender {
fn default() -> Self {
Self::new()
}
}
/// Identifier for a similarity metric; copyable and (de)serializable via serde.
///
/// NOTE(review): `ContentSimilarityCalculator` calls
/// `super::distance::cosine_similarity` directly and does not read this enum.
/// The variant names presumably refer to the cosine, Euclidean, Jaccard and
/// Pearson measures; confirm against the code that consumes this type.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum SimilarityMetric {
Cosine,
Euclidean,
Jaccard,
Pearson,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed calculator holds no feature vectors.
    #[test]
    fn test_similarity_calculator_creation() {
        let fresh = ContentSimilarityCalculator::new();
        assert!(fresh.feature_vectors.is_empty());
    }

    /// A freshly constructed recommender holds no metadata.
    #[test]
    fn test_content_recommender_creation() {
        let fresh = ContentRecommender::new();
        assert!(fresh.content_metadata.is_empty());
    }

    /// All four metric variants can be constructed.
    #[test]
    fn test_similarity_metric_variants() {
        let all_variants = [
            SimilarityMetric::Cosine,
            SimilarityMetric::Euclidean,
            SimilarityMetric::Jaccard,
            SimilarityMetric::Pearson,
        ];
        assert_eq!(4, all_variants.len());
    }
}