#![allow(dead_code)]
pub struct ContentFeatures {
pub item_id: u64,
pub tags: Vec<String>,
pub genre: String,
pub duration_s: u64,
pub rating: f32,
pub year: u32,
}
impl ContentFeatures {
#[must_use]
pub fn new(id: u64) -> Self {
Self {
item_id: id,
tags: Vec::new(),
genre: String::new(),
duration_s: 0,
rating: 0.0,
year: 0,
}
}
pub fn add_tag(&mut self, tag: &str) {
let t = tag.to_owned();
if !self.tags.contains(&t) {
self.tags.push(t);
}
}
#[must_use]
pub fn tag_vector(&self, all_tags: &[String]) -> Vec<f32> {
all_tags
.iter()
.map(|t| {
if self.tags.contains(t) {
1.0f32
} else {
0.0f32
}
})
.collect()
}
}
#[must_use]
pub fn item_similarity(a: &ContentFeatures, b: &ContentFeatures, all_tags: &[String]) -> f64 {
let va = a.tag_vector(all_tags);
let vb = b.tag_vector(all_tags);
let dot: f64 = va
.iter()
.zip(vb.iter())
.map(|(&x, &y)| f64::from(x) * f64::from(y))
.sum();
let norm_a: f64 = va
.iter()
.map(|&x| f64::from(x) * f64::from(x))
.sum::<f64>()
.sqrt();
let norm_b: f64 = vb
.iter()
.map(|&x| f64::from(x) * f64::from(x))
.sum::<f64>()
.sqrt();
let cosine = if norm_a == 0.0 || norm_b == 0.0 {
0.0
} else {
dot / (norm_a * norm_b)
};
let genre_bonus = if !a.genre.is_empty() && a.genre == b.genre {
0.1
} else {
0.0
};
(cosine + genre_bonus).min(1.0)
}
pub struct ContentFilter {
pub items: Vec<ContentFeatures>,
pub all_tags: Vec<String>,
}
impl ContentFilter {
#[must_use]
pub fn new() -> Self {
Self {
items: Vec::new(),
all_tags: Vec::new(),
}
}
pub fn add_item(&mut self, item: ContentFeatures) {
for tag in &item.tags {
if !self.all_tags.contains(tag) {
self.all_tags.push(tag.clone());
}
}
self.items.push(item);
}
#[must_use]
pub fn similar_to(&self, item_id: u64, n: usize) -> Vec<(u64, f64)> {
let target = match self.items.iter().find(|i| i.item_id == item_id) {
Some(t) => t,
None => return Vec::new(),
};
let mut scored: Vec<(u64, f64)> = self
.items
.iter()
.filter(|i| i.item_id != item_id)
.map(|other| {
let sim = item_similarity(target, other, &self.all_tags);
(other.item_id, sim)
})
.collect();
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scored.truncate(n);
scored
}
#[must_use]
pub fn by_genre(&self, genre: &str) -> Vec<&ContentFeatures> {
self.items.iter().filter(|i| i.genre == genre).collect()
}
#[must_use]
pub fn by_tags(&self, tags: &[&str]) -> Vec<&ContentFeatures> {
self.items
.iter()
.filter(|i| tags.iter().all(|t| i.tags.contains(&(*t).to_owned())))
.collect()
}
}
impl Default for ContentFilter {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_item(id: u64, tags: &[&str], genre: &str) -> ContentFeatures {
let mut f = ContentFeatures::new(id);
f.genre = genre.to_owned();
for &t in tags {
f.add_tag(t);
}
f
}
#[test]
fn test_content_features_new() {
let f = ContentFeatures::new(1);
assert_eq!(f.item_id, 1);
assert!(f.tags.is_empty());
assert!(f.genre.is_empty());
}
#[test]
fn test_add_tag_no_duplicates() {
let mut f = ContentFeatures::new(1);
f.add_tag("action");
f.add_tag("action");
assert_eq!(f.tags.len(), 1);
}
#[test]
fn test_tag_vector_length() {
let mut f = ContentFeatures::new(1);
f.add_tag("a");
f.add_tag("b");
let vocab = vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];
let v = f.tag_vector(&vocab);
assert_eq!(v.len(), 3);
assert_eq!(v[0], 1.0);
assert_eq!(v[1], 1.0);
assert_eq!(v[2], 0.0);
}
#[test]
fn test_item_similarity_identical() {
let a = make_item(1, &["x", "y"], "drama");
let b = make_item(2, &["x", "y"], "drama");
let vocab = vec!["x".to_owned(), "y".to_owned()];
let sim = item_similarity(&a, &b, &vocab);
assert!((sim - 1.0).abs() < 1e-9);
}
#[test]
fn test_item_similarity_no_overlap() {
let a = make_item(1, &["x"], "drama");
let b = make_item(2, &["y"], "comedy");
let vocab = vec!["x".to_owned(), "y".to_owned()];
let sim = item_similarity(&a, &b, &vocab);
assert!(sim < 0.01);
}
#[test]
fn test_item_similarity_empty_tags() {
let a = ContentFeatures::new(1);
let b = ContentFeatures::new(2);
let vocab: Vec<String> = Vec::new();
let sim = item_similarity(&a, &b, &vocab);
assert!((sim).abs() < 1e-9);
}
#[test]
fn test_content_filter_add_and_by_genre() {
let mut cf = ContentFilter::new();
cf.add_item(make_item(1, &["a"], "drama"));
cf.add_item(make_item(2, &["b"], "comedy"));
cf.add_item(make_item(3, &["c"], "drama"));
let dramas = cf.by_genre("drama");
assert_eq!(dramas.len(), 2);
}
#[test]
fn test_by_tags_all_must_match() {
let mut cf = ContentFilter::new();
cf.add_item(make_item(1, &["action", "thriller"], "action"));
cf.add_item(make_item(2, &["action"], "action"));
let found = cf.by_tags(&["action", "thriller"]);
assert_eq!(found.len(), 1);
assert_eq!(found[0].item_id, 1);
}
#[test]
fn test_similar_to_unknown_item() {
let cf = ContentFilter::new();
assert!(cf.similar_to(999, 5).is_empty());
}
#[test]
fn test_similar_to_excludes_self() {
let mut cf = ContentFilter::new();
cf.add_item(make_item(1, &["a"], "drama"));
cf.add_item(make_item(2, &["a"], "drama"));
let sims = cf.similar_to(1, 5);
assert!(!sims.iter().any(|&(id, _)| id == 1));
}
#[test]
fn test_similar_to_ordering() {
let mut cf = ContentFilter::new();
cf.add_item(make_item(1, &["a", "b", "c"], "drama"));
cf.add_item(make_item(2, &["a", "b", "c"], "drama"));
cf.add_item(make_item(3, &["a"], "comedy"));
let sims = cf.similar_to(1, 2);
assert_eq!(sims[0].0, 2);
}
#[test]
fn test_by_tags_empty_filter() {
let cf = ContentFilter::new();
assert!(cf.by_tags(&["action"]).is_empty());
}
#[test]
fn test_all_tags_deduplicated() {
let mut cf = ContentFilter::new();
cf.add_item(make_item(1, &["a", "b"], "drama"));
cf.add_item(make_item(2, &["b", "c"], "comedy"));
let count = cf.all_tags.iter().filter(|t| t.as_str() == "b").count();
assert_eq!(count, 1);
}
}