#![allow(dead_code)]
#![allow(clippy::cast_precision_loss)]
use std::collections::HashMap;
use uuid::Uuid;
/// A dense numeric feature vector tagged with the identifier of the item it describes.
#[derive(Debug, Clone, PartialEq)]
pub struct FeatureVector {
/// Identifier of the item these features belong to.
pub item_id: Uuid,
/// Feature components; the vector's dimensionality is the length of this list.
pub values: Vec<f32>,
}
impl FeatureVector {
#[must_use]
pub fn new(item_id: Uuid, values: Vec<f32>) -> Self {
Self { item_id, values }
}
#[must_use]
pub fn dim(&self) -> usize {
self.values.len()
}
#[must_use]
pub fn norm(&self) -> f32 {
self.values.iter().map(|v| v * v).sum::<f32>().sqrt()
}
}
/// Computes the cosine similarity between two feature vectors.
///
/// Returns `0.0` when the vectors have different dimensions, when they are
/// empty, or when either vector's norm is below `f32::EPSILON`. Otherwise the
/// result is the dot product divided by the product of the norms, clamped to
/// `[-1.0, 1.0]`.
#[must_use]
pub fn cosine_similarity(a: &FeatureVector, b: &FeatureVector) -> f32 {
    let len = a.dim();
    if len == 0 || len != b.dim() {
        return 0.0;
    }

    let dot = a
        .values
        .iter()
        .zip(b.values.iter())
        .map(|(lhs, rhs)| lhs * rhs)
        .sum::<f32>();
    let (mag_a, mag_b) = (a.norm(), b.norm());

    // A (near-)zero vector has no direction, so similarity is defined as 0.
    if mag_a < f32::EPSILON || mag_b < f32::EPSILON {
        return 0.0;
    }

    // Rounding can push the ratio marginally outside the valid cosine range.
    let ratio = dot / (mag_a * mag_b);
    ratio.clamp(-1.0, 1.0)
}
/// Computes the Euclidean (L2) distance between two feature vectors.
///
/// Returns `f32::INFINITY` when the vectors have different dimensions.
#[must_use]
pub fn euclidean_distance(a: &FeatureVector, b: &FeatureVector) -> f32 {
    if a.dim() == b.dim() {
        let squared_total: f32 = a
            .values
            .iter()
            .zip(b.values.iter())
            .map(|(lhs, rhs)| {
                let delta = lhs - rhs;
                delta.powi(2)
            })
            .sum();
        squared_total.sqrt()
    } else {
        f32::INFINITY
    }
}
/// Converts a non-negative distance into a similarity score in `(0.0, 1.0]`
/// using `1 / (1 + distance)`.
///
/// A distance of `0.0` maps to `1.0`, and the score approaches `0.0` as the
/// distance grows (`f32::INFINITY` maps to exactly `0.0`).
///
/// Negative distances are not meaningful; they are treated as a distance of
/// zero so the result can never divide by zero (`distance == -1.0`) or leave
/// the `(0.0, 1.0]` range. A `NaN` distance yields `NaN`.
#[must_use]
pub fn distance_to_similarity(distance: f32) -> f32 {
    // `NaN < 0.0` is false, so NaN falls through and propagates unchanged.
    if distance < 0.0 {
        1.0
    } else {
        1.0 / (1.0 + distance)
    }
}
/// An in-memory collection of feature vectors keyed by item identifier.
#[derive(Debug, Clone, Default)]
pub struct FeatureStore {
/// Stored vectors, indexed by each vector's `item_id`.
items: HashMap<Uuid, FeatureVector>,
}
impl FeatureStore {
    /// Creates an empty store.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Stores `fv` under its `item_id`, replacing any vector already stored
    /// for that id.
    pub fn upsert(&mut self, fv: FeatureVector) {
        self.items.insert(fv.item_id, fv);
    }

    /// Returns the vector stored for `item_id`, if any.
    #[must_use]
    pub fn get(&self, item_id: Uuid) -> Option<&FeatureVector> {
        self.items.get(&item_id)
    }

    /// Removes and returns the vector stored for `item_id`, if any.
    pub fn remove(&mut self, item_id: Uuid) -> Option<FeatureVector> {
        self.items.remove(&item_id)
    }

    /// Returns the number of stored vectors.
    #[must_use]
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Returns `true` when the store holds no vectors.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.items.is_empty()
    }

    /// Returns up to `limit` stored items ranked by cosine similarity to the
    /// item `query_id`, most similar first. The query item itself is excluded.
    ///
    /// Returns an empty list when `query_id` is not in the store.
    ///
    /// Ordering is total and reproducible: `NaN` scores (possible when stored
    /// vectors contain non-finite components) rank last, and equal scores are
    /// ordered by item id rather than by hash-map iteration order.
    #[must_use]
    pub fn most_similar(&self, query_id: Uuid, limit: usize) -> Vec<(Uuid, f32)> {
        let query = match self.items.get(&query_id) {
            Some(q) => q,
            None => return Vec::new(),
        };
        let mut scores: Vec<(Uuid, f32)> = self
            .items
            .iter()
            .filter(|(id, _)| **id != query_id)
            .map(|(id, fv)| (*id, cosine_similarity(query, fv)))
            .collect();
        // `sort_by` requires a total order; an inconsistent comparator may
        // panic or produce an unspecified order.
        scores.sort_by(|a, b| Self::rank_descending(a.1, b.1).then_with(|| a.0.cmp(&b.0)));
        scores.truncate(limit);
        scores
    }

    /// Total ordering over scores: higher first, with every `NaN` placed
    /// after all real numbers.
    fn rank_descending(left: f32, right: f32) -> std::cmp::Ordering {
        match (left.is_nan(), right.is_nan()) {
            (true, true) => std::cmp::Ordering::Equal,
            (true, false) => std::cmp::Ordering::Greater,
            (false, true) => std::cmp::Ordering::Less,
            // Neither side is NaN here, so `partial_cmp` always succeeds.
            (false, false) => right
                .partial_cmp(&left)
                .unwrap_or(std::cmp::Ordering::Equal),
        }
    }
}
/// Builds a user profile as the component-wise mean of the feature vectors of
/// the items the user rated at or above `rating_threshold`.
///
/// `liked_items` pairs an item id with the user's rating. Items that are not
/// present in `store` are ignored. The dimensionality of the profile is taken
/// from the first qualifying vector; qualifying vectors of any other
/// dimension are skipped and do not count toward the mean.
///
/// Returns `None` when no item qualifies or when the first qualifying vector
/// is empty. The returned profile carries a freshly generated
/// `Uuid::new_v4()` id.
#[must_use]
pub fn build_user_profile(
    store: &FeatureStore,
    liked_items: &[(Uuid, f32)],
    rating_threshold: f32,
) -> Option<FeatureVector> {
    let relevant: Vec<&FeatureVector> = liked_items
        .iter()
        .filter(|(_, rating)| *rating >= rating_threshold)
        .filter_map(|(id, _)| store.get(*id))
        .collect();

    let dim = relevant.first()?.dim();
    if dim == 0 {
        return None;
    }

    let mut mean = vec![0.0_f32; dim];
    // Count only the vectors that are actually summed; dividing by
    // `relevant.len()` would bias the mean toward zero whenever a
    // mismatched-dimension vector is skipped.
    let mut counted = 0_usize;
    for fv in relevant.iter().filter(|fv| fv.dim() == dim) {
        for (acc, value) in mean.iter_mut().zip(&fv.values) {
            *acc += value;
        }
        counted += 1;
    }

    // `counted >= 1`: the first vector always matches its own dimension.
    let n = counted as f32;
    for acc in &mut mean {
        *acc /= n;
    }
    Some(FeatureVector::new(Uuid::new_v4(), mean))
}
/// Scores each candidate against `profile` by cosine similarity and returns
/// up to `limit` `(item id, score)` pairs, best match first.
///
/// Candidates that are not present in `store` are skipped. Candidates with
/// equal scores keep their relative order from `candidate_ids`. `NaN` scores
/// (possible when vectors contain non-finite components) rank last.
#[must_use]
pub fn recommend_from_profile(
    store: &FeatureStore,
    profile: &FeatureVector,
    candidate_ids: &[Uuid],
    limit: usize,
) -> Vec<(Uuid, f32)> {
    // Total ordering over scores: higher first, every NaN after all real
    // numbers. `sort_by` requires a total order; treating NaN as equal to
    // everything is not one and may panic or yield an unspecified order.
    fn rank_descending(left: f32, right: f32) -> std::cmp::Ordering {
        match (left.is_nan(), right.is_nan()) {
            (true, true) => std::cmp::Ordering::Equal,
            (true, false) => std::cmp::Ordering::Greater,
            (false, true) => std::cmp::Ordering::Less,
            // Neither side is NaN here, so `partial_cmp` always succeeds.
            (false, false) => right
                .partial_cmp(&left)
                .unwrap_or(std::cmp::Ordering::Equal),
        }
    }

    let mut scores: Vec<(Uuid, f32)> = candidate_ids
        .iter()
        .filter_map(|id| {
            store
                .get(*id)
                .map(|fv| (*id, cosine_similarity(profile, fv)))
        })
        .collect();
    // Stable sort, so tied candidates stay in caller-supplied order.
    scores.sort_by(|a, b| rank_descending(a.1, b.1));
    scores.truncate(limit);
    scores
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Generates a fresh identifier for a test fixture.
    fn uid() -> Uuid {
        Uuid::new_v4()
    }

    /// Builds a feature vector owned by `id` from a slice of components.
    fn fv(id: Uuid, vals: &[f32]) -> FeatureVector {
        FeatureVector::new(id, Vec::from(vals))
    }

    /// True when `actual` lies strictly within `tol` of `expected`.
    fn close(actual: f32, expected: f32, tol: f32) -> bool {
        (actual - expected).abs() < tol
    }

    #[test]
    fn test_feature_vector_dim() {
        let vector = fv(uid(), &[1.0, 2.0, 3.0]);
        assert_eq!(vector.dim(), 3);
    }

    #[test]
    fn test_feature_vector_norm() {
        let length = fv(uid(), &[3.0, 4.0]).norm();
        assert!(close(length, 5.0, 1e-5));
    }

    #[test]
    fn test_feature_vector_zero_norm() {
        let length = fv(uid(), &[0.0, 0.0]).norm();
        assert!(close(length, 0.0, f32::EPSILON));
    }

    #[test]
    fn test_cosine_identical_vectors() {
        let vector = fv(uid(), &[1.0, 2.0, 3.0]);
        let similarity = cosine_similarity(&vector, &vector);
        assert!(close(similarity, 1.0, 1e-5));
    }

    #[test]
    fn test_cosine_orthogonal_vectors() {
        let x_axis = fv(uid(), &[1.0, 0.0]);
        let y_axis = fv(uid(), &[0.0, 1.0]);
        let similarity = cosine_similarity(&x_axis, &y_axis);
        assert!(close(similarity, 0.0, f32::EPSILON));
    }

    #[test]
    fn test_cosine_dimension_mismatch() {
        let two_d = fv(uid(), &[1.0, 2.0]);
        let one_d = fv(uid(), &[1.0]);
        let similarity = cosine_similarity(&two_d, &one_d);
        assert!(close(similarity, 0.0, f32::EPSILON));
    }

    #[test]
    fn test_euclidean_same_vector() {
        let vector = fv(uid(), &[1.0, 2.0, 3.0]);
        let distance = euclidean_distance(&vector, &vector);
        assert!(distance < 1e-5);
    }

    #[test]
    fn test_euclidean_known_distance() {
        let origin = fv(uid(), &[0.0, 0.0]);
        let point = fv(uid(), &[3.0, 4.0]);
        let distance = euclidean_distance(&origin, &point);
        assert!(close(distance, 5.0, 1e-5));
    }

    #[test]
    fn test_euclidean_dim_mismatch() {
        let two_d = fv(uid(), &[1.0, 2.0]);
        let one_d = fv(uid(), &[1.0]);
        assert!(euclidean_distance(&two_d, &one_d).is_infinite());
    }

    #[test]
    fn test_distance_to_similarity_zero() {
        let similarity = distance_to_similarity(0.0);
        assert!(close(similarity, 1.0, f32::EPSILON));
    }

    #[test]
    fn test_distance_to_similarity_large() {
        let similarity = distance_to_similarity(1_000_000.0);
        assert!(similarity < 0.001);
    }

    #[test]
    fn test_feature_store_upsert_get() {
        let item = uid();
        let mut store = FeatureStore::new();
        store.upsert(fv(item, &[1.0, 0.0]));
        assert!(store.get(item).is_some());
    }

    #[test]
    fn test_feature_store_remove() {
        let item = uid();
        let mut store = FeatureStore::new();
        store.upsert(fv(item, &[1.0]));
        assert!(store.remove(item).is_some());
        assert!(store.is_empty());
    }

    #[test]
    fn test_feature_store_most_similar() {
        let (query, near, far) = (uid(), uid(), uid());
        let mut store = FeatureStore::new();
        store.upsert(fv(query, &[1.0, 0.0]));
        store.upsert(fv(near, &[0.9, 0.1]));
        store.upsert(fv(far, &[0.0, 1.0]));

        let ranked = store.most_similar(query, 2);
        assert_eq!(ranked.len(), 2);
        assert_eq!(ranked[0].0, near);
    }

    #[test]
    fn test_build_user_profile_basic() {
        let (first, second) = (uid(), uid());
        let mut store = FeatureStore::new();
        store.upsert(fv(first, &[1.0, 0.0]));
        store.upsert(fv(second, &[0.0, 1.0]));

        let liked = [(first, 5.0), (second, 4.0)];
        let profile = build_user_profile(&store, &liked, 3.0);
        assert!(profile.is_some());
        let centroid = profile.expect("should succeed in test");
        assert_eq!(centroid.dim(), 2);
        assert!(close(centroid.values[0], 0.5, 1e-5));
        assert!(close(centroid.values[1], 0.5, 1e-5));
    }

    #[test]
    fn test_build_user_profile_threshold_filters() {
        let item = uid();
        let mut store = FeatureStore::new();
        store.upsert(fv(item, &[1.0]));

        let liked = [(item, 1.0)];
        assert!(build_user_profile(&store, &liked, 3.0).is_none());
    }

    #[test]
    fn test_recommend_from_profile() {
        let (aligned, orthogonal) = (uid(), uid());
        let mut store = FeatureStore::new();
        store.upsert(fv(aligned, &[1.0, 0.0]));
        store.upsert(fv(orthogonal, &[0.0, 1.0]));

        let profile = fv(uid(), &[1.0, 0.0]);
        let ranked = recommend_from_profile(&store, &profile, &[aligned, orthogonal], 2);
        assert_eq!(ranked.len(), 2);
        assert_eq!(ranked[0].0, aligned);
    }
}