/// Euclidean norm below which a vector is treated as the zero vector.
const EPSILON: f64 = 1e-6;

/// Computes the cosine similarity of two equal-length vectors.
///
/// Components are widened to `f64` before accumulation, so the dot product
/// and both squared norms are summed in double precision.
///
/// A vector whose own Euclidean norm is below `EPSILON` is considered
/// degenerate (it has no usable direction); if either input is degenerate the
/// function returns `0.0`. Each norm is tested on its own rather than testing
/// the product of the two norms, so the result for non-degenerate inputs does
/// not depend on their overall scale.
///
/// # Returns
///
/// The cosine of the angle between `a` and `b`, clamped to `[-1.0, 1.0]` so
/// that floating-point rounding can never push it marginally out of range.
/// If an input contains NaN the result is NaN.
///
/// # Panics
///
/// Panics if `a` and `b` do not have the same length.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f64 {
    assert_eq!(
        a.len(),
        b.len(),
        "Vectors must have same length: {} vs {}",
        a.len(),
        b.len()
    );

    let mut dot_product = 0.0_f64;
    let mut norm_sq_a = 0.0_f64;
    let mut norm_sq_b = 0.0_f64;
    for (&x, &y) in a.iter().zip(b) {
        let (x, y) = (f64::from(x), f64::from(y));
        dot_product += x * y;
        norm_sq_a += x * x;
        norm_sq_b += y * y;
    }

    // Judge each vector individually: a product-of-norms test would report
    // 0.0 for two small but perfectly well-defined vectors.
    if norm_sq_a.sqrt() < EPSILON || norm_sq_b.sqrt() < EPSILON {
        return 0.0;
    }

    // `clamp` keeps NaN as NaN, so invalid input is still visible to callers.
    (dot_product / (norm_sq_a * norm_sq_b).sqrt()).clamp(-1.0, 1.0)
}
/// Rescales `v` in place so that its Euclidean (L2) norm becomes 1.
///
/// The norm is accumulated in `f64`; every component is then multiplied by
/// the reciprocal of that norm and narrowed back to `f32`.
///
/// Vectors whose norm is below `EPSILON` (which includes empty and all-zero
/// slices) are left untouched, since dividing by such a norm is not
/// meaningful.
pub fn normalize(v: &mut [f32]) {
    let mut sum_of_squares = 0.0_f64;
    for &component in v.iter() {
        let component = f64::from(component);
        sum_of_squares += component * component;
    }

    let length = sum_of_squares.sqrt();
    if length < EPSILON {
        return;
    }

    let inverse_length = 1.0 / length;
    v.iter_mut()
        .for_each(|component| *component = (f64::from(*component) * inverse_length) as f32);
}
/// Reports whether `v` has unit Euclidean (L2) length.
///
/// The norm is computed in `f64` and accepted when it lies within an absolute
/// tolerance of `1e-4` of `1.0`. Empty and all-zero slices have norm zero and
/// therefore yield `false`.
pub fn is_normalized(v: &[f32]) -> bool {
    let mut sum_of_squares = 0.0_f64;
    for &component in v {
        let component = f64::from(component);
        sum_of_squares += component * component;
    }

    let deviation = (sum_of_squares.sqrt() - 1.0).abs();
    deviation < 1e-4
}
/// Returns up to `k` candidates most similar to `query`, best match first.
///
/// Every candidate is scored with `cosine_similarity`; candidates scoring
/// below `threshold` are discarded. The survivors are ordered by descending
/// score, and the stable sort keeps equally scored candidates in their input
/// order. Each result pairs the candidate's identifier (the first tuple
/// field) with its score.
///
/// An empty vector is returned when `candidates` is empty, when `k` is zero,
/// or when no candidate reaches `threshold`.
///
/// # Panics
///
/// Panics if a scored candidate's embedding differs in length from `query`
/// (the length check is performed by `cosine_similarity`).
pub fn top_k_similar(
    query: &[f32],
    candidates: &[(usize, &[f32])],
    k: usize,
    threshold: f64,
) -> Vec<(usize, f64)> {
    if k == 0 || candidates.is_empty() {
        return Vec::new();
    }

    let mut matches: Vec<(usize, f64)> = Vec::new();
    for &(id, embedding) in candidates {
        let similarity = cosine_similarity(query, embedding);
        if similarity >= threshold {
            matches.push((id, similarity));
        }
    }

    // Highest score first; incomparable pairs are treated as equal.
    matches.sort_by(|lhs, rhs| {
        rhs.1
            .partial_cmp(&lhs.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    matches.truncate(k);
    matches
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A normalized vector compared with itself scores 1.0.
    #[test]
    fn cosine_similarity_identical_vectors_equals_one() {
        let mut unit = vec![0.5_f32; 4];
        normalize(&mut unit);

        let sim = cosine_similarity(&unit, &unit);

        assert!((sim - 1.0).abs() < 1e-6, "Expected 1.0, got {}", sim);
    }

    /// Perpendicular axes score 0.0.
    #[test]
    fn cosine_similarity_orthogonal_vectors_equals_zero() {
        let x_axis = [1.0_f32, 0.0, 0.0];
        let y_axis = [0.0_f32, 1.0, 0.0];

        let sim = cosine_similarity(&x_axis, &y_axis);

        assert!(sim.abs() < 1e-6, "Expected 0.0, got {}", sim);
    }

    /// Vectors pointing in opposite directions score -1.0.
    #[test]
    fn cosine_similarity_opposite_vectors_equals_negative_one() {
        let forward = [1.0_f32, 0.0, 0.0];
        let backward = [-1.0_f32, 0.0, 0.0];

        let sim = cosine_similarity(&forward, &backward);

        assert!((sim + 1.0).abs() < 1e-6, "Expected -1.0, got {}", sim);
    }

    /// Swapping the arguments does not change the score.
    #[test]
    fn cosine_similarity_is_symmetric() {
        let left = [0.3_f32, 0.7, 0.2, 0.5];
        let right = [0.6_f32, 0.1, 0.8, 0.3];

        let forward = cosine_similarity(&left, &right);
        let reverse = cosine_similarity(&right, &left);

        assert!((forward - reverse).abs() < 1e-6);
    }

    /// Length mismatches are rejected with a panic.
    #[test]
    #[should_panic(expected = "Vectors must have same length")]
    fn cosine_similarity_different_lengths_panics() {
        let three = [1.0_f32, 0.0, 0.0];
        let two = [1.0_f32, 0.0];

        let _ = cosine_similarity(&three, &two);
    }

    /// Zero vectors produce 0.0 rather than NaN.
    #[test]
    fn cosine_similarity_zero_vectors_returns_zero() {
        let zero = [0.0_f32; 3];
        let normal = [1.0_f32, 0.0, 0.0];

        let zero_vs_zero = cosine_similarity(&zero, &zero);
        assert!(
            zero_vs_zero.abs() < 1e-6,
            "Zero vs zero should be 0.0, got {}",
            zero_vs_zero
        );
        assert!(
            !zero_vs_zero.is_nan(),
            "Should not return NaN for zero vectors"
        );

        let zero_vs_normal = cosine_similarity(&zero, &normal);
        assert!(
            zero_vs_normal.abs() < 1e-6,
            "Zero vs normal should be 0.0, got {}",
            zero_vs_normal
        );
        assert!(
            !zero_vs_normal.is_nan(),
            "Should not return NaN for zero vector"
        );
    }

    /// A 3-4-5 triangle normalizes to (0.6, 0.8, 0.0).
    #[test]
    fn normalize_creates_unit_vector() {
        let mut v = [3.0_f32, 4.0, 0.0];
        normalize(&mut v);

        let l2_norm = v.iter().map(|c| c * c).sum::<f32>().sqrt();

        assert!((l2_norm - 1.0).abs() < 1e-6);
        assert!((v[0] - 0.6).abs() < 1e-6);
        assert!((v[1] - 0.8).abs() < 1e-6);
    }

    /// The zero vector is left as zeros and never turns into NaN.
    #[test]
    fn normalize_zero_vector_stays_zero() {
        let mut v = [0.0_f32; 3];
        normalize(&mut v);

        for component in &v {
            assert!(component.abs() < 1e-6);
            assert!(!component.is_nan());
        }
    }

    /// Normalizing a unit vector leaves it unchanged.
    #[test]
    fn normalize_already_normalized_stays_same() {
        let original = [0.6_f32, 0.8, 0.0];
        let mut v = original;
        normalize(&mut v);

        for (after, before) in v.iter().zip(original.iter()) {
            assert!((after - before).abs() < 1e-6);
        }
    }

    /// Unit-length input is accepted; longer input is rejected.
    #[test]
    fn is_normalized_detects_unit_vectors() {
        let unit = [0.6_f32, 0.8, 0.0];
        let non_unit = [3.0_f32, 4.0, 0.0];

        assert!(is_normalized(&unit));
        assert!(!is_normalized(&non_unit));
    }

    /// The zero vector is not unit length.
    #[test]
    fn is_normalized_false_for_zero_vector() {
        let zero = [0.0_f32; 3];

        assert!(!is_normalized(&zero));
    }

    /// With more candidates than `k`, exactly `k` results come back.
    #[test]
    fn top_k_similar_returns_k_results() {
        let query = [1.0_f32, 0.0];
        let candidates: Vec<(usize, &[f32])> = vec![
            (0, &[0.9_f32, 0.1][..]),
            (1, &[0.1_f32, 0.9][..]),
            (2, &[0.7_f32, 0.3][..]),
            (3, &[0.8_f32, 0.2][..]),
        ];

        let hits = top_k_similar(&query, &candidates, 2, 0.0);

        assert_eq!(hits.len(), 2);
    }

    /// Results are sorted from highest to lowest score.
    #[test]
    fn top_k_similar_ordered_by_score_descending() {
        let query = [1.0_f32, 0.0];
        let candidates: Vec<(usize, &[f32])> = vec![
            (0, &[0.9_f32, 0.1][..]),
            (1, &[0.1_f32, 0.9][..]),
            (2, &[0.7_f32, 0.3][..]),
        ];

        let hits = top_k_similar(&query, &candidates, 3, 0.0);

        assert_eq!(hits.len(), 3);
        for pair in hits.windows(2) {
            assert!(pair[0].1 >= pair[1].1);
        }
        assert_eq!(hits[0].0, 0);
    }

    /// Every returned score is at least the requested threshold.
    #[test]
    fn top_k_similar_respects_threshold() {
        let query = [1.0_f32, 0.0];
        let candidates: Vec<(usize, &[f32])> = vec![
            (0, &[0.99_f32, 0.01][..]),
            (1, &[0.1_f32, 0.9][..]),
            (2, &[0.5_f32, 0.5][..]),
        ];

        let hits = top_k_similar(&query, &candidates, 10, 0.8);

        assert!(!hits.is_empty());
        for &(_, score) in &hits {
            assert!(score >= 0.8, "Score {} below threshold 0.8", score);
        }
    }

    /// No candidates means no results.
    #[test]
    fn top_k_similar_empty_candidates_returns_empty() {
        let query = [1.0_f32, 0.0];
        let candidates: Vec<(usize, &[f32])> = Vec::new();

        let hits = top_k_similar(&query, &candidates, 10, 0.0);

        assert!(hits.is_empty());
    }

    /// Asking for more results than exist returns every candidate.
    #[test]
    fn top_k_similar_k_larger_than_candidates() {
        let query = [1.0_f32, 0.0];
        let candidates: Vec<(usize, &[f32])> = vec![
            (0, &[0.9_f32, 0.1][..]),
            (1, &[0.1_f32, 0.9][..]),
        ];

        let hits = top_k_similar(&query, &candidates, 10, 0.0);

        assert_eq!(hits.len(), 2);
    }

    /// `k == 0` yields an empty result.
    #[test]
    fn top_k_similar_k_zero_returns_empty() {
        let query = [1.0_f32, 0.0];
        let candidates: Vec<(usize, &[f32])> = vec![(0, &[0.9_f32, 0.1][..])];

        let hits = top_k_similar(&query, &candidates, 0, 0.0);

        assert!(hits.is_empty());
    }

    /// When nothing reaches the threshold the result is empty.
    #[test]
    fn top_k_similar_all_below_threshold_returns_empty() {
        let query = [1.0_f32, 0.0];
        let candidates: Vec<(usize, &[f32])> = vec![
            (0, &[0.0_f32, 1.0][..]),
            (1, &[-1.0_f32, 0.0][..]),
        ];

        let hits = top_k_similar(&query, &candidates, 10, 0.5);

        assert!(hits.is_empty());
    }
}