/// Scores two equal-length vectors by their dot product.
///
/// The product is computed with `simsimd::SpatialSimilarity::dot`; when that
/// call yields `None`, a scalar loop that accumulates in `f64` is used instead.
/// No division by the vector norms happens here, so the score equals the
/// cosine similarity only when both inputs have unit length.
/// NOTE(review): presumably callers pass pre-normalized embeddings — confirm.
///
/// # Returns
///
/// * `None` when the slices differ in length or are empty.
/// * `None` when the score, narrowed to `f32`, is NaN or infinite.
/// * `Some(score)` otherwise.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> Option<f32> {
    use simsimd::SpatialSimilarity;

    // Guard: a score is only defined for non-empty vectors of equal dimension.
    if a.is_empty() || a.len() != b.len() {
        return None;
    }

    let wide: f64 = match f32::dot(a, b) {
        Some(product) => product,
        // Scalar fallback; widen each component before multiplying.
        None => a
            .iter()
            .zip(b)
            .map(|(&x, &y)| f64::from(x) * f64::from(y))
            .sum::<f64>(),
    };

    // Narrow to the public return type, then discard NaN / +-Inf.
    let narrowed = wide as f32;
    Some(narrowed).filter(|value| value.is_finite())
}
/// Computes the cosine similarity `dot(a, b) / (|a| * |b|)` of two vectors.
///
/// The dot product and both squared norms are accumulated in `f64` in a single
/// pass over the inputs; only the final quotient is narrowed to `f32`.
///
/// # Returns
///
/// * `None` when the slices differ in length (a warning is also emitted via
///   `tracing::warn!`) or when they are empty.
/// * `None` when the product of the two norms is exactly zero.
/// * `None` when the narrowed result is NaN or infinite.
/// * `Some(similarity)` otherwise.
pub fn full_cosine_similarity(a: &[f32], b: &[f32]) -> Option<f32> {
    // Only the mismatch case is logged; empty input is rejected silently.
    if a.len() != b.len() {
        tracing::warn!(
            a_len = a.len(),
            b_len = b.len(),
            "full_cosine_similarity: dimension mismatch"
        );
        return None;
    }
    if a.is_empty() {
        return None;
    }

    // One pass: (dot product, squared norm of a, squared norm of b).
    let (dot, sq_norm_a, sq_norm_b) = a.iter().zip(b.iter()).fold(
        (0.0f64, 0.0f64, 0.0f64),
        |(dot, sq_a, sq_b), (&x, &y)| {
            let (xd, yd) = (f64::from(x), f64::from(y));
            (dot + xd * yd, sq_a + xd * xd, sq_b + yd * yd)
        },
    );

    let denom = sq_norm_a.sqrt() * sq_norm_b.sqrt();
    if denom == 0.0 {
        // At least one vector has zero magnitude; the quotient is undefined.
        return None;
    }

    // A NaN denominator is not caught above and is rejected by this filter.
    let similarity = (dot / denom) as f32;
    Some(similarity).filter(|value| value.is_finite())
}
#[cfg(test)]
mod tests {
    // Unit tests for `cosine_similarity` (raw dot product) and
    // `full_cosine_similarity` (dot product divided by both norms).
    use super::*;

    /// Builds a vector of `crate::EMBEDDING_DIM` components, all equal to `val`.
    fn make_embedding(val: f32) -> Vec<f32> {
        vec![val; crate::EMBEDDING_DIM]
    }

    /// Builds a vector of `crate::EMBEDDING_DIM` zeros with `1.0` at `idx`.
    ///
    /// Panics if `idx` is not a valid index for that length.
    fn make_unit_embedding(idx: usize) -> Vec<f32> {
        let mut v = vec![0.0; crate::EMBEDDING_DIM];
        v[idx] = 1.0;
        v
    }

    /// A unit-length vector compared with itself must score ~1.0.
    ///
    /// `cosine_similarity` does not divide by the norms, so the input is
    /// normalized here; with a non-normalized vector the score would scale with
    /// the dimension and a lower-bound check would pass vacuously.
    #[test]
    fn test_cosine_similarity_identical() {
        let component = 1.0 / (crate::EMBEDDING_DIM as f32).sqrt();
        let a = make_embedding(component);
        let sim = cosine_similarity(&a, &a).expect("Should succeed for valid embeddings");
        assert!((sim - 1.0).abs() < 1e-3, "Expected ~1.0, got {}", sim);
    }

    /// Two distinct axis-aligned unit vectors share no component, so the score is ~0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let a = make_unit_embedding(0);
        let b = make_unit_embedding(1);
        let sim = cosine_similarity(&a, &b).expect("Should succeed for valid embeddings");
        assert!(sim.abs() < 0.01, "Expected ~0, got {}", sim);
    }

    /// Swapping the arguments must not change the score.
    #[test]
    fn test_cosine_similarity_symmetric() {
        let a: Vec<f32> = (0..crate::EMBEDDING_DIM)
            .map(|i| (i as f32) / crate::EMBEDDING_DIM as f32)
            .collect();
        let b: Vec<f32> = (0..crate::EMBEDDING_DIM)
            .map(|i| 1.0 - (i as f32) / crate::EMBEDDING_DIM as f32)
            .collect();
        let sim_ab = cosine_similarity(&a, &b).expect("Should succeed");
        let sim_ba = cosine_similarity(&b, &a).expect("Should succeed");
        assert!((sim_ab - sim_ba).abs() < 1e-6, "Should be symmetric");
    }

    /// Ordinary finite inputs must produce a finite score.
    #[test]
    fn test_cosine_similarity_range() {
        let a: Vec<f32> = (0..crate::EMBEDDING_DIM)
            .map(|i| ((i * 7) % 100) as f32 / 100.0)
            .collect();
        let b: Vec<f32> = (0..crate::EMBEDDING_DIM)
            .map(|i| ((i * 13) % 100) as f32 / 100.0)
            .collect();
        let sim = cosine_similarity(&a, &b).expect("Should succeed");
        assert!(sim.is_finite(), "Should be finite");
    }

    /// Length mismatch is rejected; any matching non-zero length is accepted.
    #[test]
    fn test_cosine_similarity_dimension_mismatch() {
        let a: Vec<f32> = vec![0.5; 100];
        let b: Vec<f32> = vec![0.5; crate::EMBEDDING_DIM];
        assert!(
            cosine_similarity(&a, &b).is_none(),
            "Should fail for mismatched dimensions"
        );
        assert!(
            cosine_similarity(&a, &a).is_some(),
            "Same-length vectors should succeed regardless of dimension"
        );
    }

    /// Empty slices have equal length but are still rejected.
    #[test]
    fn cosine_empty_input() {
        let empty: [f32; 0] = [];
        assert!(
            cosine_similarity(&empty, &empty).is_none(),
            "Empty input should return None"
        );
    }

    /// NaN on either side poisons the product and must surface as `None`.
    #[test]
    fn cosine_nan_embedding() {
        let nan_emb = vec![f32::NAN; crate::EMBEDDING_DIM];
        let normal_emb = make_embedding(0.5);
        assert!(
            cosine_similarity(&nan_emb, &normal_emb).is_none(),
            "NaN embedding vs normal should return None"
        );
        assert!(
            cosine_similarity(&normal_emb, &nan_emb).is_none(),
            "Normal vs NaN embedding should return None"
        );
    }

    /// A single `+Inf` component makes the score non-finite, hence `None`.
    #[test]
    fn cosine_inf_embedding() {
        let mut inf_emb = make_embedding(0.5);
        inf_emb[42] = f32::INFINITY;
        let normal_emb = make_embedding(0.5);
        assert!(
            cosine_similarity(&inf_emb, &normal_emb).is_none(),
            "Vector with Inf value vs normal should return None"
        );
    }

    /// An all-zero vector may be rejected or scored, but never with NaN/Inf.
    #[test]
    fn cosine_zero_norm_vector() {
        let zero_emb = make_embedding(0.0);
        let normal_emb = make_embedding(0.5);
        match cosine_similarity(&zero_emb, &normal_emb) {
            // Rejecting the input is an acceptable outcome.
            None => {}
            Some(v) => assert!(v.is_finite(), "Zero-norm result must be finite, got {v}"),
        }
    }

    /// A single `-Inf` component makes the score non-finite, hence `None`.
    #[test]
    fn cosine_negative_inf_embedding() {
        let mut neg_inf_emb = make_embedding(0.5);
        neg_inf_emb[0] = f32::NEG_INFINITY;
        let normal_emb = make_embedding(0.5);
        assert!(
            cosine_similarity(&neg_inf_emb, &normal_emb).is_none(),
            "Vector with NEG_INFINITY vs normal should return None"
        );
    }

    /// Subnormal components may be rejected or scored, but never with NaN/Inf.
    #[test]
    fn cosine_subnormal_values() {
        let subnormal_emb = make_embedding(f32::MIN_POSITIVE / 2.0);
        match cosine_similarity(&subnormal_emb, &subnormal_emb) {
            // Rejecting the input is an acceptable outcome.
            None => {}
            Some(v) => assert!(v.is_finite(), "Subnormal result must be finite, got {v}"),
        }
    }

    /// Known value: cos([1,2,3], [4,5,6]) = 32 / (sqrt(14) * sqrt(77)) ~ 0.9746.
    #[test]
    fn full_cosine_normal_vectors() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];
        let sim = full_cosine_similarity(&a, &b).unwrap();
        assert!(
            (sim - 0.9746).abs() < 0.001,
            "Expected ~0.9746, got {}",
            sim
        );
    }

    /// Perpendicular axes have a zero dot product.
    #[test]
    fn full_cosine_orthogonal_vectors() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];
        let sim = full_cosine_similarity(&a, &b).unwrap();
        assert!(
            sim.abs() < 1e-6,
            "Orthogonal vectors should have ~0 similarity, got {}",
            sim
        );
    }

    /// Normalization makes a non-unit vector score 1.0 against itself.
    #[test]
    fn full_cosine_identical_vectors() {
        let a = vec![3.0, 4.0, 5.0];
        let sim = full_cosine_similarity(&a, &a).unwrap();
        assert!(
            (sim - 1.0).abs() < 1e-6,
            "Identical vectors should have similarity ~1.0, got {}",
            sim
        );
    }

    /// A zero-magnitude vector on either side yields a zero denominator and `None`.
    #[test]
    fn full_cosine_zero_norm_vector() {
        let zero = vec![0.0, 0.0, 0.0];
        let normal = vec![1.0, 2.0, 3.0];
        assert_eq!(
            full_cosine_similarity(&zero, &normal),
            None,
            "Zero-norm vector should return None"
        );
        assert_eq!(
            full_cosine_similarity(&normal, &zero),
            None,
            "Normal vs zero-norm should return None"
        );
        assert_eq!(
            full_cosine_similarity(&zero, &zero),
            None,
            "Both zero-norm should return None"
        );
    }

    /// NaN propagates through the sums and is rejected by the finiteness check.
    #[test]
    fn full_cosine_nan_input() {
        let nan_vec = vec![f32::NAN, 1.0, 2.0];
        let normal = vec![1.0, 2.0, 3.0];
        assert_eq!(
            full_cosine_similarity(&nan_vec, &normal),
            None,
            "NaN input should return None"
        );
    }

    /// Slices of different length are rejected.
    #[test]
    fn full_cosine_mismatched_dimensions() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 2.0];
        assert_eq!(
            full_cosine_similarity(&a, &b),
            None,
            "Mismatched dimensions should return None"
        );
    }

    /// Empty slices have equal length but are still rejected.
    #[test]
    fn full_cosine_empty_input() {
        let empty: [f32; 0] = [];
        assert_eq!(
            full_cosine_similarity(&empty, &empty),
            None,
            "Empty input should return None"
        );
    }
}