use super::vector::Hypervector;
use super::HYPERVECTOR_BITS;
/// Returns the Hamming distance between `v1` and `v2`.
///
/// This is a free-function wrapper that forwards directly to
/// `Hypervector::hamming_distance`; the result is whatever that method reports.
#[inline]
pub fn hamming_distance(v1: &Hypervector, v2: &Hypervector) -> u32 {
v1.hamming_distance(v2)
}
/// Returns the similarity score between `v1` and `v2`.
///
/// This is a free-function wrapper that forwards directly to
/// `Hypervector::similarity`.
///
/// NOTE(review): the name says "cosine", but this function only delegates;
/// confirm that `Hypervector::similarity` actually computes cosine similarity.
#[inline]
pub fn cosine_similarity(v1: &Hypervector, v2: &Hypervector) -> f32 {
v1.similarity(v2)
}
/// Returns a Hamming-based similarity score for `v1` and `v2`.
///
/// The score is `1.0 - distance / HYPERVECTOR_BITS`, where `distance` is the
/// Hamming distance between the two vectors. Identical vectors (distance 0)
/// therefore score `1.0`, and the score decreases as more bits differ.
pub fn normalized_hamming(v1: &Hypervector, v2: &Hypervector) -> f32 {
    let differing_bits = v1.hamming_distance(v2) as f32;
    let total_bits = HYPERVECTOR_BITS as f32;
    let differing_fraction = differing_bits / total_bits;
    1.0 - differing_fraction
}
/// Returns the Jaccard similarity of the set bits of `v1` and `v2`.
///
/// The score is the number of bit positions set in both vectors divided by
/// the number of bit positions set in at least one of them.
///
/// When neither vector has any bit set the union is empty; the two vectors
/// are then treated as identical and the function returns `1.0` instead of
/// dividing by zero.
pub fn jaccard_similarity(v1: &Hypervector, v2: &Hypervector) -> f32 {
    let words1 = v1.bits();
    let words2 = v2.bits();

    let mut intersection = 0u32;
    let mut union = 0u32;
    // Walk both word sequences in lockstep; zipping avoids per-element index
    // bounds checks and the panic path of manual indexing.
    for (a, b) in words1.iter().zip(words2.iter()) {
        intersection += (a & b).count_ones();
        union += (a | b).count_ones();
    }

    if union == 0 {
        // Both vectors are all-zero: define the similarity as a perfect match.
        1.0
    } else {
        intersection as f32 / union as f32
    }
}
/// Returns the `k` candidates most similar to `query`, best first.
///
/// Each entry is `(index, similarity)`, where `index` is the candidate's
/// position in `candidates` and `similarity` is `query.similarity(candidate)`.
/// If `k` exceeds the number of candidates, every candidate is returned.
/// Candidates with equal scores keep their original relative order (the sort
/// is stable).
///
/// Scores that are NaN are ordered after every real score, so they can only
/// appear in the result once all comparable candidates have been included.
pub fn top_k_similar(
    query: &Hypervector,
    candidates: &[Hypervector],
    k: usize,
) -> Vec<(usize, f32)> {
    // Nothing to rank: skip scoring and sorting entirely.
    if k == 0 || candidates.is_empty() {
        return Vec::new();
    }

    let mut similarities: Vec<(usize, f32)> = candidates
        .iter()
        .enumerate()
        .map(|(idx, candidate)| (idx, query.similarity(candidate)))
        .collect();

    // `sort_by` requires a consistent total order. Mapping every incomparable
    // pair to `Less` (as a bare `partial_cmp(..).unwrap_or(Less)` would) is
    // inconsistent when NaN is present, so NaN is ranked explicitly: all NaNs
    // are equal to each other and sort after every real score.
    similarities.sort_by(|a, b| match (a.1.is_nan(), b.1.is_nan()) {
        // Descending by score; the fallback is unreachable for non-NaN values.
        (false, false) => b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal),
        (true, true) => std::cmp::Ordering::Equal,
        (true, false) => std::cmp::Ordering::Greater,
        (false, true) => std::cmp::Ordering::Less,
    });

    similarities.truncate(k);
    similarities
}
/// Builds the full similarity matrix for `vectors`.
///
/// The result is an `n x n` matrix (`n = vectors.len()`) where entry
/// `[i][j]` holds `vectors[i].similarity(&vectors[j])`. Diagonal entries are
/// set to `1.0` without calling `similarity`, and each off-diagonal pair is
/// computed once and written to both `[i][j]` and `[j][i]`.
pub fn pairwise_similarities(vectors: &[Hypervector]) -> Vec<Vec<f32>> {
    let count = vectors.len();
    let mut grid = vec![vec![0.0; count]; count];

    for (row, left) in vectors.iter().enumerate() {
        grid[row][row] = 1.0;
        // Only visit the upper triangle; mirror each score across the diagonal.
        for (col, right) in vectors.iter().enumerate().skip(row + 1) {
            let score = left.similarity(right);
            grid[row][col] = score;
            grid[col][row] = score;
        }
    }

    grid
}
/// Scores every candidate against `query`.
///
/// Returns one similarity per candidate, in the same order as `candidates`.
/// Candidates are processed in groups of four, followed by any leftover
/// elements; the output is the same as scoring them one at a time.
#[inline]
pub fn batch_similarities(query: &Hypervector, candidates: &[Hypervector]) -> Vec<f32> {
    let mut scores = Vec::with_capacity(candidates.len());

    let mut quads = candidates.chunks_exact(4);
    for quad in quads.by_ref() {
        scores.push(query.similarity(&quad[0]));
        scores.push(query.similarity(&quad[1]));
        scores.push(query.similarity(&quad[2]));
        scores.push(query.similarity(&quad[3]));
    }

    // Fewer than four candidates remain after the full groups.
    for leftover in quads.remainder() {
        scores.push(query.similarity(leftover));
    }

    scores
}
/// Returns the indices of all candidates whose similarity to `query` is at
/// least `threshold`.
///
/// Indices refer to positions in `candidates` and are returned in ascending
/// order. The comparison is inclusive (`>=`).
pub fn find_similar(query: &Hypervector, candidates: &[Hypervector], threshold: f32) -> Vec<usize> {
    let mut matches = Vec::new();
    for (position, candidate) in candidates.iter().enumerate() {
        if query.similarity(candidate) >= threshold {
            matches.push(position);
        }
    }
    matches
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A vector compared with itself has distance zero.
    #[test]
    fn test_hamming_distance_identical() {
        let sample = Hypervector::random();
        assert_eq!(hamming_distance(&sample, &sample), 0);
    }

    /// Two random vectors are expected to land strictly between 4000 and 6000.
    #[test]
    fn test_hamming_distance_random() {
        let left = Hypervector::random();
        let right = Hypervector::random();
        let dist = hamming_distance(&left, &right);
        assert!(4000 < dist && dist < 6000, "distance: {}", dist);
    }

    /// A vector compared with itself scores (approximately) 1.0.
    #[test]
    fn test_cosine_similarity_identical() {
        let sample = Hypervector::random();
        let score = cosine_similarity(&sample, &sample);
        assert!((score - 1.0).abs() < 0.001);
    }

    /// The similarity of two random vectors stays within [-1, 1].
    #[test]
    fn test_cosine_similarity_bounds() {
        let left = Hypervector::random();
        let right = Hypervector::random();
        let sim = cosine_similarity(&left, &right);
        assert!(
            (-1.0..=1.0).contains(&sim),
            "similarity out of bounds: {}",
            sim
        );
    }

    /// Normalized Hamming similarity of a vector with itself is ~1.0.
    #[test]
    fn test_normalized_hamming_identical() {
        let sample = Hypervector::random();
        let score = normalized_hamming(&sample, &sample);
        assert!((score - 1.0).abs() < 0.001);
    }

    /// Two random vectors are expected to score strictly between 0.3 and 0.7.
    #[test]
    fn test_normalized_hamming_random() {
        let left = Hypervector::random();
        let right = Hypervector::random();
        let sim = normalized_hamming(&left, &right);
        assert!(0.3 < sim && sim < 0.7, "similarity: {}", sim);
    }

    /// Jaccard similarity of a vector with itself is ~1.0.
    #[test]
    fn test_jaccard_identical() {
        let sample = Hypervector::random();
        let score = jaccard_similarity(&sample, &sample);
        assert!((score - 1.0).abs() < 0.001);
    }

    /// Two all-zero vectors have an empty union and are defined to score 1.0.
    #[test]
    fn test_jaccard_zero_vectors() {
        let left = Hypervector::zero();
        let right = Hypervector::zero();
        let score = jaccard_similarity(&left, &right);
        assert!((score - 1.0).abs() < 0.001);
    }

    /// Jaccard similarity always lies within [0, 1].
    #[test]
    fn test_jaccard_bounds() {
        let left = Hypervector::random();
        let right = Hypervector::random();
        let score = jaccard_similarity(&left, &right);
        assert!((0.0..=1.0).contains(&score));
    }

    /// Asking for the top 3 of 10 returns 3 entries in non-increasing order.
    #[test]
    fn test_top_k_similar() {
        let query = Hypervector::from_seed(0);
        let pool: Vec<_> = (1..11).map(|seed| Hypervector::from_seed(seed)).collect();
        let best = top_k_similar(&query, &pool, 3);
        assert_eq!(best.len(), 3);
        for pair in best.windows(2) {
            assert!(pair[0].1 >= pair[1].1);
        }
    }

    /// Asking for more results than candidates returns every candidate.
    #[test]
    fn test_top_k_more_than_candidates() {
        let query = Hypervector::random();
        let pool: Vec<_> = (0..5).map(|_| Hypervector::random()).collect();
        let best = top_k_similar(&query, &pool, 10);
        assert_eq!(best.len(), 5);
    }

    /// Every diagonal entry of the pairwise matrix is ~1.0.
    #[test]
    fn test_pairwise_similarities_diagonal() {
        let inputs: Vec<_> = (0..5).map(|seed| Hypervector::from_seed(seed)).collect();
        let grid = pairwise_similarities(&inputs);
        assert_eq!(grid.len(), 5);
        for (idx, row) in grid.iter().enumerate() {
            assert!((row[idx] - 1.0).abs() < 0.001);
        }
    }

    /// The pairwise matrix equals its own transpose.
    #[test]
    fn test_pairwise_similarities_symmetric() {
        let inputs: Vec<_> = (0..5).map(|seed| Hypervector::from_seed(seed)).collect();
        let grid = pairwise_similarities(&inputs);
        for row in 0..5 {
            for col in 0..5 {
                let gap = grid[row][col] - grid[col][row];
                assert!(gap.abs() < 0.001);
            }
        }
    }

    /// Every entry of the pairwise matrix stays within [-1, 1].
    #[test]
    fn test_pairwise_similarities_bounds() {
        let inputs: Vec<_> = (0..5).map(|_| Hypervector::random()).collect();
        let grid = pairwise_similarities(&inputs);
        for &sim in grid.iter().flatten() {
            assert!(
                (-1.0..=1.0).contains(&sim),
                "similarity out of bounds: {}",
                sim
            );
        }
    }
}