#![allow(clippy::unwrap_used, clippy::expect_used)]
use std::collections::HashSet;
use vicinity::hnsw::HNSWIndex;
fn main() -> vicinity::Result<()> {
let dim = 128;
let n = 2000;
let k = 10;
let ef = 100;
let n_queries = 100;
let raw_vectors: Vec<Vec<f32>> = (0..n).map(|i| random_vec(dim, i)).collect();
let norm_vectors: Vec<Vec<f32>> = raw_vectors.iter().map(|v| normalize(v)).collect();
let mut raw_index = HNSWIndex::new(dim, 16, 100)?;
for (id, vec) in raw_vectors.iter().enumerate() {
raw_index.add(id as u32, vec.clone())?;
}
raw_index.build()?;
let mut norm_index = HNSWIndex::new(dim, 16, 100)?;
for (id, vec) in norm_vectors.iter().enumerate() {
norm_index.add(id as u32, vec.clone())?;
}
norm_index.build()?;
let mut recall_raw = 0.0;
let mut recall_norm = 0.0;
for q in 0..n_queries {
let query_idx = (q * 19) % n;
let raw_query = &raw_vectors[query_idx];
let norm_query = &norm_vectors[query_idx];
let gt = brute_force_cosine_knn(norm_query, &norm_vectors, k);
let raw_results = raw_index.search(raw_query, k, ef)?;
let raw_ids: HashSet<u32> = raw_results.iter().map(|(id, _)| *id).collect();
recall_raw += gt.intersection(&raw_ids).count() as f32 / k as f32;
let norm_results = norm_index.search(norm_query, k, ef)?;
let norm_ids: HashSet<u32> = norm_results.iter().map(|(id, _)| *id).collect();
recall_norm += gt.intersection(&norm_ids).count() as f32 / k as f32;
}
recall_raw /= n_queries as f32;
recall_norm /= n_queries as f32;
println!("L2-normalization impact on HNSW recall");
println!(" n={}, dim={}, k={}, ef={}", n, dim, k, ef);
println!();
println!(
" Un-normalized vectors: recall@{} = {:.1}%",
k,
recall_raw * 100.0
);
println!(
" L2-normalized vectors: recall@{} = {:.1}%",
k,
recall_norm * 100.0
);
println!();
if recall_norm - recall_raw > 0.05 {
println!(
" --> Normalization improved recall by {:.1} percentage points.",
(recall_norm - recall_raw) * 100.0
);
println!(" HNSW's distance function assumes normalized input.");
println!(" Always normalize vectors before adding them to the index.");
} else {
println!(" --> Recall was similar (vectors may have had uniform magnitude).");
println!(" Normalization is still recommended for correctness.");
}
Ok(())
}
fn random_vec(dim: usize, seed: usize) -> Vec<f32> {
let scale = 1.0 + (seed % 10) as f32 * 2.0;
(0..dim)
.map(|i| ((seed * 31 + i * 17) as f32 * 0.001).sin() * scale)
.collect()
}
fn normalize(v: &[f32]) -> Vec<f32> {
let norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt();
if norm > 0.0 {
v.iter().map(|x| x / norm).collect()
} else {
v.to_vec()
}
}
fn brute_force_cosine_knn(query: &[f32], data: &[Vec<f32>], k: usize) -> HashSet<u32> {
let mut dists: Vec<(u32, f32)> = data
.iter()
.enumerate()
.map(|(i, v)| {
let dot: f32 = query.iter().zip(v).map(|(a, b)| a * b).sum();
(i as u32, 1.0 - dot) })
.collect();
dists.sort_by(|a, b| a.1.total_cmp(&b.1));
dists.into_iter().take(k).map(|(id, _)| id).collect()
}