#![cfg(all(feature = "hnsw", feature = "vamana"))]
#![allow(clippy::unwrap_used, clippy::expect_used, clippy::needless_update)]
#![allow(dead_code, unused_imports)]
#[path = "common/mod.rs"]
mod common;
use common::*;
use proptest::prelude::*;
use std::collections::HashSet;
#[cfg(feature = "hnsw")]
fn build_hnsw(
n: usize,
dim: usize,
m: usize,
seed: u64,
) -> (vicinity::hnsw::HNSWIndex, Vec<Vec<f32>>) {
let params = vicinity::hnsw::HNSWParams {
m,
m_max: m * 2,
ef_construction: 100,
seed: Some(seed),
..vicinity::hnsw::HNSWParams::default()
};
let mut idx = vicinity::hnsw::HNSWIndex::with_params(dim, params).unwrap();
let vectors: Vec<Vec<f32>> = random_vectors(n, dim, seed)
.into_iter()
.map(|v| normalize(&v))
.collect();
for (i, v) in vectors.iter().enumerate() {
idx.add(i as u32, v.clone()).unwrap();
}
idx.build().unwrap();
(idx, vectors)
}
#[cfg(feature = "hnsw")]
proptest! {
#![proptest_config(ProptestConfig::with_cases(20))]
#[test]
fn prop_deleted_ids_absent_from_results(
n in 50usize..200,
dim in prop::sample::select(vec![8, 16, 32]),
num_delete in 1usize..30,
seed in 1u64..10000,
) {
let (mut idx, _) = build_hnsw(n, dim, 8, seed);
let num_delete = num_delete.min(n / 2);
let deleted_ids: Vec<u32> = (0..num_delete as u32).collect();
for &id in &deleted_ids {
idx.delete_with_repair(id).unwrap();
}
let deleted_set: HashSet<u32> = deleted_ids.iter().copied().collect();
let queries = random_vectors(5, dim, seed.wrapping_add(9999));
for q in &queries {
let q_norm = normalize(q);
let results = idx.search(&q_norm, 10, 64).unwrap();
for (id, _) in &results {
prop_assert!(
!deleted_set.contains(id),
"deleted id {} found in results after delete_with_repair",
id
);
}
}
}
}
#[cfg(feature = "hnsw")]
proptest! {
#![proptest_config(ProptestConfig::with_cases(15))]
#[test]
fn prop_batch_deleted_ids_absent(
n in 80usize..200,
dim in prop::sample::select(vec![8, 16, 32]),
num_delete in 5usize..40,
seed in 1u64..10000,
) {
let (mut idx, _) = build_hnsw(n, dim, 8, seed);
let num_delete = num_delete.min(n / 2);
let delete_ids: Vec<u32> = (0..num_delete as u32).collect();
idx.delete_batch_with_repair(&delete_ids).unwrap();
let deleted_set: HashSet<u32> = delete_ids.iter().copied().collect();
let queries = random_vectors(5, dim, seed.wrapping_add(7777));
for q in &queries {
let q_norm = normalize(q);
let results = idx.search(&q_norm, 10, 64).unwrap();
for (id, _) in &results {
prop_assert!(
!deleted_set.contains(id),
"deleted id {} in batch results",
id
);
}
}
}
}
#[cfg(feature = "hnsw")]
proptest! {
#![proptest_config(ProptestConfig::with_cases(15))]
#[test]
fn prop_deleted_exhaustive_check(
n in 50usize..150,
dim in prop::sample::select(vec![8, 16, 32]),
num_delete in 1usize..20,
seed in 1u64..10000,
) {
let (mut idx, vectors) = build_hnsw(n, dim, 8, seed);
let num_delete = num_delete.min(n / 3);
for doc_id in 0..num_delete as u32 {
idx.delete_with_repair(doc_id).unwrap();
}
for doc_id in 0..num_delete as u32 {
let q = &vectors[doc_id as usize];
let results = idx.search(q, n, n).unwrap();
for (id, _) in &results {
prop_assert!(
*id >= num_delete as u32,
"deleted id {} found when querying with deleted vector {}",
id,
doc_id
);
}
}
}
}
#[cfg(feature = "hnsw")]
proptest! {
#![proptest_config(ProptestConfig::with_cases(10))]
#[test]
fn prop_seeded_hnsw_deterministic(
n in 20usize..100,
dim in prop::sample::select(vec![8, 16, 32]),
seed in 1u64..10000,
) {
let (idx1, _) = build_hnsw(n, dim, 8, seed);
let (idx2, _) = build_hnsw(n, dim, 8, seed);
let queries = random_vectors(5, dim, seed + 777);
for q in &queries {
let q_norm = normalize(q);
let r1 = idx1.search(&q_norm, 10, 64).unwrap();
let r2 = idx2.search(&q_norm, 10, 64).unwrap();
prop_assert_eq!(r1.len(), r2.len(), "different result count with same seed");
for (a, b) in r1.iter().zip(r2.iter()) {
prop_assert_eq!(a.0, b.0, "different result IDs with same seed");
}
}
}
}
#[cfg(feature = "vamana")]
proptest! {
#![proptest_config(ProptestConfig::with_cases(8))]
#[test]
fn prop_seeded_vamana_deterministic(
n in 20usize..60,
dim in prop::sample::select(vec![8, 16]),
seed in 1u64..10000,
) {
let vectors: Vec<Vec<f32>> = random_vectors(n, dim, seed)
.into_iter()
.map(|v| normalize(&v))
.collect();
let build = |s: u64| {
let params = vicinity::vamana::VamanaParams {
max_degree: 16,
alpha: 1.3,
ef_construction: 50,
ef_search: 20,
seed: Some(s),
..vicinity::vamana::VamanaParams::default()
};
let mut idx = vicinity::vamana::VamanaIndex::new(dim, params).unwrap();
for (i, v) in vectors.iter().enumerate() {
idx.add(i as u32, v.clone()).unwrap();
}
idx.build().unwrap();
idx
};
let idx1 = build(seed);
let idx2 = build(seed);
let q = normalize(&random_vectors(1, dim, seed + 999)[0]);
let r1 = idx1.search(&q, 10, 50).unwrap();
let r2 = idx2.search(&q, 10, 50).unwrap();
prop_assert_eq!(r1.len(), r2.len(), "different result count with same seed");
for (a, b) in r1.iter().zip(r2.iter()) {
prop_assert_eq!(a.0, b.0, "different result IDs with same seed");
}
}
}
#[cfg(feature = "hnsw")]
proptest! {
#![proptest_config(ProptestConfig::with_cases(10))]
#[test]
fn prop_recall_survives_deletion(
n in 100usize..300,
dim in prop::sample::select(vec![16, 32]),
delete_frac in 10usize..40, seed in 1u64..10000,
) {
let (mut idx, _vectors) = build_hnsw(n, dim, 12, seed);
let num_delete = (n * delete_frac) / 100;
for i in 0..num_delete as u32 {
idx.delete_with_repair(i).unwrap();
}
let remaining = n - num_delete;
let k = 10.min(remaining);
if k == 0 {
return Ok(());
}
let queries = random_vectors(5, dim, seed + 5555);
let mut total_results = 0usize;
for q in &queries {
let q_norm = normalize(q);
let results = idx.search(&q_norm, k, 100).unwrap();
total_results += results.len();
for (id, _) in &results {
prop_assert!(*id >= num_delete as u32, "deleted id {} in results", id);
}
}
let expected_min = queries.len() * k;
let recall_fraction = total_results as f64 / expected_min as f64;
prop_assert!(
recall_fraction >= 0.3,
"only {:.0}% of expected results returned after deleting {}% of {} nodes \
({} of {} expected)",
recall_fraction * 100.0,
delete_frac,
n,
total_results,
expected_min,
);
}
}
#[cfg(all(feature = "hnsw", feature = "sq8"))]
proptest! {
#![proptest_config(ProptestConfig::with_cases(10))]
#[test]
fn prop_sq8_reranked_recall(
n in 100usize..500,
dim in prop::sample::select(vec![16, 32, 64, 128]),
seed in 1u64..10000,
) {
use vicinity::distance::DistanceMetric;
use vicinity::hnsw::{HNSWParams, sq8u::HNSWSq8Index};
let vecs = random_vectors(n, dim, seed);
let params = HNSWParams {
metric: DistanceMetric::L2,
..Default::default()
};
let mut index = HNSWSq8Index::with_params(dim, params).unwrap();
for (i, v) in vecs.iter().enumerate() {
index.add_slice(i as u32, v).unwrap();
}
index.build().unwrap();
let k = 10.min(n);
let query = &vecs[0];
let mut gt: Vec<(u32, f32)> = vecs
.iter()
.enumerate()
.map(|(i, v)| (i as u32, vicinity::distance::l2_distance(query, v)))
.collect();
gt.sort_by(|a, b| a.1.total_cmp(&b.1));
let gt_ids: HashSet<u32> = gt.iter().take(k).map(|(id, _)| *id).collect();
let results = index.search_reranked(query, k, 64, 50).unwrap();
let result_ids: HashSet<u32> = results.iter().map(|(id, _)| *id).collect();
let recall = gt_ids.intersection(&result_ids).count() as f64 / k as f64;
prop_assert!(
recall >= 0.5,
"SQ8 reranked recall@{k} = {:.1}%, expected >= 50% (n={n}, dim={dim}, seed={seed})",
recall * 100.0,
);
}
}