use std::borrow::Cow;
use selene_core::VectorMetric;
use super::*;
fn vector(values: &[f32]) -> VectorValue {
VectorValue::new(values.to_vec()).expect("test vector is valid")
}
#[test]
fn ivf_target_centroid_count_scales_until_cap() {
assert_eq!(target_centroid_count(0), 1);
assert_eq!(target_centroid_count(100_000), 317);
assert_eq!(target_centroid_count(10_000_000), MAX_CENTROIDS);
}
#[test]
fn ivf_training_entry_ids_borrow_when_below_sample_cap() {
let live_entries = (0..100_000).collect::<Vec<_>>();
let training_entries = training_entry_ids(&live_entries);
assert!(matches!(training_entries, Cow::Borrowed(_)));
assert_eq!(training_entries.as_ref(), live_entries.as_slice());
}
#[test]
fn ivf_training_entry_ids_sample_evenly_when_above_cap() {
let live_entries = (0..250_000).collect::<Vec<_>>();
let training_entries = training_entry_ids(&live_entries);
assert!(matches!(training_entries, Cow::Owned(_)));
assert_eq!(training_entries.len(), TRAINING_SAMPLE_MAX_ENTRIES);
assert_eq!(training_entries[0], 0);
assert_eq!(training_entries[training_entries.len() - 1], 249_999);
assert!(training_entries.windows(2).all(|pair| pair[0] < pair[1]));
}
#[test]
fn ivf_search_finds_near_rows_when_all_lists_are_probed() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
for row in 0..32 {
index.insert(row, vector(&[row as f32, 0.0])).unwrap();
}
index.finish_bulk_load().unwrap();
let usage = index.memory_usage();
assert!(!index.has_stale_entries());
let hits = index
.search(&vector(&[4.1, 0.0]), 3, usage.list_count)
.unwrap();
assert_eq!(hits[0].row, 4);
assert!(hits.iter().any(|hit| hit.row == 5));
assert_eq!(usage.live_entries, 32);
assert_eq!(usage.assigned_entries, 32);
assert_eq!(usage.pending_retrain_entries, 0);
assert!(usage.non_empty_list_count > 0);
assert!(usage.max_list_len > 0);
assert_eq!(
usage.average_list_len_basis_points,
usage.assigned_entries * 10_000 / usage.list_count
);
assert!(usage.centroids > 1);
}
#[test]
fn ivf_memory_usage_reports_list_distribution() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
for row in 0..16 {
index.insert(row, vector(&[row as f32, 0.0])).unwrap();
}
index.finish_bulk_load().unwrap();
let usage = index.memory_usage();
assert_eq!(
usage.non_empty_list_count,
index.lists.iter().filter(|list| !list.is_empty()).count()
);
assert_eq!(
usage.max_list_len,
index.lists.iter().map(Vec::len).max().unwrap()
);
assert_eq!(
usage.average_list_len_basis_points,
usage.assigned_entries * 10_000 / usage.list_count
);
}
#[test]
fn ivf_replace_reuses_current_row_version() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
index.insert(1, vector(&[100.0, 0.0])).unwrap();
index.insert(2, vector(&[2.0, 0.0])).unwrap();
index.finish_bulk_load().unwrap();
index.insert(1, vector(&[1.0, 0.0])).unwrap();
assert!(!index.has_stale_entries());
let hits = index.search(&vector(&[1.1, 0.0]), 2, 16).unwrap();
assert_eq!(hits[0].row, 1);
let usage = index.memory_usage();
assert_eq!(usage.entries, 2);
assert_eq!(usage.live_entries, 2);
assert_eq!(usage.deleted_entries, 0);
assert_eq!(usage.assigned_entries, 2);
assert_eq!(usage.pending_retrain_entries, 1);
}
#[test]
fn ivf_replace_moves_entry_between_lists() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
for row in 0..16 {
index
.insert(row, vector(&[row as f32 * 10.0, 0.0]))
.unwrap();
}
index.finish_bulk_load().unwrap();
let before = index.memory_usage();
index.insert(1, vector(&[150.0, 0.0])).unwrap();
assert!(!index.has_stale_entries());
let after = index.memory_usage();
assert_eq!(after.entries, before.entries);
assert_eq!(after.assigned_entries, before.assigned_entries);
assert_eq!(after.pending_retrain_entries, 1);
let hits = index
.search(&vector(&[150.0, 0.0]), 1, after.list_count)
.unwrap();
assert_eq!(hits[0].row, 1);
}
#[test]
fn ivf_replace_keeps_same_list_assignment_in_place() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
for row in 0..32 {
index.insert(row, vector(&[row as f32, 0.0])).unwrap();
}
index.finish_bulk_load().unwrap();
let (entry_id, list_id) = index
.lists
.iter()
.enumerate()
.find_map(|(list_id, list)| (list.len() > 1).then_some((list[0], list_id)))
.expect("test fixture has at least one multi-entry IVF list");
let row = index.entries[entry_id as usize].row;
let replacement = index.entries[entry_id as usize].vector.clone();
let list_before = index.lists[list_id].clone();
index.insert(row, replacement).unwrap();
assert_eq!(index.assigned_list_for_entry(entry_id), Some(list_id));
assert_eq!(index.lists[list_id], list_before);
let usage = index.memory_usage();
assert_eq!(usage.assigned_entries, 32);
assert_eq!(usage.pending_retrain_entries, 1);
}
#[test]
fn ivf_remove_excludes_row_from_results() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
index.insert(1, vector(&[1.0, 0.0])).unwrap();
index.insert(2, vector(&[2.0, 0.0])).unwrap();
index.finish_bulk_load().unwrap();
let removed_entry = index.row_to_entry[&1];
index.remove(1);
assert!(index.has_stale_entries());
assert_eq!(index.assigned_list_for_entry(removed_entry), None);
let hits = index.search(&vector(&[1.0, 0.0]), 2, 16).unwrap();
assert_eq!(hits[0].row, 2);
let usage = index.memory_usage();
assert_eq!(usage.live_entries, 1);
assert_eq!(usage.deleted_entries, 1);
assert_eq!(usage.assigned_entries, 1);
assert_eq!(usage.pending_retrain_entries, 0);
}
#[test]
fn ivf_finish_bulk_load_rebuilds_lists_after_updates() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
for row in 0..12 {
index.insert(row, vector(&[row as f32, 0.0])).unwrap();
}
index.finish_bulk_load().unwrap();
index.remove(0);
index.insert(12, vector(&[0.1, 0.0])).unwrap();
index.finish_bulk_load().unwrap();
assert!(index.has_stale_entries());
let hits = index.search(&vector(&[0.0, 0.0]), 1, 16).unwrap();
assert_eq!(hits[0].row, 12);
let usage = index.memory_usage();
assert_eq!(usage.live_entries, 12);
assert_eq!(usage.assigned_entries, 12);
assert_eq!(usage.pending_retrain_entries, 0);
}
#[test]
fn ivf_pending_retrain_tracks_post_training_churn() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
for row in 0..16 {
index.insert(row, vector(&[row as f32, 0.0])).unwrap();
}
index.finish_bulk_load().unwrap();
assert_eq!(index.memory_usage().pending_retrain_entries, 0);
index.insert(16, vector(&[16.0, 0.0])).unwrap();
assert_eq!(index.memory_usage().pending_retrain_entries, 1);
index.insert(16, vector(&[17.0, 0.0])).unwrap();
assert_eq!(index.memory_usage().pending_retrain_entries, 1);
index.insert(1, vector(&[100.0, 0.0])).unwrap();
assert_eq!(index.memory_usage().pending_retrain_entries, 2);
index.remove(16);
assert_eq!(index.memory_usage().pending_retrain_entries, 1);
index.finish_bulk_load().unwrap();
assert_eq!(index.memory_usage().pending_retrain_entries, 0);
}
#[test]
fn ivf_parallel_assignment_path_keeps_exact_full_probe_results() {
let mut index = IvfVectorIndex::new(VectorMetric::SquaredEuclidean);
for row in 0..16 {
index.insert(row, vector(&[row as f32, 0.0])).unwrap();
}
index.finish_bulk_load().unwrap();
let usage = index.memory_usage();
assert!(should_parallelize_assignments(
usage.live_entries,
usage.centroids
));
assert!(index.entry_squared_norms.is_empty());
assert!(index.centroid_squared_norms.is_empty());
assert_eq!(usage.assigned_entries, 16);
assert_eq!(usage.pending_retrain_entries, 0);
assert_eq!(index.lists.iter().map(Vec::capacity).sum::<usize>(), 16);
let hits = index
.search(&vector(&[9.2, 0.0]), 3, usage.list_count)
.unwrap();
assert_eq!(
hits.iter().map(|hit| hit.row).collect::<Vec<_>>(),
[9, 10, 8]
);
}
#[test]
fn ivf_cosine_bulk_load_refreshes_centroid_norm_cache() {
let mut index = IvfVectorIndex::new(VectorMetric::Cosine);
for row in 0..16 {
index.insert(row, vector(&[1.0, row as f32 + 1.0])).unwrap();
}
index.finish_bulk_load().unwrap();
assert_eq!(index.entry_squared_norms.len(), index.entries.len());
assert!(index.entry_squared_norms.iter().all(|norm| *norm > 0.0));
assert_eq!(index.centroid_squared_norms.len(), index.centroids.len());
assert!(index.centroid_squared_norms.iter().all(|norm| *norm > 0.0));
let hits = index
.search(&vector(&[1.0, 9.1]), 1, index.lists.len())
.unwrap();
assert_eq!(hits[0].row, 8);
}
#[test]
fn ivf_cosine_replace_keeps_entry_norm_cache_aligned() {
let mut index = IvfVectorIndex::new(VectorMetric::Cosine);
index.insert(1, vector(&[1.0, 0.0])).unwrap();
index.insert(2, vector(&[0.0, 1.0])).unwrap();
index.finish_bulk_load().unwrap();
index.insert(1, vector(&[0.9, 0.1])).unwrap();
assert_eq!(index.entry_squared_norms.len(), index.entries.len());
assert_eq!(index.memory_usage().deleted_entries, 0);
let hits = index.search(&vector(&[1.0, 0.0]), 1, 16).unwrap();
assert_eq!(hits[0].row, 1);
}
#[test]
fn ivf_cosine_rejects_zero_norm_query() {
let mut index = IvfVectorIndex::new(VectorMetric::Cosine);
index.insert(1, vector(&[1.0, 0.0])).unwrap();
index.finish_bulk_load().unwrap();
let err = index.search(&vector(&[0.0, 0.0]), 1, 16).unwrap_err();
assert!(err.to_string().contains("zero-norm vector"));
}