/// Scale factor for expressing a ratio in basis points: a ratio of 1.0 maps to
/// `10_000`, so one basis point is 0.01%.
const BASIS_POINTS_DENOMINATOR: usize = 10_000;
/// Minimum value of `ivf_pending_retrain_entries` for which
/// `VectorIndexMemoryUsage::ivf_rebuild_recommended` can return `true`.
pub const IVF_REBUILD_MIN_PENDING_RETRAIN_ENTRIES: usize = 100;
/// Minimum pending-retrain ratio, in basis points of `ivf_live_entries`, for
/// which `VectorIndexMemoryUsage::ivf_rebuild_recommended` can return `true`.
/// With a denominator of `10_000`, the value `100` corresponds to 1%.
pub const IVF_REBUILD_PENDING_RETRAIN_BASIS_POINTS: usize = 100;
/// Collection of byte-size and count fields describing a vector index, grouped
/// by field-name prefix (`hnsw_*`, `ivf_*`, `turbo_quant_*`) plus two overall
/// estimates.
///
/// Every field is a plain integer, the type is `Copy`, and the derived
/// `Default` produces an all-zero value. The methods
/// `ivf_pending_retrain_basis_points` and `ivf_rebuild_recommended` read only
/// `ivf_live_entries` and `ivf_pending_retrain_entries`.
///
/// NOTE(review): the code that fills in these values is not visible here, so
/// the group headings below are inferred from the field names only; confirm
/// them against the producer before relying on them.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct VectorIndexMemoryUsage {
// Row coverage figures (meaning inferred from names; TODO confirm).
pub indexed_rows: u64,
pub row_bitmap_bytes: usize,
pub row_bitmap_serialized_bytes: usize,
// `hnsw_*`: presumably statistics for an HNSW graph index (TODO confirm).
pub hnsw_index_bytes: usize,
pub hnsw_referenced_vector_bytes: usize,
pub hnsw_entries: usize,
pub hnsw_live_entries: usize,
pub hnsw_deleted_entries: usize,
pub hnsw_link_count: usize,
pub hnsw_level_zero_link_count: usize,
pub hnsw_upper_layer_link_count: usize,
pub hnsw_max_layer_count: usize,
pub hnsw_max_links_per_layer: usize,
// NOTE(review): the `_basis_points` suffix suggests an average scaled by
// 10_000; confirm against the code that computes it.
pub hnsw_average_links_per_entry_basis_points: usize,
// `ivf_*`: presumably statistics for an IVF (inverted-file) index (TODO confirm).
pub ivf_index_bytes: usize,
pub ivf_referenced_vector_bytes: usize,
pub ivf_entries: usize,
/// Divisor used by `ivf_pending_retrain_basis_points`; when this is zero
/// that method returns `0`.
pub ivf_live_entries: usize,
pub ivf_deleted_entries: usize,
pub ivf_centroids: usize,
pub ivf_list_count: usize,
pub ivf_non_empty_list_count: usize,
pub ivf_max_list_len: usize,
// NOTE(review): presumably an average scaled by 10_000, as the suffix
// suggests; confirm against the code that computes it.
pub ivf_average_list_len_basis_points: usize,
pub ivf_assigned_entries: usize,
/// Count that `ivf_pending_retrain_basis_points` expresses in basis points
/// of `ivf_live_entries`, and that `ivf_rebuild_recommended` compares
/// against `IVF_REBUILD_MIN_PENDING_RETRAIN_ENTRIES`.
pub ivf_pending_retrain_entries: usize,
// `turbo_quant_*`: presumably statistics for a quantized index variant
// (TODO confirm what "turbo quant" refers to).
pub turbo_quant_index_bytes: usize,
pub turbo_quant_referenced_vector_bytes: usize,
pub turbo_quant_entries: usize,
pub turbo_quant_live_entries: usize,
pub turbo_quant_deleted_entries: usize,
pub turbo_quant_code_bytes: usize,
pub turbo_quant_codebook_bytes: usize,
pub turbo_quant_calibration_bytes: usize,
// Overall estimates. NOTE(review): how "index" and "reachable" bytes differ
// is not established by the visible code; confirm with the producer.
pub estimated_index_bytes: usize,
pub estimated_reachable_bytes: usize,
}
impl VectorIndexMemoryUsage {
    /// Returns `ivf_pending_retrain_entries` as a fraction of
    /// `ivf_live_entries`, expressed in basis points (1/10_000) and rounded
    /// down.
    ///
    /// Returns `0` when `ivf_live_entries` is zero. The ratio is computed in
    /// 128-bit arithmetic so that very large pending counts cannot overflow
    /// the intermediate product and under-report the ratio; only the final
    /// result is clamped to `usize::MAX` if it does not fit.
    #[must_use]
    pub fn ivf_pending_retrain_basis_points(&self) -> usize {
        if self.ivf_live_entries == 0 {
            return 0;
        }
        // `usize` -> `u128` is a lossless widening (there is no `From` impl for
        // it, hence `as`). The product of two `usize` values always fits in
        // `u128` on targets where `usize` is at most 64 bits wide.
        let scaled =
            (self.ivf_pending_retrain_entries as u128) * (BASIS_POINTS_DENOMINATOR as u128);
        let basis_points = scaled / (self.ivf_live_entries as u128);
        // Clamp before narrowing so the cast back to `usize` can never truncate.
        basis_points.min(usize::MAX as u128) as usize
    }

    /// Reports whether the pending-retrain backlog is large enough to
    /// recommend an IVF rebuild.
    ///
    /// Both conditions must hold:
    /// * `ivf_pending_retrain_entries` is at least
    ///   [`IVF_REBUILD_MIN_PENDING_RETRAIN_ENTRIES`], so a small absolute
    ///   backlog never triggers a recommendation regardless of its ratio;
    /// * [`Self::ivf_pending_retrain_basis_points`] is at least
    ///   [`IVF_REBUILD_PENDING_RETRAIN_BASIS_POINTS`].
    #[must_use]
    pub fn ivf_rebuild_recommended(&self) -> bool {
        if self.ivf_pending_retrain_entries < IVF_REBUILD_MIN_PENDING_RETRAIN_ENTRIES {
            return false;
        }
        self.ivf_pending_retrain_basis_points() >= IVF_REBUILD_PENDING_RETRAIN_BASIS_POINTS
    }
}
#[cfg(test)]
mod tests {
    use super::{
        IVF_REBUILD_MIN_PENDING_RETRAIN_ENTRIES, IVF_REBUILD_PENDING_RETRAIN_BASIS_POINTS,
        VectorIndexMemoryUsage,
    };

    /// Builds a usage value with only the two IVF counters set; every other
    /// field keeps its default (zero) value.
    fn ivf_usage(live: usize, pending_retrain: usize) -> VectorIndexMemoryUsage {
        VectorIndexMemoryUsage {
            ivf_live_entries: live,
            ivf_pending_retrain_entries: pending_retrain,
            ..VectorIndexMemoryUsage::default()
        }
    }

    #[test]
    fn ivf_pending_retrain_ratio_uses_live_entries() {
        // 100 pending out of 10_000 live is 1%, i.e. exactly the threshold.
        let basis_points = ivf_usage(10_000, 100).ivf_pending_retrain_basis_points();
        assert_eq!(basis_points, IVF_REBUILD_PENDING_RETRAIN_BASIS_POINTS);
    }

    #[test]
    fn ivf_rebuild_recommendation_requires_ratio_and_floor() {
        let floor = IVF_REBUILD_MIN_PENDING_RETRAIN_ENTRIES;
        // (live entries, pending entries, expected recommendation)
        let cases = [
            // Ratio is high enough, but the absolute count is under the floor.
            (1_000, floor - 1, false),
            // Floor is met, but the ratio is only half the threshold.
            (20_000, floor, false),
            // Both the floor and the ratio threshold are met exactly.
            (10_000, floor, true),
        ];
        for &(live, pending, expected) in &cases {
            assert_eq!(
                ivf_usage(live, pending).ivf_rebuild_recommended(),
                expected,
                "live = {}, pending = {}",
                live,
                pending
            );
        }
    }
}