use std::mem::size_of;
/// Memory footprint summary for a vector index.
///
/// `raw_vectors_bytes` and `total_bytes` are derived by [`IndexMemoryStats::new`];
/// the other fields are stored exactly as supplied by the caller.
#[derive(Debug, Clone, Default)]
pub struct IndexMemoryStats {
    /// Number of vectors in the index.
    pub n_vectors: usize,
    /// Number of components per vector.
    pub dimension: usize,
    /// Size of the vectors as uncompressed `f32` data:
    /// `n_vectors * dimension * size_of::<f32>()`.
    pub raw_vectors_bytes: usize,
    /// Bytes used for the vectors themselves, as reported by the caller.
    pub stored_vectors_bytes: usize,
    /// Bytes used by everything other than the vectors, as reported by the caller.
    pub index_overhead_bytes: usize,
    /// `stored_vectors_bytes + index_overhead_bytes`.
    pub total_bytes: usize,
}

impl IndexMemoryStats {
    /// Builds the statistics, deriving `raw_vectors_bytes` and `total_bytes`.
    ///
    /// The derived values use saturating arithmetic: inputs large enough to
    /// overflow `usize` clamp to `usize::MAX` instead of panicking (debug
    /// builds) or silently wrapping (release builds).
    pub fn new(
        n_vectors: usize,
        dimension: usize,
        stored_vectors_bytes: usize,
        index_overhead_bytes: usize,
    ) -> Self {
        let raw_vectors_bytes = n_vectors
            .saturating_mul(dimension)
            .saturating_mul(size_of::<f32>());
        let total_bytes = stored_vectors_bytes.saturating_add(index_overhead_bytes);
        Self {
            n_vectors,
            dimension,
            raw_vectors_bytes,
            stored_vectors_bytes,
            index_overhead_bytes,
            total_bytes,
        }
    }

    /// Average total bytes (vectors plus overhead) per vector.
    ///
    /// Returns `0.0` when the index holds no vectors.
    pub fn bytes_per_vector(&self) -> f64 {
        if self.n_vectors == 0 {
            return 0.0;
        }
        self.total_bytes as f64 / self.n_vectors as f64
    }

    /// Ratio of the raw `f32` size to the stored size.
    ///
    /// Returns `0.0` when `stored_vectors_bytes` is zero.
    pub fn compression_ratio(&self) -> f64 {
        if self.stored_vectors_bytes == 0 {
            return 0.0;
        }
        self.raw_vectors_bytes as f64 / self.stored_vectors_bytes as f64
    }

    /// Index overhead expressed as a fraction of the raw `f32` size.
    ///
    /// Returns `0.0` when `raw_vectors_bytes` is zero.
    pub fn overhead_ratio(&self) -> f64 {
        if self.raw_vectors_bytes == 0 {
            return 0.0;
        }
        self.index_overhead_bytes as f64 / self.raw_vectors_bytes as f64
    }

    /// Average number of stored bits per vector component.
    ///
    /// Returns `0.0` when either `n_vectors` or `dimension` is zero.
    pub fn bits_per_dimension(&self) -> f64 {
        if self.n_vectors == 0 || self.dimension == 0 {
            return 0.0;
        }
        // Multiply in floating point: the integer products `bytes * 8` and
        // `n_vectors * dimension` can overflow `usize` for very large indexes.
        let stored_bits = self.stored_vectors_bytes as f64 * 8.0;
        let n_components = self.n_vectors as f64 * self.dimension as f64;
        stored_bits / n_components
    }
}
/// Samples the resident memory of the current process at explicit points.
///
/// Each field is `None` until the corresponding sample has been taken, and
/// stays `None` on platforms where the process memory cannot be read.
/// `peak_bytes` is the largest value seen at a sampling point (`start`,
/// `checkpoint`, `finish`); usage between samples is not observed.
#[derive(Debug, Default)]
pub struct MemoryTracker {
    /// Resident memory in bytes when `start` was called.
    pub start_bytes: Option<usize>,
    /// Largest resident memory in bytes observed at any sampling point.
    pub peak_bytes: Option<usize>,
    /// Resident memory in bytes when `finish` was called.
    pub end_bytes: Option<usize>,
}

impl MemoryTracker {
    /// Creates a tracker with no samples recorded.
    pub fn new() -> Self {
        Self::default()
    }

    /// Begins a measurement: records the current memory as both the baseline
    /// and the initial peak, and discards any end sample from an earlier run.
    pub fn start(&mut self) {
        self.start_bytes = Self::current_memory();
        self.peak_bytes = self.start_bytes;
        // A stale end sample would make `allocated` mix two different runs.
        self.end_bytes = None;
    }

    /// Samples the current memory and raises the peak if it grew.
    pub fn checkpoint(&mut self) {
        if let Some(current) = Self::current_memory() {
            self.record_sample(current);
        }
    }

    /// Ends a measurement: records the current memory as the end sample and
    /// folds that same reading into the peak, so the peak is never below it.
    pub fn finish(&mut self) {
        self.end_bytes = Self::current_memory();
        if let Some(end) = self.end_bytes {
            self.record_sample(end);
        }
    }

    /// Bytes by which memory grew between `start` and `finish`.
    ///
    /// Returns `None` if either sample is missing or if memory did not grow.
    pub fn allocated(&self) -> Option<usize> {
        Self::growth(self.start_bytes, self.end_bytes)
    }

    /// Bytes by which the observed peak exceeds the `start` sample.
    ///
    /// Returns `None` if either sample is missing or if the peak is not above
    /// the baseline.
    pub fn peak_allocated(&self) -> Option<usize> {
        Self::growth(self.start_bytes, self.peak_bytes)
    }

    /// Updates `peak_bytes` to the maximum of its current value and `current`.
    fn record_sample(&mut self, current: usize) {
        self.peak_bytes = Some(self.peak_bytes.map_or(current, |peak| peak.max(current)));
    }

    /// Strictly positive difference `later - baseline`, or `None` otherwise.
    fn growth(baseline: Option<usize>, later: Option<usize>) -> Option<usize> {
        let delta = later?.checked_sub(baseline?)?;
        (delta > 0).then_some(delta)
    }

    /// Resident set size of the current process in bytes.
    ///
    /// Reads the `VmRSS` line of `/proc/self/status`, which is reported in kB,
    /// so the result does not depend on the system page size. Returns `None`
    /// if the file cannot be read or the line cannot be parsed.
    #[cfg(target_os = "linux")]
    fn current_memory() -> Option<usize> {
        let status = std::fs::read_to_string("/proc/self/status").ok()?;
        let rss_kib: usize = status
            .lines()
            .find_map(|line| line.strip_prefix("VmRSS:"))?
            .split_whitespace()
            .next()?
            .parse()
            .ok()?;
        rss_kib.checked_mul(1024)
    }

    /// Memory sampling is not implemented on this platform.
    #[cfg(not(target_os = "linux"))]
    fn current_memory() -> Option<usize> {
        None
    }
}
/// Closed-form memory estimates for common index layouts.
///
/// Every function returns an [`IndexMemoryStats`] built from the computed
/// vector-storage and overhead byte counts. The size computations use plain
/// integer arithmetic, so inputs large enough to overflow `usize` panic in
/// debug builds.
pub mod theoretical {
    use super::*;

    /// Estimates the memory of an HNSW index that stores full `f32` vectors.
    ///
    /// Overhead is modelled as `2.5 * m` graph edges per node (truncated to an
    /// integer), each edge a `u32`, plus one `u32` and one `u8` of per-node
    /// metadata.
    pub fn hnsw_memory(n_vectors: usize, dimension: usize, m: usize) -> IndexMemoryStats {
        let stored_vectors_bytes = n_vectors * dimension * size_of::<f32>();

        let avg_edges_per_node = (2.5 * m as f64) as usize;
        let graph_edges_bytes = n_vectors * avg_edges_per_node * size_of::<u32>();
        let metadata_bytes = n_vectors * (size_of::<u32>() + size_of::<u8>());
        let index_overhead_bytes = graph_edges_bytes + metadata_bytes;

        IndexMemoryStats::new(
            n_vectors,
            dimension,
            stored_vectors_bytes,
            index_overhead_bytes,
        )
    }

    /// Estimates the memory of an IVF index with product-quantized codes.
    ///
    /// * Stored vectors: `ceil(n_subquantizers * bits_per_code / 8)` bytes each.
    /// * Overhead: `n_clusters` `f32` centroids of `dimension` components,
    ///   `n_subquantizers` codebooks of `2^bits_per_code` entries with
    ///   `dimension / n_subquantizers` `f32` components each (integer
    ///   division), and one `u32` inverted-list entry per vector.
    ///
    /// With `n_subquantizers == 0` there are no codes and no codebooks, so both
    /// contribute zero bytes instead of triggering a division by zero.
    ///
    /// # Panics
    ///
    /// In debug builds, panics if `bits_per_code` is at least `usize::BITS`.
    pub fn ivf_pq_memory(
        n_vectors: usize,
        dimension: usize,
        n_clusters: usize,
        n_subquantizers: usize,
        bits_per_code: usize,
    ) -> IndexMemoryStats {
        let code_bytes = (n_subquantizers * bits_per_code).div_ceil(8);
        let stored_vectors_bytes = n_vectors * code_bytes;

        let centroid_bytes = n_clusters * dimension * size_of::<f32>();

        let codebook_size = 1usize << bits_per_code;
        // Zero subquantizers means no sub-vectors at all; avoid dividing by zero.
        let sub_dimension = dimension.checked_div(n_subquantizers).unwrap_or(0);
        let codebook_bytes = n_subquantizers * codebook_size * sub_dimension * size_of::<f32>();

        let invlist_bytes = n_vectors * size_of::<u32>();
        let index_overhead_bytes = centroid_bytes + codebook_bytes + invlist_bytes;

        IndexMemoryStats::new(
            n_vectors,
            dimension,
            stored_vectors_bytes,
            index_overhead_bytes,
        )
    }

    /// Estimates the memory of a flat index: full `f32` vectors and no overhead.
    pub fn flat_memory(n_vectors: usize, dimension: usize) -> IndexMemoryStats {
        let stored_bytes = n_vectors * dimension * size_of::<f32>();
        IndexMemoryStats::new(n_vectors, dimension, stored_bytes, 0)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Uncompressed storage: ratio of 1 and per-vector cost above the raw size.
    #[test]
    fn test_index_memory_stats() {
        let stored_bytes = 1000 * 128 * 4;
        let overhead_bytes = 1000 * 32 * 4;
        let stats = IndexMemoryStats::new(1000, 128, stored_bytes, overhead_bytes);

        assert_eq!(stats.n_vectors, 1000);
        assert_eq!(stats.raw_vectors_bytes, 1000 * 128 * 4);

        let ratio_error = (stats.compression_ratio() - 1.0).abs();
        assert!(ratio_error < 0.001);

        let raw_bytes_per_vector = 128.0 * 4.0;
        assert!(stats.bytes_per_vector() > raw_bytes_per_vector);
    }

    /// Half a byte per component against four raw bytes gives a ratio of 8.
    #[test]
    fn test_compression_ratio() {
        let stored_bytes = 1000 * 128 / 2;
        let stats = IndexMemoryStats::new(1000, 128, stored_bytes, 0);

        let ratio_error = (stats.compression_ratio() - 8.0).abs();
        assert!(ratio_error < 0.001);
    }

    /// The HNSW estimate keeps raw vectors and adds non-zero graph overhead.
    #[test]
    fn test_theoretical_hnsw() {
        let stats = theoretical::hnsw_memory(10000, 128, 16);

        assert_eq!(stats.raw_vectors_bytes, 10000 * 128 * 4);
        assert!(stats.index_overhead_bytes > 0);
        assert!(stats.overhead_ratio() > 0.0);
        assert!(stats.bytes_per_vector() > 512.0);
    }

    /// The IVF-PQ estimate stores fewer bytes than the raw vectors.
    #[test]
    fn test_theoretical_ivf_pq() {
        let stats = theoretical::ivf_pq_memory(10000, 128, 256, 16, 8);

        assert!(stats.compression_ratio() > 1.0);
        assert!(stats.stored_vectors_bytes < stats.raw_vectors_bytes);
    }
}