Skip to main content

nodedb_vector/collection/
stats.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Live statistics aggregation for `VectorCollection`.
4
5use crate::index_config::IndexType;
6
7use super::lifecycle::VectorCollection;
8
9impl VectorCollection {
10    /// Collect live statistics from all segments.
11    pub fn stats(&self) -> nodedb_types::VectorIndexStats {
12        let growing_vectors = self.growing.len();
13        let sealed_vectors: usize = self.sealed.iter().map(|s| s.index.len()).sum();
14        let building_vectors: usize = self.building.iter().map(|s| s.flat.len()).sum();
15
16        let tombstone_count: usize = self
17            .sealed
18            .iter()
19            .map(|s| s.index.tombstone_count())
20            .sum::<usize>()
21            + self.growing.tombstone_count()
22            + self
23                .building
24                .iter()
25                .map(|s| s.flat.tombstone_count())
26                .sum::<usize>();
27
28        let total = growing_vectors + sealed_vectors + building_vectors;
29        let tombstone_ratio = if total > 0 {
30            tombstone_count as f64 / total as f64
31        } else {
32            0.0
33        };
34
35        let quantization = if let Some(ref dispatch) = self.codec_dispatch {
36            match dispatch.quantization() {
37                "rabitq" => nodedb_types::VectorIndexQuantization::RaBitQ,
38                "bbq" => nodedb_types::VectorIndexQuantization::Bbq,
39                _ => nodedb_types::VectorIndexQuantization::None,
40            }
41        } else if self.sealed.iter().any(|s| s.pq.is_some()) {
42            nodedb_types::VectorIndexQuantization::Pq
43        } else if self.sealed.iter().any(|s| s.sq8.is_some()) {
44            nodedb_types::VectorIndexQuantization::Sq8
45        } else {
46            nodedb_types::VectorIndexQuantization::None
47        };
48
49        let index_type = match self.index_config.index_type {
50            IndexType::HnswPq => nodedb_types::VectorIndexType::HnswPq,
51            IndexType::IvfPq => nodedb_types::VectorIndexType::IvfPq,
52            IndexType::Hnsw => nodedb_types::VectorIndexType::Hnsw,
53        };
54
55        let hnsw_mem: usize = self
56            .sealed
57            .iter()
58            .map(|s| s.index.memory_usage_bytes())
59            .sum();
60        let sq8_mem: usize = self
61            .sealed
62            .iter()
63            .filter_map(|s| s.sq8.as_ref().map(|(_, data)| data.len()))
64            .sum();
65        let growing_mem = growing_vectors * self.dim * std::mem::size_of::<f32>();
66        let building_mem = building_vectors * self.dim * std::mem::size_of::<f32>();
67        let memory_bytes = hnsw_mem + sq8_mem + growing_mem + building_mem;
68
69        let disk_bytes: usize = self
70            .sealed
71            .iter()
72            .filter_map(|s| s.mmap_vectors.as_ref().map(|m| m.file_size()))
73            .sum();
74
75        let metric_name = format!("{:?}", self.params.metric).to_lowercase();
76
77        nodedb_types::VectorIndexStats {
78            sealed_count: self.sealed.len(),
79            building_count: self.building.len(),
80            growing_vectors,
81            sealed_vectors,
82            live_count: self.live_count(),
83            tombstone_count,
84            tombstone_ratio,
85            quantization,
86            memory_bytes,
87            disk_bytes,
88            build_in_progress: !self.building.is_empty(),
89            index_type,
90            hnsw_m: self.params.m,
91            hnsw_m0: self.params.m0,
92            hnsw_ef_construction: self.params.ef_construction,
93            metric: metric_name,
94            dimensions: self.dim,
95            seal_threshold: self.seal_threshold,
96            mmap_segment_count: self.mmap_segment_count,
97            // `arena_bytes` is populated by the Data Plane handler which
98            // has access to `nodedb_mem::CollectionArenaHandle`. The field
99            // is always `None` here; callers overwrite it after calling
100            // `stats()` when a dedicated arena handle is available.
101            arena_bytes: None,
102        }
103    }
104}