Skip to main content

nodedb_vector/collection/
codec_build.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Methods on `VectorCollection` for building the collection-level
4//! codec-dispatch index (RaBitQ, BBQ).
5//!
6//! All `impl VectorCollection` blocks in `collection/` extend the same type.
7
8use super::codec_dispatch::{CollectionCodec, build_collection_codec};
9use super::lifecycle::VectorCollection;
10
11impl VectorCollection {
12    /// Collect all live FP32 vectors from every segment (growing, building,
13    /// and sealed) in insertion order. Used to train the collection-level
14    /// codec-dispatch index.
15    pub(crate) fn gather_all_vectors_fp32(&self) -> Vec<Vec<f32>> {
16        let total = self.len();
17        // no-governor: gather is called once per codec-dispatch training cycle; vectors already live in memory
18        let mut out = Vec::with_capacity(total);
19
20        for i in 0..self.growing.len() as u32 {
21            if let Some(v) = self.growing.get_vector(i) {
22                out.push(v.to_vec());
23            }
24        }
25
26        for seg in &self.building {
27            for i in 0..seg.flat.len() as u32 {
28                if let Some(v) = seg.flat.get_vector(i) {
29                    out.push(v.to_vec());
30                }
31            }
32        }
33
34        for seg in &self.sealed {
35            let n = seg.index.len();
36            for i in 0..n as u32 {
37                if !seg.index.is_deleted(i)
38                    && let Some(v) = seg.index.get_vector(i)
39                {
40                    out.push(v.to_vec());
41                }
42            }
43        }
44
45        out
46    }
47
48    /// Build a codec-dispatched index over all current vectors using the
49    /// requested quantization. Replaces any existing dispatch index for
50    /// this collection. Idempotent.
51    ///
52    /// Returns a reference to the new index, or `None` if the quantization
53    /// tag is not supported (falls back to per-segment Sq8/PQ paths) or there
54    /// are no vectors to train on.
55    pub fn build_codec_dispatch(&mut self, quantization: &str) -> Option<&CollectionCodec> {
56        let vectors = self.gather_all_vectors_fp32();
57        let dim = self.dim;
58        let m = self.params.m;
59        let ef_construction = self.params.ef_construction;
60        let seed = 42_u64;
61        self.codec_dispatch =
62            build_collection_codec(quantization, &vectors, dim, m, ef_construction, seed);
63        self.codec_dispatch.as_ref()
64    }
65}