Skip to main content

nodedb_vector/collection/
lifecycle_compact.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Compact and snapshot operations for `VectorCollection`.
4
5use nodedb_types::Surrogate;
6
7use super::lifecycle::VectorCollection;
8
9impl VectorCollection {
10    /// Compact sealed segments by removing tombstoned nodes.
11    ///
12    /// Rewrites `surrogate_map` and `multi_doc_map` for every sealed
13    /// segment so that global ids continue to resolve to the correct
14    /// surrogate after local-id renumbering.
15    pub fn compact(&mut self) -> usize {
16        let mut total_removed = 0;
17        for seg in &mut self.sealed {
18            let base_id = seg.base_id;
19            let (removed, id_map) = seg.index.compact_with_map();
20            total_removed += removed;
21            if removed == 0 {
22                continue;
23            }
24
25            let segment_end = base_id as u64 + id_map.len() as u64;
26            let global_keys: Vec<u32> = self
27                .surrogate_map
28                .keys()
29                .copied()
30                .filter(|&k| (k as u64) >= base_id as u64 && (k as u64) < segment_end)
31                .collect();
32            // Two-phase: remove old entries first, then insert new ones
33            // so we don't clobber a freshly-remapped entry with a later
34            // tombstone removal.
35            // no-governor: VectorCollection is !Send and has no governor field; budget is enforced by the Data Plane core's arena before compaction is invoked
36            let mut new_entries: Vec<(u32, Surrogate)> = Vec::with_capacity(global_keys.len());
37            for old_global in &global_keys {
38                let surrogate = self.surrogate_map.remove(old_global);
39                let old_local = (old_global - base_id) as usize;
40                let new_local = id_map[old_local];
41                if new_local != u32::MAX
42                    && let Some(s) = surrogate
43                {
44                    new_entries.push((base_id + new_local, s));
45                } else if let Some(s) = surrogate {
46                    // Tombstoned — drop reverse mapping too.
47                    self.surrogate_to_local.remove(&s);
48                }
49            }
50            for (k, s) in new_entries {
51                self.surrogate_map.insert(k, s);
52                self.surrogate_to_local.insert(s, k);
53            }
54
55            // Rewrite multi_doc_map entries for this segment.
56            for ids in self.multi_doc_map.values_mut() {
57                ids.retain_mut(|vid| {
58                    let v = *vid;
59                    if (v as u64) >= base_id as u64 && (v as u64) < segment_end {
60                        let old_local = (v - base_id) as usize;
61                        let new_local = id_map[old_local];
62                        if new_local == u32::MAX {
63                            false
64                        } else {
65                            *vid = base_id + new_local;
66                            true
67                        }
68                    } else {
69                        true
70                    }
71                });
72            }
73        }
74        total_removed
75    }
76
77    /// Export all live vectors for snapshot.
78    pub fn export_snapshot(&self) -> Vec<(u32, Vec<f32>, Option<Surrogate>)> {
79        let mut result = Vec::new();
80
81        for i in 0..self.growing.len() as u32 {
82            let vid = self.growing_base_id + i;
83            if let Some(data) = self.growing.get_vector(i) {
84                let surrogate = self.surrogate_map.get(&vid).copied();
85                result.push((vid, data.to_vec(), surrogate));
86            }
87        }
88
89        for seg in &self.sealed {
90            let vectors = seg.index.export_vectors();
91            for (i, vec_data) in vectors.into_iter().enumerate() {
92                let vid = seg.base_id + i as u32;
93                let surrogate = self.surrogate_map.get(&vid).copied();
94                result.push((vid, vec_data, surrogate));
95            }
96        }
97
98        for seg in &self.building {
99            for i in 0..seg.flat.len() as u32 {
100                let vid = seg.base_id + i;
101                if let Some(data) = seg.flat.get_vector(i) {
102                    let surrogate = self.surrogate_map.get(&vid).copied();
103                    result.push((vid, data.to_vec(), surrogate));
104                }
105            }
106        }
107
108        result
109    }
110}