nodedb_vector/collection/
lifecycle_compact.rs1use nodedb_types::Surrogate;
6
7use super::lifecycle::VectorCollection;
8
9impl VectorCollection {
10 pub fn compact(&mut self) -> usize {
16 let mut total_removed = 0;
17 for seg in &mut self.sealed {
18 let base_id = seg.base_id;
19 let (removed, id_map) = seg.index.compact_with_map();
20 total_removed += removed;
21 if removed == 0 {
22 continue;
23 }
24
25 let segment_end = base_id as u64 + id_map.len() as u64;
26 let global_keys: Vec<u32> = self
27 .surrogate_map
28 .keys()
29 .copied()
30 .filter(|&k| (k as u64) >= base_id as u64 && (k as u64) < segment_end)
31 .collect();
32 let mut new_entries: Vec<(u32, Surrogate)> = Vec::with_capacity(global_keys.len());
36 for old_global in &global_keys {
37 let surrogate = self.surrogate_map.remove(old_global);
38 let old_local = (old_global - base_id) as usize;
39 let new_local = id_map[old_local];
40 if new_local != u32::MAX
41 && let Some(s) = surrogate
42 {
43 new_entries.push((base_id + new_local, s));
44 } else if let Some(s) = surrogate {
45 self.surrogate_to_local.remove(&s);
47 }
48 }
49 for (k, s) in new_entries {
50 self.surrogate_map.insert(k, s);
51 self.surrogate_to_local.insert(s, k);
52 }
53
54 for ids in self.multi_doc_map.values_mut() {
56 ids.retain_mut(|vid| {
57 let v = *vid;
58 if (v as u64) >= base_id as u64 && (v as u64) < segment_end {
59 let old_local = (v - base_id) as usize;
60 let new_local = id_map[old_local];
61 if new_local == u32::MAX {
62 false
63 } else {
64 *vid = base_id + new_local;
65 true
66 }
67 } else {
68 true
69 }
70 });
71 }
72 }
73 total_removed
74 }
75
76 pub fn export_snapshot(&self) -> Vec<(u32, Vec<f32>, Option<Surrogate>)> {
78 let mut result = Vec::new();
79
80 for i in 0..self.growing.len() as u32 {
81 let vid = self.growing_base_id + i;
82 if let Some(data) = self.growing.get_vector(i) {
83 let surrogate = self.surrogate_map.get(&vid).copied();
84 result.push((vid, data.to_vec(), surrogate));
85 }
86 }
87
88 for seg in &self.sealed {
89 let vectors = seg.index.export_vectors();
90 for (i, vec_data) in vectors.into_iter().enumerate() {
91 let vid = seg.base_id + i as u32;
92 let surrogate = self.surrogate_map.get(&vid).copied();
93 result.push((vid, vec_data, surrogate));
94 }
95 }
96
97 for seg in &self.building {
98 for i in 0..seg.flat.len() as u32 {
99 let vid = seg.base_id + i;
100 if let Some(data) = seg.flat.get_vector(i) {
101 let surrogate = self.surrogate_map.get(&vid).copied();
102 result.push((vid, data.to_vec(), surrogate));
103 }
104 }
105 }
106
107 result
108 }
109}