nodedb_vector/collection/
lifecycle_compact.rs1use nodedb_types::Surrogate;
6
7use super::lifecycle::VectorCollection;
8
9impl VectorCollection {
10 pub fn compact(&mut self) -> usize {
16 let mut total_removed = 0;
17 for seg in &mut self.sealed {
18 let base_id = seg.base_id;
19 let (removed, id_map) = seg.index.compact_with_map();
20 total_removed += removed;
21 if removed == 0 {
22 continue;
23 }
24
25 let segment_end = base_id as u64 + id_map.len() as u64;
26 let global_keys: Vec<u32> = self
27 .surrogate_map
28 .keys()
29 .copied()
30 .filter(|&k| (k as u64) >= base_id as u64 && (k as u64) < segment_end)
31 .collect();
32 let mut new_entries: Vec<(u32, Surrogate)> = Vec::with_capacity(global_keys.len());
37 for old_global in &global_keys {
38 let surrogate = self.surrogate_map.remove(old_global);
39 let old_local = (old_global - base_id) as usize;
40 let new_local = id_map[old_local];
41 if new_local != u32::MAX
42 && let Some(s) = surrogate
43 {
44 new_entries.push((base_id + new_local, s));
45 } else if let Some(s) = surrogate {
46 self.surrogate_to_local.remove(&s);
48 }
49 }
50 for (k, s) in new_entries {
51 self.surrogate_map.insert(k, s);
52 self.surrogate_to_local.insert(s, k);
53 }
54
55 for ids in self.multi_doc_map.values_mut() {
57 ids.retain_mut(|vid| {
58 let v = *vid;
59 if (v as u64) >= base_id as u64 && (v as u64) < segment_end {
60 let old_local = (v - base_id) as usize;
61 let new_local = id_map[old_local];
62 if new_local == u32::MAX {
63 false
64 } else {
65 *vid = base_id + new_local;
66 true
67 }
68 } else {
69 true
70 }
71 });
72 }
73 }
74 total_removed
75 }
76
77 pub fn export_snapshot(&self) -> Vec<(u32, Vec<f32>, Option<Surrogate>)> {
79 let mut result = Vec::new();
80
81 for i in 0..self.growing.len() as u32 {
82 let vid = self.growing_base_id + i;
83 if let Some(data) = self.growing.get_vector(i) {
84 let surrogate = self.surrogate_map.get(&vid).copied();
85 result.push((vid, data.to_vec(), surrogate));
86 }
87 }
88
89 for seg in &self.sealed {
90 let vectors = seg.index.export_vectors();
91 for (i, vec_data) in vectors.into_iter().enumerate() {
92 let vid = seg.base_id + i as u32;
93 let surrogate = self.surrogate_map.get(&vid).copied();
94 result.push((vid, vec_data, surrogate));
95 }
96 }
97
98 for seg in &self.building {
99 for i in 0..seg.flat.len() as u32 {
100 let vid = seg.base_id + i;
101 if let Some(data) = seg.flat.get_vector(i) {
102 let surrogate = self.surrogate_map.get(&vid).copied();
103 result.push((vid, data.to_vec(), surrogate));
104 }
105 }
106 }
107
108 result
109 }
110}