nodedb_vector/delta/
index.rs1use std::collections::HashSet;
10
11use crate::distance::distance;
12use nodedb_types::vector_distance::DistanceMetric;
13
14pub struct DeltaIndex {
17 pub dim: usize,
18 fresh: Vec<(u32, Vec<f32>)>,
20 tombstones: HashSet<u32>,
22 max_fresh: usize,
24}
25
26impl DeltaIndex {
27 pub fn new(dim: usize, max_fresh: usize) -> Self {
32 Self {
33 dim,
34 fresh: Vec::new(),
35 tombstones: HashSet::new(),
36 max_fresh,
37 }
38 }
39
40 pub fn insert(&mut self, id: u32, vector: Vec<f32>) {
43 self.fresh.push((id, vector));
44 }
45
46 pub fn tombstone(&mut self, id: u32) {
49 self.tombstones.insert(id);
50 }
51
52 pub fn is_full(&self) -> bool {
54 self.fresh.len() >= self.max_fresh
55 }
56
57 pub fn fresh_len(&self) -> usize {
59 self.fresh.len()
60 }
61
62 pub fn search(&self, query: &[f32], k: usize, metric: DistanceMetric) -> Vec<(u32, f32)> {
65 if k == 0 {
66 return Vec::new();
67 }
68
69 let mut scored: Vec<(u32, f32)> = self
70 .fresh
71 .iter()
72 .filter(|(id, _)| !self.tombstones.contains(id))
73 .map(|(id, vec)| (*id, distance(query, vec, metric)))
74 .collect();
75
76 if k < scored.len() {
78 scored.select_nth_unstable_by(k, |a, b| {
79 a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)
80 });
81 scored.truncate(k);
82 }
83
84 scored.sort_unstable_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
85
86 scored
87 }
88
89 pub fn drain_fresh(&mut self) -> Vec<(u32, Vec<f32>)> {
92 std::mem::take(&mut self.fresh)
93 }
94
95 pub fn drain_tombstones(&mut self) -> Vec<u32> {
98 std::mem::take(&mut self.tombstones).into_iter().collect()
99 }
100
101 pub fn is_tombstoned(&self, id: u32) -> bool {
103 self.tombstones.contains(&id)
104 }
105}
106
107#[cfg(test)]
108mod tests {
109 use super::*;
110 use nodedb_types::vector_distance::DistanceMetric;
111
112 fn make_delta() -> DeltaIndex {
113 let mut d = DeltaIndex::new(3, 16);
114 for i in 0u32..10 {
115 let v = vec![i as f32, 0.0, 0.0];
116 d.insert(i, v);
117 }
118 d
119 }
120
121 #[test]
122 fn top_k_returns_nearest() {
123 let d = make_delta();
124 let query = [0.0f32, 0.0, 0.0];
125 let results = d.search(&query, 3, DistanceMetric::L2);
126 assert_eq!(results.len(), 3);
127 assert_eq!(results[0].0, 0);
129 assert_eq!(results[1].0, 1);
130 assert_eq!(results[2].0, 2);
131 }
132
133 #[test]
134 fn tombstone_excluded_from_search() {
135 let mut d = make_delta();
136 d.tombstone(0);
137 let query = [0.0f32, 0.0, 0.0];
138 let results = d.search(&query, 3, DistanceMetric::L2);
139 assert!(results.iter().all(|(id, _)| *id != 0));
140 }
141
142 #[test]
143 fn is_full_triggers_at_threshold() {
144 let mut d = DeltaIndex::new(3, 3);
145 assert!(!d.is_full());
146 d.insert(0, vec![0.0, 0.0, 0.0]);
147 d.insert(1, vec![1.0, 0.0, 0.0]);
148 assert!(!d.is_full());
149 d.insert(2, vec![2.0, 0.0, 0.0]);
150 assert!(d.is_full());
151 }
152
153 #[test]
154 fn drain_fresh_empties_buffer() {
155 let mut d = make_delta();
156 let drained = d.drain_fresh();
157 assert_eq!(drained.len(), 10);
158 assert_eq!(d.fresh_len(), 0);
159 }
160
161 #[test]
162 fn drain_tombstones_empties_set() {
163 let mut d = make_delta();
164 d.tombstone(3);
165 d.tombstone(7);
166 let ts = d.drain_tombstones();
167 assert_eq!(ts.len(), 2);
168 assert!(!d.is_tombstoned(3));
169 assert!(!d.is_tombstoned(7));
170 }
171}