nodedb_vector/hnsw/
graph.rs1use crate::distance::distance;
7
8pub use nodedb_types::hnsw::HnswParams;
10
11#[derive(Debug, Clone)]
13pub struct SearchResult {
14 pub id: u32,
16 pub distance: f32,
18}
19
20pub struct Node {
22 pub vector: Vec<f32>,
24 pub neighbors: Vec<Vec<u32>>,
26 pub deleted: bool,
28}
29
30pub struct HnswIndex {
36 pub(crate) params: HnswParams,
37 pub(crate) dim: usize,
38 pub(crate) nodes: Vec<Node>,
39 pub(crate) entry_point: Option<u32>,
40 pub(crate) max_layer: usize,
41 pub(crate) rng: Xorshift64,
42 pub(crate) flat_neighbors: Option<crate::hnsw::flat_neighbors::FlatNeighborStore>,
46}
47
48impl HnswIndex {
49 #[inline]
52 pub(crate) fn neighbors_at(&self, node_id: u32, layer: usize) -> &[u32] {
53 if let Some(ref flat) = self.flat_neighbors {
54 return flat.neighbors_at(node_id, layer);
55 }
56 let node = &self.nodes[node_id as usize];
57 if layer < node.neighbors.len() {
58 &node.neighbors[layer]
59 } else {
60 &[]
61 }
62 }
63
64 #[inline]
66 pub(crate) fn node_num_layers(&self, node_id: u32) -> usize {
67 if let Some(ref flat) = self.flat_neighbors {
68 return flat.num_layers(node_id);
69 }
70 self.nodes[node_id as usize].neighbors.len()
71 }
72
73 pub(crate) fn ensure_mutable_neighbors(&mut self) {
76 if let Some(flat) = self.flat_neighbors.take() {
77 let nested = flat.to_nested(self.nodes.len());
78 for (i, layers) in nested.into_iter().enumerate() {
79 self.nodes[i].neighbors = layers;
80 }
81 }
82 }
83}
84
85pub struct Xorshift64(pub u64);
87
88impl Xorshift64 {
89 pub fn new(seed: u64) -> Self {
90 Self(seed.max(1))
91 }
92
93 pub fn next_f64(&mut self) -> f64 {
94 self.0 ^= self.0 << 13;
95 self.0 ^= self.0 >> 7;
96 self.0 ^= self.0 << 17;
97 (self.0 as f64) / (u64::MAX as f64)
98 }
99}
100
101#[derive(Clone, Copy, PartialEq)]
103pub struct Candidate {
104 pub dist: f32,
105 pub id: u32,
106}
107
108impl Eq for Candidate {}
109
110impl PartialOrd for Candidate {
111 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
112 Some(self.cmp(other))
113 }
114}
115
116impl Ord for Candidate {
117 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
118 self.dist
119 .partial_cmp(&other.dist)
120 .unwrap_or(std::cmp::Ordering::Equal)
121 .then(self.id.cmp(&other.id))
122 }
123}
124
125impl HnswIndex {
126 pub fn new(dim: usize, params: HnswParams) -> Self {
128 Self {
129 dim,
130 nodes: Vec::new(),
131 entry_point: None,
132 max_layer: 0,
133 rng: Xorshift64::new(42),
134 flat_neighbors: None,
135 params,
136 }
137 }
138
139 pub fn with_seed(dim: usize, params: HnswParams, seed: u64) -> Self {
141 Self {
142 dim,
143 nodes: Vec::new(),
144 entry_point: None,
145 max_layer: 0,
146 rng: Xorshift64::new(seed),
147 flat_neighbors: None,
148 params,
149 }
150 }
151
152 pub fn len(&self) -> usize {
153 self.nodes.len()
154 }
155
156 pub fn live_count(&self) -> usize {
157 self.nodes.len() - self.tombstone_count()
158 }
159
160 pub fn tombstone_count(&self) -> usize {
161 self.nodes.iter().filter(|n| n.deleted).count()
162 }
163
164 pub fn tombstone_ratio(&self) -> f64 {
166 if self.nodes.is_empty() {
167 0.0
168 } else {
169 self.tombstone_count() as f64 / self.nodes.len() as f64
170 }
171 }
172
173 pub fn is_empty(&self) -> bool {
174 self.live_count() == 0
175 }
176
177 pub fn delete(&mut self, id: u32) -> bool {
179 if let Some(node) = self.nodes.get_mut(id as usize) {
180 if node.deleted {
181 return false;
182 }
183 node.deleted = true;
184 true
185 } else {
186 false
187 }
188 }
189
190 pub fn is_deleted(&self, id: u32) -> bool {
191 self.nodes.get(id as usize).is_none_or(|n| n.deleted)
192 }
193
194 pub fn undelete(&mut self, id: u32) -> bool {
195 if let Some(node) = self.nodes.get_mut(id as usize)
196 && node.deleted
197 {
198 node.deleted = false;
199 return true;
200 }
201 false
202 }
203
204 pub fn dim(&self) -> usize {
205 self.dim
206 }
207
208 pub fn get_vector(&self, id: u32) -> Option<&[f32]> {
209 self.nodes.get(id as usize).map(|n| n.vector.as_slice())
210 }
211
212 pub fn params(&self) -> &HnswParams {
213 &self.params
214 }
215
216 pub fn entry_point(&self) -> Option<u32> {
217 self.entry_point
218 }
219
220 pub fn max_layer(&self) -> usize {
221 self.max_layer
222 }
223
224 pub fn rng_state(&self) -> u64 {
226 self.rng.0
227 }
228
229 pub fn memory_usage_bytes(&self) -> usize {
231 let vector_bytes = self.nodes.len() * self.dim * std::mem::size_of::<f32>();
232 let neighbor_bytes: usize = self
233 .nodes
234 .iter()
235 .map(|n| {
236 n.neighbors
237 .iter()
238 .map(|layer| layer.len() * 4)
239 .sum::<usize>()
240 })
241 .sum();
242 let node_overhead = self.nodes.len() * std::mem::size_of::<Node>();
243 vector_bytes + neighbor_bytes + node_overhead
244 }
245
246 pub fn export_vectors(&self) -> Vec<Vec<f32>> {
248 self.nodes.iter().map(|n| n.vector.clone()).collect()
249 }
250
251 pub fn export_neighbors(&self) -> Vec<Vec<Vec<u32>>> {
253 self.nodes.iter().map(|n| n.neighbors.clone()).collect()
254 }
255
256 pub(crate) fn random_layer(&mut self) -> usize {
258 let ml = 1.0 / (self.params.m as f64).ln();
259 let r = self.rng.next_f64().max(f64::MIN_POSITIVE);
260 (-r.ln() * ml).floor() as usize
261 }
262
263 pub(crate) fn dist_to_node(&self, query: &[f32], node_id: u32) -> f32 {
265 distance(
266 query,
267 &self.nodes[node_id as usize].vector,
268 self.params.metric,
269 )
270 }
271
272 pub(crate) fn max_neighbors(&self, layer: usize) -> usize {
274 if layer == 0 {
275 self.params.m0
276 } else {
277 self.params.m
278 }
279 }
280
281 pub fn compact(&mut self) -> usize {
283 let tombstone_count = self.tombstone_count();
284 if tombstone_count == 0 {
285 return 0;
286 }
287 self.ensure_mutable_neighbors();
288
289 let mut id_map: Vec<u32> = Vec::with_capacity(self.nodes.len());
290 let mut new_id = 0u32;
291 for node in &self.nodes {
292 if node.deleted {
293 id_map.push(u32::MAX);
294 } else {
295 id_map.push(new_id);
296 new_id += 1;
297 }
298 }
299
300 let mut new_nodes: Vec<Node> = Vec::with_capacity(new_id as usize);
301 for node in self.nodes.drain(..) {
302 if node.deleted {
303 continue;
304 }
305 let remapped_neighbors: Vec<Vec<u32>> = node
306 .neighbors
307 .into_iter()
308 .map(|layer_neighbors| {
309 layer_neighbors
310 .into_iter()
311 .filter_map(|old_nid| {
312 let new_nid = id_map[old_nid as usize];
313 if new_nid == u32::MAX {
314 None
315 } else {
316 Some(new_nid)
317 }
318 })
319 .collect()
320 })
321 .collect();
322 new_nodes.push(Node {
323 vector: node.vector,
324 neighbors: remapped_neighbors,
325 deleted: false,
326 });
327 }
328
329 self.entry_point = if let Some(old_ep) = self.entry_point {
330 let new_ep = id_map[old_ep as usize];
331 if new_ep == u32::MAX {
332 new_nodes
333 .iter()
334 .enumerate()
335 .max_by_key(|(_, n)| n.neighbors.len())
336 .map(|(i, _)| i as u32)
337 } else {
338 Some(new_ep)
339 }
340 } else {
341 None
342 };
343
344 self.max_layer = new_nodes
345 .iter()
346 .map(|n| n.neighbors.len().saturating_sub(1))
347 .max()
348 .unwrap_or(0);
349
350 self.nodes = new_nodes;
351 tombstone_count
352 }
353}
354
355#[cfg(test)]
356mod tests {
357 use super::*;
358 use crate::distance::DistanceMetric;
359
360 #[test]
361 fn create_empty_index() {
362 let idx = HnswIndex::new(3, HnswParams::default());
363 assert_eq!(idx.len(), 0);
364 assert!(idx.is_empty());
365 assert!(idx.entry_point().is_none());
366 }
367
368 #[test]
369 fn params_default() {
370 let p = HnswParams::default();
371 assert_eq!(p.m, 16);
372 assert_eq!(p.m0, 32);
373 assert_eq!(p.ef_construction, 200);
374 assert_eq!(p.metric, DistanceMetric::Cosine);
375 }
376
377 #[test]
378 fn candidate_ordering() {
379 let a = Candidate { dist: 0.1, id: 1 };
380 let b = Candidate { dist: 0.5, id: 2 };
381 assert!(a < b);
382 }
383}