nodedb_vector/codec_index/
graph.rs1use nodedb_codec::vector_quant::codec::VectorCodec;
11
12pub const MAX_LAYER_CAP: usize = 16;
14
15pub struct NodeC<C: VectorCodec> {
17 pub id: u32,
19 pub deleted: bool,
21 pub layer: usize,
23 pub quantized: C::Quantized,
25 pub neighbors: Vec<Vec<u32>>,
28}
29
30pub(crate) struct Xorshift64(pub u64);
32
33impl Xorshift64 {
34 pub(crate) fn new(seed: u64) -> Self {
35 Self(seed.max(1))
36 }
37
38 #[inline]
39 pub(crate) fn next_f64(&mut self) -> f64 {
40 self.0 ^= self.0 << 13;
41 self.0 ^= self.0 >> 7;
42 self.0 ^= self.0 << 17;
43 (self.0 as f64) / (u64::MAX as f64)
44 }
45}
46
47pub struct HnswCodecIndex<C: VectorCodec> {
53 pub dim: usize,
54 pub m: usize,
56 pub m0: usize,
58 pub ef_construction: usize,
59 pub max_layer: usize,
61 pub entry_point: Option<u32>,
63 pub level_mult: f32,
65 pub codec: C,
66 pub(crate) nodes: Vec<NodeC<C>>,
68 pub(crate) rng: Xorshift64,
69}
70
71impl<C: VectorCodec> HnswCodecIndex<C> {
72 pub fn new(dim: usize, m: usize, ef_construction: usize, codec: C, seed: u64) -> Self {
74 let m0 = m * 2;
75 let level_mult = 1.0 / (m as f32).ln();
76 Self {
77 dim,
78 m,
79 m0,
80 ef_construction,
81 max_layer: 0,
82 entry_point: None,
83 level_mult,
84 codec,
85 nodes: Vec::new(),
86 rng: Xorshift64::new(seed),
87 }
88 }
89
90 pub fn len(&self) -> usize {
92 self.nodes.len()
93 }
94
95 pub fn is_empty(&self) -> bool {
96 self.nodes.iter().all(|n| n.deleted)
97 }
98
99 pub fn dim(&self) -> usize {
100 self.dim
101 }
102
103 pub fn random_layer(&mut self) -> usize {
108 let r = self.rng.next_f64().max(f64::MIN_POSITIVE);
109 let layer = (-r.ln() * self.level_mult as f64).floor() as usize;
110 layer.min(MAX_LAYER_CAP)
111 }
112
113 pub fn quantized_at(&self, idx: u32) -> Option<&C::Quantized> {
116 self.nodes
117 .get(idx as usize)
118 .filter(|n| !n.deleted)
119 .map(|n| &n.quantized)
120 }
121
122 pub fn neighbors_at(&self, idx: u32, layer: usize) -> &[u32] {
124 let node = &self.nodes[idx as usize];
125 if layer < node.neighbors.len() {
126 &node.neighbors[layer]
127 } else {
128 &[]
129 }
130 }
131
132 pub(crate) fn max_neighbors(&self, layer: usize) -> usize {
134 if layer == 0 { self.m0 } else { self.m }
135 }
136}
137
138#[cfg(test)]
139mod tests {
140 use super::*;
141 use crate::quantize::Sq8Codec;
142
143 fn make_sq8(dim: usize) -> Sq8Codec {
144 let vecs: Vec<Vec<f32>> = (0..20)
145 .map(|i| (0..dim).map(|d| (i * dim + d) as f32 * 0.1).collect())
146 .collect();
147 let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
148 Sq8Codec::calibrate(&refs, dim)
149 }
150
151 #[test]
152 fn new_index_is_empty() {
153 let codec = make_sq8(4);
154 let idx: HnswCodecIndex<Sq8Codec> = HnswCodecIndex::new(4, 16, 100, codec, 42);
155 assert_eq!(idx.len(), 0);
156 assert!(idx.is_empty());
157 assert!(idx.entry_point.is_none());
158 assert_eq!(idx.m0, idx.m * 2);
159 }
160
161 #[test]
162 fn random_layer_is_bounded() {
163 let codec = make_sq8(4);
164 let mut idx: HnswCodecIndex<Sq8Codec> = HnswCodecIndex::new(4, 16, 100, codec, 123);
165 for _ in 0..1000 {
166 assert!(idx.random_layer() <= MAX_LAYER_CAP);
167 }
168 }
169}