use nodedb_codec::vector_quant::codec::VectorCodec;
pub const MAX_LAYER_CAP: usize = 16;
pub struct NodeC<C: VectorCodec> {
pub id: u32,
pub deleted: bool,
pub layer: usize,
pub quantized: C::Quantized,
pub neighbors: Vec<Vec<u32>>,
}
pub(crate) struct Xorshift64(pub u64);
impl Xorshift64 {
pub(crate) fn new(seed: u64) -> Self {
Self(seed.max(1))
}
#[inline]
pub(crate) fn next_f64(&mut self) -> f64 {
self.0 ^= self.0 << 13;
self.0 ^= self.0 >> 7;
self.0 ^= self.0 << 17;
(self.0 as f64) / (u64::MAX as f64)
}
}
pub struct HnswCodecIndex<C: VectorCodec> {
pub dim: usize,
pub m: usize,
pub m0: usize,
pub ef_construction: usize,
pub max_layer: usize,
pub entry_point: Option<u32>,
pub level_mult: f32,
pub codec: C,
pub(crate) nodes: Vec<NodeC<C>>,
pub(crate) rng: Xorshift64,
}
impl<C: VectorCodec> HnswCodecIndex<C> {
pub fn new(dim: usize, m: usize, ef_construction: usize, codec: C, seed: u64) -> Self {
let m0 = m * 2;
let level_mult = 1.0 / (m as f32).ln();
Self {
dim,
m,
m0,
ef_construction,
max_layer: 0,
entry_point: None,
level_mult,
codec,
nodes: Vec::new(),
rng: Xorshift64::new(seed),
}
}
pub fn len(&self) -> usize {
self.nodes.len()
}
pub fn is_empty(&self) -> bool {
self.nodes.iter().all(|n| n.deleted)
}
pub fn dim(&self) -> usize {
self.dim
}
pub fn random_layer(&mut self) -> usize {
let r = self.rng.next_f64().max(f64::MIN_POSITIVE);
let layer = (-r.ln() * self.level_mult as f64).floor() as usize;
layer.min(MAX_LAYER_CAP)
}
pub fn quantized_at(&self, idx: u32) -> Option<&C::Quantized> {
self.nodes
.get(idx as usize)
.filter(|n| !n.deleted)
.map(|n| &n.quantized)
}
pub fn neighbors_at(&self, idx: u32, layer: usize) -> &[u32] {
let node = &self.nodes[idx as usize];
if layer < node.neighbors.len() {
&node.neighbors[layer]
} else {
&[]
}
}
pub(crate) fn max_neighbors(&self, layer: usize) -> usize {
if layer == 0 { self.m0 } else { self.m }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::quantize::Sq8Codec;
fn make_sq8(dim: usize) -> Sq8Codec {
let vecs: Vec<Vec<f32>> = (0..20)
.map(|i| (0..dim).map(|d| (i * dim + d) as f32 * 0.1).collect())
.collect();
let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
Sq8Codec::calibrate(&refs, dim)
}
#[test]
fn new_index_is_empty() {
let codec = make_sq8(4);
let idx: HnswCodecIndex<Sq8Codec> = HnswCodecIndex::new(4, 16, 100, codec, 42);
assert_eq!(idx.len(), 0);
assert!(idx.is_empty());
assert!(idx.entry_point.is_none());
assert_eq!(idx.m0, idx.m * 2);
}
#[test]
fn random_layer_is_bounded() {
let codec = make_sq8(4);
let mut idx: HnswCodecIndex<Sq8Codec> = HnswCodecIndex::new(4, 16, 100, codec, 123);
for _ in 0..1000 {
assert!(idx.random_layer() <= MAX_LAYER_CAP);
}
}
}