use crate::digest::ValueDigest;
pub const MAX_LEVEL: u8 = 31;
pub fn vector_level(id: &[u8], vector: &[f32], level_bits: u8) -> u8 {
let level_bits = level_bits.max(1);
let mut buf = Vec::with_capacity(id.len() + vector.len() * 4);
buf.extend_from_slice(id);
for f in vector {
buf.extend_from_slice(&f.to_le_bytes());
}
let hash = ValueDigest::<32>::new(&buf);
let lz = leading_zero_bits(hash.as_bytes());
let level = lz / u32::from(level_bits);
level.min(u32::from(MAX_LEVEL)) as u8
}
fn leading_zero_bits(bytes: &[u8]) -> u32 {
let mut count = 0u32;
for &b in bytes {
if b == 0 {
count += 8;
} else {
return count + b.leading_zeros();
}
}
count
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn leading_zeros_basic() {
assert_eq!(leading_zero_bits(&[]), 0);
assert_eq!(leading_zero_bits(&[0xFF]), 0);
assert_eq!(leading_zero_bits(&[0x80]), 0);
assert_eq!(leading_zero_bits(&[0x40]), 1);
assert_eq!(leading_zero_bits(&[0x00, 0x80]), 8);
assert_eq!(leading_zero_bits(&[0x00, 0x00, 0x10]), 19);
assert_eq!(leading_zero_bits(&[0x00, 0x00, 0x00, 0x00]), 32);
}
#[test]
fn determinism() {
let v = vec![0.1f32, 0.2, 0.3];
assert_eq!(vector_level(b"id-1", &v, 4), vector_level(b"id-1", &v, 4));
}
#[test]
fn different_id_different_level() {
let v = vec![0.0f32, 0.0, 0.0];
let mut seen_levels = std::collections::HashSet::new();
for i in 0..1000u32 {
let id = i.to_be_bytes();
seen_levels.insert(vector_level(&id, &v, 4));
}
assert!(
seen_levels.len() > 1,
"expected variety of levels, got {seen_levels:?}"
);
}
#[test]
fn level_bounded_by_max_level() {
for i in 0..256u32 {
let id = i.to_be_bytes();
let v = vec![0.0f32; 4];
let l = vector_level(&id, &v, 1);
assert!(l <= MAX_LEVEL);
}
}
#[test]
fn level_distribution_matches_expected() {
let mut counts: Vec<u32> = vec![0; (MAX_LEVEL as usize) + 1];
for i in 0..10_000u32 {
let id = i.to_be_bytes();
let v = vec![i as f32, (i * 2) as f32];
let l = vector_level(&id, &v, 4) as usize;
counts[l] += 1;
}
assert_eq!(counts.iter().sum::<u32>(), 10_000);
let level_0 = counts[0];
assert!(
level_0 > 9_000,
"expected most entries at level 0, got {level_0} / 10000"
);
for &c in &counts[9..] {
assert_eq!(c, 0);
}
}
}