use crate::probe_select::DiversityConfig;
use crate::vector_ops::DistanceMetric;
use core::fmt;
pub const STATE_NEW: u8 = 0;
pub const STATE_CODEBOOKS_TRAINED: u8 = 1;
pub const STATE_OPERATIONAL: u8 = 2;
pub const NO_PARENT: u32 = u32::MAX;
pub const FLAG_IS_ROOT: u8 = 0x01;
pub const FRACTAL_CONFIG_SIZE: usize = 96;
#[derive(Clone, PartialEq)]
pub struct FractalIndexConfig {
pub dim: u32,
pub num_subvectors: u32,
pub num_codewords: u16,
pub metric: DistanceMetric,
pub store_raw_vectors: bool,
pub default_nprobe: u32,
pub(crate) state: u8,
pub root_cluster_id: u32,
pub next_cluster_id: u32,
pub max_leaf_population: u32,
pub min_leaf_population: u32,
pub max_buffer_size: u32,
pub max_children: u32,
pub max_depth: u32,
pub num_vectors: u64,
pub num_clusters: u64,
pub variance_split_factor: f32,
}
impl FractalIndexConfig {
pub fn state(&self) -> u8 {
self.state
}
pub fn sub_dim(&self) -> usize {
if self.num_subvectors == 0 {
return 0;
}
self.dim as usize / self.num_subvectors as usize
}
pub fn metric_byte(&self) -> u8 {
match self.metric {
DistanceMetric::Cosine => 0,
DistanceMetric::EuclideanSq => 1,
DistanceMetric::DotProduct => 2,
DistanceMetric::Manhattan => 3,
}
}
pub fn alloc_cluster_id(&mut self) -> u32 {
let id = self.next_cluster_id;
self.next_cluster_id = self.next_cluster_id.saturating_add(1);
id
}
}
impl fmt::Debug for FractalIndexConfig {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("FractalIndexConfig")
.field("dim", &self.dim)
.field("num_subvectors", &self.num_subvectors)
.field("metric", &self.metric)
.field("store_raw", &self.store_raw_vectors)
.field("nprobe", &self.default_nprobe)
.field("state", &self.state)
.field("root", &self.root_cluster_id)
.field("num_vectors", &self.num_vectors)
.field("num_clusters", &self.num_clusters)
.finish_non_exhaustive()
}
}
pub fn encode_fractal_config(cfg: &FractalIndexConfig) -> [u8; FRACTAL_CONFIG_SIZE] {
let mut buf = [0u8; FRACTAL_CONFIG_SIZE];
buf[0..4].copy_from_slice(&cfg.dim.to_le_bytes());
buf[4..8].copy_from_slice(&cfg.num_subvectors.to_le_bytes());
buf[8..10].copy_from_slice(&cfg.num_codewords.to_le_bytes());
buf[10] = cfg.metric_byte();
buf[11] = u8::from(cfg.store_raw_vectors);
buf[12..16].copy_from_slice(&cfg.default_nprobe.to_le_bytes());
buf[16] = cfg.state;
buf[20..24].copy_from_slice(&cfg.root_cluster_id.to_le_bytes());
buf[24..28].copy_from_slice(&cfg.next_cluster_id.to_le_bytes());
buf[28..32].copy_from_slice(&cfg.max_leaf_population.to_le_bytes());
buf[32..36].copy_from_slice(&cfg.min_leaf_population.to_le_bytes());
buf[36..40].copy_from_slice(&cfg.max_buffer_size.to_le_bytes());
buf[40..44].copy_from_slice(&cfg.max_children.to_le_bytes());
buf[44..48].copy_from_slice(&cfg.max_depth.to_le_bytes());
buf[48..56].copy_from_slice(&cfg.num_vectors.to_le_bytes());
buf[56..64].copy_from_slice(&cfg.num_clusters.to_le_bytes());
buf[64..68].copy_from_slice(&cfg.variance_split_factor.to_bits().to_le_bytes());
buf
}
fn read_u16_le(data: &[u8], offset: usize) -> u16 {
if offset + 2 > data.len() {
return 0;
}
u16::from_le_bytes([data[offset], data[offset + 1]])
}
fn read_u32_le(data: &[u8], offset: usize) -> u32 {
if offset + 4 > data.len() {
return 0;
}
u32::from_le_bytes([
data[offset],
data[offset + 1],
data[offset + 2],
data[offset + 3],
])
}
fn read_u64_le(data: &[u8], offset: usize) -> u64 {
if offset + 8 > data.len() {
return 0;
}
u64::from_le_bytes([
data[offset],
data[offset + 1],
data[offset + 2],
data[offset + 3],
data[offset + 4],
data[offset + 5],
data[offset + 6],
data[offset + 7],
])
}
fn read_byte(data: &[u8], offset: usize) -> u8 {
if offset >= data.len() {
return 0;
}
data[offset]
}
pub fn decode_fractal_config(data: &[u8]) -> FractalIndexConfig {
let dim = read_u32_le(data, 0);
let num_subvectors = read_u32_le(data, 4);
let num_codewords = read_u16_le(data, 8);
let metric = match read_byte(data, 10) {
0 => DistanceMetric::Cosine,
2 => DistanceMetric::DotProduct,
3 => DistanceMetric::Manhattan,
_ => DistanceMetric::EuclideanSq,
};
let store_raw_vectors = read_byte(data, 11) != 0;
let default_nprobe = read_u32_le(data, 12);
let state = read_byte(data, 16);
let root_cluster_id = read_u32_le(data, 20);
let next_cluster_id = read_u32_le(data, 24);
let max_leaf_population = read_u32_le(data, 28);
let min_leaf_population = read_u32_le(data, 32);
let max_buffer_size = read_u32_le(data, 36);
let max_children = read_u32_le(data, 40);
let max_depth = read_u32_le(data, 44);
let num_vectors = read_u64_le(data, 48);
let num_clusters = read_u64_le(data, 56);
let variance_split_factor = f32::from_bits(read_u32_le(data, 64));
FractalIndexConfig {
dim,
num_subvectors,
num_codewords,
metric,
store_raw_vectors,
default_nprobe,
state,
root_cluster_id,
next_cluster_id,
max_leaf_population,
min_leaf_population,
max_buffer_size,
max_children,
max_depth,
num_vectors,
num_clusters,
variance_split_factor,
}
}
#[derive(Debug, Clone)]
pub struct FractalSearchParams {
pub nprobe: u32,
pub candidates: usize,
pub k: usize,
pub rerank: bool,
pub min_hlc: u64,
pub diversity: DiversityConfig,
}
impl FractalSearchParams {
pub fn top_k(k: usize) -> Self {
Self {
nprobe: 8,
candidates: k.saturating_mul(10).max(100),
k,
rerank: true,
min_hlc: 0,
diversity: DiversityConfig { lambda: 0.0 },
}
}
#[must_use]
pub const fn with_nprobe(mut self, nprobe: u32) -> Self {
self.nprobe = nprobe;
self
}
#[must_use]
pub const fn with_min_hlc(mut self, min_hlc: u64) -> Self {
self.min_hlc = min_hlc;
self
}
#[must_use]
pub fn with_diversity(mut self, lambda: f32) -> Self {
self.diversity = DiversityConfig {
lambda: lambda.clamp(0.0, 1.0),
};
self
}
}
pub struct FractalIndexDefinition {
name: &'static str,
dim: u32,
num_subvectors: u32,
metric: DistanceMetric,
store_raw_vectors: bool,
default_nprobe: u32,
max_leaf_population: u32,
min_leaf_population: u32,
max_buffer_size: u32,
max_children: u32,
max_depth: u32,
variance_split_factor: f32,
}
impl FractalIndexDefinition {
pub const fn new(
name: &'static str,
dim: u32,
num_subvectors: u32,
metric: DistanceMetric,
) -> Self {
Self {
name,
dim,
num_subvectors,
metric,
store_raw_vectors: false,
default_nprobe: 8,
max_leaf_population: 1000,
min_leaf_population: 50,
max_buffer_size: 128,
max_children: 64,
max_depth: 8,
variance_split_factor: 2.0,
}
}
#[must_use]
pub const fn with_raw_vectors(mut self) -> Self {
self.store_raw_vectors = true;
self
}
#[must_use]
pub const fn with_nprobe(mut self, nprobe: u32) -> Self {
self.default_nprobe = nprobe;
self
}
#[must_use]
pub const fn with_max_leaf_population(mut self, max: u32) -> Self {
self.max_leaf_population = max;
self
}
#[must_use]
pub const fn with_min_leaf_population(mut self, min: u32) -> Self {
self.min_leaf_population = min;
self
}
#[must_use]
pub const fn with_max_buffer_size(mut self, size: u32) -> Self {
self.max_buffer_size = size;
self
}
pub const fn name(&self) -> &'static str {
self.name
}
pub fn to_config(&self) -> FractalIndexConfig {
FractalIndexConfig {
dim: self.dim,
num_subvectors: self.num_subvectors,
num_codewords: 256,
metric: self.metric,
store_raw_vectors: self.store_raw_vectors,
default_nprobe: self.default_nprobe,
state: STATE_NEW,
root_cluster_id: 0,
next_cluster_id: 0,
max_leaf_population: self.max_leaf_population,
min_leaf_population: self.min_leaf_population,
max_buffer_size: self.max_buffer_size,
max_children: self.max_children,
max_depth: self.max_depth,
num_vectors: 0,
num_clusters: 0,
variance_split_factor: self.variance_split_factor,
}
}
}
impl fmt::Debug for FractalIndexDefinition {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"FractalIndexDefinition({:?}, dim={}, subvecs={}, {:?})",
self.name, self.dim, self.num_subvectors, self.metric,
)
}
}