use bytemuck::{Pod, Zeroable};
use half::f16;
use serde::{Deserialize, Serialize};
use std::fmt;
pub type VectorId = u32;
pub type DimIndex = u16;
pub type SegmentId = u64;
pub type Score = f32;
pub type Distance = u16;
pub type BlockIndex = u16;
pub type StripeId = u32;
pub const MAGIC: [u8; 8] = *b"SVSEGM\x00\x00";
pub const SEGMENT_VERSION: u32 = 1;
pub const DEFAULT_DIM: u32 = 768;
pub const DEFAULT_BPS_BLOCK_SIZE: u16 = 16;
pub const DEFAULT_BPS_PROJECTIONS: u16 = 1;
pub const DEFAULT_RDF_TOP_T: u16 = 32;
pub const DEFAULT_STRIPE_SHIFT: u8 = 8; pub const DEFAULT_NUM_OUTLIERS: u8 = 8;
pub const DEFAULT_STOP_DIM_THRESHOLD: u32 = 2048;
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoredCandidate {
pub id: VectorId,
pub score: Score,
}
impl Eq for ScoredCandidate {}
impl PartialOrd for ScoredCandidate {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for ScoredCandidate {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
other
.score
.partial_cmp(&self.score)
.unwrap_or(std::cmp::Ordering::Equal)
}
}
#[repr(C)]
#[derive(Debug, Clone, Copy, Pod, Zeroable)]
pub struct OutlierEntry {
pub dim_id: DimIndex,
pub value: u16, }
impl OutlierEntry {
pub fn new(dim_id: DimIndex, value: f16) -> Self {
Self {
dim_id,
value: value.to_bits(),
}
}
pub fn get_value(&self) -> f16 {
f16::from_bits(self.value)
}
}
#[repr(C, packed)]
#[derive(Debug, Clone, Copy, Pod, Zeroable)]
pub struct RdfPosting {
pub vid_in_stripe: u8, pub sign_and_mag: u8, }
impl RdfPosting {
pub fn new(vid_in_stripe: u8, sign: bool, mag: u8) -> Self {
let sign_and_mag = if sign {
0x80 | (mag & 0x7F)
} else {
mag & 0x7F
};
Self {
vid_in_stripe,
sign_and_mag,
}
}
#[inline]
pub fn sign(&self) -> bool {
(self.sign_and_mag & 0x80) != 0
}
#[inline]
pub fn magnitude(&self) -> u8 {
self.sign_and_mag & 0x7F
}
}
#[repr(C)]
#[derive(Debug, Clone, Copy, Pod, Zeroable)]
pub struct StripeChunkHeader {
pub stripe_id: StripeId,
pub count: u16,
pub _pad: u16,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryParams {
pub k: usize,
pub l_a: usize,
pub l_b: usize,
pub r: usize,
pub adaptive: bool,
pub filter: Option<Vec<u64>>,
}
impl Default for QueryParams {
fn default() -> Self {
Self {
k: 10,
l_a: 5000,
l_b: 20000,
r: 500,
adaptive: true,
filter: None,
}
}
}
#[derive(Debug, Clone)]
pub struct QueryResult {
pub candidates: Vec<ScoredCandidate>,
pub stats: QueryStats,
}
#[derive(Debug, Clone, Default)]
pub struct QueryStats {
pub rdf_candidates: usize,
pub bps_candidates: usize,
pub union_size: usize,
pub post_filter_size: usize,
pub rerank_count: usize,
pub widening_applied: bool,
pub time_rotate_ns: u64,
pub time_rdf_ns: u64,
pub time_bps_ns: u64,
pub time_filter_ns: u64,
pub time_rerank_ns: u64,
pub total_time_ns: u64,
}
impl fmt::Display for QueryStats {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"RDF:{} BPS:{} Union:{} Filtered:{} Rerank:{} Widen:{} Total:{:.2}ms",
self.rdf_candidates,
self.bps_candidates,
self.union_size,
self.post_filter_size,
self.rerank_count,
self.widening_applied,
self.total_time_ns as f64 / 1_000_000.0
)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Metric {
DotProduct,
Cosine,
}
impl Default for Metric {
fn default() -> Self {
Metric::DotProduct
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SegmentState {
Mutable,
Sealing,
Sealed,
Compacting,
Deleted,
}