Skip to main content

sochdb_vector/
types.rs

1//! Common types used throughout the engine.
2
3use bytemuck::{Pod, Zeroable};
4use half::f16;
5use serde::{Deserialize, Serialize};
6use std::fmt;
7
/// Identifier of a vector; unique only within the segment that holds it.
pub type VectorId = u32;

/// Index of a single dimension of a vector.
pub type DimIndex = u16;

/// Identifier of a segment; unique across the whole collection.
pub type SegmentId = u64;

/// Ranking score. For dot product, a higher score is a better match.
pub type Score = f32;

/// BPS distance (L1 distance). A lower distance is a better match.
pub type Distance = u16;

/// Index of a block used by BPS.
pub type BlockIndex = u16;

/// Identifier of a stripe in the RDF posting lists.
pub type StripeId = u32;
28
// Configuration constants.

/// Eight-byte magic value: ASCII `SVSEGM` followed by two NUL bytes.
/// (Presumably the segment file signature; confirm against the segment writer.)
pub const MAGIC: [u8; 8] = *b"SVSEGM\x00\x00";
/// Segment version number.
pub const SEGMENT_VERSION: u32 = 1;

// Default configuration values.

/// Default vector dimension.
pub const DEFAULT_DIM: u32 = 768;
/// Default BPS block size.
pub const DEFAULT_BPS_BLOCK_SIZE: u16 = 16;
/// Default number of BPS projections.
pub const DEFAULT_BPS_PROJECTIONS: u16 = 1;
/// Default RDF top-T value.
pub const DEFAULT_RDF_TOP_T: u16 = 32;
/// Default stripe shift; a shift of 8 gives 256 vector IDs per stripe.
pub const DEFAULT_STRIPE_SHIFT: u8 = 8;
/// Default number of outliers.
pub const DEFAULT_NUM_OUTLIERS: u8 = 8;
/// Default stop-dimension threshold (its exact meaning is defined by the
/// code that consumes it, which is not visible in this file).
pub const DEFAULT_STOP_DIM_THRESHOLD: u32 = 2048;
41
42/// A scored candidate from search
43#[derive(Debug, Clone, Copy, PartialEq)]
44pub struct ScoredCandidate {
45    pub id: VectorId,
46    pub score: Score,
47}
48
49impl Eq for ScoredCandidate {}
50
51impl PartialOrd for ScoredCandidate {
52    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
53        Some(self.cmp(other))
54    }
55}
56
57impl Ord for ScoredCandidate {
58    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
59        // Higher score is better, so reverse ordering for min-heap usage
60        other
61            .score
62            .partial_cmp(&self.score)
63            .unwrap_or(std::cmp::Ordering::Equal)
64    }
65}
66
67/// Outlier entry for a vector (stored separately for precision)
68#[repr(C)]
69#[derive(Debug, Clone, Copy, Pod, Zeroable)]
70pub struct OutlierEntry {
71    pub dim_id: DimIndex,
72    pub value: u16, // f16 stored as u16 bits
73}
74
75impl OutlierEntry {
76    pub fn new(dim_id: DimIndex, value: f16) -> Self {
77        Self {
78            dim_id,
79            value: value.to_bits(),
80        }
81    }
82
83    pub fn get_value(&self) -> f16 {
84        f16::from_bits(self.value)
85    }
86}
87
88/// RDF posting entry (stored in striped chunks)
89#[repr(C, packed)]
90#[derive(Debug, Clone, Copy, Pod, Zeroable)]
91pub struct RdfPosting {
92    pub vid_in_stripe: u8, // Local ID within stripe (0-255 for shift=8)
93    pub sign_and_mag: u8,  // High bit = sign, low 7 bits = magnitude
94}
95
96impl RdfPosting {
97    pub fn new(vid_in_stripe: u8, sign: bool, mag: u8) -> Self {
98        let sign_and_mag = if sign {
99            0x80 | (mag & 0x7F)
100        } else {
101            mag & 0x7F
102        };
103        Self {
104            vid_in_stripe,
105            sign_and_mag,
106        }
107    }
108
109    #[inline]
110    pub fn sign(&self) -> bool {
111        (self.sign_and_mag & 0x80) != 0
112    }
113
114    #[inline]
115    pub fn magnitude(&self) -> u8 {
116        self.sign_and_mag & 0x7F
117    }
118}
119
120/// Stripe chunk header for RDF posting lists
121#[repr(C)]
122#[derive(Debug, Clone, Copy, Pod, Zeroable)]
123pub struct StripeChunkHeader {
124    pub stripe_id: StripeId,
125    pub count: u16,
126    pub _pad: u16,
127}
128
129/// Query parameters for search
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct QueryParams {
132    /// Number of results to return
133    pub k: usize,
134    /// RDF candidate limit
135    pub l_a: usize,
136    /// BPS candidate limit  
137    pub l_b: usize,
138    /// Rerank candidate limit
139    pub r: usize,
140    /// Enable adaptive widening
141    pub adaptive: bool,
142    /// Filter bitset (if any)
143    pub filter: Option<Vec<u64>>,
144}
145
146impl Default for QueryParams {
147    fn default() -> Self {
148        Self {
149            k: 10,
150            l_a: 5000,
151            l_b: 20000,
152            r: 500,
153            adaptive: true,
154            filter: None,
155        }
156    }
157}
158
159/// Query result with timing information
160#[derive(Debug, Clone)]
161pub struct QueryResult {
162    pub candidates: Vec<ScoredCandidate>,
163    pub stats: QueryStats,
164}
165
/// Statistics collected during query execution.
///
/// `Default` zeroes every counter and timer and sets `widening_applied` to
/// `false`. Field meanings below are taken from the field names; the code
/// that fills them in is not part of this file.
#[derive(Debug, Clone, Default)]
pub struct QueryStats {
    /// Number of RDF candidates.
    pub rdf_candidates: usize,
    /// Number of BPS candidates.
    pub bps_candidates: usize,
    /// Size of the candidate union.
    pub union_size: usize,
    /// Candidate count after filtering.
    pub post_filter_size: usize,
    /// Number of candidates reranked.
    pub rerank_count: usize,
    /// Whether widening was applied.
    pub widening_applied: bool,
    /// Time for the rotate step, in nanoseconds.
    pub time_rotate_ns: u64,
    /// Time for the RDF step, in nanoseconds.
    pub time_rdf_ns: u64,
    /// Time for the BPS step, in nanoseconds.
    pub time_bps_ns: u64,
    /// Time for the filter step, in nanoseconds.
    pub time_filter_ns: u64,
    /// Time for the rerank step, in nanoseconds.
    pub time_rerank_ns: u64,
    /// Total query time, in nanoseconds.
    pub total_time_ns: u64,
}
182
183impl fmt::Display for QueryStats {
184    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
185        write!(
186            f,
187            "RDF:{} BPS:{} Union:{} Filtered:{} Rerank:{} Widen:{} Total:{:.2}ms",
188            self.rdf_candidates,
189            self.bps_candidates,
190            self.union_size,
191            self.post_filter_size,
192            self.rerank_count,
193            self.widening_applied,
194            self.total_time_ns as f64 / 1_000_000.0
195        )
196    }
197}
198
199/// Similarity metric
200#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
201pub enum Metric {
202    DotProduct,
203    Cosine,
204}
205
206impl Default for Metric {
207    fn default() -> Self {
208        Metric::DotProduct
209    }
210}
211
212/// Segment state in LSM
213#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
214pub enum SegmentState {
215    /// Currently accepting writes
216    Mutable,
217    /// Sealed, immutable, being written
218    Sealing,
219    /// Immutable, ready for queries
220    Sealed,
221    /// Marked for compaction
222    Compacting,
223    /// Deleted (tombstone)
224    Deleted,
225}