sochdb_vector/segment/
format.rs1use crate::types::*;
10use bytemuck::{Pod, Zeroable};
11
12#[repr(C)]
14#[derive(Debug, Clone, Copy, Pod, Zeroable)]
15pub struct SegmentHeader {
16 pub magic: [u8; 8],
18 pub version: u32,
20 pub flags: SegmentFlags,
22 pub n_vec: u32,
24 pub dim: u32,
26 pub bps_block: u16,
28 pub bps_proj: u16,
30 pub rdf_t: u16,
32 pub rdf_stripe_shift: u8,
34 pub num_outliers: u8,
36
37 pub off_bps: u64,
40 pub off_i8: u64,
42 pub off_scales: u64,
44 pub off_outliers: u64,
46 pub off_tombstone: u64,
48 pub off_rdf_dir: u64,
50 pub off_rdf_data: u64,
52 pub off_dim_weights: u64,
54 pub off_fp32: u64,
56 pub off_bps_qparams: u64,
58 pub file_len: u64,
60
61 pub _reserved1: [u8; 128],
64 pub _reserved2: [u8; 8],
65}
66
67impl SegmentHeader {
68 pub const SIZE: usize = std::mem::size_of::<Self>();
69
70 pub fn new(n_vec: u32, dim: u32) -> Self {
72 Self {
73 magic: MAGIC,
74 version: SEGMENT_VERSION,
75 flags: SegmentFlags::empty(),
76 n_vec,
77 dim,
78 bps_block: DEFAULT_BPS_BLOCK_SIZE,
79 bps_proj: DEFAULT_BPS_PROJECTIONS,
80 rdf_t: DEFAULT_RDF_TOP_T,
81 rdf_stripe_shift: DEFAULT_STRIPE_SHIFT,
82 num_outliers: DEFAULT_NUM_OUTLIERS,
83 off_bps: 0,
84 off_i8: 0,
85 off_scales: 0,
86 off_outliers: 0,
87 off_tombstone: 0,
88 off_rdf_dir: 0,
89 off_rdf_data: 0,
90 off_dim_weights: 0,
91 off_fp32: 0,
92 off_bps_qparams: 0,
93 file_len: 0,
94 _reserved1: [0; 128],
95 _reserved2: [0; 8],
96 }
97 }
98
99 pub fn validate(&self) -> crate::Result<()> {
101 if self.magic != MAGIC {
102 return Err(crate::Error::InvalidMagic);
103 }
104 if self.version != SEGMENT_VERSION {
105 return Err(crate::Error::UnsupportedVersion(self.version));
106 }
107 Ok(())
108 }
109
110 pub fn num_bps_blocks(&self) -> u32 {
112 (self.dim + self.bps_block as u32 - 1) / self.bps_block as u32
113 }
114
115 pub fn bps_size(&self) -> usize {
117 self.num_bps_blocks() as usize * self.n_vec as usize * self.bps_proj as usize
118 }
119
120 pub fn i8_size(&self) -> usize {
122 self.n_vec as usize * self.dim as usize
123 }
124
125 pub fn stripe_size(&self) -> usize {
127 1usize << self.rdf_stripe_shift
128 }
129}
130
131#[repr(transparent)]
133#[derive(Debug, Clone, Copy, Pod, Zeroable, PartialEq, Eq)]
134pub struct SegmentFlags(pub u32);
135
136impl SegmentFlags {
137 pub const NONE: u32 = 0;
138 pub const HAS_FP32: u32 = 1 << 0;
139 pub const HAS_OUTLIERS: u32 = 1 << 1;
140 pub const HAS_RDF: u32 = 1 << 2;
141 pub const HAS_BPS: u32 = 1 << 3;
142 pub const NORMALIZED: u32 = 1 << 4;
143 pub const ROTATED: u32 = 1 << 5;
144
145 pub fn empty() -> Self {
146 Self(Self::NONE)
147 }
148
149 pub fn has(&self, flag: u32) -> bool {
150 (self.0 & flag) != 0
151 }
152
153 pub fn set(&mut self, flag: u32) {
154 self.0 |= flag;
155 }
156}
157
158#[repr(C)]
160#[derive(Debug, Clone, Copy, Pod, Zeroable)]
161pub struct PostingListEntry {
162 pub offset: u64,
164 pub length: u32,
166 pub num_stripes: u16,
168 pub flags: u16,
170}
171
172impl PostingListEntry {
173 pub const FLAG_STOPWORD: u16 = 1 << 0;
174
175 pub fn is_stopword(&self) -> bool {
176 (self.flags & Self::FLAG_STOPWORD) != 0
177 }
178}
179
180#[repr(C)]
182#[derive(Debug, Clone, Copy, Pod, Zeroable)]
183pub struct BlockScale {
184 pub scale: f32,
186}
187
/// Rounds `value` up to the next multiple of `alignment`.
///
/// `alignment` must be a power of two: the result is obtained by clearing the
/// low bits selected by `alignment - 1`, which is meaningless for any other
/// alignment. A `value` that is already aligned is returned unchanged.
///
/// With overflow checks enabled, an `alignment` of zero or a `value` close to
/// `usize::MAX` makes the intermediate arithmetic panic.
#[inline]
pub const fn align_to(value: usize, alignment: usize) -> usize {
    let bumped = value + alignment - 1;
    let low_bits = alignment - 1;
    bumped & !low_bits
}
193
/// Returns the index of the first BPS element belonging to vector `vec_id`
/// in block `block`.
///
/// Elements are laid out block-major: each block holds `n_vec` consecutive
/// groups of `proj` elements, one group per vector. The group for
/// (`block`, `vec_id`) therefore starts at `(block * n_vec + vec_id) * proj`,
/// and its `proj` elements follow contiguously.
///
/// * `block`  - block index.
/// * `vec_id` - vector index within the segment, expected to be `< n_vec`.
/// * `n_vec`  - number of vectors per block.
/// * `proj`   - number of elements stored per vector per block.
///
/// The arithmetic is not checked for overflow beyond the language defaults.
#[inline]
pub const fn bps_offset(block: usize, vec_id: usize, n_vec: usize, proj: usize) -> usize {
    // One formula for every projection count, so groups never overlap and the
    // layout agrees with `SegmentHeader::bps_size` (blocks * n_vec * proj).
    // For `proj` of 1 and 2 this equals `block * n_vec + vec_id` and
    // `(block * 2) * n_vec + vec_id * 2` respectively.
    (block * n_vec + vec_id) * proj
}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// The header size is pinned to 256 bytes.
    #[test]
    fn test_header_size() {
        let expected_bytes = 256;
        assert_eq!(SegmentHeader::SIZE, expected_bytes);
    }

    /// A freshly built header validates; one with zeroed magic bytes does not.
    #[test]
    fn test_header_validation() {
        let fresh = SegmentHeader::new(1000, 768);
        assert!(fresh.validate().is_ok());

        let corrupted = SegmentHeader {
            magic: [0; 8],
            ..fresh
        };
        assert!(corrupted.validate().is_err());
    }

    /// Setting one flag makes `has` report it without affecting other flags.
    #[test]
    fn test_flags() {
        let mut bits = SegmentFlags::empty();
        assert!(!bits.has(SegmentFlags::HAS_BPS));

        bits.set(SegmentFlags::HAS_BPS);
        assert!(bits.has(SegmentFlags::HAS_BPS));
        assert!(!bits.has(SegmentFlags::HAS_RDF));
    }
}