1use std::io;
4use std::mem::size_of;
5
6use serde::{Deserialize, Serialize};
7
8use crate::directories::{AsyncFileRead, LazyFileSlice, OwnedBytes};
9use crate::dsl::DenseVectorQuantization;
10use crate::structures::simd::{batch_f32_to_f16, batch_f32_to_u8, f16_to_f32, u8_to_f32};
11
12#[inline]
18pub fn dequantize_raw(
19 raw: &[u8],
20 quant: DenseVectorQuantization,
21 num_floats: usize,
22 out: &mut [f32],
23) {
24 debug_assert!(out.len() >= num_floats);
25 match quant {
26 DenseVectorQuantization::F32 => {
27 debug_assert!(
28 (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<f32>()),
29 "f32 vector data not 4-byte aligned"
30 );
31 out[..num_floats].copy_from_slice(unsafe {
32 std::slice::from_raw_parts(raw.as_ptr() as *const f32, num_floats)
33 });
34 }
35 DenseVectorQuantization::F16 => {
36 debug_assert!(
37 (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<u16>()),
38 "f16 vector data not 2-byte aligned"
39 );
40 let f16_slice =
41 unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const u16, num_floats) };
42 for (i, &h) in f16_slice.iter().enumerate() {
43 out[i] = f16_to_f32(h);
44 }
45 }
46 DenseVectorQuantization::UInt8 => {
47 for (i, &b) in raw.iter().enumerate().take(num_floats) {
48 out[i] = u8_to_f32(b);
49 }
50 }
51 }
52}
53
/// Magic number identifying the flat vector binary format. The u32 spells
/// ASCII "FVD3" from most-significant byte down (stored little-endian, so
/// the file begins with the bytes "3DVF") — presumably "Flat Vector Data v3".
const FLAT_BINARY_MAGIC: u32 = 0x46564433;

/// Fixed header layout: magic (4) + dim (4) + num_vectors (4) + quantization
/// tag with 3 padding bytes (4) = 16 bytes.
const FLAT_BINARY_HEADER_SIZE: usize = 16;
/// Each doc-id table entry is a u32 doc id followed by a u16 ordinal (6 bytes).
const DOC_ID_ENTRY_SIZE: usize = size_of::<u32>() + size_of::<u16>();
61
62pub struct FlatVectorData;
75
76impl FlatVectorData {
77 pub fn write_binary_header(
79 dim: usize,
80 num_vectors: usize,
81 quant: DenseVectorQuantization,
82 writer: &mut dyn std::io::Write,
83 ) -> std::io::Result<()> {
84 writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
85 writer.write_all(&(dim as u32).to_le_bytes())?;
86 writer.write_all(&(num_vectors as u32).to_le_bytes())?;
87 writer.write_all(&[quant.tag(), 0, 0, 0])?; Ok(())
89 }
90
91 pub fn serialized_binary_size(
93 dim: usize,
94 num_vectors: usize,
95 quant: DenseVectorQuantization,
96 ) -> usize {
97 FLAT_BINARY_HEADER_SIZE
98 + num_vectors * dim * quant.element_size()
99 + num_vectors * DOC_ID_ENTRY_SIZE
100 }
101
102 pub fn serialize_binary_from_flat_streaming(
107 dim: usize,
108 flat_vectors: &[f32],
109 doc_ids: &[(u32, u16)],
110 quant: DenseVectorQuantization,
111 writer: &mut dyn std::io::Write,
112 ) -> std::io::Result<()> {
113 let num_vectors = doc_ids.len();
114 Self::write_binary_header(dim, num_vectors, quant, writer)?;
115
116 match quant {
117 DenseVectorQuantization::F32 => {
118 let bytes: &[u8] = unsafe {
119 std::slice::from_raw_parts(
120 flat_vectors.as_ptr() as *const u8,
121 std::mem::size_of_val(flat_vectors),
122 )
123 };
124 writer.write_all(bytes)?;
125 }
126 DenseVectorQuantization::F16 => {
127 let mut buf = vec![0u16; dim];
128 for v in flat_vectors.chunks_exact(dim) {
129 batch_f32_to_f16(v, &mut buf);
130 let bytes: &[u8] =
131 unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const u8, dim * 2) };
132 writer.write_all(bytes)?;
133 }
134 }
135 DenseVectorQuantization::UInt8 => {
136 let mut buf = vec![0u8; dim];
137 for v in flat_vectors.chunks_exact(dim) {
138 batch_f32_to_u8(v, &mut buf);
139 writer.write_all(&buf)?;
140 }
141 }
142 }
143
144 for &(doc_id, ordinal) in doc_ids {
145 writer.write_all(&doc_id.to_le_bytes())?;
146 writer.write_all(&ordinal.to_le_bytes())?;
147 }
148
149 Ok(())
150 }
151
152 pub fn write_raw_vector_bytes(
156 raw_bytes: &[u8],
157 writer: &mut dyn std::io::Write,
158 ) -> std::io::Result<()> {
159 writer.write_all(raw_bytes)
160 }
161}
162
/// Lazily-loaded view over the flat vector binary format written by
/// `FlatVectorData`: the header and doc-id table are read eagerly at open
/// time, while vector payloads are fetched on demand through `handle`.
#[derive(Debug, Clone)]
pub struct LazyFlatVectorData {
    // Number of f32 components per vector.
    pub dim: usize,
    // Number of vectors stored in the file.
    pub num_vectors: usize,
    // How vector elements are encoded on disk (f32 / f16 / u8).
    pub quantization: DenseVectorQuantization,
    // (doc_id, ordinal) per stored vector, in file order; assumed sorted by
    // doc_id — `flat_indexes_for_doc` binary-searches it via partition_point.
    pub doc_ids: Vec<(u32, u16)>,
    // Lazy file handle used for on-demand vector reads.
    handle: LazyFileSlice,
    // Byte offset of the vector payload (immediately after the 16-byte header).
    vectors_offset: u64,
    // Bytes per vector element, derived from `quantization`.
    element_size: usize,
}
191
192impl LazyFlatVectorData {
193 pub async fn open(handle: LazyFileSlice) -> io::Result<Self> {
198 let header = handle
200 .read_bytes_range(0..FLAT_BINARY_HEADER_SIZE as u64)
201 .await?;
202 let hdr = header.as_slice();
203
204 let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
205 if magic != FLAT_BINARY_MAGIC {
206 return Err(io::Error::new(
207 io::ErrorKind::InvalidData,
208 "Invalid FlatVectorData binary magic",
209 ));
210 }
211
212 let dim = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
213 let num_vectors = u32::from_le_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]) as usize;
214 let quantization = DenseVectorQuantization::from_tag(hdr[12]).ok_or_else(|| {
215 io::Error::new(
216 io::ErrorKind::InvalidData,
217 format!("Unknown quantization tag: {}", hdr[12]),
218 )
219 })?;
220 let element_size = quantization.element_size();
221
222 let vectors_byte_len = num_vectors * dim * element_size;
224 let doc_ids_start = (FLAT_BINARY_HEADER_SIZE + vectors_byte_len) as u64;
225 let doc_ids_byte_len = (num_vectors * DOC_ID_ENTRY_SIZE) as u64;
226
227 let doc_ids_bytes = handle
228 .read_bytes_range(doc_ids_start..doc_ids_start + doc_ids_byte_len)
229 .await?;
230 let d = doc_ids_bytes.as_slice();
231
232 let mut doc_ids = Vec::with_capacity(num_vectors);
233 for i in 0..num_vectors {
234 let off = i * DOC_ID_ENTRY_SIZE;
235 let doc_id = u32::from_le_bytes([d[off], d[off + 1], d[off + 2], d[off + 3]]);
236 let ordinal = u16::from_le_bytes([d[off + 4], d[off + 5]]);
237 doc_ids.push((doc_id, ordinal));
238 }
239
240 Ok(Self {
241 dim,
242 num_vectors,
243 quantization,
244 doc_ids,
245 handle,
246 vectors_offset: FLAT_BINARY_HEADER_SIZE as u64,
247 element_size,
248 })
249 }
250
251 pub async fn read_vector_into(&self, idx: usize, out: &mut [f32]) -> io::Result<()> {
256 debug_assert!(out.len() >= self.dim);
257 let vec_byte_len = self.dim * self.element_size;
258 let byte_offset = self.vectors_offset + (idx * vec_byte_len) as u64;
259 let bytes = self
260 .handle
261 .read_bytes_range(byte_offset..byte_offset + vec_byte_len as u64)
262 .await?;
263 let raw = bytes.as_slice();
264
265 dequantize_raw(raw, self.quantization, self.dim, out);
266 Ok(())
267 }
268
269 pub async fn get_vector(&self, idx: usize) -> io::Result<Vec<f32>> {
271 let mut vector = vec![0f32; self.dim];
272 self.read_vector_into(idx, &mut vector).await?;
273 Ok(vector)
274 }
275
276 pub async fn read_vector_raw_into(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
281 let vbs = self.vector_byte_size();
282 debug_assert!(out.len() >= vbs);
283 let byte_offset = self.vectors_offset + (idx * vbs) as u64;
284 let bytes = self
285 .handle
286 .read_bytes_range(byte_offset..byte_offset + vbs as u64)
287 .await?;
288 out[..vbs].copy_from_slice(bytes.as_slice());
289 Ok(())
290 }
291
292 pub async fn read_vectors_batch(
298 &self,
299 start_idx: usize,
300 count: usize,
301 ) -> io::Result<OwnedBytes> {
302 debug_assert!(start_idx + count <= self.num_vectors);
303 let vec_byte_len = self.dim * self.element_size;
304 let byte_offset = self.vectors_offset + (start_idx * vec_byte_len) as u64;
305 let byte_len = (count * vec_byte_len) as u64;
306 self.handle
307 .read_bytes_range(byte_offset..byte_offset + byte_len)
308 .await
309 }
310
311 pub fn flat_indexes_for_doc(&self, doc_id: u32) -> (usize, &[(u32, u16)]) {
319 let start = self.doc_ids.partition_point(|&(id, _)| id < doc_id);
320 let end = start + self.doc_ids[start..].partition_point(|&(id, _)| id == doc_id);
321 (start, &self.doc_ids[start..end])
322 }
323
324 #[inline]
326 pub fn get_doc_id(&self, idx: usize) -> (u32, u16) {
327 self.doc_ids[idx]
328 }
329
330 #[inline]
332 pub fn vector_byte_size(&self) -> usize {
333 self.dim * self.element_size
334 }
335
336 pub fn vector_bytes_len(&self) -> u64 {
338 (self.num_vectors as u64) * (self.vector_byte_size() as u64)
339 }
340
341 pub fn vectors_byte_offset(&self) -> u64 {
343 self.vectors_offset
344 }
345
346 pub fn handle(&self) -> &LazyFileSlice {
348 &self.handle
349 }
350
351 pub fn estimated_memory_bytes(&self) -> usize {
353 self.doc_ids.capacity() * size_of::<(u32, u16)>() + size_of::<Self>()
354 }
355}
356
/// Serializable bundle of the pieces that make up an IVF-RaBitQ index:
/// the index structure plus the coarse centroids and codebook it was built
/// with, so they are persisted and loaded together.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFRaBitQIndexData {
    // The IVF-RaBitQ index structure itself.
    pub index: crate::structures::IVFRaBitQIndex,
    // Coarse (first-level IVF) cluster centroids.
    pub centroids: crate::structures::CoarseCentroids,
    // RaBitQ codebook paired with this index.
    pub codebook: crate::structures::RaBitQCodebook,
}
364
365impl IVFRaBitQIndexData {
366 pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
367 bincode::serde::encode_to_vec(self, bincode::config::standard())
368 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
369 }
370
371 pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
372 bincode::serde::decode_from_slice(data, bincode::config::standard())
373 .map(|(v, _)| v)
374 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
375 }
376}
377
/// Serializable bundle of the pieces that make up a ScaNN-style IVF-PQ index:
/// the index structure plus the coarse centroids and PQ codebook it was built
/// with, so they are persisted and loaded together.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScaNNIndexData {
    // The IVF-PQ index structure itself.
    pub index: crate::structures::IVFPQIndex,
    // Coarse (first-level IVF) cluster centroids.
    pub centroids: crate::structures::CoarseCentroids,
    // Product-quantization codebook paired with this index.
    pub codebook: crate::structures::PQCodebook,
}
385
386impl ScaNNIndexData {
387 pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
388 bincode::serde::encode_to_vec(self, bincode::config::standard())
389 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
390 }
391
392 pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
393 bincode::serde::decode_from_slice(data, bincode::config::standard())
394 .map(|(v, _)| v)
395 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
396 }
397}