Skip to main content

hermes_core/segment/
vector_data.rs

1//! Vector index data structures shared between builder and reader
2
3use std::io;
4use std::mem::size_of;
5
6use serde::{Deserialize, Serialize};
7
8use crate::directories::{AsyncFileRead, LazyFileSlice, OwnedBytes};
9use crate::dsl::DenseVectorQuantization;
10use crate::structures::simd::{batch_f32_to_f16, batch_f32_to_u8, f16_to_f32, u8_to_f32};
11
/// Magic number for binary flat vector format v3.
///
/// The constant is ASCII "FVD3" packed as a big-endian u32 (0x46 'F', 0x56 'V',
/// 0x44 'D', 0x33 '3'). It is serialized with `to_le_bytes`, so the file
/// actually begins with the bytes `33 44 56 46` ("3DVF"); reader and writer
/// agree because both go through the same little-endian u32 round-trip.
const FLAT_BINARY_MAGIC: u32 = 0x46564433;

/// Binary header: magic(u32) + dim(u32) + num_vectors(u32) + quant_type(u8) + padding(3)
const FLAT_BINARY_HEADER_SIZE: usize = 16;
/// Per-doc_id entry: doc_id(u32) + ordinal(u16) — 6 bytes per vector.
const DOC_ID_ENTRY_SIZE: usize = size_of::<u32>() + size_of::<u16>();
19
/// Flat vector binary format helpers for writing.
///
/// Binary format v3:
/// ```text
/// [magic(u32)][dim(u32)][num_vectors(u32)][quant_type(u8)][padding(3)]
/// [vectors: N×dim×element_size]
/// [doc_ids: N×(u32+u16)]
/// ```
///
/// All multi-byte header and doc_id fields are little-endian.
/// `element_size` is determined by `quant_type`: f32=4, f16=2, uint8=1.
/// Reading is handled by [`LazyFlatVectorData`] which loads only doc_ids into memory
/// and accesses vector data lazily via mmap-backed range reads.
pub struct FlatVectorData;
33
34impl FlatVectorData {
35    /// Write the binary header to a writer.
36    pub fn write_binary_header(
37        dim: usize,
38        num_vectors: usize,
39        quant: DenseVectorQuantization,
40        writer: &mut dyn std::io::Write,
41    ) -> std::io::Result<()> {
42        writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
43        writer.write_all(&(dim as u32).to_le_bytes())?;
44        writer.write_all(&(num_vectors as u32).to_le_bytes())?;
45        writer.write_all(&[quant.tag(), 0, 0, 0])?; // quant_type + 3 bytes padding
46        Ok(())
47    }
48
49    /// Compute the serialized size without actually serializing.
50    pub fn serialized_binary_size(
51        dim: usize,
52        num_vectors: usize,
53        quant: DenseVectorQuantization,
54    ) -> usize {
55        FLAT_BINARY_HEADER_SIZE
56            + num_vectors * dim * quant.element_size()
57            + num_vectors * DOC_ID_ENTRY_SIZE
58    }
59
60    /// Stream from flat f32 storage to a writer, quantizing on write.
61    ///
62    /// `flat_vectors` is contiguous storage of dim*n f32 floats.
63    /// Vectors are quantized to the specified format before writing.
64    pub fn serialize_binary_from_flat_streaming(
65        dim: usize,
66        flat_vectors: &[f32],
67        doc_ids: &[(u32, u16)],
68        quant: DenseVectorQuantization,
69        writer: &mut dyn std::io::Write,
70    ) -> std::io::Result<()> {
71        let num_vectors = doc_ids.len();
72        Self::write_binary_header(dim, num_vectors, quant, writer)?;
73
74        match quant {
75            DenseVectorQuantization::F32 => {
76                let bytes: &[u8] = unsafe {
77                    std::slice::from_raw_parts(
78                        flat_vectors.as_ptr() as *const u8,
79                        std::mem::size_of_val(flat_vectors),
80                    )
81                };
82                writer.write_all(bytes)?;
83            }
84            DenseVectorQuantization::F16 => {
85                let mut buf = vec![0u16; dim];
86                for v in flat_vectors.chunks_exact(dim) {
87                    batch_f32_to_f16(v, &mut buf);
88                    let bytes: &[u8] =
89                        unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const u8, dim * 2) };
90                    writer.write_all(bytes)?;
91                }
92            }
93            DenseVectorQuantization::UInt8 => {
94                let mut buf = vec![0u8; dim];
95                for v in flat_vectors.chunks_exact(dim) {
96                    batch_f32_to_u8(v, &mut buf);
97                    writer.write_all(&buf)?;
98                }
99            }
100        }
101
102        for &(doc_id, ordinal) in doc_ids {
103            writer.write_all(&doc_id.to_le_bytes())?;
104            writer.write_all(&ordinal.to_le_bytes())?;
105        }
106
107        Ok(())
108    }
109
110    /// Write raw pre-quantized vector bytes to a writer (for merger streaming).
111    ///
112    /// `raw_bytes` is already in the target quantized format.
113    pub fn write_raw_vector_bytes(
114        raw_bytes: &[u8],
115        writer: &mut dyn std::io::Write,
116    ) -> std::io::Result<()> {
117        writer.write_all(raw_bytes)
118    }
119}
120
/// Lazy flat vector data — doc_ids in memory, vectors accessed via range reads.
///
/// Only the doc_id index (~6 bytes/vector) is loaded into memory.
/// Vector data stays on disk and is accessed via mmap-backed range reads.
/// Element size depends on quantization: f32=4, f16=2, uint8=1 bytes/dim.
///
/// Used for:
/// - Brute-force search (batched scoring with native-precision SIMD)
/// - Reranking (read individual vectors by doc_id via binary search)
/// - doc() hydration (dequantize to f32 for stored documents)
/// - Merge streaming (chunked raw vector bytes + doc_id iteration)
#[derive(Debug, Clone)]
pub struct LazyFlatVectorData {
    /// Vector dimension
    pub dim: usize,
    /// Total number of vectors
    pub num_vectors: usize,
    /// Storage quantization type
    pub quantization: DenseVectorQuantization,
    /// In-memory doc_id index: (doc_id, ordinal) per vector, sorted by
    /// (doc_id, ordinal) — `flat_indexes_for_doc` binary-searches on this.
    pub doc_ids: Vec<(u32, u16)>,
    /// Lazy handle to this field's flat data region in the .vectors file
    handle: LazyFileSlice,
    /// Byte offset within handle where raw vector data starts (after the
    /// 16-byte header)
    vectors_offset: u64,
    /// Bytes per vector element (cached from quantization.element_size())
    element_size: usize,
}
149
150impl LazyFlatVectorData {
151    /// Open from a lazy file slice pointing to the flat binary data region.
152    ///
153    /// Reads header (16 bytes) + doc_ids (~6 bytes/vector) into memory.
154    /// Vector data stays lazy on disk.
155    pub async fn open(handle: LazyFileSlice) -> io::Result<Self> {
156        // Read header: magic(4) + dim(4) + num_vectors(4) + quant_type(1) + pad(3) = 16 bytes
157        let header = handle
158            .read_bytes_range(0..FLAT_BINARY_HEADER_SIZE as u64)
159            .await?;
160        let hdr = header.as_slice();
161
162        let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
163        if magic != FLAT_BINARY_MAGIC {
164            return Err(io::Error::new(
165                io::ErrorKind::InvalidData,
166                "Invalid FlatVectorData binary magic",
167            ));
168        }
169
170        let dim = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
171        let num_vectors = u32::from_le_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]) as usize;
172        let quantization = DenseVectorQuantization::from_tag(hdr[12]).ok_or_else(|| {
173            io::Error::new(
174                io::ErrorKind::InvalidData,
175                format!("Unknown quantization tag: {}", hdr[12]),
176            )
177        })?;
178        let element_size = quantization.element_size();
179
180        // Read doc_ids section (small: 6 bytes per vector)
181        let vectors_byte_len = num_vectors * dim * element_size;
182        let doc_ids_start = (FLAT_BINARY_HEADER_SIZE + vectors_byte_len) as u64;
183        let doc_ids_byte_len = (num_vectors * DOC_ID_ENTRY_SIZE) as u64;
184
185        let doc_ids_bytes = handle
186            .read_bytes_range(doc_ids_start..doc_ids_start + doc_ids_byte_len)
187            .await?;
188        let d = doc_ids_bytes.as_slice();
189
190        let mut doc_ids = Vec::with_capacity(num_vectors);
191        for i in 0..num_vectors {
192            let off = i * DOC_ID_ENTRY_SIZE;
193            let doc_id = u32::from_le_bytes([d[off], d[off + 1], d[off + 2], d[off + 3]]);
194            let ordinal = u16::from_le_bytes([d[off + 4], d[off + 5]]);
195            doc_ids.push((doc_id, ordinal));
196        }
197
198        Ok(Self {
199            dim,
200            num_vectors,
201            quantization,
202            doc_ids,
203            handle,
204            vectors_offset: FLAT_BINARY_HEADER_SIZE as u64,
205            element_size,
206        })
207    }
208
209    /// Read a single vector by index, dequantized to f32.
210    ///
211    /// `out` must have length >= `self.dim`. Returns `Ok(())` on success.
212    /// Used for ANN training and doc() hydration where f32 is needed.
213    pub async fn read_vector_into(&self, idx: usize, out: &mut [f32]) -> io::Result<()> {
214        debug_assert!(out.len() >= self.dim);
215        let vec_byte_len = self.dim * self.element_size;
216        let byte_offset = self.vectors_offset + (idx * vec_byte_len) as u64;
217        let bytes = self
218            .handle
219            .read_bytes_range(byte_offset..byte_offset + vec_byte_len as u64)
220            .await?;
221        let raw = bytes.as_slice();
222
223        match self.quantization {
224            DenseVectorQuantization::F32 => unsafe {
225                std::ptr::copy_nonoverlapping(
226                    raw.as_ptr(),
227                    out.as_mut_ptr() as *mut u8,
228                    vec_byte_len,
229                );
230            },
231            DenseVectorQuantization::F16 => {
232                let f16_slice =
233                    unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const u16, self.dim) };
234                for (i, &h) in f16_slice.iter().enumerate() {
235                    out[i] = f16_to_f32(h);
236                }
237            }
238            DenseVectorQuantization::UInt8 => {
239                for (i, &b) in raw.iter().enumerate().take(self.dim) {
240                    out[i] = u8_to_f32(b);
241                }
242            }
243        }
244        Ok(())
245    }
246
247    /// Read a single vector by index, dequantized to f32 (allocates a new Vec<f32>).
248    pub async fn get_vector(&self, idx: usize) -> io::Result<Vec<f32>> {
249        let mut vector = vec![0f32; self.dim];
250        self.read_vector_into(idx, &mut vector).await?;
251        Ok(vector)
252    }
253
254    /// Read a single vector's raw bytes (no dequantization) into a caller-provided buffer.
255    ///
256    /// `out` must have length >= `self.vector_byte_size()`.
257    /// Used for native-precision reranking where raw quantized bytes are scored directly.
258    pub async fn read_vector_raw_into(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
259        let vbs = self.vector_byte_size();
260        debug_assert!(out.len() >= vbs);
261        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
262        let bytes = self
263            .handle
264            .read_bytes_range(byte_offset..byte_offset + vbs as u64)
265            .await?;
266        out[..vbs].copy_from_slice(bytes.as_slice());
267        Ok(())
268    }
269
270    /// Read a contiguous batch of raw quantized bytes by index range.
271    ///
272    /// Returns raw bytes for vectors `[start_idx..start_idx+count)`.
273    /// Bytes are in native quantized format — pass to `batch_cosine_scores_f16/u8`
274    /// or `batch_cosine_scores` (for f32) for scoring.
275    pub async fn read_vectors_batch(
276        &self,
277        start_idx: usize,
278        count: usize,
279    ) -> io::Result<OwnedBytes> {
280        debug_assert!(start_idx + count <= self.num_vectors);
281        let vec_byte_len = self.dim * self.element_size;
282        let byte_offset = self.vectors_offset + (start_idx * vec_byte_len) as u64;
283        let byte_len = (count * vec_byte_len) as u64;
284        self.handle
285            .read_bytes_range(byte_offset..byte_offset + byte_len)
286            .await
287    }
288
289    /// Find flat indexes for a given doc_id via binary search on sorted doc_ids.
290    ///
291    /// doc_ids are sorted by (doc_id, ordinal) — segment builder adds docs
292    /// sequentially. Returns a slice of (doc_id, ordinal) entries; the position
293    /// of each entry in `self.doc_ids` is its flat vector index.
294    ///
295    /// Returns `(start_index, slice)` where start_index is the position in doc_ids.
296    pub fn flat_indexes_for_doc(&self, doc_id: u32) -> (usize, &[(u32, u16)]) {
297        let start = self.doc_ids.partition_point(|&(id, _)| id < doc_id);
298        let end = start + self.doc_ids[start..].partition_point(|&(id, _)| id == doc_id);
299        (start, &self.doc_ids[start..end])
300    }
301
302    /// Get doc_id and ordinal at index (from in-memory index).
303    #[inline]
304    pub fn get_doc_id(&self, idx: usize) -> (u32, u16) {
305        self.doc_ids[idx]
306    }
307
308    /// Bytes per vector in storage.
309    #[inline]
310    pub fn vector_byte_size(&self) -> usize {
311        self.dim * self.element_size
312    }
313
314    /// Total byte length of raw vector data (for chunked merger streaming).
315    pub fn vector_bytes_len(&self) -> u64 {
316        (self.num_vectors as u64) * (self.vector_byte_size() as u64)
317    }
318
319    /// Byte offset where vector data starts (for direct handle access in merger).
320    pub fn vectors_byte_offset(&self) -> u64 {
321        self.vectors_offset
322    }
323
324    /// Access the underlying lazy file handle (for chunked byte-range reads in merger).
325    pub fn handle(&self) -> &LazyFileSlice {
326        &self.handle
327    }
328
329    /// Estimated memory usage (only doc_ids are in memory).
330    pub fn estimated_memory_bytes(&self) -> usize {
331        self.doc_ids.capacity() * size_of::<(u32, u16)>() + size_of::<Self>()
332    }
333}
334
/// IVF-RaBitQ index data with embedded centroids and codebook.
///
/// Bundled so the index round-trips as one JSON blob via `to_bytes`/`from_bytes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFRaBitQIndexData {
    /// The IVF-RaBitQ index structure
    pub index: crate::structures::IVFRaBitQIndex,
    /// Coarse centroid set shipped alongside the index
    pub centroids: crate::structures::CoarseCentroids,
    /// RaBitQ codebook shipped alongside the index
    pub codebook: crate::structures::RaBitQCodebook,
}
342
343impl IVFRaBitQIndexData {
344    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
345        serde_json::to_vec(self)
346            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
347    }
348
349    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
350        serde_json::from_slice(data)
351            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
352    }
353}
354
/// ScaNN index data with embedded centroids and codebook.
///
/// Bundled so the index round-trips as one JSON blob via `to_bytes`/`from_bytes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScaNNIndexData {
    /// The IVF-PQ index structure
    pub index: crate::structures::IVFPQIndex,
    /// Coarse centroid set shipped alongside the index
    pub centroids: crate::structures::CoarseCentroids,
    /// Product-quantization codebook shipped alongside the index
    pub codebook: crate::structures::PQCodebook,
}
362
363impl ScaNNIndexData {
364    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
365        serde_json::to_vec(self)
366            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
367    }
368
369    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
370        serde_json::from_slice(data)
371            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
372    }
373}