// hermes_core/segment/vector_data.rs

1//! Vector index data structures shared between builder and reader
2
3use std::io;
4use std::mem::size_of;
5
6use serde::{Deserialize, Serialize};
7
8use crate::directories::{AsyncFileRead, LazyFileSlice, OwnedBytes};
9use crate::dsl::DenseVectorQuantization;
10use crate::segment::format::{DOC_ID_ENTRY_SIZE, FLAT_BINARY_HEADER_SIZE, FLAT_BINARY_MAGIC};
11use crate::structures::simd::{batch_f32_to_f16, batch_f32_to_u8, f16_to_f32, u8_to_f32};
12
13/// Dequantize raw bytes to f32 based on storage quantization.
14///
15/// `raw` is the quantized byte slice, `out` receives the f32 values.
16/// `num_floats` is the number of f32 values to produce (= num_vectors × dim).
17/// Data-first file layout guarantees alignment for f32/f16 access.
18#[inline]
19pub fn dequantize_raw(
20    raw: &[u8],
21    quant: DenseVectorQuantization,
22    num_floats: usize,
23    out: &mut [f32],
24) {
25    debug_assert!(out.len() >= num_floats);
26    match quant {
27        DenseVectorQuantization::F32 => {
28            debug_assert!(
29                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<f32>()),
30                "f32 vector data not 4-byte aligned"
31            );
32            out[..num_floats].copy_from_slice(unsafe {
33                std::slice::from_raw_parts(raw.as_ptr() as *const f32, num_floats)
34            });
35        }
36        DenseVectorQuantization::F16 => {
37            debug_assert!(
38                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<u16>()),
39                "f16 vector data not 2-byte aligned"
40            );
41            let f16_slice =
42                unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const u16, num_floats) };
43            for (i, &h) in f16_slice.iter().enumerate() {
44                out[i] = f16_to_f32(h);
45            }
46        }
47        DenseVectorQuantization::UInt8 => {
48            for (i, &b) in raw.iter().enumerate().take(num_floats) {
49                out[i] = u8_to_f32(b);
50            }
51        }
52    }
53}
54
/// Flat vector binary format helpers for writing.
///
/// Binary format v3:
/// ```text
/// [magic(u32)][dim(u32)][num_vectors(u32)][quant_type(u8)][padding(3)]
/// [vectors: N×dim×element_size]
/// [doc_ids: N×(u32+u16)]
/// ```
///
/// All multi-byte header integers are written little-endian.
/// `element_size` is determined by `quant_type`: f32=4, f16=2, uint8=1.
/// Reading is handled by [`LazyFlatVectorData`] which loads only doc_ids into memory
/// and accesses vector data lazily via mmap-backed range reads.
pub struct FlatVectorData;
68
69impl FlatVectorData {
70    /// Write the binary header to a writer.
71    pub fn write_binary_header(
72        dim: usize,
73        num_vectors: usize,
74        quant: DenseVectorQuantization,
75        writer: &mut dyn std::io::Write,
76    ) -> std::io::Result<()> {
77        writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
78        writer.write_all(&(dim as u32).to_le_bytes())?;
79        writer.write_all(&(num_vectors as u32).to_le_bytes())?;
80        writer.write_all(&[quant.tag(), 0, 0, 0])?; // quant_type + 3 bytes padding
81        Ok(())
82    }
83
84    /// Compute the serialized size without actually serializing.
85    pub fn serialized_binary_size(
86        dim: usize,
87        num_vectors: usize,
88        quant: DenseVectorQuantization,
89    ) -> usize {
90        FLAT_BINARY_HEADER_SIZE
91            + num_vectors * dim * quant.element_size()
92            + num_vectors * DOC_ID_ENTRY_SIZE
93    }
94
95    /// Stream from flat f32 storage to a writer, quantizing on write.
96    ///
97    /// `flat_vectors` is contiguous storage of dim*n f32 floats.
98    /// Vectors are quantized to the specified format before writing.
99    pub fn serialize_binary_from_flat_streaming(
100        dim: usize,
101        flat_vectors: &[f32],
102        doc_ids: &[(u32, u16)],
103        quant: DenseVectorQuantization,
104        writer: &mut dyn std::io::Write,
105    ) -> std::io::Result<()> {
106        let num_vectors = doc_ids.len();
107        Self::write_binary_header(dim, num_vectors, quant, writer)?;
108
109        match quant {
110            DenseVectorQuantization::F32 => {
111                let bytes: &[u8] = unsafe {
112                    std::slice::from_raw_parts(
113                        flat_vectors.as_ptr() as *const u8,
114                        std::mem::size_of_val(flat_vectors),
115                    )
116                };
117                writer.write_all(bytes)?;
118            }
119            DenseVectorQuantization::F16 => {
120                let mut buf = vec![0u16; dim];
121                for v in flat_vectors.chunks_exact(dim) {
122                    batch_f32_to_f16(v, &mut buf);
123                    let bytes: &[u8] =
124                        unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const u8, dim * 2) };
125                    writer.write_all(bytes)?;
126                }
127            }
128            DenseVectorQuantization::UInt8 => {
129                let mut buf = vec![0u8; dim];
130                for v in flat_vectors.chunks_exact(dim) {
131                    batch_f32_to_u8(v, &mut buf);
132                    writer.write_all(&buf)?;
133                }
134            }
135        }
136
137        for &(doc_id, ordinal) in doc_ids {
138            writer.write_all(&doc_id.to_le_bytes())?;
139            writer.write_all(&ordinal.to_le_bytes())?;
140        }
141
142        Ok(())
143    }
144
145    /// Write raw pre-quantized vector bytes to a writer (for merger streaming).
146    ///
147    /// `raw_bytes` is already in the target quantized format.
148    pub fn write_raw_vector_bytes(
149        raw_bytes: &[u8],
150        writer: &mut dyn std::io::Write,
151    ) -> std::io::Result<()> {
152        writer.write_all(raw_bytes)
153    }
154}
155
/// Lazy flat vector data — zero-copy doc_id index, vectors via range reads.
///
/// The doc_id index is kept as `OwnedBytes` (mmap-backed, zero heap copy).
/// Vector data stays on disk and is accessed via mmap-backed range reads.
/// Element size depends on quantization: f32=4, f16=2, uint8=1 bytes/dim.
///
/// Used for:
/// - Brute-force search (batched scoring with native-precision SIMD)
/// - Reranking (read individual vectors by doc_id via binary search)
/// - doc() hydration (dequantize to f32 for stored documents)
/// - Merge streaming (chunked raw vector bytes + doc_id iteration)
#[derive(Debug, Clone)]
pub struct LazyFlatVectorData {
    /// Vector dimension
    pub dim: usize,
    /// Total number of vectors
    pub num_vectors: usize,
    /// Storage quantization type
    pub quantization: DenseVectorQuantization,
    /// Zero-copy doc_id index: packed [u32_le doc_id + u16_le ordinal] × num_vectors,
    /// sorted by (doc_id, ordinal) — binary-searchable
    doc_ids_bytes: OwnedBytes,
    /// Lazy handle to this field's flat data region in the .vectors file
    handle: LazyFileSlice,
    /// Byte offset within handle where raw vector data starts (after header)
    vectors_offset: u64,
    /// Bytes per vector element (cached from quantization.element_size())
    element_size: usize,
}
184
185impl LazyFlatVectorData {
186    /// Open from a lazy file slice pointing to the flat binary data region.
187    ///
188    /// Reads header (16 bytes) + doc_ids (~6 bytes/vector) into memory.
189    /// Vector data stays lazy on disk.
190    pub async fn open(handle: LazyFileSlice) -> io::Result<Self> {
191        // Read header: magic(4) + dim(4) + num_vectors(4) + quant_type(1) + pad(3) = 16 bytes
192        let header = handle
193            .read_bytes_range(0..FLAT_BINARY_HEADER_SIZE as u64)
194            .await?;
195        let hdr = header.as_slice();
196
197        let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
198        if magic != FLAT_BINARY_MAGIC {
199            return Err(io::Error::new(
200                io::ErrorKind::InvalidData,
201                "Invalid FlatVectorData binary magic",
202            ));
203        }
204
205        let dim = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
206        let num_vectors = u32::from_le_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]) as usize;
207        let quantization = DenseVectorQuantization::from_tag(hdr[12]).ok_or_else(|| {
208            io::Error::new(
209                io::ErrorKind::InvalidData,
210                format!("Unknown quantization tag: {}", hdr[12]),
211            )
212        })?;
213        let element_size = quantization.element_size();
214
215        // Read doc_ids section as zero-copy OwnedBytes (6 bytes per vector)
216        let vectors_byte_len = num_vectors * dim * element_size;
217        let doc_ids_start = (FLAT_BINARY_HEADER_SIZE + vectors_byte_len) as u64;
218        let doc_ids_byte_len = (num_vectors * DOC_ID_ENTRY_SIZE) as u64;
219
220        let doc_ids_bytes = handle
221            .read_bytes_range(doc_ids_start..doc_ids_start + doc_ids_byte_len)
222            .await?;
223
224        Ok(Self {
225            dim,
226            num_vectors,
227            quantization,
228            doc_ids_bytes,
229            handle,
230            vectors_offset: FLAT_BINARY_HEADER_SIZE as u64,
231            element_size,
232        })
233    }
234
235    /// Read a single vector by index, dequantized to f32.
236    ///
237    /// `out` must have length >= `self.dim`. Returns `Ok(())` on success.
238    /// Used for ANN training and doc() hydration where f32 is needed.
239    pub async fn read_vector_into(&self, idx: usize, out: &mut [f32]) -> io::Result<()> {
240        debug_assert!(out.len() >= self.dim);
241        let vec_byte_len = self.dim * self.element_size;
242        let byte_offset = self.vectors_offset + (idx * vec_byte_len) as u64;
243        let bytes = self
244            .handle
245            .read_bytes_range(byte_offset..byte_offset + vec_byte_len as u64)
246            .await?;
247        let raw = bytes.as_slice();
248
249        dequantize_raw(raw, self.quantization, self.dim, out);
250        Ok(())
251    }
252
253    /// Read a single vector by index, dequantized to f32 (allocates a new Vec<f32>).
254    pub async fn get_vector(&self, idx: usize) -> io::Result<Vec<f32>> {
255        let mut vector = vec![0f32; self.dim];
256        self.read_vector_into(idx, &mut vector).await?;
257        Ok(vector)
258    }
259
260    /// Read a single vector's raw bytes (no dequantization) into a caller-provided buffer.
261    ///
262    /// `out` must have length >= `self.vector_byte_size()`.
263    /// Used for native-precision reranking where raw quantized bytes are scored directly.
264    pub async fn read_vector_raw_into(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
265        let vbs = self.vector_byte_size();
266        debug_assert!(out.len() >= vbs);
267        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
268        let bytes = self
269            .handle
270            .read_bytes_range(byte_offset..byte_offset + vbs as u64)
271            .await?;
272        out[..vbs].copy_from_slice(bytes.as_slice());
273        Ok(())
274    }
275
276    /// Read a contiguous batch of raw quantized bytes by index range.
277    ///
278    /// Returns raw bytes for vectors `[start_idx..start_idx+count)`.
279    /// Bytes are in native quantized format — pass to `batch_cosine_scores_f16/u8`
280    /// or `batch_cosine_scores` (for f32) for scoring.
281    pub async fn read_vectors_batch(
282        &self,
283        start_idx: usize,
284        count: usize,
285    ) -> io::Result<OwnedBytes> {
286        debug_assert!(start_idx + count <= self.num_vectors);
287        let vec_byte_len = self.dim * self.element_size;
288        let byte_offset = self.vectors_offset + (start_idx * vec_byte_len) as u64;
289        let byte_len = (count * vec_byte_len) as u64;
290        self.handle
291            .read_bytes_range(byte_offset..byte_offset + byte_len)
292            .await
293    }
294
295    /// Find flat index range for a given doc_id (non-allocating).
296    ///
297    /// Returns `(start_index, count)` — the flat vector index range for this doc_id.
298    /// Use `get_doc_id(start + i)` for `i in 0..count` to read individual entries.
299    /// More efficient than `flat_indexes_for_doc` as it avoids Vec allocation.
300    pub fn flat_indexes_for_doc_range(&self, doc_id: u32) -> (usize, usize) {
301        let n = self.num_vectors;
302        let start = {
303            let mut lo = 0usize;
304            let mut hi = n;
305            while lo < hi {
306                let mid = lo + (hi - lo) / 2;
307                if self.doc_id_at(mid) < doc_id {
308                    lo = mid + 1;
309                } else {
310                    hi = mid;
311                }
312            }
313            lo
314        };
315        let mut count = 0;
316        let mut i = start;
317        while i < n && self.doc_id_at(i) == doc_id {
318            count += 1;
319            i += 1;
320        }
321        (start, count)
322    }
323
324    /// Find flat indexes for a given doc_id via binary search on sorted doc_ids.
325    ///
326    /// doc_ids are sorted by (doc_id, ordinal) — segment builder adds docs
327    /// sequentially. Binary search runs directly on zero-copy mmap bytes.
328    ///
329    /// Returns `(start_index, entries)` where start_index is the flat vector index.
330    pub fn flat_indexes_for_doc(&self, doc_id: u32) -> (usize, Vec<(u32, u16)>) {
331        let n = self.num_vectors;
332        // Binary search: find first entry where doc_id >= target
333        let start = {
334            let mut lo = 0usize;
335            let mut hi = n;
336            while lo < hi {
337                let mid = lo + (hi - lo) / 2;
338                if self.doc_id_at(mid) < doc_id {
339                    lo = mid + 1;
340                } else {
341                    hi = mid;
342                }
343            }
344            lo
345        };
346        // Collect entries with matching doc_id
347        let mut entries = Vec::new();
348        let mut i = start;
349        while i < n {
350            let (did, ord) = self.get_doc_id(i);
351            if did != doc_id {
352                break;
353            }
354            entries.push((did, ord));
355            i += 1;
356        }
357        (start, entries)
358    }
359
360    /// Read doc_id at index from raw bytes (no ordinal).
361    #[inline]
362    fn doc_id_at(&self, idx: usize) -> u32 {
363        let off = idx * DOC_ID_ENTRY_SIZE;
364        let d = &self.doc_ids_bytes[off..];
365        u32::from_le_bytes([d[0], d[1], d[2], d[3]])
366    }
367
368    /// Get doc_id and ordinal at index (parsed from zero-copy mmap bytes).
369    #[inline]
370    pub fn get_doc_id(&self, idx: usize) -> (u32, u16) {
371        let off = idx * DOC_ID_ENTRY_SIZE;
372        let d = &self.doc_ids_bytes[off..];
373        let doc_id = u32::from_le_bytes([d[0], d[1], d[2], d[3]]);
374        let ordinal = u16::from_le_bytes([d[4], d[5]]);
375        (doc_id, ordinal)
376    }
377
378    /// Bytes per vector in storage.
379    #[inline]
380    pub fn vector_byte_size(&self) -> usize {
381        self.dim * self.element_size
382    }
383
384    /// Total byte length of raw vector data (for chunked merger streaming).
385    pub fn vector_bytes_len(&self) -> u64 {
386        (self.num_vectors as u64) * (self.vector_byte_size() as u64)
387    }
388
389    /// Byte offset where vector data starts (for direct handle access in merger).
390    pub fn vectors_byte_offset(&self) -> u64 {
391        self.vectors_offset
392    }
393
394    /// Access the underlying lazy file handle (for chunked byte-range reads in merger).
395    pub fn handle(&self) -> &LazyFileSlice {
396        &self.handle
397    }
398
399    /// Estimated memory usage — doc_ids are mmap-backed (only Arc overhead).
400    pub fn estimated_memory_bytes(&self) -> usize {
401        size_of::<Self>() + size_of::<OwnedBytes>()
402    }
403}
404
/// IVF-RaBitQ index data (codebook + cluster assignments)
///
/// Centroids are stored at the index level (`field_X_centroids.bin`),
/// not duplicated per segment.
///
/// Serialized as a unit with bincode via `to_bytes`/`from_bytes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFRaBitQIndexData {
    // Per-segment IVF-RaBitQ index (cluster assignments)
    pub index: crate::structures::IVFRaBitQIndex,
    // RaBitQ codebook paired with this segment's index
    pub codebook: crate::structures::RaBitQCodebook,
}
414
415impl IVFRaBitQIndexData {
416    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
417        bincode::serde::encode_to_vec(self, bincode::config::standard())
418            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
419    }
420
421    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
422        bincode::serde::decode_from_slice(data, bincode::config::standard())
423            .map(|(v, _)| v)
424            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
425    }
426}
427
/// ScaNN index data (codebook + cluster assignments)
///
/// Centroids are stored at the index level (`field_X_centroids.bin`),
/// not duplicated per segment.
///
/// Serialized as a unit with bincode via `to_bytes`/`from_bytes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScaNNIndexData {
    // Per-segment IVF-PQ index (cluster assignments)
    pub index: crate::structures::IVFPQIndex,
    // PQ codebook paired with this segment's index
    pub codebook: crate::structures::PQCodebook,
}
437
438impl ScaNNIndexData {
439    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
440        bincode::serde::encode_to_vec(self, bincode::config::standard())
441            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
442    }
443
444    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
445        bincode::serde::decode_from_slice(data, bincode::config::standard())
446            .map(|(v, _)| v)
447            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
448    }
449}