Skip to main content

hermes_core/segment/
vector_data.rs

1//! Vector index data structures shared between builder and reader
2
3use std::io;
4use std::mem::size_of;
5
6use serde::{Deserialize, Serialize};
7
8use crate::directories::{FileHandle, OwnedBytes};
9use crate::dsl::DenseVectorQuantization;
10use crate::segment::format::{DOC_ID_ENTRY_SIZE, FLAT_BINARY_HEADER_SIZE, FLAT_BINARY_MAGIC};
11use crate::structures::simd::{batch_f32_to_f16, batch_f32_to_u8, f16_to_f32, u8_to_f32};
12
13/// Dequantize raw bytes to f32 based on storage quantization.
14///
15/// `raw` is the quantized byte slice, `out` receives the f32 values.
16/// `num_floats` is the number of f32 values to produce (= num_vectors × dim).
17/// Data-first file layout guarantees alignment for f32/f16 access.
18#[inline]
19pub fn dequantize_raw(
20    raw: &[u8],
21    quant: DenseVectorQuantization,
22    num_floats: usize,
23    out: &mut [f32],
24) {
25    debug_assert!(out.len() >= num_floats);
26    match quant {
27        DenseVectorQuantization::F32 => {
28            debug_assert!(
29                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<f32>()),
30                "f32 vector data not 4-byte aligned"
31            );
32            out[..num_floats].copy_from_slice(unsafe {
33                std::slice::from_raw_parts(raw.as_ptr() as *const f32, num_floats)
34            });
35        }
36        DenseVectorQuantization::F16 => {
37            debug_assert!(
38                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<u16>()),
39                "f16 vector data not 2-byte aligned"
40            );
41            let f16_slice =
42                unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const u16, num_floats) };
43            for (i, &h) in f16_slice.iter().enumerate() {
44                out[i] = f16_to_f32(h);
45            }
46        }
47        DenseVectorQuantization::UInt8 => {
48            for (i, &b) in raw.iter().enumerate().take(num_floats) {
49                out[i] = u8_to_f32(b);
50            }
51        }
52        DenseVectorQuantization::Binary => {
53            unreachable!("Binary vectors use raw bytes, not f32 dequantization");
54        }
55    }
56}
57
58/// Flat vector binary format helpers for writing.
59///
60/// Binary format v3:
61/// ```text
62/// [magic(u32)][dim(u32)][num_vectors(u32)][quant_type(u8)][padding(3)]
63/// [vectors: N×dim×element_size]
64/// [doc_ids: N×(u32+u16)]
65/// ```
66///
67/// `element_size` is determined by `quant_type`: f32=4, f16=2, uint8=1.
68/// Reading is handled by [`LazyFlatVectorData`] which loads only doc_ids into memory
69/// and accesses vector data lazily via mmap-backed range reads.
70pub struct FlatVectorData;
71
72impl FlatVectorData {
73    /// Write the binary header to a writer.
74    pub fn write_binary_header(
75        dim: usize,
76        num_vectors: usize,
77        quant: DenseVectorQuantization,
78        writer: &mut dyn std::io::Write,
79    ) -> std::io::Result<()> {
80        writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
81        writer.write_all(&(dim as u32).to_le_bytes())?;
82        writer.write_all(&(num_vectors as u32).to_le_bytes())?;
83        writer.write_all(&[quant.tag(), 0, 0, 0])?; // quant_type + 3 bytes padding
84        Ok(())
85    }
86
87    /// Compute the serialized size without actually serializing.
88    pub fn serialized_binary_size(
89        dim: usize,
90        num_vectors: usize,
91        quant: DenseVectorQuantization,
92    ) -> usize {
93        let bytes_per_vector = match quant {
94            DenseVectorQuantization::Binary => dim.div_ceil(8),
95            _ => dim * quant.element_size(),
96        };
97        FLAT_BINARY_HEADER_SIZE + num_vectors * bytes_per_vector + num_vectors * DOC_ID_ENTRY_SIZE
98    }
99
100    /// Stream from flat f32 storage to a writer, quantizing on write.
101    ///
102    /// `flat_vectors` is contiguous storage of dim*n f32 floats.
103    /// Vectors are quantized to the specified format before writing.
104    pub fn serialize_binary_from_flat_streaming(
105        dim: usize,
106        flat_vectors: &[f32],
107        doc_ids: &[(u32, u16)],
108        quant: DenseVectorQuantization,
109        writer: &mut dyn std::io::Write,
110    ) -> std::io::Result<()> {
111        let num_vectors = doc_ids.len();
112        Self::write_binary_header(dim, num_vectors, quant, writer)?;
113
114        match quant {
115            DenseVectorQuantization::F32 => {
116                let bytes: &[u8] = unsafe {
117                    std::slice::from_raw_parts(
118                        flat_vectors.as_ptr() as *const u8,
119                        std::mem::size_of_val(flat_vectors),
120                    )
121                };
122                writer.write_all(bytes)?;
123            }
124            DenseVectorQuantization::F16 => {
125                let mut buf = vec![0u16; dim];
126                for v in flat_vectors.chunks_exact(dim) {
127                    batch_f32_to_f16(v, &mut buf);
128                    let bytes: &[u8] =
129                        unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const u8, dim * 2) };
130                    writer.write_all(bytes)?;
131                }
132            }
133            DenseVectorQuantization::UInt8 => {
134                let mut buf = vec![0u8; dim];
135                for v in flat_vectors.chunks_exact(dim) {
136                    batch_f32_to_u8(v, &mut buf);
137                    writer.write_all(&buf)?;
138                }
139            }
140            DenseVectorQuantization::Binary => {
141                // Binary vectors use serialize_binary_from_bits_streaming(), not this path
142                unreachable!("Binary quantization should use serialize_binary_from_bits_streaming");
143            }
144        }
145
146        for &(doc_id, ordinal) in doc_ids {
147            writer.write_all(&doc_id.to_le_bytes())?;
148            writer.write_all(&ordinal.to_le_bytes())?;
149        }
150
151        Ok(())
152    }
153
154    /// Stream packed binary vectors (pre-packed bytes) to a writer.
155    ///
156    /// `packed_vectors` is contiguous storage of num_vectors * byte_len bytes.
157    /// `dim_bits` is the number of bits (dimensions).
158    pub fn serialize_binary_from_bits_streaming(
159        dim_bits: usize,
160        packed_vectors: &[u8],
161        doc_ids: &[(u32, u16)],
162        writer: &mut dyn std::io::Write,
163    ) -> std::io::Result<()> {
164        let num_vectors = doc_ids.len();
165        let byte_len = dim_bits.div_ceil(8);
166        debug_assert_eq!(packed_vectors.len(), num_vectors * byte_len);
167
168        Self::write_binary_header(
169            dim_bits,
170            num_vectors,
171            DenseVectorQuantization::Binary,
172            writer,
173        )?;
174        writer.write_all(packed_vectors)?;
175
176        for &(doc_id, ordinal) in doc_ids {
177            writer.write_all(&doc_id.to_le_bytes())?;
178            writer.write_all(&ordinal.to_le_bytes())?;
179        }
180
181        Ok(())
182    }
183
184    /// Write raw pre-quantized vector bytes to a writer (for merger streaming).
185    ///
186    /// `raw_bytes` is already in the target quantized format.
187    pub fn write_raw_vector_bytes(
188        raw_bytes: &[u8],
189        writer: &mut dyn std::io::Write,
190    ) -> std::io::Result<()> {
191        writer.write_all(raw_bytes)
192    }
193}
194
/// Lazy flat vector data — zero-copy doc_id index, vectors via range reads.
///
/// The doc_id index is kept as `OwnedBytes` (mmap-backed, zero heap copy).
/// Vector data stays on disk and is accessed via mmap-backed range reads.
/// Element size depends on quantization: f32=4, f16=2, uint8=1 bytes/dim.
///
/// Used for:
/// - Brute-force search (batched scoring with native-precision SIMD)
/// - Reranking (read individual vectors by doc_id via binary search)
/// - doc() hydration (dequantize to f32 for stored documents)
/// - Merge streaming (chunked raw vector bytes + doc_id iteration)
#[derive(Debug, Clone)]
pub struct LazyFlatVectorData {
    /// Vector dimension (for Binary quantization this is the bit count)
    pub dim: usize,
    /// Total number of vectors
    pub num_vectors: usize,
    /// Storage quantization type
    pub quantization: DenseVectorQuantization,
    /// Zero-copy doc_id index: packed [u32_le doc_id + u16_le ordinal] × num_vectors,
    /// sorted by (doc_id, ordinal) — see `flat_indexes_for_doc`
    doc_ids_bytes: OwnedBytes,
    /// File handle for this field's flat data region in the .vectors file
    handle: FileHandle,
    /// Byte offset within handle where raw vector data starts (always the
    /// header size, since vectors immediately follow the 16-byte header)
    vectors_offset: u64,
    /// Bytes per vector in storage (cached: Binary = ceil(dim/8), else dim * element_size)
    vbs: usize,
}
223
224impl LazyFlatVectorData {
225    /// Open from a lazy file slice pointing to the flat binary data region.
226    ///
227    /// Reads header (16 bytes) + doc_ids (~6 bytes/vector) into memory.
228    /// Vector data stays lazy on disk.
229    pub async fn open(handle: FileHandle) -> io::Result<Self> {
230        // Read header: magic(4) + dim(4) + num_vectors(4) + quant_type(1) + pad(3) = 16 bytes
231        let header = handle
232            .read_bytes_range(0..FLAT_BINARY_HEADER_SIZE as u64)
233            .await?;
234        let hdr = header.as_slice();
235
236        let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
237        if magic != FLAT_BINARY_MAGIC {
238            return Err(io::Error::new(
239                io::ErrorKind::InvalidData,
240                "Invalid FlatVectorData binary magic",
241            ));
242        }
243
244        let dim = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
245        let num_vectors = u32::from_le_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]) as usize;
246        let quantization = DenseVectorQuantization::from_tag(hdr[12]).ok_or_else(|| {
247            io::Error::new(
248                io::ErrorKind::InvalidData,
249                format!("Unknown quantization tag: {}", hdr[12]),
250            )
251        })?;
252        // Read doc_ids section as zero-copy OwnedBytes (6 bytes per vector)
253        let vbs = if quantization == DenseVectorQuantization::Binary {
254            dim.div_ceil(8)
255        } else {
256            dim * quantization.element_size()
257        };
258        let vectors_byte_len = num_vectors * vbs;
259        let doc_ids_start = (FLAT_BINARY_HEADER_SIZE + vectors_byte_len) as u64;
260        let doc_ids_byte_len = (num_vectors * DOC_ID_ENTRY_SIZE) as u64;
261
262        let doc_ids_bytes = handle
263            .read_bytes_range(doc_ids_start..doc_ids_start + doc_ids_byte_len)
264            .await?;
265
266        Ok(Self {
267            dim,
268            num_vectors,
269            quantization,
270            doc_ids_bytes,
271            handle,
272            vectors_offset: FLAT_BINARY_HEADER_SIZE as u64,
273            vbs,
274        })
275    }
276
277    /// Read a single vector by index, dequantized to f32.
278    ///
279    /// `out` must have length >= `self.dim`. Returns `Ok(())` on success.
280    /// Used for ANN training and doc() hydration where f32 is needed.
281    pub async fn read_vector_into(&self, idx: usize, out: &mut [f32]) -> io::Result<()> {
282        debug_assert!(out.len() >= self.dim);
283        let vbs = self.vector_byte_size();
284        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
285        let bytes = self
286            .handle
287            .read_bytes_range(byte_offset..byte_offset + vbs as u64)
288            .await?;
289        let raw = bytes.as_slice();
290
291        dequantize_raw(raw, self.quantization, self.dim, out);
292        Ok(())
293    }
294
295    /// Read a single vector by index, dequantized to f32 (allocates a new Vec<f32>).
296    pub async fn get_vector(&self, idx: usize) -> io::Result<Vec<f32>> {
297        let mut vector = vec![0f32; self.dim];
298        self.read_vector_into(idx, &mut vector).await?;
299        Ok(vector)
300    }
301
302    /// Read a single vector's raw bytes (no dequantization) into a caller-provided buffer.
303    ///
304    /// `out` must have length >= `self.vector_byte_size()`.
305    /// Used for native-precision reranking where raw quantized bytes are scored directly.
306    pub async fn read_vector_raw_into(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
307        let vbs = self.vector_byte_size();
308        debug_assert!(out.len() >= vbs);
309        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
310        let bytes = self
311            .handle
312            .read_bytes_range(byte_offset..byte_offset + vbs as u64)
313            .await?;
314        out[..vbs].copy_from_slice(bytes.as_slice());
315        Ok(())
316    }
317
318    /// Read a contiguous batch of raw quantized bytes by index range.
319    ///
320    /// Returns raw bytes for vectors `[start_idx..start_idx+count)`.
321    /// Bytes are in native quantized format — pass to `batch_cosine_scores_f16/u8`
322    /// or `batch_cosine_scores` (for f32) for scoring.
323    pub async fn read_vectors_batch(
324        &self,
325        start_idx: usize,
326        count: usize,
327    ) -> io::Result<OwnedBytes> {
328        debug_assert!(start_idx + count <= self.num_vectors);
329        let vbs = self.vector_byte_size();
330        let byte_offset = self.vectors_offset + (start_idx * vbs) as u64;
331        let byte_len = (count * vbs) as u64;
332        self.handle
333            .read_bytes_range(byte_offset..byte_offset + byte_len)
334            .await
335    }
336
337    /// Synchronous read of a single vector's raw bytes.
338    #[cfg(feature = "sync")]
339    pub fn read_vector_raw_into_sync(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
340        let vbs = self.vector_byte_size();
341        debug_assert!(out.len() >= vbs);
342        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
343        let bytes = self
344            .handle
345            .read_bytes_range_sync(byte_offset..byte_offset + vbs as u64)?;
346        out[..vbs].copy_from_slice(bytes.as_slice());
347        Ok(())
348    }
349
350    /// Synchronous batch read of raw quantized bytes.
351    #[cfg(feature = "sync")]
352    pub fn read_vectors_batch_sync(
353        &self,
354        start_idx: usize,
355        count: usize,
356    ) -> io::Result<OwnedBytes> {
357        debug_assert!(start_idx + count <= self.num_vectors);
358        let vbs = self.vector_byte_size();
359        let byte_offset = self.vectors_offset + (start_idx * vbs) as u64;
360        let byte_len = (count * vbs) as u64;
361        self.handle
362            .read_bytes_range_sync(byte_offset..byte_offset + byte_len)
363    }
364
365    /// Find flat index range for a given doc_id (non-allocating).
366    ///
367    /// Returns `(start_index, count)` — the flat vector index range for this doc_id.
368    /// Use `get_doc_id(start + i)` for `i in 0..count` to read individual entries.
369    /// More efficient than `flat_indexes_for_doc` as it avoids Vec allocation.
370    pub fn flat_indexes_for_doc_range(&self, doc_id: u32) -> (usize, usize) {
371        let n = self.num_vectors;
372        let start = {
373            let mut lo = 0usize;
374            let mut hi = n;
375            while lo < hi {
376                let mid = lo + (hi - lo) / 2;
377                if self.doc_id_at(mid) < doc_id {
378                    lo = mid + 1;
379                } else {
380                    hi = mid;
381                }
382            }
383            lo
384        };
385        let mut count = 0;
386        let mut i = start;
387        while i < n && self.doc_id_at(i) == doc_id {
388            count += 1;
389            i += 1;
390        }
391        (start, count)
392    }
393
394    /// Find flat indexes for a given doc_id via binary search on sorted doc_ids.
395    ///
396    /// doc_ids are sorted by (doc_id, ordinal) — segment builder adds docs
397    /// sequentially. Binary search runs directly on zero-copy mmap bytes.
398    ///
399    /// Returns `(start_index, entries)` where start_index is the flat vector index.
400    pub fn flat_indexes_for_doc(&self, doc_id: u32) -> (usize, Vec<(u32, u16)>) {
401        let n = self.num_vectors;
402        // Binary search: find first entry where doc_id >= target
403        let start = {
404            let mut lo = 0usize;
405            let mut hi = n;
406            while lo < hi {
407                let mid = lo + (hi - lo) / 2;
408                if self.doc_id_at(mid) < doc_id {
409                    lo = mid + 1;
410                } else {
411                    hi = mid;
412                }
413            }
414            lo
415        };
416        // Collect entries with matching doc_id
417        let mut entries = Vec::new();
418        let mut i = start;
419        while i < n {
420            let (did, ord) = self.get_doc_id(i);
421            if did != doc_id {
422                break;
423            }
424            entries.push((did, ord));
425            i += 1;
426        }
427        (start, entries)
428    }
429
430    /// Read doc_id at index from raw bytes (no ordinal).
431    #[inline]
432    fn doc_id_at(&self, idx: usize) -> u32 {
433        let off = idx * DOC_ID_ENTRY_SIZE;
434        let d = &self.doc_ids_bytes[off..];
435        u32::from_le_bytes([d[0], d[1], d[2], d[3]])
436    }
437
438    /// Get doc_id and ordinal at index (parsed from zero-copy mmap bytes).
439    #[inline]
440    pub fn get_doc_id(&self, idx: usize) -> (u32, u16) {
441        let off = idx * DOC_ID_ENTRY_SIZE;
442        let d = &self.doc_ids_bytes[off..];
443        let doc_id = u32::from_le_bytes([d[0], d[1], d[2], d[3]]);
444        let ordinal = u16::from_le_bytes([d[4], d[5]]);
445        (doc_id, ordinal)
446    }
447
448    /// Bytes per vector in storage (cached).
449    #[inline]
450    pub fn vector_byte_size(&self) -> usize {
451        self.vbs
452    }
453
454    /// Total byte length of raw vector data (for chunked merger streaming).
455    pub fn vector_bytes_len(&self) -> u64 {
456        (self.num_vectors as u64) * (self.vector_byte_size() as u64)
457    }
458
459    /// Byte offset where vector data starts (for direct handle access in merger).
460    pub fn vectors_byte_offset(&self) -> u64 {
461        self.vectors_offset
462    }
463
464    /// Access the underlying file handle (for chunked byte-range reads in merger).
465    pub fn handle(&self) -> &FileHandle {
466        &self.handle
467    }
468
469    /// Estimated memory usage — doc_ids are mmap-backed (only Arc overhead).
470    pub fn estimated_memory_bytes(&self) -> usize {
471        size_of::<Self>() + size_of::<OwnedBytes>()
472    }
473}
474
475/// IVF-RaBitQ index data (codebook + cluster assignments)
476///
477/// Centroids are stored at the index level (`field_X_centroids.bin`),
478/// not duplicated per segment.
479#[derive(Debug, Clone, Serialize, Deserialize)]
480pub struct IVFRaBitQIndexData {
481    pub index: crate::structures::IVFRaBitQIndex,
482    pub codebook: crate::structures::RaBitQCodebook,
483}
484
485impl IVFRaBitQIndexData {
486    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
487        bincode::serde::encode_to_vec(self, bincode::config::standard())
488            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
489    }
490
491    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
492        bincode::serde::decode_from_slice(data, bincode::config::standard())
493            .map(|(v, _)| v)
494            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
495    }
496}
497
498/// ScaNN index data (codebook + cluster assignments)
499///
500/// Centroids are stored at the index level (`field_X_centroids.bin`),
501/// not duplicated per segment.
502#[derive(Debug, Clone, Serialize, Deserialize)]
503pub struct ScaNNIndexData {
504    pub index: crate::structures::IVFPQIndex,
505    pub codebook: crate::structures::PQCodebook,
506}
507
508impl ScaNNIndexData {
509    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
510        bincode::serde::encode_to_vec(self, bincode::config::standard())
511            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
512    }
513
514    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
515        bincode::serde::decode_from_slice(data, bincode::config::standard())
516            .map(|(v, _)| v)
517            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
518    }
519}