// hermes_core/segment/vector_data.rs
//! Vector index data structures shared between builder and reader

use std::io;
use std::mem::size_of;

use serde::{Deserialize, Serialize};

use crate::directories::{FileHandle, OwnedBytes};
use crate::dsl::DenseVectorQuantization;
use crate::segment::format::{DOC_ID_ENTRY_SIZE, FLAT_BINARY_HEADER_SIZE, FLAT_BINARY_MAGIC};
use crate::structures::simd::{batch_f32_to_f16, batch_f32_to_u8, f16_to_f32, u8_to_f32};
13/// Dequantize raw bytes to f32 based on storage quantization.
14///
15/// `raw` is the quantized byte slice, `out` receives the f32 values.
16/// `num_floats` is the number of f32 values to produce (= num_vectors × dim).
17/// Data-first file layout guarantees alignment for f32/f16 access.
18#[inline]
19pub fn dequantize_raw(
20    raw: &[u8],
21    quant: DenseVectorQuantization,
22    num_floats: usize,
23    out: &mut [f32],
24) {
25    debug_assert!(out.len() >= num_floats);
26    match quant {
27        DenseVectorQuantization::F32 => {
28            debug_assert!(
29                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<f32>()),
30                "f32 vector data not 4-byte aligned"
31            );
32            out[..num_floats].copy_from_slice(unsafe {
33                std::slice::from_raw_parts(raw.as_ptr() as *const f32, num_floats)
34            });
35        }
36        DenseVectorQuantization::F16 => {
37            debug_assert!(
38                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<u16>()),
39                "f16 vector data not 2-byte aligned"
40            );
41            let f16_slice =
42                unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const u16, num_floats) };
43            for (i, &h) in f16_slice.iter().enumerate() {
44                out[i] = f16_to_f32(h);
45            }
46        }
47        DenseVectorQuantization::UInt8 => {
48            for (i, &b) in raw.iter().enumerate().take(num_floats) {
49                out[i] = u8_to_f32(b);
50            }
51        }
52        DenseVectorQuantization::Binary => {
53            // Binary vectors are packed bits — dequantization to f32 is not meaningful.
54            // Fill with raw byte values as f32 for debug/display purposes only.
55            for (i, &b) in raw.iter().enumerate().take(num_floats) {
56                out[i] = b as f32;
57            }
58        }
59    }
60}
61
62/// Flat vector binary format helpers for writing.
63///
64/// Binary format v3:
65/// ```text
66/// [magic(u32)][dim(u32)][num_vectors(u32)][quant_type(u8)][padding(3)]
67/// [vectors: N×dim×element_size]
68/// [doc_ids: N×(u32+u16)]
69/// ```
70///
71/// `element_size` is determined by `quant_type`: f32=4, f16=2, uint8=1.
72/// Reading is handled by [`LazyFlatVectorData`] which loads only doc_ids into memory
73/// and accesses vector data lazily via mmap-backed range reads.
74pub struct FlatVectorData;
75
76impl FlatVectorData {
77    /// Write the binary header to a writer.
78    pub fn write_binary_header(
79        dim: usize,
80        num_vectors: usize,
81        quant: DenseVectorQuantization,
82        writer: &mut dyn std::io::Write,
83    ) -> std::io::Result<()> {
84        writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
85        writer.write_all(&(dim as u32).to_le_bytes())?;
86        writer.write_all(&(num_vectors as u32).to_le_bytes())?;
87        writer.write_all(&[quant.tag(), 0, 0, 0])?; // quant_type + 3 bytes padding
88        Ok(())
89    }
90
91    /// Compute the serialized size without actually serializing.
92    pub fn serialized_binary_size(
93        dim: usize,
94        num_vectors: usize,
95        quant: DenseVectorQuantization,
96    ) -> usize {
97        let bytes_per_vector = match quant {
98            DenseVectorQuantization::Binary => dim.div_ceil(8),
99            _ => dim * quant.element_size(),
100        };
101        FLAT_BINARY_HEADER_SIZE + num_vectors * bytes_per_vector + num_vectors * DOC_ID_ENTRY_SIZE
102    }
103
104    /// Stream from flat f32 storage to a writer, quantizing on write.
105    ///
106    /// `flat_vectors` is contiguous storage of dim*n f32 floats.
107    /// Vectors are quantized to the specified format before writing.
108    pub fn serialize_binary_from_flat_streaming(
109        dim: usize,
110        flat_vectors: &[f32],
111        doc_ids: &[(u32, u16)],
112        quant: DenseVectorQuantization,
113        writer: &mut dyn std::io::Write,
114    ) -> std::io::Result<()> {
115        let num_vectors = doc_ids.len();
116        Self::write_binary_header(dim, num_vectors, quant, writer)?;
117
118        match quant {
119            DenseVectorQuantization::F32 => {
120                let bytes: &[u8] = unsafe {
121                    std::slice::from_raw_parts(
122                        flat_vectors.as_ptr() as *const u8,
123                        std::mem::size_of_val(flat_vectors),
124                    )
125                };
126                writer.write_all(bytes)?;
127            }
128            DenseVectorQuantization::F16 => {
129                let mut buf = vec![0u16; dim];
130                for v in flat_vectors.chunks_exact(dim) {
131                    batch_f32_to_f16(v, &mut buf);
132                    let bytes: &[u8] =
133                        unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const u8, dim * 2) };
134                    writer.write_all(bytes)?;
135                }
136            }
137            DenseVectorQuantization::UInt8 => {
138                let mut buf = vec![0u8; dim];
139                for v in flat_vectors.chunks_exact(dim) {
140                    batch_f32_to_u8(v, &mut buf);
141                    writer.write_all(&buf)?;
142                }
143            }
144            DenseVectorQuantization::Binary => {
145                // Binary vectors use serialize_binary_from_bits_streaming(), not this path
146                unreachable!("Binary quantization should use serialize_binary_from_bits_streaming");
147            }
148        }
149
150        for &(doc_id, ordinal) in doc_ids {
151            writer.write_all(&doc_id.to_le_bytes())?;
152            writer.write_all(&ordinal.to_le_bytes())?;
153        }
154
155        Ok(())
156    }
157
158    /// Stream packed binary vectors (pre-packed bytes) to a writer.
159    ///
160    /// `packed_vectors` is contiguous storage of num_vectors * byte_len bytes.
161    /// `dim_bits` is the number of bits (dimensions).
162    pub fn serialize_binary_from_bits_streaming(
163        dim_bits: usize,
164        packed_vectors: &[u8],
165        doc_ids: &[(u32, u16)],
166        writer: &mut dyn std::io::Write,
167    ) -> std::io::Result<()> {
168        let num_vectors = doc_ids.len();
169        let byte_len = dim_bits.div_ceil(8);
170        debug_assert_eq!(packed_vectors.len(), num_vectors * byte_len);
171
172        Self::write_binary_header(
173            dim_bits,
174            num_vectors,
175            DenseVectorQuantization::Binary,
176            writer,
177        )?;
178        writer.write_all(packed_vectors)?;
179
180        for &(doc_id, ordinal) in doc_ids {
181            writer.write_all(&doc_id.to_le_bytes())?;
182            writer.write_all(&ordinal.to_le_bytes())?;
183        }
184
185        Ok(())
186    }
187
188    /// Write raw pre-quantized vector bytes to a writer (for merger streaming).
189    ///
190    /// `raw_bytes` is already in the target quantized format.
191    pub fn write_raw_vector_bytes(
192        raw_bytes: &[u8],
193        writer: &mut dyn std::io::Write,
194    ) -> std::io::Result<()> {
195        writer.write_all(raw_bytes)
196    }
197}
198
/// Lazy flat vector data — zero-copy doc_id index, vectors via range reads.
///
/// The doc_id index is kept as `OwnedBytes` (mmap-backed, zero heap copy).
/// Vector data stays on disk and is accessed via mmap-backed range reads.
/// Element size depends on quantization: f32=4, f16=2, uint8=1 bytes/dim;
/// Binary packs bits at ceil(dim/8) bytes per vector.
///
/// Used for:
/// - Brute-force search (batched scoring with native-precision SIMD)
/// - Reranking (read individual vectors by doc_id via binary search)
/// - doc() hydration (dequantize to f32 for stored documents)
/// - Merge streaming (chunked raw vector bytes + doc_id iteration)
#[derive(Debug, Clone)]
pub struct LazyFlatVectorData {
    /// Vector dimension (for Binary quantization: the number of bits)
    pub dim: usize,
    /// Total number of vectors
    pub num_vectors: usize,
    /// Storage quantization type
    pub quantization: DenseVectorQuantization,
    /// Zero-copy doc_id index: packed [u32_le doc_id + u16_le ordinal] × num_vectors,
    /// sorted by (doc_id, ordinal) so doc lookups can binary-search the raw bytes
    doc_ids_bytes: OwnedBytes,
    /// File handle for this field's flat data region in the .vectors file
    handle: FileHandle,
    /// Byte offset within handle where raw vector data starts (after the 16-byte header)
    vectors_offset: u64,
    /// Bytes per vector in storage (cached: Binary = ceil(dim/8), else dim * element_size)
    vbs: usize,
}
227
228impl LazyFlatVectorData {
229    /// Open from a lazy file slice pointing to the flat binary data region.
230    ///
231    /// Reads header (16 bytes) + doc_ids (~6 bytes/vector) into memory.
232    /// Vector data stays lazy on disk.
233    pub async fn open(handle: FileHandle) -> io::Result<Self> {
234        // Read header: magic(4) + dim(4) + num_vectors(4) + quant_type(1) + pad(3) = 16 bytes
235        let header = handle
236            .read_bytes_range(0..FLAT_BINARY_HEADER_SIZE as u64)
237            .await?;
238        let hdr = header.as_slice();
239
240        let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
241        if magic != FLAT_BINARY_MAGIC {
242            return Err(io::Error::new(
243                io::ErrorKind::InvalidData,
244                "Invalid FlatVectorData binary magic",
245            ));
246        }
247
248        let dim = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
249        let num_vectors = u32::from_le_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]) as usize;
250        let quantization = DenseVectorQuantization::from_tag(hdr[12]).ok_or_else(|| {
251            io::Error::new(
252                io::ErrorKind::InvalidData,
253                format!("Unknown quantization tag: {}", hdr[12]),
254            )
255        })?;
256        // Read doc_ids section as zero-copy OwnedBytes (6 bytes per vector)
257        let vbs = if quantization == DenseVectorQuantization::Binary {
258            dim.div_ceil(8)
259        } else {
260            dim * quantization.element_size()
261        };
262        let vectors_byte_len = num_vectors * vbs;
263        let doc_ids_start = (FLAT_BINARY_HEADER_SIZE + vectors_byte_len) as u64;
264        let doc_ids_byte_len = (num_vectors * DOC_ID_ENTRY_SIZE) as u64;
265
266        let doc_ids_bytes = handle
267            .read_bytes_range(doc_ids_start..doc_ids_start + doc_ids_byte_len)
268            .await?;
269
270        Ok(Self {
271            dim,
272            num_vectors,
273            quantization,
274            doc_ids_bytes,
275            handle,
276            vectors_offset: FLAT_BINARY_HEADER_SIZE as u64,
277            vbs,
278        })
279    }
280
281    /// Read a single vector by index, dequantized to f32.
282    ///
283    /// `out` must have length >= `self.dim`. Returns `Ok(())` on success.
284    /// Used for ANN training and doc() hydration where f32 is needed.
285    pub async fn read_vector_into(&self, idx: usize, out: &mut [f32]) -> io::Result<()> {
286        debug_assert!(out.len() >= self.dim);
287        let vbs = self.vector_byte_size();
288        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
289        let bytes = self
290            .handle
291            .read_bytes_range(byte_offset..byte_offset + vbs as u64)
292            .await?;
293        let raw = bytes.as_slice();
294
295        dequantize_raw(raw, self.quantization, self.dim, out);
296        Ok(())
297    }
298
299    /// Read a single vector by index, dequantized to f32 (allocates a new Vec<f32>).
300    pub async fn get_vector(&self, idx: usize) -> io::Result<Vec<f32>> {
301        let mut vector = vec![0f32; self.dim];
302        self.read_vector_into(idx, &mut vector).await?;
303        Ok(vector)
304    }
305
306    /// Read a single vector's raw bytes (no dequantization) into a caller-provided buffer.
307    ///
308    /// `out` must have length >= `self.vector_byte_size()`.
309    /// Used for native-precision reranking where raw quantized bytes are scored directly.
310    pub async fn read_vector_raw_into(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
311        let vbs = self.vector_byte_size();
312        debug_assert!(out.len() >= vbs);
313        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
314        let bytes = self
315            .handle
316            .read_bytes_range(byte_offset..byte_offset + vbs as u64)
317            .await?;
318        out[..vbs].copy_from_slice(bytes.as_slice());
319        Ok(())
320    }
321
322    /// Read a contiguous batch of raw quantized bytes by index range.
323    ///
324    /// Returns raw bytes for vectors `[start_idx..start_idx+count)`.
325    /// Bytes are in native quantized format — pass to `batch_cosine_scores_f16/u8`
326    /// or `batch_cosine_scores` (for f32) for scoring.
327    pub async fn read_vectors_batch(
328        &self,
329        start_idx: usize,
330        count: usize,
331    ) -> io::Result<OwnedBytes> {
332        debug_assert!(start_idx + count <= self.num_vectors);
333        let vbs = self.vector_byte_size();
334        let byte_offset = self.vectors_offset + (start_idx * vbs) as u64;
335        let byte_len = (count * vbs) as u64;
336        self.handle
337            .read_bytes_range(byte_offset..byte_offset + byte_len)
338            .await
339    }
340
341    /// Synchronous read of a single vector's raw bytes.
342    #[cfg(feature = "sync")]
343    pub fn read_vector_raw_into_sync(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
344        let vbs = self.vector_byte_size();
345        debug_assert!(out.len() >= vbs);
346        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
347        let bytes = self
348            .handle
349            .read_bytes_range_sync(byte_offset..byte_offset + vbs as u64)?;
350        out[..vbs].copy_from_slice(bytes.as_slice());
351        Ok(())
352    }
353
354    /// Synchronous batch read of raw quantized bytes.
355    #[cfg(feature = "sync")]
356    pub fn read_vectors_batch_sync(
357        &self,
358        start_idx: usize,
359        count: usize,
360    ) -> io::Result<OwnedBytes> {
361        debug_assert!(start_idx + count <= self.num_vectors);
362        let vbs = self.vector_byte_size();
363        let byte_offset = self.vectors_offset + (start_idx * vbs) as u64;
364        let byte_len = (count * vbs) as u64;
365        self.handle
366            .read_bytes_range_sync(byte_offset..byte_offset + byte_len)
367    }
368
369    /// Find flat index range for a given doc_id (non-allocating).
370    ///
371    /// Returns `(start_index, count)` — the flat vector index range for this doc_id.
372    /// Use `get_doc_id(start + i)` for `i in 0..count` to read individual entries.
373    /// More efficient than `flat_indexes_for_doc` as it avoids Vec allocation.
374    pub fn flat_indexes_for_doc_range(&self, doc_id: u32) -> (usize, usize) {
375        let n = self.num_vectors;
376        let start = {
377            let mut lo = 0usize;
378            let mut hi = n;
379            while lo < hi {
380                let mid = lo + (hi - lo) / 2;
381                if self.doc_id_at(mid) < doc_id {
382                    lo = mid + 1;
383                } else {
384                    hi = mid;
385                }
386            }
387            lo
388        };
389        let mut count = 0;
390        let mut i = start;
391        while i < n && self.doc_id_at(i) == doc_id {
392            count += 1;
393            i += 1;
394        }
395        (start, count)
396    }
397
398    /// Find flat indexes for a given doc_id via binary search on sorted doc_ids.
399    ///
400    /// doc_ids are sorted by (doc_id, ordinal) — segment builder adds docs
401    /// sequentially. Binary search runs directly on zero-copy mmap bytes.
402    ///
403    /// Returns `(start_index, entries)` where start_index is the flat vector index.
404    pub fn flat_indexes_for_doc(&self, doc_id: u32) -> (usize, Vec<(u32, u16)>) {
405        let n = self.num_vectors;
406        // Binary search: find first entry where doc_id >= target
407        let start = {
408            let mut lo = 0usize;
409            let mut hi = n;
410            while lo < hi {
411                let mid = lo + (hi - lo) / 2;
412                if self.doc_id_at(mid) < doc_id {
413                    lo = mid + 1;
414                } else {
415                    hi = mid;
416                }
417            }
418            lo
419        };
420        // Collect entries with matching doc_id
421        let mut entries = Vec::new();
422        let mut i = start;
423        while i < n {
424            let (did, ord) = self.get_doc_id(i);
425            if did != doc_id {
426                break;
427            }
428            entries.push((did, ord));
429            i += 1;
430        }
431        (start, entries)
432    }
433
434    /// Read doc_id at index from raw bytes (no ordinal).
435    #[inline]
436    fn doc_id_at(&self, idx: usize) -> u32 {
437        let off = idx * DOC_ID_ENTRY_SIZE;
438        let d = &self.doc_ids_bytes[off..];
439        u32::from_le_bytes([d[0], d[1], d[2], d[3]])
440    }
441
442    /// Get doc_id and ordinal at index (parsed from zero-copy mmap bytes).
443    #[inline]
444    pub fn get_doc_id(&self, idx: usize) -> (u32, u16) {
445        let off = idx * DOC_ID_ENTRY_SIZE;
446        let d = &self.doc_ids_bytes[off..];
447        let doc_id = u32::from_le_bytes([d[0], d[1], d[2], d[3]]);
448        let ordinal = u16::from_le_bytes([d[4], d[5]]);
449        (doc_id, ordinal)
450    }
451
452    /// Bytes per vector in storage (cached).
453    #[inline]
454    pub fn vector_byte_size(&self) -> usize {
455        self.vbs
456    }
457
458    /// Total byte length of raw vector data (for chunked merger streaming).
459    pub fn vector_bytes_len(&self) -> u64 {
460        (self.num_vectors as u64) * (self.vector_byte_size() as u64)
461    }
462
463    /// Byte offset where vector data starts (for direct handle access in merger).
464    pub fn vectors_byte_offset(&self) -> u64 {
465        self.vectors_offset
466    }
467
468    /// Access the underlying file handle (for chunked byte-range reads in merger).
469    pub fn handle(&self) -> &FileHandle {
470        &self.handle
471    }
472
473    /// Estimated memory usage — doc_ids are mmap-backed (only Arc overhead).
474    pub fn estimated_memory_bytes(&self) -> usize {
475        size_of::<Self>() + size_of::<OwnedBytes>()
476    }
477}
478
479/// IVF-RaBitQ index data (codebook + cluster assignments)
480///
481/// Centroids are stored at the index level (`field_X_centroids.bin`),
482/// not duplicated per segment.
483#[derive(Debug, Clone, Serialize, Deserialize)]
484pub struct IVFRaBitQIndexData {
485    pub index: crate::structures::IVFRaBitQIndex,
486    pub codebook: crate::structures::RaBitQCodebook,
487}
488
489impl IVFRaBitQIndexData {
490    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
491        bincode::serde::encode_to_vec(self, bincode::config::standard())
492            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
493    }
494
495    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
496        bincode::serde::decode_from_slice(data, bincode::config::standard())
497            .map(|(v, _)| v)
498            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
499    }
500}
501
502/// ScaNN index data (codebook + cluster assignments)
503///
504/// Centroids are stored at the index level (`field_X_centroids.bin`),
505/// not duplicated per segment.
506#[derive(Debug, Clone, Serialize, Deserialize)]
507pub struct ScaNNIndexData {
508    pub index: crate::structures::IVFPQIndex,
509    pub codebook: crate::structures::PQCodebook,
510}
511
512impl ScaNNIndexData {
513    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
514        bincode::serde::encode_to_vec(self, bincode::config::standard())
515            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
516    }
517
518    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
519        bincode::serde::decode_from_slice(data, bincode::config::standard())
520            .map(|(v, _)| v)
521            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
522    }
523}