hermes_core/segment/vector_data.rs

//! Vector index data structures shared between builder and reader

use std::io;
use std::mem::size_of;

use serde::{Deserialize, Serialize};

use crate::directories::{AsyncFileRead, LazyFileSlice, OwnedBytes};
use crate::dsl::DenseVectorQuantization;
use crate::structures::simd::{batch_f32_to_f16, batch_f32_to_u8, f16_to_f32, u8_to_f32};

/// Dequantize raw bytes to f32 based on storage quantization.
///
/// `raw` is the quantized byte slice, `out` receives the f32 values.
/// `num_floats` is the number of f32 values to produce (= num_vectors × dim).
/// Data-first file layout guarantees alignment for f32/f16 access.
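///
/// A minimal usage sketch for the f32 path (illustrative only, not compiled as a
/// doctest; the byte slice is built from an f32 array so the 4-byte alignment the
/// F32 branch expects holds):
///
/// ```ignore
/// let values = [1.0f32, 2.0f32];
/// let raw: &[u8] = unsafe {
///     std::slice::from_raw_parts(values.as_ptr() as *const u8, std::mem::size_of_val(&values))
/// };
/// let mut out = vec![0f32; 2];
/// dequantize_raw(raw, DenseVectorQuantization::F32, 2, &mut out);
/// assert_eq!(out, [1.0, 2.0]);
/// ```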
#[inline]
pub fn dequantize_raw(
    raw: &[u8],
    quant: DenseVectorQuantization,
    num_floats: usize,
    out: &mut [f32],
) {
    debug_assert!(out.len() >= num_floats);
    match quant {
        DenseVectorQuantization::F32 => {
            debug_assert!(
                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<f32>()),
                "f32 vector data not 4-byte aligned"
            );
            out[..num_floats].copy_from_slice(unsafe {
                std::slice::from_raw_parts(raw.as_ptr() as *const f32, num_floats)
            });
        }
        DenseVectorQuantization::F16 => {
            debug_assert!(
                (raw.as_ptr() as usize).is_multiple_of(std::mem::align_of::<u16>()),
                "f16 vector data not 2-byte aligned"
            );
            let f16_slice =
                unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const u16, num_floats) };
            for (i, &h) in f16_slice.iter().enumerate() {
                out[i] = f16_to_f32(h);
            }
        }
        DenseVectorQuantization::UInt8 => {
            for (i, &b) in raw.iter().enumerate().take(num_floats) {
                out[i] = u8_to_f32(b);
            }
        }
    }
}

/// Magic number for binary flat vector format v3 (0x46564433, ASCII "FVD3")
const FLAT_BINARY_MAGIC: u32 = 0x46564433;

/// Binary header: magic(u32) + dim(u32) + num_vectors(u32) + quant_type(u8) + padding(3)
const FLAT_BINARY_HEADER_SIZE: usize = 16;
/// Per-doc_id entry: doc_id(u32) + ordinal(u16)
const DOC_ID_ENTRY_SIZE: usize = size_of::<u32>() + size_of::<u16>();

/// Flat vector binary format helpers for writing.
///
/// Binary format v3:
/// ```text
/// [magic(u32)][dim(u32)][num_vectors(u32)][quant_type(u8)][padding(3)]
/// [vectors: N×dim×element_size]
/// [doc_ids: N×(u32+u16)]
/// ```
///
/// `element_size` is determined by `quant_type`: f32=4, f16=2, uint8=1.
/// Reading is handled by [`LazyFlatVectorData`] which loads only doc_ids into memory
/// and accesses vector data lazily via mmap-backed range reads.
pub struct FlatVectorData;

impl FlatVectorData {
    /// Write the binary header to a writer.
    pub fn write_binary_header(
        dim: usize,
        num_vectors: usize,
        quant: DenseVectorQuantization,
        writer: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
        writer.write_all(&(dim as u32).to_le_bytes())?;
        writer.write_all(&(num_vectors as u32).to_le_bytes())?;
        writer.write_all(&[quant.tag(), 0, 0, 0])?; // quant_type + 3 bytes padding
        Ok(())
    }

    /// Compute the serialized size without actually serializing.
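    ///
    /// Worked example (values chosen for illustration): with `dim = 128`,
    /// `num_vectors = 1000`, and f16 storage (2 bytes per element), this is
    /// `16 + 1000 * 128 * 2 + 1000 * 6 = 262_016` bytes.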
    pub fn serialized_binary_size(
        dim: usize,
        num_vectors: usize,
        quant: DenseVectorQuantization,
    ) -> usize {
        FLAT_BINARY_HEADER_SIZE
            + num_vectors * dim * quant.element_size()
            + num_vectors * DOC_ID_ENTRY_SIZE
    }

    /// Stream from flat f32 storage to a writer, quantizing on write.
    ///
    /// `flat_vectors` is contiguous storage of `num_vectors × dim` f32 values.
    /// Vectors are quantized to the specified format before writing.
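    ///
    /// A minimal usage sketch (illustrative only, not compiled as a doctest; it
    /// assumes two 4-dimensional vectors already laid out contiguously):
    ///
    /// ```ignore
    /// let dim = 4;
    /// let flat: Vec<f32> = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8];
    /// let doc_ids: Vec<(u32, u16)> = vec![(0, 0), (1, 0)];
    /// let mut out: Vec<u8> = Vec::new();
    /// FlatVectorData::serialize_binary_from_flat_streaming(
    ///     dim,
    ///     &flat,
    ///     &doc_ids,
    ///     DenseVectorQuantization::F16,
    ///     &mut out,
    /// )?;
    /// // Output length matches the precomputed size: 16 + 2*4*2 + 2*6 = 44 bytes.
    /// assert_eq!(
    ///     out.len(),
    ///     FlatVectorData::serialized_binary_size(dim, doc_ids.len(), DenseVectorQuantization::F16)
    /// );
    /// ```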
    pub fn serialize_binary_from_flat_streaming(
        dim: usize,
        flat_vectors: &[f32],
        doc_ids: &[(u32, u16)],
        quant: DenseVectorQuantization,
        writer: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        let num_vectors = doc_ids.len();
        Self::write_binary_header(dim, num_vectors, quant, writer)?;

        match quant {
            DenseVectorQuantization::F32 => {
                let bytes: &[u8] = unsafe {
                    std::slice::from_raw_parts(
                        flat_vectors.as_ptr() as *const u8,
                        std::mem::size_of_val(flat_vectors),
                    )
                };
                writer.write_all(bytes)?;
            }
            DenseVectorQuantization::F16 => {
                let mut buf = vec![0u16; dim];
                for v in flat_vectors.chunks_exact(dim) {
                    batch_f32_to_f16(v, &mut buf);
                    let bytes: &[u8] =
                        unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const u8, dim * 2) };
                    writer.write_all(bytes)?;
                }
            }
            DenseVectorQuantization::UInt8 => {
                let mut buf = vec![0u8; dim];
                for v in flat_vectors.chunks_exact(dim) {
                    batch_f32_to_u8(v, &mut buf);
                    writer.write_all(&buf)?;
                }
            }
        }

        for &(doc_id, ordinal) in doc_ids {
            writer.write_all(&doc_id.to_le_bytes())?;
            writer.write_all(&ordinal.to_le_bytes())?;
        }

        Ok(())
    }

    /// Write raw pre-quantized vector bytes to a writer (for merger streaming).
    ///
    /// `raw_bytes` is already in the target quantized format.
    pub fn write_raw_vector_bytes(
        raw_bytes: &[u8],
        writer: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        writer.write_all(raw_bytes)
    }
}

/// Lazy flat vector data: doc_ids in memory, vectors accessed via range reads.
///
/// Only the doc_id index (~6 bytes/vector) is loaded into memory.
/// Vector data stays on disk and is accessed via mmap-backed range reads.
/// Element size depends on quantization: f32=4, f16=2, uint8=1 bytes/dim.
///
/// Used for:
/// - Brute-force search (batched scoring with native-precision SIMD)
/// - Reranking (read individual vectors by doc_id via binary search)
/// - doc() hydration (dequantize to f32 for stored documents)
/// - Merge streaming (chunked raw vector bytes + doc_id iteration)
#[derive(Debug, Clone)]
pub struct LazyFlatVectorData {
    /// Vector dimension
    pub dim: usize,
    /// Total number of vectors
    pub num_vectors: usize,
    /// Storage quantization type
    pub quantization: DenseVectorQuantization,
    /// In-memory doc_id index: (doc_id, ordinal) per vector
    pub doc_ids: Vec<(u32, u16)>,
    /// Lazy handle to this field's flat data region in the .vectors file
    handle: LazyFileSlice,
    /// Byte offset within handle where raw vector data starts (after header)
    vectors_offset: u64,
    /// Bytes per vector element (cached from quantization.element_size())
    element_size: usize,
}

impl LazyFlatVectorData {
    /// Open from a lazy file slice pointing to the flat binary data region.
    ///
    /// Reads header (16 bytes) + doc_ids (~6 bytes/vector) into memory.
    /// Vector data stays lazy on disk.
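    ///
    /// A minimal usage sketch (illustrative only, not compiled as a doctest; it
    /// assumes an async context and a `LazyFileSlice` named `slice` that covers
    /// this field's flat vector region):
    ///
    /// ```ignore
    /// let flat = LazyFlatVectorData::open(slice).await?;
    /// println!("{} vectors of dim {}", flat.num_vectors, flat.dim);
    /// ```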
    pub async fn open(handle: LazyFileSlice) -> io::Result<Self> {
        // Read header: magic(4) + dim(4) + num_vectors(4) + quant_type(1) + pad(3) = 16 bytes
        let header = handle
            .read_bytes_range(0..FLAT_BINARY_HEADER_SIZE as u64)
            .await?;
        let hdr = header.as_slice();

        let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
        if magic != FLAT_BINARY_MAGIC {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Invalid FlatVectorData binary magic",
            ));
        }

        let dim = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
        let num_vectors = u32::from_le_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]) as usize;
        let quantization = DenseVectorQuantization::from_tag(hdr[12]).ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Unknown quantization tag: {}", hdr[12]),
            )
        })?;
        let element_size = quantization.element_size();

        // Read doc_ids section (small: 6 bytes per vector)
        let vectors_byte_len = num_vectors * dim * element_size;
        let doc_ids_start = (FLAT_BINARY_HEADER_SIZE + vectors_byte_len) as u64;
        let doc_ids_byte_len = (num_vectors * DOC_ID_ENTRY_SIZE) as u64;

        let doc_ids_bytes = handle
            .read_bytes_range(doc_ids_start..doc_ids_start + doc_ids_byte_len)
            .await?;
        let d = doc_ids_bytes.as_slice();

        let mut doc_ids = Vec::with_capacity(num_vectors);
        for i in 0..num_vectors {
            let off = i * DOC_ID_ENTRY_SIZE;
            let doc_id = u32::from_le_bytes([d[off], d[off + 1], d[off + 2], d[off + 3]]);
            let ordinal = u16::from_le_bytes([d[off + 4], d[off + 5]]);
            doc_ids.push((doc_id, ordinal));
        }

        Ok(Self {
            dim,
            num_vectors,
            quantization,
            doc_ids,
            handle,
            vectors_offset: FLAT_BINARY_HEADER_SIZE as u64,
            element_size,
        })
    }

    /// Read a single vector by index, dequantized to f32.
    ///
    /// `out` must have length >= `self.dim`. Returns `Ok(())` on success.
    /// Used for ANN training and doc() hydration where f32 is needed.
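    ///
    /// Sketch (illustrative only; assumes an opened `LazyFlatVectorData` named `flat`):
    ///
    /// ```ignore
    /// let mut buf = vec![0f32; flat.dim];
    /// flat.read_vector_into(0, &mut buf).await?;
    /// ```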
    pub async fn read_vector_into(&self, idx: usize, out: &mut [f32]) -> io::Result<()> {
        debug_assert!(out.len() >= self.dim);
        let vec_byte_len = self.dim * self.element_size;
        let byte_offset = self.vectors_offset + (idx * vec_byte_len) as u64;
        let bytes = self
            .handle
            .read_bytes_range(byte_offset..byte_offset + vec_byte_len as u64)
            .await?;
        let raw = bytes.as_slice();

        dequantize_raw(raw, self.quantization, self.dim, out);
        Ok(())
    }

    /// Read a single vector by index, dequantized to f32 (allocates a new Vec<f32>).
    pub async fn get_vector(&self, idx: usize) -> io::Result<Vec<f32>> {
        let mut vector = vec![0f32; self.dim];
        self.read_vector_into(idx, &mut vector).await?;
        Ok(vector)
    }

    /// Read a single vector's raw bytes (no dequantization) into a caller-provided buffer.
    ///
    /// `out` must have length >= `self.vector_byte_size()`.
    /// Used for native-precision reranking where raw quantized bytes are scored directly.
    pub async fn read_vector_raw_into(&self, idx: usize, out: &mut [u8]) -> io::Result<()> {
        let vbs = self.vector_byte_size();
        debug_assert!(out.len() >= vbs);
        let byte_offset = self.vectors_offset + (idx * vbs) as u64;
        let bytes = self
            .handle
            .read_bytes_range(byte_offset..byte_offset + vbs as u64)
            .await?;
        out[..vbs].copy_from_slice(bytes.as_slice());
        Ok(())
    }

    /// Read a contiguous batch of raw quantized bytes by index range.
    ///
    /// Returns raw bytes for vectors `[start_idx..start_idx+count)`.
    /// Bytes are in native quantized format; pass to `batch_cosine_scores_f16/u8`
    /// or `batch_cosine_scores` (for f32) for scoring.
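    ///
    /// Sketch of a batched brute-force pass (illustrative only; the chunk size and
    /// the `score_batch` call are placeholders for whatever scorer matches
    /// `self.quantization`, and `flat` is an opened `LazyFlatVectorData`):
    ///
    /// ```ignore
    /// const BATCH: usize = 1024;
    /// let mut start = 0;
    /// while start < flat.num_vectors {
    ///     let count = BATCH.min(flat.num_vectors - start);
    ///     let bytes = flat.read_vectors_batch(start, count).await?;
    ///     // Score `count` vectors of `flat.dim` dims directly in their stored precision.
    ///     score_batch(bytes.as_slice(), flat.dim, count);
    ///     start += count;
    /// }
    /// ```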
    pub async fn read_vectors_batch(
        &self,
        start_idx: usize,
        count: usize,
    ) -> io::Result<OwnedBytes> {
        debug_assert!(start_idx + count <= self.num_vectors);
        let vec_byte_len = self.dim * self.element_size;
        let byte_offset = self.vectors_offset + (start_idx * vec_byte_len) as u64;
        let byte_len = (count * vec_byte_len) as u64;
        self.handle
            .read_bytes_range(byte_offset..byte_offset + byte_len)
            .await
    }

    /// Find flat indexes for a given doc_id via binary search on sorted doc_ids.
    ///
    /// doc_ids are sorted by (doc_id, ordinal); the segment builder adds docs
    /// sequentially. Returns a slice of (doc_id, ordinal) entries; the position
    /// of each entry in `self.doc_ids` is its flat vector index.
    ///
    /// Returns `(start_index, slice)` where start_index is the position in doc_ids.
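    ///
    /// Sketch of a reranking lookup (illustrative only; assumes an opened
    /// `LazyFlatVectorData` named `flat` and a candidate `doc_id`):
    ///
    /// ```ignore
    /// let (start, entries) = flat.flat_indexes_for_doc(doc_id);
    /// for (i, &(_, ordinal)) in entries.iter().enumerate() {
    ///     let flat_idx = start + i; // position in doc_ids == flat vector index
    ///     let vector = flat.get_vector(flat_idx).await?;
    ///     // rerank using `vector` and `ordinal` ...
    /// }
    /// ```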
    pub fn flat_indexes_for_doc(&self, doc_id: u32) -> (usize, &[(u32, u16)]) {
        let start = self.doc_ids.partition_point(|&(id, _)| id < doc_id);
        let end = start + self.doc_ids[start..].partition_point(|&(id, _)| id == doc_id);
        (start, &self.doc_ids[start..end])
    }

    /// Get doc_id and ordinal at index (from in-memory index).
    #[inline]
    pub fn get_doc_id(&self, idx: usize) -> (u32, u16) {
        self.doc_ids[idx]
    }

    /// Bytes per vector in storage.
    #[inline]
    pub fn vector_byte_size(&self) -> usize {
        self.dim * self.element_size
    }

    /// Total byte length of raw vector data (for chunked merger streaming).
    pub fn vector_bytes_len(&self) -> u64 {
        (self.num_vectors as u64) * (self.vector_byte_size() as u64)
    }

    /// Byte offset where vector data starts (for direct handle access in merger).
    pub fn vectors_byte_offset(&self) -> u64 {
        self.vectors_offset
    }

    /// Access the underlying lazy file handle (for chunked byte-range reads in merger).
    pub fn handle(&self) -> &LazyFileSlice {
        &self.handle
    }

    /// Estimated memory usage (only doc_ids are in memory).
    pub fn estimated_memory_bytes(&self) -> usize {
        self.doc_ids.capacity() * size_of::<(u32, u16)>() + size_of::<Self>()
    }
}

/// IVF-RaBitQ index data with embedded centroids and codebook
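///
/// Round-trip sketch (illustrative only; assumes `index`, `centroids`, and `codebook`
/// values built elsewhere):
///
/// ```ignore
/// let data = IVFRaBitQIndexData { index, centroids, codebook };
/// let bytes = data.to_bytes()?;
/// let restored = IVFRaBitQIndexData::from_bytes(&bytes)?;
/// ```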
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFRaBitQIndexData {
    pub index: crate::structures::IVFRaBitQIndex,
    pub centroids: crate::structures::CoarseCentroids,
    pub codebook: crate::structures::RaBitQCodebook,
}

impl IVFRaBitQIndexData {
    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
        bincode::serde::encode_to_vec(self, bincode::config::standard())
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
    }

    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
        bincode::serde::decode_from_slice(data, bincode::config::standard())
            .map(|(v, _)| v)
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
    }
}

/// ScaNN index data with embedded centroids and codebook
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScaNNIndexData {
    pub index: crate::structures::IVFPQIndex,
    pub centroids: crate::structures::CoarseCentroids,
    pub codebook: crate::structures::PQCodebook,
}

impl ScaNNIndexData {
    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
        bincode::serde::encode_to_vec(self, bincode::config::standard())
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
    }

    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
        bincode::serde::decode_from_slice(data, bincode::config::standard())
            .map(|(v, _)| v)
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
    }
}