Skip to main content

hermes_core/segment/
vector_data.rs

1//! Vector index data structures shared between builder and reader
2
3use std::io;
4use std::mem::size_of;
5
6use serde::{Deserialize, Serialize};
7
8use crate::directories::{AsyncFileRead, LazyFileSlice, OwnedBytes};
9
/// Magic number identifying the binary flat vector format.
///
/// The value is `0x46564432`, i.e. the ASCII tag `FVD2` read as a big-endian `u32`.
/// The header stores it with `to_le_bytes`, so the first four bytes on disk are
/// `2DVF`.
const FLAT_BINARY_MAGIC: u32 = u32::from_be_bytes(*b"FVD2");

/// Size in bytes of the binary header: three `u32` fields
/// (magic, dim, num_vectors).
const FLAT_BINARY_HEADER_SIZE: usize = size_of::<[u32; 3]>();
/// Size in bytes of one vector component (`f32`).
const FLOAT_SIZE: usize = size_of::<f32>();
/// Size in bytes of one serialized doc_id entry: ordinal(u16) following doc_id(u32),
/// stored without padding.
const DOC_ID_ENTRY_SIZE: usize = size_of::<u16>() + size_of::<u32>();
19
20/// Flat vector binary format helpers for writing.
21///
22/// Binary format: `[magic(u32)][dim(u32)][num_vectors(u32)][vectors: N×dim×f32][doc_ids: N×(u32+u16)]`
23///
24/// Reading is handled by [`LazyFlatVectorData`] which loads only doc_ids into memory
25/// and accesses vector data lazily via mmap-backed range reads.
26pub struct FlatVectorData;
27
28impl FlatVectorData {
29    /// Write the binary header (magic + dim + num_vectors) to a writer.
30    pub fn write_binary_header(
31        dim: usize,
32        num_vectors: usize,
33        writer: &mut dyn std::io::Write,
34    ) -> std::io::Result<()> {
35        writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
36        writer.write_all(&(dim as u32).to_le_bytes())?;
37        writer.write_all(&(num_vectors as u32).to_le_bytes())?;
38        Ok(())
39    }
40
41    /// Compute the serialized size without actually serializing.
42    pub fn serialized_binary_size(dim: usize, num_vectors: usize) -> usize {
43        FLAT_BINARY_HEADER_SIZE + num_vectors * dim * FLOAT_SIZE + num_vectors * DOC_ID_ENTRY_SIZE
44    }
45
46    /// Stream directly from flat f32 storage to a writer (zero-buffer serialization).
47    ///
48    /// `flat_vectors` is contiguous storage of dim*n floats.
49    /// `original_dim` is the dimension in flat_vectors (may differ from `dim` for MRL).
50    pub fn serialize_binary_from_flat_streaming(
51        dim: usize,
52        flat_vectors: &[f32],
53        original_dim: usize,
54        doc_ids: &[(u32, u16)],
55        writer: &mut dyn std::io::Write,
56    ) -> std::io::Result<()> {
57        let num_vectors = doc_ids.len();
58
59        writer.write_all(&FLAT_BINARY_MAGIC.to_le_bytes())?;
60        writer.write_all(&(dim as u32).to_le_bytes())?;
61        writer.write_all(&(num_vectors as u32).to_le_bytes())?;
62
63        if dim == original_dim {
64            // No trimming — write all floats directly
65            let bytes: &[u8] = unsafe {
66                std::slice::from_raw_parts(
67                    flat_vectors.as_ptr() as *const u8,
68                    flat_vectors.len() * FLOAT_SIZE,
69                )
70            };
71            writer.write_all(bytes)?;
72        } else {
73            // Trim each vector to dim (matryoshka/MRL)
74            for i in 0..num_vectors {
75                let start = i * original_dim;
76                let slice = &flat_vectors[start..start + dim];
77                let bytes: &[u8] = unsafe {
78                    std::slice::from_raw_parts(slice.as_ptr() as *const u8, dim * FLOAT_SIZE)
79                };
80                writer.write_all(bytes)?;
81            }
82        }
83
84        for &(doc_id, ordinal) in doc_ids {
85            writer.write_all(&doc_id.to_le_bytes())?;
86            writer.write_all(&ordinal.to_le_bytes())?;
87        }
88
89        Ok(())
90    }
91}
92
93/// Lazy flat vector data — doc_ids in memory, vectors accessed via range reads.
94///
95/// Only the doc_id index (~6 bytes/vector) is loaded into memory.
96/// Vector data (~dim×4 bytes/vector) stays on disk and is accessed via
97/// mmap-backed range reads on demand. For 768-dim vectors this is
98/// ~3KB per vector that stays lazy vs 6 bytes loaded.
99///
100/// Used for:
101/// - Reranking (read individual vectors by doc_id)
102/// - Merge streaming (bulk-read vector bytes in chunks)
103#[derive(Debug, Clone)]
104pub struct LazyFlatVectorData {
105    /// Vector dimension
106    pub dim: usize,
107    /// Total number of vectors
108    pub num_vectors: usize,
109    /// In-memory doc_id index: (doc_id, ordinal) per vector
110    pub doc_ids: Vec<(u32, u16)>,
111    /// Lazy handle to this field's flat data region in the .vectors file
112    handle: LazyFileSlice,
113    /// Byte offset within handle where raw vector f32 data starts (after header)
114    vectors_offset: u64,
115}
116
117impl LazyFlatVectorData {
118    /// Open from a lazy file slice pointing to the flat binary data region.
119    ///
120    /// Reads header (12 bytes) + doc_ids (~6 bytes/vector) into memory.
121    /// Vector data stays lazy on disk.
122    pub async fn open(handle: LazyFileSlice) -> io::Result<Self> {
123        // Read header: magic(4) + dim(4) + num_vectors(4) = 12 bytes
124        let header = handle
125            .read_bytes_range(0..FLAT_BINARY_HEADER_SIZE as u64)
126            .await?;
127        let hdr = header.as_slice();
128
129        let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
130        if magic != FLAT_BINARY_MAGIC {
131            return Err(io::Error::new(
132                io::ErrorKind::InvalidData,
133                "Invalid FlatVectorData binary magic",
134            ));
135        }
136
137        let dim = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
138        let num_vectors = u32::from_le_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]) as usize;
139
140        // Read doc_ids section (small: 6 bytes per vector)
141        let vectors_byte_len = num_vectors * dim * FLOAT_SIZE;
142        let doc_ids_start = (FLAT_BINARY_HEADER_SIZE + vectors_byte_len) as u64;
143        let doc_ids_byte_len = (num_vectors * DOC_ID_ENTRY_SIZE) as u64;
144
145        let doc_ids_bytes = handle
146            .read_bytes_range(doc_ids_start..doc_ids_start + doc_ids_byte_len)
147            .await?;
148        let d = doc_ids_bytes.as_slice();
149
150        let mut doc_ids = Vec::with_capacity(num_vectors);
151        for i in 0..num_vectors {
152            let off = i * DOC_ID_ENTRY_SIZE;
153            let doc_id = u32::from_le_bytes([d[off], d[off + 1], d[off + 2], d[off + 3]]);
154            let ordinal = u16::from_le_bytes([d[off + 4], d[off + 5]]);
155            doc_ids.push((doc_id, ordinal));
156        }
157
158        Ok(Self {
159            dim,
160            num_vectors,
161            doc_ids,
162            handle,
163            vectors_offset: FLAT_BINARY_HEADER_SIZE as u64,
164        })
165    }
166
167    /// Read a single vector by index into a caller-provided slice (zero allocation).
168    ///
169    /// `out` must have length >= `self.dim`. Returns `Ok(())` on success.
170    pub async fn read_vector_into(&self, idx: usize, out: &mut [f32]) -> io::Result<()> {
171        debug_assert!(out.len() >= self.dim);
172        let byte_offset = self.vectors_offset + (idx * self.dim * FLOAT_SIZE) as u64;
173        let byte_len = (self.dim * FLOAT_SIZE) as u64;
174        let bytes = self
175            .handle
176            .read_bytes_range(byte_offset..byte_offset + byte_len)
177            .await?;
178
179        unsafe {
180            std::ptr::copy_nonoverlapping(
181                bytes.as_slice().as_ptr(),
182                out.as_mut_ptr() as *mut u8,
183                self.dim * FLOAT_SIZE,
184            );
185        }
186        Ok(())
187    }
188
189    /// Read a single vector by index (allocates a new Vec<f32>).
190    pub async fn get_vector(&self, idx: usize) -> io::Result<Vec<f32>> {
191        let mut vector = vec![0f32; self.dim];
192        self.read_vector_into(idx, &mut vector).await?;
193        Ok(vector)
194    }
195
196    /// Read all raw vector bytes at once (for bulk streaming in merger).
197    ///
198    /// Returns the contiguous f32 data as owned bytes. Caller writes it
199    /// directly to the output writer — no f32 parsing needed.
200    pub async fn read_all_vector_bytes(&self) -> io::Result<OwnedBytes> {
201        let byte_len = (self.num_vectors * self.dim * FLOAT_SIZE) as u64;
202        self.handle
203            .read_bytes_range(self.vectors_offset..self.vectors_offset + byte_len)
204            .await
205    }
206
207    /// Get doc_id and ordinal at index (from in-memory index).
208    #[inline]
209    pub fn get_doc_id(&self, idx: usize) -> (u32, u16) {
210        self.doc_ids[idx]
211    }
212
213    /// Estimated memory usage (only doc_ids are in memory).
214    pub fn estimated_memory_bytes(&self) -> usize {
215        self.doc_ids.capacity() * size_of::<(u32, u16)>() + size_of::<Self>()
216    }
217}
218
219/// IVF-RaBitQ index data with embedded centroids and codebook
220#[derive(Debug, Clone, Serialize, Deserialize)]
221pub struct IVFRaBitQIndexData {
222    pub index: crate::structures::IVFRaBitQIndex,
223    pub centroids: crate::structures::CoarseCentroids,
224    pub codebook: crate::structures::RaBitQCodebook,
225}
226
227impl IVFRaBitQIndexData {
228    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
229        serde_json::to_vec(self)
230            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
231    }
232
233    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
234        serde_json::from_slice(data)
235            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
236    }
237}
238
239/// ScaNN index data with embedded centroids and codebook
240#[derive(Debug, Clone, Serialize, Deserialize)]
241pub struct ScaNNIndexData {
242    pub index: crate::structures::IVFPQIndex,
243    pub centroids: crate::structures::CoarseCentroids,
244    pub codebook: crate::structures::PQCodebook,
245}
246
247impl ScaNNIndexData {
248    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
249        serde_json::to_vec(self)
250            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
251    }
252
253    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
254        serde_json::from_slice(data)
255            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
256    }
257}