Skip to main content

hermes_core/segment/
vector_data.rs

//! Vector index data structures shared between the segment builder and reader.
2
3use serde::{Deserialize, Serialize};
4
5/// Flat vector data for brute-force search (accumulating state)
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct FlatVectorData {
8    pub dim: usize,
9    pub vectors: Vec<Vec<f32>>,
10    /// Document IDs with ordinals: (doc_id, ordinal) pairs
11    /// Ordinal tracks which vector in a multi-valued field
12    pub doc_ids: Vec<(u32, u16)>,
13}
14
15impl FlatVectorData {
16    /// Estimate memory usage
17    pub fn estimated_memory_bytes(&self) -> usize {
18        // Vec<Vec<f32>>: each inner vec has capacity * 4 bytes + Vec overhead
19        let vec_overhead = std::mem::size_of::<Vec<f32>>();
20        let vectors_bytes: usize = self
21            .vectors
22            .iter()
23            .map(|v| v.capacity() * 4 + vec_overhead)
24            .sum();
25        // doc_ids: (u32, u16) = 6 bytes + padding = 8 bytes each
26        let doc_ids_bytes = self.doc_ids.capacity() * 8;
27        vectors_bytes + doc_ids_bytes + vec_overhead * 2
28    }
29}
30
31/// IVF-RaBitQ index data with embedded centroids and codebook
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct IVFRaBitQIndexData {
34    pub index: crate::structures::IVFRaBitQIndex,
35    pub centroids: crate::structures::CoarseCentroids,
36    pub codebook: crate::structures::RaBitQCodebook,
37}
38
39impl IVFRaBitQIndexData {
40    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
41        serde_json::to_vec(self)
42            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
43    }
44
45    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
46        serde_json::from_slice(data)
47            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
48    }
49}
50
51/// ScaNN index data with embedded centroids and codebook
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct ScaNNIndexData {
54    pub index: crate::structures::IVFPQIndex,
55    pub centroids: crate::structures::CoarseCentroids,
56    pub codebook: crate::structures::PQCodebook,
57}
58
59impl ScaNNIndexData {
60    pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
61        serde_json::to_vec(self)
62            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
63    }
64
65    pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
66        serde_json::from_slice(data)
67            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
68    }
69}