hermes_core/segment/
vector_data.rs1use serde::{Deserialize, Serialize};
4
/// Magic number that opens the flat binary vector format.
///
/// Numerically `0x46564432`, i.e. the ASCII tag `FVD2` read as a big-endian `u32`.
const FLAT_BINARY_MAGIC: u32 = u32::from_be_bytes(*b"FVD2");
7
/// Dense vectors paired with the document identifiers they belong to.
///
/// Besides the serde derives, the type has a compact binary encoding; see
/// `to_binary_bytes` and `from_binary_bytes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlatVectorData {
    /// Number of `f32` components per vector; stored as a `u32` in the binary header.
    pub dim: usize,
    /// One inner `Vec<f32>` per stored vector.
    pub vectors: Vec<Vec<f32>>,
    /// `(doc_id, ordinal)` pair for each vector.
    // NOTE(review): presumably entry `i` describes `vectors[i]` — confirm against callers.
    pub doc_ids: Vec<(u32, u16)>,
}
17
18impl FlatVectorData {
19 pub fn estimated_memory_bytes(&self) -> usize {
21 let vec_overhead = std::mem::size_of::<Vec<f32>>();
23 let vectors_bytes: usize = self
24 .vectors
25 .iter()
26 .map(|v| v.capacity() * 4 + vec_overhead)
27 .sum();
28 let doc_ids_bytes = self.doc_ids.capacity() * 8;
30 vectors_bytes + doc_ids_bytes + vec_overhead * 2
31 }
32
33 pub fn to_binary_bytes(&self) -> Vec<u8> {
38 let num_vectors = self.doc_ids.len();
39 let total = 12 + num_vectors * self.dim * 4 + num_vectors * 6;
40 let mut buf = Vec::with_capacity(total);
41
42 buf.extend_from_slice(&FLAT_BINARY_MAGIC.to_le_bytes());
43 buf.extend_from_slice(&(self.dim as u32).to_le_bytes());
44 buf.extend_from_slice(&(num_vectors as u32).to_le_bytes());
45
46 for vec in &self.vectors {
47 for &val in vec {
48 buf.extend_from_slice(&val.to_le_bytes());
49 }
50 }
51
52 for &(doc_id, ordinal) in &self.doc_ids {
53 buf.extend_from_slice(&doc_id.to_le_bytes());
54 buf.extend_from_slice(&ordinal.to_le_bytes());
55 }
56
57 buf
58 }
59
60 pub fn serialize_binary_from_flat(
65 index_dim: usize,
66 flat_vectors: &[f32],
67 original_dim: usize,
68 doc_ids: &[(u32, u16)],
69 ) -> Vec<u8> {
70 let num_vectors = doc_ids.len();
71 let total = 12 + num_vectors * index_dim * 4 + num_vectors * 6;
72 let mut buf = Vec::with_capacity(total);
73
74 buf.extend_from_slice(&FLAT_BINARY_MAGIC.to_le_bytes());
75 buf.extend_from_slice(&(index_dim as u32).to_le_bytes());
76 buf.extend_from_slice(&(num_vectors as u32).to_le_bytes());
77
78 if index_dim == original_dim {
79 for &val in flat_vectors {
81 buf.extend_from_slice(&val.to_le_bytes());
82 }
83 } else {
84 for i in 0..num_vectors {
86 let start = i * original_dim;
87 for j in 0..index_dim {
88 buf.extend_from_slice(&flat_vectors[start + j].to_le_bytes());
89 }
90 }
91 }
92
93 for &(doc_id, ordinal) in doc_ids {
94 buf.extend_from_slice(&doc_id.to_le_bytes());
95 buf.extend_from_slice(&ordinal.to_le_bytes());
96 }
97
98 buf
99 }
100
101 pub fn from_binary_bytes(data: &[u8]) -> std::io::Result<Self> {
103 if data.len() < 12 {
104 return Err(std::io::Error::new(
105 std::io::ErrorKind::InvalidData,
106 "FlatVectorData binary too short",
107 ));
108 }
109
110 let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
111 if magic != FLAT_BINARY_MAGIC {
112 return Err(std::io::Error::new(
113 std::io::ErrorKind::InvalidData,
114 "Invalid FlatVectorData binary magic",
115 ));
116 }
117
118 let dim = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
119 let num_vectors = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize;
120
121 let vectors_start = 12;
122 let vectors_byte_len = num_vectors * dim * 4;
123 let doc_ids_start = vectors_start + vectors_byte_len;
124 let doc_ids_byte_len = num_vectors * 6;
125
126 if data.len() < doc_ids_start + doc_ids_byte_len {
127 return Err(std::io::Error::new(
128 std::io::ErrorKind::InvalidData,
129 "FlatVectorData binary truncated",
130 ));
131 }
132
133 let mut vectors = Vec::with_capacity(num_vectors);
134 for i in 0..num_vectors {
135 let mut vec = Vec::with_capacity(dim);
136 let base = vectors_start + i * dim * 4;
137 for j in 0..dim {
138 let off = base + j * 4;
139 vec.push(f32::from_le_bytes([
140 data[off],
141 data[off + 1],
142 data[off + 2],
143 data[off + 3],
144 ]));
145 }
146 vectors.push(vec);
147 }
148
149 let mut doc_ids = Vec::with_capacity(num_vectors);
150 for i in 0..num_vectors {
151 let off = doc_ids_start + i * 6;
152 let doc_id =
153 u32::from_le_bytes([data[off], data[off + 1], data[off + 2], data[off + 3]]);
154 let ordinal = u16::from_le_bytes([data[off + 4], data[off + 5]]);
155 doc_ids.push((doc_id, ordinal));
156 }
157
158 Ok(FlatVectorData {
159 dim,
160 vectors,
161 doc_ids,
162 })
163 }
164}
165
/// Serializable bundle of an `IVFRaBitQIndex` together with the coarse centroids and
/// RaBitQ codebook stored alongside it.
// NOTE(review): presumably the centroids and codebook are the ones the index was built
// with — confirm against the code that constructs this value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFRaBitQIndexData {
    /// The index itself.
    pub index: crate::structures::IVFRaBitQIndex,
    /// Coarse centroids stored with the index.
    pub centroids: crate::structures::CoarseCentroids,
    /// RaBitQ codebook stored with the index.
    pub codebook: crate::structures::RaBitQCodebook,
}
173
174impl IVFRaBitQIndexData {
175 pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
176 serde_json::to_vec(self)
177 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
178 }
179
180 pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
181 serde_json::from_slice(data)
182 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
183 }
184}
185
/// Serializable bundle of an `IVFPQIndex` together with the coarse centroids and PQ
/// codebook stored alongside it.
// NOTE(review): presumably the centroids and codebook are the ones the index was built
// with — confirm against the code that constructs this value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScaNNIndexData {
    /// The index itself.
    pub index: crate::structures::IVFPQIndex,
    /// Coarse centroids stored with the index.
    pub centroids: crate::structures::CoarseCentroids,
    /// Product-quantization codebook stored with the index.
    pub codebook: crate::structures::PQCodebook,
}
193
194impl ScaNNIndexData {
195 pub fn to_bytes(&self) -> std::io::Result<Vec<u8>> {
196 serde_json::to_vec(self)
197 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
198 }
199
200 pub fn from_bytes(data: &[u8]) -> std::io::Result<Self> {
201 serde_json::from_slice(data)
202 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
203 }
204}