1use serde::{Deserialize, Serialize};
14use std::collections::HashMap;
15use std::path::Path;
16
17use crate::dsl::VectorIndexType;
18use crate::error::{Error, Result};
19use crate::schema::Schema;
20
/// Name of the file, relative to the index directory, in which
/// `IndexMetadata` is stored as JSON by `IndexMetadata::save` and read back
/// by `IndexMetadata::load`.
pub const INDEX_META_FILENAME: &str = "metadata.json";
23
/// Build state of the vector index of a single field.
///
/// The state starts out as [`VectorIndexState::Flat`] (the `Default`) and is
/// switched to [`VectorIndexState::Built`] by
/// `IndexMetadata::mark_field_built`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum VectorIndexState {
    /// Initial state: the field has not been marked as built.
    // NOTE(review): the name suggests vectors are searched without trained
    // structures (brute force) in this state -- confirm against the search path.
    #[default]
    Flat,
    /// The field has been marked as built.
    Built {
        /// Vector count passed to `mark_field_built`.
        // NOTE(review): presumably the number of vectors at build time -- confirm.
        vector_count: usize,
        /// Cluster count passed to `mark_field_built`.
        num_clusters: usize,
    },
}
38
/// Per-field vector index metadata, stored in `IndexMetadata::vector_fields`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldVectorMeta {
    /// Id of the field this entry describes (also used as the map key).
    pub field_id: u32,
    /// Index type given to `IndexMetadata::init_field` for this field.
    pub index_type: VectorIndexType,
    /// Current build state of the field.
    pub state: VectorIndexState,
    /// Path of the file holding the field's centroids, parsed as JSON
    /// `CoarseCentroids` by `load_trained_structures`. `None` until
    /// `mark_field_built` sets it; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub centroids_file: Option<String>,
    /// Path of the file holding the field's codebook, parsed as JSON
    /// `PQCodebook` by `load_trained_structures`. Optional even for built
    /// fields; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub codebook_file: Option<String>,
}
55
/// Index-level metadata persisted as JSON under [`INDEX_META_FILENAME`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexMetadata {
    /// Metadata format version; `IndexMetadata::new` writes `1`.
    pub version: u32,
    /// Schema the index was created with.
    pub schema: Schema,
    /// Ids of the segments recorded for this index, in insertion order.
    /// `add_segment` never records the same id twice.
    pub segments: Vec<String>,
    /// Vector index metadata keyed by field id. Falls back to an empty map
    /// when the key is absent from the stored JSON.
    #[serde(default)]
    pub vector_fields: HashMap<u32, FieldVectorMeta>,
    /// Vector count compared against the build threshold in
    /// `should_build_field`. Falls back to `0` when absent from the stored JSON.
    // NOTE(review): presumably the total across all segments -- this file
    // never updates it outside of tests; confirm with the writer side.
    #[serde(default)]
    pub total_vectors: usize,
}
72
73impl IndexMetadata {
74 pub fn new(schema: Schema) -> Self {
76 Self {
77 version: 1,
78 schema,
79 segments: Vec::new(),
80 vector_fields: HashMap::new(),
81 total_vectors: 0,
82 }
83 }
84
85 pub fn is_field_built(&self, field_id: u32) -> bool {
87 self.vector_fields
88 .get(&field_id)
89 .map(|f| matches!(f.state, VectorIndexState::Built { .. }))
90 .unwrap_or(false)
91 }
92
93 pub fn get_field_meta(&self, field_id: u32) -> Option<&FieldVectorMeta> {
95 self.vector_fields.get(&field_id)
96 }
97
98 pub fn init_field(&mut self, field_id: u32, index_type: VectorIndexType) {
100 self.vector_fields
101 .entry(field_id)
102 .or_insert(FieldVectorMeta {
103 field_id,
104 index_type,
105 state: VectorIndexState::Flat,
106 centroids_file: None,
107 codebook_file: None,
108 });
109 }
110
111 pub fn mark_field_built(
113 &mut self,
114 field_id: u32,
115 vector_count: usize,
116 num_clusters: usize,
117 centroids_file: String,
118 codebook_file: Option<String>,
119 ) {
120 if let Some(field) = self.vector_fields.get_mut(&field_id) {
121 field.state = VectorIndexState::Built {
122 vector_count,
123 num_clusters,
124 };
125 field.centroids_file = Some(centroids_file);
126 field.codebook_file = codebook_file;
127 }
128 }
129
130 pub fn should_build_field(&self, field_id: u32, threshold: usize) -> bool {
132 if self.is_field_built(field_id) {
134 return false;
135 }
136 self.total_vectors >= threshold
138 }
139
140 pub fn add_segment(&mut self, segment_id: String) {
142 if !self.segments.contains(&segment_id) {
143 self.segments.push(segment_id);
144 }
145 }
146
147 pub fn remove_segments(&mut self, to_remove: &[String]) {
149 self.segments.retain(|s| !to_remove.contains(s));
150 }
151
152 pub async fn load<D: crate::directories::Directory>(dir: &D) -> Result<Self> {
154 let path = Path::new(INDEX_META_FILENAME);
155 let slice = dir.open_read(path).await?;
156 let bytes = slice.read_bytes().await?;
157 serde_json::from_slice(bytes.as_slice()).map_err(|e| Error::Serialization(e.to_string()))
158 }
159
160 pub async fn save<D: crate::directories::DirectoryWriter>(&self, dir: &D) -> Result<()> {
162 let path = Path::new(INDEX_META_FILENAME);
163 let bytes =
164 serde_json::to_vec_pretty(self).map_err(|e| Error::Serialization(e.to_string()))?;
165 dir.write(path, &bytes).await.map_err(Error::Io)
166 }
167
168 pub async fn load_trained_structures<D: crate::directories::Directory>(
172 &self,
173 dir: &D,
174 ) -> (
175 rustc_hash::FxHashMap<u32, std::sync::Arc<crate::structures::CoarseCentroids>>,
176 rustc_hash::FxHashMap<u32, std::sync::Arc<crate::structures::PQCodebook>>,
177 ) {
178 use std::sync::Arc;
179
180 let mut centroids = rustc_hash::FxHashMap::default();
181 let mut codebooks = rustc_hash::FxHashMap::default();
182
183 for (field_id, field_meta) in &self.vector_fields {
184 if !matches!(field_meta.state, VectorIndexState::Built { .. }) {
185 continue;
186 }
187
188 if let Some(ref file) = field_meta.centroids_file
190 && let Ok(slice) = dir.open_read(Path::new(file)).await
191 && let Ok(bytes) = slice.read_bytes().await
192 && let Ok(c) =
193 serde_json::from_slice::<crate::structures::CoarseCentroids>(bytes.as_slice())
194 {
195 centroids.insert(*field_id, Arc::new(c));
196 }
197
198 if let Some(ref file) = field_meta.codebook_file
200 && let Ok(slice) = dir.open_read(Path::new(file)).await
201 && let Ok(bytes) = slice.read_bytes().await
202 && let Ok(c) =
203 serde_json::from_slice::<crate::structures::PQCodebook>(bytes.as_slice())
204 {
205 codebooks.insert(*field_id, Arc::new(c));
206 }
207 }
208
209 (centroids, codebooks)
210 }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Schema fixture shared by all tests; these tests only exercise the
    /// metadata bookkeeping, so the default schema is enough.
    fn test_schema() -> Schema {
        Schema::default()
    }

    /// Fresh metadata is empty, and registering a field does not build it.
    #[test]
    fn test_metadata_init() {
        let mut metadata = IndexMetadata::new(test_schema());
        assert_eq!(metadata.total_vectors, 0);
        assert!(metadata.segments.is_empty());
        assert!(!metadata.is_field_built(0));

        metadata.init_field(0, VectorIndexType::IvfRaBitQ);
        assert!(!metadata.is_field_built(0));
        assert!(metadata.vector_fields.contains_key(&0));
    }

    /// Segments are deduplicated on insert and can be removed by id.
    #[test]
    fn test_metadata_segments() {
        let mut metadata = IndexMetadata::new(test_schema());
        metadata.add_segment(String::from("abc123"));
        metadata.add_segment(String::from("def456"));
        assert_eq!(metadata.segments.len(), 2);

        // Adding an id that is already present must not create a duplicate.
        metadata.add_segment(String::from("abc123"));
        assert_eq!(metadata.segments.len(), 2);

        metadata.remove_segments(&[String::from("abc123")]);
        assert_eq!(metadata.segments, ["def456"]);
    }

    /// Marking a registered field as built flips its state and records the
    /// centroids file.
    #[test]
    fn test_mark_field_built() {
        let mut metadata = IndexMetadata::new(test_schema());
        metadata.init_field(0, VectorIndexType::IvfRaBitQ);
        metadata.total_vectors = 10000;

        assert!(!metadata.is_field_built(0));

        metadata.mark_field_built(0, 10000, 256, "field_0_centroids.bin".into(), None);

        assert!(metadata.is_field_built(0));
        let field_meta = metadata.get_field_meta(0).unwrap();
        assert_eq!(
            field_meta.centroids_file.as_deref(),
            Some("field_0_centroids.bin")
        );
    }

    /// A build is requested only once the threshold is reached and only
    /// while the field is still unbuilt.
    #[test]
    fn test_should_build_field() {
        let mut metadata = IndexMetadata::new(test_schema());
        metadata.init_field(0, VectorIndexType::IvfRaBitQ);

        // Below the threshold.
        metadata.total_vectors = 500;
        assert!(!metadata.should_build_field(0, 1000));

        // Above the threshold.
        metadata.total_vectors = 1500;
        assert!(metadata.should_build_field(0, 1000));

        // Already built: never asks for another build.
        metadata.mark_field_built(0, 1500, 256, "centroids.bin".into(), None);
        assert!(!metadata.should_build_field(0, 1000));
    }

    /// Metadata survives a JSON round trip.
    #[test]
    fn test_serialization() {
        let mut metadata = IndexMetadata::new(test_schema());
        metadata.add_segment(String::from("seg1"));
        metadata.init_field(0, VectorIndexType::IvfRaBitQ);
        metadata.total_vectors = 5000;

        let encoded = serde_json::to_string_pretty(&metadata).unwrap();
        let decoded: IndexMetadata = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.segments, metadata.segments);
        assert_eq!(decoded.total_vectors, metadata.total_vectors);
        assert!(decoded.vector_fields.contains_key(&0));
    }
}