1use anyhow::{Context, Result};
44use serde::{Deserialize, Serialize};
45use std::fs::File;
46use std::io::{BufReader, BufWriter};
47use std::path::Path;
48use tracing::info;
49
50#[cfg(feature = "zerocopy")]
51use std::io::Write;
52
53#[cfg(feature = "mmap")]
54use memmap2::Mmap;
55
56#[cfg(feature = "zerocopy")]
57use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
58
59pub fn save_index<T: Serialize, P: AsRef<Path>>(index: &T, path: P) -> Result<()> {
68 let path = path.as_ref();
69 info!("Saving index to: {}", path.display());
70
71 let file =
72 File::create(path).with_context(|| format!("Failed to create file: {}", path.display()))?;
73
74 let writer = BufWriter::new(file);
75 serde_json::to_writer_pretty(writer, index)
76 .with_context(|| format!("Failed to serialize index to: {}", path.display()))?;
77
78 info!("Index saved successfully");
79 Ok(())
80}
81
82pub fn load_index<T: for<'de> Deserialize<'de>, P: AsRef<Path>>(path: P) -> Result<T> {
89 let path = path.as_ref();
90 info!("Loading index from: {}", path.display());
91
92 let file =
93 File::open(path).with_context(|| format!("Failed to open file: {}", path.display()))?;
94
95 let reader = BufReader::new(file);
96 let index = serde_json::from_reader(reader)
97 .with_context(|| format!("Failed to deserialize index from: {}", path.display()))?;
98
99 info!("Index loaded successfully");
100 Ok(index)
101}
102
103pub fn get_serialized_size<T: Serialize>(index: &T) -> Result<usize> {
107 let json =
108 serde_json::to_string(index).context("Failed to serialize index for size calculation")?;
109 Ok(json.len())
110}
111
/// Returns `true` if `path` refers to an existing regular file.
///
/// Returns `false` for directories, for paths that do not exist, and when the
/// file's metadata cannot be read.
pub fn index_file_exists<P: AsRef<Path>>(path: P) -> bool {
    // `Path::is_file` already yields `false` for a missing path, so a separate
    // `exists` call would only add a second filesystem query.
    path.as_ref().is_file()
}
116
/// Serializes `index` with rkyv and writes the resulting bytes to `path`.
///
/// The file is opened with [`File::create`], so an existing file at `path`
/// is truncated.
///
/// # Errors
///
/// Returns an error if rkyv serialization fails, if the file cannot be
/// created, or if writing the bytes fails.
#[cfg(feature = "zerocopy")]
pub fn save_index_binary<T, P>(index: &T, path: P) -> Result<()>
where
    T: for<'a> RkyvSerialize<
        rkyv::rancor::Strategy<
            rkyv::ser::Serializer<
                rkyv::util::AlignedVec,
                rkyv::ser::allocator::ArenaHandle<'a>,
                rkyv::ser::sharing::Share,
            >,
            rkyv::rancor::Error,
        >,
    >,
    P: AsRef<Path>,
{
    let target = path.as_ref();
    info!("Saving index (binary) to: {}", target.display());

    // Serialize fully in memory first so nothing is written if encoding fails.
    let archive = match rkyv::to_bytes::<rkyv::rancor::Error>(index) {
        Ok(buffer) => buffer,
        Err(e) => return Err(anyhow::anyhow!("Failed to serialize index: {}", e)),
    };

    let mut output = File::create(target)
        .with_context(|| format!("Failed to create file: {}", target.display()))?;
    output
        .write_all(&archive)
        .with_context(|| format!("Failed to write to file: {}", target.display()))?;

    info!("Index saved successfully ({} bytes)", archive.len());
    Ok(())
}
158
/// Loads an index of type `T` from the rkyv archive stored at `path`.
///
/// The file is read into memory, copied into an aligned buffer, accessed as
/// `T::Archived`, and then deserialized into an owned `T`.
///
/// The archive contents are **not** validated. Only load files that were
/// produced by `save_index_binary` for the same `T`; a corrupted or foreign
/// file can cause undefined behavior.
///
/// # Errors
///
/// Returns an error if the file cannot be read, if it is smaller than the
/// archived root type, or if deserialization fails.
#[cfg(feature = "zerocopy")]
pub fn load_index_binary<T, P>(path: P) -> Result<T>
where
    T: Archive,
    T::Archived: RkyvDeserialize<T, rkyv::rancor::Strategy<rkyv::de::Pool, rkyv::rancor::Error>>,
    P: AsRef<Path>,
{
    let path = path.as_ref();
    info!("Loading index (binary) from: {}", path.display());

    let raw =
        std::fs::read(path).with_context(|| format!("Failed to read file: {}", path.display()))?;

    // An archive holds at least its root object, so a shorter file (including an
    // empty one) cannot be valid and must not reach the unchecked access below.
    if raw.len() < std::mem::size_of::<T::Archived>() {
        return Err(anyhow::anyhow!(
            "File is too small to contain an archived index: {}",
            path.display()
        ));
    }

    // A `Vec<u8>` only guarantees byte alignment, while archived data must be
    // aligned for the types it contains. Copy into rkyv's aligned buffer
    // (16-byte alignment by default) before interpreting the bytes.
    let mut bytes: rkyv::util::AlignedVec = rkyv::util::AlignedVec::with_capacity(raw.len());
    bytes.extend_from_slice(&raw);
    drop(raw);

    // SAFETY: `bytes` is aligned and at least as large as the root object. The
    // contents are assumed to be a valid archive of `T` written by
    // `save_index_binary`; that is not checked here.
    // NOTE(review): switching to the validating `rkyv::access` would require an
    // extra `CheckBytes` bound on `T::Archived`, which changes this function's
    // public signature.
    let archived = unsafe { rkyv::access_unchecked::<T::Archived>(&bytes) };

    let mut deserializer = rkyv::de::Pool::new();
    let index: T = archived
        .deserialize(rkyv::rancor::Strategy::wrap(&mut deserializer))
        .map_err(|e| anyhow::anyhow!("Failed to deserialize archived data: {}", e))?;

    info!("Index loaded successfully");
    Ok(index)
}
195
/// Read-only view of an index file backed by a memory mapping.
///
/// Bytes are served directly from the mapping rather than from a private
/// copy, so the file must not be modified or truncated while a `MappedIndex`
/// for it is alive.
#[cfg(feature = "mmap")]
pub struct MappedIndex {
    /// Mapping of the whole file; dereferences to the file's bytes.
    mmap: Mmap,
}

#[cfg(feature = "mmap")]
impl MappedIndex {
    /// Opens the file at `path` and memory-maps it.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or cannot be mapped.
    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path = path.as_ref();
        info!("Memory-mapping index from: {}", path.display());

        let file =
            File::open(path).with_context(|| format!("Failed to open file: {}", path.display()))?;

        // SAFETY: mapping a file is unsafe because the mapped bytes are only
        // stable while no one modifies or truncates the underlying file. This
        // type only ever reads through the mapping; keeping the file unchanged
        // for the lifetime of the value is the caller's responsibility.
        let mmap = unsafe { Mmap::map(&file) }
            .with_context(|| format!("Failed to memory-map file: {}", path.display()))?;

        info!("Index memory-mapped successfully ({} bytes)", mmap.len());
        Ok(Self { mmap })
    }

    /// Returns the mapped file contents as a byte slice.
    pub fn as_bytes(&self) -> &[u8] {
        &self.mmap
    }

    /// Deserializes the mapped bytes as JSON into a `T`.
    ///
    /// # Errors
    ///
    /// Returns an error if the bytes are not valid JSON for `T`.
    pub fn deserialize<T: for<'de> Deserialize<'de>>(&self) -> Result<T> {
        serde_json::from_slice(self.as_bytes()).context("Failed to deserialize memory-mapped index")
    }
}
252
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hnsw::{HnswConfig, HnswIndex};
    use crate::ivf::{IvfPqConfig, IvfPqIndex};
    use crate::search::VectorSearchIndex;
    use crate::types::SearchConfig;
    use std::collections::HashMap;
    use tempfile::TempDir;

    /// Three fixed 3-dimensional embeddings keyed `doc1`, `doc2` and `doc3`.
    fn create_test_embeddings() -> HashMap<String, Vec<f32>> {
        HashMap::from([
            ("doc1".to_string(), vec![0.1, 0.2, 0.3]),
            ("doc2".to_string(), vec![0.4, 0.5, 0.6]),
            ("doc3".to_string(), vec![0.7, 0.8, 0.9]),
        ])
    }

    /// `count` embeddings keyed `doc0..`, where entry `i` holds `dims` values
    /// `(i + offset) * 0.001` for `offset` in `0..dims`.
    fn sequential_embeddings(count: usize, dims: usize) -> HashMap<String, Vec<f32>> {
        (0..count)
            .map(|i| {
                let values = (0..dims)
                    .map(|offset| (i + offset) as f32 * 0.001)
                    .collect();
                (format!("doc{}", i), values)
            })
            .collect()
    }

    /// HNSW index with the default configuration, built from `embeddings`.
    fn build_hnsw(embeddings: &HashMap<String, Vec<f32>>) -> HnswIndex {
        let mut hnsw = HnswIndex::new(HnswConfig::default());
        hnsw.build(embeddings).unwrap();
        hnsw
    }

    /// IVF-PQ index with the given configuration, built from `embeddings`.
    fn build_ivf_pq(config: IvfPqConfig, embeddings: &HashMap<String, Vec<f32>>) -> IvfPqIndex {
        let mut ivf = IvfPqIndex::new(config);
        ivf.build(embeddings).unwrap();
        ivf
    }

    #[test]
    fn test_save_and_load_hnsw() {
        let scratch = TempDir::new().unwrap();
        let json_path = scratch.path().join("hnsw_index.json");

        let original = build_hnsw(&create_test_embeddings());

        save_index(&original, &json_path).unwrap();
        assert!(index_file_exists(&json_path));

        let restored: HnswIndex = load_index(&json_path).unwrap();

        // The reloaded index must still answer queries.
        let query = vec![0.2, 0.3, 0.4];
        let hits = restored.search(&query, 2).unwrap();
        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn test_save_and_load_exact_search() {
        let scratch = TempDir::new().unwrap();
        let json_path = scratch.path().join("exact_index.json");

        let mut original = VectorSearchIndex::new(SearchConfig::default());
        original.build(&create_test_embeddings()).unwrap();

        save_index(&original, &json_path).unwrap();

        let restored: VectorSearchIndex = load_index(&json_path).unwrap();

        let query = vec![0.5, 0.6, 0.7];
        let hits = restored.search(&query, 2).unwrap();
        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn test_save_and_load_ivf_pq() {
        let scratch = TempDir::new().unwrap();
        let json_path = scratch.path().join("ivf_index.json");

        let embeddings = sequential_embeddings(500, 4);

        // Explicit small parameters; presumably chosen to keep the build quick.
        let config = IvfPqConfig {
            nclusters: 8,
            nsubvectors: 4,
            nbits: 4,
            nprobe: 2,
            max_kmeans_iterations: 20,
            ..IvfPqConfig::default()
        };
        let original = build_ivf_pq(config, &embeddings);

        save_index(&original, &json_path).unwrap();

        let restored: IvfPqIndex = load_index(&json_path).unwrap();

        let query = vec![0.5, 0.6, 0.7, 0.8];
        let hits = restored.search(&query, 5).unwrap();
        assert!(!hits.is_empty());
    }

    // Skipped by default; run with `cargo test -- --ignored`.
    #[test]
    #[ignore]
    fn test_save_and_load_ivf_pq_full() {
        let scratch = TempDir::new().unwrap();
        let json_path = scratch.path().join("ivf_index_full.json");

        let embeddings = sequential_embeddings(1000, 4);

        // Remaining parameters are taken from `IvfPqConfig::default()`.
        let config = IvfPqConfig {
            nclusters: 16,
            nsubvectors: 4,
            nprobe: 4,
            ..IvfPqConfig::default()
        };
        let original = build_ivf_pq(config, &embeddings);

        save_index(&original, &json_path).unwrap();

        let restored: IvfPqIndex = load_index(&json_path).unwrap();

        let query = vec![0.5, 0.6, 0.7, 0.8];
        let hits = restored.search(&query, 5).unwrap();
        assert!(!hits.is_empty());
    }

    #[test]
    fn test_get_serialized_size() {
        let hnsw = build_hnsw(&create_test_embeddings());

        let byte_count = get_serialized_size(&hnsw).unwrap();
        assert!(byte_count > 0);
        assert!(byte_count < 100_000);
    }

    #[test]
    fn test_index_file_exists() {
        let scratch = TempDir::new().unwrap();
        let json_path = scratch.path().join("test_index.json");

        // Nothing has been written yet.
        assert!(!index_file_exists(&json_path));

        let hnsw = build_hnsw(&create_test_embeddings());
        save_index(&hnsw, &json_path).unwrap();

        assert!(index_file_exists(&json_path));
    }

    #[test]
    fn test_load_nonexistent_file() {
        let outcome = load_index::<HnswIndex, _>("/nonexistent/path/index.json");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_save_to_invalid_path() {
        let hnsw = build_hnsw(&create_test_embeddings());

        let outcome = save_index(&hnsw, "/invalid/nonexistent/path/index.json");
        assert!(outcome.is_err());
    }

    #[test]
    #[cfg(feature = "mmap")]
    fn test_mmap_index_creation() {
        let scratch = TempDir::new().unwrap();
        let json_path = scratch.path().join("mmap_index.json");

        let hnsw = build_hnsw(&create_test_embeddings());
        save_index(&hnsw, &json_path).unwrap();

        let mapped = MappedIndex::new(&json_path).unwrap();
        assert!(!mapped.as_bytes().is_empty());

        // The mapped bytes must round-trip into a usable index.
        let restored: HnswIndex = mapped.deserialize().unwrap();
        let query = vec![0.2, 0.3, 0.4];
        let hits = restored.search(&query, 2).unwrap();
        assert_eq!(hits.len(), 2);
    }

    #[test]
    #[cfg(feature = "mmap")]
    fn test_mmap_nonexistent_file() {
        let outcome = MappedIndex::new("/nonexistent/file.json");
        assert!(outcome.is_err());
    }

    #[test]
    #[cfg(feature = "mmap")]
    fn test_mmap_large_index() {
        let scratch = TempDir::new().unwrap();
        let json_path = scratch.path().join("mmap_large_index.json");

        let embeddings = sequential_embeddings(1000, 3);

        let hnsw = build_hnsw(&embeddings);
        save_index(&hnsw, &json_path).unwrap();

        let mapped = MappedIndex::new(&json_path).unwrap();
        let restored: HnswIndex = mapped.deserialize().unwrap();

        let query = vec![0.5, 0.6, 0.7];
        let hits = restored.search(&query, 10).unwrap();
        assert_eq!(hits.len(), 10);
    }
}