1use crate::diskann::config::DiskAnnConfig;
17use crate::diskann::graph::VamanaGraph;
18use crate::diskann::types::{DiskAnnError, DiskAnnResult, VectorId};
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21use std::fs::{File, OpenOptions};
22use std::io::{BufReader, BufWriter, Read, Write};
23use std::path::{Path, PathBuf};
24
25pub trait StorageBackend: Send + Sync {
27 fn write_vector(&mut self, vector_id: &VectorId, vector: &[f32]) -> DiskAnnResult<()>;
29
30 fn read_vector(&self, vector_id: &VectorId) -> DiskAnnResult<Vec<f32>>;
32
33 fn write_graph(&mut self, graph: &VamanaGraph) -> DiskAnnResult<()>;
35
36 fn read_graph(&self) -> DiskAnnResult<VamanaGraph>;
38
39 fn write_metadata(&mut self, metadata: &StorageMetadata) -> DiskAnnResult<()>;
41
42 fn read_metadata(&self) -> DiskAnnResult<StorageMetadata>;
44
45 fn clear(&mut self) -> DiskAnnResult<()>;
47
48 fn flush(&mut self) -> DiskAnnResult<()>;
50
51 fn size(&self) -> DiskAnnResult<u64>;
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct StorageMetadata {
58 pub version: String,
59 pub config: DiskAnnConfig,
60 pub num_vectors: usize,
61 pub created_at: chrono::DateTime<chrono::Utc>,
62 pub updated_at: chrono::DateTime<chrono::Utc>,
63}
64
65impl StorageMetadata {
66 pub fn new(config: DiskAnnConfig) -> Self {
67 let now = chrono::Utc::now();
68 Self {
69 version: env!("CARGO_PKG_VERSION").to_string(),
70 config,
71 num_vectors: 0,
72 created_at: now,
73 updated_at: now,
74 }
75 }
76
77 pub fn update_timestamp(&mut self) {
78 self.updated_at = chrono::Utc::now();
79 }
80}
81
82#[derive(Debug)]
84pub struct DiskStorage {
85 base_path: PathBuf,
86 vector_file: Option<PathBuf>,
87 graph_file: Option<PathBuf>,
88 metadata_file: Option<PathBuf>,
89 dimension: usize,
90 vector_cache: HashMap<VectorId, Vec<f32>>,
91 cache_limit: usize,
92}
93
94impl DiskStorage {
95 pub fn new<P: AsRef<Path>>(base_path: P, dimension: usize) -> DiskAnnResult<Self> {
97 let base_path = base_path.as_ref().to_path_buf();
98
99 if !base_path.exists() {
101 std::fs::create_dir_all(&base_path).map_err(|e| DiskAnnError::IoError {
102 message: format!("Failed to create directory: {}", e),
103 })?;
104 }
105
106 let vector_file = Some(base_path.join("vectors.bin"));
107 let graph_file = Some(base_path.join("graph.bin"));
108 let metadata_file = Some(base_path.join("metadata.json"));
109
110 Ok(Self {
111 base_path,
112 vector_file,
113 graph_file,
114 metadata_file,
115 dimension,
116 vector_cache: HashMap::new(),
117 cache_limit: 1000,
118 })
119 }
120
121 pub fn with_cache_limit(mut self, limit: usize) -> Self {
123 self.cache_limit = limit;
124 self
125 }
126
127 pub fn vector_file_path(&self) -> &Option<PathBuf> {
129 &self.vector_file
130 }
131
132 pub fn graph_file_path(&self) -> &Option<PathBuf> {
134 &self.graph_file
135 }
136
137 fn evict_cache_if_needed(&mut self) {
139 if self.vector_cache.len() > self.cache_limit {
140 if let Some(key) = self.vector_cache.keys().next().cloned() {
142 self.vector_cache.remove(&key);
143 }
144 }
145 }
146}
147
148impl Clone for DiskStorage {
149 fn clone(&self) -> Self {
150 Self {
151 base_path: self.base_path.clone(),
152 vector_file: self.vector_file.clone(),
153 graph_file: self.graph_file.clone(),
154 metadata_file: self.metadata_file.clone(),
155 dimension: self.dimension,
156 vector_cache: HashMap::new(), cache_limit: self.cache_limit,
158 }
159 }
160}
161
162impl StorageBackend for DiskStorage {
163 fn write_vector(&mut self, vector_id: &VectorId, vector: &[f32]) -> DiskAnnResult<()> {
164 if vector.len() != self.dimension {
165 return Err(DiskAnnError::DimensionMismatch {
166 expected: self.dimension,
167 actual: vector.len(),
168 });
169 }
170
171 self.vector_cache.insert(vector_id.clone(), vector.to_vec());
173 self.evict_cache_if_needed();
174
175 if let Some(path) = &self.vector_file {
177 let file = OpenOptions::new()
178 .create(true)
179 .append(true)
180 .open(path)
181 .map_err(|e| DiskAnnError::IoError {
182 message: format!("Failed to open vector file: {}", e),
183 })?;
184
185 let mut writer = BufWriter::new(file);
186
187 let id_bytes = vector_id.as_bytes();
189 writer
190 .write_all(&(id_bytes.len() as u32).to_le_bytes())
191 .map_err(|e| DiskAnnError::IoError {
192 message: format!("Failed to write vector ID length: {}", e),
193 })?;
194 writer
195 .write_all(id_bytes)
196 .map_err(|e| DiskAnnError::IoError {
197 message: format!("Failed to write vector ID: {}", e),
198 })?;
199
200 for &value in vector {
202 writer
203 .write_all(&value.to_le_bytes())
204 .map_err(|e| DiskAnnError::IoError {
205 message: format!("Failed to write vector data: {}", e),
206 })?;
207 }
208
209 writer.flush().map_err(|e| DiskAnnError::IoError {
210 message: format!("Failed to flush vector file: {}", e),
211 })?;
212 }
213
214 Ok(())
215 }
216
217 fn read_vector(&self, vector_id: &VectorId) -> DiskAnnResult<Vec<f32>> {
218 if let Some(vector) = self.vector_cache.get(vector_id) {
220 return Ok(vector.clone());
221 }
222
223 if let Some(path) = &self.vector_file {
225 if !path.exists() {
226 return Err(DiskAnnError::VectorNotFound {
227 id: vector_id.clone(),
228 });
229 }
230
231 let file = File::open(path).map_err(|e| DiskAnnError::IoError {
232 message: format!("Failed to open vector file: {}", e),
233 })?;
234 let mut reader = BufReader::new(file);
235
236 loop {
238 let mut id_len_bytes = [0u8; 4];
240 if reader.read_exact(&mut id_len_bytes).is_err() {
241 break; }
243 let id_len = u32::from_le_bytes(id_len_bytes) as usize;
244
245 let mut id_bytes = vec![0u8; id_len];
247 reader
248 .read_exact(&mut id_bytes)
249 .map_err(|e| DiskAnnError::IoError {
250 message: format!("Failed to read vector ID: {}", e),
251 })?;
252 let id = String::from_utf8(id_bytes).map_err(|e| DiskAnnError::IoError {
253 message: format!("Invalid UTF-8 in vector ID: {}", e),
254 })?;
255
256 let mut vector = vec![0.0f32; self.dimension];
258 for value in &mut vector {
259 let mut bytes = [0u8; 4];
260 reader
261 .read_exact(&mut bytes)
262 .map_err(|e| DiskAnnError::IoError {
263 message: format!("Failed to read vector data: {}", e),
264 })?;
265 *value = f32::from_le_bytes(bytes);
266 }
267
268 if &id == vector_id {
269 return Ok(vector);
270 }
271 }
272
273 Err(DiskAnnError::VectorNotFound {
274 id: vector_id.clone(),
275 })
276 } else {
277 Err(DiskAnnError::VectorNotFound {
278 id: vector_id.clone(),
279 })
280 }
281 }
282
283 fn write_graph(&mut self, graph: &VamanaGraph) -> DiskAnnResult<()> {
284 if let Some(path) = &self.graph_file {
285 let file = File::create(path).map_err(|e| DiskAnnError::IoError {
286 message: format!("Failed to create graph file: {}", e),
287 })?;
288
289 let mut writer = BufWriter::new(file);
290 oxicode::serde::encode_into_std_write(graph, &mut writer, oxicode::config::standard())?;
291 }
292 Ok(())
293 }
294
295 fn read_graph(&self) -> DiskAnnResult<VamanaGraph> {
296 if let Some(path) = &self.graph_file {
297 if !path.exists() {
298 return Err(DiskAnnError::StorageError {
299 message: "Graph file does not exist".to_string(),
300 });
301 }
302
303 let file = File::open(path).map_err(|e| DiskAnnError::IoError {
304 message: format!("Failed to open graph file: {}", e),
305 })?;
306
307 let mut reader = BufReader::new(file);
308 let (graph, _) =
309 oxicode::serde::decode_from_std_read(&mut reader, oxicode::config::standard())?;
310 Ok(graph)
311 } else {
312 Err(DiskAnnError::StorageError {
313 message: "Graph file path not set".to_string(),
314 })
315 }
316 }
317
318 fn write_metadata(&mut self, metadata: &StorageMetadata) -> DiskAnnResult<()> {
319 if let Some(path) = &self.metadata_file {
320 let mut file = File::create(path).map_err(|e| DiskAnnError::IoError {
321 message: format!("Failed to create metadata file: {}", e),
322 })?;
323
324 serde_json::to_writer_pretty(&mut file, metadata).map_err(|e| {
325 DiskAnnError::SerializationError {
326 message: format!("Failed to serialize metadata: {}", e),
327 }
328 })?;
329
330 file.sync_all().map_err(|e| DiskAnnError::IoError {
332 message: format!("Failed to sync metadata file: {}", e),
333 })?;
334 }
335 Ok(())
336 }
337
338 fn read_metadata(&self) -> DiskAnnResult<StorageMetadata> {
339 if let Some(path) = &self.metadata_file {
340 if !path.exists() {
341 return Err(DiskAnnError::StorageError {
342 message: "Metadata file does not exist".to_string(),
343 });
344 }
345
346 let file = File::open(path).map_err(|e| DiskAnnError::IoError {
347 message: format!("Failed to open metadata file: {}", e),
348 })?;
349
350 let metadata =
351 serde_json::from_reader(file).map_err(|e| DiskAnnError::SerializationError {
352 message: format!("Failed to deserialize metadata: {}", e),
353 })?;
354
355 Ok(metadata)
356 } else {
357 Err(DiskAnnError::StorageError {
358 message: "Metadata file path not set".to_string(),
359 })
360 }
361 }
362
363 fn clear(&mut self) -> DiskAnnResult<()> {
364 self.vector_cache.clear();
365
366 if let Some(path) = &self.vector_file {
367 if path.exists() {
368 std::fs::remove_file(path).map_err(|e| DiskAnnError::IoError {
369 message: format!("Failed to remove vector file: {}", e),
370 })?;
371 }
372 }
373
374 if let Some(path) = &self.graph_file {
375 if path.exists() {
376 std::fs::remove_file(path).map_err(|e| DiskAnnError::IoError {
377 message: format!("Failed to remove graph file: {}", e),
378 })?;
379 }
380 }
381
382 if let Some(path) = &self.metadata_file {
383 if path.exists() {
384 std::fs::remove_file(path).map_err(|e| DiskAnnError::IoError {
385 message: format!("Failed to remove metadata file: {}", e),
386 })?;
387 }
388 }
389
390 Ok(())
391 }
392
393 fn flush(&mut self) -> DiskAnnResult<()> {
394 Ok(())
396 }
397
398 fn size(&self) -> DiskAnnResult<u64> {
399 let mut total_size = 0u64;
400
401 if let Some(path) = &self.vector_file {
402 if path.exists() {
403 total_size += std::fs::metadata(path)
404 .map_err(|e| DiskAnnError::IoError {
405 message: format!("Failed to get vector file size: {}", e),
406 })?
407 .len();
408 }
409 }
410
411 if let Some(path) = &self.graph_file {
412 if path.exists() {
413 total_size += std::fs::metadata(path)
414 .map_err(|e| DiskAnnError::IoError {
415 message: format!("Failed to get graph file size: {}", e),
416 })?
417 .len();
418 }
419 }
420
421 if let Some(path) = &self.metadata_file {
422 if path.exists() {
423 total_size += std::fs::metadata(path)
424 .map_err(|e| DiskAnnError::IoError {
425 message: format!("Failed to get metadata file size: {}", e),
426 })?
427 .len();
428 }
429 }
430
431 Ok(total_size)
432 }
433}
434
435#[derive(Debug, Clone, Serialize, Deserialize)]
437pub struct MemoryMappedStorage {
438 base_path: PathBuf,
439 dimension: usize,
440}
441
442impl MemoryMappedStorage {
443 pub fn new<P: AsRef<Path>>(base_path: P, dimension: usize) -> DiskAnnResult<Self> {
444 Ok(Self {
445 base_path: base_path.as_ref().to_path_buf(),
446 dimension,
447 })
448 }
449}
450
451impl StorageBackend for MemoryMappedStorage {
452 fn write_vector(&mut self, _vector_id: &VectorId, _vector: &[f32]) -> DiskAnnResult<()> {
453 Err(DiskAnnError::StorageError {
454 message: "MemoryMappedStorage not yet implemented".to_string(),
455 })
456 }
457
458 fn read_vector(&self, _vector_id: &VectorId) -> DiskAnnResult<Vec<f32>> {
459 Err(DiskAnnError::StorageError {
460 message: "MemoryMappedStorage not yet implemented".to_string(),
461 })
462 }
463
464 fn write_graph(&mut self, _graph: &VamanaGraph) -> DiskAnnResult<()> {
465 Err(DiskAnnError::StorageError {
466 message: "MemoryMappedStorage not yet implemented".to_string(),
467 })
468 }
469
470 fn read_graph(&self) -> DiskAnnResult<VamanaGraph> {
471 Err(DiskAnnError::StorageError {
472 message: "MemoryMappedStorage not yet implemented".to_string(),
473 })
474 }
475
476 fn write_metadata(&mut self, _metadata: &StorageMetadata) -> DiskAnnResult<()> {
477 Err(DiskAnnError::StorageError {
478 message: "MemoryMappedStorage not yet implemented".to_string(),
479 })
480 }
481
482 fn read_metadata(&self) -> DiskAnnResult<StorageMetadata> {
483 Err(DiskAnnError::StorageError {
484 message: "MemoryMappedStorage not yet implemented".to_string(),
485 })
486 }
487
488 fn clear(&mut self) -> DiskAnnResult<()> {
489 Ok(())
490 }
491
492 fn flush(&mut self) -> DiskAnnResult<()> {
493 Ok(())
494 }
495
496 fn size(&self) -> DiskAnnResult<u64> {
497 Ok(0)
498 }
499}
500
#[cfg(test)]
mod tests {
    type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
    use super::*;
    use crate::diskann::config::PruningStrategy;
    use std::env;

    /// Unique scratch directory path that is removed when the guard drops.
    ///
    /// Cleanup therefore also happens when a test fails an assertion or
    /// returns early through `?`, so test runs do not pile up directories
    /// under the system temp dir.
    struct ScratchDir {
        path: PathBuf,
    }

    impl ScratchDir {
        /// Picks a fresh path from a timestamp plus a process-wide counter.
        /// The directory itself is created later by `DiskStorage::new`.
        fn new() -> Self {
            use std::sync::atomic::{AtomicU64, Ordering};
            static COUNTER: AtomicU64 = AtomicU64::new(0);
            let id = COUNTER.fetch_add(1, Ordering::Relaxed);
            let path = env::temp_dir().join(format!(
                "diskann_storage_test_{}_{}",
                chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0),
                id
            ));
            // Best effort: start from a clean slate if the path somehow exists.
            std::fs::remove_dir_all(&path).ok();
            Self { path }
        }

        fn path(&self) -> &Path {
            &self.path
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test outcome.
            std::fs::remove_dir_all(&self.path).ok();
        }
    }

    #[test]
    fn test_disk_storage_vector_write_read() -> Result<()> {
        let dir = ScratchDir::new();
        let mut storage = DiskStorage::new(dir.path(), 3)?;

        let vector = vec![1.0, 2.0, 3.0];
        storage.write_vector(&"vec1".to_string(), &vector)?;

        let read_vector = storage.read_vector(&"vec1".to_string())?;
        assert_eq!(read_vector, vector);

        storage.clear()?;
        Ok(())
    }

    #[test]
    fn test_disk_storage_dimension_mismatch() -> Result<()> {
        let dir = ScratchDir::new();
        let mut storage = DiskStorage::new(dir.path(), 3)?;

        // Two components against a three-dimensional storage.
        let vector = vec![1.0, 2.0];
        let result = storage.write_vector(&"vec1".to_string(), &vector);

        assert!(result.is_err());
        storage.clear()?;
        Ok(())
    }

    #[test]
    fn test_disk_storage_graph() -> Result<()> {
        let dir = ScratchDir::new();
        let mut storage = DiskStorage::new(dir.path(), 3)?;

        let mut graph = VamanaGraph::new(3, PruningStrategy::Alpha, 1.2);
        graph.add_node("v1".to_string())?;
        graph.add_node("v2".to_string())?;

        storage.write_graph(&graph)?;
        let read_graph = storage.read_graph()?;

        assert_eq!(read_graph.num_nodes(), 2);
        storage.clear()?;
        Ok(())
    }

    #[test]
    fn test_disk_storage_metadata() -> Result<()> {
        let dir = ScratchDir::new();
        let mut storage = DiskStorage::new(dir.path(), 128)?;

        let config = DiskAnnConfig::default_config(128);
        let metadata = StorageMetadata::new(config);

        storage.write_metadata(&metadata)?;
        let read_metadata = storage.read_metadata()?;

        assert_eq!(read_metadata.config.dimension, 128);
        storage.clear()?;
        Ok(())
    }

    #[test]
    fn test_disk_storage_size() -> Result<()> {
        let dir = ScratchDir::new();
        let mut storage = DiskStorage::new(dir.path(), 3)?;

        let initial_size = storage.size()?;
        assert_eq!(initial_size, 0);

        let vector = vec![1.0, 2.0, 3.0];
        storage.write_vector(&"vec1".to_string(), &vector)?;

        let after_write = storage.size()?;
        assert!(after_write > initial_size);

        storage.clear()?;
        Ok(())
    }

    #[test]
    fn test_disk_storage_cache() -> Result<()> {
        let dir = ScratchDir::new();
        let mut storage = DiskStorage::new(dir.path(), 3)?.with_cache_limit(2);

        storage.write_vector(&"v1".to_string(), &[1.0, 2.0, 3.0])?;
        storage.write_vector(&"v2".to_string(), &[4.0, 5.0, 6.0])?;
        storage.write_vector(&"v3".to_string(), &[7.0, 8.0, 9.0])?;

        // Three writes against a limit of two must have evicted something.
        assert!(storage.vector_cache.len() <= 2);

        storage.clear()?;
        Ok(())
    }

    #[test]
    fn test_vector_not_found() -> Result<()> {
        let dir = ScratchDir::new();
        let storage = DiskStorage::new(dir.path(), 3)?;

        let result = storage.read_vector(&"nonexistent".to_string());
        assert!(result.is_err());
        Ok(())
    }

    #[test]
    fn test_storage_clear() -> Result<()> {
        let dir = ScratchDir::new();
        let mut storage = DiskStorage::new(dir.path(), 3)?;

        storage.write_vector(&"v1".to_string(), &[1.0, 2.0, 3.0])?;

        let vector_file = storage
            .vector_file
            .as_ref()
            .ok_or("vector_file is None")?
            .clone();
        assert!(
            vector_file.exists(),
            "Vector file should exist after write: {:?}",
            vector_file
        );

        storage.clear()?;
        assert!(
            !vector_file.exists(),
            "Vector file should not exist after clear: {:?}",
            vector_file
        );

        Ok(())
    }
}