vectradb_components/
storage.rs

1use crate::{DatabaseStats, VectorDatabase, VectorDocument, VectraDBError};
2use ndarray::Array1;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::RwLock;
6
7/// In-memory storage implementation for the vector database
8pub struct InMemoryVectorDB {
9    vectors: RwLock<HashMap<String, VectorDocument>>,
10    dimension: Option<usize>,
11}
12
13impl InMemoryVectorDB {
14    /// Create a new in-memory vector database
15    pub fn new() -> Self {
16        Self {
17            vectors: RwLock::new(HashMap::new()),
18            dimension: None,
19        }
20    }
21
22    /// Create a new in-memory vector database with fixed dimension
23    pub fn with_dimension(dimension: usize) -> Self {
24        Self {
25            vectors: RwLock::new(HashMap::new()),
26            dimension: Some(dimension),
27        }
28    }
29
30    /// Get current memory usage (approximate)
31    fn calculate_memory_usage(&self) -> u64 {
32        let vectors = self.vectors.read().unwrap();
33        let mut total_size = 0;
34
35        for (id, doc) in vectors.iter() {
36            total_size += id.len() + doc.data.len() * 4; // 4 bytes per f32
37            total_size += doc.metadata.tags.len() * 16; // Rough estimate for tags
38        }
39
40        total_size as u64
41    }
42}
43
44impl Default for InMemoryVectorDB {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl VectorDatabase for InMemoryVectorDB {
51    fn create_vector(
52        &mut self,
53        id: String,
54        vector: Array1<f32>,
55        tags: Option<HashMap<String, String>>,
56    ) -> Result<(), VectraDBError> {
57        // Check dimension consistency if dimension is fixed
58        if let Some(expected_dim) = self.dimension {
59            if vector.len() != expected_dim {
60                return Err(VectraDBError::DimensionMismatch {
61                    expected: expected_dim,
62                    actual: vector.len(),
63                });
64            }
65        } else {
66            // Set dimension on first vector
67            self.dimension = Some(vector.len());
68        }
69
70        let doc = crate::vector_operations::create_vector_document(id.clone(), vector, tags)?;
71
72        let mut vectors = self.vectors.write().unwrap();
73        if vectors.contains_key(&id) {
74            return Err(VectraDBError::VectorNotFound { id }); // Vector already exists
75        }
76
77        vectors.insert(id, doc);
78        Ok(())
79    }
80
81    fn get_vector(&self, id: &str) -> Result<VectorDocument, VectraDBError> {
82        let vectors = self.vectors.read().unwrap();
83        vectors
84            .get(id)
85            .cloned()
86            .ok_or_else(|| VectraDBError::VectorNotFound { id: id.to_string() })
87    }
88
89    fn update_vector(
90        &mut self,
91        id: &str,
92        vector: Array1<f32>,
93        tags: Option<HashMap<String, String>>,
94    ) -> Result<(), VectraDBError> {
95        // Check dimension consistency
96        if let Some(expected_dim) = self.dimension {
97            if vector.len() != expected_dim {
98                return Err(VectraDBError::DimensionMismatch {
99                    expected: expected_dim,
100                    actual: vector.len(),
101                });
102            }
103        }
104
105        let mut vectors = self.vectors.write().unwrap();
106        let doc = vectors
107            .get_mut(id)
108            .ok_or_else(|| VectraDBError::VectorNotFound { id: id.to_string() })?;
109
110        let updated_doc =
111            crate::vector_operations::update_vector_document(doc.clone(), vector, tags)?;
112        *doc = updated_doc;
113        Ok(())
114    }
115
116    fn delete_vector(&mut self, id: &str) -> Result<(), VectraDBError> {
117        let mut vectors = self.vectors.write().unwrap();
118        vectors
119            .remove(id)
120            .map(|_| ())
121            .ok_or_else(|| VectraDBError::VectorNotFound { id: id.to_string() })
122    }
123
124    fn upsert_vector(
125        &mut self,
126        id: String,
127        vector: Array1<f32>,
128        tags: Option<HashMap<String, String>>,
129    ) -> Result<(), VectraDBError> {
130        // Check dimension consistency
131        if let Some(expected_dim) = self.dimension {
132            if vector.len() != expected_dim {
133                return Err(VectraDBError::DimensionMismatch {
134                    expected: expected_dim,
135                    actual: vector.len(),
136                });
137            }
138        } else {
139            // Set dimension on first vector
140            self.dimension = Some(vector.len());
141        }
142
143        let mut vectors = self.vectors.write().unwrap();
144
145        use std::collections::hash_map::Entry;
146        match vectors.entry(id.clone()) {
147            Entry::Occupied(mut entry) => {
148                // Update existing vector
149                let updated_doc = crate::vector_operations::update_vector_document(
150                    entry.get().clone(),
151                    vector,
152                    tags,
153                )?;
154                *entry.get_mut() = updated_doc;
155            }
156            Entry::Vacant(entry) => {
157                // Create new vector
158                let doc = crate::vector_operations::create_vector_document(id, vector, tags)?;
159                entry.insert(doc);
160            }
161        }
162
163        Ok(())
164    }
165
166    fn search_similar(
167        &self,
168        query_vector: Array1<f32>,
169        top_k: usize,
170    ) -> Result<Vec<crate::SimilarityResult>, VectraDBError> {
171        let vectors = self.vectors.read().unwrap();
172        let documents: Vec<VectorDocument> = vectors.values().cloned().collect();
173
174        crate::similarity::find_similar_vectors_cosine(&query_vector.view(), &documents, top_k)
175    }
176
177    fn list_vectors(&self) -> Result<Vec<String>, VectraDBError> {
178        let vectors = self.vectors.read().unwrap();
179        Ok(vectors.keys().cloned().collect())
180    }
181
182    fn get_stats(&self) -> Result<DatabaseStats, VectraDBError> {
183        let vectors = self.vectors.read().unwrap();
184        let total_vectors = vectors.len();
185        let dimension = self.dimension.unwrap_or(0);
186        let memory_usage = self.calculate_memory_usage();
187
188        Ok(DatabaseStats {
189            total_vectors,
190            dimension,
191            memory_usage,
192        })
193    }
194}
195
196/// Persistent storage configuration
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct StorageConfig {
199    pub data_dir: String,
200    pub max_file_size: u64,
201    pub compression_enabled: bool,
202    pub cache_size: usize,
203}
204
205impl Default for StorageConfig {
206    fn default() -> Self {
207        Self {
208            data_dir: "./data".to_string(),
209            max_file_size: 1024 * 1024 * 1024, // 1GB
210            compression_enabled: true,
211            cache_size: 1000,
212        }
213    }
214}
215
216/// Storage trait for different storage backends
217pub trait StorageBackend {
218    fn save_vector(&mut self, id: &str, document: &VectorDocument) -> Result<(), VectraDBError>;
219    fn load_vector(&self, id: &str) -> Result<VectorDocument, VectraDBError>;
220    fn delete_vector(&mut self, id: &str) -> Result<(), VectraDBError>;
221    fn list_vector_ids(&self) -> Result<Vec<String>, VectraDBError>;
222    fn exists(&self, id: &str) -> Result<bool, VectraDBError>;
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Document id shared by every test in this module.
    const ID: &str = "test_id";

    /// Builds an `Array1<f32>` from the given components.
    fn vector_of(components: &[f32]) -> Array1<f32> {
        Array1::from_vec(components.to_vec())
    }

    /// A freshly created vector can be read back.
    #[test]
    fn test_in_memory_db_creation() {
        let mut db = InMemoryVectorDB::new();

        let created = db.create_vector(ID.to_string(), vector_of(&[1.0, 2.0, 3.0]), None);
        assert!(created.is_ok());

        let fetched = db.get_vector(ID);
        assert!(fetched.is_ok());
    }

    /// A fixed-dimension database rejects a vector of the wrong length.
    #[test]
    fn test_in_memory_db_dimension_check() {
        let mut db = InMemoryVectorDB::with_dimension(3);

        // Two components instead of the required three.
        let rejected = db.create_vector(ID.to_string(), vector_of(&[1.0, 2.0]), None);
        assert!(rejected.is_err());
    }

    /// Upserting twice under one id creates, then overwrites.
    #[test]
    fn test_in_memory_db_upsert() {
        let mut db = InMemoryVectorDB::new();

        // First upsert should create
        let first = db.upsert_vector(ID.to_string(), vector_of(&[1.0, 2.0, 3.0]), None);
        assert!(first.is_ok());

        // Second upsert should update
        let second = db.upsert_vector(ID.to_string(), vector_of(&[4.0, 5.0, 6.0]), None);
        assert!(second.is_ok());

        let doc = db.get_vector(ID).unwrap();
        assert_eq!(doc.data[0], 4.0);
    }

    /// Stats reflect the single stored three-component vector.
    #[test]
    fn test_in_memory_db_stats() {
        let mut db = InMemoryVectorDB::new();
        db.create_vector(ID.to_string(), vector_of(&[1.0, 2.0, 3.0]), None)
            .unwrap();

        let stats = db.get_stats().unwrap();

        assert_eq!(stats.total_vectors, 1);
        assert_eq!(stats.dimension, 3);
        assert!(stats.memory_usage > 0);
    }
}
283}