1use crate::error::Result;
4use crate::index::flat::FlatIndex;
5
6#[cfg(feature = "hnsw")]
7use crate::index::hnsw::HnswIndex;
8
9use crate::index::VectorIndex;
10use crate::types::*;
11use parking_lot::RwLock;
12use std::sync::Arc;
13
14#[cfg(feature = "storage")]
16use crate::storage::VectorStorage;
17
18#[cfg(not(feature = "storage"))]
19use crate::storage_memory::MemoryStorage as VectorStorage;
20
21pub struct VectorDB {
23 storage: Arc<VectorStorage>,
24 index: Arc<RwLock<Box<dyn VectorIndex>>>,
25 options: DbOptions,
26}
27
28impl VectorDB {
29 #[allow(unused_mut)] pub fn new(mut options: DbOptions) -> Result<Self> {
37 #[cfg(feature = "storage")]
38 let storage = {
39 let temp_storage = VectorStorage::new(&options.storage_path, options.dimensions)?;
42
43 let stored_config = temp_storage.load_config()?;
44
45 if let Some(config) = stored_config {
46 tracing::info!(
48 "Loading existing database with {} dimensions",
49 config.dimensions
50 );
51 options = DbOptions {
52 storage_path: options.storage_path.clone(),
54 dimensions: config.dimensions,
56 distance_metric: config.distance_metric,
57 hnsw_config: config.hnsw_config,
58 quantization: config.quantization,
59 };
60 Arc::new(VectorStorage::new(
62 &options.storage_path,
63 options.dimensions,
64 )?)
65 } else {
66 tracing::info!(
68 "Creating new database with {} dimensions",
69 options.dimensions
70 );
71 temp_storage.save_config(&options)?;
72 Arc::new(temp_storage)
73 }
74 };
75
76 #[cfg(not(feature = "storage"))]
77 let storage = Arc::new(VectorStorage::new(options.dimensions)?);
78
79 #[allow(unused_mut)] let mut index: Box<dyn VectorIndex> = if let Some(hnsw_config) = &options.hnsw_config {
82 #[cfg(feature = "hnsw")]
83 {
84 Box::new(HnswIndex::new(
85 options.dimensions,
86 options.distance_metric,
87 hnsw_config.clone(),
88 )?)
89 }
90 #[cfg(not(feature = "hnsw"))]
91 {
92 tracing::warn!("HNSW requested but not available (WASM build), using flat index");
94 Box::new(FlatIndex::new(options.dimensions, options.distance_metric))
95 }
96 } else {
97 Box::new(FlatIndex::new(options.dimensions, options.distance_metric))
98 };
99
100 if !matches!(
105 options.quantization,
106 None | Some(crate::types::QuantizationConfig::None)
107 ) {
108 tracing::warn!(
109 "DbOptions.quantization = {:?} is set but not yet applied — the \
110 index is stored unquantized (no compression / memory reduction). \
111 See issue #563.",
112 options.quantization
113 );
114 }
115
116 #[cfg(feature = "storage")]
119 {
120 let stored_ids = storage.all_ids()?;
121 if !stored_ids.is_empty() {
122 tracing::info!(
123 "Rebuilding index from {} persisted vectors",
124 stored_ids.len()
125 );
126
127 let mut entries = Vec::with_capacity(stored_ids.len());
129 for id in stored_ids {
130 if let Some(entry) = storage.get(&id)? {
131 entries.push((id, entry.vector));
132 }
133 }
134
135 index.add_batch(entries)?;
137
138 tracing::info!("Index rebuilt successfully");
139 }
140 }
141
142 Ok(Self {
143 storage,
144 index: Arc::new(RwLock::new(index)),
145 options,
146 })
147 }
148
149 pub fn with_dimensions(dimensions: usize) -> Result<Self> {
151 let options = DbOptions {
152 dimensions,
153 ..DbOptions::default()
154 };
155 Self::new(options)
156 }
157
158 pub fn insert(&self, entry: VectorEntry) -> Result<VectorId> {
160 let id = self.storage.insert(&entry)?;
161
162 let mut index = self.index.write();
164 index.add(id.clone(), entry.vector)?;
165
166 Ok(id)
167 }
168
169 pub fn insert_batch(&self, entries: impl AsRef<[VectorEntry]>) -> Result<Vec<VectorId>> {
171 let entries = entries.as_ref();
172 let ids = self.storage.insert_batch(entries)?;
173
174 let mut index = self.index.write();
176 let index_entries: Vec<_> = ids
177 .iter()
178 .zip(entries.iter())
179 .map(|(id, entry)| (id.clone(), entry.vector.clone()))
180 .collect();
181
182 index.add_batch(index_entries)?;
183
184 Ok(ids)
185 }
186
187 pub fn search(&self, query: SearchQuery) -> Result<Vec<SearchResult>> {
189 let index = self.index.read();
190 let mut results = index.search(&query.vector, query.k)?;
191
192 for result in &mut results {
194 if let Ok(Some(entry)) = self.storage.get(&result.id) {
195 result.vector = Some(entry.vector);
196 result.metadata = entry.metadata;
197 }
198 }
199
200 if let Some(filter) = &query.filter {
202 results.retain(|r| {
203 if let Some(metadata) = &r.metadata {
204 filter
205 .iter()
206 .all(|(key, value)| metadata.get(key).is_some_and(|v| v == value))
207 } else {
208 false
209 }
210 });
211 }
212
213 Ok(results)
214 }
215
216 pub fn delete(&self, id: &str) -> Result<bool> {
218 let deleted_storage = self.storage.delete(id)?;
219
220 if deleted_storage {
221 let mut index = self.index.write();
222 let _ = index.remove(&id.to_string())?;
223 }
224
225 Ok(deleted_storage)
226 }
227
228 pub fn get(&self, id: &str) -> Result<Option<VectorEntry>> {
230 self.storage.get(id)
231 }
232
233 pub fn len(&self) -> Result<usize> {
235 self.storage.len()
236 }
237
238 pub fn is_empty(&self) -> Result<bool> {
240 self.storage.is_empty()
241 }
242
243 pub fn options(&self) -> &DbOptions {
245 &self.options
246 }
247
248 pub fn keys(&self) -> Result<Vec<String>> {
250 self.storage.all_ids()
251 }
252}
253
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Options for a 3-dimensional Euclidean database with no HNSW config,
    /// so the flat index is used and result ordering is exact.
    fn flat_options(storage_path: String) -> DbOptions {
        DbOptions {
            storage_path,
            dimensions: 3,
            distance_metric: DistanceMetric::Euclidean,
            hnsw_config: None,
            ..DbOptions::default()
        }
    }

    /// A freshly created database reports itself as empty.
    #[test]
    fn test_vector_db_creation() -> Result<()> {
        let dir = tempdir().unwrap();
        let options = DbOptions {
            storage_path: dir.path().join("test.db").to_string_lossy().into_owned(),
            dimensions: 3,
            ..DbOptions::default()
        };

        let db = VectorDB::new(options)?;
        assert!(db.is_empty()?);

        Ok(())
    }

    /// Inserting three unit vectors and querying with one of them returns
    /// that vector first with a near-zero score.
    #[test]
    fn test_insert_and_search() -> Result<()> {
        let dir = tempdir().unwrap();
        let storage_path = dir.path().join("test.db").to_string_lossy().into_owned();
        let db = VectorDB::new(flat_options(storage_path))?;

        db.insert(VectorEntry {
            id: Some("v1".to_string()),
            vector: vec![1.0, 0.0, 0.0],
            metadata: None,
        })?;

        db.insert(VectorEntry {
            id: Some("v2".to_string()),
            vector: vec![0.0, 1.0, 0.0],
            metadata: None,
        })?;

        db.insert(VectorEntry {
            id: Some("v3".to_string()),
            vector: vec![0.0, 0.0, 1.0],
            metadata: None,
        })?;

        let results = db.search(SearchQuery {
            vector: vec![1.0, 0.0, 0.0],
            k: 2,
            filter: None,
            ef_search: None,
        })?;

        assert!(!results.is_empty());
        assert_eq!(results[0].id, "v1", "First result should be exact match");
        assert!(
            results[0].score < 0.01,
            "Exact match should have ~0 distance"
        );

        Ok(())
    }

    /// Vectors written by one `VectorDB` instance are searchable after the
    /// database is dropped and reopened at the same path.
    #[test]
    #[cfg(feature = "storage")]
    fn test_search_after_restart() -> Result<()> {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("persist.db").to_string_lossy().into_owned();

        // Phase 1: populate the database, then drop it at the end of the scope.
        {
            let db = VectorDB::new(flat_options(db_path.clone()))?;

            db.insert(VectorEntry {
                id: Some("v1".to_string()),
                vector: vec![1.0, 0.0, 0.0],
                metadata: None,
            })?;

            db.insert(VectorEntry {
                id: Some("v2".to_string()),
                vector: vec![0.0, 1.0, 0.0],
                metadata: None,
            })?;

            db.insert(VectorEntry {
                id: Some("v3".to_string()),
                vector: vec![0.7, 0.7, 0.0],
                metadata: None,
            })?;

            let results = db.search(SearchQuery {
                vector: vec![0.8, 0.6, 0.0],
                k: 3,
                filter: None,
                ef_search: None,
            })?;
            assert_eq!(results.len(), 3, "Should find all 3 vectors before restart");
        }

        // Phase 2: reopen the same path and check storage and index both recovered.
        {
            let db = VectorDB::new(flat_options(db_path))?;

            assert_eq!(db.len()?, 3, "Should have 3 vectors after restart");

            let v1 = db.get("v1")?;
            assert!(v1.is_some(), "get() should work after restart");

            let results = db.search(SearchQuery {
                vector: vec![0.8, 0.6, 0.0],
                k: 3,
                filter: None,
                ef_search: None,
            })?;

            assert_eq!(
                results.len(),
                3,
                "search() should return results after restart (was returning 0 before fix)"
            );

            assert_eq!(
                results[0].id, "v3",
                "v3 [0.7, 0.7, 0.0] should be closest to query [0.8, 0.6, 0.0]"
            );
        }

        Ok(())
    }
}