1use std::ops::Bound;
4
5use manifoldb_core::EntityId;
6use manifoldb_storage::{Cursor, StorageEngine, Transaction};
7
8use crate::encoding::{
9 decode_embedding_entity_id, encode_embedding_key, encode_embedding_prefix,
10 encode_embedding_space_key,
11};
12use crate::error::VectorError;
13use crate::types::{Embedding, EmbeddingName, EmbeddingSpace};
14
/// Name of the storage table that holds serialized embedding-space definitions,
/// keyed by `encode_embedding_space_key`.
const TABLE_EMBEDDING_SPACES: &str = "vector_spaces";

/// Name of the storage table that holds serialized embeddings, keyed by
/// `encode_embedding_key` (space name plus entity id).
const TABLE_EMBEDDINGS: &str = "vector_embeddings";
20
21pub struct VectorStore<E: StorageEngine> {
27 engine: E,
28}
29
30impl<E: StorageEngine> VectorStore<E> {
31 #[must_use]
33 pub const fn new(engine: E) -> Self {
34 Self { engine }
35 }
36
37 pub fn create_space(&self, space: &EmbeddingSpace) -> Result<(), VectorError> {
43 let mut tx = self.engine.begin_write()?;
44
45 let key = encode_embedding_space_key(space.name());
46
47 if tx.get(TABLE_EMBEDDING_SPACES, &key)?.is_some() {
49 return Err(VectorError::InvalidName(format!(
50 "embedding space '{}' already exists",
51 space.name()
52 )));
53 }
54
55 tx.put(TABLE_EMBEDDING_SPACES, &key, &space.to_bytes()?)?;
57 tx.commit()?;
58
59 Ok(())
60 }
61
62 pub fn get_space(&self, name: &EmbeddingName) -> Result<EmbeddingSpace, VectorError> {
68 let tx = self.engine.begin_read()?;
69 let key = encode_embedding_space_key(name);
70
71 let bytes = tx
72 .get(TABLE_EMBEDDING_SPACES, &key)?
73 .ok_or_else(|| VectorError::SpaceNotFound(name.to_string()))?;
74
75 EmbeddingSpace::from_bytes(&bytes)
76 }
77
78 pub fn delete_space(&self, name: &EmbeddingName) -> Result<(), VectorError> {
84 let mut tx = self.engine.begin_write()?;
85
86 let space_key = encode_embedding_space_key(name);
87
88 if tx.get(TABLE_EMBEDDING_SPACES, &space_key)?.is_none() {
90 return Err(VectorError::SpaceNotFound(name.to_string()));
91 }
92
93 let prefix = encode_embedding_prefix(name);
95 let prefix_end = next_prefix(&prefix);
96
97 let mut keys_to_delete = Vec::new();
98 {
99 let cursor = tx.range(
100 TABLE_EMBEDDINGS,
101 Bound::Included(prefix.as_slice()),
102 Bound::Excluded(prefix_end.as_slice()),
103 )?;
104
105 let mut cursor = cursor;
107 while let Some((key, _)) = cursor.next()? {
108 keys_to_delete.push(key);
109 }
110 }
111
112 for key in keys_to_delete {
113 tx.delete(TABLE_EMBEDDINGS, &key)?;
114 }
115
116 tx.delete(TABLE_EMBEDDING_SPACES, &space_key)?;
118
119 tx.commit()?;
120 Ok(())
121 }
122
123 pub fn list_spaces(&self) -> Result<Vec<EmbeddingSpace>, VectorError> {
129 let tx = self.engine.begin_read()?;
130
131 let prefix = vec![crate::encoding::PREFIX_EMBEDDING_SPACE];
133 let prefix_end = next_prefix(&prefix);
134
135 let mut cursor = tx.range(
136 TABLE_EMBEDDING_SPACES,
137 Bound::Included(prefix.as_slice()),
138 Bound::Excluded(prefix_end.as_slice()),
139 )?;
140
141 let mut spaces = Vec::new();
142 while let Some((_, value)) = cursor.next()? {
143 spaces.push(EmbeddingSpace::from_bytes(&value)?);
144 }
145
146 Ok(spaces)
147 }
148
149 pub fn put(
158 &self,
159 entity_id: EntityId,
160 space_name: &EmbeddingName,
161 embedding: &Embedding,
162 ) -> Result<(), VectorError> {
163 let space = self.get_space(space_name)?;
165
166 if embedding.dimension() != space.dimension() {
167 return Err(VectorError::DimensionMismatch {
168 expected: space.dimension(),
169 actual: embedding.dimension(),
170 });
171 }
172
173 let mut tx = self.engine.begin_write()?;
174
175 let key = encode_embedding_key(space_name, entity_id);
176 tx.put(TABLE_EMBEDDINGS, &key, &embedding.to_bytes())?;
177
178 tx.commit()?;
179 Ok(())
180 }
181
182 pub fn get(
191 &self,
192 entity_id: EntityId,
193 space_name: &EmbeddingName,
194 ) -> Result<Embedding, VectorError> {
195 let _ = self.get_space(space_name)?;
197
198 let tx = self.engine.begin_read()?;
199
200 let key = encode_embedding_key(space_name, entity_id);
201 let bytes =
202 tx.get(TABLE_EMBEDDINGS, &key)?.ok_or_else(|| VectorError::EmbeddingNotFound {
203 entity_id: entity_id.as_u64(),
204 space: space_name.to_string(),
205 })?;
206
207 Embedding::from_bytes(&bytes)
208 }
209
210 pub fn delete(
220 &self,
221 entity_id: EntityId,
222 space_name: &EmbeddingName,
223 ) -> Result<bool, VectorError> {
224 let mut tx = self.engine.begin_write()?;
225
226 let key = encode_embedding_key(space_name, entity_id);
227 let existed = tx.delete(TABLE_EMBEDDINGS, &key)?;
228
229 tx.commit()?;
230 Ok(existed)
231 }
232
233 pub fn exists(
239 &self,
240 entity_id: EntityId,
241 space_name: &EmbeddingName,
242 ) -> Result<bool, VectorError> {
243 let tx = self.engine.begin_read()?;
244 let key = encode_embedding_key(space_name, entity_id);
245 Ok(tx.get(TABLE_EMBEDDINGS, &key)?.is_some())
246 }
247
248 pub fn list_entities(&self, space_name: &EmbeddingName) -> Result<Vec<EntityId>, VectorError> {
254 let tx = self.engine.begin_read()?;
255
256 let prefix = encode_embedding_prefix(space_name);
257 let prefix_end = next_prefix(&prefix);
258
259 let mut cursor = tx.range(
260 TABLE_EMBEDDINGS,
261 Bound::Included(prefix.as_slice()),
262 Bound::Excluded(prefix_end.as_slice()),
263 )?;
264
265 let mut entities = Vec::new();
266 while let Some((key, _)) = cursor.next()? {
267 if let Some(entity_id) = decode_embedding_entity_id(&key) {
268 entities.push(entity_id);
269 }
270 }
271
272 Ok(entities)
273 }
274
275 pub fn count(&self, space_name: &EmbeddingName) -> Result<usize, VectorError> {
281 let tx = self.engine.begin_read()?;
282
283 let prefix = encode_embedding_prefix(space_name);
284 let prefix_end = next_prefix(&prefix);
285
286 let mut cursor = tx.range(
287 TABLE_EMBEDDINGS,
288 Bound::Included(prefix.as_slice()),
289 Bound::Excluded(prefix_end.as_slice()),
290 )?;
291
292 let mut count = 0;
293 while cursor.next()?.is_some() {
294 count += 1;
295 }
296
297 Ok(count)
298 }
299
300 pub fn get_many(
309 &self,
310 entity_ids: &[EntityId],
311 space_name: &EmbeddingName,
312 ) -> Result<Vec<(EntityId, Option<Embedding>)>, VectorError> {
313 let tx = self.engine.begin_read()?;
314
315 let mut results = Vec::with_capacity(entity_ids.len());
316
317 for &entity_id in entity_ids {
318 let key = encode_embedding_key(space_name, entity_id);
319 let embedding = tx
320 .get(TABLE_EMBEDDINGS, &key)?
321 .map(|bytes| Embedding::from_bytes(&bytes))
322 .transpose()?;
323
324 results.push((entity_id, embedding));
325 }
326
327 Ok(results)
328 }
329
330 pub fn put_many(
341 &self,
342 embeddings: &[(EntityId, Embedding)],
343 space_name: &EmbeddingName,
344 ) -> Result<(), VectorError> {
345 if embeddings.is_empty() {
346 return Ok(());
347 }
348
349 let space = self.get_space(space_name)?;
351
352 for (entity_id, embedding) in embeddings {
354 if embedding.dimension() != space.dimension() {
355 return Err(VectorError::DimensionMismatch {
356 expected: space.dimension(),
357 actual: embedding.dimension(),
358 });
359 }
360 let _ = entity_id; }
362
363 let mut tx = self.engine.begin_write()?;
364
365 for (entity_id, embedding) in embeddings {
366 let key = encode_embedding_key(space_name, *entity_id);
367 tx.put(TABLE_EMBEDDINGS, &key, &embedding.to_bytes())?;
368 }
369
370 tx.commit()?;
371 Ok(())
372 }
373
374 pub fn delete_entity(&self, entity_id: EntityId) -> Result<usize, VectorError> {
380 let spaces = self.list_spaces()?;
382
383 let mut deleted = 0;
384 for space in spaces {
385 if self.delete(entity_id, space.name())? {
386 deleted += 1;
387 }
388 }
389
390 Ok(deleted)
391 }
392}
393
/// Computes the exclusive upper bound for scanning every key that starts with `prefix`,
/// assuming keys are ordered as lexicographic byte strings.
///
/// The bound is `prefix` cut after its last byte that is not `0xFF`, with that byte
/// incremented. Trailing `0xFF` bytes are dropped rather than kept: keeping them (turning
/// `[0x10, 0xFF]` into `[0x11, 0xFF]`) would pull keys such as `[0x11, 0x00]`, which do
/// not share the prefix, into the scanned range.
///
/// When `prefix` is empty or consists only of `0xFF` bytes, no finite byte string bounds
/// the prefix from above. The long-standing fallback of appending a single `0xFF` is kept
/// for those inputs, so a scan with such a prefix can miss keys that sort after the
/// fallback value.
fn next_prefix(prefix: &[u8]) -> Vec<u8> {
    match prefix.iter().rposition(|&byte| byte < 0xFF) {
        Some(last_incrementable) => {
            let mut bound = prefix[..=last_incrementable].to_vec();
            bound[last_incrementable] += 1;
            bound
        }
        None => {
            let mut bound = prefix.to_vec();
            bound.push(0xFF);
            bound
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::distance::DistanceMetric;
    use manifoldb_storage::backends::RedbEngine;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Source of distinct suffixes for space names created by the tests.
    static TEST_COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// Builds a store on a fresh in-memory engine.
    fn create_test_store() -> VectorStore<RedbEngine> {
        let engine = RedbEngine::in_memory().unwrap();
        VectorStore::new(engine)
    }

    /// Returns a space name that no other call in this test run has produced.
    fn unique_space_name() -> EmbeddingName {
        let count = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
        EmbeddingName::new(format!("test_space_{}", count)).unwrap()
    }

    #[test]
    fn create_and_get_space() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 128, DistanceMetric::Cosine);

        store.create_space(&space).unwrap();

        let retrieved = store.get_space(&name).unwrap();
        assert_eq!(retrieved.dimension(), 128);
        assert_eq!(retrieved.distance_metric(), DistanceMetric::Cosine);
    }

    #[test]
    fn create_duplicate_space_fails() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 128, DistanceMetric::Cosine);

        store.create_space(&space).unwrap();
        let result = store.create_space(&space);

        assert!(result.is_err());
    }

    #[test]
    fn get_nonexistent_space_fails() {
        let store = create_test_store();
        let name = EmbeddingName::new("nonexistent").unwrap();

        let result = store.get_space(&name);
        assert!(result.is_err());
        match result.unwrap_err() {
            VectorError::SpaceNotFound(n) => assert_eq!(n, "nonexistent"),
            _ => panic!("unexpected error type"),
        }
    }

    #[test]
    fn list_spaces() {
        let store = create_test_store();

        let name1 = unique_space_name();
        let name2 = unique_space_name();

        let space1 = EmbeddingSpace::new(name1.clone(), 128, DistanceMetric::Cosine);
        let space2 = EmbeddingSpace::new(name2.clone(), 256, DistanceMetric::Euclidean);

        store.create_space(&space1).unwrap();
        store.create_space(&space2).unwrap();

        let spaces = store.list_spaces().unwrap();
        assert!(spaces.len() >= 2);

        let names: Vec<_> = spaces.iter().map(|s| s.name().as_str()).collect();
        assert!(names.contains(&name1.as_str()));
        assert!(names.contains(&name2.as_str()));
    }

    #[test]
    fn delete_space() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 128, DistanceMetric::Cosine);

        store.create_space(&space).unwrap();

        let embedding = Embedding::new(vec![1.0; 128]).unwrap();
        store.put(EntityId::new(1), &name, &embedding).unwrap();

        store.delete_space(&name).unwrap();

        // The space definition is gone ...
        assert!(store.get_space(&name).is_err());

        // ... and so is the embedding that was stored in it.
        assert!(!store.exists(EntityId::new(1), &name).unwrap_or(true));
    }

    #[test]
    fn put_and_get_embedding() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 3, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        let embedding = Embedding::new(vec![1.0, 2.0, 3.0]).unwrap();
        store.put(EntityId::new(42), &name, &embedding).unwrap();

        let retrieved = store.get(EntityId::new(42), &name).unwrap();
        assert_eq!(retrieved.as_slice(), &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn put_wrong_dimension_fails() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 128, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        // Three components against a 128-dimensional space.
        let embedding = Embedding::new(vec![1.0, 2.0, 3.0]).unwrap();
        let result = store.put(EntityId::new(1), &name, &embedding);
        assert!(result.is_err());
        match result.unwrap_err() {
            VectorError::DimensionMismatch { expected, actual } => {
                assert_eq!(expected, 128);
                assert_eq!(actual, 3);
            }
            _ => panic!("unexpected error type"),
        }
    }

    #[test]
    fn get_nonexistent_embedding_fails() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 128, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        let result = store.get(EntityId::new(999), &name);
        assert!(result.is_err());
        match result.unwrap_err() {
            VectorError::EmbeddingNotFound { entity_id, space } => {
                assert_eq!(entity_id, 999);
                assert_eq!(space, name.as_str());
            }
            _ => panic!("unexpected error type"),
        }
    }

    #[test]
    fn delete_embedding() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 3, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        let embedding = Embedding::new(vec![1.0, 2.0, 3.0]).unwrap();
        store.put(EntityId::new(1), &name, &embedding).unwrap();

        assert!(store.exists(EntityId::new(1), &name).unwrap());
        assert!(store.delete(EntityId::new(1), &name).unwrap());
        assert!(!store.exists(EntityId::new(1), &name).unwrap());

        // A second delete finds nothing to remove.
        assert!(!store.delete(EntityId::new(1), &name).unwrap());
    }

    #[test]
    fn list_entities() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 3, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        for i in 1..=5 {
            let embedding = Embedding::new(vec![i as f32; 3]).unwrap();
            store.put(EntityId::new(i), &name, &embedding).unwrap();
        }

        let entities = store.list_entities(&name).unwrap();
        assert_eq!(entities.len(), 5);

        let ids: Vec<u64> = entities.iter().map(|e| e.as_u64()).collect();
        for i in 1..=5 {
            assert!(ids.contains(&i));
        }
    }

    #[test]
    fn count_embeddings() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 3, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        assert_eq!(store.count(&name).unwrap(), 0);

        for i in 1..=10 {
            let embedding = Embedding::new(vec![i as f32; 3]).unwrap();
            store.put(EntityId::new(i), &name, &embedding).unwrap();
        }

        assert_eq!(store.count(&name).unwrap(), 10);
    }

    #[test]
    fn get_many() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 3, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        let embedding = Embedding::new(vec![1.0, 2.0, 3.0]).unwrap();
        store.put(EntityId::new(1), &name, &embedding).unwrap();
        store.put(EntityId::new(3), &name, &embedding).unwrap();

        let results =
            store.get_many(&[EntityId::new(1), EntityId::new(2), EntityId::new(3)], &name).unwrap();

        // Entities 1 and 3 were stored, entity 2 was not.
        assert_eq!(results.len(), 3);
        assert!(results[0].1.is_some());
        assert!(results[1].1.is_none());
        assert!(results[2].1.is_some());
    }

    #[test]
    fn put_many() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 3, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        let embeddings: Vec<_> = (1..=5)
            .map(|i| (EntityId::new(i), Embedding::new(vec![i as f32; 3]).unwrap()))
            .collect();

        store.put_many(&embeddings, &name).unwrap();

        assert_eq!(store.count(&name).unwrap(), 5);

        for i in 1..=5 {
            let retrieved = store.get(EntityId::new(i), &name).unwrap();
            assert_eq!(retrieved.as_slice(), &[i as f32; 3]);
        }
    }

    #[test]
    fn put_many_wrong_dimension_fails() {
        let store = create_test_store();
        let name = unique_space_name();
        let space = EmbeddingSpace::new(name.clone(), 128, DistanceMetric::Cosine);
        store.create_space(&space).unwrap();

        // The second entry has the wrong dimension for a 128-dimensional space.
        let embeddings = vec![
            (EntityId::new(1), Embedding::new(vec![1.0; 128]).unwrap()),
            (EntityId::new(2), Embedding::new(vec![1.0, 2.0, 3.0]).unwrap()),
        ];

        let result = store.put_many(&embeddings, &name);
        assert!(result.is_err());
    }

    #[test]
    fn delete_entity_across_spaces() {
        let store = create_test_store();

        let name1 = unique_space_name();
        let name2 = unique_space_name();

        let space1 = EmbeddingSpace::new(name1.clone(), 3, DistanceMetric::Cosine);
        let space2 = EmbeddingSpace::new(name2.clone(), 5, DistanceMetric::Euclidean);

        store.create_space(&space1).unwrap();
        store.create_space(&space2).unwrap();

        let entity_id = EntityId::new(42);

        store.put(entity_id, &name1, &Embedding::new(vec![1.0; 3]).unwrap()).unwrap();
        store.put(entity_id, &name2, &Embedding::new(vec![2.0; 5]).unwrap()).unwrap();

        assert!(store.exists(entity_id, &name1).unwrap());
        assert!(store.exists(entity_id, &name2).unwrap());

        let deleted = store.delete_entity(entity_id).unwrap();
        assert_eq!(deleted, 2);

        assert!(!store.exists(entity_id, &name1).unwrap());
        assert!(!store.exists(entity_id, &name2).unwrap());
    }

    #[test]
    fn multiple_embeddings_per_entity() {
        let store = create_test_store();

        let text_space = unique_space_name();
        let image_space = unique_space_name();

        store
            .create_space(&EmbeddingSpace::new(text_space.clone(), 384, DistanceMetric::Cosine))
            .unwrap();
        store
            .create_space(&EmbeddingSpace::new(
                image_space.clone(),
                512,
                DistanceMetric::DotProduct,
            ))
            .unwrap();

        let entity_id = EntityId::new(1);

        let text_embedding = Embedding::new(vec![0.1; 384]).unwrap();
        let image_embedding = Embedding::new(vec![0.2; 512]).unwrap();

        store.put(entity_id, &text_space, &text_embedding).unwrap();
        store.put(entity_id, &image_space, &image_embedding).unwrap();

        let retrieved_text = store.get(entity_id, &text_space).unwrap();
        let retrieved_image = store.get(entity_id, &image_space).unwrap();

        assert_eq!(retrieved_text.dimension(), 384);
        assert_eq!(retrieved_image.dimension(), 512);
    }

    #[test]
    fn next_prefix_increments_correctly() {
        assert_eq!(next_prefix(&[0x00]), vec![0x01]);
        assert_eq!(next_prefix(&[0x10, 0x00]), vec![0x10, 0x01]);

        // Trailing 0xFF bytes are dropped after the carry.
        assert_eq!(next_prefix(&[0x10, 0xFF]), vec![0x11]);
        assert_eq!(next_prefix(&[0x10, 0xFF, 0xFF]), vec![0x11]);

        // No finite bound exists for these inputs; the fallback appends one 0xFF.
        assert_eq!(next_prefix(&[0xFF]), vec![0xFF, 0xFF]);
        assert_eq!(next_prefix(&[0xFF, 0xFF]), vec![0xFF, 0xFF, 0xFF]);
    }

    #[test]
    fn next_prefix_bounds_only_prefixed_keys() {
        let prefix = [0x10u8, 0xFF];
        let bound = next_prefix(&prefix);

        // Keys carrying the prefix sort below the exclusive bound.
        assert!([0x10u8, 0xFF].as_slice() < bound.as_slice());
        assert!([0x10u8, 0xFF, 0xFF, 0xFF].as_slice() < bound.as_slice());

        // Keys without the prefix that sort after it are not below the bound.
        assert!([0x11u8].as_slice() >= bound.as_slice());
        assert!([0x11u8, 0x00].as_slice() >= bound.as_slice());
    }
}