engram/vector.rs
//! `VectorStore` trait — semantic similarity search interface for Engram.
//!
//! Implementations store dense embedding vectors alongside metadata and
//! support nearest-neighbour queries via cosine similarity. The trait is
//! `async_trait`-annotated and requires `Send + Sync` for use across tasks.

7use crate::fact::FactId;
8use crate::scope::Scope;
9use crate::store::MemoryError;
10use async_trait::async_trait;

// ---------------------------------------------------------------------------
// VectorFilter
// ---------------------------------------------------------------------------

16/// Optional filters applied during a vector similarity search.
17#[derive(Debug, Clone, Default)]
18pub struct VectorFilter {
19 /// Restrict results to vectors belonging to this scope.
20 pub scope: Option<Scope>,
21 /// Exclude results whose cosine similarity score is below this threshold.
22 pub min_score: Option<f32>,
23}

// ---------------------------------------------------------------------------
// VectorMatch
// ---------------------------------------------------------------------------

29/// A single result returned by `VectorStore::search`.
30#[derive(Debug, Clone)]
31pub struct VectorMatch {
32 /// The fact this vector belongs to.
33 pub id: FactId,
34 /// Cosine similarity score in [0.0, 1.0].
35 pub score: f32,
36 /// Metadata stored alongside the embedding at upsert time.
37 pub metadata: serde_json::Value,
38}

// ---------------------------------------------------------------------------
// VectorStore trait
// ---------------------------------------------------------------------------

44/// Semantic vector storage and nearest-neighbour search.
45///
46/// Implementations MUST be `Send + Sync` so that `Arc<dyn VectorStore>` can
47/// be shared across async tasks. All methods are fallible and return
48/// `Result<_, MemoryError>`.
49#[async_trait]
50pub trait VectorStore: Send + Sync {
51 /// Insert or replace the embedding for `id`.
52 ///
53 /// `embedding` must have the correct dimensionality for the store;
54 /// implementations SHOULD return `MemoryError::Embedding` on a mismatch.
55 async fn upsert(
56 &self,
57 id: FactId,
58 embedding: Vec<f32>,
59 metadata: serde_json::Value,
60 ) -> Result<(), MemoryError>;
61
62 /// Return the top-`top_k` most similar vectors to `query`, applying
63 /// `filter` constraints. Results are ordered by descending similarity score.
64 async fn search(
65 &self,
66 query: &[f32],
67 filter: &VectorFilter,
68 top_k: usize,
69 ) -> Result<Vec<VectorMatch>, MemoryError>;
70
71 /// Delete the embedding for `id`. Returns `Ok(())` if the id was not found
72 /// (idempotent).
73 async fn delete(&self, id: FactId) -> Result<(), MemoryError>;
74
75 /// Delete all embeddings associated with `scope`.
76 ///
77 /// Returns the number of entries removed.
78 async fn delete_by_scope(&self, scope: &Scope) -> Result<u64, MemoryError>;
79}