Skip to main content

engram/
vector.rs

1//! `VectorStore` trait — semantic similarity search interface for Engram.
2//!
3//! Implementations store dense embedding vectors alongside metadata and
4//! support nearest-neighbour queries via cosine similarity. The trait is
5//! `async_trait`-annotated and requires `Send + Sync` for use across tasks.
6
7use crate::fact::FactId;
8use crate::scope::Scope;
9use crate::store::MemoryError;
10use async_trait::async_trait;
11
12// ---------------------------------------------------------------------------
13// VectorFilter
14// ---------------------------------------------------------------------------
15
/// Optional filters applied during a vector similarity search.
///
/// Every field is an `Option`, and the struct derives `Default`, so
/// `VectorFilter::default()` produces a filter with all constraints unset
/// (`None`). Populate only the fields that should narrow the search.
#[derive(Debug, Clone, Default)]
pub struct VectorFilter {
    /// Restrict results to vectors belonging to this scope.
    ///
    /// `None` requests no scope restriction.
    pub scope: Option<Scope>,
    /// Exclude results whose cosine similarity score is below this threshold.
    ///
    /// `None` requests no score cut-off.
    // NOTE(review): "below" suggests a score exactly equal to the threshold is
    // kept, but that is decided by each implementation — confirm.
    pub min_score: Option<f32>,
}
24
25// ---------------------------------------------------------------------------
26// VectorMatch
27// ---------------------------------------------------------------------------
28
/// A single result returned by `VectorStore::search`.
///
/// Pairs the identifier of the matched fact with its similarity score and the
/// metadata that was stored together with the embedding.
#[derive(Debug, Clone)]
pub struct VectorMatch {
    /// The fact this vector belongs to.
    pub id: FactId,
    /// Cosine similarity score in [0.0, 1.0].
    // NOTE(review): raw cosine similarity ranges over [-1.0, 1.0]. The stated
    // [0.0, 1.0] range only holds if implementations clamp/rescale the score or
    // if embeddings never point in opposing directions — confirm which applies.
    pub score: f32,
    /// Metadata stored alongside the embedding at upsert time.
    pub metadata: serde_json::Value,
}
39
40// ---------------------------------------------------------------------------
41// VectorStore trait
42// ---------------------------------------------------------------------------
43
/// Semantic vector storage and nearest-neighbour search.
///
/// Implementations MUST be `Send + Sync` so that `Arc<dyn VectorStore>` can
/// be shared across async tasks. All methods are fallible and return
/// `Result<_, MemoryError>`.
///
/// Every method takes `&self`, so an implementation that mutates state has to
/// do so through its own interior synchronisation.
#[async_trait]
pub trait VectorStore: Send + Sync {
    /// Insert or replace the embedding for `id`.
    ///
    /// `embedding` must have the correct dimensionality for the store;
    /// implementations SHOULD return `MemoryError::Embedding` on a mismatch.
    ///
    /// `metadata` is stored with the embedding and handed back in
    /// [`VectorMatch::metadata`] by [`VectorStore::search`].
    // NOTE(review): the signature carries no explicit `Scope`; presumably scope
    // filtering relies on information inside `metadata` — confirm against the
    // implementations.
    ///
    /// # Errors
    ///
    /// Returns a [`MemoryError`] when the write fails; a dimensionality
    /// mismatch SHOULD be reported as `MemoryError::Embedding`.
    async fn upsert(
        &self,
        id: FactId,
        embedding: Vec<f32>,
        metadata: serde_json::Value,
    ) -> Result<(), MemoryError>;

    /// Return the top-`top_k` most similar vectors to `query`, applying
    /// `filter` constraints. Results are ordered by descending similarity score.
    ///
    /// `top_k` is the size of the requested result set.
    // NOTE(review): presumably fewer than `top_k` matches come back when the
    // store or the filter leaves fewer candidates, and `query` presumably must
    // match the store's dimensionality just like `upsert` — confirm.
    ///
    /// # Errors
    ///
    /// Returns a [`MemoryError`] when the search cannot be carried out.
    async fn search(
        &self,
        query: &[f32],
        filter: &VectorFilter,
        top_k: usize,
    ) -> Result<Vec<VectorMatch>, MemoryError>;

    /// Delete the embedding for `id`. Returns `Ok(())` if the id was not found
    /// (idempotent).
    ///
    /// # Errors
    ///
    /// Returns a [`MemoryError`] when the deletion itself fails; a missing `id`
    /// is not an error.
    async fn delete(&self, id: FactId) -> Result<(), MemoryError>;

    /// Delete all embeddings associated with `scope`.
    ///
    /// Returns the number of entries removed.
    ///
    /// # Errors
    ///
    /// Returns a [`MemoryError`] when the bulk deletion fails.
    async fn delete_by_scope(&self, scope: &Scope) -> Result<u64, MemoryError>;
}