entelix_memory/
semantic.rs

1//! `SemanticMemory<E, V>` — generic composition of `Embedder` +
2//! `VectorStore` scoped to one `Namespace`. Plus
3//! [`SemanticMemoryBackend`], the object-safe consumer trait.
4//!
5//! ## Three-tier layering
6//!
7//! 1. **Primitives** — [`crate::Embedder`] + [`crate::VectorStore`]
8//!    are operator-implemented backend traits. The `VectorStore`
9//!    takes `Namespace` as a per-call parameter so a single store
10//!    instance backs many tenants. The `Embedder` is independent
11//!    and pool-shared via `Arc<Self>`.
12//! 2. **Bundle** — [`SemanticMemory<E, V>`] glues `Arc<E>` +
13//!    `Arc<V>` + a fixed `Namespace` into one surface. Generic over
14//!    the concrete embedder / vector-store types so static dispatch
15//!    is preserved on hot paths.
16//! 3. **Consumer trait** — [`SemanticMemoryBackend`] is the object-
17//!    safe view tools and orchestration code consume as
18//!    `Arc<dyn SemanticMemoryBackend>`. The bound `Namespace` is
19//!    baked in via [`SemanticMemoryBackend::namespace`]; consumers
20//!    don't pass one. Implemented automatically for every
21//!    `SemanticMemory<E, V>`.
22//!
23//! Operators add a backend by implementing `VectorStore` (and
24//! optionally `Embedder` for non-OpenAI vendors); they never need
25//! to implement `SemanticMemoryBackend` directly — wrapping in
26//! `SemanticMemory::new` produces the trait-object view for free.
27
28use std::sync::Arc;
29
30use async_trait::async_trait;
31use entelix_core::{Error, ExecutionContext, Result};
32
33use crate::namespace::Namespace;
34use crate::traits::{Document, Embedder, RerankedDocument, Reranker, VectorFilter, VectorStore};
35
36/// Object-safe consumer trait — tier 3 of the semantic-memory
37/// layering documented at the module level. Consumers (tools,
38/// orchestration code, recipes) take
39/// `Arc<dyn SemanticMemoryBackend>` to operate on a namespace-scoped
40/// embed-and-search surface without parameterising over the
41/// concrete embedder / vector-store types.
42///
43/// **Operators do not implement this trait directly.** Implement
44/// [`crate::VectorStore`] (and optionally [`crate::Embedder`]),
45/// then wrap in [`SemanticMemory::new`] — the
46/// `impl SemanticMemoryBackend for SemanticMemory<E, V>` blanket
47/// produces the trait-object view automatically.
48///
49/// The trait mirrors the full [`SemanticMemory`] surface (search,
50/// add, delete, update, add_batch, search_filtered, plus a
51/// rerank-aware variant via `&dyn Reranker`) so consumers do not
52/// need to downcast to the concrete generic type to access mutating
53/// or rerank operations.
54#[async_trait]
55pub trait SemanticMemoryBackend: Send + Sync + 'static {
56    /// Borrow the bound [`Namespace`]. Tools and orchestration code
57    /// that route queries by tenant or scope read this to validate
58    /// the backend is wired to the expected slice without downcasting
59    /// to the concrete generic type.
60    fn namespace(&self) -> &Namespace;
61
62    /// Vector dimension the backend embeds and indexes at. Lets
63    /// schedulers verify a query embedder matches before issuing a
64    /// search, and lets dashboards report index width per tenant.
65    fn dimension(&self) -> usize;
66
67    /// Embed `query` and return the top `top_k` matches.
68    async fn search(
69        &self,
70        ctx: &ExecutionContext,
71        query: &str,
72        top_k: usize,
73    ) -> Result<Vec<Document>>;
74
75    /// Embed `query`, fetch `candidates`, push down `filter` if the
76    /// backend supports it; otherwise the underlying `VectorStore`
77    /// returns `Error::Config`.
78    async fn search_filtered(
79        &self,
80        ctx: &ExecutionContext,
81        query: &str,
82        top_k: usize,
83        filter: &VectorFilter,
84    ) -> Result<Vec<Document>>;
85
86    /// Two-stage retrieval: over-fetch `candidates` then rerank
87    /// down to `top_k`. The reranker is supplied as a trait object
88    /// so the backend trait stays object-safe (the concrete
89    /// [`SemanticMemory::search_with_rerank`] also accepts
90    /// monomorphic `R: Reranker` for users who prefer static
91    /// dispatch). Returns [`RerankedDocument`]s so callers can
92    /// inspect the reranker's score alongside the retrieval score.
93    async fn search_with_rerank_dyn(
94        &self,
95        ctx: &ExecutionContext,
96        query: &str,
97        top_k: usize,
98        candidates: usize,
99        reranker: &dyn Reranker,
100    ) -> Result<Vec<RerankedDocument>>;
101
102    /// Embed `document.content` and add the document to the index.
103    async fn add(&self, ctx: &ExecutionContext, document: Document) -> Result<()>;
104
105    /// Add many documents at once. Default implementations defer to
106    /// the embedder's batch path then to the vector store's batch
107    /// path so backends that support either can amortise round-trips.
108    async fn add_batch(&self, ctx: &ExecutionContext, documents: Vec<Document>) -> Result<()>;
109
110    /// Delete a previously-indexed document by its backend id.
111    async fn delete(&self, ctx: &ExecutionContext, doc_id: &str) -> Result<()>;
112
113    /// Replace an existing document's vector and metadata atomically
114    /// when the backend supports it; otherwise non-atomic via
115    /// delete + add.
116    async fn update(&self, ctx: &ExecutionContext, doc_id: &str, document: Document) -> Result<()>;
117
118    /// Count documents in the bound namespace, optionally narrowed
119    /// by a metadata filter. Pass-through to
120    /// [`VectorStore::count`] — backends without count support
121    /// surface `Error::Config`.
122    async fn count(&self, ctx: &ExecutionContext, filter: Option<&VectorFilter>) -> Result<usize>;
123
124    /// Enumerate documents in the bound namespace. Pass-through to
125    /// [`VectorStore::list`] — backends without enumeration
126    /// support surface `Error::Config`.
127    async fn list(
128        &self,
129        ctx: &ExecutionContext,
130        filter: Option<&VectorFilter>,
131        limit: usize,
132        offset: usize,
133    ) -> Result<Vec<Document>>;
134}
135
136/// `Embedder + VectorStore + Namespace` bundle.
137///
138/// The embedder produces vectors at `add` and `search` time; the vector
139/// store holds them. Both must agree on `dimension()` — checked at
140/// construction.
141pub struct SemanticMemory<E, V>
142where
143    E: Embedder,
144    V: VectorStore,
145{
146    embedder: Arc<E>,
147    vector_store: Arc<V>,
148    namespace: Namespace,
149}
150
151impl<E, V> SemanticMemory<E, V>
152where
153    E: Embedder,
154    V: VectorStore,
155{
156    /// Construct from owned components, validating dimension parity.
157    ///
158    /// Returns `Error::Config` if the embedder and vector store report
159    /// different dimensions.
160    pub fn new(embedder: Arc<E>, vector_store: Arc<V>, namespace: Namespace) -> Result<Self> {
161        let e_dim = embedder.dimension();
162        let v_dim = vector_store.dimension();
163        if e_dim != v_dim {
164            return Err(Error::config(format!(
165                "SemanticMemory: embedder dimension ({e_dim}) does not match vector-store \
166                 dimension ({v_dim})"
167            )));
168        }
169        Ok(Self {
170            embedder,
171            vector_store,
172            namespace,
173        })
174    }
175
176    /// Borrow the bound namespace.
177    pub const fn namespace(&self) -> &Namespace {
178        &self.namespace
179    }
180
181    /// Embed `document.content` and add it to the vector store.
182    /// The embedder's usage metadata (when surfaced) is dropped here —
183    /// callers that need to charge cost meters per-embed should use
184    /// the embedder directly and then call
185    /// [`VectorStore::add`](crate::VectorStore::add).
186    pub async fn add(&self, ctx: &ExecutionContext, document: Document) -> Result<()> {
187        let embedding = self.embedder.embed(&document.content, ctx).await?;
188        self.vector_store
189            .add(ctx, &self.namespace, document, embedding.vector)
190            .await
191    }
192
193    /// Add many documents at once — uses `Embedder::embed_batch` to
194    /// amortise embedder calls then `VectorStore::add_batch` to
195    /// amortise index writes.
196    ///
197    /// Returns [`Error::Config`] if the embedder produces a vector
198    /// count that doesn't match the input documents — silent
199    /// truncation via `zip` would drop documents without surfacing
200    /// the embedder bug.
201    pub async fn add_batch(&self, ctx: &ExecutionContext, documents: Vec<Document>) -> Result<()> {
202        if documents.is_empty() {
203            return Ok(());
204        }
205        let texts: Vec<String> = documents.iter().map(|d| d.content.clone()).collect();
206        let embeddings = self.embedder.embed_batch(&texts, ctx).await?;
207        if embeddings.len() != texts.len() {
208            return Err(Error::config(format!(
209                "SemanticMemory::add_batch: embedder returned {} vectors for {} documents",
210                embeddings.len(),
211                texts.len()
212            )));
213        }
214        let items: Vec<(Document, Vec<f32>)> = documents
215            .into_iter()
216            .zip(embeddings)
217            .map(|(doc, embedding)| (doc, embedding.vector))
218            .collect();
219        self.vector_store
220            .add_batch(ctx, &self.namespace, items)
221            .await
222    }
223
224    /// Delete a previously-indexed document by id.
225    pub async fn delete(&self, ctx: &ExecutionContext, doc_id: &str) -> Result<()> {
226        self.vector_store.delete(ctx, &self.namespace, doc_id).await
227    }
228
229    /// Update a previously-indexed document. Re-embeds the
230    /// document's content via the embedder and asks the vector
231    /// store to swap vector + metadata under the same id.
232    pub async fn update(
233        &self,
234        ctx: &ExecutionContext,
235        doc_id: &str,
236        document: Document,
237    ) -> Result<()> {
238        let embedding = self.embedder.embed(&document.content, ctx).await?;
239        self.vector_store
240            .update(ctx, &self.namespace, doc_id, document, embedding.vector)
241            .await
242    }
243
244    /// Embed `query` and search the vector store for the top `top_k`
245    /// matches.
246    pub async fn search(
247        &self,
248        ctx: &ExecutionContext,
249        query: &str,
250        top_k: usize,
251    ) -> Result<Vec<Document>> {
252        let embedding = self.embedder.embed(query, ctx).await?;
253        self.vector_store
254            .search(ctx, &self.namespace, &embedding.vector, top_k)
255            .await
256    }
257
258    /// Embed `query` and search with a metadata filter. Backends
259    /// without filter support return `Error::Config` per the
260    /// `VectorStore::search_filtered` contract.
261    pub async fn search_filtered(
262        &self,
263        ctx: &ExecutionContext,
264        query: &str,
265        top_k: usize,
266        filter: &VectorFilter,
267    ) -> Result<Vec<Document>> {
268        let embedding = self.embedder.embed(query, ctx).await?;
269        self.vector_store
270            .search_filtered(ctx, &self.namespace, &embedding.vector, top_k, filter)
271            .await
272    }
273
274    /// Two-stage retrieval: over-fetch `candidates` from the vector
275    /// store, then rerank down to `top_k` via the supplied
276    /// [`Reranker`]. The over-fetch factor is the operator's lever
277    /// for trading recall against rerank latency — passing
278    /// `candidates == top_k` makes the reranker no-op-shaped, while
279    /// `candidates >> top_k` exposes more candidates to the
280    /// reranker's scoring. Returns [`RerankedDocument`]s so callers
281    /// retain both the retrieval and rerank scores for explainability.
282    pub async fn search_with_rerank<R: Reranker>(
283        &self,
284        ctx: &ExecutionContext,
285        query: &str,
286        top_k: usize,
287        candidates: usize,
288        reranker: &R,
289    ) -> Result<Vec<RerankedDocument>> {
290        let pool = self.search(ctx, query, candidates.max(top_k)).await?;
291        reranker.rerank(query, pool, top_k, ctx).await
292    }
293
294    /// Count documents in the bound namespace. Pass-through to
295    /// [`VectorStore::count`] — backends without count support
296    /// surface `Error::Config`.
297    pub async fn count(
298        &self,
299        ctx: &ExecutionContext,
300        filter: Option<&VectorFilter>,
301    ) -> Result<usize> {
302        self.vector_store.count(ctx, &self.namespace, filter).await
303    }
304
305    /// Enumerate documents in the bound namespace. Pass-through to
306    /// [`VectorStore::list`] — backends without enumeration
307    /// support surface `Error::Config`.
308    pub async fn list(
309        &self,
310        ctx: &ExecutionContext,
311        filter: Option<&VectorFilter>,
312        limit: usize,
313        offset: usize,
314    ) -> Result<Vec<Document>> {
315        self.vector_store
316            .list(ctx, &self.namespace, filter, limit, offset)
317            .await
318    }
319}
320
321#[async_trait]
322impl<E, V> SemanticMemoryBackend for SemanticMemory<E, V>
323where
324    E: Embedder,
325    V: VectorStore,
326{
327    fn namespace(&self) -> &Namespace {
328        &self.namespace
329    }
330
331    fn dimension(&self) -> usize {
332        self.embedder.dimension()
333    }
334
335    async fn search(
336        &self,
337        ctx: &ExecutionContext,
338        query: &str,
339        top_k: usize,
340    ) -> Result<Vec<Document>> {
341        Self::search(self, ctx, query, top_k).await
342    }
343
344    async fn search_filtered(
345        &self,
346        ctx: &ExecutionContext,
347        query: &str,
348        top_k: usize,
349        filter: &VectorFilter,
350    ) -> Result<Vec<Document>> {
351        Self::search_filtered(self, ctx, query, top_k, filter).await
352    }
353
354    async fn add(&self, ctx: &ExecutionContext, document: Document) -> Result<()> {
355        Self::add(self, ctx, document).await
356    }
357
358    async fn add_batch(&self, ctx: &ExecutionContext, documents: Vec<Document>) -> Result<()> {
359        Self::add_batch(self, ctx, documents).await
360    }
361
362    async fn delete(&self, ctx: &ExecutionContext, doc_id: &str) -> Result<()> {
363        Self::delete(self, ctx, doc_id).await
364    }
365
366    async fn update(&self, ctx: &ExecutionContext, doc_id: &str, document: Document) -> Result<()> {
367        Self::update(self, ctx, doc_id, document).await
368    }
369
370    async fn search_with_rerank_dyn(
371        &self,
372        ctx: &ExecutionContext,
373        query: &str,
374        top_k: usize,
375        candidates: usize,
376        reranker: &dyn Reranker,
377    ) -> Result<Vec<RerankedDocument>> {
378        let pool = self.search(ctx, query, candidates.max(top_k)).await?;
379        reranker.rerank(query, pool, top_k, ctx).await
380    }
381
382    async fn count(&self, ctx: &ExecutionContext, filter: Option<&VectorFilter>) -> Result<usize> {
383        Self::count(self, ctx, filter).await
384    }
385
386    async fn list(
387        &self,
388        ctx: &ExecutionContext,
389        filter: Option<&VectorFilter>,
390        limit: usize,
391        offset: usize,
392    ) -> Result<Vec<Document>> {
393        Self::list(self, ctx, filter, limit, offset).await
394    }
395}
entelix_memory/semantic.rs

entelix_memory/
semantic.rs