entelix_memory/semantic.rs
1//! `SemanticMemory<E, V>` — generic composition of `Embedder` +
2//! `VectorStore` scoped to one `Namespace`. Plus
3//! [`SemanticMemoryBackend`], the object-safe consumer trait.
4//!
5//! ## Three-tier layering
6//!
7//! 1. **Primitives** — [`crate::Embedder`] + [`crate::VectorStore`]
8//! are operator-implemented backend traits. The `VectorStore`
9//! takes `Namespace` as a per-call parameter so a single store
10//! instance backs many tenants. The `Embedder` is independent
11//! and pool-shared via `Arc<Self>`.
12//! 2. **Bundle** — [`SemanticMemory<E, V>`] glues `Arc<E>` +
13//! `Arc<V>` + a fixed `Namespace` into one surface. Generic over
14//! the concrete embedder / vector-store types so static dispatch
15//! is preserved on hot paths.
16//! 3. **Consumer trait** — [`SemanticMemoryBackend`] is the object-
17//! safe view tools and orchestration code consume as
18//! `Arc<dyn SemanticMemoryBackend>`. The bound `Namespace` is
19//! baked in via [`SemanticMemoryBackend::namespace`]; consumers
20//! don't pass one. Implemented automatically for every
21//! `SemanticMemory<E, V>`.
22//!
23//! Operators add a backend by implementing `VectorStore` (and
24//! optionally `Embedder` for non-OpenAI vendors); they never need
25//! to implement `SemanticMemoryBackend` directly — wrapping in
26//! `SemanticMemory::new` produces the trait-object view for free.
27
28use std::sync::Arc;
29
30use async_trait::async_trait;
31use entelix_core::{Error, ExecutionContext, Result};
32
33use crate::namespace::Namespace;
34use crate::traits::{Document, Embedder, RerankedDocument, Reranker, VectorFilter, VectorStore};
35
36/// Object-safe consumer trait — tier 3 of the semantic-memory
37/// layering documented at the module level. Consumers (tools,
38/// orchestration code, recipes) take
39/// `Arc<dyn SemanticMemoryBackend>` to operate on a namespace-scoped
40/// embed-and-search surface without parameterising over the
41/// concrete embedder / vector-store types.
42///
43/// **Operators do not implement this trait directly.** Implement
44/// [`crate::VectorStore`] (and optionally [`crate::Embedder`]),
45/// then wrap in [`SemanticMemory::new`] — the
46/// `impl SemanticMemoryBackend for SemanticMemory<E, V>` blanket
47/// produces the trait-object view automatically.
48///
49/// The trait mirrors the full [`SemanticMemory`] surface (search,
50/// add, delete, update, add_batch, search_filtered, plus a
51/// rerank-aware variant via `&dyn Reranker`) so consumers do not
52/// need to downcast to the concrete generic type to access mutating
53/// or rerank operations.
54#[async_trait]
55pub trait SemanticMemoryBackend: Send + Sync + 'static {
56 /// Borrow the bound [`Namespace`]. Tools and orchestration code
57 /// that route queries by tenant or scope read this to validate
58 /// the backend is wired to the expected slice without downcasting
59 /// to the concrete generic type.
60 fn namespace(&self) -> &Namespace;
61
62 /// Vector dimension the backend embeds and indexes at. Lets
63 /// schedulers verify a query embedder matches before issuing a
64 /// search, and lets dashboards report index width per tenant.
65 fn dimension(&self) -> usize;
66
67 /// Embed `query` and return the top `top_k` matches.
68 async fn search(
69 &self,
70 ctx: &ExecutionContext,
71 query: &str,
72 top_k: usize,
73 ) -> Result<Vec<Document>>;
74
75 /// Embed `query`, fetch `candidates`, push down `filter` if the
76 /// backend supports it; otherwise the underlying `VectorStore`
77 /// returns `Error::Config`.
78 async fn search_filtered(
79 &self,
80 ctx: &ExecutionContext,
81 query: &str,
82 top_k: usize,
83 filter: &VectorFilter,
84 ) -> Result<Vec<Document>>;
85
86 /// Two-stage retrieval: over-fetch `candidates` then rerank
87 /// down to `top_k`. The reranker is supplied as a trait object
88 /// so the backend trait stays object-safe (the concrete
89 /// [`SemanticMemory::search_with_rerank`] also accepts
90 /// monomorphic `R: Reranker` for users who prefer static
91 /// dispatch). Returns [`RerankedDocument`]s so callers can
92 /// inspect the reranker's score alongside the retrieval score.
93 async fn search_with_rerank_dyn(
94 &self,
95 ctx: &ExecutionContext,
96 query: &str,
97 top_k: usize,
98 candidates: usize,
99 reranker: &dyn Reranker,
100 ) -> Result<Vec<RerankedDocument>>;
101
102 /// Embed `document.content` and add the document to the index.
103 async fn add(&self, ctx: &ExecutionContext, document: Document) -> Result<()>;
104
105 /// Add many documents at once. Default implementations defer to
106 /// the embedder's batch path then to the vector store's batch
107 /// path so backends that support either can amortise round-trips.
108 async fn add_batch(&self, ctx: &ExecutionContext, documents: Vec<Document>) -> Result<()>;
109
110 /// Delete a previously-indexed document by its backend id.
111 async fn delete(&self, ctx: &ExecutionContext, doc_id: &str) -> Result<()>;
112
113 /// Replace an existing document's vector and metadata atomically
114 /// when the backend supports it; otherwise non-atomic via
115 /// delete + add.
116 async fn update(&self, ctx: &ExecutionContext, doc_id: &str, document: Document) -> Result<()>;
117
118 /// Count documents in the bound namespace, optionally narrowed
119 /// by a metadata filter. Pass-through to
120 /// [`VectorStore::count`] — backends without count support
121 /// surface `Error::Config`.
122 async fn count(&self, ctx: &ExecutionContext, filter: Option<&VectorFilter>) -> Result<usize>;
123
124 /// Enumerate documents in the bound namespace. Pass-through to
125 /// [`VectorStore::list`] — backends without enumeration
126 /// support surface `Error::Config`.
127 async fn list(
128 &self,
129 ctx: &ExecutionContext,
130 filter: Option<&VectorFilter>,
131 limit: usize,
132 offset: usize,
133 ) -> Result<Vec<Document>>;
134}
135
136/// `Embedder + VectorStore + Namespace` bundle.
137///
138/// The embedder produces vectors at `add` and `search` time; the vector
139/// store holds them. Both must agree on `dimension()` — checked at
140/// construction.
141pub struct SemanticMemory<E, V>
142where
143 E: Embedder,
144 V: VectorStore,
145{
146 embedder: Arc<E>,
147 vector_store: Arc<V>,
148 namespace: Namespace,
149}
150
151impl<E, V> SemanticMemory<E, V>
152where
153 E: Embedder,
154 V: VectorStore,
155{
156 /// Construct from owned components, validating dimension parity.
157 ///
158 /// Returns `Error::Config` if the embedder and vector store report
159 /// different dimensions.
160 pub fn new(embedder: Arc<E>, vector_store: Arc<V>, namespace: Namespace) -> Result<Self> {
161 let e_dim = embedder.dimension();
162 let v_dim = vector_store.dimension();
163 if e_dim != v_dim {
164 return Err(Error::config(format!(
165 "SemanticMemory: embedder dimension ({e_dim}) does not match vector-store \
166 dimension ({v_dim})"
167 )));
168 }
169 Ok(Self {
170 embedder,
171 vector_store,
172 namespace,
173 })
174 }
175
176 /// Borrow the bound namespace.
177 pub const fn namespace(&self) -> &Namespace {
178 &self.namespace
179 }
180
181 /// Embed `document.content` and add it to the vector store.
182 /// The embedder's usage metadata (when surfaced) is dropped here —
183 /// callers that need to charge cost meters per-embed should use
184 /// the embedder directly and then call
185 /// [`VectorStore::add`](crate::VectorStore::add).
186 pub async fn add(&self, ctx: &ExecutionContext, document: Document) -> Result<()> {
187 let embedding = self.embedder.embed(&document.content, ctx).await?;
188 self.vector_store
189 .add(ctx, &self.namespace, document, embedding.vector)
190 .await
191 }
192
193 /// Add many documents at once — uses `Embedder::embed_batch` to
194 /// amortise embedder calls then `VectorStore::add_batch` to
195 /// amortise index writes.
196 ///
197 /// Returns [`Error::Config`] if the embedder produces a vector
198 /// count that doesn't match the input documents — silent
199 /// truncation via `zip` would drop documents without surfacing
200 /// the embedder bug.
201 pub async fn add_batch(&self, ctx: &ExecutionContext, documents: Vec<Document>) -> Result<()> {
202 if documents.is_empty() {
203 return Ok(());
204 }
205 let texts: Vec<String> = documents.iter().map(|d| d.content.clone()).collect();
206 let embeddings = self.embedder.embed_batch(&texts, ctx).await?;
207 if embeddings.len() != texts.len() {
208 return Err(Error::config(format!(
209 "SemanticMemory::add_batch: embedder returned {} vectors for {} documents",
210 embeddings.len(),
211 texts.len()
212 )));
213 }
214 let items: Vec<(Document, Vec<f32>)> = documents
215 .into_iter()
216 .zip(embeddings)
217 .map(|(doc, embedding)| (doc, embedding.vector))
218 .collect();
219 self.vector_store
220 .add_batch(ctx, &self.namespace, items)
221 .await
222 }
223
224 /// Delete a previously-indexed document by id.
225 pub async fn delete(&self, ctx: &ExecutionContext, doc_id: &str) -> Result<()> {
226 self.vector_store.delete(ctx, &self.namespace, doc_id).await
227 }
228
229 /// Update a previously-indexed document. Re-embeds the
230 /// document's content via the embedder and asks the vector
231 /// store to swap vector + metadata under the same id.
232 pub async fn update(
233 &self,
234 ctx: &ExecutionContext,
235 doc_id: &str,
236 document: Document,
237 ) -> Result<()> {
238 let embedding = self.embedder.embed(&document.content, ctx).await?;
239 self.vector_store
240 .update(ctx, &self.namespace, doc_id, document, embedding.vector)
241 .await
242 }
243
244 /// Embed `query` and search the vector store for the top `top_k`
245 /// matches.
246 pub async fn search(
247 &self,
248 ctx: &ExecutionContext,
249 query: &str,
250 top_k: usize,
251 ) -> Result<Vec<Document>> {
252 let embedding = self.embedder.embed(query, ctx).await?;
253 self.vector_store
254 .search(ctx, &self.namespace, &embedding.vector, top_k)
255 .await
256 }
257
258 /// Embed `query` and search with a metadata filter. Backends
259 /// without filter support return `Error::Config` per the
260 /// `VectorStore::search_filtered` contract.
261 pub async fn search_filtered(
262 &self,
263 ctx: &ExecutionContext,
264 query: &str,
265 top_k: usize,
266 filter: &VectorFilter,
267 ) -> Result<Vec<Document>> {
268 let embedding = self.embedder.embed(query, ctx).await?;
269 self.vector_store
270 .search_filtered(ctx, &self.namespace, &embedding.vector, top_k, filter)
271 .await
272 }
273
274 /// Two-stage retrieval: over-fetch `candidates` from the vector
275 /// store, then rerank down to `top_k` via the supplied
276 /// [`Reranker`]. The over-fetch factor is the operator's lever
277 /// for trading recall against rerank latency — passing
278 /// `candidates == top_k` makes the reranker no-op-shaped, while
279 /// `candidates >> top_k` exposes more candidates to the
280 /// reranker's scoring. Returns [`RerankedDocument`]s so callers
281 /// retain both the retrieval and rerank scores for explainability.
282 pub async fn search_with_rerank<R: Reranker>(
283 &self,
284 ctx: &ExecutionContext,
285 query: &str,
286 top_k: usize,
287 candidates: usize,
288 reranker: &R,
289 ) -> Result<Vec<RerankedDocument>> {
290 let pool = self.search(ctx, query, candidates.max(top_k)).await?;
291 reranker.rerank(query, pool, top_k, ctx).await
292 }
293
294 /// Count documents in the bound namespace. Pass-through to
295 /// [`VectorStore::count`] — backends without count support
296 /// surface `Error::Config`.
297 pub async fn count(
298 &self,
299 ctx: &ExecutionContext,
300 filter: Option<&VectorFilter>,
301 ) -> Result<usize> {
302 self.vector_store.count(ctx, &self.namespace, filter).await
303 }
304
305 /// Enumerate documents in the bound namespace. Pass-through to
306 /// [`VectorStore::list`] — backends without enumeration
307 /// support surface `Error::Config`.
308 pub async fn list(
309 &self,
310 ctx: &ExecutionContext,
311 filter: Option<&VectorFilter>,
312 limit: usize,
313 offset: usize,
314 ) -> Result<Vec<Document>> {
315 self.vector_store
316 .list(ctx, &self.namespace, filter, limit, offset)
317 .await
318 }
319}
320
321#[async_trait]
322impl<E, V> SemanticMemoryBackend for SemanticMemory<E, V>
323where
324 E: Embedder,
325 V: VectorStore,
326{
327 fn namespace(&self) -> &Namespace {
328 &self.namespace
329 }
330
331 fn dimension(&self) -> usize {
332 self.embedder.dimension()
333 }
334
335 async fn search(
336 &self,
337 ctx: &ExecutionContext,
338 query: &str,
339 top_k: usize,
340 ) -> Result<Vec<Document>> {
341 Self::search(self, ctx, query, top_k).await
342 }
343
344 async fn search_filtered(
345 &self,
346 ctx: &ExecutionContext,
347 query: &str,
348 top_k: usize,
349 filter: &VectorFilter,
350 ) -> Result<Vec<Document>> {
351 Self::search_filtered(self, ctx, query, top_k, filter).await
352 }
353
354 async fn add(&self, ctx: &ExecutionContext, document: Document) -> Result<()> {
355 Self::add(self, ctx, document).await
356 }
357
358 async fn add_batch(&self, ctx: &ExecutionContext, documents: Vec<Document>) -> Result<()> {
359 Self::add_batch(self, ctx, documents).await
360 }
361
362 async fn delete(&self, ctx: &ExecutionContext, doc_id: &str) -> Result<()> {
363 Self::delete(self, ctx, doc_id).await
364 }
365
366 async fn update(&self, ctx: &ExecutionContext, doc_id: &str, document: Document) -> Result<()> {
367 Self::update(self, ctx, doc_id, document).await
368 }
369
370 async fn search_with_rerank_dyn(
371 &self,
372 ctx: &ExecutionContext,
373 query: &str,
374 top_k: usize,
375 candidates: usize,
376 reranker: &dyn Reranker,
377 ) -> Result<Vec<RerankedDocument>> {
378 let pool = self.search(ctx, query, candidates.max(top_k)).await?;
379 reranker.rerank(query, pool, top_k, ctx).await
380 }
381
382 async fn count(&self, ctx: &ExecutionContext, filter: Option<&VectorFilter>) -> Result<usize> {
383 Self::count(self, ctx, filter).await
384 }
385
386 async fn list(
387 &self,
388 ctx: &ExecutionContext,
389 filter: Option<&VectorFilter>,
390 limit: usize,
391 offset: usize,
392 ) -> Result<Vec<Document>> {
393 Self::list(self, ctx, filter, limit, offset).await
394 }
395}