entelix_memory/traits.rs
//! `Embedder`, `Retriever`, `VectorStore`, and `Reranker` traits, plus
//! `Document` and the supporting value types.
//!
//! entelix 1.0 ships only the traits. Concrete impls
//! (`OpenAI`/Voyage/Cohere embedders, qdrant/lancedb vector stores,
//! BM25 retrievers) land in 1.1 companion crates.
6
7use async_trait::async_trait;
8use entelix_core::{ExecutionContext, Result};
9use serde::{Deserialize, Serialize};
10
11use crate::namespace::Namespace;
12
13/// One retrieved document with optional similarity score.
14#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
15pub struct Document {
16 /// Backend-assigned stable id, when the originating
17 /// [`VectorStore`] mints one. Pass back to
18 /// [`VectorStore::update`] / [`VectorStore::delete`] to mutate
19 /// or remove this document. `None` for embedded usages where
20 /// stable identity isn't tracked.
21 #[serde(default)]
22 pub doc_id: Option<DocumentId>,
23 /// Body text. Implementations may store any UTF-8 payload.
24 pub content: String,
25 /// Free-form metadata — stored alongside the document for filtering
26 /// / display. Use a JSON object by convention.
27 #[serde(default)]
28 pub metadata: serde_json::Value,
29 /// Similarity score from the retriever, if available. Higher = better
30 /// match. Comparable only within a single query's result set.
31 #[serde(default)]
32 pub score: Option<f32>,
33}
34
35impl Document {
36 /// Convenience: build a document with empty metadata and no score.
37 pub fn new(content: impl Into<String>) -> Self {
38 Self {
39 doc_id: None,
40 content: content.into(),
41 metadata: serde_json::Value::Null,
42 score: None,
43 }
44 }
45
46 /// Builder-style metadata setter.
47 #[must_use]
48 pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
49 self.metadata = metadata;
50 self
51 }
52
53 /// Builder-style doc_id setter — backends use this when
54 /// surfacing previously-indexed documents so callers can mutate
55 /// or delete via the same id.
56 #[must_use]
57 pub fn with_doc_id(mut self, id: impl Into<DocumentId>) -> Self {
58 self.doc_id = Some(id.into());
59 self
60 }
61}
62
63/// Token-accounting metadata an [`Embedder`] reports alongside the
64/// computed vector. Mirrors the `ChatModel` `Usage` shape so cost
65/// meters can charge embedding calls with the same machinery they
66/// use for completions.
67///
68/// Marked `#[non_exhaustive]` so future fields (cached_tokens, total
69/// for cost, latency_ms) are forward-compatible.
70#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
71#[non_exhaustive]
72pub struct EmbeddingUsage {
73 /// Tokens the embedder counted as input. `0` when the backend
74 /// does not surface a count (some local impls do not).
75 pub input_tokens: u32,
76}
77
78impl EmbeddingUsage {
79 /// Build a usage record with the supplied input-token count.
80 #[must_use]
81 pub const fn new(input_tokens: u32) -> Self {
82 Self { input_tokens }
83 }
84}
85
86/// One embedded text's vector plus optional usage metadata.
87///
88/// `usage` is `None` when the backend does not surface token
89/// accounting (in-process stub embedders, hash-based encoders).
90/// Cost-aware backends (`OpenAI`, Voyage, Cohere) return `Some` so
91/// downstream meters can charge per call.
92#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
93pub struct Embedding {
94 /// The encoded vector. Length always equals
95 /// [`Embedder::dimension`].
96 pub vector: Vec<f32>,
97 /// Token-accounting metadata when the backend reports it.
98 #[serde(default)]
99 pub usage: Option<EmbeddingUsage>,
100}
101
102impl Embedding {
103 /// Build an embedding with no usage metadata. Stub backends and
104 /// tests use this when they have no token count to report.
105 #[must_use]
106 pub const fn new(vector: Vec<f32>) -> Self {
107 Self {
108 vector,
109 usage: None,
110 }
111 }
112
113 /// Builder-style usage attachment.
114 #[must_use]
115 pub const fn with_usage(mut self, usage: EmbeddingUsage) -> Self {
116 self.usage = Some(usage);
117 self
118 }
119}
120
121/// Text → vector encoder.
122///
123/// Implementations are typically backed by a remote API
124/// (`OpenAI` / Voyage / Cohere). Per F10, instances are wrapped in `Arc`
125/// at the call boundary; do not create a new client per call.
126///
127/// **Override `embed_batch`** when the underlying API supports
128/// batch inference. The provided default loops sequentially via
129/// `embed`, which is correct but allocates one HTTP call per
130/// document — avoid in production.
131#[async_trait]
132pub trait Embedder: Send + Sync + 'static {
133 /// Output vector dimension. Used by `VectorStore` impls to validate
134 /// inserts against the configured index dimension.
135 fn dimension(&self) -> usize;
136
137 /// Embed one input string. Returns the vector plus optional
138 /// usage metadata so downstream cost meters can charge the
139 /// call without a second round-trip.
140 async fn embed(&self, text: &str, ctx: &ExecutionContext) -> Result<Embedding>;
141
142 /// Batch embed. Default impl runs sequentially via `embed`,
143 /// polling [`ExecutionContext::is_cancelled`] between iterations
144 /// so a long batch bails out within one `embed` round-trip of
145 /// cancellation rather than draining the full pool.
146 /// Implementations that support a true batch endpoint
147 /// **should** override — sequential calls amplify network
148 /// latency by `N`.
149 async fn embed_batch(
150 &self,
151 texts: &[String],
152 ctx: &ExecutionContext,
153 ) -> Result<Vec<Embedding>> {
154 let mut out = Vec::with_capacity(texts.len());
155 for text in texts {
156 if ctx.is_cancelled() {
157 return Err(entelix_core::Error::Cancelled);
158 }
159 out.push(self.embed(text, ctx).await?);
160 }
161 Ok(out)
162 }
163}
164
165/// Declarative description of one retrieval call.
166///
167/// Carries the text query plus optional knobs: a metadata filter, a
168/// minimum score floor, and a top-k cap. Future hybrid-search
169/// dimensions (per-field boosts, dense+keyword fusion, reranker
170/// hints) ride on `#[non_exhaustive]` without breaking call sites.
171#[derive(Clone, Debug)]
172#[non_exhaustive]
173pub struct RetrievalQuery {
174 /// Free-text query. Backends interpret it however they like —
175 /// dense vector embed-and-search, BM25 token match, hybrid fusion.
176 pub text: String,
177 /// Maximum number of hits to return. Backends truncate at this
178 /// cap; exceeding it is a backend bug.
179 pub top_k: usize,
180 /// Minimum similarity score required to include a hit. `None`
181 /// surfaces every hit regardless of score; useful when score
182 /// semantics differ across backends. Comparable only within a
183 /// single query's result set.
184 pub min_score: Option<f32>,
185 /// Metadata predicate. `None` is identity (no filter). Backends
186 /// that cannot honour the supplied [`VectorFilter`] variant fall
187 /// back to filterless retrieval and emit a `LossyEncode`-style
188 /// warning at the operator layer.
189 pub filter: Option<VectorFilter>,
190}
191
192impl RetrievalQuery {
193 /// Build a query with sensible defaults — no filter, no
194 /// floor, top-k cap supplied.
195 #[must_use]
196 pub fn new(text: impl Into<String>, top_k: usize) -> Self {
197 Self {
198 text: text.into(),
199 top_k,
200 min_score: None,
201 filter: None,
202 }
203 }
204
205 /// Attach a metadata filter. Builder-style.
206 #[must_use]
207 pub fn with_filter(mut self, filter: VectorFilter) -> Self {
208 self.filter = Some(filter);
209 self
210 }
211
212 /// Attach a minimum-score floor. Builder-style.
213 #[must_use]
214 pub const fn with_min_score(mut self, min_score: f32) -> Self {
215 self.min_score = Some(min_score);
216 self
217 }
218}
219
220/// Returns documents ranked by relevance to a [`RetrievalQuery`].
221///
222/// Backed by anything from a BM25 index to a remote search API; the
223/// signature is intentionally backend-agnostic. Implementations
224/// honour the query's `top_k`, `min_score`, and `filter` to the
225/// extent the underlying backend supports them.
226#[async_trait]
227pub trait Retriever: Send + Sync + 'static {
228 /// Look up matches for `query`.
229 async fn retrieve(
230 &self,
231 query: RetrievalQuery,
232 ctx: &ExecutionContext,
233 ) -> Result<Vec<Document>>;
234}
235
/// Stable identifier of an indexed document. Backends mint the id at
/// insertion time; handing the same id to [`VectorStore::update`] or
/// [`VectorStore::delete`] is the canonical way to mutate or remove a
/// document that was indexed earlier.
pub type DocumentId = String;
241
242/// Predicate against [`Document::metadata`] used by
243/// [`VectorStore::search_filtered`]. Backends translate this into
244/// their native filter language (`pgvector` `WHERE`, qdrant `Filter`,
245/// lancedb `where`); backends that cannot honour a given variant
246/// fall back to filterless search and emit a `LossyEncode`-style
247/// warning at the operator layer.
248#[derive(Clone, Debug, PartialEq)]
249#[non_exhaustive]
250pub enum VectorFilter {
251 /// Always passes — useful as identity inside `And`/`Or` chains.
252 All,
253 /// `metadata.<key> == value`.
254 Eq {
255 /// Metadata key path (dotted notation).
256 key: String,
257 /// Comparison value.
258 value: serde_json::Value,
259 },
260 /// `metadata.<key> < value`. Numeric semantics; non-numeric
261 /// operands produce an empty result set.
262 Lt {
263 /// Metadata key path.
264 key: String,
265 /// Comparison value (numeric).
266 value: serde_json::Value,
267 },
268 /// `metadata.<key> <= value`. Inclusive variant of [`Self::Lt`].
269 Lte {
270 /// Metadata key path.
271 key: String,
272 /// Comparison value (numeric).
273 value: serde_json::Value,
274 },
275 /// `metadata.<key> > value`.
276 Gt {
277 /// Metadata key path.
278 key: String,
279 /// Comparison value (numeric).
280 value: serde_json::Value,
281 },
282 /// `metadata.<key> >= value`. Inclusive variant of [`Self::Gt`].
283 Gte {
284 /// Metadata key path.
285 key: String,
286 /// Comparison value (numeric).
287 value: serde_json::Value,
288 },
289 /// `min <= metadata.<key> <= max`. Closed interval — backends
290 /// that natively support range queries can push this down as a
291 /// single index probe instead of decomposing into `And(Gte, Lte)`.
292 Range {
293 /// Metadata key path.
294 key: String,
295 /// Lower bound (inclusive). Numeric.
296 min: serde_json::Value,
297 /// Upper bound (inclusive). Numeric.
298 max: serde_json::Value,
299 },
300 /// `metadata.<key>` is one of `values`. Empty `values` matches
301 /// no documents — equivalent to a no-op filter that can be used
302 /// to short-circuit zero-result queries without consulting the
303 /// index.
304 In {
305 /// Metadata key path.
306 key: String,
307 /// Allowed values.
308 values: Vec<serde_json::Value>,
309 },
310 /// `metadata.<key>` is present (any value, including `null`).
311 /// Distinguishes "field unset" from "field set to null".
312 Exists {
313 /// Metadata key path.
314 key: String,
315 },
316 /// All children must match.
317 And(Vec<Self>),
318 /// At least one child must match.
319 Or(Vec<Self>),
320 /// Negate the inner filter.
321 Not(Box<Self>),
322}
323
324/// Vector index keyed by [`Namespace`]. Backed by qdrant, lancedb,
325/// pgvector, etc. in companion crates.
326///
327/// **Layering** — this is **tier 1 (primitive)** of the
328/// semantic-memory three-tier architecture. Operators implement
329/// `VectorStore` once per backend; the bundle
330/// [`crate::SemanticMemory<E, V>`] (tier 2) and the consumer trait
331/// [`crate::SemanticMemoryBackend`] (tier 3) compose it into the
332/// agent-facing surface automatically. Take `Namespace` as a
333/// per-call parameter so a single store instance serves many
334/// tenants.
335///
336/// Every async method accepts an [`ExecutionContext`] so backends
337/// can honour caller-side cancellation and deadlines (CLAUDE.md
338/// §"Cancellation"). The `delete` / `update` / `add_batch` /
339/// `search_filtered` methods have default impls so simple backends
340/// only need `add` and `search` — production backends override
341/// every method for efficiency and correctness.
342///
343/// **Atomicity**: the default `update` impl is non-atomic
344/// (delete-then-add): concurrent `search` calls observe a momentary
345/// gap. Backends that support transactional updates **must**
346/// override.
347#[async_trait]
348pub trait VectorStore: Send + Sync + 'static {
349 /// Vector dimension this index expects.
350 fn dimension(&self) -> usize;
351
352 /// Add a document with its pre-computed vector to the index.
353 /// Implementations validate `vector.len() == self.dimension()`.
354 async fn add(
355 &self,
356 ctx: &ExecutionContext,
357 ns: &Namespace,
358 document: Document,
359 vector: Vec<f32>,
360 ) -> Result<()>;
361
362 /// Search for the top `top_k` nearest documents to `query_vector`.
363 async fn search(
364 &self,
365 ctx: &ExecutionContext,
366 ns: &Namespace,
367 query_vector: &[f32],
368 top_k: usize,
369 ) -> Result<Vec<Document>>;
370
371 /// Delete a document by its backend-assigned id. Default impl
372 /// returns `Error::Config` — backends without a stable id space
373 /// must override or document the lifecycle.
374 async fn delete(&self, _ctx: &ExecutionContext, _ns: &Namespace, _doc_id: &str) -> Result<()> {
375 Err(entelix_core::Error::config(
376 "VectorStore::delete is not supported by this backend",
377 ))
378 }
379
380 /// Replace an existing document's vector and metadata. Default
381 /// impl chains `delete` + `add` (non-atomic — concurrent
382 /// searches observe a gap); backends with atomic-update support
383 /// **must** override.
384 async fn update(
385 &self,
386 ctx: &ExecutionContext,
387 ns: &Namespace,
388 doc_id: &str,
389 document: Document,
390 vector: Vec<f32>,
391 ) -> Result<()> {
392 self.delete(ctx, ns, doc_id).await?;
393 self.add(ctx, ns, document, vector).await
394 }
395
396 /// Insert many documents at once. Default impl loops over `add`,
397 /// polling [`ExecutionContext::is_cancelled`] between iterations
398 /// so a cancelled caller releases the index lock within one
399 /// `add` round-trip instead of completing the full batch.
400 /// Backends that support a native batch endpoint **should**
401 /// override — sequential calls amplify network latency by `N`.
402 async fn add_batch(
403 &self,
404 ctx: &ExecutionContext,
405 ns: &Namespace,
406 items: Vec<(Document, Vec<f32>)>,
407 ) -> Result<()> {
408 for (doc, vec) in items {
409 if ctx.is_cancelled() {
410 return Err(entelix_core::Error::Cancelled);
411 }
412 self.add(ctx, ns, doc, vec).await?;
413 }
414 Ok(())
415 }
416
417 /// Top-K nearest matches with a metadata filter pushed down to
418 /// the index. Default impl returns [`entelix_core::Error::Config`] —
419 /// silently dropping the filter would return wrong results, so
420 /// the trait makes the backend's lack of filter support
421 /// explicit. Backends with filter support **must** override.
422 async fn search_filtered(
423 &self,
424 _ctx: &ExecutionContext,
425 _ns: &Namespace,
426 _query_vector: &[f32],
427 _top_k: usize,
428 _filter: &VectorFilter,
429 ) -> Result<Vec<Document>> {
430 Err(entelix_core::Error::config(
431 "VectorStore::search_filtered is not supported by this backend; \
432 override the trait method to push filters down to the index",
433 ))
434 }
435
436 /// Count documents in the namespace, optionally narrowed by a
437 /// filter. Used by dashboards reporting per-tenant index sizes
438 /// and by memory-budget enforcement (skip indexing when the
439 /// namespace is at its cap).
440 ///
441 /// Default impl returns [`entelix_core::Error::Config`] —
442 /// counting requires either a backend-native COUNT or a full
443 /// scan, both of which are operator-visible cost decisions
444 /// that should not be silently approximated.
445 async fn count(
446 &self,
447 _ctx: &ExecutionContext,
448 _ns: &Namespace,
449 _filter: Option<&VectorFilter>,
450 ) -> Result<usize> {
451 Err(entelix_core::Error::config(
452 "VectorStore::count is not supported by this backend; \
453 override the trait method to surface index cardinality",
454 ))
455 }
456
457 /// Enumerate documents in the namespace, optionally narrowed by
458 /// a filter. `limit` caps the page size; `offset` is the page
459 /// start (cursor-style pagination semantics depend on the
460 /// backend). Returned documents may omit their vectors — the
461 /// method is for inspection / pagination, not for retrieval.
462 ///
463 /// Default impl returns [`entelix_core::Error::Config`] —
464 /// listing requires a stable iteration order that not every
465 /// vector backend exposes (e.g. ANN indices give no useful
466 /// ordering across calls).
467 async fn list(
468 &self,
469 _ctx: &ExecutionContext,
470 _ns: &Namespace,
471 _filter: Option<&VectorFilter>,
472 _limit: usize,
473 _offset: usize,
474 ) -> Result<Vec<Document>> {
475 Err(entelix_core::Error::config(
476 "VectorStore::list is not supported by this backend; \
477 override the trait method to enumerate documents in the index",
478 ))
479 }
480}
481
482/// One reranked document paired with the score the [`Reranker`] assigned.
483///
484/// Two scores ride together: the inner `Document::score` carries the
485/// original retrieval score from the [`VectorStore`] (or `None` if the
486/// store did not surface one), and `rerank_score` carries the reranker's
487/// own score. Keeping them distinct preserves explainability — UIs and
488/// dashboards can show "the embedding ranked this 0.82, the cross-encoder
489/// ranked it 0.41 → moved from rank 1 to rank 7" without ambiguity, and
490/// downstream filters can threshold on whichever score the deployment
491/// trusts.
492#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
493pub struct RerankedDocument {
494 /// The reranked document. Its `score` field still holds the
495 /// retrieval score from the originating [`VectorStore`] — the
496 /// reranker does not mutate it.
497 pub document: Document,
498 /// The reranker's score for this document at this query.
499 /// Comparable only within a single rerank call. Higher = better
500 /// per the reranker's scoring model.
501 pub rerank_score: f32,
502}
503
504impl RerankedDocument {
505 /// Build a reranked document from a candidate and a fresh score.
506 pub const fn new(document: Document, rerank_score: f32) -> Self {
507 Self {
508 document,
509 rerank_score,
510 }
511 }
512}
513
514/// Re-rank a candidate document set against the originating query.
515///
516/// Used after a `VectorStore::search` to apply MMR / cross-encoder
517/// scoring / time-decay before returning results to the caller. The
518/// default identity impl ([`IdentityReranker`]) preserves the
519/// retrieval order and copies each candidate's retrieval score into
520/// `rerank_score` so the return type is uniform regardless of which
521/// reranker is wired up; production deployments substitute with
522/// cross-encoder or MMR implementations from companion crates.
523#[async_trait]
524pub trait Reranker: Send + Sync + 'static {
525 /// Re-order (and optionally trim) the candidate list, attaching
526 /// a reranker-specific score to each survivor. The returned
527 /// `Vec` MUST contain only documents from the input candidates
528 /// — rerankers cannot fabricate new content.
529 async fn rerank(
530 &self,
531 query: &str,
532 candidates: Vec<Document>,
533 top_k: usize,
534 ctx: &ExecutionContext,
535 ) -> Result<Vec<RerankedDocument>>;
536}
537
/// Pass-through [`Reranker`]. It yields the first `top_k` candidates in
/// the order the underlying `VectorStore` produced them and copies the
/// retrieval score into [`RerankedDocument::rerank_score`] (using
/// `0.0` when a candidate has no score), so downstream consumers
/// always see the same shape.
#[derive(Debug, Default, Clone, Copy)]
pub struct IdentityReranker;
544
545#[async_trait]
546impl Reranker for IdentityReranker {
547 async fn rerank(
548 &self,
549 _query: &str,
550 mut candidates: Vec<Document>,
551 top_k: usize,
552 _ctx: &ExecutionContext,
553 ) -> Result<Vec<RerankedDocument>> {
554 candidates.truncate(top_k);
555 Ok(candidates
556 .into_iter()
557 .map(|doc| {
558 let score = doc.score.unwrap_or(0.0);
559 RerankedDocument::new(doc, score)
560 })
561 .collect())
562 }
563}