Skip to main content

entelix_memory/
traits.rs

//! `Embedder`, `Retriever`, `VectorStore`, and `Reranker` traits, plus
//! `Document` and the supporting query / filter / embedding types.
//!
//! entelix 1.0 ships only the traits. Concrete impls
//! (`OpenAI`/Voyage/Cohere embedders, qdrant/lancedb vector stores,
//! BM25 retrievers) land in 1.1 companion crates.
6
7use async_trait::async_trait;
8use entelix_core::{ExecutionContext, Result};
9use serde::{Deserialize, Serialize};
10
11use crate::namespace::Namespace;
12
13/// One retrieved document with optional similarity score.
14#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
15pub struct Document {
16    /// Backend-assigned stable id, when the originating
17    /// [`VectorStore`] mints one. Pass back to
18    /// [`VectorStore::update`] / [`VectorStore::delete`] to mutate
19    /// or remove this document. `None` for embedded usages where
20    /// stable identity isn't tracked.
21    #[serde(default)]
22    pub doc_id: Option<DocumentId>,
23    /// Body text. Implementations may store any UTF-8 payload.
24    pub content: String,
25    /// Free-form metadata — stored alongside the document for filtering
26    /// / display. Use a JSON object by convention.
27    #[serde(default)]
28    pub metadata: serde_json::Value,
29    /// Similarity score from the retriever, if available. Higher = better
30    /// match. Comparable only within a single query's result set.
31    #[serde(default)]
32    pub score: Option<f32>,
33}
34
35impl Document {
36    /// Convenience: build a document with empty metadata and no score.
37    pub fn new(content: impl Into<String>) -> Self {
38        Self {
39            doc_id: None,
40            content: content.into(),
41            metadata: serde_json::Value::Null,
42            score: None,
43        }
44    }
45
46    /// Builder-style metadata setter.
47    #[must_use]
48    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
49        self.metadata = metadata;
50        self
51    }
52
53    /// Builder-style doc_id setter — backends use this when
54    /// surfacing previously-indexed documents so callers can mutate
55    /// or delete via the same id.
56    #[must_use]
57    pub fn with_doc_id(mut self, id: impl Into<DocumentId>) -> Self {
58        self.doc_id = Some(id.into());
59        self
60    }
61}
62
63/// Token-accounting metadata an [`Embedder`] reports alongside the
64/// computed vector. Mirrors the `ChatModel` `Usage` shape so cost
65/// meters can charge embedding calls with the same machinery they
66/// use for completions.
67///
68/// Marked `#[non_exhaustive]` so future fields (cached_tokens, total
69/// for cost, latency_ms) are forward-compatible.
70#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
71#[non_exhaustive]
72pub struct EmbeddingUsage {
73    /// Tokens the embedder counted as input. `0` when the backend
74    /// does not surface a count (some local impls do not).
75    pub input_tokens: u32,
76}
77
78impl EmbeddingUsage {
79    /// Build a usage record with the supplied input-token count.
80    #[must_use]
81    pub const fn new(input_tokens: u32) -> Self {
82        Self { input_tokens }
83    }
84}
85
86/// One embedded text's vector plus optional usage metadata.
87///
88/// `usage` is `None` when the backend does not surface token
89/// accounting (in-process stub embedders, hash-based encoders).
90/// Cost-aware backends (`OpenAI`, Voyage, Cohere) return `Some` so
91/// downstream meters can charge per call.
92#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
93pub struct Embedding {
94    /// The encoded vector. Length always equals
95    /// [`Embedder::dimension`].
96    pub vector: Vec<f32>,
97    /// Token-accounting metadata when the backend reports it.
98    #[serde(default)]
99    pub usage: Option<EmbeddingUsage>,
100}
101
102impl Embedding {
103    /// Build an embedding with no usage metadata. Stub backends and
104    /// tests use this when they have no token count to report.
105    #[must_use]
106    pub const fn new(vector: Vec<f32>) -> Self {
107        Self {
108            vector,
109            usage: None,
110        }
111    }
112
113    /// Builder-style usage attachment.
114    #[must_use]
115    pub const fn with_usage(mut self, usage: EmbeddingUsage) -> Self {
116        self.usage = Some(usage);
117        self
118    }
119}
120
121/// Text → vector encoder.
122///
123/// Implementations are typically backed by a remote API
124/// (`OpenAI` / Voyage / Cohere). Per F10, instances are wrapped in `Arc`
125/// at the call boundary; do not create a new client per call.
126///
127/// **Override `embed_batch`** when the underlying API supports
128/// batch inference. The provided default loops sequentially via
129/// `embed`, which is correct but allocates one HTTP call per
130/// document — avoid in production.
131#[async_trait]
132pub trait Embedder: Send + Sync + 'static {
133    /// Output vector dimension. Used by `VectorStore` impls to validate
134    /// inserts against the configured index dimension.
135    fn dimension(&self) -> usize;
136
137    /// Embed one input string. Returns the vector plus optional
138    /// usage metadata so downstream cost meters can charge the
139    /// call without a second round-trip.
140    async fn embed(&self, text: &str, ctx: &ExecutionContext) -> Result<Embedding>;
141
142    /// Batch embed. Default impl runs sequentially via `embed`,
143    /// polling [`ExecutionContext::is_cancelled`] between iterations
144    /// so a long batch bails out within one `embed` round-trip of
145    /// cancellation rather than draining the full pool.
146    /// Implementations that support a true batch endpoint
147    /// **should** override — sequential calls amplify network
148    /// latency by `N`.
149    async fn embed_batch(
150        &self,
151        texts: &[String],
152        ctx: &ExecutionContext,
153    ) -> Result<Vec<Embedding>> {
154        let mut out = Vec::with_capacity(texts.len());
155        for text in texts {
156            if ctx.is_cancelled() {
157                return Err(entelix_core::Error::Cancelled);
158            }
159            out.push(self.embed(text, ctx).await?);
160        }
161        Ok(out)
162    }
163}
164
165/// Declarative description of one retrieval call.
166///
167/// Carries the text query plus optional knobs: a metadata filter, a
168/// minimum score floor, and a top-k cap. Future hybrid-search
169/// dimensions (per-field boosts, dense+keyword fusion, reranker
170/// hints) ride on `#[non_exhaustive]` without breaking call sites.
171#[derive(Clone, Debug)]
172#[non_exhaustive]
173pub struct RetrievalQuery {
174    /// Free-text query. Backends interpret it however they like —
175    /// dense vector embed-and-search, BM25 token match, hybrid fusion.
176    pub text: String,
177    /// Maximum number of hits to return. Backends truncate at this
178    /// cap; exceeding it is a backend bug.
179    pub top_k: usize,
180    /// Minimum similarity score required to include a hit. `None`
181    /// surfaces every hit regardless of score; useful when score
182    /// semantics differ across backends. Comparable only within a
183    /// single query's result set.
184    pub min_score: Option<f32>,
185    /// Metadata predicate. `None` is identity (no filter). Backends
186    /// that cannot honour the supplied [`VectorFilter`] variant fall
187    /// back to filterless retrieval and emit a `LossyEncode`-style
188    /// warning at the operator layer.
189    pub filter: Option<VectorFilter>,
190}
191
192impl RetrievalQuery {
193    /// Build a query with sensible defaults — no filter, no
194    /// floor, top-k cap supplied.
195    #[must_use]
196    pub fn new(text: impl Into<String>, top_k: usize) -> Self {
197        Self {
198            text: text.into(),
199            top_k,
200            min_score: None,
201            filter: None,
202        }
203    }
204
205    /// Attach a metadata filter. Builder-style.
206    #[must_use]
207    pub fn with_filter(mut self, filter: VectorFilter) -> Self {
208        self.filter = Some(filter);
209        self
210    }
211
212    /// Attach a minimum-score floor. Builder-style.
213    #[must_use]
214    pub const fn with_min_score(mut self, min_score: f32) -> Self {
215        self.min_score = Some(min_score);
216        self
217    }
218}
219
220/// Returns documents ranked by relevance to a [`RetrievalQuery`].
221///
222/// Backed by anything from a BM25 index to a remote search API; the
223/// signature is intentionally backend-agnostic. Implementations
224/// honour the query's `top_k`, `min_score`, and `filter` to the
225/// extent the underlying backend supports them.
226#[async_trait]
227pub trait Retriever: Send + Sync + 'static {
228    /// Look up matches for `query`.
229    async fn retrieve(
230        &self,
231        query: RetrievalQuery,
232        ctx: &ExecutionContext,
233    ) -> Result<Vec<Document>>;
234}
235
/// Stable identifier of an indexed document.
///
/// Ids are minted by the backend at insertion time. Handing the same
/// id to [`VectorStore::update`] or [`VectorStore::delete`] is the
/// canonical way to mutate or remove a previously-indexed document.
pub type DocumentId = String;
241
242/// Predicate against [`Document::metadata`] used by
243/// [`VectorStore::search_filtered`]. Backends translate this into
244/// their native filter language (`pgvector` `WHERE`, qdrant `Filter`,
245/// lancedb `where`); backends that cannot honour a given variant
246/// fall back to filterless search and emit a `LossyEncode`-style
247/// warning at the operator layer.
248#[derive(Clone, Debug, PartialEq)]
249#[non_exhaustive]
250pub enum VectorFilter {
251    /// Always passes — useful as identity inside `And`/`Or` chains.
252    All,
253    /// `metadata.<key> == value`.
254    Eq {
255        /// Metadata key path (dotted notation).
256        key: String,
257        /// Comparison value.
258        value: serde_json::Value,
259    },
260    /// `metadata.<key> < value`. Numeric semantics; non-numeric
261    /// operands produce an empty result set.
262    Lt {
263        /// Metadata key path.
264        key: String,
265        /// Comparison value (numeric).
266        value: serde_json::Value,
267    },
268    /// `metadata.<key> <= value`. Inclusive variant of [`Self::Lt`].
269    Lte {
270        /// Metadata key path.
271        key: String,
272        /// Comparison value (numeric).
273        value: serde_json::Value,
274    },
275    /// `metadata.<key> > value`.
276    Gt {
277        /// Metadata key path.
278        key: String,
279        /// Comparison value (numeric).
280        value: serde_json::Value,
281    },
282    /// `metadata.<key> >= value`. Inclusive variant of [`Self::Gt`].
283    Gte {
284        /// Metadata key path.
285        key: String,
286        /// Comparison value (numeric).
287        value: serde_json::Value,
288    },
289    /// `min <= metadata.<key> <= max`. Closed interval — backends
290    /// that natively support range queries can push this down as a
291    /// single index probe instead of decomposing into `And(Gte, Lte)`.
292    Range {
293        /// Metadata key path.
294        key: String,
295        /// Lower bound (inclusive). Numeric.
296        min: serde_json::Value,
297        /// Upper bound (inclusive). Numeric.
298        max: serde_json::Value,
299    },
300    /// `metadata.<key>` is one of `values`. Empty `values` matches
301    /// no documents — equivalent to a no-op filter that can be used
302    /// to short-circuit zero-result queries without consulting the
303    /// index.
304    In {
305        /// Metadata key path.
306        key: String,
307        /// Allowed values.
308        values: Vec<serde_json::Value>,
309    },
310    /// `metadata.<key>` is present (any value, including `null`).
311    /// Distinguishes "field unset" from "field set to null".
312    Exists {
313        /// Metadata key path.
314        key: String,
315    },
316    /// All children must match.
317    And(Vec<Self>),
318    /// At least one child must match.
319    Or(Vec<Self>),
320    /// Negate the inner filter.
321    Not(Box<Self>),
322}
323
324/// Vector index keyed by [`Namespace`]. Backed by qdrant, lancedb,
325/// pgvector, etc. in companion crates.
326///
327/// **Layering** — this is **tier 1 (primitive)** of the
328/// semantic-memory three-tier architecture. Operators implement
329/// `VectorStore` once per backend; the bundle
330/// [`crate::SemanticMemory<E, V>`] (tier 2) and the consumer trait
331/// [`crate::SemanticMemoryBackend`] (tier 3) compose it into the
332/// agent-facing surface automatically. Take `Namespace` as a
333/// per-call parameter so a single store instance serves many
334/// tenants.
335///
336/// Every async method accepts an [`ExecutionContext`] so backends
337/// can honour caller-side cancellation and deadlines (CLAUDE.md
338/// §"Cancellation"). The `delete` / `update` / `add_batch` /
339/// `search_filtered` methods have default impls so simple backends
340/// only need `add` and `search` — production backends override
341/// every method for efficiency and correctness.
342///
343/// **Atomicity**: the default `update` impl is non-atomic
344/// (delete-then-add): concurrent `search` calls observe a momentary
345/// gap. Backends that support transactional updates **must**
346/// override.
347#[async_trait]
348pub trait VectorStore: Send + Sync + 'static {
349    /// Vector dimension this index expects.
350    fn dimension(&self) -> usize;
351
352    /// Add a document with its pre-computed vector to the index.
353    /// Implementations validate `vector.len() == self.dimension()`.
354    async fn add(
355        &self,
356        ctx: &ExecutionContext,
357        ns: &Namespace,
358        document: Document,
359        vector: Vec<f32>,
360    ) -> Result<()>;
361
362    /// Search for the top `top_k` nearest documents to `query_vector`.
363    async fn search(
364        &self,
365        ctx: &ExecutionContext,
366        ns: &Namespace,
367        query_vector: &[f32],
368        top_k: usize,
369    ) -> Result<Vec<Document>>;
370
371    /// Delete a document by its backend-assigned id. Default impl
372    /// returns `Error::Config` — backends without a stable id space
373    /// must override or document the lifecycle.
374    async fn delete(&self, _ctx: &ExecutionContext, _ns: &Namespace, _doc_id: &str) -> Result<()> {
375        Err(entelix_core::Error::config(
376            "VectorStore::delete is not supported by this backend",
377        ))
378    }
379
380    /// Replace an existing document's vector and metadata. Default
381    /// impl chains `delete` + `add` (non-atomic — concurrent
382    /// searches observe a gap); backends with atomic-update support
383    /// **must** override.
384    async fn update(
385        &self,
386        ctx: &ExecutionContext,
387        ns: &Namespace,
388        doc_id: &str,
389        document: Document,
390        vector: Vec<f32>,
391    ) -> Result<()> {
392        self.delete(ctx, ns, doc_id).await?;
393        self.add(ctx, ns, document, vector).await
394    }
395
396    /// Insert many documents at once. Default impl loops over `add`,
397    /// polling [`ExecutionContext::is_cancelled`] between iterations
398    /// so a cancelled caller releases the index lock within one
399    /// `add` round-trip instead of completing the full batch.
400    /// Backends that support a native batch endpoint **should**
401    /// override — sequential calls amplify network latency by `N`.
402    async fn add_batch(
403        &self,
404        ctx: &ExecutionContext,
405        ns: &Namespace,
406        items: Vec<(Document, Vec<f32>)>,
407    ) -> Result<()> {
408        for (doc, vec) in items {
409            if ctx.is_cancelled() {
410                return Err(entelix_core::Error::Cancelled);
411            }
412            self.add(ctx, ns, doc, vec).await?;
413        }
414        Ok(())
415    }
416
417    /// Top-K nearest matches with a metadata filter pushed down to
418    /// the index. Default impl returns [`entelix_core::Error::Config`] —
419    /// silently dropping the filter would return wrong results, so
420    /// the trait makes the backend's lack of filter support
421    /// explicit. Backends with filter support **must** override.
422    async fn search_filtered(
423        &self,
424        _ctx: &ExecutionContext,
425        _ns: &Namespace,
426        _query_vector: &[f32],
427        _top_k: usize,
428        _filter: &VectorFilter,
429    ) -> Result<Vec<Document>> {
430        Err(entelix_core::Error::config(
431            "VectorStore::search_filtered is not supported by this backend; \
432             override the trait method to push filters down to the index",
433        ))
434    }
435
436    /// Count documents in the namespace, optionally narrowed by a
437    /// filter. Used by dashboards reporting per-tenant index sizes
438    /// and by memory-budget enforcement (skip indexing when the
439    /// namespace is at its cap).
440    ///
441    /// Default impl returns [`entelix_core::Error::Config`] —
442    /// counting requires either a backend-native COUNT or a full
443    /// scan, both of which are operator-visible cost decisions
444    /// that should not be silently approximated.
445    async fn count(
446        &self,
447        _ctx: &ExecutionContext,
448        _ns: &Namespace,
449        _filter: Option<&VectorFilter>,
450    ) -> Result<usize> {
451        Err(entelix_core::Error::config(
452            "VectorStore::count is not supported by this backend; \
453             override the trait method to surface index cardinality",
454        ))
455    }
456
457    /// Enumerate documents in the namespace, optionally narrowed by
458    /// a filter. `limit` caps the page size; `offset` is the page
459    /// start (cursor-style pagination semantics depend on the
460    /// backend). Returned documents may omit their vectors — the
461    /// method is for inspection / pagination, not for retrieval.
462    ///
463    /// Default impl returns [`entelix_core::Error::Config`] —
464    /// listing requires a stable iteration order that not every
465    /// vector backend exposes (e.g. ANN indices give no useful
466    /// ordering across calls).
467    async fn list(
468        &self,
469        _ctx: &ExecutionContext,
470        _ns: &Namespace,
471        _filter: Option<&VectorFilter>,
472        _limit: usize,
473        _offset: usize,
474    ) -> Result<Vec<Document>> {
475        Err(entelix_core::Error::config(
476            "VectorStore::list is not supported by this backend; \
477             override the trait method to enumerate documents in the index",
478        ))
479    }
480}
481
482/// One reranked document paired with the score the [`Reranker`] assigned.
483///
484/// Two scores ride together: the inner `Document::score` carries the
485/// original retrieval score from the [`VectorStore`] (or `None` if the
486/// store did not surface one), and `rerank_score` carries the reranker's
487/// own score. Keeping them distinct preserves explainability — UIs and
488/// dashboards can show "the embedding ranked this 0.82, the cross-encoder
489/// ranked it 0.41 → moved from rank 1 to rank 7" without ambiguity, and
490/// downstream filters can threshold on whichever score the deployment
491/// trusts.
492#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
493pub struct RerankedDocument {
494    /// The reranked document. Its `score` field still holds the
495    /// retrieval score from the originating [`VectorStore`] — the
496    /// reranker does not mutate it.
497    pub document: Document,
498    /// The reranker's score for this document at this query.
499    /// Comparable only within a single rerank call. Higher = better
500    /// per the reranker's scoring model.
501    pub rerank_score: f32,
502}
503
504impl RerankedDocument {
505    /// Build a reranked document from a candidate and a fresh score.
506    pub const fn new(document: Document, rerank_score: f32) -> Self {
507        Self {
508            document,
509            rerank_score,
510        }
511    }
512}
513
514/// Re-rank a candidate document set against the originating query.
515///
516/// Used after a `VectorStore::search` to apply MMR / cross-encoder
517/// scoring / time-decay before returning results to the caller. The
518/// default identity impl ([`IdentityReranker`]) preserves the
519/// retrieval order and copies each candidate's retrieval score into
520/// `rerank_score` so the return type is uniform regardless of which
521/// reranker is wired up; production deployments substitute with
522/// cross-encoder or MMR implementations from companion crates.
523#[async_trait]
524pub trait Reranker: Send + Sync + 'static {
525    /// Re-order (and optionally trim) the candidate list, attaching
526    /// a reranker-specific score to each survivor. The returned
527    /// `Vec` MUST contain only documents from the input candidates
528    /// — rerankers cannot fabricate new content.
529    async fn rerank(
530        &self,
531        query: &str,
532        candidates: Vec<Document>,
533        top_k: usize,
534        ctx: &ExecutionContext,
535    ) -> Result<Vec<RerankedDocument>>;
536}
537
538/// No-op [`Reranker`]: returns the first `top_k` candidates in the
539/// order the underlying `VectorStore` produced them, copying the
540/// retrieval score into [`RerankedDocument::rerank_score`] so
541/// downstream consumers see a uniform shape.
542#[derive(Clone, Copy, Debug, Default)]
543pub struct IdentityReranker;
544
545#[async_trait]
546impl Reranker for IdentityReranker {
547    async fn rerank(
548        &self,
549        _query: &str,
550        mut candidates: Vec<Document>,
551        top_k: usize,
552        _ctx: &ExecutionContext,
553    ) -> Result<Vec<RerankedDocument>> {
554        candidates.truncate(top_k);
555        Ok(candidates
556            .into_iter()
557            .map(|doc| {
558                let score = doc.score.unwrap_or(0.0);
559                RerankedDocument::new(doc, score)
560            })
561            .collect())
562    }
563}