entelix-memory 0.5.4

entelix cross-thread memory — Store/Embedder/Retriever/VectorStore traits, Buffer/Summary/Entity, SemanticMemory<E,R> generic
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
//! `Embedder`, `Retriever`, `VectorStore` traits + `Document`.
//!
//! entelix 1.0 ships only the traits. Concrete impls
//! (`OpenAI`/Voyage/Cohere embedders, qdrant/lancedb vector stores,
//! BM25 retrievers) land in 1.1 companion crates.

use async_trait::async_trait;
use entelix_core::{ExecutionContext, Result};
use serde::{Deserialize, Serialize};

use crate::namespace::Namespace;

/// One retrieved document with optional similarity score.
///
/// Derives `Serialize` / `Deserialize`. The `doc_id`, `metadata` and
/// `score` fields carry `#[serde(default)]`, so serialized payloads that
/// omit them still deserialize; `content` is the only field without a
/// default.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Document {
    /// Backend-assigned stable id, when the originating
    /// [`VectorStore`] mints one. Pass back to
    /// [`VectorStore::update`] / [`VectorStore::delete`] to mutate
    /// or remove this document. `None` for embedded usages where
    /// stable identity isn't tracked.
    #[serde(default)]
    pub doc_id: Option<DocumentId>,
    /// Body text. Implementations may store any UTF-8 payload.
    pub content: String,
    /// Free-form metadata — stored alongside the document for filtering
    /// / display. Use a JSON object by convention. [`Document::new`]
    /// initialises this to `serde_json::Value::Null`.
    #[serde(default)]
    pub metadata: serde_json::Value,
    /// Similarity score from the retriever, if available. Higher = better
    /// match. Comparable only within a single query's result set.
    #[serde(default)]
    pub score: Option<f32>,
}

impl Document {
    /// Build a document from its body text alone.
    ///
    /// Every other field starts unset: `doc_id` and `score` are `None`,
    /// and `metadata` is `serde_json::Value::Null` (not an empty JSON
    /// object). Chain [`Self::with_metadata`] / [`Self::with_doc_id`] to
    /// fill them in.
    #[must_use]
    pub fn new(content: impl Into<String>) -> Self {
        Self {
            doc_id: None,
            content: content.into(),
            metadata: serde_json::Value::Null,
            score: None,
        }
    }

    /// Builder-style metadata setter. Replaces whatever metadata the
    /// document currently holds with `metadata`.
    #[must_use]
    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
        self.metadata = metadata;
        self
    }

    /// Builder-style doc_id setter — backends use this when
    /// surfacing previously-indexed documents so callers can mutate
    /// or delete via the same id. Accepts anything convertible into a
    /// [`DocumentId`] and overwrites any id already set.
    #[must_use]
    pub fn with_doc_id(mut self, id: impl Into<DocumentId>) -> Self {
        self.doc_id = Some(id.into());
        self
    }
}

/// Token-accounting metadata an [`Embedder`] reports alongside the
/// computed vector. Mirrors the `ChatModel` `Usage` shape so cost
/// meters can charge embedding calls with the same machinery they
/// use for completions.
///
/// Marked `#[non_exhaustive]` so future fields (cached_tokens, total
/// for cost, latency_ms) are forward-compatible. As a consequence, code
/// outside this crate cannot build the struct with a literal; use
/// [`EmbeddingUsage::new`] or the derived `Default` instead.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct EmbeddingUsage {
    /// Tokens the embedder counted as input. `0` when the backend
    /// does not surface a count (some local impls do not).
    pub input_tokens: u32,
}

impl EmbeddingUsage {
    /// Build a usage record with the supplied input-token count.
    ///
    /// Declared `const`, so it can also be used to initialise constants
    /// and statics.
    #[must_use]
    pub const fn new(input_tokens: u32) -> Self {
        Self { input_tokens }
    }
}

/// One embedded text's vector plus optional usage metadata.
///
/// `usage` is `None` when the backend does not surface token
/// accounting (in-process stub embedders, hash-based encoders).
/// Cost-aware backends (`OpenAI`, Voyage, Cohere) return `Some` so
/// downstream meters can charge per call.
///
/// `usage` carries `#[serde(default)]`, so serialized embeddings that
/// omit it deserialize with `usage == None`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Embedding {
    /// The encoded vector. Length always equals
    /// [`Embedder::dimension`].
    pub vector: Vec<f32>,
    /// Token-accounting metadata when the backend reports it.
    #[serde(default)]
    pub usage: Option<EmbeddingUsage>,
}

impl Embedding {
    /// Build an embedding with no usage metadata (`usage` is `None`).
    /// Stub backends and tests use this when they have no token count
    /// to report.
    #[must_use]
    pub const fn new(vector: Vec<f32>) -> Self {
        Self {
            vector,
            usage: None,
        }
    }

    /// Builder-style usage attachment. Consumes the embedding and
    /// returns it with `usage` set to `Some(usage)`, replacing any
    /// usage record attached earlier.
    #[must_use]
    pub const fn with_usage(mut self, usage: EmbeddingUsage) -> Self {
        self.usage = Some(usage);
        self
    }
}

/// Encoder that turns text into a dense vector.
///
/// Most implementations sit in front of a remote API
/// (`OpenAI` / Voyage / Cohere). Per F10, an instance is wrapped in
/// `Arc` at the call boundary — build the client once and share it
/// rather than constructing one per call.
///
/// **Override `embed_batch`** whenever the underlying API offers batch
/// inference. The provided default is correct but walks the inputs one
/// at a time through `embed`, i.e. one call per text — avoid relying on
/// it in production.
#[async_trait]
pub trait Embedder: Send + Sync + 'static {
    /// Length of every vector this embedder produces. `VectorStore`
    /// impls use it to validate inserts against the configured index
    /// dimension.
    fn dimension(&self) -> usize;

    /// Embed a single input string.
    ///
    /// The returned [`Embedding`] carries the vector together with
    /// optional usage metadata, so downstream cost meters can charge
    /// the call without a second round-trip.
    async fn embed(&self, text: &str, ctx: &ExecutionContext) -> Result<Embedding>;

    /// Embed several texts, returning one [`Embedding`] per input in
    /// input order.
    ///
    /// The default body calls `embed` sequentially and polls
    /// [`ExecutionContext::is_cancelled`] before each call, so a long
    /// batch stops within one `embed` round-trip of cancellation
    /// instead of draining the whole input.
    ///
    /// # Errors
    ///
    /// Returns `entelix_core::Error::Cancelled` when cancellation is
    /// observed before an item is embedded, and propagates the first
    /// error produced by `embed`. Embeddings computed before the
    /// failure are discarded.
    ///
    /// Implementations with a true batch endpoint **should** override —
    /// sequential calls amplify network latency by `N`.
    async fn embed_batch(
        &self,
        texts: &[String],
        ctx: &ExecutionContext,
    ) -> Result<Vec<Embedding>> {
        let mut embeddings = Vec::with_capacity(texts.len());
        for text in texts {
            // Re-check before every call so cancellation is honoured
            // between items, not only at the start of the batch.
            if ctx.is_cancelled() {
                return Err(entelix_core::Error::Cancelled);
            }
            let embedding = self.embed(text, ctx).await?;
            embeddings.push(embedding);
        }
        Ok(embeddings)
    }
}

/// Declarative description of one retrieval call.
///
/// Carries the text query plus optional knobs: a metadata filter, a
/// minimum score floor, and a top-k cap. Future hybrid-search
/// dimensions (per-field boosts, dense+keyword fusion, reranker
/// hints) ride on `#[non_exhaustive]` without breaking call sites.
///
/// Because the struct is `#[non_exhaustive]`, code outside this crate
/// cannot build it with a struct literal; start from
/// [`RetrievalQuery::new`] and chain the `with_*` builders.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct RetrievalQuery {
    /// Free-text query. Backends interpret it however they like —
    /// dense vector embed-and-search, BM25 token match, hybrid fusion.
    pub text: String,
    /// Maximum number of hits to return. Backends truncate at this
    /// cap; exceeding it is a backend bug.
    pub top_k: usize,
    /// Minimum similarity score required to include a hit. `None`
    /// surfaces every hit regardless of score; useful when score
    /// semantics differ across backends. Comparable only within a
    /// single query's result set.
    pub min_score: Option<f32>,
    /// Metadata predicate. `None` is identity (no filter). Backends
    /// that cannot honour the supplied [`VectorFilter`] variant fall
    /// back to filterless retrieval and emit a `LossyEncode`-style
    /// warning at the operator layer.
    pub filter: Option<VectorFilter>,
}

impl RetrievalQuery {
    /// Start a query from its text and top-k cap.
    ///
    /// The optional knobs begin unset: no metadata filter and no
    /// minimum-score floor.
    #[must_use]
    pub fn new(text: impl Into<String>, top_k: usize) -> Self {
        let text = text.into();
        Self {
            text,
            top_k,
            min_score: None,
            filter: None,
        }
    }

    /// Builder-style: return the query with `filter` as its metadata
    /// predicate, replacing any filter set earlier.
    #[must_use]
    pub fn with_filter(self, filter: VectorFilter) -> Self {
        Self {
            filter: Some(filter),
            ..self
        }
    }

    /// Builder-style: return the query with `min_score` as its
    /// minimum-score floor, replacing any floor set earlier.
    #[must_use]
    pub const fn with_min_score(mut self, min_score: f32) -> Self {
        self.min_score = Some(min_score);
        self
    }
}

/// Returns documents ranked by relevance to a [`RetrievalQuery`].
///
/// Backed by anything from a BM25 index to a remote search API; the
/// signature is intentionally backend-agnostic. Implementations
/// honour the query's `top_k`, `min_score`, and `filter` to the
/// extent the underlying backend supports them.
#[async_trait]
pub trait Retriever: Send + Sync + 'static {
    /// Look up matches for `query`.
    ///
    /// Takes the query by value and borrows the [`ExecutionContext`].
    /// Resolves to the matching [`Document`]s, or to the implementation's
    /// error.
    async fn retrieve(
        &self,
        query: RetrievalQuery,
        ctx: &ExecutionContext,
    ) -> Result<Vec<Document>>;
}

/// Stable identifier for an indexed document. Backends mint these
/// at insertion time; passing the same id to [`VectorStore::update`]
/// or [`VectorStore::delete`] is the canonical way to mutate or
/// remove a previously-indexed document.
///
/// This is a plain alias for `String`, not a distinct type, so any
/// `String` can be used where a `DocumentId` is expected.
pub type DocumentId = String;

/// Predicate against [`Document::metadata`] used by
/// [`VectorStore::search_filtered`]. Backends translate this into
/// their native filter language (`pgvector` `WHERE`, qdrant `Filter`,
/// lancedb `where`); backends that cannot honour a given variant
/// fall back to filterless search and emit a `LossyEncode`-style
/// warning at the operator layer.
///
/// NOTE(review): the default [`VectorStore::search_filtered`] body
/// returns a configuration error rather than falling back to an
/// unfiltered search — confirm which of the two behaviours is the
/// intended contract for backends without filter support.
///
/// The enum is `#[non_exhaustive]`: matches written outside this crate
/// need a wildcard arm, and new variants may be added later.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum VectorFilter {
    /// Always passes — useful as identity inside `And`/`Or` chains.
    All,
    /// `metadata.<key> == value`.
    Eq {
        /// Metadata key path (dotted notation).
        key: String,
        /// Comparison value.
        value: serde_json::Value,
    },
    /// `metadata.<key> < value`. Numeric semantics; non-numeric
    /// operands produce an empty result set.
    Lt {
        /// Metadata key path.
        key: String,
        /// Comparison value (numeric).
        value: serde_json::Value,
    },
    /// `metadata.<key> <= value`. Inclusive variant of [`Self::Lt`].
    Lte {
        /// Metadata key path.
        key: String,
        /// Comparison value (numeric).
        value: serde_json::Value,
    },
    /// `metadata.<key> > value`.
    Gt {
        /// Metadata key path.
        key: String,
        /// Comparison value (numeric).
        value: serde_json::Value,
    },
    /// `metadata.<key> >= value`. Inclusive variant of [`Self::Gt`].
    Gte {
        /// Metadata key path.
        key: String,
        /// Comparison value (numeric).
        value: serde_json::Value,
    },
    /// `min <= metadata.<key> <= max`. Closed interval — backends
    /// that natively support range queries can push this down as a
    /// single index probe instead of decomposing into `And(Gte, Lte)`.
    Range {
        /// Metadata key path.
        key: String,
        /// Lower bound (inclusive). Numeric.
        min: serde_json::Value,
        /// Upper bound (inclusive). Numeric.
        max: serde_json::Value,
    },
    /// `metadata.<key>` is one of `values`. Empty `values` matches
    /// no documents — a match-nothing filter, which lets backends
    /// short-circuit zero-result queries without consulting the
    /// index.
    In {
        /// Metadata key path.
        key: String,
        /// Allowed values.
        values: Vec<serde_json::Value>,
    },
    /// `metadata.<key>` is present (any value, including `null`).
    /// Distinguishes "field unset" from "field set to null".
    Exists {
        /// Metadata key path.
        key: String,
    },
    /// All children must match.
    And(Vec<Self>),
    /// At least one child must match.
    Or(Vec<Self>),
    /// Negate the inner filter. Boxed because the variant refers to
    /// the enum itself.
    Not(Box<Self>),
}

/// Vector index keyed by [`Namespace`]. Backed by qdrant, lancedb,
/// pgvector, etc. in companion crates.
///
/// **Layering** — this is **tier 1 (primitive)** of the
/// semantic-memory three-tier architecture. Operators implement
/// `VectorStore` once per backend; the bundle
/// [`crate::SemanticMemory<E, V>`] (tier 2) and the consumer trait
/// [`crate::SemanticMemoryBackend`] (tier 3) compose it into the
/// agent-facing surface automatically. Take `Namespace` as a
/// per-call parameter so a single store instance serves many
/// tenants.
///
/// Every async method accepts an [`ExecutionContext`] so backends
/// can honour caller-side cancellation and deadlines (CLAUDE.md
/// §"Cancellation"). The `delete` / `update` / `add_batch` /
/// `search_filtered` methods have default impls so simple backends
/// only need `add` and `search` — production backends override
/// every method for efficiency and correctness.
///
/// **Atomicity**: the default `update` impl is non-atomic
/// (delete-then-add): concurrent `search` calls observe a momentary
/// gap. Backends that support transactional updates **must**
/// override.
#[async_trait]
pub trait VectorStore: Send + Sync + 'static {
    /// Vector dimension this index expects.
    fn dimension(&self) -> usize;

    /// Add a document with its pre-computed vector to the index.
    /// Implementations validate `vector.len() == self.dimension()`.
    async fn add(
        &self,
        ctx: &ExecutionContext,
        ns: &Namespace,
        document: Document,
        vector: Vec<f32>,
    ) -> Result<()>;

    /// Search for the top `top_k` nearest documents to `query_vector`.
    async fn search(
        &self,
        ctx: &ExecutionContext,
        ns: &Namespace,
        query_vector: &[f32],
        top_k: usize,
    ) -> Result<Vec<Document>>;

    /// Delete a document by its backend-assigned id. Default impl
    /// returns `Error::Config` — backends without a stable id space
    /// must override or document the lifecycle.
    async fn delete(&self, _ctx: &ExecutionContext, _ns: &Namespace, _doc_id: &str) -> Result<()> {
        Err(entelix_core::Error::config(
            "VectorStore::delete is not supported by this backend",
        ))
    }

    /// Replace an existing document's vector and metadata. Default
    /// impl chains `delete` + `add` (non-atomic — concurrent
    /// searches observe a gap); backends with atomic-update support
    /// **must** override.
    async fn update(
        &self,
        ctx: &ExecutionContext,
        ns: &Namespace,
        doc_id: &str,
        document: Document,
        vector: Vec<f32>,
    ) -> Result<()> {
        self.delete(ctx, ns, doc_id).await?;
        self.add(ctx, ns, document, vector).await
    }

    /// Insert many documents at once. Default impl loops over `add`,
    /// polling [`ExecutionContext::is_cancelled`] between iterations
    /// so a cancelled caller releases the index lock within one
    /// `add` round-trip instead of completing the full batch.
    /// Backends that support a native batch endpoint **should**
    /// override — sequential calls amplify network latency by `N`.
    async fn add_batch(
        &self,
        ctx: &ExecutionContext,
        ns: &Namespace,
        items: Vec<(Document, Vec<f32>)>,
    ) -> Result<()> {
        for (doc, vec) in items {
            if ctx.is_cancelled() {
                return Err(entelix_core::Error::Cancelled);
            }
            self.add(ctx, ns, doc, vec).await?;
        }
        Ok(())
    }

    /// Top-K nearest matches with a metadata filter pushed down to
    /// the index. Default impl returns [`entelix_core::Error::Config`] —
    /// silently dropping the filter would return wrong results, so
    /// the trait makes the backend's lack of filter support
    /// explicit. Backends with filter support **must** override.
    async fn search_filtered(
        &self,
        _ctx: &ExecutionContext,
        _ns: &Namespace,
        _query_vector: &[f32],
        _top_k: usize,
        _filter: &VectorFilter,
    ) -> Result<Vec<Document>> {
        Err(entelix_core::Error::config(
            "VectorStore::search_filtered is not supported by this backend; \
             override the trait method to push filters down to the index",
        ))
    }

    /// Count documents in the namespace, optionally narrowed by a
    /// filter. Used by dashboards reporting per-tenant index sizes
    /// and by memory-budget enforcement (skip indexing when the
    /// namespace is at its cap).
    ///
    /// Default impl returns [`entelix_core::Error::Config`] —
    /// counting requires either a backend-native COUNT or a full
    /// scan, both of which are operator-visible cost decisions
    /// that should not be silently approximated.
    async fn count(
        &self,
        _ctx: &ExecutionContext,
        _ns: &Namespace,
        _filter: Option<&VectorFilter>,
    ) -> Result<usize> {
        Err(entelix_core::Error::config(
            "VectorStore::count is not supported by this backend; \
             override the trait method to surface index cardinality",
        ))
    }

    /// Enumerate documents in the namespace, optionally narrowed by
    /// a filter. `limit` caps the page size; `offset` is the page
    /// start (cursor-style pagination semantics depend on the
    /// backend). Returned documents may omit their vectors — the
    /// method is for inspection / pagination, not for retrieval.
    ///
    /// Default impl returns [`entelix_core::Error::Config`] —
    /// listing requires a stable iteration order that not every
    /// vector backend exposes (e.g. ANN indices give no useful
    /// ordering across calls).
    async fn list(
        &self,
        _ctx: &ExecutionContext,
        _ns: &Namespace,
        _filter: Option<&VectorFilter>,
        _limit: usize,
        _offset: usize,
    ) -> Result<Vec<Document>> {
        Err(entelix_core::Error::config(
            "VectorStore::list is not supported by this backend; \
             override the trait method to enumerate documents in the index",
        ))
    }
}

/// One reranked document paired with the score the [`Reranker`] assigned.
///
/// Two scores ride together: the inner `Document::score` carries the
/// original retrieval score from the [`VectorStore`] (or `None` if the
/// store did not surface one), and `rerank_score` carries the reranker's
/// own score. Keeping them distinct preserves explainability — UIs and
/// dashboards can show "the embedding ranked this 0.82, the cross-encoder
/// ranked it 0.41 → moved from rank 1 to rank 7" without ambiguity, and
/// downstream filters can threshold on whichever score the deployment
/// trusts.
///
/// Unlike `Document::score`, `rerank_score` is not optional: every
/// reranked document carries one.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct RerankedDocument {
    /// The reranked document. Its `score` field still holds the
    /// retrieval score from the originating [`VectorStore`] — the
    /// reranker does not mutate it.
    pub document: Document,
    /// The reranker's score for this document at this query.
    /// Comparable only within a single rerank call. Higher = better
    /// per the reranker's scoring model.
    pub rerank_score: f32,
}

impl RerankedDocument {
    /// Build a reranked document from a candidate and a fresh score.
    ///
    /// `document` is stored unchanged (its own `score` field is left
    /// as-is) and `rerank_score` is recorded next to it.
    #[must_use]
    pub const fn new(document: Document, rerank_score: f32) -> Self {
        Self {
            document,
            rerank_score,
        }
    }
}

/// Re-rank a candidate document set against the originating query.
///
/// Used after a `VectorStore::search` to apply MMR / cross-encoder
/// scoring / time-decay before returning results to the caller. The
/// default identity impl ([`IdentityReranker`]) preserves the
/// retrieval order and copies each candidate's retrieval score into
/// `rerank_score` (using `0.0` when the candidate has no score) so the
/// return type is uniform regardless of which reranker is wired up;
/// production deployments substitute with cross-encoder or MMR
/// implementations from companion crates.
#[async_trait]
pub trait Reranker: Send + Sync + 'static {
    /// Re-order (and optionally trim) the candidate list, attaching
    /// a reranker-specific score to each survivor. The returned
    /// `Vec` MUST contain only documents from the input candidates
    /// — rerankers cannot fabricate new content.
    ///
    /// `query` is the originating query text, `candidates` the
    /// documents to re-rank (taken by value), and `top_k` the
    /// requested result count.
    async fn rerank(
        &self,
        query: &str,
        candidates: Vec<Document>,
        top_k: usize,
        ctx: &ExecutionContext,
    ) -> Result<Vec<RerankedDocument>>;
}

/// No-op [`Reranker`]: returns the first `top_k` candidates in the
/// order the underlying `VectorStore` produced them, copying the
/// retrieval score into [`RerankedDocument::rerank_score`] so
/// downstream consumers see a uniform shape. Candidates without a
/// retrieval score get a `rerank_score` of `0.0`.
///
/// A field-less unit struct: it holds no state.
#[derive(Clone, Copy, Debug, Default)]
pub struct IdentityReranker;

#[async_trait]
impl Reranker for IdentityReranker {
    /// Pass the candidates through without re-ordering them.
    ///
    /// Keeps at most the first `top_k` candidates in their incoming
    /// order and wraps each in a [`RerankedDocument`] whose
    /// `rerank_score` is the candidate's own `score`, or `0.0` when the
    /// candidate has none. The query text and the execution context
    /// are ignored, and the call never returns an error.
    async fn rerank(
        &self,
        _query: &str,
        candidates: Vec<Document>,
        top_k: usize,
        _ctx: &ExecutionContext,
    ) -> Result<Vec<RerankedDocument>> {
        let reranked: Vec<RerankedDocument> = candidates
            .into_iter()
            .take(top_k)
            .map(|document| {
                // No retrieval score to copy: fall back to 0.0.
                let rerank_score = document.score.unwrap_or(0.0);
                RerankedDocument::new(document, rerank_score)
            })
            .collect();
        Ok(reranked)
    }
}