Skip to main content

cortex_retrieval/embedding/
mod.rs

1//! Phase 4.C foundation: pluggable embedder trait + record shape.
2//!
3//! This module defines the contract every embedding backend must satisfy and
4//! the durable record shape the [`cortex_store`] side table stores. It is
5//! deliberately a foundation slice (D2-A): no concrete backend implementation
6//! lives here, and the existing deterministic lexical + salience retrieval
7//! pipeline (Phase 4.B baseline) is not regressed by this slice.
8//!
9//! Pluggable backends are identified by [`Embedder::backend_id`], a stable
10//! string the `(memory_id, backend_id)` durable key is composed against. A
11//! memory may carry embeddings from multiple backends simultaneously (the
12//! deterministic stub coexisting with a real model during a migration window
13//! is the canonical use case). The [`Embedder::dim`] contract is stable for
14//! a given `backend_id` — changing dimension is a backend version change and
15//! MUST be expressed as a new `backend_id` so old vectors stay readable.
16//!
17//! Downstream slices:
18//!
19//! - **D2-B** (sibling agent) lands the first concrete backend in
20//!   `embedding/local_stub.rs` (and any subsequent real model integrations).
21//!   Re-exported below.
22//! - **D2-C** (sibling agent) wires the score composer to consume cosine
23//!   similarity over [`EmbedRecord::vector`] alongside the existing lexical
24//!   and salience components, gated so the additivity guardrail keeps the
25//!   lexical + salience baseline from regressing.
26
27// Re-export the persistence-side record so downstream callers can pull a
28// single name from `cortex_retrieval::embedding`. The canonical definition
29// lives in `cortex_store::repo::embeddings` because the durable side table
30// is what fixes the byte-level encoding contract — this re-export keeps the
31// API surface of the retrieval crate aligned with the spec.
32pub use cortex_store::repo::EmbedRecord;
33
/// Result type for embedder backends.
///
/// Shorthand for `Result<T, EmbedError>`, so fallible embedder APIs can
/// write `EmbedResult<T>` instead of repeating the error type.
pub type EmbedResult<T> = Result<T, EmbedError>;
36
/// Errors raised by [`Embedder`] backends.
///
/// Every variant carries enough context to be rendered as a standalone
/// message: the `String` payloads and the named fields are interpolated into
/// the `#[error(...)]` display text of their variant.
#[derive(Debug, thiserror::Error)]
pub enum EmbedError {
    /// Input text was rejected by the backend (empty, too long, encoding
    /// rejected, etc.). The payload is the backend-supplied reason.
    #[error("embed input rejected: {0}")]
    InvalidInput(String),
    /// Backend failed at runtime (model load, tokenizer panic, IO, etc.).
    /// The payload is the backend-supplied failure description.
    #[error("embed backend failed: {0}")]
    Backend(String),
    /// Backend produced a vector whose length did not match the advertised
    /// [`Embedder::dim`].
    #[error("embed dimension mismatch: backend `{backend_id}` advertises dim {expected} but produced {actual}")]
    DimensionMismatch {
        /// Backend that produced the mismatched vector.
        backend_id: String,
        /// Dimension the backend advertises via [`Embedder::dim`].
        expected: usize,
        /// Actual length of the produced vector.
        actual: usize,
    },
}
59
/// An embedder produces a deterministic, fixed-length vector from a memory's
/// claim text + tag context.
///
/// Backends are pluggable. Two contracts every backend MUST satisfy:
///
/// 1. [`backend_id`](Self::backend_id) is stable for the lifetime of the
///    backend version. A `(memory_id, backend_id)` pair is the durable key
///    on the storage substrate: changing `backend_id` is how a backend
///    signals "all previously stored vectors are from a different version
///    of me and should be recomputed".
/// 2. [`dim`](Self::dim) is stable for a given `backend_id`. If a backend
///    rev changes its dimensionality, that rev MUST surface as a new
///    `backend_id` so legacy vectors stored under the prior id remain
///    decodable and comparable.
///
/// The `Send + Sync` supertraits require every implementation to be safe to
/// move to, and share between, threads.
///
/// The default backend (lives in `local_stub.rs`, slice D2-B) is a
/// deterministic placeholder until a real model integration lands.
pub trait Embedder: Send + Sync {
    /// Stable identifier for this backend (e.g., `"stub:v1"`,
    /// `"onnx:minilm-l6"`).
    fn backend_id(&self) -> &str;

    /// Output dimensionality. Must be stable for a given
    /// [`backend_id`](Self::backend_id).
    fn dim(&self) -> usize;

    /// Embed a single `text + tags` pair into a `Vec<f32>` of length
    /// [`dim`](Self::dim).
    ///
    /// Backends MUST verify their own output length and surface a
    /// [`EmbedError::DimensionMismatch`] when the produced vector does not
    /// match the advertised dimension.
    ///
    /// # Errors
    ///
    /// Returns an [`EmbedError`]. [`EmbedError::DimensionMismatch`] is the
    /// variant mandated above for a wrong-length output; the remaining
    /// variants ([`EmbedError::InvalidInput`], [`EmbedError::Backend`]) are
    /// available for rejected input and runtime failures respectively.
    fn embed(&self, text: &str, tags: &[String]) -> EmbedResult<Vec<f32>>;
}
94
/// Cosine similarity between two equal-length `f32` vectors.
///
/// Returns `dot(a, b) / (|a| * |b|)`, clamped to `[-1.0, 1.0]` to absorb
/// floating-point drift on otherwise-identical vectors.
///
/// Degenerate inputs collapse to `0.0` so the composer never has to
/// special-case a `NaN` score. That covers:
///
/// - slices of different lengths (no meaningful comparison exists),
/// - empty slices,
/// - any `NaN` component in either slice,
/// - either vector having zero magnitude,
/// - any infinite component (the norm product is then non-finite).
///
/// The dot product and squared norms are accumulated in `f64`. Squaring an
/// `f32` can underflow to zero (components around `1e-30`) or overflow to
/// infinity (components around `1e30`) when done in `f32`, which would make
/// perfectly valid, non-zero vectors look degenerate. Every finite `f32`
/// squared is representable in `f64`, so only genuinely degenerate inputs
/// take the `0.0` path.
#[must_use]
// The only narrowing cast is applied to a value already clamped to [-1, 1].
#[allow(clippy::cast_possible_truncation)]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let mut dot = 0.0f64;
    let mut norm_a_sq = 0.0f64;
    let mut norm_b_sq = 0.0f64;
    for (&x, &y) in a.iter().zip(b) {
        if x.is_nan() || y.is_nan() {
            return 0.0;
        }
        // Widen before multiplying so the products cannot under/overflow.
        let (x, y) = (f64::from(x), f64::from(y));
        dot += x * y;
        norm_a_sq += x * x;
        norm_b_sq += y * y;
    }

    // Zero-magnitude vector on either side: similarity is undefined.
    if norm_a_sq <= 0.0 || norm_b_sq <= 0.0 {
        return 0.0;
    }

    // A non-finite denominator means an infinite component was present.
    let denom = norm_a_sq.sqrt() * norm_b_sq.sqrt();
    if denom <= 0.0 || !denom.is_finite() {
        return 0.0;
    }

    let raw = dot / denom;
    // Defensive: the guards above should already exclude every NaN source.
    if raw.is_nan() {
        return 0.0;
    }
    raw.clamp(-1.0, 1.0) as f32
}
135
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{DateTime, TimeZone, Utc};
    use cortex_core::MemoryId;

    /// Tolerance used when comparing a similarity against its ideal value.
    const EPSILON: f32 = 1e-6;

    /// Memory id literal shared by the record-construction tests.
    const SAMPLE_MEMORY_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FAV";

    /// Backend id literal shared by the record-construction tests.
    const SAMPLE_BACKEND_ID: &str = "stub:v1";

    /// UTC timestamp on 2026-01-01 at 12:00 with the given seconds value.
    fn at(second: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 1, 1, 12, 0, second).unwrap()
    }

    /// Parses a memory id literal, panicking on a malformed one.
    fn mem(id: &str) -> MemoryId {
        id.parse().expect("valid memory id")
    }

    #[test]
    fn cosine_similarity_handles_orthogonal_zero() {
        // Unit vectors along two different axes share no direction.
        let x_axis = [1.0f32, 0.0, 0.0];
        let y_axis = [0.0f32, 1.0, 0.0];
        let sim = cosine_similarity(&x_axis, &y_axis);
        assert!(
            sim.abs() < EPSILON,
            "orthogonal vectors must produce similarity ~0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_identical_one() {
        // Comparing a vector with itself is the maximum similarity.
        let v = [0.3f32, -0.7, 0.5, 0.1];
        let sim = cosine_similarity(&v, &v);
        assert!(
            (sim - 1.0).abs() < EPSILON,
            "identical vectors must produce similarity 1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_opposite_negative_one() {
        // Comparing a vector with its negation is the minimum similarity.
        let v = [0.3f32, -0.7, 0.5, 0.1];
        let negated: Vec<f32> = v.iter().map(|component| -component).collect();
        let sim = cosine_similarity(&v, &negated);
        assert!(
            (sim + 1.0).abs() < EPSILON,
            "antipodal vectors must produce similarity -1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_nan_safely() {
        // A NaN component must be swallowed, not propagated.
        let sim = cosine_similarity(&[1.0, f32::NAN, 0.0], &[1.0, 1.0, 1.0]);
        assert!(
            !sim.is_nan(),
            "cosine_similarity must never return NaN, got {sim}"
        );
        assert_eq!(sim, 0.0);

        // An all-zero vector has no direction to compare.
        let sim_zero = cosine_similarity(&[0.0; 3], &[1.0, 2.0, 3.0]);
        assert_eq!(sim_zero, 0.0);

        // Vectors of different lengths cannot be compared.
        let sim_mismatch = cosine_similarity(&[1.0, 0.0], &[1.0, 0.0, 0.0]);
        assert_eq!(sim_mismatch, 0.0);

        // Two empty vectors are degenerate as well.
        let sim_empty: f32 = cosine_similarity(&[], &[]);
        assert_eq!(sim_empty, 0.0);
    }

    #[test]
    fn embed_record_serializes_round_trip() {
        // This slice has no binary serializer of its own, so exercise the
        // raw little-endian f32 byte form the storage substrate uses and
        // confirm it round-trips for D2-B / D2-C to rely on.
        let record = EmbedRecord::new(
            mem(SAMPLE_MEMORY_ID),
            SAMPLE_BACKEND_ID,
            vec![0.1f32, -0.2, 0.3, 0.4, -0.5],
            at(7),
        )
        .expect("valid record");

        // Encode: four little-endian bytes per component.
        let bytes: Vec<u8> = record
            .vector
            .iter()
            .flat_map(|component| component.to_le_bytes())
            .collect();
        assert_eq!(bytes.len(), record.vector.len() * 4);

        // Decode: reassemble each four-byte group into an f32.
        let decoded: Vec<f32> = bytes
            .chunks_exact(4)
            .map(|chunk| {
                let quad = <[u8; 4]>::try_from(chunk).expect("chunk_exact yields four bytes");
                f32::from_le_bytes(quad)
            })
            .collect();
        assert_eq!(decoded, record.vector);
        assert_eq!(record.dim as usize, record.vector.len());

        // A record rebuilt from the decoded vector equals the original.
        let rebuilt = EmbedRecord::new(
            record.memory_id,
            record.backend_id.clone(),
            decoded,
            record.computed_at,
        )
        .expect("rebuilt record");
        assert_eq!(rebuilt, record);
    }

    #[test]
    fn embed_record_new_rejects_oversized_dim() {
        // The contract under test is that a dim overflowing u32 surfaces as
        // InvalidInput instead of being silently truncated. A vector with
        // more than u32::MAX entries cannot be built in a unit test, so this
        // only checks that construction succeeds for a legitimate size.
        let component_count = 8;
        let rec = EmbedRecord::new(
            mem(SAMPLE_MEMORY_ID),
            SAMPLE_BACKEND_ID,
            vec![1.0f32; component_count],
            at(0),
        )
        .expect("eight-dim record");
        assert_eq!(rec.dim, 8);
        assert_eq!(rec.vector.len(), component_count);
    }
}
269
270pub mod local_stub;
271pub use local_stub::{LocalStubEmbedder, STUB_BACKEND_ID, STUB_DIM};
272
273pub mod ollama;
274pub use ollama::{
275    OllamaEmbedder, DEFAULT_OLLAMA_EMBED_MODEL, DEFAULT_OLLAMA_ENDPOINT, NOMIC_EMBED_DIM,
276    OLLAMA_BACKEND_ID_PREFIX,
277};