cortex_retrieval/embedding/mod.rs
1//! Phase 4.C foundation: pluggable embedder trait + record shape.
2//!
3//! This module defines the contract every embedding backend must satisfy and
4//! the durable record shape the [`cortex_store`] side table stores. It is
5//! deliberately a foundation slice (D2-A): no concrete backend implementation
6//! lives here, and the existing deterministic lexical + salience retrieval
7//! pipeline (Phase 4.B baseline) is not regressed by this slice.
8//!
9//! Pluggable backends are identified by [`Embedder::backend_id`], a stable
10//! string the `(memory_id, backend_id)` durable key is composed against. A
11//! memory may carry embeddings from multiple backends simultaneously (the
12//! deterministic stub coexisting with a real model during a migration window
13//! is the canonical use case). The [`Embedder::dim`] contract is stable for
14//! a given `backend_id` — changing dimension is a backend version change and
15//! MUST be expressed as a new `backend_id` so old vectors stay readable.
16//!
17//! Downstream slices:
18//!
19//! - **D2-B** (sibling agent) lands the first concrete backend in
20//! `embedding/local_stub.rs` (and any subsequent real model integrations).
21//! Re-exported below.
22//! - **D2-C** (sibling agent) wires the score composer to consume cosine
23//! similarity over [`EmbedRecord::vector`] alongside the existing lexical
24//! and salience components, gated so the additivity guardrail keeps the
25//! lexical + salience baseline from regressing.
26
27// Re-export the persistence-side record so downstream callers can pull a
28// single name from `cortex_retrieval::embedding`. The canonical definition
29// lives in `cortex_store::repo::embeddings` because the durable side table
30// is what fixes the byte-level encoding contract — this re-export keeps the
31// API surface of the retrieval crate aligned with the spec.
32pub use cortex_store::repo::EmbedRecord;
33
34/// Result type for embedder backends.
35pub type EmbedResult<T> = Result<T, EmbedError>;
36
37/// Errors raised by [`Embedder`] backends.
38#[derive(Debug, thiserror::Error)]
39pub enum EmbedError {
40 /// Input text was rejected by the backend (empty, too long, encoding
41 /// rejected, etc.).
42 #[error("embed input rejected: {0}")]
43 InvalidInput(String),
44 /// Backend failed at runtime (model load, tokenizer panic, IO, etc.).
45 #[error("embed backend failed: {0}")]
46 Backend(String),
47 /// Backend produced a vector whose length did not match the advertised
48 /// [`Embedder::dim`].
49 #[error("embed dimension mismatch: backend `{backend_id}` advertises dim {expected} but produced {actual}")]
50 DimensionMismatch {
51 /// Backend that produced the mismatched vector.
52 backend_id: String,
53 /// Dimension the backend advertises via [`Embedder::dim`].
54 expected: usize,
55 /// Actual length of the produced vector.
56 actual: usize,
57 },
58}
59
60/// An embedder produces a deterministic, fixed-length vector from a memory's
61/// claim text + tag context.
62///
63/// Backends are pluggable. Two contracts every backend MUST satisfy:
64///
65/// 1. [`backend_id`](Self::backend_id) is stable for the lifetime of the
66/// backend version. A `(memory_id, backend_id)` pair is the durable key
67/// on the storage substrate: changing `backend_id` is how a backend
68/// signals "all previously stored vectors are from a different version
69/// of me and should be recomputed".
70/// 2. [`dim`](Self::dim) is stable for a given `backend_id`. If a backend
71/// rev changes its dimensionality, that rev MUST surface as a new
72/// `backend_id` so legacy vectors stored under the prior id remain
73/// decodable and comparable.
74///
75/// The default backend (lives in `local_stub.rs`, slice D2-B) is a
76/// deterministic placeholder until a real model integration lands.
77pub trait Embedder: Send + Sync {
78 /// Stable identifier for this backend (e.g., `"stub:v1"`,
79 /// `"onnx:minilm-l6"`).
80 fn backend_id(&self) -> &str;
81
82 /// Output dimensionality. Must be stable for a given
83 /// [`backend_id`](Self::backend_id).
84 fn dim(&self) -> usize;
85
86 /// Embed a single `text + tags` pair into a `Vec<f32>` of length
87 /// [`dim`](Self::dim).
88 ///
89 /// Backends MUST verify their own output length and surface a
90 /// [`EmbedError::DimensionMismatch`] when the produced vector does not
91 /// match the advertised dimension.
92 fn embed(&self, text: &str, tags: &[String]) -> EmbedResult<Vec<f32>>;
93}
94
/// Cosine similarity between two equal-length `f32` vectors.
///
/// Degenerate inputs collapse to `0.0` rather than propagating `NaN`:
/// mismatched lengths (no meaningful comparison exists), empty slices,
/// zero-magnitude vectors (the conventional "no similarity" answer), and
/// vectors containing a `NaN` component all yield `0.0`, so the score
/// composer never has to special-case a `NaN` score.
///
/// The result is clamped to `[-1.0, 1.0]` to absorb floating-point drift
/// on otherwise-identical vectors.
#[must_use]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Length mismatch or empty input: nothing comparable.
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    // Single pass accumulating (dot, |a|^2, |b|^2); `try_fold` short-circuits
    // to `None` the moment a NaN component is seen. Accumulation order is the
    // same sequential left-to-right order as a plain loop.
    let folded = a.iter().zip(b.iter()).try_fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, norm_a, norm_b), (&x, &y)| {
            if x.is_nan() || y.is_nan() {
                None
            } else {
                Some((dot + x * y, norm_a + x * x, norm_b + y * y))
            }
        },
    );
    let (dot, norm_a, norm_b) = match folded {
        Some(sums) => sums,
        None => return 0.0,
    };
    // Either vector with zero magnitude degenerates to "no similarity".
    if norm_a <= 0.0 || norm_b <= 0.0 {
        return 0.0;
    }
    let denom = norm_a.sqrt() * norm_b.sqrt();
    // Guard against overflow-to-infinity in the norm product.
    if denom <= 0.0 || !denom.is_finite() {
        return 0.0;
    }
    let raw = dot / denom;
    // Belt-and-braces: never let a NaN escape; clamp residual fp drift.
    if raw.is_nan() {
        0.0
    } else {
        raw.clamp(-1.0, 1.0)
    }
}
135
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{DateTime, TimeZone, Utc};
    use cortex_core::MemoryId;

    /// Fixed test clock: 2026-01-01 12:00:`second` UTC.
    fn at(second: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 1, 1, 12, 0, second).unwrap()
    }

    /// Parse a literal memory id; panics only on a malformed test literal.
    fn mem(id: &str) -> MemoryId {
        id.parse().expect("valid memory id")
    }

    #[test]
    fn cosine_similarity_handles_orthogonal_zero() {
        // Two perfectly orthogonal vectors -> 0.0.
        let a = vec![1.0f32, 0.0, 0.0];
        let b = vec![0.0f32, 1.0, 0.0];
        let sim = cosine_similarity(&a, &b);
        assert!(
            (sim - 0.0).abs() < 1e-6,
            "orthogonal vectors must produce similarity ~0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_identical_one() {
        // A vector against itself -> 1.0.
        let a = vec![0.3f32, -0.7, 0.5, 0.1];
        let sim = cosine_similarity(&a, &a);
        assert!(
            (sim - 1.0).abs() < 1e-6,
            "identical vectors must produce similarity 1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_opposite_negative_one() {
        // A vector against its negation -> -1.0.
        let a = vec![0.3f32, -0.7, 0.5, 0.1];
        let b: Vec<f32> = a.iter().map(|v| -v).collect();
        let sim = cosine_similarity(&a, &b);
        assert!(
            (sim - (-1.0)).abs() < 1e-6,
            "antipodal vectors must produce similarity -1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_nan_safely() {
        // NaN inputs must collapse to 0.0, never propagate.
        let a = vec![1.0f32, f32::NAN, 0.0];
        let b = vec![1.0f32, 1.0, 1.0];
        let sim = cosine_similarity(&a, &b);
        assert!(
            !sim.is_nan(),
            "cosine_similarity must never return NaN, got {sim}"
        );
        assert_eq!(sim, 0.0);

        // Zero-magnitude inputs must also collapse to 0.0.
        let zeros = vec![0.0f32; 3];
        let sim_zero = cosine_similarity(&zeros, &[1.0, 2.0, 3.0]);
        assert_eq!(sim_zero, 0.0);

        // Length mismatch is no meaningful comparison: 0.0.
        let sim_mismatch = cosine_similarity(&[1.0, 0.0], &[1.0, 0.0, 0.0]);
        assert_eq!(sim_mismatch, 0.0);

        // Empty inputs are degenerate: 0.0.
        let sim_empty: f32 = cosine_similarity(&[], &[]);
        assert_eq!(sim_empty, 0.0);
    }

    #[test]
    fn embed_record_serializes_round_trip() {
        // Foundation slice doesn't pull in a binary serializer: assert the
        // raw little-endian f32 byte encoding the storage substrate uses
        // is round-trippable here, so D2-B / D2-C can rely on it.
        let record = EmbedRecord::new(
            mem("mem_01ARZ3NDEKTSV4RRFFQ69G5FAV"),
            "stub:v1",
            vec![0.1f32, -0.2, 0.3, 0.4, -0.5],
            at(7),
        )
        .expect("valid record");

        // Encode -> decode the vector through the same byte form the
        // EmbeddingRepo uses.
        let mut bytes = Vec::with_capacity(record.vector.len() * 4);
        for v in &record.vector {
            bytes.extend_from_slice(&v.to_le_bytes());
        }
        assert_eq!(bytes.len(), record.vector.len() * 4);

        let mut decoded = Vec::with_capacity(record.vector.len());
        for chunk in bytes.chunks_exact(4) {
            let arr = <[u8; 4]>::try_from(chunk).expect("chunk_exact yields four bytes");
            decoded.push(f32::from_le_bytes(arr));
        }
        assert_eq!(decoded, record.vector);
        assert_eq!(record.dim as usize, record.vector.len());

        // Rebuilt record matches the original byte-for-byte.
        let rebuilt = EmbedRecord::new(
            record.memory_id,
            record.backend_id.clone(),
            decoded,
            record.computed_at,
        )
        .expect("rebuilt record");
        assert_eq!(rebuilt, record);
    }

    #[test]
    fn embed_record_new_accepts_legitimate_dim() {
        // Renamed from `embed_record_new_rejects_oversized_dim`: directly
        // constructing a vector with > u32::MAX entries is not feasible in
        // a unit test, so the rejection path (dim overflow -> InvalidInput)
        // cannot be exercised here. What this test CAN pin down is that the
        // constructor neither panics nor silently truncates `dim` for
        // legitimate sizes.
        let rec = EmbedRecord::new(
            mem("mem_01ARZ3NDEKTSV4RRFFQ69G5FAV"),
            "stub:v1",
            vec![1.0f32; 8],
            at(0),
        )
        .expect("eight-dim record");
        assert_eq!(rec.dim, 8);
        assert_eq!(rec.vector.len(), 8);
    }
}
269
270pub mod local_stub;
271pub use local_stub::{LocalStubEmbedder, STUB_BACKEND_ID, STUB_DIM};
272
273pub mod ollama;
274pub use ollama::{
275 OllamaEmbedder, DEFAULT_OLLAMA_EMBED_MODEL, DEFAULT_OLLAMA_ENDPOINT, NOMIC_EMBED_DIM,
276 OLLAMA_BACKEND_ID_PREFIX,
277};