crtx-retrieval 0.1.1

Hybrid retrieval over memory views (lexical + salience; vectors later).
Documentation
//! Phase 4.C foundation: pluggable embedder trait + record shape.
//!
//! This module defines the contract every embedding backend must satisfy and
//! the durable record shape the [`cortex_store`] side table stores. It is
//! deliberately a foundation slice (D2-A): no concrete backend implementation
//! lives here, and the existing deterministic lexical + salience retrieval
//! pipeline (Phase 4.B baseline) is not regressed by this slice.
//!
//! Pluggable backends are identified by [`Embedder::backend_id`], a stable
//! string the `(memory_id, backend_id)` durable key is composed against. A
//! memory may carry embeddings from multiple backends simultaneously (the
//! deterministic stub coexisting with a real model during a migration window
//! is the canonical use case). The [`Embedder::dim`] contract is stable for
//! a given `backend_id` — changing dimension is a backend version change and
//! MUST be expressed as a new `backend_id` so old vectors stay readable.
//!
//! Downstream slices:
//!
//! - **D2-B** (sibling agent) lands the first concrete backend in
//!   `embedding/local_stub.rs` (and any subsequent real model integrations,
//!   such as the `ollama` submodule). Both submodules are declared and
//!   re-exported at the bottom of this file.
//! - **D2-C** (sibling agent) wires the score composer to consume cosine
//!   similarity over [`EmbedRecord::vector`] alongside the existing lexical
//!   and salience components, gated so the additivity guardrail keeps the
//!   lexical + salience baseline from regressing.

// Re-export the persistence-side record so downstream callers can pull a
// single name from `cortex_retrieval::embedding`. The canonical definition
// lives in `cortex_store::repo::embeddings` because the durable side table
// is what fixes the byte-level encoding contract — this re-export keeps the
// API surface of the retrieval crate aligned with the spec.
pub use cortex_store::repo::EmbedRecord;

/// Result type for embedder backends.
///
/// Shorthand for a [`Result`] whose error type is fixed to [`EmbedError`];
/// it is the return type of [`Embedder::embed`].
pub type EmbedResult<T> = Result<T, EmbedError>;

/// Errors raised by [`Embedder`] backends.
///
/// The `Display` text of each variant is generated by `thiserror` from the
/// `#[error("...")]` attribute attached to it. The two tuple variants carry a
/// free-form `String` describing the failure; [`EmbedError::DimensionMismatch`]
/// carries structured fields instead so callers can inspect the exact
/// expected/actual lengths.
#[derive(Debug, thiserror::Error)]
pub enum EmbedError {
    /// Input text was rejected by the backend (empty, too long, encoding
    /// rejected, etc.).
    ///
    /// The payload is the backend-supplied reason and is interpolated into
    /// the error message.
    #[error("embed input rejected: {0}")]
    InvalidInput(String),
    /// Backend failed at runtime (model load, tokenizer panic, IO, etc.).
    ///
    /// The payload is the backend-supplied failure description and is
    /// interpolated into the error message.
    #[error("embed backend failed: {0}")]
    Backend(String),
    /// Backend produced a vector whose length did not match the advertised
    /// [`Embedder::dim`].
    #[error("embed dimension mismatch: backend `{backend_id}` advertises dim {expected} but produced {actual}")]
    DimensionMismatch {
        /// Backend that produced the mismatched vector.
        backend_id: String,
        /// Dimension the backend advertises via [`Embedder::dim`].
        expected: usize,
        /// Actual length of the produced vector.
        actual: usize,
    },
}

/// An embedder produces a deterministic, fixed-length vector from a memory's
/// claim text + tag context.
///
/// Backends are pluggable. Two contracts every backend MUST satisfy:
///
/// 1. [`backend_id`](Self::backend_id) is stable for the lifetime of the
///    backend version. A `(memory_id, backend_id)` pair is the durable key
///    on the storage substrate: changing `backend_id` is how a backend
///    signals "all previously stored vectors are from a different version
///    of me and should be recomputed".
/// 2. [`dim`](Self::dim) is stable for a given `backend_id`. If a backend
///    rev changes its dimensionality, that rev MUST surface as a new
///    `backend_id` so legacy vectors stored under the prior id remain
///    decodable and comparable.
///
/// The default backend (lives in `local_stub.rs`, slice D2-B) is a
/// deterministic placeholder until a real model integration lands.
///
/// The `Send + Sync` supertrait bound means every implementor must be safe
/// to move to, and share between, threads.
pub trait Embedder: Send + Sync {
    /// Stable identifier for this backend (e.g., `"stub:v1"`,
    /// `"onnx:minilm-l6"`).
    ///
    /// This string is the `backend_id` half of the `(memory_id, backend_id)`
    /// durable key described on the trait.
    fn backend_id(&self) -> &str;

    /// Output dimensionality. Must be stable for a given
    /// [`backend_id`](Self::backend_id).
    ///
    /// Every vector returned by [`embed`](Self::embed) is expected to have
    /// exactly this many elements.
    fn dim(&self) -> usize;

    /// Embed a single `text + tags` pair into a `Vec<f32>` of length
    /// [`dim`](Self::dim).
    ///
    /// Backends MUST verify their own output length and surface a
    /// [`EmbedError::DimensionMismatch`] when the produced vector does not
    /// match the advertised dimension.
    ///
    /// # Errors
    ///
    /// Implementations report failures through [`EmbedError`]:
    /// [`EmbedError::InvalidInput`] for rejected input,
    /// [`EmbedError::Backend`] for runtime failures, and
    /// [`EmbedError::DimensionMismatch`] when the output length is wrong.
    fn embed(&self, text: &str, tags: &[String]) -> EmbedResult<Vec<f32>>;
}

/// Cosine similarity between two equal-length `f32` vectors.
///
/// Returns `0.0` when either vector has zero magnitude (the conventional
/// "no similarity" answer for a degenerate input) and returns `0.0` when
/// the lengths differ or both slices are empty (no meaningful comparison
/// exists). The function never returns `NaN`: any input that would produce
/// `NaN` or a non-finite norm (zero magnitude, length mismatch, NaN inputs,
/// infinite components) collapses to `0.0` so the composer never has to
/// special-case a NaN score.
///
/// The dot product and squared norms are accumulated in `f64`. The square of
/// any finite `f32` is representable in `f64`, so vectors with very small
/// components (whose squares would underflow to zero in `f32`) or very large
/// components (whose squares would overflow to infinity in `f32`) are scored
/// by their actual direction instead of being mistaken for degenerate input.
///
/// The result is clamped to `[-1.0, 1.0]` to absorb floating-point drift on
/// otherwise-identical vectors.
#[must_use]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let mut dot = 0.0f64;
    let mut norm_sq_a = 0.0f64;
    let mut norm_sq_b = 0.0f64;
    for (&x, &y) in a.iter().zip(b) {
        if x.is_nan() || y.is_nan() {
            return 0.0;
        }
        // Widen before multiplying: the products of finite f32 values can
        // underflow or overflow in f32 but are always representable in f64.
        let (x, y) = (f64::from(x), f64::from(y));
        dot += x * y;
        norm_sq_a += x * x;
        norm_sq_b += y * y;
    }

    // A zero norm means at least one vector is all zeros.
    if norm_sq_a <= 0.0 || norm_sq_b <= 0.0 {
        return 0.0;
    }
    // An infinite input component makes a norm (and therefore the
    // denominator) non-finite; treat that as "no meaningful comparison".
    let denom = norm_sq_a.sqrt() * norm_sq_b.sqrt();
    if denom <= 0.0 || !denom.is_finite() {
        return 0.0;
    }

    let raw = dot / denom;
    if raw.is_nan() {
        return 0.0;
    }
    // The clamped value lies in [-1.0, 1.0], so narrowing back to f32 only
    // rounds; it can never overflow.
    raw.clamp(-1.0, 1.0) as f32
}

// Unit tests for `cosine_similarity` and for the re-exported `EmbedRecord`
// constructor / vector byte encoding.
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{DateTime, TimeZone, Utc};
    use cortex_core::MemoryId;

    /// Fixed UTC timestamp on 2026-01-01 at 12:00 with the given `second`,
    /// so records built in tests have reproducible `computed_at` values.
    fn at(second: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 1, 1, 12, 0, second).unwrap()
    }

    /// Parse a string literal into a `MemoryId`, panicking if it is invalid.
    fn mem(id: &str) -> MemoryId {
        id.parse().expect("valid memory id")
    }

    #[test]
    fn cosine_similarity_handles_orthogonal_zero() {
        // Two perfectly orthogonal vectors -> 0.0.
        let a = vec![1.0f32, 0.0, 0.0];
        let b = vec![0.0f32, 1.0, 0.0];
        let sim = cosine_similarity(&a, &b);
        assert!(
            (sim - 0.0).abs() < 1e-6,
            "orthogonal vectors must produce similarity ~0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_identical_one() {
        // A vector against itself -> 1.0.
        let a = vec![0.3f32, -0.7, 0.5, 0.1];
        let sim = cosine_similarity(&a, &a);
        assert!(
            (sim - 1.0).abs() < 1e-6,
            "identical vectors must produce similarity 1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_opposite_negative_one() {
        // A vector against its negation -> -1.0.
        let a = vec![0.3f32, -0.7, 0.5, 0.1];
        let b: Vec<f32> = a.iter().map(|v| -v).collect();
        let sim = cosine_similarity(&a, &b);
        assert!(
            (sim - (-1.0)).abs() < 1e-6,
            "antipodal vectors must produce similarity -1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_similarity_handles_nan_safely() {
        // NaN inputs must collapse to 0.0, never propagate.
        let a = vec![1.0f32, f32::NAN, 0.0];
        let b = vec![1.0f32, 1.0, 1.0];
        let sim = cosine_similarity(&a, &b);
        assert!(
            !sim.is_nan(),
            "cosine_similarity must never return NaN, got {sim}"
        );
        assert_eq!(sim, 0.0);

        // Zero-magnitude inputs must also collapse to 0.0.
        let zeros = vec![0.0f32; 3];
        let sim_zero = cosine_similarity(&zeros, &[1.0, 2.0, 3.0]);
        assert_eq!(sim_zero, 0.0);

        // Length mismatch is no meaningful comparison: 0.0.
        let sim_mismatch = cosine_similarity(&[1.0, 0.0], &[1.0, 0.0, 0.0]);
        assert_eq!(sim_mismatch, 0.0);

        // Empty inputs are degenerate: 0.0.
        let sim_empty: f32 = cosine_similarity(&[], &[]);
        assert_eq!(sim_empty, 0.0);
    }

    #[test]
    fn embed_record_serializes_round_trip() {
        // Foundation slice doesn't pull in a binary serializer: assert the
        // raw little-endian f32 byte encoding the storage substrate uses
        // is round-trippable here, so D2-B / D2-C can rely on it.
        let record = EmbedRecord::new(
            mem("mem_01ARZ3NDEKTSV4RRFFQ69G5FAV"),
            "stub:v1",
            vec![0.1f32, -0.2, 0.3, 0.4, -0.5],
            at(7),
        )
        .expect("valid record");

        // Encode -> decode the vector through the same byte form the
        // EmbeddingRepo uses.
        // NOTE(review): the encoding is re-implemented inline here rather
        // than calling into the repo, so this only proves the f32 <-> LE
        // bytes round trip, not that the repo uses the same layout — confirm.
        let mut bytes = Vec::with_capacity(record.vector.len() * 4);
        for v in &record.vector {
            bytes.extend_from_slice(&v.to_le_bytes());
        }
        assert_eq!(bytes.len(), record.vector.len() * 4);

        let mut decoded = Vec::with_capacity(record.vector.len());
        for chunk in bytes.chunks_exact(4) {
            let arr = <[u8; 4]>::try_from(chunk).expect("chunk_exact yields four bytes");
            decoded.push(f32::from_le_bytes(arr));
        }
        assert_eq!(decoded, record.vector);
        assert_eq!(record.dim as usize, record.vector.len());

        // Rebuilt record matches the original byte-for-byte.
        let rebuilt = EmbedRecord::new(
            record.memory_id,
            record.backend_id.clone(),
            decoded,
            record.computed_at,
        )
        .expect("rebuilt record");
        assert_eq!(rebuilt, record);
    }

    #[test]
    fn embed_record_new_rejects_oversized_dim() {
        // Pathological constructor input: u32 overflow on dim must surface
        // as InvalidInput rather than a silent truncation.
        // Direct construction of a vector with > u32::MAX entries is not
        // feasible in a unit test; instead we assert the construction path
        // does not panic for legitimate sizes.
        // NOTE(review): despite its name, this test never exercises a
        // rejection — it only checks that an 8-element vector is accepted
        // and reports `dim == 8`. The oversized-dim error path is untested
        // here.
        let rec = EmbedRecord::new(
            mem("mem_01ARZ3NDEKTSV4RRFFQ69G5FAV"),
            "stub:v1",
            vec![1.0f32; 8],
            at(0),
        )
        .expect("eight-dim record");
        assert_eq!(rec.dim, 8);
        assert_eq!(rec.vector.len(), 8);
    }
}

pub mod local_stub;
pub use local_stub::{LocalStubEmbedder, STUB_BACKEND_ID, STUB_DIM};

pub mod ollama;
pub use ollama::{
    OllamaEmbedder, DEFAULT_OLLAMA_EMBED_MODEL, DEFAULT_OLLAMA_ENDPOINT, NOMIC_EMBED_DIM,
    OLLAMA_BACKEND_ID_PREFIX,
};