crtx-retrieval 0.1.1

//! Phase 4.B opt-in fuzzy retrieval over the FTS5 trigram mirror.
//!
//! The FTS5 mirror itself lives in `cortex-store` (migration
//! `006_fts5_memories`) and is queried via [`cortex_store::repo::MemoryRepo::fts5_search`].
//! This module is the retrieval-layer adapter: it converts the raw FTS5
//! BM25 ranks into the deterministic `[0, 1]` band that the existing
//! lexical scorer already speaks, and it composes the fuzzy score with
//! the existing `LexicalExplanation::lexical_match` value without
//! displacing exact lexical hits.
//!
//! Design invariants (Phase 4.B eval guardrail):
//!
//!  - The default lexical retrieval path is byte-for-byte unchanged.
//!    Fuzzy retrieval is opt-in at the CLI surface and at the
//!    composition helper; both default OFF.
//!  - Composition stays in `[0, 1]` so the downstream hybrid scorer
//!    in `score.rs` continues to receive the same input shape.
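//!  - FTS5 BM25 ranks are non-positive, and `exp(rank)` is a monotonic
//!    map from that range into `(0, 1]`: a rank of `0` — treated here as
//!    the best rank — lands at `1.0`, and more negative ranks decay
//!    toward `0.0`. No rank is normalised by dataset size, so the
//!    transform is stable across stores and across query batches.
//!    NOTE(review): SQLite documents FTS5 `rank` / `bm25()` as "smaller
//!    is better"; confirm that the rank closest to `0` really is the
//!    strongest match for the values `fts5_search` returns.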

use cortex_core::{CortexError, CortexResult, MemoryId};
use cortex_store::repo::MemoryRepo;

/// Blend weight applied to the fuzzy FTS5 component when composing with
/// the existing deterministic lexical scorer.
///
/// [`compose_fuzzy_boost`] multiplies the fuzzy score by this weight and
/// the lexical score by `1.0 - FUZZY_BOOST_WEIGHT`, so the two shares
/// always sum to one. At 0.25 a fuzzy-only hit tops out at `0.25`, while
/// an exact lexical hit with no fuzzy support scores `0.75`; the exact
/// hit therefore stays dominant. The
/// `compose_fuzzy_keeps_exact_lexical_dominant` invariant test pins both
/// numbers.
///
/// 0.25 was chosen as the smallest blend weight that still lets a
/// typo-of-one-character query surface its target memory above the
/// no-match floor. NOTE(review): nothing in this module checks the
/// "smallest" part of that claim — treat it as tuning rationale and
/// re-validate before changing the value.
pub const FUZZY_BOOST_WEIGHT: f32 = 0.25;

/// One FTS5 fuzzy hit, expressed in the retrieval-layer score band.
///
/// Values are normally built with [`Fts5Hit::from_raw`], which derives
/// `normalized_score` from `raw_rank`. All fields are public, so a hit
/// assembled by hand carries whatever score the caller supplied.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Fts5Hit {
    /// Memory id matched by the FTS5 trigram mirror.
    pub memory_id: MemoryId,
    /// Raw FTS5 BM25 rank (non-positive, smaller is better).
    ///
    /// NOTE(review): `normalize_bm25` scores ranks nearer `0` higher,
    /// which runs opposite to "smaller is better" — confirm which
    /// convention the store layer actually follows.
    pub raw_rank: f32,
    /// `exp(raw_rank)` — the rank mapped into `(0, 1]` so it can be
    /// composed with the deterministic lexical scorer. `normalize_bm25`
    /// additionally clamps the value and returns `0.0` for a non-finite
    /// rank, so the stored score always lies in `[0, 1]` when the hit
    /// was built through `from_raw`.
    pub normalized_score: f32,
}

impl Fts5Hit {
    /// Build a typed hit from the `(memory id, rank)` pair the store
    /// layer returns.
    ///
    /// `raw_rank` is kept verbatim; `normalized_score` is derived from
    /// it with `normalize_bm25`, so the two fields are consistent with
    /// each other for every hit created through this constructor.
    #[must_use]
    pub fn from_raw(memory_id: MemoryId, raw_rank: f32) -> Self {
        // Derive the banded score first so the struct literal below can
        // use field-init shorthand for all three fields.
        let normalized_score = normalize_bm25(raw_rank);
        Self {
            memory_id,
            raw_rank,
            normalized_score,
        }
    }
}

/// Run `query` against the FTS5 mirror and wrap each row as an
/// [`Fts5Hit`].
///
/// The expression is forwarded to `MemoryRepo::fts5_search` exactly as
/// given (it is trimmed only for the emptiness check), together with
/// `limit`. Each `(memory id, rank)` pair that comes back is converted
/// with [`Fts5Hit::from_raw`], preserving the order the store returned.
///
/// `limit` is not interpreted here. The store layer is documented to
/// treat `0` as "return `Ok(vec![])` without touching SQLite"; that
/// short-circuit lives in `cortex-store`, not in this function.
///
/// This function is read-only with respect to its own logic: it does no
/// policy composition and does NOT filter by memory lifecycle status.
/// Callers are expected to layer the default-retrieval gates (proof
/// closure, contradictions, status filter) over the returned hits
/// before composing scores, mirroring the `LexicalIndex::search`
/// contract where the index only ever sees already-vetted documents.
///
/// # Errors
///
/// - `CortexError::Validation` when `query` is empty or whitespace-only,
///   so a blank expression fails closed instead of yielding a silent
///   empty result.
/// - `CortexError::Validation` carrying the store error's message when
///   the mirror lookup itself fails.
pub fn query_fts5(repo: &MemoryRepo<'_>, query: &str, limit: usize) -> CortexResult<Vec<Fts5Hit>> {
    // Guard clause: refuse blank expressions before reaching the store.
    let is_blank = query.trim().is_empty();
    if is_blank {
        return Err(CortexError::Validation(
            "fts5 query must not be empty".into(),
        ));
    }

    // Store failures are surfaced as validation errors with the
    // underlying message embedded, matching the crate's existing shape.
    let rows = match repo.fts5_search(query, limit) {
        Ok(rows) => rows,
        Err(err) => {
            return Err(CortexError::Validation(format!(
                "fts5 mirror lookup failed: {err}"
            )))
        }
    };

    let hits = rows
        .into_iter()
        .map(|(id, bm25_rank)| Fts5Hit::from_raw(id, bm25_rank))
        .collect();
    Ok(hits)
}

/// Squash a raw FTS5 BM25 rank into the `[0, 1]` band used by the
/// retrieval scorer.
///
/// The transform is `exp(rank)` followed by a clamp:
///
///  - `rank == 0.0` yields exactly `1.0`;
///  - increasingly negative ranks decay toward `0.0`, underflowing to
///    it for very large magnitudes;
///  - positive finite ranks — outside the conventional `(-inf, 0]`
///    range — saturate at `1.0` through the clamp;
///  - NaN and ±infinity return `0.0`, so a non-finite rank (e.g. from a
///    corrupt mirror) cannot leak a NaN or out-of-band score to the
///    downstream hybrid scorer.
///
/// The result depends only on `rank`, never on dataset or batch size,
/// so equal ranks always map to equal scores.
///
/// NOTE(review): SQLite documents FTS5 `rank` / `bm25()` as "the better
/// the match, the numerically smaller the value", i.e. *more negative*
/// is stronger, whereas this function scores ranks nearest `0` highest.
/// Confirm against the values `fts5_search` actually returns before
/// relying on the ordering of normalized scores.
#[must_use]
pub fn normalize_bm25(rank: f32) -> f32 {
    match rank {
        finite if finite.is_finite() => finite.exp().clamp(0.0, 1.0),
        // NaN, +inf and -inf all degrade to the floor of the band.
        _ => 0.0,
    }
}

/// Blend a deterministic `lexical_match` value with an FTS5 fuzzy
/// score, weighting the fuzzy side by `FUZZY_BOOST_WEIGHT`.
///
/// The result is the convex combination
///
/// ```text
/// effective = lexical_match * (1 - FUZZY_BOOST_WEIGHT)
///           + fuzzy_score   * FUZZY_BOOST_WEIGHT
/// ```
///
/// Because the two weights sum to one, inputs in `[0, 1]` always
/// produce an output in `[0, 1]`. With the weight at 0.25:
///
///  - a fuzzy-only hit (`lexical_match = 0.0`, `fuzzy_score = 1.0`)
///    scores `0.25`, lifting it above the unmatched floor of `0.0`;
///  - an exact lexical hit with no fuzzy support
///    (`lexical_match = 1.0`, `fuzzy_score = 0.0`) scores `0.75`, so it
///    stays strictly ahead of any fuzzy-only hit.
///
/// The invariant test `compose_fuzzy_keeps_exact_lexical_dominant`
/// pins both figures.
///
/// Each input is sanitised before blending: finite values are clamped
/// into `[0, 1]`, and NaN or ±infinity are replaced by `0.0`. A bad
/// input therefore cannot turn the result into NaN or push it outside
/// the band.
#[must_use]
pub fn compose_fuzzy_boost(lexical_match: f32, fuzzy_score: f32) -> f32 {
    // Shared sanitiser: finite values are clamped, everything else
    // (NaN, ±inf) collapses to the floor of the band.
    let into_band = |value: f32| -> f32 {
        if value.is_finite() {
            value.clamp(0.0, 1.0)
        } else {
            0.0
        }
    };

    let lexical_share = into_band(lexical_match) * (1.0 - FUZZY_BOOST_WEIGHT);
    let fuzzy_share = into_band(fuzzy_score) * FUZZY_BOOST_WEIGHT;
    lexical_share + fuzzy_share
}

#[cfg(test)]
mod tests {
    use super::*;

    // The DB-bound tests for `query_fts5` (typed Fts5Hit shape, exact-match
    // outranks typo, one-character typo still recovers, zero-limit
    // short-circuits, empty expression refusal) live in the
    // `cortex-store/tests/fts5_mirror.rs` integration test file, which
    // already pulls in chrono+serde_json as dev-deps. Keeping them here
    // would add a Cargo.lock delta on this crate, which Phase 4.B
    // explicitly forbids.

    /// `exp(0) == 1`, so a rank of zero lands at the top of the band.
    #[test]
    fn normalize_bm25_maps_best_rank_to_one() {
        // rank = 0.0 is treated as the strongest rank by this module's
        // transform; exp(0) = 1.0.
        assert_eq!(normalize_bm25(0.0), 1.0);
    }

    /// More negative ranks score lower, and every score stays in band.
    #[test]
    fn normalize_bm25_decays_for_weaker_ranks() {
        let strong = normalize_bm25(-0.5);
        let weak = normalize_bm25(-3.0);
        assert!(strong > weak);
        assert!((0.0..=1.0).contains(&strong));
        assert!((0.0..=1.0).contains(&weak));
    }

    /// NaN and both infinities degrade to the floor of the band.
    #[test]
    fn normalize_bm25_handles_non_finite_rank() {
        assert_eq!(normalize_bm25(f32::NAN), 0.0);
        assert_eq!(normalize_bm25(f32::INFINITY), 0.0);
        assert_eq!(normalize_bm25(f32::NEG_INFINITY), 0.0);
    }

    /// A positive finite rank is outside the conventional FTS5 range;
    /// the defensive clamp must saturate it at 1.0 rather than letting
    /// `exp(rank) > 1` escape the band.
    #[test]
    fn normalize_bm25_clamps_positive_rank_to_one() {
        assert_eq!(normalize_bm25(5.0), 1.0);
    }

    #[test]
    fn compose_fuzzy_keeps_exact_lexical_dominant() {
        // Stable Phase 4.B invariant: an exact lexical hit
        // (lexical_match = 1.0) must dominate a fuzzy-only hit
        // (lexical_match = 0.0, fuzzy_score = 1.0). Any blend weight
        // below 0.5 preserves that ordering; the exact-value
        // assertions below additionally pin the current 0.25 weight.
        let exact = compose_fuzzy_boost(1.0, 0.0);
        let fuzzy_only = compose_fuzzy_boost(0.0, 1.0);
        assert!(
            exact > fuzzy_only,
            "exact lexical hit must dominate fuzzy-only hit; got exact={exact}, fuzzy_only={fuzzy_only}"
        );
        assert!((exact - 0.75).abs() < f32::EPSILON);
        assert!((fuzzy_only - FUZZY_BOOST_WEIGHT).abs() < f32::EPSILON);
    }

    #[test]
    fn compose_fuzzy_pure_lexical_stays_in_band() {
        let pure = compose_fuzzy_boost(0.8, 0.0);
        assert!((0.0..=1.0).contains(&pure));
        // Pure-lexical with no fuzzy hit must equal lexical_match *
        // (1 - weight) — the deterministic baseline survives at
        // 75% of its previous magnitude. The remaining 25% is the
        // fuzzy headroom that opens up when --fuzzy is on; the
        // CLI default never reaches this code path (Phase 4.B
        // eval guardrail).
        assert!((pure - 0.8 * (1.0 - FUZZY_BOOST_WEIGHT)).abs() < f32::EPSILON);
    }

    #[test]
    fn compose_fuzzy_clamps_out_of_band_inputs() {
        let huge = compose_fuzzy_boost(5.0, -2.0);
        assert!((0.0..=1.0).contains(&huge));
        // 5.0 clamps to 1.0; -2.0 clamps to 0.0; expected = 0.75.
        assert!((huge - 0.75).abs() < f32::EPSILON);
    }

    #[test]
    fn compose_fuzzy_handles_nan_inputs() {
        // NaN inputs degrade to 0 rather than producing a NaN
        // result. This is the same defense-in-depth that
        // `normalize_bm25` applies on the FTS5 side.
        let composed = compose_fuzzy_boost(f32::NAN, f32::NAN);
        assert_eq!(composed, 0.0);
    }

    #[test]
    fn compose_fuzzy_at_default_baseline_passes_lexical_through_at_weighted_fraction() {
        // Pin the default-baseline weight so a future change that
        // tries to widen FUZZY_BOOST_WEIGHT trips an explicit test.
        assert_eq!(FUZZY_BOOST_WEIGHT, 0.25);

        // With no fuzzy contribution, the lexical score must pass
        // through scaled by exactly (1 - weight) across the whole
        // band, not just at a single sample point.
        for lexical in [0.0_f32, 0.2, 0.5, 0.8, 1.0] {
            let composed = compose_fuzzy_boost(lexical, 0.0);
            let expected = lexical * (1.0 - FUZZY_BOOST_WEIGHT);
            assert!(
                (composed - expected).abs() < f32::EPSILON,
                "lexical={lexical}: expected {expected}, got {composed}"
            );
        }
    }

    #[test]
    fn fts5_hit_from_raw_normalises_bm25_via_exp_transform() {
        // Spot-check that the typed-hit constructor uses the same
        // `normalize_bm25` transform documented on `Fts5Hit`. The
        // memory id value is irrelevant to this test — we only care
        // that the score side of the typed wrapper matches the math.
        let memory_id: MemoryId = "mem_01ARZ3NDEKTSV4RRFFQ69G5FAV"
            .parse()
            .expect("parse memory id");
        let hit = Fts5Hit::from_raw(memory_id, -1.0);
        let expected = (-1.0_f32).exp();
        assert!((hit.normalized_score - expected).abs() < f32::EPSILON);
        assert_eq!(hit.memory_id, memory_id);
        assert_eq!(hit.raw_rank, -1.0);
    }
}