Skip to main content

cortex_retrieval/
fts5.rs

1//! Phase 4.B opt-in fuzzy retrieval over the FTS5 trigram mirror.
2//!
3//! The FTS5 mirror itself lives in `cortex-store` (migration
4//! `006_fts5_memories`) and is queried via [`cortex_store::repo::MemoryRepo::fts5_search`].
5//! This module is the retrieval-layer adapter: it converts the raw FTS5
6//! BM25 ranks into the deterministic `[0, 1]` band that the existing
7//! lexical scorer already speaks, and it composes the fuzzy score with
8//! the existing `LexicalExplanation::lexical_match` value without
9//! displacing exact lexical hits.
10//!
11//! Design invariants (Phase 4.B eval guardrail):
12//!
13//!  - The default lexical retrieval path is byte-for-byte unchanged.
14//!    Fuzzy retrieval is opt-in at the CLI surface and at the
15//!    composition helper; both default OFF.
16//!  - Composition stays in `[0, 1]` so the downstream hybrid scorer
17//!    in `score.rs` continues to receive the same input shape.
18//!  - FTS5 BM25 ranks are non-positive monotonic; `exp(rank)` maps the
19//!    full BM25 range into `(0, 1]` with the best rank (closest to 0)
20//!    landing at `1.0`. No rank is normalised by dataset size, so the
21//!    transform is stable across stores and across query batches.
22
23use cortex_core::{CortexError, CortexResult, MemoryId};
24use cortex_store::repo::MemoryRepo;
25
/// Share of the composed score contributed by the fuzzy FTS5 component;
/// the deterministic lexical score keeps the remaining `1 - weight`.
///
/// At 0.25 a fuzzy-only hit (`lexical_match = 0.0`, fuzzy score `1.0`)
/// composes to `0.25`, while an exact lexical hit (`lexical_match = 1.0`)
/// with no fuzzy support composes to `0.75`, so exact lexical hits stay
/// dominant. The `compose_fuzzy_keeps_exact_lexical_dominant` invariant
/// test pins this ordering.
///
/// NOTE(review): the original rationale calls 0.25 the *smallest* weight
/// that lifts a one-character-typo query above the no-match floor. The
/// dominance ordering itself holds for any weight below 0.5, so the
/// "smallest" claim presumably rests on eval results — confirm before
/// retuning.
pub const FUZZY_BOOST_WEIGHT: f32 = 0.25;
35
36/// One FTS5 fuzzy hit, expressed in the retrieval-layer score band.
37#[derive(Debug, Clone, Copy, PartialEq)]
38pub struct Fts5Hit {
39    /// Memory id matched by the FTS5 trigram mirror.
40    pub memory_id: MemoryId,
41    /// Raw FTS5 BM25 rank (non-positive, smaller is better).
42    pub raw_rank: f32,
43    /// `exp(raw_rank)` — the rank mapped into `(0, 1]` so it can be
44    /// composed with the deterministic lexical scorer.
45    pub normalized_score: f32,
46}
47
48impl Fts5Hit {
49    /// Construct a typed FTS5 hit from the raw store-layer output.
50    #[must_use]
51    pub fn from_raw(memory_id: MemoryId, raw_rank: f32) -> Self {
52        Self {
53            memory_id,
54            raw_rank,
55            normalized_score: normalize_bm25(raw_rank),
56        }
57    }
58}
59
60/// Query the FTS5 mirror and return typed `Fts5Hit` values.
61///
62/// `query` MUST be non-empty after trimming — an empty expression fails
63/// closed with `CortexError::Validation` rather than returning a silent
64/// empty result. `limit` caps the number of returned rows; `0` returns
65/// `Ok(vec![])` without touching SQLite (mirrors the store-layer
66/// behaviour).
67///
68/// This function is read-only. It performs no policy composition and
69/// does NOT filter by memory lifecycle status — the caller layers the
70/// existing default-retrieval gates (proof closure, contradictions,
71/// status filter) on top of the returned hits before composing scores.
72/// This separation matches the existing `LexicalIndex::search` contract
73/// where the index is fed only with already-vetted documents.
74pub fn query_fts5(repo: &MemoryRepo<'_>, query: &str, limit: usize) -> CortexResult<Vec<Fts5Hit>> {
75    if query.trim().is_empty() {
76        return Err(CortexError::Validation(
77            "fts5 query must not be empty".into(),
78        ));
79    }
80
81    let raw_hits = repo
82        .fts5_search(query, limit)
83        .map_err(|err| CortexError::Validation(format!("fts5 mirror lookup failed: {err}")))?;
84
85    Ok(raw_hits
86        .into_iter()
87        .map(|(memory_id, rank)| Fts5Hit::from_raw(memory_id, rank))
88        .collect())
89}
90
/// Convert a raw FTS5 BM25 rank into the `[0, 1]` score band used by the
/// retrieval scorer.
///
/// The mapping is `exp(rank)` clamped to `[0, 1]`: a rank of `0` yields
/// `1.0`, and increasingly negative ranks decay toward `0.0`. Positive
/// finite ranks saturate at `1.0` through the clamp. Non-finite ranks
/// (NaN, `+inf`, `-inf`) — e.g. from a corrupt mirror — yield `0.0` so
/// nothing outside the band can reach the downstream hybrid scorer.
///
/// NOTE(review): this treats ranks closer to zero as stronger matches.
/// SQLite documents that better FTS5 matches receive numerically *lower*
/// `bm25()` values; if `fts5_search` returns that value unmodified, this
/// transform would score stronger matches lower — confirm the rank
/// convention against the store layer.
#[must_use]
pub fn normalize_bm25(rank: f32) -> f32 {
    if rank.is_finite() {
        // exp() of a finite value is never NaN, so the clamp only has to
        // cap positive ranks (whose exp exceeds 1.0) at the top of the band.
        rank.exp().clamp(0.0, 1.0)
    } else {
        0.0
    }
}
109
110/// Compose an existing deterministic `lexical_match` value with the
111/// FTS5 fuzzy normalized score, keyed by `FUZZY_BOOST_WEIGHT`.
112///
113/// The composition is the convex blend
114///
115/// ```text
116/// effective = lexical_match * (1 - FUZZY_BOOST_WEIGHT)
117///           + fuzzy_score   * FUZZY_BOOST_WEIGHT
118/// ```
119///
120/// so any value already in `[0, 1]` stays in `[0, 1]`. The 0.25 weight
121/// is the smallest blend that surfaces a fuzzy-only hit (lexical_match
122/// = 0.0, fuzzy_score = 1.0 → 0.25) above the unmatched floor while
123/// keeping an exact lexical hit (lexical_match = 1.0, fuzzy_score = 0.0
124/// → 0.75) strictly dominant over a fuzzy-only hit. The invariant test
125/// `compose_fuzzy_keeps_exact_lexical_dominant` pins this property.
126///
127/// Inputs are clamped to `[0, 1]` before composition — passing a NaN or
128/// out-of-band value cannot turn the composed score into a NaN or push
129/// it outside the band.
130#[must_use]
131pub fn compose_fuzzy_boost(lexical_match: f32, fuzzy_score: f32) -> f32 {
132    let lex = if lexical_match.is_finite() {
133        lexical_match.clamp(0.0, 1.0)
134    } else {
135        0.0
136    };
137    let fuzz = if fuzzy_score.is_finite() {
138        fuzzy_score.clamp(0.0, 1.0)
139    } else {
140        0.0
141    };
142    lex * (1.0 - FUZZY_BOOST_WEIGHT) + fuzz * FUZZY_BOOST_WEIGHT
143}
144
#[cfg(test)]
mod tests {
    //! Pure-math unit tests for the score transforms in this module.

    use super::*;

    // The DB-bound tests for `query_fts5` (typed Fts5Hit shape, exact-match
    // outranks typo, one-character typo still recovers, zero-limit
    // short-circuits, empty expression refusal) live in the
    // `cortex-store/tests/fts5_mirror.rs` integration test file, which
    // already pulls in chrono+serde_json as dev-deps. Keeping them here
    // would add a Cargo.lock delta on this crate, which Phase 4.B
    // explicitly forbids.

    #[test]
    fn normalize_bm25_maps_best_rank_to_one() {
        // rank = 0.0 is the top of the band for this transform;
        // exp(0) = 1.0.
        assert_eq!(normalize_bm25(0.0), 1.0);
    }

    #[test]
    fn normalize_bm25_decays_for_weaker_ranks() {
        // Ranks further from zero must map to lower scores, and both
        // must stay inside the [0, 1] band.
        let strong = normalize_bm25(-0.5);
        let weak = normalize_bm25(-3.0);
        assert!(strong > weak);
        assert!((0.0..=1.0).contains(&strong));
        assert!((0.0..=1.0).contains(&weak));
    }

    #[test]
    fn normalize_bm25_handles_non_finite_rank() {
        // Every non-finite rank degrades to the bottom of the band.
        assert_eq!(normalize_bm25(f32::NAN), 0.0);
        assert_eq!(normalize_bm25(f32::INFINITY), 0.0);
        assert_eq!(normalize_bm25(f32::NEG_INFINITY), 0.0);
    }

    #[test]
    fn compose_fuzzy_keeps_exact_lexical_dominant() {
        // Stable Phase 4.B invariant: an exact lexical hit
        // (lexical_match = 1.0) must dominate a fuzzy-only hit
        // (lexical_match = 0.0, fuzzy_score = 1.0). Any blend weight
        // below 0.5 satisfies this; at the current 0.25 weight the two
        // cases compose to 0.75 and 0.25 respectively.
        let exact = compose_fuzzy_boost(1.0, 0.0);
        let fuzzy_only = compose_fuzzy_boost(0.0, 1.0);
        assert!(
            exact > fuzzy_only,
            "exact lexical hit must dominate fuzzy-only hit; got exact={exact}, fuzzy_only={fuzzy_only}"
        );
        assert!((exact - 0.75).abs() < f32::EPSILON);
        assert!((fuzzy_only - FUZZY_BOOST_WEIGHT).abs() < f32::EPSILON);
    }

    #[test]
    fn compose_fuzzy_pure_lexical_stays_in_band() {
        let pure = compose_fuzzy_boost(0.8, 0.0);
        assert!((0.0..=1.0).contains(&pure));
        // Pure-lexical with no fuzzy hit must equal lexical_match *
        // (1 - weight) — the deterministic baseline survives at
        // 75% of its previous magnitude. The remaining 25% is the
        // fuzzy headroom that opens up when --fuzzy is on; the
        // CLI default never reaches this code path (Phase 4.B
        // eval guardrail).
        assert!((pure - 0.8 * (1.0 - FUZZY_BOOST_WEIGHT)).abs() < f32::EPSILON);
    }

    #[test]
    fn compose_fuzzy_clamps_out_of_band_inputs() {
        let huge = compose_fuzzy_boost(5.0, -2.0);
        assert!((0.0..=1.0).contains(&huge));
        // 5.0 clamps to 1.0; -2.0 clamps to 0.0; expected = 0.75.
        assert!((huge - 0.75).abs() < f32::EPSILON);
    }

    #[test]
    fn compose_fuzzy_handles_nan_inputs() {
        // NaN inputs degrade to 0 rather than producing a NaN
        // result. This is the same defense-in-depth that
        // `normalize_bm25` applies on the FTS5 side.
        let composed = compose_fuzzy_boost(f32::NAN, f32::NAN);
        assert_eq!(composed, 0.0);
    }

    #[test]
    fn compose_fuzzy_at_default_baseline_passes_lexical_through_at_weighted_fraction() {
        // Pin the default weight so a future change that tries to widen
        // FUZZY_BOOST_WEIGHT trips an explicit test.
        assert_eq!(FUZZY_BOOST_WEIGHT, 0.25);

        // With no fuzzy signal the lexical score must pass through
        // scaled by exactly (1 - weight) across the whole band.
        for lexical in [0.0_f32, 0.25, 0.5, 1.0] {
            let composed = compose_fuzzy_boost(lexical, 0.0);
            let expected = lexical * (1.0 - FUZZY_BOOST_WEIGHT);
            assert!(
                (composed - expected).abs() < f32::EPSILON,
                "lexical={lexical} composed to {composed}, expected {expected}"
            );
        }
    }

    #[test]
    fn fts5_hit_from_raw_normalises_bm25_via_exp_transform() {
        // Spot-check that the typed-hit constructor uses the same
        // `normalize_bm25` transform documented on `Fts5Hit`. The
        // memory id value itself is irrelevant here — we only care that
        // the score side of the typed wrapper matches the math and that
        // the id and raw rank are carried through untouched.
        let memory_id: MemoryId = "mem_01ARZ3NDEKTSV4RRFFQ69G5FAV"
            .parse()
            .expect("parse memory id");
        let hit = Fts5Hit::from_raw(memory_id, -1.0);
        let expected = (-1.0_f32).exp();
        assert!((hit.normalized_score - expected).abs() < f32::EPSILON);
        assert_eq!(hit.memory_id, memory_id);
        assert_eq!(hit.raw_rank, -1.0);
    }
}