Skip to main content

cortex_retrieval/
score.rs

//! Hybrid retrieval scoring and explanations.
//!
//! Phase 4.C extends the upstream lexical-input composition with a third
//! axis (semantic similarity) so the retrieval surface can fold in
//! embeddings when the operator opts in via `--semantic`. The
//! composition lives in [`compose_lexical_semantic`]; the downstream
//! hybrid scorer in [`score`] is unchanged. See the constants
//! [`LEX_WEIGHT_WITH_SEM`], [`FTS5_WEIGHT_WITH_SEM`], and
//! [`SEM_WEIGHT_WITH_SEM`] for the active weights, and their rationale
//! comments for why the semantic weight is small.
//!
//! **Phase 4.B eval guardrail**: when the caller passes `None` for the
//! semantic axis, [`compose_lexical_semantic`] returns exactly the same
//! value as the Phase 4.B [`compose_fuzzy_boost`] (`lex * 0.75 +
//! fts5 * 0.25`). The default-off `--semantic` flag therefore preserves
//! the Phase 4.B retrieval ordering byte for byte, and the regression
//! test `compose_semantic_off_matches_phase_4b_baseline_exactly` pins
//! the property.
19
20use std::collections::HashSet;
21
22use crate::fts5::compose_fuzzy_boost;
23
// Hybrid scorer weights. The six positive weights below sum to 1.0; the
// two penalty weights are negative and subtract from the final score.

/// Weight for the lexical match component.
pub const LEXICAL_MATCH_WEIGHT: f32 = 0.30;
/// Weight for the semantic similarity component.
pub const SEMANTIC_SIMILARITY_WEIGHT: f32 = 0.25;
/// Weight for the brightness component.
pub const BRIGHTNESS_WEIGHT: f32 = 0.15;
/// Weight for the domain overlap component.
pub const DOMAIN_OVERLAP_WEIGHT: f32 = 0.10;
/// Weight for the validation component.
pub const VALIDATION_WEIGHT: f32 = 0.10;
/// Weight for the authority component.
pub const AUTHORITY_WEIGHT: f32 = 0.10;
/// Weight for the contradiction-risk penalty (negative: lowers the score).
pub const CONTRADICTION_RISK_WEIGHT: f32 = -0.25;
/// Weight for the staleness penalty (negative: lowers the score).
pub const STALENESS_PENALTY_WEIGHT: f32 = -0.10;
40
// =============================================================================
// Phase 4.C compositional weights for the upstream lexical-input axis.
//
// These constants are active ONLY when the caller passes a non-`None`
// semantic similarity into [`compose_lexical_semantic`]. When semantic
// is `None`, [`compose_lexical_semantic`] reduces to the Phase 4.B
// [`compose_fuzzy_boost`] shape (lex * 0.75 + fts5 * 0.25) so the
// default ordering is byte-for-byte unchanged.
//
// The weights below were picked under three constraints:
//
//   1. They MUST sum to 1.0 so the composed value stays inside `[0, 1]`
//      and the downstream hybrid scorer (`score`) receives the same
//      input shape it always has.
//   2. The FTS5 weight MUST be unchanged from Phase 4.B (0.25). The
//      fuzzy-recovery invariant
//      `compose_fuzzy_keeps_exact_lexical_dominant` depends on the
//      FTS5 axis carrying the same fraction of the composed score in
//      both modes; otherwise turning on `--semantic` would weaken
//      recovery of a one-character typo as a side effect.
//   3. The semantic weight MUST be SMALL relative to lexical so that
//      a memory with a perfect lexical match (lex=1.0) but no
//      embedding (sem=0.0) still strictly outscores a memory with
//      no lexical match but a perfect embedding match (lex=0.0,
//      sem=1.0). The 0.10 / 0.65 ratio guarantees this:
//      `0.65 * 1.0 + 0.10 * 0.0 = 0.65 > 0.65 * 0.0 + 0.10 * 1.0 = 0.10`.
//
// The Phase 4.C SPEC mentions a notional fourth axis (`w_sal = 0.10`)
// for salience, but salience is composed DOWNSTREAM by [`score`] via
// `brightness`, `validation`, `authority_weight`, and the
// contradiction / staleness penalties — not at the lexical-input phase.
// Pulling salience up here would double-count it. The four-way
// composition shape in the SPEC is the conceptual decomposition
// operators reason about; the implementation keeps lexical-input
// composition (here) and salience composition (in `score`) separate.
76
/// Lexical-match weight when the semantic axis is active.
///
/// 0.65 = 0.75 (Phase 4.B baseline) - 0.10 (semantic eats into lexical).
/// Lexical stays the strongest axis so an exact lexical hit dominates a
/// semantic-only hit: `0.65 * 1.0 > 0.10 * 1.0`.
pub const LEX_WEIGHT_WITH_SEM: f32 = 0.65;

/// FTS5 weight when the semantic axis is active.
///
/// 0.25 is unchanged from Phase 4.B
/// ([`crate::fts5::FUZZY_BOOST_WEIGHT`]) so flipping `--semantic` on
/// does not weaken fuzzy recovery as a side effect.
pub const FTS5_WEIGHT_WITH_SEM: f32 = 0.25;

/// Semantic-similarity weight when the semantic axis is active.
///
/// 0.10 is the SMALL value the SPEC pins. Larger weights would let a
/// stub-embedder hash-collision push a semantically unrelated memory
/// above an exact lexical hit; smaller weights would make the axis
/// invisible. The compromise is 0.10 — enough to break ties between
/// two equally-lexical-matched memories using semantic signal, not
/// enough to displace a lexical hit.
pub const SEM_WEIGHT_WITH_SEM: f32 = 0.10;

// Compile-time check that the weights sum to 1.0 (to the nearest
// hundredth). A non-1.0 sum would silently push the composed lexical
// input outside `[0, 1]` and break the downstream scorer's assumptions,
// so a bad weight tweak fails the build right here. The
// `phase_4c_weights_sum_to_one` test pins the same property at f32
// precision.
const _: () = {
    // `as i32` truncates toward zero, so add 0.5 first to round each
    // weight to the nearest hundredth. Without it, a valid weight whose
    // f32 product lands a hair below an integer (e.g. 28.999998) would
    // be counted one short and spuriously fail the assertion.
    let sum_x100 = (LEX_WEIGHT_WITH_SEM * 100.0 + 0.5) as i32
        + (FTS5_WEIGHT_WITH_SEM * 100.0 + 0.5) as i32
        + (SEM_WEIGHT_WITH_SEM * 100.0 + 0.5) as i32;
    assert!(sum_x100 == 100, "Phase 4.C weights must sum to 1.0");
};
113
/// Score inputs available for v0 retrieval.
///
/// Every field is clamped to `[0, 1]` by the scorer before it is
/// weighted. Semantic similarity is intentionally absent: v0 fixes that
/// component at `0` until embeddings exist.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreInputs {
    /// Lexical match from `lexical.rs`.
    pub lexical_match: f32,
    /// Deterministic salience brightness.
    pub brightness: f32,
    /// Domain overlap between query/task domains and memory domains.
    pub domain_overlap: f32,
    /// Outcome-bound validation signal.
    pub validation: f32,
    /// Authority weight for the memory source.
    pub authority_weight: f32,
    /// Risk from unresolved contradictions (applied as a penalty).
    pub contradiction_risk: f32,
    /// Penalty for stale or unvalidated memory.
    pub staleness_penalty: f32,
}
135
/// A weighted score component suitable for `memory search --explain`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreComponent {
    /// Raw normalized component value after clamping.
    pub raw: f32,
    /// BUILD_SPEC §14.1 component weight.
    pub weight: f32,
    /// Weighted contribution to the final score.
    pub contribution: f32,
}

impl ScoreComponent {
    /// Builds a component from an unnormalized `raw` value and its `weight`.
    ///
    /// `raw` is clamped into `[0, 1]` and `contribution` is the clamped
    /// value times `weight`. A NaN `raw` is treated as `0.0`:
    /// `f32::clamp` passes NaN through unchanged, which would otherwise
    /// make the contribution — and every sum that includes it — NaN.
    fn new(raw: f32, weight: f32) -> Self {
        let raw = if raw.is_nan() {
            0.0
        } else {
            raw.clamp(0.0, 1.0)
        };
        Self {
            raw,
            weight,
            contribution: raw * weight,
        }
    }
}
157
158/// Full hybrid score explanation.
159#[derive(Debug, Clone, Copy, PartialEq)]
160pub struct HybridScoreExplanation {
161    /// `0.30 * lexical_match`.
162    pub lexical_match: ScoreComponent,
163    /// `0.25 * semantic_similarity`; fixed to zero for v0.
164    pub semantic_similarity: ScoreComponent,
165    /// `0.15 * brightness`.
166    pub brightness: ScoreComponent,
167    /// `0.10 * domain_overlap`.
168    pub domain_overlap: ScoreComponent,
169    /// `0.10 * validation`.
170    pub validation: ScoreComponent,
171    /// `0.10 * authority_weight`.
172    pub authority_weight: ScoreComponent,
173    /// `-0.25 * contradiction_risk`.
174    pub contradiction_risk: ScoreComponent,
175    /// `-0.10 * staleness_penalty`.
176    pub staleness_penalty: ScoreComponent,
177    /// Final retrieval score.
178    pub final_score: f32,
179}
180
181/// Compose lexical, FTS5, and (optional) semantic axes into a single
182/// effective lexical-input value the downstream hybrid scorer consumes.
183///
184/// This is the Phase 4.C generalisation of [`compose_fuzzy_boost`].
185/// When `semantic` is `None`, the function returns exactly the same
186/// value as `compose_fuzzy_boost(lexical, fts5)` — the Phase 4.B eval
187/// guardrail. When `semantic` is `Some(sim)`, the function blends the
188/// three axes by [`LEX_WEIGHT_WITH_SEM`], [`FTS5_WEIGHT_WITH_SEM`],
189/// and [`SEM_WEIGHT_WITH_SEM`] (which sum to 1.0).
190///
191/// All inputs are clamped to `[0, 1]` defensively. A NaN or out-of-band
192/// input cannot push the composed value outside the band the
193/// downstream scorer expects. Negative cosine similarity (semantic
194/// orthogonality / opposition) is clamped to `0.0` rather than being
195/// treated as a penalty — the Phase 4.C SPEC does not introduce a
196/// semantic-displacement penalty axis.
197///
198/// **Invariants**:
199///
200/// - `compose_lexical_semantic(lex, fts5, None) == compose_fuzzy_boost(lex, fts5)`
201///   (pinned by `compose_semantic_off_matches_phase_4b_baseline_exactly`).
202/// - A memory with `lex=1.0, fts5=0.0, sem=0.0` (exact lexical, no
203///   fuzzy, no semantic) outscores a memory with `lex=0.0, fts5=0.0,
204///   sem=1.0` (semantic-only hit). Pinned by
205///   `compose_semantic_keeps_exact_lexical_dominant`.
206#[must_use]
207pub fn compose_lexical_semantic(lexical: f32, fts5: f32, semantic: Option<f32>) -> f32 {
208    let Some(sem) = semantic else {
209        return compose_fuzzy_boost(lexical, fts5);
210    };
211    let lex = clamp_band(lexical);
212    let fts = clamp_band(fts5);
213    let sem = clamp_band(sem);
214    lex * LEX_WEIGHT_WITH_SEM + fts * FTS5_WEIGHT_WITH_SEM + sem * SEM_WEIGHT_WITH_SEM
215}
216
/// Sanitises one axis value into the `[0, 1]` band.
///
/// Finite values are clamped to `[0, 1]`. Non-finite values (NaN and
/// both infinities) map to `0.0`, so a corrupt input contributes nothing
/// rather than saturating the axis.
fn clamp_band(value: f32) -> f32 {
    if value.is_finite() {
        value.clamp(0.0, 1.0)
    } else {
        0.0
    }
}
224
225/// Calculates the BUILD_SPEC §14.1 hybrid retrieval score.
226#[must_use]
227pub fn score(inputs: ScoreInputs) -> HybridScoreExplanation {
228    let lexical_match = ScoreComponent::new(inputs.lexical_match, LEXICAL_MATCH_WEIGHT);
229    let semantic_similarity = ScoreComponent::new(0.0, SEMANTIC_SIMILARITY_WEIGHT);
230    let brightness = ScoreComponent::new(inputs.brightness, BRIGHTNESS_WEIGHT);
231    let domain_overlap = ScoreComponent::new(inputs.domain_overlap, DOMAIN_OVERLAP_WEIGHT);
232    let validation = ScoreComponent::new(inputs.validation, VALIDATION_WEIGHT);
233    let authority_weight = ScoreComponent::new(inputs.authority_weight, AUTHORITY_WEIGHT);
234    let contradiction_risk =
235        ScoreComponent::new(inputs.contradiction_risk, CONTRADICTION_RISK_WEIGHT);
236    let staleness_penalty = ScoreComponent::new(inputs.staleness_penalty, STALENESS_PENALTY_WEIGHT);
237    let final_score = lexical_match.contribution
238        + semantic_similarity.contribution
239        + brightness.contribution
240        + domain_overlap.contribution
241        + validation.contribution
242        + authority_weight.contribution
243        + contradiction_risk.contribution
244        + staleness_penalty.contribution;
245
246    HybridScoreExplanation {
247        lexical_match,
248        semantic_similarity,
249        brightness,
250        domain_overlap,
251        validation,
252        authority_weight,
253        contradiction_risk,
254        staleness_penalty,
255        final_score,
256    }
257}
258
/// Explanation for domain-overlap scoring.
#[derive(Debug, Clone, PartialEq)]
pub struct DomainOverlapExplanation {
    /// Normalized domain overlap in `[0, 1]`.
    pub domain_overlap: f32,
    /// Normalized query/task domains considered.
    pub query_domains: Vec<String>,
    /// Normalized memory domains considered.
    pub memory_domains: Vec<String>,
    /// Query domains also present on the memory.
    pub matched_domains: Vec<String>,
}
271
272/// Calculates normalized domain overlap for score inputs.
273#[must_use]
274pub fn domain_overlap(
275    query_domains: &[impl AsRef<str>],
276    memory_domains: &[impl AsRef<str>],
277) -> DomainOverlapExplanation {
278    let query_domains = normalize_domains(query_domains);
279    let memory_domains = normalize_domains(memory_domains);
280    if query_domains.is_empty() {
281        return DomainOverlapExplanation {
282            domain_overlap: 0.0,
283            query_domains,
284            memory_domains,
285            matched_domains: Vec::new(),
286        };
287    }
288
289    let memory_set: HashSet<_> = memory_domains.iter().cloned().collect();
290    let matched_domains: Vec<_> = query_domains
291        .iter()
292        .filter(|domain| memory_set.contains(*domain))
293        .cloned()
294        .collect();
295    let domain_overlap = matched_domains.len() as f32 / query_domains.len() as f32;
296
297    DomainOverlapExplanation {
298        domain_overlap,
299        query_domains,
300        memory_domains,
301        matched_domains,
302    }
303}
304
/// Normalizes a list of domain labels.
///
/// Each label is trimmed and ASCII-lowercased; labels that are empty
/// after trimming are dropped, and duplicates are removed while keeping
/// the order in which each label first appeared.
fn normalize_domains(domains: &[impl AsRef<str>]) -> Vec<String> {
    let mut seen = HashSet::new();
    domains
        .iter()
        .map(|domain| domain.as_ref().trim().to_ascii_lowercase())
        // `insert` returns false for a label that was already seen.
        .filter(|domain| !domain.is_empty() && seen.insert(domain.clone()))
        .collect()
}
316
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn score_matches_build_spec_weights() {
        let explanation = score(ScoreInputs {
            lexical_match: 0.8,
            brightness: 0.6,
            domain_overlap: 0.5,
            validation: 0.7,
            authority_weight: 0.4,
            contradiction_risk: 0.2,
            staleness_penalty: 0.3,
        });

        let expected = 0.30 * 0.8 + 0.25 * 0.0 + 0.15 * 0.6 + 0.10 * 0.5 + 0.10 * 0.7 + 0.10 * 0.4
            - 0.25 * 0.2
            - 0.10 * 0.3;
        assert!((explanation.final_score - expected).abs() < f32::EPSILON);
        assert_eq!(explanation.semantic_similarity.raw, 0.0);
        assert_eq!(explanation.semantic_similarity.contribution, 0.0);
    }

    #[test]
    fn score_ordering_rewards_salient_validated_memories() {
        let strong = score(ScoreInputs {
            lexical_match: 0.75,
            brightness: 0.9,
            domain_overlap: 1.0,
            validation: 1.0,
            authority_weight: 0.8,
            contradiction_risk: 0.0,
            staleness_penalty: 0.0,
        });
        let weak = score(ScoreInputs {
            lexical_match: 1.0,
            brightness: 0.1,
            domain_overlap: 0.0,
            validation: 0.0,
            authority_weight: 0.2,
            contradiction_risk: 0.8,
            staleness_penalty: 0.6,
        });

        assert!(strong.final_score > weak.final_score);
    }

    #[test]
    fn explanation_contains_all_score_fields() {
        // `lexical_match: 2.0` is deliberately out of band to exercise clamping.
        let explanation = score(ScoreInputs {
            lexical_match: 2.0,
            brightness: 1.0,
            domain_overlap: 1.0,
            validation: 1.0,
            authority_weight: 1.0,
            contradiction_risk: 1.0,
            staleness_penalty: 1.0,
        });

        assert_eq!(explanation.lexical_match.raw, 1.0);
        assert_eq!(explanation.lexical_match.weight, LEXICAL_MATCH_WEIGHT);
        assert_eq!(
            explanation.semantic_similarity.weight,
            SEMANTIC_SIMILARITY_WEIGHT
        );
        assert_eq!(explanation.brightness.weight, BRIGHTNESS_WEIGHT);
        assert_eq!(explanation.domain_overlap.weight, DOMAIN_OVERLAP_WEIGHT);
        assert_eq!(explanation.validation.weight, VALIDATION_WEIGHT);
        assert_eq!(explanation.authority_weight.weight, AUTHORITY_WEIGHT);
        assert_eq!(
            explanation.contradiction_risk.weight,
            CONTRADICTION_RISK_WEIGHT
        );
        assert_eq!(
            explanation.staleness_penalty.weight,
            STALENESS_PENALTY_WEIGHT
        );
    }

    #[test]
    fn domain_overlap_reports_matched_domains() {
        let explanation =
            domain_overlap(&["Retrieval", "Store", "retrieval"], &["store", "privacy"]);

        assert_eq!(explanation.domain_overlap, 0.5);
        assert_eq!(explanation.query_domains, ["retrieval", "store"]);
        assert_eq!(explanation.memory_domains, ["store", "privacy"]);
        assert_eq!(explanation.matched_domains, ["store"]);
    }

    // =========================================================================
    // Phase 4.C compositional axis tests.
    //
    // The non-negotiable invariant is the eval guardrail
    // `compose_semantic_off_matches_phase_4b_baseline_exactly` — when the
    // caller passes `None` for the semantic axis the composer MUST return
    // exactly the same value as the Phase 4.B `compose_fuzzy_boost` shape.
    // A failure here means that leaving `--semantic` OFF (the default
    // path) changed the retrieval ordering, which is a Phase 4.B
    // regression.

    #[test]
    fn phase_4c_weights_sum_to_one() {
        let sum = LEX_WEIGHT_WITH_SEM + FTS5_WEIGHT_WITH_SEM + SEM_WEIGHT_WITH_SEM;
        assert!(
            (sum - 1.0).abs() < f32::EPSILON,
            "Phase 4.C composition weights must sum to 1.0, got {sum}"
        );
    }

    #[test]
    fn compose_semantic_off_matches_phase_4b_baseline_exactly() {
        // The Phase 4.B eval guardrail: passing `None` for semantic
        // MUST produce byte-for-byte the same value as the Phase 4.B
        // `compose_fuzzy_boost` helper. Iterate over a grid of
        // representative `(lex, fts5)` pairs so a future change that
        // sneaks a non-zero semantic contribution into the OFF path
        // trips this assertion on at least one cell.
        let cells = [
            (0.0_f32, 0.0_f32),
            (1.0, 0.0),
            (0.0, 1.0),
            (0.5, 0.5),
            (0.75, 0.25),
            (0.25, 0.75),
            (0.8, 0.2),
            (0.2, 0.8),
            (0.9, 0.1),
            (0.1, 0.9),
        ];
        for (lex, fts5) in cells {
            let phase_4b = compose_fuzzy_boost(lex, fts5);
            let phase_4c_off = compose_lexical_semantic(lex, fts5, None);
            // Compare bit patterns, not an epsilon band: "exactly" must
            // not tolerate even a one-ulp drift in the OFF path.
            assert_eq!(
                phase_4b.to_bits(),
                phase_4c_off.to_bits(),
                "Phase 4.B baseline drift at (lex={lex}, fts5={fts5}): \
                 phase_4b={phase_4b}, phase_4c_off={phase_4c_off}"
            );
        }
    }

    #[test]
    fn compose_semantic_on_includes_semantic_axis() {
        // With semantic ON, two memories that are otherwise identical
        // (same lexical, same fts5) MUST receive different composed
        // values when their semantic similarity differs.
        let lex = 0.5;
        let fts5 = 0.0;
        let low_sem = compose_lexical_semantic(lex, fts5, Some(0.0));
        let high_sem = compose_lexical_semantic(lex, fts5, Some(1.0));
        assert!(
            high_sem > low_sem,
            "semantic axis must contribute positively; low_sem={low_sem}, high_sem={high_sem}"
        );
        // The gap is exactly the semantic weight.
        assert!(
            (high_sem - low_sem - SEM_WEIGHT_WITH_SEM).abs() < f32::EPSILON,
            "semantic contribution must equal SEM_WEIGHT_WITH_SEM"
        );
    }

    #[test]
    fn compose_semantic_keeps_exact_lexical_dominant() {
        // A memory with a perfect lexical hit and no semantic signal
        // (e.g. no embedding row in the side table) MUST still
        // outscore a memory with no lexical hit but a perfect semantic
        // signal. This pins the SPEC's "small semantic weight does
        // not displace exact lexical hits" property.
        let lexical_only = compose_lexical_semantic(1.0, 0.0, Some(0.0));
        let semantic_only = compose_lexical_semantic(0.0, 0.0, Some(1.0));
        assert!(
            lexical_only > semantic_only,
            "exact lexical hit must dominate semantic-only hit; \
             lexical_only={lexical_only}, semantic_only={semantic_only}"
        );
    }

    #[test]
    fn compose_semantic_default_weight_does_not_displace_exact_lexical_hits() {
        // Even with worst-case semantic noise (semantic = 1.0 for an
        // unrelated memory, semantic = 0.0 for the lexically-matching
        // memory), the lexical hit MUST still win the composed score.
        // Pinned by SPEC: "small semantic weight" — the 0.10 weight is
        // small enough that lex=1.0,sem=0.0 (0.65) > lex=0.0,sem=1.0
        // (0.10).
        let lex_winner = compose_lexical_semantic(1.0, 0.0, Some(0.0));
        let sem_winner = compose_lexical_semantic(0.0, 0.0, Some(1.0));
        assert!(
            lex_winner > sem_winner,
            "default semantic weight must not displace exact lexical hits"
        );
    }

    #[test]
    fn compose_semantic_stays_in_band_for_all_inputs() {
        // Random-ish grid: every composed value MUST stay in [0, 1]
        // regardless of input combination.
        let values = [0.0_f32, 0.25, 0.5, 0.75, 1.0];
        for &lex in &values {
            for &fts5 in &values {
                for &sem in &values {
                    let composed = compose_lexical_semantic(lex, fts5, Some(sem));
                    assert!(
                        (0.0..=1.0).contains(&composed),
                        "composed value out of band at (lex={lex}, fts5={fts5}, sem={sem}): {composed}"
                    );
                }
            }
        }
    }

    #[test]
    fn compose_semantic_clamps_out_of_band_inputs() {
        // NaN, infinity, and out-of-band values MUST degrade to 0.0
        // rather than propagating into the composed score.
        let composed = compose_lexical_semantic(f32::NAN, f32::INFINITY, Some(-5.0));
        assert!((0.0..=1.0).contains(&composed));
        assert_eq!(composed, 0.0);

        // Finite values above the band clamp to 1.0, so they compose to
        // the same value as an all-ones input.
        assert_eq!(
            compose_lexical_semantic(2.0, 2.0, Some(2.0)),
            compose_lexical_semantic(1.0, 1.0, Some(1.0))
        );
    }

    #[test]
    fn compose_semantic_zero_for_all_axes_is_zero() {
        // A memory that matched nothing must compose to 0.0 regardless
        // of which mode the composer is in.
        assert_eq!(compose_lexical_semantic(0.0, 0.0, None), 0.0);
        assert_eq!(compose_lexical_semantic(0.0, 0.0, Some(0.0)), 0.0);
    }
}
545}