cortex_retrieval/score.rs
1//! Hybrid retrieval scoring and explanations.
2//!
3//! Phase 4.C extends the upstream lexical-input composition with a third
4//! axis (semantic similarity) so the retrieval surface can fold in
5//! embeddings when the operator opts in via `--semantic`. The
6//! composition lives in [`compose_lexical_semantic`]; the downstream
7//! hybrid scorer in [`score`] is unchanged. See the constants
8//! [`LEX_WEIGHT_WITH_SEM`], [`FTS5_WEIGHT_WITH_SEM`], and
9//! [`SEM_WEIGHT_WITH_SEM`] for the active weights and the rationale
10//! comments for why the semantic weight is small.
11//!
12//! **Phase 4.B eval guardrail**: when the caller passes `None` for the
13//! semantic axis, [`compose_lexical_semantic`] returns exactly the same
14//! value as the Phase 4.B [`compose_fuzzy_boost`] (`lex * 0.75 +
15//! fts5 * 0.25`). The default-off `--semantic` flag therefore preserves
16//! byte-for-byte the Phase 4.B retrieval ordering, and the regression
17//! test `compose_semantic_off_matches_phase_4b_baseline_exactly` pins
18//! the property.
19
20use std::collections::HashSet;
21
22use crate::fts5::compose_fuzzy_boost;
23
/// Multiplier applied to the clamped lexical-match input.
pub const LEXICAL_MATCH_WEIGHT: f32 = 0.30;
/// Multiplier applied to the clamped semantic-similarity input.
pub const SEMANTIC_SIMILARITY_WEIGHT: f32 = 0.25;
/// Multiplier applied to the clamped brightness input.
pub const BRIGHTNESS_WEIGHT: f32 = 0.15;
/// Multiplier applied to the clamped domain-overlap input.
pub const DOMAIN_OVERLAP_WEIGHT: f32 = 0.10;
/// Multiplier applied to the clamped validation input.
pub const VALIDATION_WEIGHT: f32 = 0.10;
/// Multiplier applied to the clamped authority input.
pub const AUTHORITY_WEIGHT: f32 = 0.10;
/// Negative multiplier: contradiction risk subtracts from the final score.
pub const CONTRADICTION_RISK_WEIGHT: f32 = -0.25;
/// Negative multiplier: staleness subtracts from the final score.
pub const STALENESS_PENALTY_WEIGHT: f32 = -0.10;
40
41// =============================================================================
42// Phase 4.C compositional weights for the upstream lexical-input axis.
43//
44// These constants are active ONLY when the caller passes a non-`None`
45// semantic similarity into [`compose_lexical_semantic`]. When semantic
46// is `None`, [`compose_lexical_semantic`] reduces to the Phase 4.B
47// [`compose_fuzzy_boost`] shape (lex * 0.75 + fts5 * 0.25) so the
48// default ordering is byte-for-byte unchanged.
49//
50// The weights below were picked under three constraints:
51//
52// 1. They MUST sum to 1.0 so the composed value stays inside `[0, 1]`
53// and the downstream hybrid scorer (`score`) receives the same
54// input shape it always has.
55// 2. The FTS5 weight MUST be unchanged from Phase 4.B (0.25). The
56// fuzzy-recovery invariant
57// `compose_fuzzy_keeps_exact_lexical_dominant` depends on the
58// FTS5 axis carrying the same fraction of the composed score in
59// both modes; otherwise turning on `--semantic` would weaken
60// a typo-of-one-character recovery as a side effect.
61// 3. The semantic weight MUST be SMALL relative to lexical so that
62// a memory with a perfect lexical match (lex=1.0) but no
63// embedding (sem=0.0) still strictly outscores a memory with
64// no lexical match but a perfect embedding match (lex=0.0,
65// sem=1.0). The 0.10 / 0.65 ratio guarantees this:
66// `0.65 * 1.0 + 0.10 * 0.0 = 0.65 > 0.65 * 0.0 + 0.10 * 1.0 = 0.10`.
67//
68// The Phase 4.C SPEC mentions a notional fourth axis (`w_sal = 0.10`)
69// for salience, but salience is composed DOWNSTREAM by [`score`] via
70// `brightness`, `validation`, `authority_weight`, and the
71// contradiction / staleness penalties — not at the lexical-input phase.
72// Pulling salience up here would double-count it. The four-way
73// composition shape in the SPEC is the conceptual decomposition
74// operators reason about; the implementation keeps lexical-input
75// composition (here) and salience composition (in `score`) separate.
76
/// Lexical-match weight when the semantic axis is active.
///
/// 0.65 = 0.75 (Phase 4.B baseline) - 0.10 (the share handed to the
/// semantic axis). Lexical stays the strongest axis so an exact
/// lexical hit dominates a semantic-only hit.
pub const LEX_WEIGHT_WITH_SEM: f32 = 0.65;

/// FTS5 weight when the semantic axis is active.
///
/// 0.25 is unchanged from Phase 4.B
/// ([`crate::fts5::FUZZY_BOOST_WEIGHT`]) so flipping `--semantic` on
/// does not weaken fuzzy recovery as a side effect.
pub const FTS5_WEIGHT_WITH_SEM: f32 = 0.25;

/// Semantic-similarity weight when the semantic axis is active.
///
/// 0.10 is the SMALL value the SPEC pins. Larger weights would let a
/// stub-embedder hash-collision push a semantically unrelated memory
/// above an exact lexical hit; smaller weights would make the axis
/// invisible. 0.10 is enough to break ties between two
/// equally-lexical-matched memories, not enough to displace a lexical
/// hit.
pub const SEM_WEIGHT_WITH_SEM: f32 = 0.10;

// Compile-time guard: the three weights must sum to 1.0. A different
// sum would silently push the composed lexical input outside `[0, 1]`
// and break the downstream scorer's assumptions, so a bad weight tweak
// fails the build here (the `phase_4c_weights_sum_to_one` test pins
// the same property at test time).
//
// The sum is taken first and then rounded to the nearest 1e-4. A
// per-weight truncating `as i32` cast is unreliable: in `f32`,
// `0.59 * 100.0` lands just below 59 and truncates to 58, so a valid
// weight set could be rejected, while drift smaller than 0.01 would
// slip through.
const _: () = {
    let sum = LEX_WEIGHT_WITH_SEM + FTS5_WEIGHT_WITH_SEM + SEM_WEIGHT_WITH_SEM;
    // `as i32` truncates toward zero; adding 0.5 first makes the cast
    // round to nearest for the non-negative value checked here.
    let sum_x10k = (sum * 10_000.0 + 0.5) as i32;
    assert!(sum_x10k == 10_000, "Phase 4.C weights must sum to 1.0");
};
113
/// Per-memory signals fed into the v0 hybrid scorer.
///
/// Every field is clamped to `[0, 1]` before its weight is applied.
/// There is no semantic-similarity field: v0 pins that component to
/// `0` until embeddings exist.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreInputs {
    /// Lexical match strength, as produced by `lexical.rs`.
    pub lexical_match: f32,
    /// Deterministic salience brightness of the memory.
    pub brightness: f32,
    /// Overlap between the query/task domains and the memory's domains.
    pub domain_overlap: f32,
    /// Validation signal bound to observed outcomes.
    pub validation: f32,
    /// Authority of the source the memory came from.
    pub authority_weight: f32,
    /// Risk contributed by unresolved contradictions (scored as a penalty).
    pub contradiction_risk: f32,
    /// Staleness of stale or unvalidated memory (scored as a penalty).
    pub staleness_penalty: f32,
}
135
/// A weighted score component suitable for `memory search --explain`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreComponent {
    /// Raw normalized component value after clamping to `[0, 1]`.
    pub raw: f32,
    /// BUILD_SPEC §14.1 component weight.
    pub weight: f32,
    /// Weighted contribution to the final score (`raw * weight`).
    pub contribution: f32,
}

impl ScoreComponent {
    /// Builds a component from an unclamped `raw` value and its `weight`.
    ///
    /// `raw` is clamped to `[0, 1]`; a NaN `raw` is treated as `0.0`.
    /// `contribution` is the clamped value multiplied by `weight`.
    fn new(raw: f32, weight: f32) -> Self {
        // `f32::clamp` returns NaN for a NaN receiver, which would poison
        // `contribution` and every sum built from it. Degrade NaN to "no
        // signal" so the stored value always honours the `[0, 1]` contract.
        let raw = if raw.is_nan() {
            0.0
        } else {
            raw.clamp(0.0, 1.0)
        };
        Self {
            raw,
            weight,
            contribution: raw * weight,
        }
    }
}
157
158/// Full hybrid score explanation.
159#[derive(Debug, Clone, Copy, PartialEq)]
160pub struct HybridScoreExplanation {
161 /// `0.30 * lexical_match`.
162 pub lexical_match: ScoreComponent,
163 /// `0.25 * semantic_similarity`; fixed to zero for v0.
164 pub semantic_similarity: ScoreComponent,
165 /// `0.15 * brightness`.
166 pub brightness: ScoreComponent,
167 /// `0.10 * domain_overlap`.
168 pub domain_overlap: ScoreComponent,
169 /// `0.10 * validation`.
170 pub validation: ScoreComponent,
171 /// `0.10 * authority_weight`.
172 pub authority_weight: ScoreComponent,
173 /// `-0.25 * contradiction_risk`.
174 pub contradiction_risk: ScoreComponent,
175 /// `-0.10 * staleness_penalty`.
176 pub staleness_penalty: ScoreComponent,
177 /// Final retrieval score.
178 pub final_score: f32,
179}
180
181/// Compose lexical, FTS5, and (optional) semantic axes into a single
182/// effective lexical-input value the downstream hybrid scorer consumes.
183///
184/// This is the Phase 4.C generalisation of [`compose_fuzzy_boost`].
185/// When `semantic` is `None`, the function returns exactly the same
186/// value as `compose_fuzzy_boost(lexical, fts5)` — the Phase 4.B eval
187/// guardrail. When `semantic` is `Some(sim)`, the function blends the
188/// three axes by [`LEX_WEIGHT_WITH_SEM`], [`FTS5_WEIGHT_WITH_SEM`],
189/// and [`SEM_WEIGHT_WITH_SEM`] (which sum to 1.0).
190///
191/// All inputs are clamped to `[0, 1]` defensively. A NaN or out-of-band
192/// input cannot push the composed value outside the band the
193/// downstream scorer expects. Negative cosine similarity (semantic
194/// orthogonality / opposition) is clamped to `0.0` rather than being
195/// treated as a penalty — the Phase 4.C SPEC does not introduce a
196/// semantic-displacement penalty axis.
197///
198/// **Invariants**:
199///
200/// - `compose_lexical_semantic(lex, fts5, None) == compose_fuzzy_boost(lex, fts5)`
201/// (pinned by `compose_semantic_off_matches_phase_4b_baseline_exactly`).
202/// - A memory with `lex=1.0, fts5=0.0, sem=0.0` (exact lexical, no
203/// fuzzy, no semantic) outscores a memory with `lex=0.0, fts5=0.0,
204/// sem=1.0` (semantic-only hit). Pinned by
205/// `compose_semantic_keeps_exact_lexical_dominant`.
206#[must_use]
207pub fn compose_lexical_semantic(lexical: f32, fts5: f32, semantic: Option<f32>) -> f32 {
208 let Some(sem) = semantic else {
209 return compose_fuzzy_boost(lexical, fts5);
210 };
211 let lex = clamp_band(lexical);
212 let fts = clamp_band(fts5);
213 let sem = clamp_band(sem);
214 lex * LEX_WEIGHT_WITH_SEM + fts * FTS5_WEIGHT_WITH_SEM + sem * SEM_WEIGHT_WITH_SEM
215}
216
/// Forces an axis value into `[0, 1]`; NaN and ±infinity collapse to `0.0`.
fn clamp_band(value: f32) -> f32 {
    if !value.is_finite() {
        return 0.0;
    }
    value.clamp(0.0, 1.0)
}
224
225/// Calculates the BUILD_SPEC §14.1 hybrid retrieval score.
226#[must_use]
227pub fn score(inputs: ScoreInputs) -> HybridScoreExplanation {
228 let lexical_match = ScoreComponent::new(inputs.lexical_match, LEXICAL_MATCH_WEIGHT);
229 let semantic_similarity = ScoreComponent::new(0.0, SEMANTIC_SIMILARITY_WEIGHT);
230 let brightness = ScoreComponent::new(inputs.brightness, BRIGHTNESS_WEIGHT);
231 let domain_overlap = ScoreComponent::new(inputs.domain_overlap, DOMAIN_OVERLAP_WEIGHT);
232 let validation = ScoreComponent::new(inputs.validation, VALIDATION_WEIGHT);
233 let authority_weight = ScoreComponent::new(inputs.authority_weight, AUTHORITY_WEIGHT);
234 let contradiction_risk =
235 ScoreComponent::new(inputs.contradiction_risk, CONTRADICTION_RISK_WEIGHT);
236 let staleness_penalty = ScoreComponent::new(inputs.staleness_penalty, STALENESS_PENALTY_WEIGHT);
237 let final_score = lexical_match.contribution
238 + semantic_similarity.contribution
239 + brightness.contribution
240 + domain_overlap.contribution
241 + validation.contribution
242 + authority_weight.contribution
243 + contradiction_risk.contribution
244 + staleness_penalty.contribution;
245
246 HybridScoreExplanation {
247 lexical_match,
248 semantic_similarity,
249 brightness,
250 domain_overlap,
251 validation,
252 authority_weight,
253 contradiction_risk,
254 staleness_penalty,
255 final_score,
256 }
257}
258
/// Result of a domain-overlap calculation together with the data behind it.
#[derive(Debug, Clone, PartialEq)]
pub struct DomainOverlapExplanation {
    /// Overlap ratio in `[0, 1]`.
    pub domain_overlap: f32,
    /// Query/task domains after normalization.
    pub query_domains: Vec<String>,
    /// Memory domains after normalization.
    pub memory_domains: Vec<String>,
    /// The query domains that the memory also carries.
    pub matched_domains: Vec<String>,
}
271
272/// Calculates normalized domain overlap for score inputs.
273#[must_use]
274pub fn domain_overlap(
275 query_domains: &[impl AsRef<str>],
276 memory_domains: &[impl AsRef<str>],
277) -> DomainOverlapExplanation {
278 let query_domains = normalize_domains(query_domains);
279 let memory_domains = normalize_domains(memory_domains);
280 if query_domains.is_empty() {
281 return DomainOverlapExplanation {
282 domain_overlap: 0.0,
283 query_domains,
284 memory_domains,
285 matched_domains: Vec::new(),
286 };
287 }
288
289 let memory_set: HashSet<_> = memory_domains.iter().cloned().collect();
290 let matched_domains: Vec<_> = query_domains
291 .iter()
292 .filter(|domain| memory_set.contains(*domain))
293 .cloned()
294 .collect();
295 let domain_overlap = matched_domains.len() as f32 / query_domains.len() as f32;
296
297 DomainOverlapExplanation {
298 domain_overlap,
299 query_domains,
300 memory_domains,
301 matched_domains,
302 }
303}
304
/// Trims and ASCII-lowercases each domain, drops empty entries, and removes
/// duplicates while keeping the first occurrence's position.
fn normalize_domains(domains: &[impl AsRef<str>]) -> Vec<String> {
    let mut seen = HashSet::new();
    domains
        .iter()
        .map(|raw| raw.as_ref().trim().to_ascii_lowercase())
        // `insert` returns false for a label that was already recorded.
        .filter(|label| !label.is_empty() && seen.insert(label.clone()))
        .collect()
}
316
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn score_matches_build_spec_weights() {
        let explanation = score(ScoreInputs {
            lexical_match: 0.8,
            brightness: 0.6,
            domain_overlap: 0.5,
            validation: 0.7,
            authority_weight: 0.4,
            contradiction_risk: 0.2,
            staleness_penalty: 0.3,
        });

        let expected = 0.30 * 0.8 + 0.25 * 0.0 + 0.15 * 0.6 + 0.10 * 0.5 + 0.10 * 0.7 + 0.10 * 0.4
            - 0.25 * 0.2
            - 0.10 * 0.3;
        assert!((explanation.final_score - expected).abs() < f32::EPSILON);
        assert_eq!(explanation.semantic_similarity.raw, 0.0);
        assert_eq!(explanation.semantic_similarity.contribution, 0.0);
    }

    #[test]
    fn score_ordering_rewards_salient_validated_memories() {
        let strong = score(ScoreInputs {
            lexical_match: 0.75,
            brightness: 0.9,
            domain_overlap: 1.0,
            validation: 1.0,
            authority_weight: 0.8,
            contradiction_risk: 0.0,
            staleness_penalty: 0.0,
        });
        let weak = score(ScoreInputs {
            lexical_match: 1.0,
            brightness: 0.1,
            domain_overlap: 0.0,
            validation: 0.0,
            authority_weight: 0.2,
            contradiction_risk: 0.8,
            staleness_penalty: 0.6,
        });

        assert!(strong.final_score > weak.final_score);
    }

    #[test]
    fn explanation_contains_all_score_fields() {
        // `lexical_match: 2.0` is deliberately out of band to exercise clamping.
        let explanation = score(ScoreInputs {
            lexical_match: 2.0,
            brightness: 1.0,
            domain_overlap: 1.0,
            validation: 1.0,
            authority_weight: 1.0,
            contradiction_risk: 1.0,
            staleness_penalty: 1.0,
        });

        assert_eq!(explanation.lexical_match.raw, 1.0);
        assert_eq!(explanation.lexical_match.weight, LEXICAL_MATCH_WEIGHT);
        assert_eq!(
            explanation.semantic_similarity.weight,
            SEMANTIC_SIMILARITY_WEIGHT
        );
        assert_eq!(explanation.brightness.weight, BRIGHTNESS_WEIGHT);
        assert_eq!(explanation.domain_overlap.weight, DOMAIN_OVERLAP_WEIGHT);
        assert_eq!(explanation.validation.weight, VALIDATION_WEIGHT);
        assert_eq!(explanation.authority_weight.weight, AUTHORITY_WEIGHT);
        assert_eq!(
            explanation.contradiction_risk.weight,
            CONTRADICTION_RISK_WEIGHT
        );
        assert_eq!(
            explanation.staleness_penalty.weight,
            STALENESS_PENALTY_WEIGHT
        );
    }

    #[test]
    fn domain_overlap_reports_matched_domains() {
        let explanation =
            domain_overlap(&["Retrieval", "Store", "retrieval"], &["store", "privacy"]);

        assert_eq!(explanation.domain_overlap, 0.5);
        assert_eq!(explanation.query_domains, ["retrieval", "store"]);
        assert_eq!(explanation.memory_domains, ["store", "privacy"]);
        assert_eq!(explanation.matched_domains, ["store"]);
    }

    // =========================================================================
    // Phase 4.C compositional axis tests.
    //
    // The non-negotiable invariant is the eval guardrail
    // `compose_semantic_off_matches_phase_4b_baseline_exactly` — when the
    // caller passes `None` for the semantic axis the composer MUST return
    // exactly the same value as the Phase 4.B `compose_fuzzy_boost` shape.
    // A failure here means that leaving `--semantic` OFF (the default
    // path) changed the retrieval ordering, which is a Phase 4.B
    // regression.

    #[test]
    fn phase_4c_weights_sum_to_one() {
        let sum = LEX_WEIGHT_WITH_SEM + FTS5_WEIGHT_WITH_SEM + SEM_WEIGHT_WITH_SEM;
        assert!(
            (sum - 1.0).abs() < f32::EPSILON,
            "Phase 4.C composition weights must sum to 1.0, got {sum}"
        );
    }

    #[test]
    fn compose_semantic_off_matches_phase_4b_baseline_exactly() {
        // The Phase 4.B eval guardrail: passing `None` for semantic
        // MUST produce byte-for-byte the same value as the Phase 4.B
        // `compose_fuzzy_boost` helper. Iterate over a grid of
        // representative `(lex, fts5)` pairs so a future change that
        // sneaks a non-zero semantic contribution into the OFF path
        // trips this assertion on at least one cell.
        let cells = [
            (0.0_f32, 0.0_f32),
            (1.0, 0.0),
            (0.0, 1.0),
            (0.5, 0.5),
            (0.75, 0.25),
            (0.25, 0.75),
            (0.8, 0.2),
            (0.2, 0.8),
            (0.9, 0.1),
            (0.1, 0.9),
        ];
        for (lex, fts5) in cells {
            let phase_4b = compose_fuzzy_boost(lex, fts5);
            let phase_4c_off = compose_lexical_semantic(lex, fts5, None);
            // Compare bit patterns, not an epsilon window: the guardrail
            // promises identical values, and even sub-epsilon drift can
            // reorder two near-tied memories.
            assert_eq!(
                phase_4b.to_bits(),
                phase_4c_off.to_bits(),
                "Phase 4.B baseline drift at (lex={lex}, fts5={fts5}): \
                 phase_4b={phase_4b}, phase_4c_off={phase_4c_off}"
            );
        }
    }

    #[test]
    fn compose_semantic_on_includes_semantic_axis() {
        // With semantic ON, two memories that are otherwise identical
        // (same lexical, same fts5) MUST receive different composed
        // values when their semantic similarity differs.
        let lex = 0.5;
        let fts5 = 0.0;
        let low_sem = compose_lexical_semantic(lex, fts5, Some(0.0));
        let high_sem = compose_lexical_semantic(lex, fts5, Some(1.0));
        assert!(
            high_sem > low_sem,
            "semantic axis must contribute positively; low_sem={low_sem}, high_sem={high_sem}"
        );
        // The gap is exactly the semantic weight.
        assert!(
            (high_sem - low_sem - SEM_WEIGHT_WITH_SEM).abs() < f32::EPSILON,
            "semantic contribution must equal SEM_WEIGHT_WITH_SEM"
        );
    }

    #[test]
    fn compose_semantic_keeps_exact_lexical_dominant() {
        // A memory with a perfect lexical hit and no semantic signal
        // (e.g. no embedding row in the side table) MUST still
        // outscore a memory with no lexical hit but a perfect semantic
        // signal. This pins the SPEC's "small semantic weight does
        // not displace exact lexical hits" property.
        let lexical_only = compose_lexical_semantic(1.0, 0.0, Some(0.0));
        let semantic_only = compose_lexical_semantic(0.0, 0.0, Some(1.0));
        assert!(
            lexical_only > semantic_only,
            "exact lexical hit must dominate semantic-only hit; \
             lexical_only={lexical_only}, semantic_only={semantic_only}"
        );
    }

    #[test]
    fn compose_semantic_default_weight_does_not_displace_exact_lexical_hits() {
        // Even with worst-case semantic noise (semantic = 1.0 for an
        // unrelated memory, semantic = 0.0 for the lexically-matching
        // memory), the lexical hit MUST still win the composed score.
        // Pinned by SPEC: "small semantic weight" — the 0.10 weight is
        // small enough that lex=1.0,sem=0.0 (0.65) > lex=0.0,sem=1.0
        // (0.10).
        let lex_winner = compose_lexical_semantic(1.0, 0.0, Some(0.0));
        let sem_winner = compose_lexical_semantic(0.0, 0.0, Some(1.0));
        assert!(
            lex_winner > sem_winner,
            "default semantic weight must not displace exact lexical hits"
        );
        // Pin the two magnitudes quoted above so this test also catches
        // a weight change that keeps the ordering but moves the values.
        assert!(
            (lex_winner - LEX_WEIGHT_WITH_SEM).abs() < f32::EPSILON,
            "lexical-only composition must equal LEX_WEIGHT_WITH_SEM, got {lex_winner}"
        );
        assert!(
            (sem_winner - SEM_WEIGHT_WITH_SEM).abs() < f32::EPSILON,
            "semantic-only composition must equal SEM_WEIGHT_WITH_SEM, got {sem_winner}"
        );
    }

    #[test]
    fn compose_semantic_stays_in_band_for_all_inputs() {
        // Random-ish grid: every composed value MUST stay in [0, 1]
        // regardless of input combination.
        let values = [0.0_f32, 0.25, 0.5, 0.75, 1.0];
        for &lex in &values {
            for &fts5 in &values {
                for &sem in &values {
                    let composed = compose_lexical_semantic(lex, fts5, Some(sem));
                    assert!(
                        (0.0..=1.0).contains(&composed),
                        "composed value out of band at (lex={lex}, fts5={fts5}, sem={sem}): {composed}"
                    );
                }
            }
        }
    }

    #[test]
    fn compose_semantic_clamps_out_of_band_inputs() {
        // NaN, infinity, and out-of-band values MUST degrade to 0.0
        // rather than propagating into the composed score.
        let composed = compose_lexical_semantic(f32::NAN, f32::INFINITY, Some(-5.0));
        assert!((0.0..=1.0).contains(&composed));
        assert_eq!(composed, 0.0);
    }

    #[test]
    fn compose_semantic_zero_for_all_axes_is_zero() {
        // A memory that matched nothing must compose to 0.0 regardless
        // of which mode the composer is in.
        assert_eq!(compose_lexical_semantic(0.0, 0.0, None), 0.0);
        assert_eq!(compose_lexical_semantic(0.0, 0.0, Some(0.0)), 0.0);
    }
}