zeph_memory/five_signal/scoring.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use crate::five_signal::weights::FiveSignalWeights;
7use crate::types::MessageId;
8
/// Per-candidate raw signals before weighting.
///
/// Each field is one input to the weighted five-signal score. The derived
/// [`Default`] sets every field to `0.0`; note that this places `novelty` outside
/// its documented `(0.0, 1.0]` range, so a defaulted value contributes nothing
/// for that signal.
///
/// `PartialEq` is derived so that signal sets can be compared directly (for
/// example with `assert_eq!` in tests). Comparison follows `f64` semantics, so a
/// value containing NaN never equals itself.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct CandidateSignals {
    /// Recency signal ∈ `[0.0, 1.0]`.
    pub recency: f64,
    /// Semantic relevance signal ∈ `[0.0, 1.0]`.
    pub relevance: f64,
    /// Normalized access frequency ∈ `[0.0, 1.0]`.
    pub frequency: f64,
    /// Causal distance converted to score ∈ `[0.0, 1.0]` via `1/distance`.
    pub causal: f64,
    /// Novelty ∈ `(0.0, 1.0]`.
    pub novelty: f64,
}
23
24/// Apply five-signal scoring to a ranked candidate list.
25///
26/// Replaces each candidate's existing score with the weighted combination:
27/// `score = w_recency × recency + w_relevance × relevance
28/// + w_frequency × frequency + w_causal × causal + w_novelty × novelty`
29///
30/// Signals not provided in `signals_map` default to `0.0`.
31/// After scoring, `ranked` is re-sorted descending by the new score.
32///
33/// # Parameters
34///
35/// - `ranked`: mutable slice of `(MessageId, score)` pairs to re-score in-place.
36/// - `weights`: pre-normalized five-signal weights.
37/// - `signals_map`: per-candidate signal values; missing candidates use zeros for new signals.
38/// - `base_scores`: the original (recency+relevance) scores per candidate, used when the
39/// caller has not pre-split recency from relevance. When a candidate appears in
40/// `signals_map` its `recency` and `relevance` fields override this value.
41///
42/// # Examples
43///
44/// ```
45/// use zeph_memory::five_signal::scoring::{apply_five_signal_scoring, CandidateSignals};
46/// use zeph_memory::five_signal::weights::FiveSignalWeights;
47/// use zeph_config::memory::FiveSignalConfig;
48/// use zeph_memory::types::MessageId;
49///
50/// let mut cfg = FiveSignalConfig::default();
51/// cfg.w_recency = 0.5;
52/// cfg.w_relevance = 0.5;
53/// cfg.w_frequency = 0.0;
54/// cfg.w_causal = 0.0;
55/// cfg.w_novelty = 0.0;
56/// let weights = FiveSignalWeights::normalized(&cfg);
57///
58/// let id1 = MessageId(1);
59/// let id2 = MessageId(2);
60/// let mut ranked = vec![(id1, 0.8), (id2, 0.6)];
61/// let signals = std::collections::HashMap::from([
62/// (id1, CandidateSignals { recency: 0.8, relevance: 0.8, ..Default::default() }),
63/// (id2, CandidateSignals { recency: 0.6, relevance: 0.6, ..Default::default() }),
64/// ]);
65///
66/// apply_five_signal_scoring(&mut ranked, &weights, &signals);
67/// assert_eq!(ranked[0].0, id1);
68/// ```
69pub fn apply_five_signal_scoring<S: std::hash::BuildHasher>(
70 ranked: &mut [(MessageId, f64)],
71 weights: &FiveSignalWeights,
72 signals_map: &HashMap<MessageId, CandidateSignals, S>,
73) {
74 let _span = tracing::info_span!("memory.five_signal.scoring").entered();
75
76 for (msg_id, score) in ranked.iter_mut() {
77 let s = signals_map.get(msg_id).copied().unwrap_or_default();
78 *score = weights.w_recency * s.recency
79 + weights.w_relevance * s.relevance
80 + weights.w_frequency * s.frequency
81 + weights.w_causal * s.causal
82 + weights.w_novelty * s.novelty;
83 }
84
85 ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
86}
87
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::memory::FiveSignalConfig;

    /// Tolerance used when comparing computed scores against expected values.
    const EPS: f64 = 1e-9;

    /// Weights built from a config with recency and relevance at 0.5 each; the
    /// remaining fields come from `FiveSignalConfig::default()`.
    fn baseline_weights() -> FiveSignalWeights {
        FiveSignalWeights::normalized(&FiveSignalConfig {
            w_recency: 0.5,
            w_relevance: 0.5,
            ..FiveSignalConfig::default()
        })
    }

    /// Builds a signal set with the given recency, relevance and frequency;
    /// causal and novelty are left at their default of 0.0.
    fn make_signals(recency: f64, relevance: f64, frequency: f64) -> CandidateSignals {
        CandidateSignals {
            recency,
            relevance,
            frequency,
            ..Default::default()
        }
    }

    #[test]
    fn baseline_order_preserved() {
        let first = MessageId(1);
        let second = MessageId(2);

        let mut table = HashMap::new();
        table.insert(first, make_signals(0.8, 0.8, 0.0));
        table.insert(second, make_signals(0.6, 0.6, 0.0));

        let mut ranked = vec![(first, 0.8_f64), (second, 0.6_f64)];
        apply_five_signal_scoring(&mut ranked, &baseline_weights(), &table);

        assert_eq!(ranked[0].0, first, "higher score should rank first");
    }

    #[test]
    fn frequency_signal_flips_order() {
        let weights = FiveSignalWeights::normalized(&FiveSignalConfig {
            w_recency: 0.35,
            w_relevance: 0.35,
            w_frequency: 0.30,
            ..FiveSignalConfig::default()
        });

        let frequent = MessageId(1); // lower relevance, high frequency
        let relevant = MessageId(2); // higher relevance, zero frequency

        let mut table = HashMap::new();
        table.insert(frequent, make_signals(0.5, 0.7, 1.0)); // max frequency
        table.insert(relevant, make_signals(0.5, 0.9, 0.0));

        // Start with the more relevant candidate on top.
        let mut ranked = vec![(relevant, 0.9_f64), (frequent, 0.7_f64)];
        apply_five_signal_scoring(&mut ranked, &weights, &table);

        assert_eq!(ranked[0].0, frequent, "frequency should flip the ranking");
    }

    #[test]
    fn missing_candidate_scores_zero() {
        let only = MessageId(1);
        let mut ranked = vec![(only, 0.5_f64)];

        // Empty signals map → all signals default to 0
        apply_five_signal_scoring(&mut ranked, &baseline_weights(), &HashMap::new());

        let rescored = ranked[0].1;
        assert!(rescored.abs() < EPS, "missing signals → score 0.0");
    }

    #[test]
    fn zero_new_weights_equals_two_signal_baseline() {
        // With w_frequency = w_causal = w_novelty = 0.0 the five-signal formula collapses to
        // score = w_recency * recency + w_relevance * relevance, i.e. the two-signal result.
        let weights = baseline_weights(); // 0.5 recency, 0.5 relevance, rest = 0
        assert!(weights.is_baseline());

        let first = MessageId(1);
        let second = MessageId(2);

        let (recency1, relevance1) = (0.7_f64, 0.8_f64);
        let (recency2, relevance2) = (0.9_f64, 0.3_f64);

        // Expected two-signal scores
        let expected1 = 0.5 * recency1 + 0.5 * relevance1;
        let expected2 = 0.5 * recency2 + 0.5 * relevance2;

        let mut table = HashMap::new();
        table.insert(first, make_signals(recency1, relevance1, 0.0));
        table.insert(second, make_signals(recency2, relevance2, 0.0));

        let mut ranked = vec![(first, 0.0_f64), (second, 0.0_f64)];
        apply_five_signal_scoring(&mut ranked, &weights, &table);

        // After sort descending: first (0.75) > second (0.6)
        assert_eq!(ranked[0].0, first);

        let top_delta = (ranked[0].1 - expected1).abs();
        assert!(top_delta < EPS, "five-signal must equal two-signal baseline");

        let next_delta = (ranked[1].1 - expected2).abs();
        assert!(next_delta < EPS, "five-signal must equal two-signal baseline");
    }
}