Skip to main content

mnm_core/
scoring.rs

1//! Confidence-scoring compute layer (US6, D24).
2//!
3//! Pure, DB-free math: given a [`crate::provenance::Provenance`], an age in
4//! whole days, a query-side version constraint, and a normalized relevance
5//! term, produce a `trust_score`, a blended `confidence`, and a per-factor
6//! [`ConfidenceFactors`] breakdown. The relevance-normalization helper
7//! [`normalize_rrf`] is compiled-in, not policy-configurable, so every
8//! confidence score in the corpus stays reproducible (spec §"Relevance term").
9
10use serde::Serialize;
11
12use crate::provenance::{Attribution, LanguageTarget, Provenance};
13use crate::scoring_policy::ScoringPolicy;
14
15/// Which relevance term fed the confidence blend.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
17#[serde(rename_all = "lowercase")]
18pub enum RelevanceSource {
19    /// Normalized Reciprocal Rank Fusion score (cloud default, `rerank=false`).
20    Rrf,
21    /// Voyage relevance score (server inline or client BYOK), `rerank=true`.
22    Rerank,
23}
24
25/// The query-side language target echoed into [`ConfidenceFactors`].
26#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
27pub struct LanguageTargetQueryFactor {
28    /// Language name from the query filter.
29    pub name: String,
30    /// Concrete version the query asked to be satisfied.
31    #[serde(skip_serializing_if = "Option::is_none")]
32    pub version_constraint_satisfies: Option<String>,
33}
34
35/// Precomputed version-match input for [`ScoringPolicy::score`] (spec §3.5).
36/// Computed by the search route from the mode + per-facet classification.
37#[derive(Debug, Clone)]
38pub struct VersionScoreInput {
39    /// The multiplier to apply to trust.
40    pub multiplier: f64,
41    /// `"satisfies" | "near_miss" | "silent" | "unknown"`.
42    pub class: &'static str,
43    /// Component distance for near misses.
44    pub distance: Option<u32>,
45    /// Echo of the query-side element that drove the outcome.
46    pub query: Option<LanguageTargetQueryFactor>,
47}
48
49/// Per-factor breakdown of a result's trust + confidence, rich enough to write
50/// a one-sentence provenance explanation without further API calls (#12).
51#[derive(Debug, Clone, PartialEq, Serialize)]
52pub struct ConfidenceFactors {
53    /// Source attribution that drove the dominant trust multiplier.
54    pub attribution: Attribution,
55    /// The attribution multiplier applied.
56    pub attribution_multiplier: f64,
57    /// Whether the content was verified.
58    pub verified: bool,
59    /// Who verified it, if recorded.
60    #[serde(skip_serializing_if = "Option::is_none")]
61    pub verified_by: Option<String>,
62    /// The verification multiplier applied.
63    pub verification_multiplier: f64,
64    /// Age of the content in whole days (from `source_modified_at`, else the
65    /// source-version `ingested_at`).
66    pub age_days: i64,
67    /// The exponential freshness-decay multiplier applied.
68    pub freshness_multiplier: f64,
69    /// Whether the content is flagged deprecated.
70    pub deprecation: bool,
71    /// The deprecation multiplier applied (1.0 when not deprecated).
72    pub deprecation_multiplier: f64,
73    /// The query-side language target, when one was supplied.
74    #[serde(skip_serializing_if = "Option::is_none")]
75    pub language_target_query: Option<LanguageTargetQueryFactor>,
76    /// The chunk's declared language targets.
77    #[serde(skip_serializing_if = "Vec::is_empty")]
78    pub language_targets_chunk: Vec<LanguageTarget>,
79    /// The version-match multiplier applied.
80    pub version_match_multiplier: f64,
81    /// Match class, present only when the request carried a version filter.
82    #[serde(skip_serializing_if = "Option::is_none")]
83    pub version_match_class: Option<&'static str>,
84    /// Near-miss component distance, when applicable.
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub version_distance: Option<u32>,
87    /// Which relevance term fed the blend.
88    pub relevance_source: RelevanceSource,
89    /// The normalized relevance term used in the blend.
90    pub relevance_multiplier: f64,
91}
92
93/// The full result of scoring one candidate.
94#[derive(Debug, Clone, PartialEq)]
95pub struct ScoreResult {
96    /// Content trust in `[0, 1]`.
97    pub trust_score: f64,
98    /// Blended confidence in `[0, 1]`.
99    pub confidence: f64,
100    /// Per-factor breakdown.
101    pub factors: ConfidenceFactors,
102}
103
/// Map a raw RRF score onto the unit interval, preserving order.
///
/// Computes `relevance_rrf = 1 - 1/(1 + raw)` after flooring `raw` at zero;
/// negative and NaN inputs therefore normalize to `0.0`. Mathematically the
/// result lies in `[0, 1)`; in `f64` arithmetic it rounds to exactly `1.0`
/// once `raw` is large enough (and for `+∞`). The formula is compiled in
/// rather than policy-configurable (spec §"Relevance term").
#[must_use]
pub fn normalize_rrf(raw_rrf_score: f64) -> f64 {
    // `f64::max` returns the non-NaN operand, so NaN collapses to 0.0 here.
    let non_negative = f64::max(raw_rrf_score, 0.0);
    let denominator = 1.0 + non_negative;
    1.0 - denominator.recip()
}
112
113/// Clamp a scoring value into `[0, 1]`, logging a structured warning when the
114/// raw value falls outside the range or is non-finite (acceptance #13).
115fn clamp_unit(value: f64, metric: &str) -> f64 {
116    if value.is_finite() && (0.0..=1.0).contains(&value) {
117        return value;
118    }
119    let clamped = if value.is_finite() {
120        value.clamp(0.0, 1.0)
121    } else {
122        0.0
123    };
124    tracing::warn!(metric, raw = value, clamped, "scoring value clamped to [0,1]");
125    clamped
126}
127
128impl ScoringPolicy {
129    /// The attribution multiplier for one [`Attribution`] variant.
130    #[must_use]
131    pub const fn attribution_multiplier(&self, attribution: Attribution) -> f64 {
132        match attribution {
133            Attribution::Foundation => self.attribution.foundation,
134            Attribution::Partner => self.attribution.partner,
135            Attribution::ThirdParty => self.attribution.third_party,
136            Attribution::Community => self.attribution.community,
137            Attribution::Unknown => self.attribution.unknown,
138        }
139    }
140
141    /// The verification multiplier, keyed off whether the content was verified
142    /// and who verified it. Unverified content always lands on the lowest
143    /// `unverified` multiplier, so any verified result outranks it (#3).
144    #[must_use]
145    pub fn verification_multiplier(&self, verified: bool, verified_by: Option<&str>) -> f64 {
146        if !verified {
147            return self.verification.unverified;
148        }
149        match verified_by.map(str::to_ascii_lowercase) {
150            Some(who) if who.contains("foundation") => self.verification.verified_by_foundation,
151            Some(who) if who.contains("partner") => self.verification.verified_by_partner,
152            _ => self.verification.verified_by_other,
153        }
154    }
155
156    /// The exponential freshness multiplier `exp(-age_days / half_life_days)`.
157    /// Negative ages (clock skew) are treated as zero (fully fresh).
158    #[must_use]
159    pub fn freshness_multiplier(&self, age_days: i64) -> f64 {
160        #[allow(clippy::cast_precision_loss)] // ages well within f64's exact-integer range
161        let age = age_days.max(0) as f64;
162        (-age / self.freshness.half_life_days).exp()
163    }
164
165    /// Map a [`crate::version_match::MatchClass`] to its trust multiplier
166    /// using the `[version_match]` policy knobs (spec §3.3): linear steps with
167    /// a floor. `Breaking` maps to `floor` for completeness — callers drop
168    /// Breaking candidates before scoring.
169    #[must_use]
170    pub fn version_multiplier(&self, class: &crate::version_match::MatchClass) -> f64 {
171        use crate::version_match::MatchClass as C;
172        let m = &self.version_match;
173        match class {
174            C::Satisfies => m.satisfies,
175            C::Unknown => m.neutral,
176            C::Breaking => m.floor,
177            C::NearMissPatch(d) => m.patch_step.mul_add(-f64::from(*d), 1.0).max(m.floor),
178            C::NearMissMinor(d) => m.minor_step.mul_add(-f64::from(*d), 1.0).max(m.floor),
179        }
180    }
181
182    /// Blend trust and relevance into a confidence in `[0, 1]`.
183    ///
184    /// `confidence = clamp(trust^trust_weight * relevance^relevance_weight)`.
185    #[must_use]
186    pub fn confidence(&self, trust_score: f64, relevance: f64) -> f64 {
187        let t = trust_score.clamp(0.0, 1.0);
188        let r = relevance.clamp(0.0, 1.0);
189        let raw = t.powf(self.blend.trust_weight) * r.powf(self.blend.relevance_weight);
190        clamp_unit(raw, "confidence")
191    }
192
193    /// Score one candidate end to end: trust, blended confidence, and the
194    /// per-factor breakdown. `relevance` must already be normalized to `[0, 1]`
195    /// — via [`normalize_rrf`] for RRF scores, or directly for Voyage relevance
196    /// scores (which Voyage already returns in `[0, 1]`).
197    #[must_use]
198    pub fn score(
199        &self,
200        provenance: &Provenance,
201        version: Option<&VersionScoreInput>,
202        age_days: i64,
203        relevance: f64,
204        relevance_source: RelevanceSource,
205    ) -> ScoreResult {
206        let attribution_multiplier = self.attribution_multiplier(provenance.attribution);
207        let verification_multiplier =
208            self.verification_multiplier(provenance.verified, provenance.verified_by.as_deref());
209        let freshness_multiplier = self.freshness_multiplier(age_days);
210        let deprecation_multiplier = if provenance.deprecation.is_deprecated {
211            self.deprecation.penalty_multiplier
212        } else {
213            1.0
214        };
215        let version_match_multiplier = version.map_or(self.version_match.neutral, |v| v.multiplier);
216
217        let raw_trust = attribution_multiplier
218            * verification_multiplier
219            * freshness_multiplier
220            * deprecation_multiplier
221            * version_match_multiplier;
222        let trust_score = clamp_unit(raw_trust, "trust_score");
223        let relevance_multiplier = relevance.clamp(0.0, 1.0);
224        let confidence = self.confidence(trust_score, relevance_multiplier);
225
226        let factors = ConfidenceFactors {
227            attribution: provenance.attribution,
228            attribution_multiplier,
229            verified: provenance.verified,
230            verified_by: provenance.verified_by.clone(),
231            verification_multiplier,
232            age_days,
233            freshness_multiplier,
234            deprecation: provenance.deprecation.is_deprecated,
235            deprecation_multiplier,
236            language_target_query: version.and_then(|v| v.query.clone()),
237            language_targets_chunk: provenance.language_targets.clone(),
238            version_match_multiplier,
239            version_match_class: version.map(|v| v.class),
240            version_distance: version.and_then(|v| v.distance),
241            relevance_source,
242            relevance_multiplier,
243        };
244
245        ScoreResult {
246            trust_score,
247            confidence,
248            factors,
249        }
250    }
251}
252
253/// Parse a possibly-partial version string into a full [`semver::Version`].
254///
255/// Pads missing minor/patch components with `0` (`"0.31"` → `0.31.0`, `"v1.4"`
256/// → `1.4.0`, `"1"` → `1.0.0`) and returns `None` when the numeric core can't
257/// be parsed. Shared with the search-filter layer so version-match scoring and
258/// version-constraint filtering normalize versions identically.
259#[must_use]
260pub fn parse_version(raw: &str) -> Option<semver::Version> {
261    let trimmed = raw.trim().trim_start_matches(['v', 'V']);
262    // Split off any pre-release/build suffix; we only normalize the numeric core.
263    let core = trimmed.split(['-', '+']).next().unwrap_or(trimmed).trim();
264    if core.is_empty() {
265        return None;
266    }
267    let mut parts = core.split('.');
268    let major = parts.next()?.parse::<u64>().ok()?;
269    let minor = parts.next().map_or(Ok(0), str::parse).ok()?;
270    let patch = parts.next().map_or(Ok(0), str::parse).ok()?;
271    if parts.next().is_some() {
272        return None;
273    }
274    Some(semver::Version::new(major, minor, patch))
275}
276
#[cfg(test)]
mod tests {
    use super::*;
    use crate::provenance::Deprecation;

    /// Absolute tolerance for multipliers compared against a known value.
    const TOLERANCE: f64 = 1e-12;

    /// The default policy every test scores against.
    fn policy() -> ScoringPolicy {
        ScoringPolicy::default()
    }

    /// Provenance built through `Provenance::attributed_to`.
    fn prov_with(attribution: Attribution) -> Provenance {
        Provenance::attributed_to(attribution)
    }

    /// Foundation-attributed provenance marked verified by `midnight-foundation`.
    fn verified_foundation() -> Provenance {
        let mut prov = prov_with(Attribution::Foundation);
        prov.verified = true;
        prov.verified_by = Some("midnight-foundation".into());
        prov
    }

    /// Verified foundation provenance that also declares a `compact >=0.23`
    /// chunk language target.
    fn verified_foundation_targeting_compact() -> Provenance {
        let mut prov = verified_foundation();
        prov.language_targets = vec![LanguageTarget {
            name: "compact".into(),
            version_constraint: Some(">=0.23".into()),
        }];
        prov
    }

    /// Query-side echo for `compact` at version `0.31`.
    fn compact_query() -> LanguageTargetQueryFactor {
        LanguageTargetQueryFactor {
            name: "compact".into(),
            version_constraint_satisfies: Some("0.31".into()),
        }
    }

    /// A "satisfies" version input carrying the 1.15 boost.
    fn satisfies_input() -> VersionScoreInput {
        VersionScoreInput {
            multiplier: 1.15,
            class: "satisfies",
            distance: None,
            query: Some(compact_query()),
        }
    }

    /// `true` when `actual` is within [`TOLERANCE`] of `expected`.
    fn approx(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    /// Score with no version input and an RRF relevance term.
    fn score_rrf(
        p: &ScoringPolicy,
        prov: &Provenance,
        age_days: i64,
        relevance: f64,
    ) -> ScoreResult {
        p.score(prov, None, age_days, relevance, RelevanceSource::Rrf)
    }

    #[test]
    fn attribution_orders_foundation_above_community(/* #2 */) {
        let p = policy();
        // Listed from most to least trusted; trust must fall strictly at each step.
        let ranked = [
            Attribution::Foundation,
            Attribution::Partner,
            Attribution::ThirdParty,
            Attribution::Community,
            Attribution::Unknown,
        ];
        let trust: Vec<f64> = ranked
            .iter()
            .map(|&attribution| score_rrf(&p, &prov_with(attribution), 0, 0.5).trust_score)
            .collect();
        for pair in trust.windows(2) {
            assert!(pair[0] > pair[1]);
        }
    }

    #[test]
    fn verified_outranks_unverified(/* #3 */) {
        let p = policy();
        let with_verification = score_rrf(&p, &verified_foundation(), 0, 0.5);
        let without_verification = score_rrf(&p, &prov_with(Attribution::Foundation), 0, 0.5);
        assert!(with_verification.trust_score > without_verification.trust_score);
    }

    #[test]
    fn verified_by_principal_selects_multiplier() {
        let p = policy();
        let cases = [
            (Some("Midnight Foundation"), p.verification.verified_by_foundation),
            (Some("acme-partner"), p.verification.verified_by_partner),
            (Some("some-reviewer"), p.verification.verified_by_other),
            (None, p.verification.verified_by_other),
        ];
        for (who, expected) in cases {
            assert!(approx(p.verification_multiplier(true, who), expected));
        }
    }

    #[test]
    fn fresher_outranks_stale(/* #4 */) {
        let p = policy();
        let prov = prov_with(Attribution::Foundation);
        let recent = score_rrf(&p, &prov, 14, 0.5);
        let old = score_rrf(&p, &prov, 730, 0.5);
        assert!(recent.trust_score > old.trust_score);
        // The spec's decay is exp(-age/half_life), so at age == half_life the
        // multiplier is e^-1 (~0.368) rather than 0.5 — a characteristic
        // decay time.
        let hl = p.freshness_multiplier(180);
        let one_over_e = std::f64::consts::E.recip();
        assert!((hl - one_over_e).abs() < 1e-9, "decay multiplier was {hl}");
    }

    #[test]
    fn deprecation_penalizes(/* #5 */) {
        let p = policy();
        let live = prov_with(Attribution::Foundation);
        let mut deprecated = prov_with(Attribution::Foundation);
        deprecated.deprecation = Deprecation {
            is_deprecated: true,
            since: None,
            reason: None,
        };
        let penalized = score_rrf(&p, &deprecated, 0, 0.5);
        let baseline = score_rrf(&p, &live, 0, 0.5);
        assert!(penalized.trust_score < baseline.trust_score);
        assert!(penalized.factors.deprecation);
        assert!(approx(penalized.factors.deprecation_multiplier, 0.30));
    }

    #[test]
    fn score_applies_precomputed_version_input(/* spec §3.5 */) {
        let p = policy();
        let prov = prov_with(Attribution::Foundation);
        let near_miss = VersionScoreInput {
            multiplier: 0.85,
            class: "near_miss",
            distance: Some(1),
            query: Some(compact_query()),
        };
        let with_input = p.score(&prov, Some(&near_miss), 0, 0.5, RelevanceSource::Rrf);
        assert!(approx(with_input.factors.version_match_multiplier, 0.85));
        assert_eq!(with_input.factors.version_match_class, Some("near_miss"));
        assert_eq!(with_input.factors.version_distance, Some(1));

        // Without an input the multiplier is neutral and the optional fields
        // are left out of the serialized breakdown.
        let without_input = score_rrf(&p, &prov, 0, 0.5);
        assert!(approx(without_input.factors.version_match_multiplier, 1.0));
        assert_eq!(without_input.factors.version_match_class, None);
        let json = serde_json::to_value(&without_input.factors).unwrap();
        assert!(json.get("version_match_class").is_none());
        assert!(json.get("version_distance").is_none());
    }

    #[test]
    fn multiplier_for_class_scales_with_distance(/* spec §3.3 */) {
        use crate::version_match::MatchClass;
        let p = policy();
        let expectations = [
            (MatchClass::Satisfies, 1.15),
            (MatchClass::Unknown, 1.00),
            (MatchClass::NearMissPatch(2), 0.90),
            (MatchClass::NearMissMinor(3), 0.55),
            // Far enough away that the floor takes over.
            (MatchClass::NearMissMinor(20), 0.30),
        ];
        for (class, expected) in &expectations {
            assert!(
                approx(p.version_multiplier(class), *expected),
                "expected multiplier {expected}"
            );
        }
    }

    #[test]
    fn trust_clamps_when_boost_exceeds_one(/* #13 */) {
        let p = policy();
        // Foundation (1.0) * verified_by_foundation (1.0) * fresh (~1.0) *
        // not-deprecated (1.0) * satisfies (1.15) would exceed 1.0 → clamp.
        let prov = verified_foundation_targeting_compact();
        let boosted = satisfies_input();
        let r = p.score(&prov, Some(&boosted), 0, 1.0, RelevanceSource::Rrf);
        assert!(approx(r.trust_score, 1.0), "trust should clamp to 1.0");
        assert!((0.0..=1.0).contains(&r.confidence));
    }

    #[test]
    fn confidence_is_monotonic_in_relevance() {
        let p = policy();
        let prov = prov_with(Attribution::Partner);
        let weaker = score_rrf(&p, &prov, 30, 0.2);
        let stronger = score_rrf(&p, &prov, 30, 0.8);
        assert!(stronger.confidence > weaker.confidence);
        assert_eq!(stronger.factors.relevance_source, RelevanceSource::Rrf);
        assert!(approx(stronger.factors.relevance_multiplier, 0.8));
    }

    #[test]
    fn normalize_rrf_is_bounded_and_monotonic() {
        assert!(approx(normalize_rrf(0.0), 0.0));
        assert!(normalize_rrf(1.0) > normalize_rrf(0.5));
        assert!(normalize_rrf(1e9) < 1.0);
    }

    #[test]
    fn parse_version_pads_partials() {
        let padded = [
            ("0.31", semver::Version::new(0, 31, 0)),
            ("v1.4.2", semver::Version::new(1, 4, 2)),
            ("2", semver::Version::new(2, 0, 0)),
        ];
        for (raw, expected) in padded {
            assert_eq!(parse_version(raw), Some(expected));
        }
        assert_eq!(parse_version("not-a-version"), None);
    }

    #[test]
    fn factors_serialize_with_spec_keys() {
        let p = policy();
        let prov = verified_foundation_targeting_compact();
        let version = satisfies_input();
        let r = p.score(&prov, Some(&version), 14, 0.873, RelevanceSource::Rerank);
        let json = serde_json::to_value(&r.factors).unwrap();
        assert_eq!(json["attribution"], "foundation");
        assert_eq!(json["verified"], true);
        assert_eq!(json["age_days"], 14);
        assert_eq!(json["relevance_source"], "rerank");
        assert_eq!(json["language_target_query"]["version_constraint_satisfies"], "0.31");
        assert_eq!(json["language_targets_chunk"][0]["name"], "compact");
    }
}