Skip to main content

ski/
confidence.rs

1//! Map a stage score onto one `[0,1]` confidence axis plus a coarse band, shared
2//! by the injection phrasing (how forcefully to recommend) and session dedup
3//! (whether a *re*-recommendation clears the HIGH bar).
4//!
5//! Two scales reach us: stage-1 cosine (`~0.3–0.9`, anisotropic — unrelated
6//! prompts still sit ~0.5) and stage-2 reranker logits (`~-10..+10`). They are
7//! not comparable, so each gets its own mapping. The reranker mapping is
8//! principled (a sigmoid, matching the cross-encoder's training objective); the
9//! cosine mapping is an explicit heuristic — it exists so phrasing/dedup have a
10//! single dial, not to claim cosine is a probability.
11
12use crate::config::Config;
13
/// The ranking stage a score came from; [`of`] uses it to pick the confidence mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Stage {
    /// Bi-encoder cosine similarity from stage 1 (keyword boost included).
    Cosine,
    /// Cross-encoder reranker logit from stage 2.
    Rerank,
    /// Dominant winner of the stage-1.5 lexical pass (BM25 over descriptions).
    Lexical,
}
24
/// Fixed confidence reported for a dominant lexical winner.
///
/// BM25 scores scale with query length and term rarity, so they cannot be
/// compared across prompts and any scalar mapping would be dishonest. The
/// dominance gate ([`crate::lexical::dominant`]) has already established that the
/// hit is high-precision, so it gets one constant High-band value: strong enough
/// to drive an assertive directive, yet deliberately below
/// [`crate::config::Config::body_inject_min`] so a lexical signal yields a
/// directive pointer instead of inlining a full SKILL.md.
pub const LEXICAL_CONF: f32 = 0.9;
33
/// Coarse bucket for a confidence value; phrasing forcefulness keys off it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Band {
    /// Confidence at or above [`HIGH`].
    High,
    /// Confidence at or above [`LOW`] but below [`HIGH`].
    Medium,
    /// Confidence below [`LOW`] (tentative).
    Low,
}
41
/// Inclusive lower edge of the High band. It doubles as the bar a *repeat*
/// recommendation has to clear (see
/// [`crate::session::Session::should_recommend`]).
pub const HIGH: f32 = 0.8;
/// Exclusive upper edge of the Low (tentative) band: anything `<` this is Low.
pub const LOW: f32 = 0.55;
/// Width of the cosine ramp: how far above `min_similarity` a score must sit for
/// confidence to have climbed from its floor to its ceiling.
/// Heuristic: bge's genuinely-strong matches sit roughly this far above the
/// eligibility floor.
const COSINE_SPAN: f32 = 0.45;
51
52/// Confidence in `[0,1]` for a hit's `score`, given the stage that produced it.
53pub fn of(score: f32, stage: Stage, cfg: &Config) -> f32 {
54    match stage {
55        // JINA-turbo logits are ~calibrated; sigmoid -> probability.
56        Stage::Rerank => sigmoid(score),
57        // Cosine has no probabilistic meaning; map [floor, floor+span] -> [.5,.97].
58        Stage::Cosine => {
59            let t = ((score - cfg.min_similarity) / COSINE_SPAN).clamp(0.0, 1.0);
60            (0.5 + 0.47 * t).clamp(0.0, 0.99)
61        }
62        // BM25 has no probabilistic meaning and no cross-prompt scale; the dominance
63        // gate already vouched for precision, so a dominant winner reports a fixed
64        // High-band confidence (see [`LEXICAL_CONF`]). `score` is ignored.
65        Stage::Lexical => LEXICAL_CONF,
66    }
67}
68
69/// Band for a confidence value.
70pub fn band(conf: f32) -> Band {
71    if conf >= HIGH {
72        Band::High
73    } else if conf >= LOW {
74        Band::Medium
75    } else {
76        Band::Low
77    }
78}
79
/// Logistic function `1 / (1 + e^-x)`.
fn sigmoid(x: f32) -> f32 {
    let decay = (-x).exp();
    (1.0 + decay).recip()
}
83
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;

    /// Tolerance used when comparing a computed confidence to its anchor value.
    const EPS: f32 = 1e-3;

    #[test]
    fn rerank_sigmoid_anchors() {
        let cfg = Config::default();
        assert!((of(0.0, Stage::Rerank, &cfg) - 0.5).abs() < EPS);
        assert!(of(3.0, Stage::Rerank, &cfg) > 0.9); // strong match
        assert!(of(-2.5, Stage::Rerank, &cfg) < 0.1); // at the rerank floor
    }

    #[test]
    fn rerank_stays_within_unit_interval() {
        let cfg = Config::default();
        // Extreme logits saturate instead of leaving [0,1].
        for logit in [-100.0_f32, -10.0, 10.0, 100.0] {
            let conf = of(logit, Stage::Rerank, &cfg);
            assert!((0.0..=1.0).contains(&conf), "logit {logit} -> {conf}");
        }
    }

    #[test]
    fn cosine_climbs_from_floor() {
        let cfg = Config::default();
        // Anchor on the configured floor rather than a hard-coded default.
        let floor = cfg.min_similarity;
        let at_floor = of(floor, Stage::Cosine, &cfg);
        let strong = of(floor + 0.50, Stage::Cosine, &cfg);
        assert!((at_floor - 0.5).abs() < EPS);
        assert!(strong > HIGH);
        assert!(strong <= 0.99);
    }

    #[test]
    fn cosine_saturates_past_span() {
        let cfg = Config::default();
        // Anything COSINE_SPAN or more above the floor sits at the same ceiling.
        let at_ceiling = of(cfg.min_similarity + COSINE_SPAN, Stage::Cosine, &cfg);
        let far_above = of(cfg.min_similarity + 10.0 * COSINE_SPAN, Stage::Cosine, &cfg);
        assert!((at_ceiling - 0.97).abs() < EPS);
        assert!((far_above - at_ceiling).abs() < EPS);
    }

    #[test]
    fn cosine_clamps_below_floor() {
        let cfg = Config::default();
        // A forced sub-floor keyword hit must not produce a negative confidence;
        // the ramp clamps it to the same 0.5 a hit exactly at the floor gets.
        let sub_floor = of(cfg.min_similarity - 0.30, Stage::Cosine, &cfg);
        assert!(sub_floor >= 0.0);
        assert!((sub_floor - 0.5).abs() < EPS);
    }

    #[test]
    fn lexical_is_fixed_and_high() {
        let cfg = Config::default();
        // The score is ignored for lexical winners, whatever its magnitude.
        for score in [-5.0_f32, 0.0, 0.4, 123.0] {
            assert_eq!(of(score, Stage::Lexical, &cfg), LEXICAL_CONF);
        }
        assert_eq!(band(LEXICAL_CONF), Band::High);
    }

    #[test]
    fn bands_partition_the_axis() {
        assert_eq!(band(0.95), Band::High);
        assert_eq!(band(HIGH), Band::High);
        assert_eq!(band(0.70), Band::Medium);
        assert_eq!(band(LOW), Band::Medium);
        assert_eq!(band(0.40), Band::Low);
    }
}