Skip to main content

anno/types/
confidence.rs

1//! Witness type for confidence values bounded to [0.0, 1.0].
2//!
3//! # What Confidence Actually Means
4//!
5//! Different NER backends compute confidence in fundamentally different ways.
6//! These numbers are NOT directly comparable!
7//!
8//! ```text
9//! ┌──────────────────────────────────────────────────────────────────────────┐
10//! │                    CONFIDENCE ACROSS BACKENDS                            │
11//! ├──────────────────────────────────────────────────────────────────────────┤
12//! │                                                                          │
13//! │  PATTERN NER: "Did the regex match?"                                     │
14//! │  ────────────────────────────────────                                    │
15//! │                                                                          │
16//! │  • 0.95-0.99 = Regex matched (deterministic)                             │
17//! │  • Confidence reflects pattern complexity, NOT uncertainty               │
18//! │                                                                          │
19//! │    Email pattern matched? → 0.98                                         │
20//! │    Date pattern matched?  → 0.95                                         │
21//! │                                                                          │
22//! │  This is CERTAINTY, not probability.                                     │
23//! │  If the pattern fires, it's almost always correct.                       │
24//! │                                                                          │
25//! │  ──────────────────────────────────────────────────────────────────────  │
26//! │                                                                          │
27//! │  STATISTICAL NER: "How many heuristics agreed?"                          │
28//! │  ───────────────────────────────────────────────                         │
29//! │                                                                          │
30//! │  • Score = (capitalization + context + gazetteer) / weights              │
31//! │  • Range: typically 0.4 - 0.8                                            │
32//! │                                                                          │
33//! │    "Dr. Smith" → 0.72 (title + capitalization)                           │
34//! │    "Apple"     → 0.55 (capitalization only, ambiguous)                   │
35//! │                                                                          │
36//! │  This is a HEURISTIC BLEND.                                              │
37//! │  Higher = more features matched, but not a probability.                  │
38//! │                                                                          │
39//! │  ──────────────────────────────────────────────────────────────────────  │
40//! │                                                                          │
41//! │  NEURAL NER (BERT/GLiNER): "Softmax probability"                         │
42//! │  ────────────────────────────────────────────────                        │
43//! │                                                                          │
44//! │  • softmax([logit_PER, logit_ORG, logit_LOC, ...])                       │
45//! │  • Range: 0.0 - 1.0, calibrated to approximate probability               │
46//! │                                                                          │
47//! │    "John"  → PER: 0.94, ORG: 0.03, LOC: 0.03                             │
48//! │    "Apple" → ORG: 0.52, PER: 0.01, LOC: 0.47  (ambiguous!)               │
49//! │                                                                          │
50//! │  This is a CALIBRATED probability (ideally).                             │
51//! │  Models with temperature scaling are better calibrated.                  │
52//! │                                                                          │
53//! └──────────────────────────────────────────────────────────────────────────┘
54//! ```
55//!
56//! # The Comparison Problem
57//!
58//! ```text
59//! NEVER DO THIS:
60//!
61//!   RegexNER says EMAIL with 0.98 confidence
62//!   HeuristicNER says ORG with 0.55 confidence
63//!
64//!   "0.98 > 0.55, so EMAIL is more likely!"  ← WRONG!
65//!
66//! These scales are incompatible:
67//!
68//!   • RegexNER's 0.98 means "regex matched, nearly certain"
69//!   • HeuristicNER's 0.55 means "some features matched, unsure"
70//!
71//! Comparing them is like comparing °C to °F to Kelvin.
72//! Same name (confidence), different scales.
73//!
74//! ────────────────────────────────────────────────────────────────────────────
75//!
76//! WHAT TO DO INSTEAD:
77//!
78//! 1. Use conflict resolution strategies (Priority, LongestSpan)
79//! 2. Calibrate scores if mixing backends
80//! 3. Threshold per-backend: Pattern > 0.9, Neural > 0.5
81//! ```
82//!
83//! # When to Trust Confidence
84//!
85//! ```text
86//! ┌───────────────┬─────────────────────────────────────────────────────────┐
87//! │ Backend       │ When confidence is reliable                            │
88//! ├───────────────┼─────────────────────────────────────────────────────────┤
89//! │ RegexNER      │ Always (deterministic). 0.95+ means pattern matched.   │
90//! │ HeuristicNER  │ Use as ranking within backend, not absolute truth.     │
91//! │ BERT-NER      │ Reasonably calibrated for in-domain data.              │
92//! │ GLiNER        │ Good for ranking, less calibrated for absolute probs.  │
93//! └───────────────┴─────────────────────────────────────────────────────────┘
94//! ```
95
96use serde::{Deserialize, Serialize};
97use std::fmt;
98
99/// A confidence score guaranteed to be in the range [0.0, 1.0].
100///
101/// This is a "witness type" - its existence proves the value is valid.
102/// Once you have a `Confidence`, you never need to check bounds again.
103///
104/// # Construction
105///
106/// - [`Confidence::new`]: Returns `None` if out of range (strict parsing)
107/// - [`Confidence::saturating`]: Clamps to [0, 1] (lenient, never fails)
108/// - [`Confidence::try_from`]: Returns `Err` if out of range
109///
110/// # Zero-Cost Abstraction
111///
112/// `Confidence` is `#[repr(transparent)]`, meaning it has the exact same
113/// memory layout as `f64`. There is no runtime overhead.
114///
115/// # Example
116///
117/// ```rust
118/// use anno::types::Confidence;
119///
120/// // Strict: fail on invalid input
121/// assert!(Confidence::new(0.5).is_some());
122/// assert!(Confidence::new(1.5).is_none());
123///
124/// // Lenient: clamp to valid range
125/// let conf = Confidence::saturating(1.5);
126/// assert_eq!(conf.get(), 1.0);
127///
128/// // Use with Entity - convert to f64 with .get()
129/// use anno::{Entity, EntityType};
130/// let entity = Entity::new("test", EntityType::Person, 0, 4, conf.get());
131/// ```
132#[derive(Clone, Copy, PartialEq, PartialOrd, Serialize, Deserialize)]
133#[repr(transparent)]
134#[serde(transparent)]
135pub struct Confidence(f64);
136
137impl Confidence {
138    /// The minimum valid confidence value.
139    pub const MIN: Self = Self(0.0);
140
141    /// The maximum valid confidence value.
142    pub const MAX: Self = Self(1.0);
143
144    /// A "perfect" confidence of 1.0 (deterministic/regex-based extraction).
145    pub const CERTAIN: Self = Self(1.0);
146
147    /// A "no information" confidence of 0.5 (maximum entropy).
148    pub const UNCERTAIN: Self = Self(0.5);
149
150    /// Create a confidence score, returning `None` if out of range.
151    ///
152    /// Use this when invalid values should be handled explicitly.
153    #[must_use]
154    #[inline]
155    pub fn new(value: f64) -> Option<Self> {
156        if (0.0..=1.0).contains(&value) && !value.is_nan() {
157            Some(Self(value))
158        } else {
159            None
160        }
161    }
162
163    /// Create a confidence score, clamping to [0.0, 1.0].
164    ///
165    /// Use this when you want lenient handling of out-of-range values.
166    /// NaN is treated as 0.0.
167    #[must_use]
168    #[inline]
169    pub fn saturating(value: f64) -> Self {
170        if value.is_nan() {
171            Self(0.0)
172        } else {
173            Self(value.clamp(0.0, 1.0))
174        }
175    }
176
177    /// Create a confidence score from a percentage (0-100).
178    #[must_use]
179    #[inline]
180    pub fn from_percent(percent: f64) -> Option<Self> {
181        Self::new(percent / 100.0)
182    }
183
184    /// Get the inner value (guaranteed to be in [0.0, 1.0]).
185    #[must_use]
186    #[inline]
187    pub const fn get(self) -> f64 {
188        self.0
189    }
190
191    /// Convert to percentage (0-100).
192    #[must_use]
193    #[inline]
194    pub fn as_percent(self) -> f64 {
195        self.0 * 100.0
196    }
197
198    /// Check if this is "high confidence" (>= 0.9).
199    #[must_use]
200    #[inline]
201    pub fn is_high(self) -> bool {
202        self.0 >= 0.9
203    }
204
205    /// Check if this is "low confidence" (< 0.5).
206    #[must_use]
207    #[inline]
208    pub fn is_low(self) -> bool {
209        self.0 < 0.5
210    }
211
212    /// Linear interpolation between two confidence values.
213    ///
214    /// `t = 0.0` returns `self`, `t = 1.0` returns `other`.
215    #[must_use]
216    #[inline]
217    pub fn lerp(self, other: Self, t: f64) -> Self {
218        let t = t.clamp(0.0, 1.0);
219        Self::saturating(self.0 * (1.0 - t) + other.0 * t)
220    }
221
222    /// Combine two confidence scores (geometric mean).
223    ///
224    /// Geometric mean penalizes low scores more than arithmetic mean,
225    /// which is appropriate for independent confidence estimates.
226    #[must_use]
227    #[inline]
228    pub fn combine(self, other: Self) -> Self {
229        Self((self.0 * other.0).sqrt())
230    }
231}
232
233impl Default for Confidence {
234    fn default() -> Self {
235        Self::CERTAIN
236    }
237}
238
239impl fmt::Debug for Confidence {
240    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
241        write!(f, "Confidence({:.4})", self.0)
242    }
243}
244
245impl fmt::Display for Confidence {
246    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
247        write!(f, "{:.1}%", self.0 * 100.0)
248    }
249}
250
/// Error produced when an `f64` is rejected as a [`Confidence`] value.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ConfidenceError {
    /// The rejected input, kept as-is so callers can report it.
    pub value: f64,
}

impl fmt::Display for ConfidenceError {
    /// Writes a one-line message naming the offending value and the valid range.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { value } = *self;
        f.write_fmt(format_args!(
            "confidence value {} is outside valid range [0.0, 1.0]",
            value
        ))
    }
}

// No `source()` override: the default implementation is used.
impl std::error::Error for ConfidenceError {}
269
270impl TryFrom<f64> for Confidence {
271    type Error = ConfidenceError;
272
273    fn try_from(value: f64) -> Result<Self, Self::Error> {
274        Self::new(value).ok_or(ConfidenceError { value })
275    }
276}
277
278impl From<Confidence> for f64 {
279    #[inline]
280    fn from(conf: Confidence) -> Self {
281        conf.0
282    }
283}
284
285impl PartialEq<f64> for Confidence {
286    fn eq(&self, other: &f64) -> bool {
287        (self.0 - other).abs() < f64::EPSILON
288    }
289}
290
291impl PartialOrd<f64> for Confidence {
292    fn partial_cmp(&self, other: &f64) -> Option<std::cmp::Ordering> {
293        self.0.partial_cmp(other)
294    }
295}
296
297/// Type alias for `Confidence` when used in probabilistic contexts.
298pub type Probability = Confidence;
299
300/// Type alias for generic unit interval values.
301pub type UnitInterval = Confidence;
302
// Unit tests for `Confidence` and `ConfidenceError`: construction, predicates,
// arithmetic helpers, conversions, formatting and serde round-tripping.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_valid() {
        assert!(Confidence::new(0.0).is_some());
        assert!(Confidence::new(0.5).is_some());
        assert!(Confidence::new(1.0).is_some());
    }

    #[test]
    fn new_invalid() {
        assert!(Confidence::new(-0.1).is_none());
        assert!(Confidence::new(1.1).is_none());
        assert!(Confidence::new(f64::NAN).is_none());
        assert!(Confidence::new(f64::INFINITY).is_none());
        assert!(Confidence::new(f64::NEG_INFINITY).is_none());
    }

    #[test]
    fn saturating_clamps() {
        assert_eq!(Confidence::saturating(0.5).get(), 0.5);
        assert_eq!(Confidence::saturating(-1.0).get(), 0.0);
        assert_eq!(Confidence::saturating(2.0).get(), 1.0);
        assert_eq!(Confidence::saturating(f64::NAN).get(), 0.0);
        assert_eq!(Confidence::saturating(f64::INFINITY).get(), 1.0);
        assert_eq!(Confidence::saturating(f64::NEG_INFINITY).get(), 0.0);
    }

    #[test]
    fn from_percent_works() {
        let conf = Confidence::from_percent(85.0).expect("85.0% is a valid confidence value");
        assert!((conf.get() - 0.85).abs() < 1e-10);
        assert!(Confidence::from_percent(150.0).is_none());
        assert!(Confidence::from_percent(-1.0).is_none());
    }

    #[test]
    fn as_percent_scales_by_hundred() {
        let conf = Confidence::new(0.25).expect("0.25 is valid");
        assert!((conf.as_percent() - 25.0).abs() < 1e-10);
        assert!((Confidence::MAX.as_percent() - 100.0).abs() < 1e-10);
    }

    #[test]
    fn predicates() {
        assert!(Confidence::new(0.95).expect("0.95 is valid").is_high());
        assert!(!Confidence::new(0.85).expect("0.85 is valid").is_high());
        assert!(Confidence::new(0.3).expect("0.3 is valid").is_low());
        assert!(!Confidence::new(0.6).expect("0.6 is valid").is_low());
    }

    #[test]
    fn lerp_bounded() {
        let a = Confidence::new(0.0).unwrap();
        let b = Confidence::new(1.0).unwrap();
        assert!((a.lerp(b, 0.0).get() - 0.0).abs() < 1e-10);
        assert!((a.lerp(b, 0.5).get() - 0.5).abs() < 1e-10);
        assert!((a.lerp(b, 1.0).get() - 1.0).abs() < 1e-10);

        // `t` outside [0, 1] must not extrapolate past either endpoint.
        assert!((a.lerp(b, -1.0).get() - 0.0).abs() < 1e-10);
        assert!((a.lerp(b, 2.0).get() - 1.0).abs() < 1e-10);
    }

    #[test]
    fn combine_geometric_mean() {
        let a = Confidence::new(0.8).expect("0.8 is valid");
        let b = Confidence::new(0.8).expect("0.8 is valid");
        assert!((a.combine(b).get() - 0.8).abs() < 1e-10);

        let c = Confidence::new(1.0).unwrap();
        let d = Confidence::new(0.0).unwrap();
        assert!((c.combine(d).get() - 0.0).abs() < 1e-10);
    }

    #[test]
    fn default_is_certain() {
        assert_eq!(Confidence::default().get(), Confidence::CERTAIN.get());
    }

    #[test]
    fn try_from_f64() {
        let ok: Result<Confidence, _> = 0.5_f64.try_into();
        assert!(ok.is_ok());

        let err: Result<Confidence, _> = 1.5_f64.try_into();
        assert!(err.is_err());

        // The error carries the rejected input.
        let rejected = Confidence::try_from(1.5).expect_err("1.5 is out of range");
        assert_eq!(rejected, ConfidenceError { value: 1.5 });
    }

    #[test]
    fn into_f64() {
        let conf = Confidence::new(0.25).expect("0.25 is valid");
        let raw: f64 = conf.into();
        assert_eq!(raw, 0.25);
    }

    #[test]
    fn compares_with_f64() {
        let conf = Confidence::new(0.5).expect("0.5 is valid");
        assert!(conf == 0.5);
        assert!(conf != 0.6);
        assert!(conf < 0.9);
        assert!(conf > 0.1);
        assert!(PartialOrd::<f64>::partial_cmp(&conf, &f64::NAN).is_none());
    }

    #[test]
    fn display_format() {
        let conf = Confidence::new(0.856).expect("0.856 is valid");
        assert_eq!(format!("{}", conf), "85.6%");
    }

    #[test]
    fn debug_format() {
        let conf = Confidence::new(0.856).expect("0.856 is valid");
        assert_eq!(format!("{:?}", conf), "Confidence(0.8560)");
    }

    #[test]
    fn error_display() {
        let err = ConfidenceError { value: 1.5 };
        assert_eq!(
            err.to_string(),
            "confidence value 1.5 is outside valid range [0.0, 1.0]"
        );
    }

    #[test]
    fn serde_roundtrip() {
        let conf = Confidence::new(0.85).expect("0.85 is valid");
        let json = serde_json::to_string(&conf).expect("serialization should succeed");
        assert_eq!(json, "0.85");
        let restored: Confidence =
            serde_json::from_str(&json).expect("deserialization should succeed");
        assert!((restored.get() - 0.85).abs() < 1e-10);
    }

    #[test]
    fn constants() {
        assert_eq!(Confidence::MIN.get(), 0.0);
        assert_eq!(Confidence::MAX.get(), 1.0);
        assert_eq!(Confidence::CERTAIN.get(), 1.0);
        assert_eq!(Confidence::UNCERTAIN.get(), 0.5);
    }
}