anno/types/confidence.rs
1//! Witness type for confidence values bounded to [0.0, 1.0].
2//!
3//! # What Confidence Actually Means
4//!
5//! Different NER backends compute confidence in fundamentally different ways.
6//! These numbers are NOT directly comparable!
7//!
8//! ```text
9//! ┌──────────────────────────────────────────────────────────────────────────┐
10//! │ CONFIDENCE ACROSS BACKENDS │
11//! ├──────────────────────────────────────────────────────────────────────────┤
12//! │ │
13//! │ PATTERN NER: "Did the regex match?" │
14//! │ ──────────────────────────────────── │
15//! │ │
16//! │ • 0.95-0.99 = Regex matched (deterministic) │
17//! │ • Confidence reflects pattern complexity, NOT uncertainty │
18//! │ │
19//! │ Email pattern matched? → 0.98 │
20//! │ Date pattern matched? → 0.95 │
21//! │ │
22//! │ This is CERTAINTY, not probability. │
23//! │ If the pattern fires, it's almost always correct. │
24//! │ │
25//! │ ────────────────────────────────────────────────────────────────────── │
26//! │ │
27//! │ STATISTICAL NER: "How many heuristics agreed?" │
28//! │ ─────────────────────────────────────────────── │
29//! │ │
30//! │ • Score = (capitalization + context + gazetteer) / weights │
31//! │ • Range: typically 0.4 - 0.8 │
32//! │ │
33//! │ "Dr. Smith" → 0.72 (title + capitalization) │
34//! │ "Apple" → 0.55 (capitalization only, ambiguous) │
35//! │ │
36//! │ This is a HEURISTIC BLEND. │
37//! │ Higher = more features matched, but not a probability. │
38//! │ │
39//! │ ────────────────────────────────────────────────────────────────────── │
40//! │ │
41//! │ NEURAL NER (BERT/GLiNER): "Softmax probability" │
42//! │ ──────────────────────────────────────────────── │
43//! │ │
44//! │ • softmax([logit_PER, logit_ORG, logit_LOC, ...]) │
45//! │ • Range: 0.0 - 1.0, calibrated to approximate probability │
46//! │ │
47//! │ "John" → PER: 0.94, ORG: 0.03, LOC: 0.03 │
48//! │ "Apple" → ORG: 0.52, PER: 0.01, LOC: 0.47 (ambiguous!) │
49//! │ │
50//! │ This is a CALIBRATED probability (ideally). │
51//! │ Models with temperature scaling are better calibrated. │
52//! │ │
53//! └──────────────────────────────────────────────────────────────────────────┘
54//! ```
55//!
56//! # The Comparison Problem
57//!
58//! ```text
59//! NEVER DO THIS:
60//!
61//! RegexNER says EMAIL with 0.98 confidence
62//! HeuristicNER says ORG with 0.55 confidence
63//!
64//! "0.98 > 0.55, so EMAIL is more likely!" ← WRONG!
65//!
66//! These scales are incompatible:
67//!
68//! • RegexNER's 0.98 means "regex matched, nearly certain"
69//! • HeuristicNER's 0.55 means "some features matched, unsure"
70//!
71//! Comparing them is like comparing °C to °F to Kelvin.
72//! Same name (confidence), different scales.
73//!
74//! ────────────────────────────────────────────────────────────────────────────
75//!
76//! WHAT TO DO INSTEAD:
77//!
78//! 1. Use conflict resolution strategies (Priority, LongestSpan)
79//! 2. Calibrate scores if mixing backends
80//! 3. Threshold per-backend: Pattern > 0.9, Neural > 0.5
81//! ```
82//!
83//! # When to Trust Confidence
84//!
85//! ```text
86//! ┌───────────────┬─────────────────────────────────────────────────────────┐
87//! │ Backend │ When confidence is reliable │
88//! ├───────────────┼─────────────────────────────────────────────────────────┤
89//! │ RegexNER │ Always (deterministic). 0.95+ means pattern matched. │
90//! │ HeuristicNER│ Use as ranking within backend, not absolute truth. │
91//! │ BERT-NER │ Reasonably calibrated for in-domain data. │
92//! │ GLiNER │ Good for ranking, less calibrated for absolute probs. │
93//! └───────────────┴─────────────────────────────────────────────────────────┘
94//! ```
95
96use serde::{Deserialize, Serialize};
97use std::fmt;
98
99/// A confidence score guaranteed to be in the range [0.0, 1.0].
100///
101/// This is a "witness type" - its existence proves the value is valid.
102/// Once you have a `Confidence`, you never need to check bounds again.
103///
104/// # Construction
105///
106/// - [`Confidence::new`]: Returns `None` if out of range (strict parsing)
107/// - [`Confidence::saturating`]: Clamps to [0, 1] (lenient, never fails)
108/// - [`Confidence::try_from`]: Returns `Err` if out of range
109///
110/// # Zero-Cost Abstraction
111///
112/// `Confidence` is `#[repr(transparent)]`, meaning it has the exact same
113/// memory layout as `f64`. There is no runtime overhead.
114///
115/// # Example
116///
117/// ```rust
118/// use anno::types::Confidence;
119///
120/// // Strict: fail on invalid input
121/// assert!(Confidence::new(0.5).is_some());
122/// assert!(Confidence::new(1.5).is_none());
123///
124/// // Lenient: clamp to valid range
125/// let conf = Confidence::saturating(1.5);
126/// assert_eq!(conf.get(), 1.0);
127///
128/// // Use with Entity - convert to f64 with .get()
129/// use anno::{Entity, EntityType};
130/// let entity = Entity::new("test", EntityType::Person, 0, 4, conf.get());
131/// ```
132#[derive(Clone, Copy, PartialEq, PartialOrd, Serialize, Deserialize)]
133#[repr(transparent)]
134#[serde(transparent)]
135pub struct Confidence(f64);
136
137impl Confidence {
138 /// The minimum valid confidence value.
139 pub const MIN: Self = Self(0.0);
140
141 /// The maximum valid confidence value.
142 pub const MAX: Self = Self(1.0);
143
144 /// A "perfect" confidence of 1.0 (deterministic/regex-based extraction).
145 pub const CERTAIN: Self = Self(1.0);
146
147 /// A "no information" confidence of 0.5 (maximum entropy).
148 pub const UNCERTAIN: Self = Self(0.5);
149
150 /// Create a confidence score, returning `None` if out of range.
151 ///
152 /// Use this when invalid values should be handled explicitly.
153 #[must_use]
154 #[inline]
155 pub fn new(value: f64) -> Option<Self> {
156 if (0.0..=1.0).contains(&value) && !value.is_nan() {
157 Some(Self(value))
158 } else {
159 None
160 }
161 }
162
163 /// Create a confidence score, clamping to [0.0, 1.0].
164 ///
165 /// Use this when you want lenient handling of out-of-range values.
166 /// NaN is treated as 0.0.
167 #[must_use]
168 #[inline]
169 pub fn saturating(value: f64) -> Self {
170 if value.is_nan() {
171 Self(0.0)
172 } else {
173 Self(value.clamp(0.0, 1.0))
174 }
175 }
176
177 /// Create a confidence score from a percentage (0-100).
178 #[must_use]
179 #[inline]
180 pub fn from_percent(percent: f64) -> Option<Self> {
181 Self::new(percent / 100.0)
182 }
183
184 /// Get the inner value (guaranteed to be in [0.0, 1.0]).
185 #[must_use]
186 #[inline]
187 pub const fn get(self) -> f64 {
188 self.0
189 }
190
191 /// Convert to percentage (0-100).
192 #[must_use]
193 #[inline]
194 pub fn as_percent(self) -> f64 {
195 self.0 * 100.0
196 }
197
198 /// Check if this is "high confidence" (>= 0.9).
199 #[must_use]
200 #[inline]
201 pub fn is_high(self) -> bool {
202 self.0 >= 0.9
203 }
204
205 /// Check if this is "low confidence" (< 0.5).
206 #[must_use]
207 #[inline]
208 pub fn is_low(self) -> bool {
209 self.0 < 0.5
210 }
211
212 /// Linear interpolation between two confidence values.
213 ///
214 /// `t = 0.0` returns `self`, `t = 1.0` returns `other`.
215 #[must_use]
216 #[inline]
217 pub fn lerp(self, other: Self, t: f64) -> Self {
218 let t = t.clamp(0.0, 1.0);
219 Self::saturating(self.0 * (1.0 - t) + other.0 * t)
220 }
221
222 /// Combine two confidence scores (geometric mean).
223 ///
224 /// Geometric mean penalizes low scores more than arithmetic mean,
225 /// which is appropriate for independent confidence estimates.
226 #[must_use]
227 #[inline]
228 pub fn combine(self, other: Self) -> Self {
229 Self((self.0 * other.0).sqrt())
230 }
231}
232
233impl Default for Confidence {
234 fn default() -> Self {
235 Self::CERTAIN
236 }
237}
238
239impl fmt::Debug for Confidence {
240 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
241 write!(f, "Confidence({:.4})", self.0)
242 }
243}
244
245impl fmt::Display for Confidence {
246 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
247 write!(f, "{:.1}%", self.0 * 100.0)
248 }
249}
250
/// Returned when an `f64` cannot be turned into a [`Confidence`] because it
/// is not inside [0.0, 1.0].
///
/// The offending input is kept in [`ConfidenceError::value`] so callers can
/// report or inspect it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ConfidenceError {
    /// The rejected input value.
    pub value: f64,
}

impl fmt::Display for ConfidenceError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { value: rejected } = self;
        write!(
            f,
            "confidence value {} is outside valid range [0.0, 1.0]",
            rejected
        )
    }
}

// No underlying source error: the default `Error` methods are sufficient.
impl std::error::Error for ConfidenceError {}
269
270impl TryFrom<f64> for Confidence {
271 type Error = ConfidenceError;
272
273 fn try_from(value: f64) -> Result<Self, Self::Error> {
274 Self::new(value).ok_or(ConfidenceError { value })
275 }
276}
277
278impl From<Confidence> for f64 {
279 #[inline]
280 fn from(conf: Confidence) -> Self {
281 conf.0
282 }
283}
284
285impl PartialEq<f64> for Confidence {
286 fn eq(&self, other: &f64) -> bool {
287 (self.0 - other).abs() < f64::EPSILON
288 }
289}
290
291impl PartialOrd<f64> for Confidence {
292 fn partial_cmp(&self, other: &f64) -> Option<std::cmp::Ordering> {
293 self.0.partial_cmp(other)
294 }
295}
296
/// Type alias for `Confidence` when used in probabilistic contexts.
///
/// This is a plain alias, not a distinct type: `Probability` and
/// [`Confidence`] are fully interchangeable and share every method and
/// trait implementation.
pub type Probability = Confidence;
299
/// Type alias for generic unit interval values.
///
/// This is a plain alias, not a distinct type: `UnitInterval` and
/// [`Confidence`] are fully interchangeable, so the compiler will not stop
/// one from being passed where the other is expected.
pub type UnitInterval = Confidence;
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for approximate floating-point comparisons in this module.
    const TOLERANCE: f64 = 1e-10;

    /// Values on and inside the interval bounds are accepted.
    #[test]
    fn new_valid() {
        for value in [0.0, 0.5, 1.0] {
            assert!(Confidence::new(value).is_some());
        }
    }

    /// Out-of-range and non-finite values are rejected.
    #[test]
    fn new_invalid() {
        for value in [-0.1, 1.1, f64::NAN, f64::INFINITY] {
            assert!(Confidence::new(value).is_none());
        }
    }

    /// `saturating` clamps to the nearest bound and maps NaN to zero.
    #[test]
    fn saturating_clamps() {
        let cases = [(0.5, 0.5), (-1.0, 0.0), (2.0, 1.0), (f64::NAN, 0.0)];
        for (input, expected) in cases {
            assert_eq!(Confidence::saturating(input).get(), expected);
        }
    }

    /// Percentages are scaled by 1/100 and validated.
    #[test]
    fn from_percent_works() {
        let parsed = Confidence::from_percent(85.0).expect("85.0% is a valid confidence value");
        assert!((parsed.get() - 0.85).abs() < TOLERANCE);

        let too_large = Confidence::from_percent(150.0);
        assert!(too_large.is_none());
    }

    /// `is_high` / `is_low` on either side of their thresholds.
    #[test]
    fn predicates() {
        let high = Confidence::new(0.95).expect("0.95 is valid");
        let not_high = Confidence::new(0.85).expect("0.85 is valid");
        assert!(high.is_high());
        assert!(!not_high.is_high());

        let low = Confidence::new(0.3).expect("0.3 is valid");
        let not_low = Confidence::new(0.6).expect("0.6 is valid");
        assert!(low.is_low());
        assert!(!not_low.is_low());
    }

    /// Interpolating between 0 and 1 reproduces the interpolation factor.
    #[test]
    fn lerp_bounded() {
        let start = Confidence::new(0.0).unwrap();
        let end = Confidence::new(1.0).unwrap();
        for t in [0.0, 0.5, 1.0] {
            assert!((start.lerp(end, t).get() - t).abs() < TOLERANCE);
        }
    }

    /// Geometric mean: equal inputs are a fixed point, a zero input wins.
    #[test]
    fn combine_geometric_mean() {
        let left = Confidence::new(0.8).expect("0.8 is valid");
        let right = Confidence::new(0.8).expect("0.8 is valid");
        let merged = left.combine(right);
        assert!((merged.get() - 0.8).abs() < TOLERANCE);

        let one = Confidence::new(1.0).unwrap();
        let zero = Confidence::new(0.0).unwrap();
        let merged = one.combine(zero);
        assert!((merged.get() - 0.0).abs() < TOLERANCE);
    }

    /// `TryFrom<f64>` accepts in-range values and rejects the rest.
    #[test]
    fn try_from_f64() {
        let accepted = Confidence::try_from(0.5_f64);
        assert!(accepted.is_ok());

        let rejected = Confidence::try_from(1.5_f64);
        assert!(rejected.is_err());
    }

    /// `Display` renders a percentage with one decimal place.
    #[test]
    fn display_format() {
        let conf = Confidence::new(0.856).expect("0.856 is valid");
        assert_eq!(conf.to_string(), "85.6%");
    }

    /// A value survives a JSON round trip and serializes as a bare number.
    #[test]
    fn serde_roundtrip() {
        let original = Confidence::new(0.85).expect("0.85 is valid");

        let encoded = serde_json::to_string(&original).expect("serialization should succeed");
        assert_eq!(encoded, "0.85");

        let decoded: Confidence =
            serde_json::from_str(&encoded).expect("deserialization should succeed");
        assert!((decoded.get() - 0.85).abs() < TOLERANCE);
    }

    /// The associated constants hold their documented values.
    #[test]
    fn constants() {
        let expectations = [
            (Confidence::MIN, 0.0),
            (Confidence::MAX, 1.0),
            (Confidence::CERTAIN, 1.0),
            (Confidence::UNCERTAIN, 0.5),
        ];
        for (constant, expected) in expectations {
            assert_eq!(constant.get(), expected);
        }
    }
}