Skip to main content

cu_profiler_core/
confidence.rs

//! Confidence scoring.
//!
//! Every measurement carries a [`Confidence`]. The tool never claims more
//! certainty than the evidence supports, and it always explains *why* a score
//! is not [`ConfidenceLevel::High`].
6
7use serde::{Deserialize, Serialize};
8
9/// Qualitative confidence in a measurement.
10#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
11#[serde(rename_all = "lowercase")]
12pub enum ConfidenceLevel {
13    /// Reasons are present but do not undermine the result.
14    Unknown,
15    /// Multiple weak signals; treat the number as indicative only.
16    Low,
17    /// Minor caveats; the number is broadly trustworthy.
18    Medium,
19    /// No material caveats detected.
20    High,
21}
22
23impl ConfidenceLevel {
24    /// Lowercase, human-facing label (`"High"`, `"Medium"`, ...).
25    #[must_use]
26    pub fn label(self) -> &'static str {
27        match self {
28            Self::High => "High",
29            Self::Medium => "Medium",
30            Self::Low => "Low",
31            Self::Unknown => "Unknown",
32        }
33    }
34}
35
36/// A confidence score plus the reasons that shaped it.
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
38pub struct Confidence {
39    /// The qualitative level.
40    pub level: ConfidenceLevel,
41    /// Why the level is what it is. Always populated when `level != High`.
42    pub reasons: Vec<String>,
43}
44
45impl Confidence {
46    /// A high-confidence score with no caveats.
47    #[must_use]
48    pub fn high() -> Self {
49        Self {
50            level: ConfidenceLevel::High,
51            reasons: Vec::new(),
52        }
53    }
54
55    /// An unknown score with a single explanatory reason.
56    #[must_use]
57    pub fn unknown(reason: impl Into<String>) -> Self {
58        Self {
59            level: ConfidenceLevel::Unknown,
60            reasons: vec![reason.into()],
61        }
62    }
63}
64
/// The signals that [`score`] turns into a [`Confidence`].
///
/// Callers set the fields they have evidence for and take the rest from the
/// `Default` implementation, typically via struct-update syntax
/// (`ConfidenceFactors { parser_warnings: 2, ..Default::default() }`).
#[derive(Debug, Clone)]
pub struct ConfidenceFactors {
    /// `true` when the simulation succeeded, or failed in the expected way.
    pub simulation_ok: bool,
    /// `true` when log parsing left no unrecognised lines behind.
    pub logs_complete: bool,
    /// How many warnings the parser collected.
    pub parser_warnings: usize,
    /// Outcome of the baseline fingerprint comparison; `None` when no
    /// baseline was compared.
    pub baseline_matched: Option<bool>,
    /// Share of total CU that could not be attributed to a scope, as a
    /// percentage (documented range `0..=100`; the struct does not validate it).
    pub unattributed_pct: f64,
    /// How many scope markers were detected.
    pub scope_markers: usize,
    /// `true` when runtime/version metadata was available.
    pub metadata_available: bool,
    /// Coefficient of variation of `total_cu` across samples, as a fraction.
    /// `None` for a single sample / deterministic backend.
    pub sample_cv: Option<f64>,
}
87
88impl Default for ConfidenceFactors {
89    fn default() -> Self {
90        Self {
91            simulation_ok: true,
92            logs_complete: true,
93            parser_warnings: 0,
94            baseline_matched: None,
95            unattributed_pct: 0.0,
96            scope_markers: 0,
97            metadata_available: false,
98            sample_cv: None,
99        }
100    }
101}
102
103/// Score a measurement from its [`ConfidenceFactors`].
104///
105/// The model is deliberately simple and monotone: each adverse signal can only
106/// lower the level, never raise it, and each contributes a reason string.
107#[must_use]
108pub fn score(factors: &ConfidenceFactors) -> Confidence {
109    // `level` only ever moves downward. Because the enum is ordered
110    // `Unknown < Low < Medium < High`, the worse level is the smaller one, so
111    // `level.min(target)` demotes correctly.
112    let mut level = ConfidenceLevel::High;
113    let mut reasons = Vec::new();
114
115    if !factors.simulation_ok {
116        level = level.min(ConfidenceLevel::Low);
117        reasons.push("simulation did not complete as expected".to_string());
118    }
119    if !factors.logs_complete {
120        level = level.min(ConfidenceLevel::Low);
121        reasons.push("logs were incomplete or contained unrecognised lines".to_string());
122    }
123    if factors.parser_warnings > 0 {
124        level = level.min(ConfidenceLevel::Medium);
125        reasons.push(format!("{} parser warning(s)", factors.parser_warnings));
126    }
127    match factors.baseline_matched {
128        Some(true) => reasons.push("baseline matched".to_string()),
129        Some(false) => {
130            level = level.min(ConfidenceLevel::Low);
131            reasons.push("baseline fingerprint did not match".to_string());
132        }
133        None => {}
134    }
135    if factors.unattributed_pct >= 20.0 {
136        level = level.min(ConfidenceLevel::Medium);
137        reasons.push(format!("{:.0}% unattributed CU", factors.unattributed_pct));
138    }
139    if factors.scope_markers > 0 {
140        reasons.push(format!("{} scope markers detected", factors.scope_markers));
141    }
142    // Run-to-run variance across samples: a wide spread means the headline number
143    // is less trustworthy. Thresholds are on the coefficient of variation.
144    if let Some(cv) = factors.sample_cv {
145        if cv >= 0.10 {
146            level = level.min(ConfidenceLevel::Low);
147            reasons.push(format!("high run-to-run variance (CV {:.1}%)", cv * 100.0));
148        } else if cv >= 0.02 {
149            level = level.min(ConfidenceLevel::Medium);
150            reasons.push(format!("run-to-run variance (CV {:.1}%)", cv * 100.0));
151        }
152    }
153    if !factors.metadata_available {
154        level = level.min(ConfidenceLevel::Medium);
155        reasons.push("runtime/version metadata unavailable".to_string());
156    }
157
158    Confidence { level, reasons }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: the default factors with metadata marked available, so
    /// that each test varies exactly one signal.
    fn with_metadata() -> ConfidenceFactors {
        ConfidenceFactors {
            metadata_available: true,
            ..ConfidenceFactors::default()
        }
    }

    #[test]
    fn clean_run_with_metadata_is_high() {
        let result = score(&with_metadata());
        assert_eq!(result.level, ConfidenceLevel::High);
    }

    #[test]
    fn failed_simulation_is_low() {
        let mut factors = with_metadata();
        factors.simulation_ok = false;
        assert_eq!(score(&factors).level, ConfidenceLevel::Low);
    }

    #[test]
    fn unattributed_cu_demotes_to_medium_with_reason() {
        let mut factors = with_metadata();
        factors.unattributed_pct = 22.0;

        let result = score(&factors);
        assert_eq!(result.level, ConfidenceLevel::Medium);
        assert!(result
            .reasons
            .iter()
            .any(|reason| reason.contains("22% unattributed")));
    }

    #[test]
    fn sample_variance_demotes_confidence() {
        // Scores the fixture with only the coefficient of variation changed.
        let score_with_cv = |cv: f64| {
            let mut factors = with_metadata();
            factors.sample_cv = Some(cv);
            score(&factors)
        };

        // Moderate spread lands on Medium and says why.
        let moderate = score_with_cv(0.05);
        assert_eq!(moderate.level, ConfidenceLevel::Medium);
        assert!(moderate
            .reasons
            .iter()
            .any(|reason| reason.contains("variance")));

        // Wide spread lands on Low.
        assert_eq!(score_with_cv(0.25).level, ConfidenceLevel::Low);

        // A tiny spread is within tolerance and stays High.
        assert_eq!(score_with_cv(0.005).level, ConfidenceLevel::High);
    }

    #[test]
    fn levels_order_high_above_low() {
        assert!(ConfidenceLevel::Low < ConfidenceLevel::High);
        assert!(ConfidenceLevel::Unknown < ConfidenceLevel::Medium);
    }
}