Skip to main content

libverify_core/controls/
behavioral_regression.rs

1use crate::control::{Control, ControlFinding, ControlId, builtin};
2use crate::evidence::{EvidenceBundle, EvidenceState, MetricObservation};
3
/// Detects behavioral regressions in post-deploy metrics.
///
/// This is a stateless unit type; all behavior lives in its `Control`
/// implementation. It is cheap to copy and can be created with `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct BehavioralRegressionControl;
6
/// Metric category used to pick the regression threshold for a metric.
///
/// A plain tag enum: it derives the comparison and debug traits so categories
/// can be compared and printed directly (e.g. in assertions and logs).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MetricCategory {
    /// Latency/duration metrics: regression if current > baseline * 1.1
    Latency,
    /// Error/failure metrics: regression if current > baseline * 1.05
    Error,
    /// Throughput/success metrics: regression if current < baseline * 0.9
    Throughput,
    /// Unknown category: no opinion
    Unknown,
}
18
19fn classify_metric(name: &str) -> MetricCategory {
20    let lower = name.to_lowercase();
21    if lower.ends_with("_latency")
22        || lower.ends_with("_duration")
23        || lower.ends_with("_p50")
24        || lower.ends_with("_p90")
25        || lower.ends_with("_p95")
26        || lower.ends_with("_p99")
27    {
28        MetricCategory::Latency
29    } else if lower.ends_with("_error")
30        || lower.ends_with("_5xx")
31        || lower.ends_with("_4xx")
32        || lower.ends_with("_failure")
33    {
34        MetricCategory::Error
35    } else if lower.ends_with("_throughput")
36        || lower.ends_with("_qps")
37        || lower.ends_with("_rps")
38        || lower.ends_with("_success")
39    {
40        MetricCategory::Throughput
41    } else {
42        MetricCategory::Unknown
43    }
44}
45
46fn is_regressed(metric: &MetricObservation) -> Option<f64> {
47    // Avoid division by zero; if baseline is zero, we cannot compute relative change.
48    if metric.baseline == 0.0 {
49        return None;
50    }
51
52    match classify_metric(&metric.name) {
53        MetricCategory::Latency => {
54            // Regression if current > baseline * 1.1 (10% increase is bad)
55            if metric.current > metric.baseline * 1.1 {
56                Some((metric.current - metric.baseline) / metric.baseline * 100.0)
57            } else {
58                None
59            }
60        }
61        MetricCategory::Error => {
62            // Regression if current > baseline * 1.05 (5% increase is bad)
63            if metric.current > metric.baseline * 1.05 {
64                Some((metric.current - metric.baseline) / metric.baseline * 100.0)
65            } else {
66                None
67            }
68        }
69        MetricCategory::Throughput => {
70            // Regression if current < baseline * 0.9 (10% decrease is bad)
71            if metric.current < metric.baseline * 0.9 {
72                Some((metric.current - metric.baseline) / metric.baseline * 100.0)
73            } else {
74                None
75            }
76        }
77        MetricCategory::Unknown => None,
78    }
79}
80
81impl Control for BehavioralRegressionControl {
82    fn id(&self) -> ControlId {
83        builtin::id(builtin::BEHAVIORAL_REGRESSION)
84    }
85
86    fn description(&self) -> &'static str {
87        "Post-deployment metrics must not regress beyond acceptable thresholds"
88    }
89
90    fn evaluate(&self, evidence: &EvidenceBundle) -> Vec<ControlFinding> {
91        let id = self.id();
92
93        let diff = match &evidence.behavioral_diff {
94            EvidenceState::NotApplicable => {
95                return vec![ControlFinding::not_applicable(
96                    id,
97                    "Behavioral diff evidence is not applicable",
98                )];
99            }
100            EvidenceState::Missing { gaps } => {
101                return vec![ControlFinding::indeterminate(
102                    id,
103                    "Behavioral diff evidence is unavailable",
104                    vec![],
105                    gaps.clone(),
106                )];
107            }
108            EvidenceState::Complete { value } | EvidenceState::Partial { value, .. } => value,
109        };
110
111        let mut regressions: Vec<String> = Vec::new();
112
113        for metric in &diff.metrics {
114            if let Some(delta_pct) = is_regressed(metric) {
115                regressions.push(format!(
116                    "{}: {:.1}% change (baseline={}, current={})",
117                    metric.name, delta_pct, metric.baseline, metric.current,
118                ));
119            }
120        }
121
122        if regressions.is_empty() {
123            vec![ControlFinding::satisfied(
124                id,
125                format!(
126                    "No metric regressions detected for deployment {}",
127                    diff.deployment_id
128                ),
129                vec![diff.deployment_id.clone()],
130            )]
131        } else {
132            let subjects: Vec<String> = regressions.clone();
133            vec![ControlFinding::violated(
134                id,
135                format!(
136                    "{} metric regression(s) detected: {}",
137                    regressions.len(),
138                    regressions.join("; ")
139                ),
140                subjects,
141            )]
142        }
143    }
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use crate::control::ControlStatus;
    use crate::evidence::{BehavioralDiff, EvidenceGap, MetricObservation};

    /// Builds an observation with the given name, baseline and current value;
    /// the optional fields are left unset.
    fn metric(name: &str, baseline: f64, current: f64) -> MetricObservation {
        MetricObservation {
            name: String::from(name),
            current,
            baseline,
            unit: None,
            window_secs: None,
        }
    }

    /// Wraps `metrics` in a bundle whose behavioral diff evidence is complete.
    fn make_bundle(metrics: Vec<MetricObservation>) -> EvidenceBundle {
        let diff = BehavioralDiff {
            deployment_id: String::from("abc123"),
            environment: Some(String::from("canary")),
            metrics,
            observed_at: None,
        };
        EvidenceBundle {
            behavioral_diff: EvidenceState::complete(diff),
            ..Default::default()
        }
    }

    /// Runs the control against a complete bundle containing `metrics`.
    fn evaluate_metrics(metrics: Vec<MetricObservation>) -> Vec<ControlFinding> {
        let bundle = make_bundle(metrics);
        BehavioralRegressionControl.evaluate(&bundle)
    }

    /// Asserts that exactly one finding was produced and that it has `expected` status.
    #[track_caller]
    fn assert_sole_status(findings: &[ControlFinding], expected: ControlStatus) {
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].status, expected);
    }

    #[test]
    fn no_regressions_is_satisfied() {
        let observations = vec![
            metric("http_request_duration_p99", 100.0, 105.0), // +5%, under 10% threshold
            metric("error_rate_5xx", 1.0, 1.04),               // +4%, under 5% threshold
            metric("requests_rps", 1000.0, 950.0),             // -5%, under 10% threshold
        ];
        let findings = evaluate_metrics(observations);
        assert_sole_status(&findings, ControlStatus::Satisfied);
    }

    #[test]
    fn latency_regression_is_violated() {
        // +20% latency
        let findings = evaluate_metrics(vec![metric("http_request_duration_p99", 100.0, 120.0)]);
        assert_sole_status(&findings, ControlStatus::Violated);
        assert!(findings[0].rationale.contains("duration_p99"));
    }

    #[test]
    fn error_regression_is_violated() {
        // +10%, over 5% threshold
        let findings = evaluate_metrics(vec![metric("api_error", 2.0, 2.2)]);
        assert_sole_status(&findings, ControlStatus::Violated);
    }

    #[test]
    fn throughput_regression_is_violated() {
        // -20% throughput
        let findings = evaluate_metrics(vec![metric("requests_rps", 1000.0, 800.0)]);
        assert_sole_status(&findings, ControlStatus::Violated);
    }

    #[test]
    fn unknown_metric_category_is_ignored() {
        // Big change, but the category is unknown.
        let findings = evaluate_metrics(vec![metric("cpu_usage", 50.0, 90.0)]);
        assert_sole_status(&findings, ControlStatus::Satisfied);
    }

    #[test]
    fn zero_baseline_is_not_regression() {
        // Relative change cannot be computed against a zero baseline.
        let findings = evaluate_metrics(vec![metric("error_rate_5xx", 0.0, 1.0)]);
        assert_sole_status(&findings, ControlStatus::Satisfied);
    }

    #[test]
    fn missing_evidence_is_indeterminate() {
        let gap = EvidenceGap::CollectionFailed {
            source: String::from("prometheus"),
            subject: String::from("metrics"),
            detail: String::from("timeout"),
        };
        let bundle = EvidenceBundle {
            behavioral_diff: EvidenceState::missing(vec![gap]),
            ..Default::default()
        };
        let findings = BehavioralRegressionControl.evaluate(&bundle);
        assert_sole_status(&findings, ControlStatus::Indeterminate);
    }

    #[test]
    fn not_applicable_when_evidence_not_applicable() {
        let findings = BehavioralRegressionControl.evaluate(&EvidenceBundle::default());
        assert_sole_status(&findings, ControlStatus::NotApplicable);
    }

    #[test]
    fn multiple_regressions_are_all_reported() {
        let observations = vec![
            metric("http_request_duration_p99", 100.0, 150.0),
            metric("error_rate_5xx", 1.0, 2.0),
            metric("requests_rps", 1000.0, 500.0),
        ];
        let findings = evaluate_metrics(observations);
        assert_sole_status(&findings, ControlStatus::Violated);
        assert!(findings[0].rationale.contains("3 metric regression(s)"));
    }
}