use perfgate_types::{
Budget, Direction, Metric, MetricStatus, Verdict, VerdictCounts, VerdictStatus,
};
use std::collections::BTreeMap;
use thiserror::Error;
/// Errors produced while evaluating measurements against a budget.
#[derive(Debug, Error)]
pub enum BudgetError {
    /// A summary was requested over an empty sample set.
    #[error("no samples to summarize")]
    NoSamples,
    /// The baseline value supplied for comparison was not a positive number.
    #[error("baseline value must be > 0")]
    InvalidBaseline,
}
/// Outcome of evaluating a single metric's current value against its budget.
#[derive(Debug, Clone, PartialEq)]
pub struct BudgetResult {
    /// Baseline value the current measurement was compared against.
    pub baseline: f64,
    /// Current measured value.
    pub current: f64,
    /// `current / baseline`.
    pub ratio: f64,
    /// Signed fractional change: `(current - baseline) / baseline`.
    pub pct: f64,
    /// Non-negative regression magnitude in the budget's "worse" direction
    /// (0.0 for improvements; see `calculate_regression`).
    pub regression: f64,
    /// Coefficient of variation of the current samples, when provided.
    pub cv: Option<f64>,
    /// Noise threshold copied from the budget, when configured.
    pub noise_threshold: Option<f64>,
    /// Final status after thresholds and the budget's noise policy.
    pub status: MetricStatus,
}
/// Evaluate a single metric's `current` value against `baseline` under `budget`.
///
/// Computes the ratio, signed percent change, and clamped regression, maps the
/// regression through the budget's thresholds, then applies the budget's noise
/// policy when `current_cv` exceeds the configured noise threshold.
///
/// Note: `NoisePolicy::Warn` overwrites the threshold-derived status, so a
/// noisy `Fail` is downgraded to `Warn`.
///
/// # Errors
///
/// Returns [`BudgetError::InvalidBaseline`] when `baseline` is zero, negative,
/// or not finite. (A plain `baseline <= 0.0` check would let NaN and infinity
/// slip through — every comparison with NaN is false — and the NaN would then
/// poison ratio/pct/regression and fall out of `determine_status` as a
/// spurious `Pass`.)
#[must_use = "pure computation; call site should use the returned BudgetResult"]
pub fn evaluate_budget(
    baseline: f64,
    current: f64,
    budget: &Budget,
    current_cv: Option<f64>,
) -> Result<BudgetResult, BudgetError> {
    // Reject NaN/±inf as well as values <= 0; all of them make the derived
    // quantities below meaningless.
    if !baseline.is_finite() || baseline <= 0.0 {
        return Err(BudgetError::InvalidBaseline);
    }
    let ratio = current / baseline;
    let pct = (current - baseline) / baseline;
    let regression = calculate_regression(baseline, current, budget.direction);
    let mut status = determine_status(regression, budget.threshold, budget.warn_threshold);
    // Apply the noise policy only when a CV was measured, a noise threshold
    // is configured, and the CV actually exceeds it.
    if let (Some(cv), Some(limit)) = (current_cv, budget.noise_threshold)
        && cv > limit
    {
        match budget.noise_policy {
            // Keep whatever status the thresholds produced.
            perfgate_types::NoisePolicy::Ignore => {}
            // Data too noisy to trust: force Warn (even over a Fail).
            perfgate_types::NoisePolicy::Warn => {
                status = MetricStatus::Warn;
            }
            // Data too noisy to judge at all: skip the metric.
            perfgate_types::NoisePolicy::Skip => {
                status = MetricStatus::Skip;
            }
        }
    }
    Ok(BudgetResult {
        baseline,
        current,
        ratio,
        pct,
        regression,
        cv: current_cv,
        noise_threshold: budget.noise_threshold,
        status,
    })
}
/// Fractional regression of `current` relative to `baseline`, clamped at zero.
///
/// The sign convention follows `direction`: for `Lower` (smaller is better) a
/// value above baseline regresses; for `Higher` (larger is better) a value
/// below baseline regresses. Improvements always yield `0.0`.
#[must_use = "pure computation; call site should use the returned regression value"]
pub fn calculate_regression(baseline: f64, current: f64, direction: Direction) -> f64 {
    // Signed fractional change oriented so that "worse" is positive.
    let oriented = match direction {
        Direction::Lower => (current - baseline) / baseline,
        Direction::Higher => (baseline - current) / baseline,
    };
    // Clamp improvements (negative values) to zero.
    oriented.max(0.0)
}
/// Map a regression magnitude onto a status via the two budget thresholds.
///
/// Strictly above `threshold` fails; at or above `warn_threshold` (up to and
/// including `threshold`) warns; anything smaller passes.
#[must_use = "pure computation; call site should use the returned MetricStatus"]
pub fn determine_status(regression: f64, threshold: f64, warn_threshold: f64) -> MetricStatus {
    match regression {
        r if r > threshold => MetricStatus::Fail,
        r if r >= warn_threshold => MetricStatus::Warn,
        _ => MetricStatus::Pass,
    }
}
/// Combine per-metric statuses into an overall [`Verdict`].
///
/// Severity ordering: any `Fail` => `Fail`; else any `Warn` => `Warn`; else
/// any `Pass` => `Pass`; otherwise (empty input or all skipped) => `Skip`.
/// The returned `reasons` list is left empty for the caller to populate.
#[must_use = "pure computation; call site should use the returned Verdict"]
pub fn aggregate_verdict(statuses: &[MetricStatus]) -> Verdict {
    // Tally each status kind in a single pass.
    let zero = VerdictCounts {
        pass: 0,
        warn: 0,
        fail: 0,
        skip: 0,
    };
    let counts = statuses.iter().fold(zero, |mut acc, status| {
        match status {
            MetricStatus::Pass => acc.pass += 1,
            MetricStatus::Warn => acc.warn += 1,
            MetricStatus::Fail => acc.fail += 1,
            MetricStatus::Skip => acc.skip += 1,
        }
        acc
    });
    // Pick the overall status by decreasing severity.
    let status = if counts.fail > 0 {
        VerdictStatus::Fail
    } else if counts.warn > 0 {
        VerdictStatus::Warn
    } else if counts.pass > 0 {
        VerdictStatus::Pass
    } else {
        VerdictStatus::Skip
    };
    Verdict {
        status,
        counts,
        reasons: Vec::new(),
    }
}
/// Build a machine-readable reason token of the form `<metric>_<status>`,
/// e.g. `wall_ms_warn`.
#[must_use = "pure computation; call site should use the returned token string"]
pub fn reason_token(metric: Metric, status: MetricStatus) -> String {
    let metric_name = metric.as_str();
    let status_name = status.as_str();
    // Size the buffer exactly: metric + '_' separator + status.
    let mut token = String::with_capacity(metric_name.len() + 1 + status_name.len());
    token.push_str(metric_name);
    token.push('_');
    token.push_str(status_name);
    token
}
/// Evaluate several metrics at once and aggregate the results into a verdict.
///
/// `metrics` yields `(metric, baseline, current, cv)` tuples. Metrics with no
/// entry in `budgets` are silently skipped (not evaluated, not counted). A
/// reason token is recorded for every evaluated metric whose status is not
/// `Pass`, in iteration order.
///
/// The bound is `IntoIterator` rather than `Iterator` so callers may pass a
/// collection directly; every `Iterator` satisfies it, so existing call sites
/// are unaffected.
///
/// # Errors
///
/// Propagates the first [`BudgetError`] returned by [`evaluate_budget`],
/// abandoning the remaining metrics.
#[must_use = "pure computation; call site should use the returned deltas and verdict"]
pub fn evaluate_budgets<'a, I>(
    metrics: I,
    budgets: &BTreeMap<Metric, Budget>,
) -> Result<(BTreeMap<Metric, BudgetResult>, Verdict), BudgetError>
where
    I: IntoIterator<Item = (Metric, f64, f64, Option<f64>)> + 'a,
{
    let mut deltas: BTreeMap<Metric, BudgetResult> = BTreeMap::new();
    let mut statuses: Vec<MetricStatus> = Vec::new();
    let mut reasons: Vec<String> = Vec::new();
    for (metric, baseline, current, cv) in metrics {
        // Only metrics with a configured budget are evaluated.
        if let Some(budget) = budgets.get(&metric) {
            let result = evaluate_budget(baseline, current, budget, cv)?;
            if result.status != MetricStatus::Pass {
                reasons.push(reason_token(metric, result.status));
            }
            statuses.push(result.status);
            deltas.insert(metric, result);
        }
    }
    let mut verdict = aggregate_verdict(&statuses);
    verdict.reasons = reasons;
    Ok((deltas, verdict))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Standard test budget: fail above +20%, warn at/above +10%,
    /// lower values are better.
    fn test_budget() -> Budget {
        Budget::new(0.20, 0.10, Direction::Lower)
    }

    // +5% is below the 10% warn threshold -> Pass.
    #[test]
    fn evaluate_budget_pass() {
        let budget = test_budget();
        let result = evaluate_budget(100.0, 105.0, &budget, None).unwrap();
        assert_eq!(result.status, MetricStatus::Pass);
        assert!((result.regression - 0.05).abs() < 1e-10);
    }

    // +15% sits between warn (10%) and fail (20%) -> Warn.
    #[test]
    fn evaluate_budget_warn() {
        let budget = test_budget();
        let result = evaluate_budget(100.0, 115.0, &budget, None).unwrap();
        assert_eq!(result.status, MetricStatus::Warn);
        assert!((result.regression - 0.15).abs() < 1e-10);
    }

    // +30% exceeds the 20% fail threshold -> Fail.
    #[test]
    fn evaluate_budget_fail() {
        let budget = test_budget();
        let result = evaluate_budget(100.0, 130.0, &budget, None).unwrap();
        assert_eq!(result.status, MetricStatus::Fail);
        assert!((result.regression - 0.30).abs() < 1e-10);
    }

    // A zero baseline makes ratio/pct undefined and must be rejected.
    #[test]
    fn evaluate_budget_zero_baseline() {
        let budget = test_budget();
        let result = evaluate_budget(0.0, 100.0, &budget, None);
        assert!(matches!(result, Err(BudgetError::InvalidBaseline)));
    }

    // Negative baselines are likewise invalid.
    #[test]
    fn evaluate_budget_negative_baseline() {
        let budget = test_budget();
        let result = evaluate_budget(-10.0, 100.0, &budget, None);
        assert!(matches!(result, Err(BudgetError::InvalidBaseline)));
    }

    // Lower-is-better: a decrease is an improvement, clamped to 0.
    #[test]
    fn calculate_regression_lower_is_better_improvement() {
        let reg = calculate_regression(100.0, 90.0, Direction::Lower);
        assert!((reg - 0.0).abs() < 1e-10);
    }

    // Lower-is-better: an increase is a positive regression.
    #[test]
    fn calculate_regression_lower_is_better_regression() {
        let reg = calculate_regression(100.0, 115.0, Direction::Lower);
        assert!((reg - 0.15).abs() < 1e-10);
    }

    // Higher-is-better: an increase is an improvement, clamped to 0.
    #[test]
    fn calculate_regression_higher_is_better_improvement() {
        let reg = calculate_regression(100.0, 120.0, Direction::Higher);
        assert!((reg - 0.0).abs() < 1e-10);
    }

    // Higher-is-better: a decrease is a positive regression.
    #[test]
    fn calculate_regression_higher_is_better_regression() {
        let reg = calculate_regression(100.0, 80.0, Direction::Higher);
        assert!((reg - 0.20).abs() < 1e-10);
    }

    // Boundary semantics: exactly AT the fail threshold is Warn (Fail is
    // strictly greater); exactly AT the warn threshold is Warn (inclusive).
    #[test]
    fn determine_status_at_threshold_boundaries() {
        let threshold = 0.20;
        let warn_threshold = 0.10;
        assert_eq!(
            determine_status(0.20, threshold, warn_threshold),
            MetricStatus::Warn
        );
        assert_eq!(
            determine_status(0.2001, threshold, warn_threshold),
            MetricStatus::Fail
        );
        assert_eq!(
            determine_status(0.10, threshold, warn_threshold),
            MetricStatus::Warn
        );
        assert_eq!(
            determine_status(0.0999, threshold, warn_threshold),
            MetricStatus::Pass
        );
    }

    // A single Fail outranks Warn and Pass in the aggregate verdict.
    #[test]
    fn aggregate_verdict_fail_dominates() {
        let verdict =
            aggregate_verdict(&[MetricStatus::Pass, MetricStatus::Fail, MetricStatus::Warn]);
        assert_eq!(verdict.status, VerdictStatus::Fail);
        assert_eq!(verdict.counts.pass, 1);
        assert_eq!(verdict.counts.warn, 1);
        assert_eq!(verdict.counts.fail, 1);
    }

    // Warn dominates when there is no Fail.
    #[test]
    fn aggregate_verdict_warn_without_fail() {
        let verdict =
            aggregate_verdict(&[MetricStatus::Pass, MetricStatus::Warn, MetricStatus::Pass]);
        assert_eq!(verdict.status, VerdictStatus::Warn);
        assert_eq!(verdict.counts.pass, 2);
        assert_eq!(verdict.counts.warn, 1);
        assert_eq!(verdict.counts.fail, 0);
    }

    // All-Pass input yields an overall Pass.
    #[test]
    fn aggregate_verdict_all_pass() {
        let verdict =
            aggregate_verdict(&[MetricStatus::Pass, MetricStatus::Pass, MetricStatus::Pass]);
        assert_eq!(verdict.status, VerdictStatus::Pass);
        assert_eq!(verdict.counts.pass, 3);
        assert_eq!(verdict.counts.warn, 0);
        assert_eq!(verdict.counts.fail, 0);
    }

    // Tokens are "<metric>_<status>" using each side's as_str() form.
    #[test]
    fn reason_token_format() {
        assert_eq!(
            reason_token(Metric::WallMs, MetricStatus::Warn),
            "wall_ms_warn"
        );
        assert_eq!(
            reason_token(Metric::MaxRssKb, MetricStatus::Fail),
            "max_rss_kb_fail"
        );
        assert_eq!(
            reason_token(Metric::ThroughputPerS, MetricStatus::Pass),
            "throughput_per_s_pass"
        );
    }

    // End-to-end: one warning metric plus one passing metric aggregates to
    // an overall Warn with the expected counts.
    #[test]
    fn evaluate_budgets_multiple_metrics() {
        let mut budgets = BTreeMap::new();
        budgets.insert(Metric::WallMs, Budget::new(0.20, 0.10, Direction::Lower));
        budgets.insert(Metric::MaxRssKb, Budget::new(0.30, 0.15, Direction::Lower));
        let metrics = vec![
            // +15% wall time -> Warn under the 0.20/0.10 budget.
            (Metric::WallMs, 100.0, 115.0),
            // -10% RSS is an improvement -> Pass.
            (Metric::MaxRssKb, 1000.0, 900.0),
        ];
        let (deltas, verdict) = evaluate_budgets(
            metrics.into_iter().map(|(m, b, c)| (m, b, c, None)),
            &budgets,
        )
        .unwrap();
        assert_eq!(deltas.len(), 2);
        assert_eq!(verdict.status, VerdictStatus::Warn);
        assert_eq!(verdict.counts.warn, 1);
        assert_eq!(verdict.counts.pass, 1);
    }
}
#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy producing a lower-is-better budget with a positive fail
    /// threshold and a warn threshold at some fraction (0..=1) of it, so
    /// `warn_threshold <= threshold` always holds. Noise checking disabled.
    fn budget_strategy() -> impl Strategy<Value = Budget> {
        (0.01f64..1.0, 0.0f64..=1.0).prop_map(|(threshold, warn_factor)| {
            let warn_threshold = threshold * warn_factor;
            Budget {
                noise_threshold: None,
                noise_policy: perfgate_types::NoisePolicy::Ignore,
                threshold,
                warn_threshold,
                direction: Direction::Lower,
            }
        })
    }

    proptest! {
        // Regression is clamped at zero for every direction/input combination.
        #[test]
        fn prop_regression_is_non_negative(
            baseline in 1.0f64..10000.0,
            current in 0.1f64..20000.0,
            direction in prop_oneof![Just(Direction::Lower), Just(Direction::Higher)],
        ) {
            let regression = calculate_regression(baseline, current, direction);
            prop_assert!(regression >= 0.0, "regression should be non-negative");
        }

        // Every field of BudgetResult agrees with the standalone helpers it
        // is derived from (ratio, pct, regression, status).
        #[test]
        fn prop_evaluate_budget_consistency(
            baseline in 1.0f64..10000.0,
            current in 0.1f64..20000.0,
            budget in budget_strategy(),
        ) {
            let result = evaluate_budget(baseline, current, &budget, None).unwrap();
            let expected_ratio = current / baseline;
            prop_assert!((result.ratio - expected_ratio).abs() < 1e-10);
            let expected_pct = (current - baseline) / baseline;
            prop_assert!((result.pct - expected_pct).abs() < 1e-10);
            let expected_regression = calculate_regression(baseline, current, budget.direction);
            prop_assert!((result.regression - expected_regression).abs() < 1e-10);
            let expected_status = determine_status(result.regression, budget.threshold, budget.warn_threshold);
            prop_assert_eq!(result.status, expected_status);
        }

        // Each status implies the regression fell in its defining interval:
        // Fail => (threshold, inf), Warn => [warn_threshold, threshold],
        // Pass => [0, warn_threshold).
        #[test]
        fn prop_determine_status_ordering(
            regression in 0.0f64..2.0,
            threshold in 0.01f64..1.0,
            warn_factor in 0.0f64..=1.0,
        ) {
            let warn_threshold = threshold * warn_factor;
            let status = determine_status(regression, threshold, warn_threshold);
            match status {
                MetricStatus::Fail => prop_assert!(regression > threshold),
                MetricStatus::Warn => {
                    prop_assert!(regression >= warn_threshold);
                    prop_assert!(regression <= threshold);
                }
                MetricStatus::Pass => prop_assert!(regression < warn_threshold),
                MetricStatus::Skip => {
                    // determine_status never yields Skip; arm kept for
                    // exhaustiveness over MetricStatus.
                }
            }
        }

        // Counts match independent tallies and the overall status follows the
        // Fail > Warn > Pass > Skip dominance order.
        #[test]
        fn prop_aggregate_verdict_consistency(statuses in prop::collection::vec(
            prop_oneof![
                Just(MetricStatus::Pass),
                Just(MetricStatus::Warn),
                Just(MetricStatus::Fail),
                Just(MetricStatus::Skip)
            ],
            0..20
        )) {
            let verdict = aggregate_verdict(&statuses);
            let expected_pass = statuses.iter().filter(|&&s| s == MetricStatus::Pass).count() as u32;
            let expected_warn = statuses.iter().filter(|&&s| s == MetricStatus::Warn).count() as u32;
            let expected_fail = statuses.iter().filter(|&&s| s == MetricStatus::Fail).count() as u32;
            let expected_skip = statuses.iter().filter(|&&s| s == MetricStatus::Skip).count() as u32;
            prop_assert_eq!(verdict.counts.pass, expected_pass);
            prop_assert_eq!(verdict.counts.warn, expected_warn);
            prop_assert_eq!(verdict.counts.fail, expected_fail);
            prop_assert_eq!(verdict.counts.skip, expected_skip);
            if expected_fail > 0 {
                prop_assert_eq!(verdict.status, VerdictStatus::Fail);
            } else if expected_warn > 0 {
                prop_assert_eq!(verdict.status, VerdictStatus::Warn);
            } else if expected_pass > 0 {
                prop_assert_eq!(verdict.status, VerdictStatus::Pass);
            } else {
                prop_assert_eq!(verdict.status, VerdictStatus::Skip);
            }
        }

        // evaluate_budget is a pure function: same inputs, same output.
        #[test]
        fn prop_evaluate_budget_deterministic(
            baseline in 1.0f64..10000.0,
            current in 0.1f64..20000.0,
            budget in budget_strategy(),
        ) {
            let r1 = evaluate_budget(baseline, current, &budget, None).unwrap();
            let r2 = evaluate_budget(baseline, current, &budget, None).unwrap();
            prop_assert_eq!(r1, r2, "evaluate_budget must be deterministic");
        }

        // Zero regression always passes (warn_factor > 0 keeps
        // warn_threshold strictly positive).
        #[test]
        fn prop_zero_regression_is_pass(
            threshold in 0.01f64..1.0,
            warn_factor in 0.01f64..=1.0,
        ) {
            let warn_threshold = threshold * warn_factor;
            let status = determine_status(0.0, threshold, warn_threshold);
            prop_assert_eq!(status, MetricStatus::Pass, "zero regression should always be Pass");
        }

        // Moving in the "better" direction for either orientation always
        // clamps the regression to exactly zero.
        #[test]
        fn prop_negative_regression_clamped(
            baseline in 1.0f64..10000.0,
            improvement_factor in 0.01f64..1.0,
            direction in prop_oneof![Just(Direction::Lower), Just(Direction::Higher)],
        ) {
            let current = match direction {
                Direction::Lower => baseline * (1.0 - improvement_factor),
                Direction::Higher => baseline * (1.0 + improvement_factor),
            };
            let regression = calculate_regression(baseline, current, direction);
            prop_assert_eq!(regression, 0.0, "improvements should yield zero regression");
        }
    }
}