1use perfgate_types::{
43 Budget, Direction, Metric, MetricStatus, Verdict, VerdictCounts, VerdictStatus,
44};
45use std::collections::BTreeMap;
46use thiserror::Error;
47
48#[derive(Debug, Error)]
69pub enum BudgetError {
70 #[error("no samples to summarize")]
71 NoSamples,
72
73 #[error("baseline value must be > 0")]
74 InvalidBaseline,
75}
76
77#[derive(Debug, Clone, PartialEq)]
102pub struct BudgetResult {
103 pub baseline: f64,
105 pub current: f64,
107 pub ratio: f64,
109 pub pct: f64,
111 pub regression: f64,
113 pub cv: Option<f64>,
115 pub noise_threshold: Option<f64>,
117 pub status: MetricStatus,
119}
120
121pub fn evaluate_budget(
143 baseline: f64,
144 current: f64,
145 budget: &Budget,
146 current_cv: Option<f64>,
147) -> Result<BudgetResult, BudgetError> {
148 if baseline <= 0.0 {
149 return Err(BudgetError::InvalidBaseline);
150 }
151
152 let ratio = current / baseline;
153 let pct = (current - baseline) / baseline;
154 let regression = calculate_regression(baseline, current, budget.direction);
155
156 let mut status = determine_status(regression, budget.threshold, budget.warn_threshold);
157
158 if let (Some(cv), Some(limit)) = (current_cv, budget.noise_threshold)
160 && cv > limit
161 {
162 match budget.noise_policy {
163 perfgate_types::NoisePolicy::Ignore => {
164 }
169 perfgate_types::NoisePolicy::Warn => {
170 status = MetricStatus::Warn;
171 }
172 perfgate_types::NoisePolicy::Skip => {
173 status = MetricStatus::Skip;
174 }
175 }
176 }
177
178 Ok(BudgetResult {
179 baseline,
180 current,
181 ratio,
182 pct,
183 regression,
184 cv: current_cv,
185 noise_threshold: budget.noise_threshold,
186 status,
187 })
188}
189
190pub fn calculate_regression(baseline: f64, current: f64, direction: Direction) -> f64 {
230 let pct = (current - baseline) / baseline;
231 match direction {
232 Direction::Lower => pct.max(0.0),
233 Direction::Higher => (-pct).max(0.0),
234 }
235}
236
237pub fn determine_status(regression: f64, threshold: f64, warn_threshold: f64) -> MetricStatus {
271 if regression > threshold {
272 MetricStatus::Fail
273 } else if regression >= warn_threshold {
274 MetricStatus::Warn
275 } else {
276 MetricStatus::Pass
277 }
278}
279
280pub fn aggregate_verdict(statuses: &[MetricStatus]) -> Verdict {
307 let mut counts = VerdictCounts {
308 pass: 0,
309 warn: 0,
310 fail: 0,
311 skip: 0,
312 };
313
314 for status in statuses {
315 match status {
316 MetricStatus::Pass => counts.pass += 1,
317 MetricStatus::Warn => counts.warn += 1,
318 MetricStatus::Fail => counts.fail += 1,
319 MetricStatus::Skip => counts.skip += 1,
320 }
321 }
322
323 let status = if counts.fail > 0 {
324 VerdictStatus::Fail
325 } else if counts.warn > 0 {
326 VerdictStatus::Warn
327 } else if counts.pass > 0 {
328 VerdictStatus::Pass
329 } else {
330 VerdictStatus::Skip
331 };
332
333 Verdict {
334 status,
335 counts,
336 reasons: Vec::new(),
337 }
338}
339
340pub fn reason_token(metric: Metric, status: MetricStatus) -> String {
355 format!("{}_{}", metric.as_str(), status.as_str())
356}
357
358pub fn evaluate_budgets<'a, I>(
372 metrics: I,
373 budgets: &BTreeMap<Metric, Budget>,
374) -> Result<(BTreeMap<Metric, BudgetResult>, Verdict), BudgetError>
375where
376 I: Iterator<Item = (Metric, f64, f64, Option<f64>)> + 'a,
377{
378 let mut deltas: BTreeMap<Metric, BudgetResult> = BTreeMap::new();
379 let mut statuses: Vec<MetricStatus> = Vec::new();
380 let mut reasons: Vec<String> = Vec::new();
381
382 for (metric, baseline, current, cv) in metrics {
383 if let Some(budget) = budgets.get(&metric) {
384 let result = evaluate_budget(baseline, current, budget, cv)?;
385
386 if result.status != MetricStatus::Pass {
387 reasons.push(reason_token(metric, result.status));
388 }
389
390 statuses.push(result.status);
391 deltas.insert(metric, result);
392 }
393 }
394
395 let mut verdict = aggregate_verdict(&statuses);
396 verdict.reasons = reasons;
397
398 Ok((deltas, verdict))
399}
400
/// Unit tests for budget evaluation, status classification, and verdict aggregation.
#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_types::NoisePolicy;

    /// Tolerance used when comparing computed fractions.
    const EPS: f64 = 1e-10;

    /// Asserts that two floats agree within [`EPS`].
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < EPS,
            "expected {expected}, got {actual}"
        );
    }

    /// Lower-is-better budget: fail above 20%, warn from 10%.
    fn test_budget() -> Budget {
        Budget::new(0.20, 0.10, Direction::Lower)
    }

    /// Same thresholds as [`test_budget`], with a 5% noise limit and the given policy.
    fn noisy_budget(noise_policy: NoisePolicy) -> Budget {
        Budget {
            noise_threshold: Some(0.05),
            noise_policy,
            threshold: 0.20,
            warn_threshold: 0.10,
            direction: Direction::Lower,
        }
    }

    #[test]
    fn evaluate_budget_pass() {
        let budget = test_budget();
        let result = evaluate_budget(100.0, 105.0, &budget, None).unwrap();
        assert_eq!(result.status, MetricStatus::Pass);
        assert_close(result.regression, 0.05);
    }

    #[test]
    fn evaluate_budget_warn() {
        let budget = test_budget();
        let result = evaluate_budget(100.0, 115.0, &budget, None).unwrap();
        assert_eq!(result.status, MetricStatus::Warn);
        assert_close(result.regression, 0.15);
    }

    #[test]
    fn evaluate_budget_fail() {
        let budget = test_budget();
        let result = evaluate_budget(100.0, 130.0, &budget, None).unwrap();
        assert_eq!(result.status, MetricStatus::Fail);
        assert_close(result.regression, 0.30);
    }

    #[test]
    fn evaluate_budget_zero_baseline() {
        let budget = test_budget();
        let result = evaluate_budget(0.0, 100.0, &budget, None);
        assert!(matches!(result, Err(BudgetError::InvalidBaseline)));
    }

    #[test]
    fn evaluate_budget_negative_baseline() {
        let budget = test_budget();
        let result = evaluate_budget(-10.0, 100.0, &budget, None);
        assert!(matches!(result, Err(BudgetError::InvalidBaseline)));
    }

    #[test]
    fn evaluate_budget_noise_ignore_keeps_status() {
        let budget = noisy_budget(NoisePolicy::Ignore);
        let result = evaluate_budget(100.0, 130.0, &budget, Some(0.50)).unwrap();
        assert_eq!(result.status, MetricStatus::Fail);
        // The noise inputs are echoed back regardless of policy.
        assert_eq!(result.cv, Some(0.50));
        assert_eq!(result.noise_threshold, Some(0.05));
    }

    #[test]
    fn evaluate_budget_noise_warn_overrides_status() {
        let budget = noisy_budget(NoisePolicy::Warn);
        let result = evaluate_budget(100.0, 105.0, &budget, Some(0.50)).unwrap();
        assert_eq!(result.status, MetricStatus::Warn);
    }

    #[test]
    fn evaluate_budget_noise_skip_overrides_status() {
        let budget = noisy_budget(NoisePolicy::Skip);
        let result = evaluate_budget(100.0, 130.0, &budget, Some(0.50)).unwrap();
        assert_eq!(result.status, MetricStatus::Skip);
    }

    #[test]
    fn evaluate_budget_quiet_run_ignores_noise_policy() {
        let budget = noisy_budget(NoisePolicy::Skip);

        // A CV exactly at the limit is not "above" it.
        let at_limit = evaluate_budget(100.0, 105.0, &budget, Some(0.05)).unwrap();
        assert_eq!(at_limit.status, MetricStatus::Pass);

        // Without a measured CV the policy cannot apply.
        let no_cv = evaluate_budget(100.0, 105.0, &budget, None).unwrap();
        assert_eq!(no_cv.status, MetricStatus::Pass);
    }

    #[test]
    fn calculate_regression_lower_is_better_improvement() {
        let reg = calculate_regression(100.0, 90.0, Direction::Lower);
        assert_close(reg, 0.0);
    }

    #[test]
    fn calculate_regression_lower_is_better_regression() {
        let reg = calculate_regression(100.0, 115.0, Direction::Lower);
        assert_close(reg, 0.15);
    }

    #[test]
    fn calculate_regression_higher_is_better_improvement() {
        let reg = calculate_regression(100.0, 120.0, Direction::Higher);
        assert_close(reg, 0.0);
    }

    #[test]
    fn calculate_regression_higher_is_better_regression() {
        let reg = calculate_regression(100.0, 80.0, Direction::Higher);
        assert_close(reg, 0.20);
    }

    #[test]
    fn determine_status_at_threshold_boundaries() {
        let threshold = 0.20;
        let warn_threshold = 0.10;

        // Exactly at the fail threshold is still only a warning.
        assert_eq!(
            determine_status(0.20, threshold, warn_threshold),
            MetricStatus::Warn
        );

        assert_eq!(
            determine_status(0.2001, threshold, warn_threshold),
            MetricStatus::Fail
        );

        // Exactly at the warn threshold already warns.
        assert_eq!(
            determine_status(0.10, threshold, warn_threshold),
            MetricStatus::Warn
        );

        assert_eq!(
            determine_status(0.0999, threshold, warn_threshold),
            MetricStatus::Pass
        );
    }

    #[test]
    fn aggregate_verdict_fail_dominates() {
        let verdict =
            aggregate_verdict(&[MetricStatus::Pass, MetricStatus::Fail, MetricStatus::Warn]);
        assert_eq!(verdict.status, VerdictStatus::Fail);
        assert_eq!(verdict.counts.pass, 1);
        assert_eq!(verdict.counts.warn, 1);
        assert_eq!(verdict.counts.fail, 1);
    }

    #[test]
    fn aggregate_verdict_warn_without_fail() {
        let verdict =
            aggregate_verdict(&[MetricStatus::Pass, MetricStatus::Warn, MetricStatus::Pass]);
        assert_eq!(verdict.status, VerdictStatus::Warn);
        assert_eq!(verdict.counts.pass, 2);
        assert_eq!(verdict.counts.warn, 1);
        assert_eq!(verdict.counts.fail, 0);
    }

    #[test]
    fn aggregate_verdict_all_pass() {
        let verdict =
            aggregate_verdict(&[MetricStatus::Pass, MetricStatus::Pass, MetricStatus::Pass]);
        assert_eq!(verdict.status, VerdictStatus::Pass);
        assert_eq!(verdict.counts.pass, 3);
        assert_eq!(verdict.counts.warn, 0);
        assert_eq!(verdict.counts.fail, 0);
    }

    #[test]
    fn aggregate_verdict_empty_is_skip() {
        let verdict = aggregate_verdict(&[]);
        assert_eq!(verdict.status, VerdictStatus::Skip);
        assert_eq!(verdict.counts.pass, 0);
        assert_eq!(verdict.counts.warn, 0);
        assert_eq!(verdict.counts.fail, 0);
        assert_eq!(verdict.counts.skip, 0);
        assert!(verdict.reasons.is_empty());
    }

    #[test]
    fn aggregate_verdict_only_skips_is_skip() {
        let verdict = aggregate_verdict(&[MetricStatus::Skip, MetricStatus::Skip]);
        assert_eq!(verdict.status, VerdictStatus::Skip);
        assert_eq!(verdict.counts.skip, 2);
        assert_eq!(verdict.counts.pass, 0);
    }

    #[test]
    fn aggregate_verdict_pass_outranks_skip() {
        let verdict = aggregate_verdict(&[MetricStatus::Skip, MetricStatus::Pass]);
        assert_eq!(verdict.status, VerdictStatus::Pass);
        assert_eq!(verdict.counts.skip, 1);
        assert_eq!(verdict.counts.pass, 1);
    }

    #[test]
    fn reason_token_format() {
        assert_eq!(
            reason_token(Metric::WallMs, MetricStatus::Warn),
            "wall_ms_warn"
        );
        assert_eq!(
            reason_token(Metric::MaxRssKb, MetricStatus::Fail),
            "max_rss_kb_fail"
        );
        assert_eq!(
            reason_token(Metric::ThroughputPerS, MetricStatus::Pass),
            "throughput_per_s_pass"
        );
    }

    #[test]
    fn evaluate_budgets_multiple_metrics() {
        let mut budgets = BTreeMap::new();
        budgets.insert(Metric::WallMs, Budget::new(0.20, 0.10, Direction::Lower));
        budgets.insert(Metric::MaxRssKb, Budget::new(0.30, 0.15, Direction::Lower));

        let metrics = vec![
            // +15% wall time: warn.
            (Metric::WallMs, 100.0, 115.0),
            // -10% memory: improvement, pass.
            (Metric::MaxRssKb, 1000.0, 900.0),
        ];

        let (deltas, verdict) = evaluate_budgets(
            metrics.into_iter().map(|(m, b, c)| (m, b, c, None)),
            &budgets,
        )
        .unwrap();

        assert_eq!(deltas.len(), 2);
        assert_eq!(verdict.status, VerdictStatus::Warn);
        assert_eq!(verdict.counts.warn, 1);
        assert_eq!(verdict.counts.pass, 1);
    }

    #[test]
    fn evaluate_budgets_collects_reasons_for_non_pass() {
        let mut budgets = BTreeMap::new();
        budgets.insert(Metric::WallMs, Budget::new(0.20, 0.10, Direction::Lower));
        budgets.insert(Metric::MaxRssKb, Budget::new(0.30, 0.15, Direction::Lower));

        let metrics = vec![
            (Metric::WallMs, 100.0, 115.0, None::<f64>),
            (Metric::MaxRssKb, 1000.0, 900.0, None::<f64>),
        ];

        let (_, verdict) = evaluate_budgets(metrics.into_iter(), &budgets).unwrap();

        // Only the warning metric contributes a reason; the passing one does not.
        assert_eq!(verdict.reasons, vec!["wall_ms_warn".to_string()]);
    }

    #[test]
    fn evaluate_budgets_skips_metrics_without_budget() {
        let mut budgets = BTreeMap::new();
        budgets.insert(Metric::WallMs, test_budget());

        let metrics = vec![
            (Metric::WallMs, 100.0, 105.0, None::<f64>),
            // Huge regression, but no budget is configured for this metric.
            (Metric::MaxRssKb, 1000.0, 5000.0, None::<f64>),
        ];

        let (deltas, verdict) = evaluate_budgets(metrics.into_iter(), &budgets).unwrap();

        assert_eq!(deltas.len(), 1);
        assert!(deltas.contains_key(&Metric::WallMs));
        assert_eq!(verdict.status, VerdictStatus::Pass);
        assert_eq!(verdict.counts.pass, 1);
        assert_eq!(verdict.counts.fail, 0);
        assert!(verdict.reasons.is_empty());
    }

    #[test]
    fn evaluate_budgets_propagates_invalid_baseline() {
        let mut budgets = BTreeMap::new();
        budgets.insert(Metric::WallMs, test_budget());

        let metrics = vec![(Metric::WallMs, 0.0, 100.0, None::<f64>)];

        let result = evaluate_budgets(metrics.into_iter(), &budgets);
        assert!(matches!(result, Err(BudgetError::InvalidBaseline)));
    }
}
570
/// Property-based tests for the budget evaluation functions.
#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    /// Generates either optimisation direction.
    fn direction_strategy() -> impl Strategy<Value = Direction> {
        prop_oneof![Just(Direction::Lower), Just(Direction::Higher)]
    }

    /// Generates budgets with `warn_threshold <= threshold`, no noise handling, and
    /// either direction so the budget properties cover both orientations.
    fn budget_strategy() -> impl Strategy<Value = Budget> {
        (0.01f64..1.0, 0.0f64..=1.0, direction_strategy()).prop_map(
            |(threshold, warn_factor, direction)| {
                let warn_threshold = threshold * warn_factor;
                Budget {
                    noise_threshold: None,
                    noise_policy: perfgate_types::NoisePolicy::Ignore,
                    threshold,
                    warn_threshold,
                    direction,
                }
            },
        )
    }

    proptest! {
        #[test]
        fn prop_regression_is_non_negative(
            baseline in 1.0f64..10000.0,
            current in 0.1f64..20000.0,
            direction in direction_strategy(),
        ) {
            let regression = calculate_regression(baseline, current, direction);
            prop_assert!(regression >= 0.0, "regression should be non-negative");
        }

        #[test]
        fn prop_evaluate_budget_consistency(
            baseline in 1.0f64..10000.0,
            current in 0.1f64..20000.0,
            budget in budget_strategy(),
        ) {
            let result = evaluate_budget(baseline, current, &budget, None).unwrap();

            let expected_ratio = current / baseline;
            prop_assert!((result.ratio - expected_ratio).abs() < 1e-10);

            let expected_pct = (current - baseline) / baseline;
            prop_assert!((result.pct - expected_pct).abs() < 1e-10);

            let expected_regression = calculate_regression(baseline, current, budget.direction);
            prop_assert!((result.regression - expected_regression).abs() < 1e-10);

            // With no noise threshold configured the status must be purely threshold-based.
            let expected_status =
                determine_status(result.regression, budget.threshold, budget.warn_threshold);
            prop_assert_eq!(result.status, expected_status);
        }

        #[test]
        fn prop_determine_status_ordering(
            regression in 0.0f64..2.0,
            threshold in 0.01f64..1.0,
            warn_factor in 0.0f64..=1.0,
        ) {
            let warn_threshold = threshold * warn_factor;
            let status = determine_status(regression, threshold, warn_threshold);

            match status {
                MetricStatus::Fail => prop_assert!(regression > threshold),
                MetricStatus::Warn => {
                    prop_assert!(regression >= warn_threshold);
                    prop_assert!(regression <= threshold);
                }
                MetricStatus::Pass => prop_assert!(regression < warn_threshold),
                // determine_status only classifies as Fail/Warn/Pass; Skip comes from
                // the noise policy, so seeing it here is a bug rather than a no-op.
                MetricStatus::Skip => {
                    prop_assert!(false, "determine_status must never return Skip")
                }
            }
        }

        #[test]
        fn prop_aggregate_verdict_consistency(statuses in prop::collection::vec(
            prop_oneof![
                Just(MetricStatus::Pass),
                Just(MetricStatus::Warn),
                Just(MetricStatus::Fail),
                Just(MetricStatus::Skip)
            ],
            0..20
        )) {
            let verdict = aggregate_verdict(&statuses);

            let expected_pass =
                statuses.iter().filter(|&&s| s == MetricStatus::Pass).count() as u32;
            let expected_warn =
                statuses.iter().filter(|&&s| s == MetricStatus::Warn).count() as u32;
            let expected_fail =
                statuses.iter().filter(|&&s| s == MetricStatus::Fail).count() as u32;
            let expected_skip =
                statuses.iter().filter(|&&s| s == MetricStatus::Skip).count() as u32;

            prop_assert_eq!(verdict.counts.pass, expected_pass);
            prop_assert_eq!(verdict.counts.warn, expected_warn);
            prop_assert_eq!(verdict.counts.fail, expected_fail);
            prop_assert_eq!(verdict.counts.skip, expected_skip);

            // Severity precedence: fail, then warn, then pass, otherwise skip.
            if expected_fail > 0 {
                prop_assert_eq!(verdict.status, VerdictStatus::Fail);
            } else if expected_warn > 0 {
                prop_assert_eq!(verdict.status, VerdictStatus::Warn);
            } else if expected_pass > 0 {
                prop_assert_eq!(verdict.status, VerdictStatus::Pass);
            } else {
                prop_assert_eq!(verdict.status, VerdictStatus::Skip);
            }
        }

        #[test]
        fn prop_evaluate_budget_deterministic(
            baseline in 1.0f64..10000.0,
            current in 0.1f64..20000.0,
            budget in budget_strategy(),
        ) {
            let r1 = evaluate_budget(baseline, current, &budget, None).unwrap();
            let r2 = evaluate_budget(baseline, current, &budget, None).unwrap();
            prop_assert_eq!(r1, r2, "evaluate_budget must be deterministic");
        }

        #[test]
        fn prop_zero_regression_is_pass(
            threshold in 0.01f64..1.0,
            // Starts above zero: a warn threshold of exactly 0 would make 0.0 a warning.
            warn_factor in 0.01f64..=1.0,
        ) {
            let warn_threshold = threshold * warn_factor;
            let status = determine_status(0.0, threshold, warn_threshold);
            prop_assert_eq!(status, MetricStatus::Pass, "zero regression should always be Pass");
        }

        #[test]
        fn prop_negative_regression_clamped(
            baseline in 1.0f64..10000.0,
            improvement_factor in 0.01f64..1.0,
            direction in direction_strategy(),
        ) {
            // Move `current` in whichever direction counts as an improvement.
            let current = match direction {
                Direction::Lower => baseline * (1.0 - improvement_factor),
                Direction::Higher => baseline * (1.0 + improvement_factor),
            };
            let regression = calculate_regression(baseline, current, direction);
            prop_assert_eq!(regression, 0.0, "improvements should yield zero regression");
        }
    }
}