1#![forbid(unsafe_code)]
2
3use std::collections::BTreeMap;
47
48use crate::determinism::{JsonValue, TestJsonlLogger};
49
/// A budget for one named metric, optionally relaxed by a percentage tolerance.
///
/// The effective limit is given by [`Threshold::ceiling`].
#[derive(Debug, Clone)]
pub struct Threshold {
    /// Name of the metric this threshold applies to.
    pub metric: String,
    /// Budget value before any tolerance is applied.
    pub budget: f64,
    /// Extra headroom on top of `budget`, expressed in percent.
    pub tolerance_pct: f64,
}

impl Threshold {
    /// Creates a threshold for `metric` with the given `budget` and a tolerance of `0.0`.
    pub fn new(metric: &str, budget: f64) -> Self {
        let metric = String::from(metric);
        Self {
            metric,
            budget,
            tolerance_pct: 0.0,
        }
    }

    /// Builder-style setter: returns the threshold with its tolerance replaced by `pct`.
    #[must_use]
    pub fn tolerance_pct(self, pct: f64) -> Self {
        Self {
            tolerance_pct: pct,
            ..self
        }
    }

    /// Returns the budget scaled by the tolerance: `budget * (1 + tolerance_pct / 100)`.
    #[must_use]
    pub fn ceiling(&self) -> f64 {
        let scale = 1.0 + self.tolerance_pct / 100.0;
        self.budget * scale
    }
}
89
/// One observed value for a named metric, optionally tagged with a display unit.
#[derive(Debug, Clone)]
pub struct Measurement {
    /// Name of the metric that was measured; used to look up the matching threshold.
    pub metric: String,
    /// The measured value.
    pub value: f64,
    /// Optional unit label appended to values in human-readable output.
    pub unit: Option<String>,
}

impl Measurement {
    /// Creates a measurement of `value` for `metric` with no unit attached.
    pub fn new(metric: &str, value: f64) -> Self {
        Self {
            metric: metric.to_owned(),
            value,
            unit: None,
        }
    }

    /// Builder-style setter: returns the measurement with its unit set to `unit`.
    #[must_use]
    pub fn unit(self, unit: &str) -> Self {
        Self {
            unit: Some(unit.to_owned()),
            ..self
        }
    }
}
122
/// Outcome of judging a single measurement against the gate's thresholds.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MetricVerdict {
    /// A threshold exists and the measured value is at or below its ceiling.
    Pass,
    /// A threshold exists and the measured value is not at or below its ceiling.
    Fail,
    /// No threshold is registered for the metric, so nothing was checked.
    Unchecked,
}
137
138#[derive(Debug, Clone)]
140pub struct MetricResult {
141 pub metric: String,
143 pub value: f64,
145 pub budget: Option<f64>,
147 pub ceiling: Option<f64>,
149 pub tolerance_pct: Option<f64>,
151 pub overshoot_pct: Option<f64>,
154 pub verdict: MetricVerdict,
156 pub unit: Option<String>,
158}
159
160#[derive(Debug, Clone)]
166pub struct GateResult {
167 pub gate_name: String,
169 pub metrics: Vec<MetricResult>,
171 pub pass_count: usize,
173 pub fail_count: usize,
175 pub unchecked_count: usize,
177}
178
179impl GateResult {
180 #[must_use]
182 pub fn passed(&self) -> bool {
183 self.fail_count == 0
184 }
185
186 pub fn failures(&self) -> Vec<&MetricResult> {
188 self.metrics
189 .iter()
190 .filter(|m| m.verdict == MetricVerdict::Fail)
191 .collect()
192 }
193
194 #[must_use]
196 pub fn summary(&self) -> String {
197 let status = if self.passed() { "PASS" } else { "FAIL" };
198 let mut out = format!(
199 "Gate '{}': {} ({} passed, {} failed, {} unchecked)\n",
200 self.gate_name, status, self.pass_count, self.fail_count, self.unchecked_count
201 );
202 for m in &self.metrics {
203 let icon = match m.verdict {
204 MetricVerdict::Pass => " ok",
205 MetricVerdict::Fail => "FAIL",
206 MetricVerdict::Unchecked => " --",
207 };
208 let unit = m.unit.as_deref().unwrap_or("");
209 if let Some(budget) = m.budget {
210 let overshoot = m.overshoot_pct.unwrap_or(0.0);
211 out.push_str(&format!(
212 " [{icon}] {}: {:.1}{unit} (budget: {:.1}{unit}, overshoot: {overshoot:+.1}%)\n",
213 m.metric, m.value, budget
214 ));
215 } else {
216 out.push_str(&format!(
217 " [{icon}] {}: {:.1}{unit} (no threshold)\n",
218 m.metric, m.value
219 ));
220 }
221 }
222 out
223 }
224}
225
226#[derive(Debug, Clone)]
232pub struct BenchmarkGate {
233 gate_name: String,
235 thresholds: BTreeMap<String, Threshold>,
237}
238
239impl BenchmarkGate {
240 pub fn new(gate_name: &str) -> Self {
242 Self {
243 gate_name: gate_name.to_string(),
244 thresholds: BTreeMap::new(),
245 }
246 }
247
248 #[must_use]
250 pub fn threshold(mut self, threshold: Threshold) -> Self {
251 self.thresholds.insert(threshold.metric.clone(), threshold);
252 self
253 }
254
255 #[must_use]
266 pub fn load_json(gate_name: &str, json: &str) -> Option<Self> {
267 let parsed: serde_json::Value = serde_json::from_str(json).ok()?;
268 let obj = parsed.as_object()?;
269 let mut gate = Self::new(gate_name);
270 for (metric, value) in obj {
271 let budget = value.get("budget")?.as_f64()?;
272 let tolerance_pct = value
273 .get("tolerance_pct")
274 .and_then(|v| v.as_f64())
275 .unwrap_or(0.0);
276 gate.thresholds.insert(
277 metric.clone(),
278 Threshold {
279 metric: metric.clone(),
280 budget,
281 tolerance_pct,
282 },
283 );
284 }
285 Some(gate)
286 }
287
288 #[must_use]
305 pub fn load_baseline_json(gate_name: &str, json: &str, percentile: &str) -> Option<Self> {
306 let parsed: serde_json::Value = serde_json::from_str(json).ok()?;
307 let obj = parsed.as_object()?;
308 let mut gate = Self::new(gate_name);
309 for (metric, value) in obj {
310 if metric.starts_with('_') {
312 continue;
313 }
314 let budget = value.get(percentile).and_then(|v| v.as_f64())?;
315 let tolerance_pct = value
316 .get("threshold_pct")
317 .and_then(|v| v.as_f64())
318 .unwrap_or(0.0);
319 gate.thresholds.insert(
320 metric.clone(),
321 Threshold {
322 metric: metric.clone(),
323 budget,
324 tolerance_pct,
325 },
326 );
327 }
328 Some(gate)
329 }
330
331 pub fn evaluate(&self, measurements: &[Measurement]) -> GateResult {
336 let mut logger = TestJsonlLogger::new_with(&format!("{}_gate", self.gate_name), 0, true, 0);
337 logger.add_context_str("gate_name", &self.gate_name);
338
339 logger.log(
340 "gate.start",
341 &[
342 ("gate_name", JsonValue::str(&self.gate_name)),
343 (
344 "threshold_count",
345 JsonValue::u64(self.thresholds.len() as u64),
346 ),
347 (
348 "measurement_count",
349 JsonValue::u64(measurements.len() as u64),
350 ),
351 ],
352 );
353
354 let mut metrics = Vec::new();
355 let mut pass_count = 0usize;
356 let mut fail_count = 0usize;
357 let mut unchecked_count = 0usize;
358
359 for measurement in measurements {
360 let result = if let Some(threshold) = self.thresholds.get(&measurement.metric) {
361 let ceiling = threshold.ceiling();
362 let overshoot_pct = if threshold.budget > 0.0 {
363 (measurement.value - threshold.budget) / threshold.budget * 100.0
364 } else {
365 0.0
366 };
367 let verdict = if measurement.value <= ceiling {
368 MetricVerdict::Pass
369 } else {
370 MetricVerdict::Fail
371 };
372 MetricResult {
373 metric: measurement.metric.clone(),
374 value: measurement.value,
375 budget: Some(threshold.budget),
376 ceiling: Some(ceiling),
377 tolerance_pct: Some(threshold.tolerance_pct),
378 overshoot_pct: Some(overshoot_pct),
379 verdict,
380 unit: measurement.unit.clone(),
381 }
382 } else {
383 MetricResult {
384 metric: measurement.metric.clone(),
385 value: measurement.value,
386 budget: None,
387 ceiling: None,
388 tolerance_pct: None,
389 overshoot_pct: None,
390 verdict: MetricVerdict::Unchecked,
391 unit: measurement.unit.clone(),
392 }
393 };
394
395 let verdict_str = match result.verdict {
397 MetricVerdict::Pass => "pass",
398 MetricVerdict::Fail => "fail",
399 MetricVerdict::Unchecked => "unchecked",
400 };
401
402 let mut fields: Vec<(&str, JsonValue)> = vec![
403 ("metric", JsonValue::str(&result.metric)),
404 ("value", JsonValue::raw(format!("{:.6}", result.value))),
405 ("verdict", JsonValue::str(verdict_str)),
406 ];
407 if let Some(budget) = result.budget {
408 fields.push(("budget", JsonValue::raw(format!("{budget:.6}"))));
409 }
410 if let Some(ceiling) = result.ceiling {
411 fields.push(("ceiling", JsonValue::raw(format!("{ceiling:.6}"))));
412 }
413 if let Some(overshoot) = result.overshoot_pct {
414 fields.push(("overshoot_pct", JsonValue::raw(format!("{overshoot:.2}"))));
415 }
416 logger.log("gate.metric", &fields);
417
418 match result.verdict {
419 MetricVerdict::Pass => pass_count += 1,
420 MetricVerdict::Fail => fail_count += 1,
421 MetricVerdict::Unchecked => unchecked_count += 1,
422 }
423
424 metrics.push(result);
425 }
426
427 metrics.sort_by(|a, b| a.metric.cmp(&b.metric));
429
430 let overall = if fail_count == 0 { "pass" } else { "fail" };
431 logger.log(
432 "gate.result",
433 &[
434 ("gate_name", JsonValue::str(&self.gate_name)),
435 ("verdict", JsonValue::str(overall)),
436 ("pass_count", JsonValue::u64(pass_count as u64)),
437 ("fail_count", JsonValue::u64(fail_count as u64)),
438 ("unchecked_count", JsonValue::u64(unchecked_count as u64)),
439 ],
440 );
441
442 GateResult {
443 gate_name: self.gate_name.clone(),
444 metrics,
445 pass_count,
446 fail_count,
447 unchecked_count,
448 }
449 }
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline fixture shared by the runtime gate tests.
    const BASELINE_JSON: &str = include_str!("../../../tests/baseline.json");

    /// Loads the runtime gate from the baseline fixture, budgeting on `p99_ns`.
    fn runtime_gate() -> BenchmarkGate {
        BenchmarkGate::load_baseline_json("runtime_gate", BASELINE_JSON, "p99_ns")
            .expect("baseline.json should parse")
    }

    /// Builds `test_gate` with a budget of 100.0 and 10% tolerance on `metric_a`.
    fn tolerant_gate() -> BenchmarkGate {
        let threshold = Threshold::new("metric_a", 100.0).tolerance_pct(10.0);
        BenchmarkGate::new("test_gate").threshold(threshold)
    }

    // A 10% tolerance on a 2000.0 budget gives a ceiling of 2200.0.
    #[test]
    fn threshold_ceiling_with_tolerance() {
        let ceiling = Threshold::new("render_p99", 2000.0)
            .tolerance_pct(10.0)
            .ceiling();
        let delta = (ceiling - 2200.0).abs();
        assert!(delta < f64::EPSILON);
    }

    // Without a tolerance the ceiling equals the budget.
    #[test]
    fn threshold_ceiling_zero_tolerance() {
        let ceiling = Threshold::new("render_p99", 1000.0).ceiling();
        let delta = (ceiling - 1000.0).abs();
        assert!(delta < f64::EPSILON);
    }

    // A value below the budget passes.
    #[test]
    fn gate_pass_within_budget() {
        let samples = [Measurement::new("metric_a", 95.0)];
        let outcome = tolerant_gate().evaluate(&samples);
        assert!(outcome.passed());
        assert_eq!(outcome.pass_count, 1);
        assert_eq!(outcome.fail_count, 0);
    }

    // A value above the budget but within the tolerance still passes.
    #[test]
    fn gate_pass_within_tolerance() {
        let samples = [Measurement::new("metric_a", 105.0)];
        let outcome = tolerant_gate().evaluate(&samples);
        assert!(outcome.passed());
    }

    // A value above the ceiling fails.
    #[test]
    fn gate_fail_exceeds_tolerance() {
        let samples = [Measurement::new("metric_a", 115.0)];
        let outcome = tolerant_gate().evaluate(&samples);
        assert!(!outcome.passed());
        assert_eq!(outcome.fail_count, 1);
    }

    // A metric without a threshold is counted as unchecked and does not fail the gate.
    #[test]
    fn gate_unchecked_metric() {
        let gate = BenchmarkGate::new("test_gate").threshold(Threshold::new("metric_a", 100.0));
        let samples = [
            Measurement::new("metric_a", 90.0),
            Measurement::new("metric_b", 999.0),
        ];

        let outcome = gate.evaluate(&samples);
        assert!(outcome.passed());
        assert_eq!(outcome.unchecked_count, 1);
    }

    // One passing and one failing metric: the gate fails and reports the failing one.
    #[test]
    fn gate_multiple_metrics_mixed() {
        let fast = Threshold::new("fast", 100.0);
        let slow = Threshold::new("slow", 200.0).tolerance_pct(5.0);
        let gate = BenchmarkGate::new("test_gate").threshold(fast).threshold(slow);

        let samples = [
            Measurement::new("fast", 80.0),
            Measurement::new("slow", 250.0),
        ];
        let outcome = gate.evaluate(&samples);
        assert!(!outcome.passed());
        assert_eq!(outcome.pass_count, 1);
        assert_eq!(outcome.fail_count, 1);

        let failed = outcome.failures();
        assert_eq!(failed.len(), 1);
        assert_eq!(failed[0].metric, "slow");
    }

    // Thresholds loaded from the budget/tolerance JSON format are applied.
    #[test]
    fn gate_load_json() {
        let json = r#"{
 "render_p99": { "budget": 2000.0, "tolerance_pct": 10.0 },
 "diff_p99": { "budget": 500.0 }
 }"#;
        let gate = BenchmarkGate::load_json("perf_gate", json).expect("valid JSON");
        let samples = [
            Measurement::new("render_p99", 1800.0),
            Measurement::new("diff_p99", 480.0),
        ];
        assert!(gate.evaluate(&samples).passed());
    }

    // Unparseable input yields `None`.
    #[test]
    fn gate_load_json_invalid() {
        let loaded = BenchmarkGate::load_json("bad", "not json");
        assert!(loaded.is_none());
    }

    // Baseline format: budgets come from the requested percentile field.
    #[test]
    fn gate_load_baseline_json_format() {
        let json = r#"{
 "_comment": "Performance baseline",
 "_format": "p50/p95/p99/p999 in nanoseconds",
 "frame_render": {
 "p50_ns": 500000,
 "p95_ns": 1000000,
 "p99_ns": 2000000,
 "p999_ns": 5000000,
 "threshold_pct": 10
 },
 "diff_strategy": {
 "p50_ns": 50000,
 "p99_ns": 200000,
 "threshold_pct": 10
 }
 }"#;
        let gate = BenchmarkGate::load_baseline_json("perf_gate", json, "p99_ns")
            .expect("baseline JSON should parse");

        let within_budget = [
            Measurement::new("frame_render", 1_800_000.0).unit("ns"),
            Measurement::new("diff_strategy", 190_000.0).unit("ns"),
        ];
        let outcome = gate.evaluate(&within_budget);
        assert!(outcome.passed(), "gate should pass: {}", outcome.summary());

        let regressed = [
            Measurement::new("frame_render", 2_500_000.0).unit("ns"),
            Measurement::new("diff_strategy", 190_000.0).unit("ns"),
        ];
        let outcome = gate.evaluate(&regressed);
        assert!(!outcome.passed(), "gate should fail on regression");
    }

    // Keys starting with `_` are skipped when loading a baseline.
    #[test]
    fn gate_load_baseline_json_skips_metadata() {
        let json = r#"{
 "_comment": "ignored",
 "metric_a": { "p99_ns": 100.0, "threshold_pct": 5 }
 }"#;
        let gate =
            BenchmarkGate::load_baseline_json("meta_test", json, "p99_ns").expect("should parse");
        let samples = [Measurement::new("metric_a", 95.0)];
        let outcome = gate.evaluate(&samples);
        assert!(outcome.passed());
        assert_eq!(outcome.metrics.len(), 1);
    }

    // The summary mentions the status, the metric name, and the unit.
    #[test]
    fn gate_summary_format() {
        let gate = BenchmarkGate::new("summary_test").threshold(Threshold::new("metric_a", 100.0));
        let samples = [Measurement::new("metric_a", 90.0).unit("μs")];
        let text = gate.evaluate(&samples).summary();
        for needle in ["PASS", "metric_a", "μs"] {
            assert!(text.contains(needle));
        }
    }

    // Overshoot is negative when the value is below the budget.
    #[test]
    fn gate_overshoot_pct_negative_when_under_budget() {
        let gate =
            BenchmarkGate::new("overshoot_test").threshold(Threshold::new("metric_a", 100.0));
        let samples = [Measurement::new("metric_a", 80.0)];
        let outcome = gate.evaluate(&samples);
        let first = &outcome.metrics[0];
        assert!(first.overshoot_pct.unwrap() < 0.0);
    }

    // With no measurements the gate passes and all tallies stay at zero.
    #[test]
    fn gate_empty_measurements() {
        let gate = BenchmarkGate::new("empty_test").threshold(Threshold::new("metric_a", 100.0));
        let outcome = gate.evaluate(&[]);
        assert!(outcome.passed());
        assert_eq!(outcome.pass_count, 0);
        assert_eq!(outcome.fail_count, 0);
    }

    // The baseline fixture must define the runtime metrics the gate relies on.
    #[test]
    fn load_baseline_includes_runtime_benchmarks() {
        let gate = runtime_gate();

        let runtime_metrics: Vec<&str> = gate
            .thresholds
            .keys()
            .map(|name| name.as_str())
            .filter(|name| name.starts_with("runtime_"))
            .collect();

        let expected = [
            (
                "runtime_shutdown_latency",
                "shutdown_latency baseline should be loaded",
            ),
            (
                "runtime_first_frame",
                "first_frame baseline should be loaded",
            ),
            (
                "runtime_command_roundtrip",
                "command_roundtrip baseline should be loaded",
            ),
            (
                "runtime_effect_queue_drain",
                "effect_queue_drain baseline should be loaded",
            ),
        ];
        for (name, message) in expected {
            assert!(runtime_metrics.contains(&name), "{message}");
        }
    }

    // These sample values are expected to sit within the fixture's budgets.
    #[test]
    fn runtime_gate_passes_within_budget() {
        let samples = vec![
            Measurement::new("runtime_shutdown_latency", 1_000_000.0).unit("ns"),
            Measurement::new("runtime_first_frame", 5_000_000.0).unit("ns"),
            Measurement::new("runtime_command_roundtrip", 100_000.0).unit("ns"),
            Measurement::new("runtime_effect_queue_drain", 500_000.0).unit("ns"),
        ];
        let outcome = runtime_gate().evaluate(&samples);
        assert!(
            outcome.passed(),
            "all runtime metrics should pass: {}",
            outcome.summary()
        );
    }

    // A very large shutdown latency must fail the gate and be reported as a failure.
    #[test]
    fn runtime_gate_fails_on_regression() {
        let samples = vec![
            Measurement::new("runtime_shutdown_latency", 100_000_000.0).unit("ns"),
            Measurement::new("runtime_first_frame", 5_000_000.0).unit("ns"),
        ];
        let outcome = runtime_gate().evaluate(&samples);
        assert!(!outcome.passed(), "regression should fail the gate");
        assert!(outcome.fail_count >= 1);

        let shutdown_failed = outcome
            .failures()
            .iter()
            .any(|f| f.metric == "runtime_shutdown_latency");
        assert!(
            shutdown_failed,
            "shutdown latency should be the failing metric"
        );
    }

    // The summary names the metric and carries a pass marker.
    #[test]
    fn runtime_gate_summary_readable() {
        let samples = vec![Measurement::new("runtime_shutdown_latency", 4_000_000.0).unit("ns")];
        let text = runtime_gate().evaluate(&samples).summary();
        assert!(text.contains("runtime_shutdown_latency"));
        assert!(text.contains("PASS") || text.contains("ok"));
    }
}