1use serde::{Deserialize, Serialize};
8use std::collections::BTreeMap;
9use std::error::Error;
10use std::fmt;
11use std::fs;
12use std::io;
13use std::path::Path;
14use std::time::{Instant, SystemTime, UNIX_EPOCH};
15
/// Schema version stamped into every ledger built by `PerfEvidenceLedger::new`.
///
/// `PerfEvidenceLedger::validate` rejects any ledger whose `schema_version`
/// differs from this value.
pub const PERF_EVIDENCE_SCHEMA_VERSION: &str = "1";
17
/// Category of the workload a perf evidence ledger describes.
///
/// Serialized with `snake_case` variant names (for example `watch_once`).
/// The [`Default`] value is [`PerfWorkloadKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfWorkloadKind {
    Search,
    WatchOnce,
    FullRebuild,
    SemanticBackfill,
    SourceSync,
    DoctorRepair,
    CacheWarm,
    /// Fallback used when no more specific kind is given.
    #[default]
    Other,
}
31
/// Classification attached to a recorded phase timing.
///
/// Serialized with `snake_case` variant names. The [`Default`] value is
/// [`PerfPhaseKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfPhaseKind {
    Queueing,
    Service,
    Io,
    Synchronization,
    Retries,
    Hydration,
    Output,
    /// Fallback used when no more specific kind is given.
    #[default]
    Other,
}
45
/// Outcome recorded in a ledger's proof summary.
///
/// Serialized with `snake_case` variant names. The [`Default`] value is
/// [`PerfProofStatus::NotMeasured`]. During replay, `Failed` produces a
/// failure finding and `Inconclusive` a warning; the other two produce none.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfProofStatus {
    #[default]
    NotMeasured,
    Passed,
    Failed,
    Inconclusive,
}
55
/// Qualifier stored next to a `PerfCount::value`.
///
/// Serialized with `snake_case` variant names. The [`Default`] value is
/// [`PerfCountPrecision::Exact`], which is also what deserialization uses when
/// the `precision` field is absent.
// NOTE(review): how consumers should treat `value` when the precision is
// `Unavailable` is not established in this file — confirm with callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfCountPrecision {
    #[default]
    Exact,
    LowerBound,
    Estimated,
    Unavailable,
}
65
/// One recorded perf evidence run: workload identity, machine and environment
/// context, per-phase timings, resource and subsystem snapshots, a proof
/// summary, and references to related artifacts.
///
/// When deserializing, only `schema_version`, `run_id`, `recorded_at_ms`, and
/// `workload` are required; every other field falls back to its default.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfEvidenceLedger {
    /// Must equal `PERF_EVIDENCE_SCHEMA_VERSION` for `validate` to pass.
    pub schema_version: String,
    /// Identifier of the run; `validate` rejects blank values.
    pub run_id: String,
    /// Recording timestamp; `validate` rejects negative values.
    // NOTE(review): presumably Unix-epoch milliseconds (the recorder fills it
    // from `now_unix_ms()`) — confirm.
    pub recorded_at_ms: i64,
    /// The workload that was measured.
    pub workload: PerfWorkload,
    #[serde(default)]
    pub machine: PerfMachineProfile,
    /// Free-form key/value context, kept in sorted key order by the `BTreeMap`.
    #[serde(default)]
    pub env: BTreeMap<String, String>,
    /// Phase timings in the order they were recorded.
    #[serde(default)]
    pub phases: Vec<PerfPhaseTiming>,
    #[serde(default)]
    pub resources: PerfResourceSnapshot,
    #[serde(default)]
    pub cache: Option<PerfCacheSnapshot>,
    /// When present, `validate` checks its string fields are non-blank.
    #[serde(default)]
    pub search: Option<PerfSearchSnapshot>,
    /// When present, `validate` checks its mode is non-blank and `workers > 0`.
    #[serde(default)]
    pub rebuild: Option<PerfRebuildSnapshot>,
    #[serde(default)]
    pub proof: PerfProofSummary,
    #[serde(default)]
    pub artifacts: Vec<PerfArtifactRef>,
}
91
92impl PerfEvidenceLedger {
93 pub fn new(run_id: impl Into<String>, workload: PerfWorkload, recorded_at_ms: i64) -> Self {
94 Self {
95 schema_version: PERF_EVIDENCE_SCHEMA_VERSION.to_string(),
96 run_id: run_id.into(),
97 recorded_at_ms,
98 workload,
99 machine: PerfMachineProfile::default(),
100 env: BTreeMap::new(),
101 phases: Vec::new(),
102 resources: PerfResourceSnapshot::default(),
103 cache: None,
104 search: None,
105 rebuild: None,
106 proof: PerfProofSummary::default(),
107 artifacts: Vec::new(),
108 }
109 }
110
111 pub fn validate(&self) -> Result<(), PerfEvidenceValidationError> {
112 if self.schema_version != PERF_EVIDENCE_SCHEMA_VERSION {
113 return Err(PerfEvidenceValidationError::UnsupportedSchemaVersion {
114 expected: PERF_EVIDENCE_SCHEMA_VERSION,
115 actual: self.schema_version.clone(),
116 });
117 }
118
119 if self.run_id.trim().is_empty() {
120 return Err(PerfEvidenceValidationError::EmptyRunId);
121 }
122
123 if self.recorded_at_ms < 0 {
124 return Err(PerfEvidenceValidationError::NegativeRecordedAtMs {
125 recorded_at_ms: self.recorded_at_ms,
126 });
127 }
128
129 if self.workload.name.trim().is_empty() {
130 return Err(PerfEvidenceValidationError::EmptyWorkloadName);
131 }
132
133 if let Some(search) = &self.search {
134 if search.query_hash.trim().is_empty() {
135 return Err(PerfEvidenceValidationError::EmptySearchQueryHash);
136 }
137
138 if search.requested_mode.trim().is_empty() {
139 return Err(PerfEvidenceValidationError::EmptySearchRequestedMode);
140 }
141
142 if search.realized_mode.trim().is_empty() {
143 return Err(PerfEvidenceValidationError::EmptySearchRealizedMode);
144 }
145 }
146
147 if let Some(rebuild) = &self.rebuild {
148 if rebuild.execution_mode.trim().is_empty() {
149 return Err(PerfEvidenceValidationError::EmptyRebuildExecutionMode);
150 }
151
152 if rebuild.workers == 0 {
153 return Err(PerfEvidenceValidationError::ZeroRebuildWorkers);
154 }
155 }
156
157 for (index, phase) in self.phases.iter().enumerate() {
158 if phase.name.trim().is_empty() {
159 return Err(PerfEvidenceValidationError::EmptyPhaseName { index });
160 }
161
162 if quantile_order_violated(phase.p50_ms, phase.p95_ms)
163 || quantile_order_violated(phase.p95_ms, phase.p99_ms)
164 || quantile_order_violated(phase.p50_ms, phase.p99_ms)
165 {
166 return Err(PerfEvidenceValidationError::PhaseQuantilesOutOfOrder { index });
167 }
168 }
169
170 for (index, artifact) in self.artifacts.iter().enumerate() {
171 if artifact.label.trim().is_empty() {
172 return Err(PerfEvidenceValidationError::EmptyArtifactLabel { index });
173 }
174
175 if artifact.path.trim().is_empty() {
176 return Err(PerfEvidenceValidationError::EmptyArtifactPath { index });
177 }
178
179 if artifact.kind.trim().is_empty() {
180 return Err(PerfEvidenceValidationError::EmptyArtifactKind { index });
181 }
182 }
183
184 Ok(())
185 }
186}
187
/// Identity of the measured workload.
///
/// `kind` and `name` are required when deserializing; the remaining fields
/// default to `None` / empty.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfWorkload {
    pub kind: PerfWorkloadKind,
    /// Workload name; `PerfEvidenceLedger::validate` rejects blank values.
    pub name: String,
    #[serde(default)]
    pub description: Option<String>,
    /// Copied into every replay log event for the run.
    #[serde(default)]
    pub command_args: Vec<String>,
    #[serde(default)]
    pub input_count: Option<PerfCount>,
}
199
200impl PerfWorkload {
201 pub fn new(kind: PerfWorkloadKind, name: impl Into<String>) -> Self {
202 Self {
203 kind,
204 name: name.into(),
205 description: None,
206 command_args: Vec::new(),
207 input_count: None,
208 }
209 }
210}
211
/// A count paired with a marker describing how precise that count is.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfCount {
    pub value: u64,
    /// Defaults to `PerfCountPrecision::Exact` when absent from the input.
    #[serde(default)]
    pub precision: PerfCountPrecision,
}
218
/// Optional description of the machine a run executed on.
///
/// Every field is optional and defaults to `None`; nothing in
/// `PerfEvidenceLedger::validate` inspects these values.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfMachineProfile {
    #[serde(default)]
    pub logical_cpus: Option<u32>,
    #[serde(default)]
    pub reserved_cores: Option<u32>,
    #[serde(default)]
    pub available_memory_bytes: Option<u64>,
    #[serde(default)]
    pub topology_class: Option<String>,
}
230
/// Timing recorded for a single named phase of a run.
///
/// `name`, `kind`, and `elapsed_ms` are required when deserializing; the
/// quantiles and sample count are optional.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfPhaseTiming {
    /// Phase name; validation rejects blank values.
    pub name: String,
    pub kind: PerfPhaseKind,
    /// Elapsed time of the phase in milliseconds; summed across phases by the
    /// replay gate's total-elapsed comparison.
    pub elapsed_ms: u64,
    /// Optional quantiles. Where two are present, validation requires the
    /// lower quantile to be no greater than the higher one.
    #[serde(default)]
    pub p50_ms: Option<u64>,
    #[serde(default)]
    pub p95_ms: Option<u64>,
    /// Summed across phases by the replay gate's composed-p99 comparison.
    #[serde(default)]
    pub p99_ms: Option<u64>,
    #[serde(default)]
    pub samples: Option<PerfCount>,
}
245
246impl PerfPhaseTiming {
247 pub fn new(name: impl Into<String>, kind: PerfPhaseKind, elapsed_ms: u64) -> Self {
248 Self {
249 name: name.into(),
250 kind,
251 elapsed_ms,
252 p50_ms: None,
253 p95_ms: None,
254 p99_ms: None,
255 samples: None,
256 }
257 }
258}
259
/// Optional resource-usage figures captured for a run.
///
/// Every field is optional / defaults to empty; none of them is inspected by
/// ledger validation or by the replay gate in this file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfResourceSnapshot {
    #[serde(default)]
    pub peak_rss_bytes: Option<u64>,
    // NOTE(review): the `_pct_x100` suffix suggests hundredths of a percent
    // (e.g. 12.34% stored as 1234) — confirm with the producer.
    #[serde(default)]
    pub avg_cpu_utilization_pct_x100: Option<u32>,
    #[serde(default)]
    pub max_inflight_bytes: Option<u64>,
    #[serde(default)]
    pub disk_read_bytes: Option<u64>,
    #[serde(default)]
    pub disk_write_bytes: Option<u64>,
    #[serde(default)]
    pub notes: Vec<String>,
}
275
/// Cache counters captured for a run.
///
/// Counters default to `0` and the byte figures to `None` when absent from
/// the serialized input.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfCacheSnapshot {
    #[serde(default)]
    pub result_cache_hits: u64,
    #[serde(default)]
    pub result_cache_misses: u64,
    #[serde(default)]
    pub eviction_count: u64,
    #[serde(default)]
    pub approx_bytes: Option<u64>,
    #[serde(default)]
    pub byte_cap: Option<u64>,
}
289
/// Search-specific details captured for a run.
///
/// `query_hash`, `limit`, `returned_hits`, `requested_mode`, and
/// `realized_mode` are required when deserializing.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfSearchSnapshot {
    /// `PerfEvidenceLedger::validate` rejects blank values.
    pub query_hash: String,
    pub limit: u32,
    #[serde(default)]
    pub matched_count: Option<PerfCount>,
    pub returned_hits: u32,
    /// `PerfEvidenceLedger::validate` rejects blank values.
    pub requested_mode: String,
    /// `PerfEvidenceLedger::validate` rejects blank values.
    pub realized_mode: String,
    #[serde(default)]
    pub fallback_tier: Option<String>,
    /// Defaults to `false` when absent from the input.
    #[serde(default)]
    pub timed_out: bool,
}
304
/// Rebuild-specific details captured for a run.
///
/// `execution_mode` and `workers` are required when deserializing.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfRebuildSnapshot {
    /// `PerfEvidenceLedger::validate` rejects blank values.
    pub execution_mode: String,
    /// `PerfEvidenceLedger::validate` rejects `0`.
    pub workers: u32,
    #[serde(default)]
    pub shard_count: Option<u32>,
    #[serde(default)]
    pub queued_items: Option<PerfCount>,
    #[serde(default)]
    pub indexed_items: Option<PerfCount>,
    #[serde(default)]
    pub checkpoint_count: Option<u64>,
}
318
/// Summary of the proof attached to a run.
///
/// All fields are optional when deserializing; the default summary has status
/// `NotMeasured` and no regression figure.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfProofSummary {
    #[serde(default)]
    pub status: PerfProofStatus,
    #[serde(default)]
    pub baseline_artifact: Option<String>,
    #[serde(default)]
    pub comparison_artifact: Option<String>,
    /// Regression in basis points; when present, the replay gate compares it
    /// against the p99 warning/failure thresholds.
    #[serde(default)]
    pub p99_regression_basis_points: Option<i64>,
    #[serde(default)]
    pub notes: Vec<String>,
}
332
/// Reference to an artifact associated with a run.
///
/// `label`, `path`, and `kind` are required and must be non-blank for a
/// ledger containing this reference to pass validation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfArtifactRef {
    pub label: String,
    pub path: String,
    pub kind: String,
    /// Optional digest; `write_perf_evidence_ledger` fills it from
    /// `sha256_hex` over the bytes it wrote.
    #[serde(default)]
    pub sha256: Option<String>,
}
341
/// Reasons a ledger (or a single phase) can fail validation.
///
/// Variants carrying an `index` refer to the zero-based position of the
/// offending phase or artifact within the ledger.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PerfEvidenceValidationError {
    /// `schema_version` did not match the supported version.
    UnsupportedSchemaVersion {
        expected: &'static str,
        actual: String,
    },
    EmptyRunId,
    NegativeRecordedAtMs {
        recorded_at_ms: i64,
    },
    EmptyWorkloadName,
    EmptySearchQueryHash,
    EmptySearchRequestedMode,
    EmptySearchRealizedMode,
    EmptyRebuildExecutionMode,
    ZeroRebuildWorkers,
    EmptyPhaseName {
        index: usize,
    },
    /// A lower quantile exceeded a higher one (p50/p95/p99).
    PhaseQuantilesOutOfOrder {
        index: usize,
    },
    EmptyArtifactLabel {
        index: usize,
    },
    EmptyArtifactPath {
        index: usize,
    },
    EmptyArtifactKind {
        index: usize,
    },
}
374
375impl fmt::Display for PerfEvidenceValidationError {
376 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
377 match self {
378 Self::UnsupportedSchemaVersion { expected, actual } => {
379 write!(
380 f,
381 "unsupported perf evidence schema version {actual:?}; expected {expected:?}"
382 )
383 }
384 Self::EmptyRunId => write!(f, "perf evidence run_id cannot be empty"),
385 Self::NegativeRecordedAtMs { recorded_at_ms } => {
386 write!(
387 f,
388 "perf evidence recorded_at_ms cannot be negative: {recorded_at_ms}"
389 )
390 }
391 Self::EmptyWorkloadName => write!(f, "perf evidence workload.name cannot be empty"),
392 Self::EmptySearchQueryHash => {
393 write!(f, "perf evidence search.query_hash cannot be empty")
394 }
395 Self::EmptySearchRequestedMode => {
396 write!(f, "perf evidence search.requested_mode cannot be empty")
397 }
398 Self::EmptySearchRealizedMode => {
399 write!(f, "perf evidence search.realized_mode cannot be empty")
400 }
401 Self::EmptyRebuildExecutionMode => {
402 write!(f, "perf evidence rebuild.execution_mode cannot be empty")
403 }
404 Self::ZeroRebuildWorkers => {
405 write!(f, "perf evidence rebuild.workers must be greater than zero")
406 }
407 Self::EmptyPhaseName { index } => {
408 write!(f, "perf evidence phase at index {index} has an empty name")
409 }
410 Self::PhaseQuantilesOutOfOrder { index } => {
411 write!(
412 f,
413 "perf evidence phase at index {index} has out-of-order quantiles"
414 )
415 }
416 Self::EmptyArtifactLabel { index } => {
417 write!(
418 f,
419 "perf evidence artifact at index {index} has an empty label"
420 )
421 }
422 Self::EmptyArtifactPath { index } => {
423 write!(
424 f,
425 "perf evidence artifact at index {index} has an empty path"
426 )
427 }
428 Self::EmptyArtifactKind { index } => {
429 write!(
430 f,
431 "perf evidence artifact at index {index} has an empty kind"
432 )
433 }
434 }
435 }
436}
437
// Validation errors are leaf errors: the default `source()` (returning `None`)
// is kept, so no methods are overridden here.
impl Error for PerfEvidenceValidationError {}
439
/// Returns `true` only when both quantiles are present and `lower` is strictly
/// greater than `upper`. A missing value on either side is never a violation.
fn quantile_order_violated(lower: Option<u64>, upper: Option<u64>) -> bool {
    lower.zip(upper).is_some_and(|(lo, hi)| lo > hi)
}
443
/// Failure while reading or writing a ledger file.
///
/// Wraps the three error sources used by the file helpers: filesystem access,
/// JSON (de)serialization, and ledger validation.
#[derive(Debug)]
pub enum PerfEvidenceIoError {
    Io(io::Error),
    Json(serde_json::Error),
    Validation(PerfEvidenceValidationError),
}
450
451impl fmt::Display for PerfEvidenceIoError {
452 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
453 match self {
454 Self::Io(err) => write!(f, "perf evidence I/O failed: {err}"),
455 Self::Json(err) => write!(f, "perf evidence JSON failed: {err}"),
456 Self::Validation(err) => write!(f, "perf evidence validation failed: {err}"),
457 }
458 }
459}
460
461impl Error for PerfEvidenceIoError {
462 fn source(&self) -> Option<&(dyn Error + 'static)> {
463 match self {
464 Self::Io(err) => Some(err),
465 Self::Json(err) => Some(err),
466 Self::Validation(err) => Some(err),
467 }
468 }
469}
470
471impl From<io::Error> for PerfEvidenceIoError {
472 fn from(err: io::Error) -> Self {
473 Self::Io(err)
474 }
475}
476
477impl From<serde_json::Error> for PerfEvidenceIoError {
478 fn from(err: serde_json::Error) -> Self {
479 Self::Json(err)
480 }
481}
482
483impl From<PerfEvidenceValidationError> for PerfEvidenceIoError {
484 fn from(err: PerfEvidenceValidationError) -> Self {
485 Self::Validation(err)
486 }
487}
488
489pub fn read_perf_evidence_ledger(
490 path: impl AsRef<Path>,
491) -> Result<PerfEvidenceLedger, PerfEvidenceIoError> {
492 let bytes = fs::read(path.as_ref())?;
493 let ledger: PerfEvidenceLedger = serde_json::from_slice(&bytes)?;
494 ledger.validate()?;
495 Ok(ledger)
496}
497
498pub fn write_perf_evidence_ledger(
499 ledger: &PerfEvidenceLedger,
500 path: impl AsRef<Path>,
501) -> Result<PerfArtifactRef, PerfEvidenceIoError> {
502 ledger.validate()?;
503 let path = path.as_ref();
504 if let Some(parent) = path
505 .parent()
506 .filter(|parent| !parent.as_os_str().is_empty())
507 {
508 fs::create_dir_all(parent)?;
509 }
510 let bytes = serde_json::to_vec_pretty(ledger)?;
511 fs::write(path, &bytes)?;
512 Ok(PerfArtifactRef {
513 label: "perf-evidence-ledger".to_string(),
514 path: path.display().to_string(),
515 kind: "json".to_string(),
516 sha256: Some(sha256_hex(&bytes)),
517 })
518}
519
/// Errors produced by `PerfEvidenceRecorder`.
#[derive(Debug)]
pub enum PerfEvidenceRecorderError {
    /// `begin_phase` was called while another phase was still active.
    ActivePhaseAlreadyRunning { active_phase: String },
    /// `finish_phase` was called with no phase active.
    NoActivePhase,
    /// A recorded phase or the finished ledger failed validation.
    Validation(PerfEvidenceValidationError),
}
526
527impl fmt::Display for PerfEvidenceRecorderError {
528 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
529 match self {
530 Self::ActivePhaseAlreadyRunning { active_phase } => {
531 write!(f, "perf evidence phase {active_phase:?} is already active")
532 }
533 Self::NoActivePhase => write!(f, "no perf evidence phase is active"),
534 Self::Validation(err) => {
535 write!(f, "perf evidence recorder produced invalid data: {err}")
536 }
537 }
538 }
539}
540
541impl Error for PerfEvidenceRecorderError {
542 fn source(&self) -> Option<&(dyn Error + 'static)> {
543 match self {
544 Self::Validation(err) => Some(err),
545 _ => None,
546 }
547 }
548}
549
550impl From<PerfEvidenceValidationError> for PerfEvidenceRecorderError {
551 fn from(err: PerfEvidenceValidationError) -> Self {
552 Self::Validation(err)
553 }
554}
555
/// Bookkeeping for the phase currently being timed by a recorder.
#[derive(Debug)]
struct ActivePerfPhase {
    name: String,
    kind: PerfPhaseKind,
    /// Monotonic start point; elapsed time is measured from here when the
    /// phase is finished.
    started_at: Instant,
}
562
/// Incrementally builds a `PerfEvidenceLedger`.
///
/// Holds the ledger under construction plus at most one phase that is
/// currently being timed via `begin_phase` / `finish_phase`.
#[derive(Debug)]
pub struct PerfEvidenceRecorder {
    ledger: PerfEvidenceLedger,
    /// `Some` between `begin_phase` and the matching `finish_phase`.
    active_phase: Option<ActivePerfPhase>,
}
574
575impl PerfEvidenceRecorder {
576 pub fn new(run_id: impl Into<String>, workload: PerfWorkload, recorded_at_ms: i64) -> Self {
577 Self {
578 ledger: PerfEvidenceLedger::new(run_id, workload, recorded_at_ms),
579 active_phase: None,
580 }
581 }
582
583 pub fn start(run_id: impl Into<String>, workload: PerfWorkload) -> Self {
584 Self::new(run_id, workload, now_unix_ms())
585 }
586
587 pub fn ledger(&self) -> &PerfEvidenceLedger {
588 &self.ledger
589 }
590
591 pub fn machine(&mut self, machine: PerfMachineProfile) -> &mut Self {
592 self.ledger.machine = machine;
593 self
594 }
595
596 pub fn resource_snapshot(&mut self, resources: PerfResourceSnapshot) -> &mut Self {
597 self.ledger.resources = resources;
598 self
599 }
600
601 pub fn cache_snapshot(&mut self, cache: PerfCacheSnapshot) -> &mut Self {
602 self.ledger.cache = Some(cache);
603 self
604 }
605
606 pub fn search_snapshot(&mut self, search: PerfSearchSnapshot) -> &mut Self {
607 self.ledger.search = Some(search);
608 self
609 }
610
611 pub fn rebuild_snapshot(&mut self, rebuild: PerfRebuildSnapshot) -> &mut Self {
612 self.ledger.rebuild = Some(rebuild);
613 self
614 }
615
616 pub fn proof_summary(&mut self, proof: PerfProofSummary) -> &mut Self {
617 self.ledger.proof = proof;
618 self
619 }
620
621 pub fn env_kv(&mut self, key: impl Into<String>, value: impl Into<String>) -> &mut Self {
622 self.ledger.env.insert(key.into(), value.into());
623 self
624 }
625
626 pub fn artifact(&mut self, artifact: PerfArtifactRef) -> &mut Self {
627 self.ledger.artifacts.push(artifact);
628 self
629 }
630
631 pub fn record_phase(
632 &mut self,
633 phase: PerfPhaseTiming,
634 ) -> Result<&mut Self, PerfEvidenceRecorderError> {
635 validate_phase(&phase, self.ledger.phases.len())?;
636 self.ledger.phases.push(phase);
637 Ok(self)
638 }
639
640 pub fn begin_phase(
641 &mut self,
642 name: impl Into<String>,
643 kind: PerfPhaseKind,
644 ) -> Result<&mut Self, PerfEvidenceRecorderError> {
645 if let Some(active) = &self.active_phase {
646 return Err(PerfEvidenceRecorderError::ActivePhaseAlreadyRunning {
647 active_phase: active.name.clone(),
648 });
649 }
650 self.active_phase = Some(ActivePerfPhase {
651 name: name.into(),
652 kind,
653 started_at: Instant::now(),
654 });
655 Ok(self)
656 }
657
658 pub fn finish_phase(&mut self) -> Result<&mut Self, PerfEvidenceRecorderError> {
659 let Some(active) = self.active_phase.take() else {
660 return Err(PerfEvidenceRecorderError::NoActivePhase);
661 };
662 let elapsed_ms = active
663 .started_at
664 .elapsed()
665 .as_millis()
666 .min(u128::from(u64::MAX)) as u64;
667 self.record_phase(PerfPhaseTiming::new(active.name, active.kind, elapsed_ms))
668 }
669
670 pub fn finish(mut self) -> Result<PerfEvidenceLedger, PerfEvidenceRecorderError> {
671 if self.active_phase.is_some() {
672 self.finish_phase()?;
673 }
674 self.ledger.validate()?;
675 Ok(self.ledger)
676 }
677}
678
/// Overall outcome of a replay, or the severity of one finding.
///
/// Serialized with `snake_case` variant names. Only `Failure` causes
/// `should_fail_build` to return `true`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PerfReplayVerdict {
    Clean,
    Warning,
    Failure,
}
686
687impl PerfReplayVerdict {
688 pub fn should_fail_build(self) -> bool {
689 matches!(self, Self::Failure)
690 }
691
692 fn max(self, other: Self) -> Self {
693 match (self, other) {
694 (Self::Failure, _) | (_, Self::Failure) => Self::Failure,
695 (Self::Warning, _) | (_, Self::Warning) => Self::Warning,
696 _ => Self::Clean,
697 }
698 }
699}
700
/// Identifies which replay check produced a finding.
///
/// Serialized with `snake_case` variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PerfReplayMetric {
    /// Ledger validation of the current or baseline ledger.
    Validation,
    /// Presence of phase timings / proof data.
    MeasurementCoverage,
    /// The proof summary's status.
    ProofStatus,
    /// The proof summary's reported p99 regression.
    ProofP99Regression,
    /// Sum of per-phase p99 values compared against the baseline.
    ComposedP99,
    /// Sum of per-phase elapsed times compared against the baseline.
    TotalElapsed,
}
711
/// A single observation produced by the replay gate.
///
/// The numeric fields are only populated by threshold-based checks; for other
/// findings they stay `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayFinding {
    pub verdict: PerfReplayVerdict,
    pub metric: PerfReplayMetric,
    pub message: String,
    /// Baseline measurement (milliseconds for the composed-p99 and
    /// total-elapsed checks).
    #[serde(default)]
    pub baseline_value: Option<i64>,
    /// Current measurement, in the same unit as `baseline_value`.
    #[serde(default)]
    pub current_value: Option<i64>,
    #[serde(default)]
    pub delta_basis_points: Option<i64>,
    /// The threshold that was reached (warning or failure).
    #[serde(default)]
    pub threshold_basis_points: Option<i64>,
}
726
/// Structured log entry emitted while replaying a ledger.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayLogEvent {
    /// Severity label; the gate in this file emits `info`, `warn`, and `error`.
    pub level: String,
    pub message: String,
    /// Display form of the current ledger's path, when one was supplied.
    #[serde(default)]
    pub artifact_path: Option<String>,
    /// Run id of the current ledger.
    pub run_id: String,
    /// Command arguments copied from the current ledger's workload.
    #[serde(default)]
    pub command_args: Vec<String>,
    #[serde(default)]
    pub failure_reason: Option<String>,
}
739
/// Result of replaying a current ledger, optionally against a baseline.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayReport {
    pub current_run_id: String,
    /// `None` when the replay ran without a baseline.
    #[serde(default)]
    pub baseline_run_id: Option<String>,
    /// The most severe verdict among `findings` (`Clean` when there are none).
    pub verdict: PerfReplayVerdict,
    #[serde(default)]
    pub findings: Vec<PerfReplayFinding>,
    #[serde(default)]
    pub logs: Vec<PerfReplayLogEvent>,
}
751
752impl PerfReplayReport {
753 pub fn should_fail_build(&self) -> bool {
754 self.verdict.should_fail_build()
755 }
756
757 fn new(current: &PerfEvidenceLedger, baseline: Option<&PerfEvidenceLedger>) -> Self {
758 Self {
759 current_run_id: current.run_id.clone(),
760 baseline_run_id: baseline.map(|ledger| ledger.run_id.clone()),
761 verdict: PerfReplayVerdict::Clean,
762 findings: Vec::new(),
763 logs: Vec::new(),
764 }
765 }
766
767 fn add_finding(&mut self, finding: PerfReplayFinding) {
768 self.verdict = self.verdict.max(finding.verdict);
769 self.findings.push(finding);
770 }
771
772 fn log(
773 &mut self,
774 level: &str,
775 message: &str,
776 current: &PerfEvidenceLedger,
777 artifact_path: Option<&Path>,
778 failure_reason: Option<String>,
779 ) {
780 self.logs.push(PerfReplayLogEvent {
781 level: level.to_string(),
782 message: message.to_string(),
783 artifact_path: artifact_path.map(|path| path.display().to_string()),
784 run_id: current.run_id.clone(),
785 command_args: current.workload.command_args.clone(),
786 failure_reason,
787 });
788 }
789}
790
/// Regression thresholds, in basis points, used by the replay gate.
///
/// A delta at or above the warning threshold yields a warning; at or above the
/// failure threshold it yields a failure. The p99 pair applies to both the
/// proof-reported and the composed p99 checks.
///
/// The fields are public and the type is deserializable, so values built that
/// way are not checked by `try_new`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayThresholds {
    pub warning_p99_regression_basis_points: i64,
    pub failure_p99_regression_basis_points: i64,
    pub warning_elapsed_regression_basis_points: i64,
    pub failure_elapsed_regression_basis_points: i64,
}
798
799impl PerfReplayThresholds {
800 pub fn defaults() -> Self {
801 Self {
802 warning_p99_regression_basis_points: 1_000,
803 failure_p99_regression_basis_points: 2_500,
804 warning_elapsed_regression_basis_points: 1_500,
805 failure_elapsed_regression_basis_points: 3_000,
806 }
807 }
808
809 pub fn try_new(
810 warning_p99_regression_basis_points: i64,
811 failure_p99_regression_basis_points: i64,
812 warning_elapsed_regression_basis_points: i64,
813 failure_elapsed_regression_basis_points: i64,
814 ) -> Result<Self, &'static str> {
815 validate_threshold_pair(
816 warning_p99_regression_basis_points,
817 failure_p99_regression_basis_points,
818 "p99",
819 )?;
820 validate_threshold_pair(
821 warning_elapsed_regression_basis_points,
822 failure_elapsed_regression_basis_points,
823 "elapsed",
824 )?;
825 Ok(Self {
826 warning_p99_regression_basis_points,
827 failure_p99_regression_basis_points,
828 warning_elapsed_regression_basis_points,
829 failure_elapsed_regression_basis_points,
830 })
831 }
832}
833
834impl Default for PerfReplayThresholds {
835 fn default() -> Self {
836 Self::defaults()
837 }
838}
839
/// Evaluates perf evidence ledgers against a set of regression thresholds and
/// produces a `PerfReplayReport`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerfReplayGate {
    thresholds: PerfReplayThresholds,
}
844
845impl PerfReplayGate {
846 pub fn new(thresholds: PerfReplayThresholds) -> Self {
847 Self { thresholds }
848 }
849
850 pub fn replay(
851 &self,
852 current: &PerfEvidenceLedger,
853 baseline: Option<&PerfEvidenceLedger>,
854 ) -> PerfReplayReport {
855 self.replay_with_artifact(current, baseline, None)
856 }
857
858 pub fn replay_with_artifact(
859 &self,
860 current: &PerfEvidenceLedger,
861 baseline: Option<&PerfEvidenceLedger>,
862 current_artifact_path: Option<&Path>,
863 ) -> PerfReplayReport {
864 let mut report = PerfReplayReport::new(current, baseline);
865 report.log(
866 "info",
867 "perf evidence replay started",
868 current,
869 current_artifact_path,
870 None,
871 );
872
873 if let Err(err) = current.validate() {
874 let failure_reason = err.to_string();
875 report.add_finding(PerfReplayFinding {
876 verdict: PerfReplayVerdict::Failure,
877 metric: PerfReplayMetric::Validation,
878 message: "current perf evidence ledger failed validation".to_string(),
879 baseline_value: None,
880 current_value: None,
881 delta_basis_points: None,
882 threshold_basis_points: None,
883 });
884 report.log(
885 "error",
886 "perf evidence replay failed",
887 current,
888 current_artifact_path,
889 Some(failure_reason),
890 );
891 return report;
892 }
893
894 if let Some(baseline) = baseline
895 && let Err(err) = baseline.validate()
896 {
897 let failure_reason = err.to_string();
898 report.add_finding(PerfReplayFinding {
899 verdict: PerfReplayVerdict::Failure,
900 metric: PerfReplayMetric::Validation,
901 message: "baseline perf evidence ledger failed validation".to_string(),
902 baseline_value: None,
903 current_value: None,
904 delta_basis_points: None,
905 threshold_basis_points: None,
906 });
907 report.log(
908 "error",
909 "perf evidence replay failed",
910 current,
911 current_artifact_path,
912 Some(failure_reason),
913 );
914 return report;
915 }
916
917 self.evaluate_measurement_coverage(current, baseline, &mut report);
918 self.evaluate_proof_status(current, &mut report);
919 self.evaluate_proof_p99(current, &mut report);
920 if let Some(baseline) = baseline {
921 self.evaluate_composed_p99(current, baseline, &mut report);
922 self.evaluate_total_elapsed(current, baseline, &mut report);
923 } else {
924 report.log(
925 "info",
926 "perf evidence replay had no baseline; validated current ledger only",
927 current,
928 current_artifact_path,
929 None,
930 );
931 }
932
933 if report.verdict.should_fail_build() {
934 let reason = report
935 .findings
936 .iter()
937 .find(|finding| finding.verdict == PerfReplayVerdict::Failure)
938 .map(|finding| finding.message.clone())
939 .unwrap_or_else(|| "perf evidence replay failed".to_string());
940 report.log(
941 "error",
942 "perf evidence replay failed",
943 current,
944 current_artifact_path,
945 Some(reason),
946 );
947 } else if report.verdict == PerfReplayVerdict::Warning {
948 report.log(
949 "warn",
950 "perf evidence replay produced warnings",
951 current,
952 current_artifact_path,
953 None,
954 );
955 } else {
956 report.log(
957 "info",
958 "perf evidence replay passed",
959 current,
960 current_artifact_path,
961 None,
962 );
963 }
964
965 report
966 }
967
968 pub fn replay_files<P>(
969 &self,
970 current_path: P,
971 baseline_path: Option<P>,
972 ) -> Result<PerfReplayReport, PerfEvidenceIoError>
973 where
974 P: AsRef<Path>,
975 {
976 let current_path = current_path.as_ref();
977 let current = read_perf_evidence_ledger(current_path)?;
978 let baseline = match baseline_path {
979 Some(path) => Some(read_perf_evidence_ledger(path.as_ref())?),
980 None => None,
981 };
982 Ok(self.replay_with_artifact(¤t, baseline.as_ref(), Some(current_path)))
983 }
984
985 fn evaluate_measurement_coverage(
986 &self,
987 current: &PerfEvidenceLedger,
988 baseline: Option<&PerfEvidenceLedger>,
989 report: &mut PerfReplayReport,
990 ) {
991 let current_has_phase_timings = !current.phases.is_empty();
992 let current_has_proof = current.proof.status != PerfProofStatus::NotMeasured
993 || current.proof.p99_regression_basis_points.is_some();
994 if !current_has_phase_timings && !current_has_proof {
995 report.add_finding(PerfReplayFinding {
996 verdict: PerfReplayVerdict::Warning,
997 metric: PerfReplayMetric::MeasurementCoverage,
998 message: "current perf evidence ledger has no phase timings or proof summary"
999 .to_string(),
1000 baseline_value: None,
1001 current_value: None,
1002 delta_basis_points: None,
1003 threshold_basis_points: None,
1004 });
1005 }
1006
1007 if baseline.is_some_and(|ledger| ledger.phases.is_empty()) {
1008 report.add_finding(PerfReplayFinding {
1009 verdict: PerfReplayVerdict::Warning,
1010 metric: PerfReplayMetric::MeasurementCoverage,
1011 message:
1012 "baseline perf evidence ledger has no phase timings; timing comparisons skipped"
1013 .to_string(),
1014 baseline_value: None,
1015 current_value: None,
1016 delta_basis_points: None,
1017 threshold_basis_points: None,
1018 });
1019 }
1020 }
1021
1022 fn evaluate_proof_status(&self, current: &PerfEvidenceLedger, report: &mut PerfReplayReport) {
1023 match current.proof.status {
1024 PerfProofStatus::Failed => report.add_finding(PerfReplayFinding {
1025 verdict: PerfReplayVerdict::Failure,
1026 metric: PerfReplayMetric::ProofStatus,
1027 message: "perf evidence proof status is failed".to_string(),
1028 baseline_value: None,
1029 current_value: None,
1030 delta_basis_points: None,
1031 threshold_basis_points: None,
1032 }),
1033 PerfProofStatus::Inconclusive => report.add_finding(PerfReplayFinding {
1034 verdict: PerfReplayVerdict::Warning,
1035 metric: PerfReplayMetric::ProofStatus,
1036 message: "perf evidence proof status is inconclusive".to_string(),
1037 baseline_value: None,
1038 current_value: None,
1039 delta_basis_points: None,
1040 threshold_basis_points: None,
1041 }),
1042 PerfProofStatus::NotMeasured | PerfProofStatus::Passed => {}
1043 }
1044 }
1045
1046 fn evaluate_proof_p99(&self, current: &PerfEvidenceLedger, report: &mut PerfReplayReport) {
1047 let Some(delta_basis_points) = current.proof.p99_regression_basis_points else {
1048 return;
1049 };
1050 self.add_threshold_finding(
1051 report,
1052 PerfReplayMetric::ProofP99Regression,
1053 "proof-reported p99 regression",
1054 None,
1055 None,
1056 delta_basis_points,
1057 self.thresholds.warning_p99_regression_basis_points,
1058 self.thresholds.failure_p99_regression_basis_points,
1059 );
1060 }
1061
1062 fn evaluate_composed_p99(
1063 &self,
1064 current: &PerfEvidenceLedger,
1065 baseline: &PerfEvidenceLedger,
1066 report: &mut PerfReplayReport,
1067 ) {
1068 let Some(baseline_p99) = composed_p99_ms(baseline) else {
1069 return;
1070 };
1071 let Some(current_p99) = composed_p99_ms(current) else {
1072 return;
1073 };
1074 let Some(delta_basis_points) = basis_points_delta(baseline_p99, current_p99) else {
1075 return;
1076 };
1077 self.add_threshold_finding(
1078 report,
1079 PerfReplayMetric::ComposedP99,
1080 "composed phase p99 regression",
1081 Some(baseline_p99),
1082 Some(current_p99),
1083 delta_basis_points,
1084 self.thresholds.warning_p99_regression_basis_points,
1085 self.thresholds.failure_p99_regression_basis_points,
1086 );
1087 }
1088
1089 fn evaluate_total_elapsed(
1090 &self,
1091 current: &PerfEvidenceLedger,
1092 baseline: &PerfEvidenceLedger,
1093 report: &mut PerfReplayReport,
1094 ) {
1095 let baseline_elapsed = total_elapsed_ms(baseline);
1096 let current_elapsed = total_elapsed_ms(current);
1097 let Some(delta_basis_points) = basis_points_delta(baseline_elapsed, current_elapsed) else {
1098 return;
1099 };
1100 self.add_threshold_finding(
1101 report,
1102 PerfReplayMetric::TotalElapsed,
1103 "total elapsed phase time regression",
1104 Some(baseline_elapsed),
1105 Some(current_elapsed),
1106 delta_basis_points,
1107 self.thresholds.warning_elapsed_regression_basis_points,
1108 self.thresholds.failure_elapsed_regression_basis_points,
1109 );
1110 }
1111
1112 #[allow(clippy::too_many_arguments)]
1113 fn add_threshold_finding(
1114 &self,
1115 report: &mut PerfReplayReport,
1116 metric: PerfReplayMetric,
1117 label: &str,
1118 baseline_value: Option<i64>,
1119 current_value: Option<i64>,
1120 delta_basis_points: i64,
1121 warning_basis_points: i64,
1122 failure_basis_points: i64,
1123 ) {
1124 if delta_basis_points < warning_basis_points {
1125 return;
1126 }
1127 let (verdict, threshold_basis_points) = if delta_basis_points >= failure_basis_points {
1128 (PerfReplayVerdict::Failure, failure_basis_points)
1129 } else {
1130 (PerfReplayVerdict::Warning, warning_basis_points)
1131 };
1132 report.add_finding(PerfReplayFinding {
1133 verdict,
1134 metric,
1135 message: format!("{label}: +{delta_basis_points} bps"),
1136 baseline_value,
1137 current_value,
1138 delta_basis_points: Some(delta_basis_points),
1139 threshold_basis_points: Some(threshold_basis_points),
1140 });
1141 }
1142}
1143
1144fn validate_phase(
1145 phase: &PerfPhaseTiming,
1146 index: usize,
1147) -> Result<(), PerfEvidenceValidationError> {
1148 if phase.name.trim().is_empty() {
1149 return Err(PerfEvidenceValidationError::EmptyPhaseName { index });
1150 }
1151 if quantile_order_violated(phase.p50_ms, phase.p95_ms)
1152 || quantile_order_violated(phase.p95_ms, phase.p99_ms)
1153 || quantile_order_violated(phase.p50_ms, phase.p99_ms)
1154 {
1155 return Err(PerfEvidenceValidationError::PhaseQuantilesOutOfOrder { index });
1156 }
1157 Ok(())
1158}
1159
1160fn composed_p99_ms(ledger: &PerfEvidenceLedger) -> Option<i64> {
1161 let mut total = 0u64;
1162 let mut saw_phase = false;
1163 for phase in &ledger.phases {
1164 total = total.checked_add(phase.p99_ms?)?;
1165 saw_phase = true;
1166 }
1167 saw_phase.then_some(total.min(i64::MAX as u64) as i64)
1168}
1169
1170fn total_elapsed_ms(ledger: &PerfEvidenceLedger) -> i64 {
1171 ledger
1172 .phases
1173 .iter()
1174 .map(|phase| phase.elapsed_ms)
1175 .fold(0u64, u64::saturating_add)
1176 .min(i64::MAX as u64) as i64
1177}
1178
/// Expresses the change from `baseline` to `current` in basis points of `baseline`.
///
/// The quotient is rounded to the nearest integer, with ties going away from zero.
/// Returns `None` when `baseline` is zero or negative, or when the result does not
/// fit in an `i64`.
fn basis_points_delta(baseline: i64, current: i64) -> Option<i64> {
    if baseline <= 0 {
        return None;
    }

    // Work in i128 so the difference and the scaling by 10_000 have headroom.
    let base = i128::from(baseline);
    let diff = i128::from(current) - base;

    // Adding half the divisor, signed like the difference, turns the truncating
    // division below into round-half-away-from-zero.
    let half = i128::from(baseline / 2);
    let bias = if diff >= 0 { half } else { -half };

    let numerator = diff.checked_mul(10_000)?.checked_add(bias)?;
    numerator
        .checked_div(base)
        .and_then(|basis_points| i64::try_from(basis_points).ok())
}
1193
/// Checks that a warning/failure threshold pair describes a usable band.
///
/// Both values must be non-negative and the warning threshold must be strictly
/// below the failure threshold. `metric` only selects the wording of the ordering
/// error: `"p99"` and `"elapsed"` name their specific fields, anything else gets a
/// generic message.
fn validate_threshold_pair(
    warning_basis_points: i64,
    failure_basis_points: i64,
    metric: &'static str,
) -> Result<(), &'static str> {
    let has_negative = warning_basis_points < 0 || failure_basis_points < 0;
    if has_negative {
        return Err("perf replay thresholds must be non-negative basis points");
    }

    if warning_basis_points < failure_basis_points {
        return Ok(());
    }

    let reason = match metric {
        "p99" => {
            "warning_p99_regression_basis_points must be less than failure_p99_regression_basis_points"
        }
        "elapsed" => {
            "warning_elapsed_regression_basis_points must be less than failure_elapsed_regression_basis_points"
        }
        _ => "warning threshold must be less than failure threshold",
    };
    Err(reason)
}
1215
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` when the system clock reads earlier than the epoch, and clamps to
/// `i64::MAX` if the millisecond count does not fit in an `i64`.
fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => {
            let millis = since_epoch.as_millis();
            if millis > i64::MAX as u128 {
                i64::MAX
            } else {
                millis as i64
            }
        }
        Err(_) => 0,
    }
}
1222
1223fn sha256_hex(bytes: &[u8]) -> String {
1224 use sha2::{Digest, Sha256};
1225
1226 let digest = Sha256::digest(bytes);
1227 format!("{digest:x}")
1228}
1229
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::{Value, json};

    /// Builds a fully populated search ledger shared by the tests below.
    ///
    /// It carries one phase for each phase kind except `Other`, cache and search
    /// snapshots, a passed proof, and a single artifact reference.
    fn representative_ledger() -> PerfEvidenceLedger {
        let mut ledger = PerfEvidenceLedger::new(
            "run-search-p99-001",
            PerfWorkload {
                kind: PerfWorkloadKind::Search,
                name: "hybrid-search-tail-latency".to_string(),
                description: Some("Representative hybrid search p99 probe".to_string()),
                command_args: vec![
                    "cass".to_string(),
                    "search".to_string(),
                    "wal conflict".to_string(),
                    "--json".to_string(),
                ],
                input_count: Some(PerfCount {
                    value: 1_000_000,
                    precision: PerfCountPrecision::LowerBound,
                }),
            },
            1_779_999_999_000,
        );

        ledger.machine = PerfMachineProfile {
            logical_cpus: Some(64),
            reserved_cores: Some(8),
            available_memory_bytes: Some(256 * 1024 * 1024 * 1024),
            topology_class: Some("single_host_many_core".to_string()),
        };
        ledger.env = BTreeMap::from([("CASS_SEARCH_MODE".to_string(), "hybrid".to_string())]);
        // Arguments after the kind are: elapsed, p50, p95, p99 (all in ms).
        ledger.phases = vec![
            phase("admission", PerfPhaseKind::Queueing, 2, 1, 2, 3),
            phase("bm25", PerfPhaseKind::Service, 18, 12, 16, 18),
            phase("semantic", PerfPhaseKind::Io, 35, 22, 31, 35),
            phase("merge", PerfPhaseKind::Synchronization, 7, 4, 6, 7),
            phase("retry-budget", PerfPhaseKind::Retries, 1, 0, 1, 1),
            phase("hydrate", PerfPhaseKind::Hydration, 9, 5, 8, 9),
            phase("emit-json", PerfPhaseKind::Output, 3, 2, 3, 3),
        ];
        ledger.resources = PerfResourceSnapshot {
            peak_rss_bytes: Some(2_147_483_648),
            avg_cpu_utilization_pct_x100: Some(5_250),
            max_inflight_bytes: Some(268_435_456),
            disk_read_bytes: Some(41_943_040),
            disk_write_bytes: Some(0),
            notes: vec!["warm lexical index".to_string()],
        };
        ledger.cache = Some(PerfCacheSnapshot {
            result_cache_hits: 42,
            result_cache_misses: 3,
            eviction_count: 1,
            approx_bytes: Some(64 * 1024 * 1024),
            byte_cap: Some(512 * 1024 * 1024),
        });
        ledger.search = Some(PerfSearchSnapshot {
            query_hash: "blake3:abc123".to_string(),
            limit: 20,
            matched_count: Some(PerfCount {
                value: 482,
                precision: PerfCountPrecision::Exact,
            }),
            returned_hits: 20,
            requested_mode: "hybrid".to_string(),
            realized_mode: "hybrid".to_string(),
            fallback_tier: None,
            timed_out: false,
        });
        ledger.proof = PerfProofSummary {
            status: PerfProofStatus::Passed,
            baseline_artifact: Some("tests/artifacts/perf/baseline.json".to_string()),
            comparison_artifact: Some("tests/artifacts/perf/candidate.json".to_string()),
            p99_regression_basis_points: Some(-250),
            notes: vec!["p99 improved by 2.5%".to_string()],
        };
        ledger.artifacts = vec![PerfArtifactRef {
            label: "candidate-ledger".to_string(),
            path: "tests/artifacts/perf/candidate.json".to_string(),
            kind: "json".to_string(),
            sha256: Some("0123456789abcdef".to_string()),
        }];

        ledger
    }

    /// Builds a phase timing with all three quantiles present and 100 exact samples.
    fn phase(
        name: &str,
        kind: PerfPhaseKind,
        elapsed_ms: u64,
        p50_ms: u64,
        p95_ms: u64,
        p99_ms: u64,
    ) -> PerfPhaseTiming {
        PerfPhaseTiming {
            name: name.to_string(),
            kind,
            elapsed_ms,
            p50_ms: Some(p50_ms),
            p95_ms: Some(p95_ms),
            p99_ms: Some(p99_ms),
            samples: Some(PerfCount {
                value: 100,
                precision: PerfCountPrecision::Exact,
            }),
        }
    }

    /// The recorder keeps machine, env, cache, artifact and phase data, and the
    /// finished ledger validates with phases in the order they were recorded.
    #[test]
    fn recorder_accumulates_phases_snapshots_and_artifacts() {
        let mut recorder = PerfEvidenceRecorder::new(
            "recorder-run",
            PerfWorkload {
                kind: PerfWorkloadKind::WatchOnce,
                name: "watch-once-ingest".to_string(),
                description: None,
                command_args: vec![
                    "cass".to_string(),
                    "index".to_string(),
                    "--watch-once".to_string(),
                    "/tmp/session.jsonl".to_string(),
                    "--json".to_string(),
                ],
                input_count: Some(PerfCount {
                    value: 64,
                    precision: PerfCountPrecision::Exact,
                }),
            },
            42,
        );

        recorder
            .machine(PerfMachineProfile {
                logical_cpus: Some(64),
                reserved_cores: Some(4),
                available_memory_bytes: Some(256 * 1024 * 1024 * 1024),
                topology_class: Some("many_core".to_string()),
            })
            .env_kv("CASS_WATCH_ONCE_INGEST_CHUNK_CONVERSATIONS", "64")
            .cache_snapshot(PerfCacheSnapshot {
                result_cache_hits: 7,
                result_cache_misses: 2,
                eviction_count: 1,
                approx_bytes: Some(1_024),
                byte_cap: Some(2_048),
            })
            .artifact(PerfArtifactRef {
                label: "trace".to_string(),
                path: "tests/artifacts/perf/trace.json".to_string(),
                kind: "json".to_string(),
                sha256: None,
            });
        // One pre-built phase, then one phase timed through begin/finish.
        recorder
            .record_phase(phase("queue", PerfPhaseKind::Queueing, 3, 1, 2, 3))
            .unwrap()
            .begin_phase("emit-json", PerfPhaseKind::Output)
            .unwrap()
            .finish_phase()
            .unwrap();

        let ledger = recorder.finish().unwrap();

        ledger.validate().unwrap();
        assert_eq!(ledger.run_id, "recorder-run");
        assert_eq!(
            ledger.env["CASS_WATCH_ONCE_INGEST_CHUNK_CONVERSATIONS"],
            "64"
        );
        assert_eq!(ledger.phases.len(), 2);
        assert_eq!(ledger.phases[0].kind, PerfPhaseKind::Queueing);
        assert_eq!(ledger.phases[1].name, "emit-json");
        assert_eq!(ledger.artifacts[0].label, "trace");
    }

    /// Finishing with no active phase is an error, and so is beginning a second
    /// phase while one is active; the latter error names the active phase.
    #[test]
    fn recorder_rejects_overlapping_or_missing_active_phase() {
        let mut recorder = PerfEvidenceRecorder::new(
            "active-phase-run",
            PerfWorkload::new(PerfWorkloadKind::Search, "search"),
            1,
        );

        assert_eq!(
            recorder.finish_phase().unwrap_err().to_string(),
            "no perf evidence phase is active"
        );

        recorder
            .begin_phase("service", PerfPhaseKind::Service)
            .unwrap();
        let err = recorder
            .begin_phase("io", PerfPhaseKind::Io)
            .unwrap_err()
            .to_string();
        assert!(err.contains("service"), "{err}");
    }

    /// Every phase in `current` is slower than in the baseline (p99 sum 76 -> 118,
    /// elapsed sum 75 -> 117), so the gate must fail and report both metrics.
    #[test]
    fn replay_gate_detects_p99_and_elapsed_regressions() {
        let baseline = representative_ledger();
        let mut current = representative_ledger();
        current.run_id = "current-regressed".to_string();
        current.phases = vec![
            phase("admission", PerfPhaseKind::Queueing, 4, 2, 3, 5),
            phase("bm25", PerfPhaseKind::Service, 30, 20, 24, 30),
            phase("semantic", PerfPhaseKind::Io, 45, 30, 40, 45),
            phase("merge", PerfPhaseKind::Synchronization, 12, 7, 10, 12),
            phase("retry-budget", PerfPhaseKind::Retries, 2, 1, 2, 2),
            phase("hydrate", PerfPhaseKind::Hydration, 18, 10, 15, 18),
            phase("emit-json", PerfPhaseKind::Output, 6, 3, 5, 6),
        ];

        let gate =
            PerfReplayGate::new(PerfReplayThresholds::try_new(500, 1_000, 500, 1_000).unwrap());
        let report = gate.replay(&current, Some(&baseline));

        assert_eq!(report.verdict, PerfReplayVerdict::Failure);
        assert!(report.should_fail_build());
        assert!(
            report
                .findings
                .iter()
                .any(|finding| finding.metric == PerfReplayMetric::ComposedP99
                    && finding.verdict == PerfReplayVerdict::Failure),
            "{report:#?}"
        );
        assert!(
            report
                .findings
                .iter()
                .any(|finding| finding.metric == PerfReplayMetric::TotalElapsed),
            "{report:#?}"
        );
    }

    /// Without a baseline, an inconclusive proof yields a warning with a
    /// `ProofStatus` finding, and a failed proof yields a failure whose log
    /// carries the failure reason.
    #[test]
    fn replay_gate_warns_on_inconclusive_proof_and_fails_on_failed_proof() {
        let mut current = representative_ledger();
        current.proof.status = PerfProofStatus::Inconclusive;

        let gate = PerfReplayGate::new(PerfReplayThresholds::defaults());
        let report = gate.replay(&current, None);

        assert_eq!(report.verdict, PerfReplayVerdict::Warning);
        assert!(
            report
                .findings
                .iter()
                .any(|finding| finding.metric == PerfReplayMetric::ProofStatus)
        );

        current.proof.status = PerfProofStatus::Failed;
        let report = gate.replay(&current, None);

        assert_eq!(report.verdict, PerfReplayVerdict::Failure);
        assert!(
            report
                .logs
                .iter()
                .any(|event| event.failure_reason.as_deref()
                    == Some("perf evidence proof status is failed")),
            "{report:#?}"
        );
    }

    /// With no baseline ledger, the regression recorded in the proof summary
    /// (1_500 bps here) is what the gate reports as `ProofP99Regression`.
    #[test]
    fn replay_gate_uses_proof_reported_p99_without_baseline() {
        let mut current = representative_ledger();
        current.proof.p99_regression_basis_points = Some(1_500);

        let gate =
            PerfReplayGate::new(PerfReplayThresholds::try_new(500, 1_000, 500, 1_000).unwrap());
        let report = gate.replay(&current, None);

        assert_eq!(report.verdict, PerfReplayVerdict::Failure);
        assert!(
            report.findings.iter().any(|finding| finding.metric
                == PerfReplayMetric::ProofP99Regression
                && finding.delta_basis_points == Some(1_500)),
            "{report:#?}"
        );
    }

    /// A freshly constructed ledger with nothing recorded produces a warning
    /// with a `MeasurementCoverage` finding.
    #[test]
    fn replay_gate_warns_when_current_ledger_has_no_measurements() {
        let current = PerfEvidenceLedger::new(
            "empty-measurement-run",
            PerfWorkload::new(PerfWorkloadKind::Search, "empty-measurement"),
            1,
        );

        let gate = PerfReplayGate::new(PerfReplayThresholds::defaults());
        let report = gate.replay(&current, None);

        assert_eq!(report.verdict, PerfReplayVerdict::Warning);
        assert!(
            report
                .findings
                .iter()
                .any(|finding| finding.metric == PerfReplayMetric::MeasurementCoverage),
            "{report:#?}"
        );
    }

    /// `try_new` rejects a warning threshold equal to the failure threshold and
    /// rejects negative thresholds.
    #[test]
    fn replay_thresholds_reject_unreachable_warning_bands() {
        assert_eq!(
            PerfReplayThresholds::try_new(1_000, 1_000, 500, 1_000),
            Err(
                "warning_p99_regression_basis_points must be less than failure_p99_regression_basis_points"
            )
        );
        assert_eq!(
            PerfReplayThresholds::try_new(500, 1_000, -1, 1_000),
            Err("perf replay thresholds must be non-negative basis points")
        );
    }

    /// The error log event of a failed replay carries the run id, the artifact
    /// path passed to `replay_with_artifact`, the workload command args and the
    /// failure reason.
    #[test]
    fn replay_log_events_include_command_shape_and_artifact_context() {
        let baseline = representative_ledger();
        let mut current = representative_ledger();
        current.run_id = "artifact-context".to_string();
        current.proof.status = PerfProofStatus::Failed;

        let gate = PerfReplayGate::new(PerfReplayThresholds::defaults());
        let report = gate.replay_with_artifact(
            &current,
            Some(&baseline),
            Some(Path::new("tests/artifacts/perf/current.json")),
        );

        let failure_log = report
            .logs
            .iter()
            .find(|event| event.level == "error")
            .expect("error log");
        assert_eq!(failure_log.run_id, "artifact-context");
        assert_eq!(
            failure_log.artifact_path.as_deref(),
            Some("tests/artifacts/perf/current.json")
        );
        assert_eq!(
            failure_log.command_args,
            ["cass", "search", "wal conflict", "--json"]
        );
        assert_eq!(
            failure_log.failure_reason.as_deref(),
            Some("perf evidence proof status is failed")
        );
    }

    /// The representative ledger validates, serializes enums as snake_case, and
    /// survives a JSON round trip unchanged.
    #[test]
    fn representative_ledger_validates_and_round_trips_json() {
        let ledger = representative_ledger();

        ledger.validate().unwrap();

        let encoded = serde_json::to_value(&ledger).unwrap();
        assert_eq!(encoded["schema_version"], PERF_EVIDENCE_SCHEMA_VERSION);
        assert_eq!(encoded["workload"]["kind"], "search");
        assert_eq!(encoded["phases"][0]["kind"], "queueing");
        assert_eq!(
            encoded["workload"]["input_count"]["precision"],
            "lower_bound"
        );

        let decoded: PerfEvidenceLedger = serde_json::from_value(encoded).unwrap();
        assert_eq!(decoded, ledger);
    }

    /// An unknown top-level key does not break deserialization or validation,
    /// and omitted optional sections fall back to their defaults.
    #[test]
    fn future_top_level_fields_are_ignored_by_old_readers() {
        let encoded = json!({
            "schema_version": PERF_EVIDENCE_SCHEMA_VERSION,
            "run_id": "run-with-future",
            "recorded_at_ms": 1,
            "workload": {
                "kind": "search",
                "name": "future-field-probe"
            },
            "future_controller_hint": {
                "new_field": true
            }
        });

        let decoded: PerfEvidenceLedger = serde_json::from_value(encoded).unwrap();

        assert_eq!(decoded.run_id, "run-with-future");
        decoded.validate().unwrap();
    }

    /// A whitespace-only run id and an empty workload name are both rejected.
    #[test]
    fn validation_rejects_missing_identity_fields() {
        let mut ledger = representative_ledger();
        ledger.run_id = " ".to_string();

        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptyRunId)
        );

        ledger = representative_ledger();
        ledger.workload.name.clear();
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptyWorkloadName)
        );
    }

    /// A schema version other than the supported one and a negative
    /// `recorded_at_ms` are both rejected.
    #[test]
    fn validation_rejects_unsupported_schema_and_negative_time() {
        let mut ledger = representative_ledger();
        ledger.schema_version = "2".to_string();

        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::UnsupportedSchemaVersion {
                expected: PERF_EVIDENCE_SCHEMA_VERSION,
                actual: "2".to_string(),
            })
        );

        ledger = representative_ledger();
        ledger.recorded_at_ms = -1;
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::NegativeRecordedAtMs { recorded_at_ms: -1 })
        );
    }

    /// Each case starts from a fresh representative ledger and breaks exactly
    /// one phase or artifact field, expecting the matching error at index 0.
    #[test]
    fn validation_rejects_bad_phase_and_artifact_entries() {
        let mut ledger = representative_ledger();
        ledger.phases[0].name.clear();

        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptyPhaseName { index: 0 })
        );

        ledger = representative_ledger();
        ledger.phases[0].p50_ms = Some(10);
        ledger.phases[0].p95_ms = Some(5);
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::PhaseQuantilesOutOfOrder { index: 0 })
        );

        ledger = representative_ledger();
        ledger.artifacts[0].label.clear();
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptyArtifactLabel { index: 0 })
        );

        ledger = representative_ledger();
        ledger.artifacts[0].path = " ".to_string();
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptyArtifactPath { index: 0 })
        );

        ledger = representative_ledger();
        ledger.artifacts[0].kind.clear();
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptyArtifactKind { index: 0 })
        );
    }

    /// Blank search snapshot strings, a blank rebuild execution mode and a
    /// rebuild with zero workers are each rejected with their own error.
    #[test]
    fn validation_rejects_empty_nested_snapshot_fields() {
        let mut ledger = representative_ledger();
        ledger.search.as_mut().unwrap().query_hash.clear();

        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptySearchQueryHash)
        );

        ledger = representative_ledger();
        ledger.search.as_mut().unwrap().requested_mode = " ".to_string();
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptySearchRequestedMode)
        );

        ledger = representative_ledger();
        ledger.search.as_mut().unwrap().realized_mode.clear();
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptySearchRealizedMode)
        );

        ledger = representative_ledger();
        ledger.rebuild = Some(PerfRebuildSnapshot {
            execution_mode: " ".to_string(),
            workers: 1,
            shard_count: None,
            queued_items: None,
            indexed_items: None,
            checkpoint_count: None,
        });
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::EmptyRebuildExecutionMode)
        );

        ledger = representative_ledger();
        ledger.rebuild = Some(PerfRebuildSnapshot {
            execution_mode: "flat_combining".to_string(),
            workers: 0,
            shard_count: None,
            queued_items: None,
            indexed_items: None,
            checkpoint_count: None,
        });
        assert_eq!(
            ledger.validate(),
            Err(PerfEvidenceValidationError::ZeroRebuildWorkers)
        );
    }

    /// Guards the fixture itself: the representative ledger must contain a
    /// phase of every kind except `Other`.
    #[test]
    fn representative_ledger_covers_tail_decomposition_phase_kinds() {
        let ledger = representative_ledger();
        let phase_kinds = ledger
            .phases
            .iter()
            .map(|phase| phase.kind)
            .collect::<Vec<_>>();

        for required in [
            PerfPhaseKind::Queueing,
            PerfPhaseKind::Service,
            PerfPhaseKind::Io,
            PerfPhaseKind::Synchronization,
            PerfPhaseKind::Retries,
            PerfPhaseKind::Hydration,
            PerfPhaseKind::Output,
        ] {
            assert!(
                phase_kinds.contains(&required),
                "missing required phase kind {required:?}"
            );
        }
    }

    /// Pins the snake_case wire names of a sample of enum variants.
    #[test]
    fn enum_serialization_is_stable_snake_case() {
        let encoded = serde_json::to_value(PerfEvidenceLedger {
            schema_version: PERF_EVIDENCE_SCHEMA_VERSION.to_string(),
            run_id: "enum-stability".to_string(),
            recorded_at_ms: 1,
            workload: PerfWorkload::new(PerfWorkloadKind::CacheWarm, "cache-warm"),
            machine: PerfMachineProfile::default(),
            env: BTreeMap::new(),
            phases: vec![PerfPhaseTiming::new("output", PerfPhaseKind::Output, 1)],
            resources: PerfResourceSnapshot::default(),
            cache: None,
            search: None,
            rebuild: None,
            proof: PerfProofSummary {
                status: PerfProofStatus::Inconclusive,
                ..PerfProofSummary::default()
            },
            artifacts: Vec::new(),
        })
        .unwrap();

        assert_eq!(encoded["workload"]["kind"], "cache_warm");
        assert_eq!(encoded["phases"][0]["kind"], "output");
        assert_eq!(encoded["proof"]["status"], "inconclusive");

        let precision: Value = serde_json::to_value(PerfCountPrecision::Unavailable).unwrap();
        assert_eq!(precision, "unavailable");
    }
}