coding_agent_search/
perf_evidence.rs

1//! Stable evidence records for performance experiments and control-plane decisions.
2//!
3//! These types are intentionally data-only. Runtime controllers can consume ledgers
4//! from benchmarks, replay harnesses, or production diagnostics without depending on
5//! benchmark-specific structs or ad hoc JSON.
6
7use serde::{Deserialize, Serialize};
8use std::collections::BTreeMap;
9use std::error::Error;
10use std::fmt;
11use std::fs;
12use std::io;
13use std::path::Path;
14use std::time::{Instant, SystemTime, UNIX_EPOCH};
15
/// Schema version stamped into new ledgers by [`PerfEvidenceLedger::new`] and
/// required (by exact string match) by [`PerfEvidenceLedger::validate`].
pub const PERF_EVIDENCE_SCHEMA_VERSION: &str = "1";
17
/// Category of the workload a perf evidence ledger describes
/// (stored in [`PerfWorkload::kind`]).
///
/// Variants serialize with `snake_case` names; the default is
/// [`PerfWorkloadKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfWorkloadKind {
    Search,
    WatchOnce,
    FullRebuild,
    SemanticBackfill,
    SourceSync,
    DoctorRepair,
    CacheWarm,
    /// Default variant.
    #[default]
    Other,
}
31
/// Classification of a timed phase (stored in [`PerfPhaseTiming::kind`]).
///
/// Variants serialize with `snake_case` names; the default is
/// [`PerfPhaseKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfPhaseKind {
    Queueing,
    Service,
    Io,
    Synchronization,
    Retries,
    Hydration,
    Output,
    /// Default variant.
    #[default]
    Other,
}
45
/// Outcome recorded in [`PerfProofSummary::status`].
///
/// Variants serialize with `snake_case` names; the default is
/// [`PerfProofStatus::NotMeasured`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfProofStatus {
    /// Default variant; used when no proof outcome has been recorded.
    #[default]
    NotMeasured,
    Passed,
    Failed,
    Inconclusive,
}
55
/// Precision qualifier attached to a [`PerfCount`] value.
///
/// Variants serialize with `snake_case` names; the default is
/// [`PerfCountPrecision::Exact`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PerfCountPrecision {
    /// Default variant.
    #[default]
    Exact,
    LowerBound,
    Estimated,
    Unavailable,
}
65
/// Top-level evidence record for a single run.
///
/// `schema_version`, `run_id`, `recorded_at_ms`, and `workload` are required when
/// deserializing; every other field falls back to its default when absent.
/// Call [`PerfEvidenceLedger::validate`] to check the structural invariants.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfEvidenceLedger {
    /// Must equal [`PERF_EVIDENCE_SCHEMA_VERSION`] to pass validation.
    pub schema_version: String,
    /// Identifier of the run; must not be blank.
    pub run_id: String,
    /// Recording timestamp in milliseconds; must not be negative.
    /// NOTE(review): presumably milliseconds since the Unix epoch — confirm with producers.
    pub recorded_at_ms: i64,
    /// Identity of the workload that was measured.
    pub workload: PerfWorkload,
    /// Machine profile the run executed on.
    #[serde(default)]
    pub machine: PerfMachineProfile,
    /// Free-form key/value context, kept in sorted key order by the `BTreeMap`.
    #[serde(default)]
    pub env: BTreeMap<String, String>,
    /// Timed phases in the order they were recorded.
    #[serde(default)]
    pub phases: Vec<PerfPhaseTiming>,
    /// Resource usage snapshot.
    #[serde(default)]
    pub resources: PerfResourceSnapshot,
    /// Optional cache counters.
    #[serde(default)]
    pub cache: Option<PerfCacheSnapshot>,
    /// Optional search-specific details; validated when present.
    #[serde(default)]
    pub search: Option<PerfSearchSnapshot>,
    /// Optional rebuild-specific details; validated when present.
    #[serde(default)]
    pub rebuild: Option<PerfRebuildSnapshot>,
    /// Proof outcome summary.
    #[serde(default)]
    pub proof: PerfProofSummary,
    /// References to artifacts associated with this run.
    #[serde(default)]
    pub artifacts: Vec<PerfArtifactRef>,
}
91
92impl PerfEvidenceLedger {
93    pub fn new(run_id: impl Into<String>, workload: PerfWorkload, recorded_at_ms: i64) -> Self {
94        Self {
95            schema_version: PERF_EVIDENCE_SCHEMA_VERSION.to_string(),
96            run_id: run_id.into(),
97            recorded_at_ms,
98            workload,
99            machine: PerfMachineProfile::default(),
100            env: BTreeMap::new(),
101            phases: Vec::new(),
102            resources: PerfResourceSnapshot::default(),
103            cache: None,
104            search: None,
105            rebuild: None,
106            proof: PerfProofSummary::default(),
107            artifacts: Vec::new(),
108        }
109    }
110
111    pub fn validate(&self) -> Result<(), PerfEvidenceValidationError> {
112        if self.schema_version != PERF_EVIDENCE_SCHEMA_VERSION {
113            return Err(PerfEvidenceValidationError::UnsupportedSchemaVersion {
114                expected: PERF_EVIDENCE_SCHEMA_VERSION,
115                actual: self.schema_version.clone(),
116            });
117        }
118
119        if self.run_id.trim().is_empty() {
120            return Err(PerfEvidenceValidationError::EmptyRunId);
121        }
122
123        if self.recorded_at_ms < 0 {
124            return Err(PerfEvidenceValidationError::NegativeRecordedAtMs {
125                recorded_at_ms: self.recorded_at_ms,
126            });
127        }
128
129        if self.workload.name.trim().is_empty() {
130            return Err(PerfEvidenceValidationError::EmptyWorkloadName);
131        }
132
133        if let Some(search) = &self.search {
134            if search.query_hash.trim().is_empty() {
135                return Err(PerfEvidenceValidationError::EmptySearchQueryHash);
136            }
137
138            if search.requested_mode.trim().is_empty() {
139                return Err(PerfEvidenceValidationError::EmptySearchRequestedMode);
140            }
141
142            if search.realized_mode.trim().is_empty() {
143                return Err(PerfEvidenceValidationError::EmptySearchRealizedMode);
144            }
145        }
146
147        if let Some(rebuild) = &self.rebuild {
148            if rebuild.execution_mode.trim().is_empty() {
149                return Err(PerfEvidenceValidationError::EmptyRebuildExecutionMode);
150            }
151
152            if rebuild.workers == 0 {
153                return Err(PerfEvidenceValidationError::ZeroRebuildWorkers);
154            }
155        }
156
157        for (index, phase) in self.phases.iter().enumerate() {
158            if phase.name.trim().is_empty() {
159                return Err(PerfEvidenceValidationError::EmptyPhaseName { index });
160            }
161
162            if quantile_order_violated(phase.p50_ms, phase.p95_ms)
163                || quantile_order_violated(phase.p95_ms, phase.p99_ms)
164                || quantile_order_violated(phase.p50_ms, phase.p99_ms)
165            {
166                return Err(PerfEvidenceValidationError::PhaseQuantilesOutOfOrder { index });
167            }
168        }
169
170        for (index, artifact) in self.artifacts.iter().enumerate() {
171            if artifact.label.trim().is_empty() {
172                return Err(PerfEvidenceValidationError::EmptyArtifactLabel { index });
173            }
174
175            if artifact.path.trim().is_empty() {
176                return Err(PerfEvidenceValidationError::EmptyArtifactPath { index });
177            }
178
179            if artifact.kind.trim().is_empty() {
180                return Err(PerfEvidenceValidationError::EmptyArtifactKind { index });
181            }
182        }
183
184        Ok(())
185    }
186}
187
/// Identity of the workload a ledger describes.
///
/// `kind` and `name` are required when deserializing; the remaining fields
/// default to `None` / empty.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfWorkload {
    /// Workload category.
    pub kind: PerfWorkloadKind,
    /// Workload name; ledger validation rejects a blank name.
    pub name: String,
    /// Optional human-readable description.
    #[serde(default)]
    pub description: Option<String>,
    /// Command arguments associated with the workload; copied into replay log events.
    #[serde(default)]
    pub command_args: Vec<String>,
    /// Optional count of inputs, with its precision.
    #[serde(default)]
    pub input_count: Option<PerfCount>,
}
199
200impl PerfWorkload {
201    pub fn new(kind: PerfWorkloadKind, name: impl Into<String>) -> Self {
202        Self {
203            kind,
204            name: name.into(),
205            description: None,
206            command_args: Vec::new(),
207            input_count: None,
208        }
209    }
210}
211
/// A count paired with a statement of how precise it is.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfCount {
    /// The counted value.
    pub value: u64,
    /// Precision of `value`; defaults to [`PerfCountPrecision::Exact`] when absent.
    #[serde(default)]
    pub precision: PerfCountPrecision,
}
218
/// Optional description of the machine a run executed on.
///
/// Every field is optional and defaults to `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfMachineProfile {
    /// Number of logical CPUs, when known.
    #[serde(default)]
    pub logical_cpus: Option<u32>,
    /// Number of reserved cores, when known.
    /// NOTE(review): what "reserved" means is not visible here — confirm with producers.
    #[serde(default)]
    pub reserved_cores: Option<u32>,
    /// Available memory in bytes, when known.
    #[serde(default)]
    pub available_memory_bytes: Option<u64>,
    /// Free-form topology label, when known.
    #[serde(default)]
    pub topology_class: Option<String>,
}
230
/// Timing for one named phase of a run.
///
/// Ledger validation rejects a blank `name` and any pair of present quantiles
/// that is out of order (a lower quantile greater than a higher one).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfPhaseTiming {
    /// Phase name; must not be blank.
    pub name: String,
    /// Phase classification.
    pub kind: PerfPhaseKind,
    /// Elapsed time of the phase in milliseconds.
    pub elapsed_ms: u64,
    /// Optional 50th-percentile latency in milliseconds.
    #[serde(default)]
    pub p50_ms: Option<u64>,
    /// Optional 95th-percentile latency in milliseconds.
    #[serde(default)]
    pub p95_ms: Option<u64>,
    /// Optional 99th-percentile latency in milliseconds.
    #[serde(default)]
    pub p99_ms: Option<u64>,
    /// Optional sample count, with its precision.
    /// NOTE(review): presumably the samples behind the quantiles — confirm with producers.
    #[serde(default)]
    pub samples: Option<PerfCount>,
}
245
246impl PerfPhaseTiming {
247    pub fn new(name: impl Into<String>, kind: PerfPhaseKind, elapsed_ms: u64) -> Self {
248        Self {
249            name: name.into(),
250            kind,
251            elapsed_ms,
252            p50_ms: None,
253            p95_ms: None,
254            p99_ms: None,
255            samples: None,
256        }
257    }
258}
259
/// Optional resource-usage figures for a run.
///
/// Every field defaults to `None` / empty when absent.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfResourceSnapshot {
    /// Peak resident set size in bytes, when measured.
    #[serde(default)]
    pub peak_rss_bytes: Option<u64>,
    /// Average CPU utilization, when measured.
    /// NOTE(review): the `_pct_x100` suffix suggests percent scaled by 100 — confirm with producers.
    #[serde(default)]
    pub avg_cpu_utilization_pct_x100: Option<u32>,
    /// Maximum in-flight bytes, when measured.
    #[serde(default)]
    pub max_inflight_bytes: Option<u64>,
    /// Bytes read from disk, when measured.
    #[serde(default)]
    pub disk_read_bytes: Option<u64>,
    /// Bytes written to disk, when measured.
    #[serde(default)]
    pub disk_write_bytes: Option<u64>,
    /// Free-form notes about the measurement.
    #[serde(default)]
    pub notes: Vec<String>,
}
275
/// Cache counters captured for a run.
///
/// Counters default to `0` and the byte figures to `None` when absent.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfCacheSnapshot {
    /// Number of result-cache hits.
    #[serde(default)]
    pub result_cache_hits: u64,
    /// Number of result-cache misses.
    #[serde(default)]
    pub result_cache_misses: u64,
    /// Number of cache evictions.
    #[serde(default)]
    pub eviction_count: u64,
    /// Approximate cache size in bytes, when known.
    #[serde(default)]
    pub approx_bytes: Option<u64>,
    /// Configured byte cap of the cache, when known.
    #[serde(default)]
    pub byte_cap: Option<u64>,
}
289
/// Search-specific details for a run.
///
/// Ledger validation rejects a blank `query_hash`, `requested_mode`, or
/// `realized_mode`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfSearchSnapshot {
    /// Hash identifying the query; must not be blank.
    pub query_hash: String,
    /// Result limit of the search.
    pub limit: u32,
    /// Optional number of matches, with its precision.
    #[serde(default)]
    pub matched_count: Option<PerfCount>,
    /// Number of hits returned.
    pub returned_hits: u32,
    /// Search mode that was requested; must not be blank.
    pub requested_mode: String,
    /// Search mode that was actually used; must not be blank.
    pub realized_mode: String,
    /// Optional fallback tier label.
    #[serde(default)]
    pub fallback_tier: Option<String>,
    /// Whether the search timed out; defaults to `false`.
    #[serde(default)]
    pub timed_out: bool,
}
304
/// Rebuild-specific details for a run.
///
/// Ledger validation rejects a blank `execution_mode` and `workers == 0`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfRebuildSnapshot {
    /// Execution mode label; must not be blank.
    pub execution_mode: String,
    /// Number of workers; must be greater than zero.
    pub workers: u32,
    /// Optional shard count.
    #[serde(default)]
    pub shard_count: Option<u32>,
    /// Optional number of queued items, with its precision.
    #[serde(default)]
    pub queued_items: Option<PerfCount>,
    /// Optional number of indexed items, with its precision.
    #[serde(default)]
    pub indexed_items: Option<PerfCount>,
    /// Optional number of checkpoints.
    #[serde(default)]
    pub checkpoint_count: Option<u64>,
}
318
/// Summary of the proof outcome attached to a ledger.
///
/// Every field defaults when absent; `status` defaults to
/// [`PerfProofStatus::NotMeasured`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct PerfProofSummary {
    /// Proof outcome.
    #[serde(default)]
    pub status: PerfProofStatus,
    /// Optional reference to the baseline artifact.
    #[serde(default)]
    pub baseline_artifact: Option<String>,
    /// Optional reference to the comparison artifact.
    #[serde(default)]
    pub comparison_artifact: Option<String>,
    /// Optional signed p99 regression in basis points.
    /// NOTE(review): presumably positive means slower than baseline — confirm with producers.
    #[serde(default)]
    pub p99_regression_basis_points: Option<i64>,
    /// Free-form notes about the proof.
    #[serde(default)]
    pub notes: Vec<String>,
}
332
/// Reference to an artifact associated with a run.
///
/// Ledger validation rejects a blank `label`, `path`, or `kind`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfArtifactRef {
    /// Label of the artifact; must not be blank.
    pub label: String,
    /// Path of the artifact as a string; must not be blank.
    pub path: String,
    /// Artifact kind (for example `"json"`); must not be blank.
    pub kind: String,
    /// Optional SHA-256 digest string of the artifact contents.
    #[serde(default)]
    pub sha256: Option<String>,
}
341
/// Reasons a [`PerfEvidenceLedger`] can fail validation.
///
/// Variants carrying an `index` refer to the position of the offending entry
/// in the ledger's `phases` or `artifacts` list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PerfEvidenceValidationError {
    /// `schema_version` differs from the supported version.
    UnsupportedSchemaVersion {
        expected: &'static str,
        actual: String,
    },
    /// `run_id` is empty or whitespace-only.
    EmptyRunId,
    /// `recorded_at_ms` is below zero.
    NegativeRecordedAtMs {
        recorded_at_ms: i64,
    },
    /// `workload.name` is empty or whitespace-only.
    EmptyWorkloadName,
    /// `search.query_hash` is empty or whitespace-only.
    EmptySearchQueryHash,
    /// `search.requested_mode` is empty or whitespace-only.
    EmptySearchRequestedMode,
    /// `search.realized_mode` is empty or whitespace-only.
    EmptySearchRealizedMode,
    /// `rebuild.execution_mode` is empty or whitespace-only.
    EmptyRebuildExecutionMode,
    /// `rebuild.workers` is zero.
    ZeroRebuildWorkers,
    /// The phase at `index` has a blank name.
    EmptyPhaseName {
        index: usize,
    },
    /// The phase at `index` has a lower quantile exceeding a higher one.
    PhaseQuantilesOutOfOrder {
        index: usize,
    },
    /// The artifact at `index` has a blank label.
    EmptyArtifactLabel {
        index: usize,
    },
    /// The artifact at `index` has a blank path.
    EmptyArtifactPath {
        index: usize,
    },
    /// The artifact at `index` has a blank kind.
    EmptyArtifactKind {
        index: usize,
    },
}

impl fmt::Display for PerfEvidenceValidationError {
    /// Writes a one-line, human-readable description of the violation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Variants with payload format and return directly; the remaining
        // variants resolve to a fixed message written once at the end.
        let fixed_message = match self {
            Self::UnsupportedSchemaVersion { expected, actual } => {
                return write!(
                    f,
                    "unsupported perf evidence schema version {actual:?}; expected {expected:?}"
                );
            }
            Self::NegativeRecordedAtMs { recorded_at_ms } => {
                return write!(
                    f,
                    "perf evidence recorded_at_ms cannot be negative: {recorded_at_ms}"
                );
            }
            Self::EmptyPhaseName { index } => {
                return write!(f, "perf evidence phase at index {index} has an empty name");
            }
            Self::PhaseQuantilesOutOfOrder { index } => {
                return write!(
                    f,
                    "perf evidence phase at index {index} has out-of-order quantiles"
                );
            }
            Self::EmptyArtifactLabel { index } => {
                return write!(
                    f,
                    "perf evidence artifact at index {index} has an empty label"
                );
            }
            Self::EmptyArtifactPath { index } => {
                return write!(
                    f,
                    "perf evidence artifact at index {index} has an empty path"
                );
            }
            Self::EmptyArtifactKind { index } => {
                return write!(
                    f,
                    "perf evidence artifact at index {index} has an empty kind"
                );
            }
            Self::EmptyRunId => "perf evidence run_id cannot be empty",
            Self::EmptyWorkloadName => "perf evidence workload.name cannot be empty",
            Self::EmptySearchQueryHash => "perf evidence search.query_hash cannot be empty",
            Self::EmptySearchRequestedMode => {
                "perf evidence search.requested_mode cannot be empty"
            }
            Self::EmptySearchRealizedMode => "perf evidence search.realized_mode cannot be empty",
            Self::EmptyRebuildExecutionMode => {
                "perf evidence rebuild.execution_mode cannot be empty"
            }
            Self::ZeroRebuildWorkers => "perf evidence rebuild.workers must be greater than zero",
        };
        f.write_str(fixed_message)
    }
}

impl Error for PerfEvidenceValidationError {}
439
/// Returns `true` only when both quantiles are present and `lower` exceeds `upper`.
///
/// A missing value on either side never counts as a violation.
fn quantile_order_violated(lower: Option<u64>, upper: Option<u64>) -> bool {
    lower
        .zip(upper)
        .is_some_and(|(smaller, larger)| smaller > larger)
}
443
444#[derive(Debug)]
445pub enum PerfEvidenceIoError {
446    Io(io::Error),
447    Json(serde_json::Error),
448    Validation(PerfEvidenceValidationError),
449}
450
451impl fmt::Display for PerfEvidenceIoError {
452    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
453        match self {
454            Self::Io(err) => write!(f, "perf evidence I/O failed: {err}"),
455            Self::Json(err) => write!(f, "perf evidence JSON failed: {err}"),
456            Self::Validation(err) => write!(f, "perf evidence validation failed: {err}"),
457        }
458    }
459}
460
461impl Error for PerfEvidenceIoError {
462    fn source(&self) -> Option<&(dyn Error + 'static)> {
463        match self {
464            Self::Io(err) => Some(err),
465            Self::Json(err) => Some(err),
466            Self::Validation(err) => Some(err),
467        }
468    }
469}
470
471impl From<io::Error> for PerfEvidenceIoError {
472    fn from(err: io::Error) -> Self {
473        Self::Io(err)
474    }
475}
476
477impl From<serde_json::Error> for PerfEvidenceIoError {
478    fn from(err: serde_json::Error) -> Self {
479        Self::Json(err)
480    }
481}
482
483impl From<PerfEvidenceValidationError> for PerfEvidenceIoError {
484    fn from(err: PerfEvidenceValidationError) -> Self {
485        Self::Validation(err)
486    }
487}
488
489pub fn read_perf_evidence_ledger(
490    path: impl AsRef<Path>,
491) -> Result<PerfEvidenceLedger, PerfEvidenceIoError> {
492    let bytes = fs::read(path.as_ref())?;
493    let ledger: PerfEvidenceLedger = serde_json::from_slice(&bytes)?;
494    ledger.validate()?;
495    Ok(ledger)
496}
497
498pub fn write_perf_evidence_ledger(
499    ledger: &PerfEvidenceLedger,
500    path: impl AsRef<Path>,
501) -> Result<PerfArtifactRef, PerfEvidenceIoError> {
502    ledger.validate()?;
503    let path = path.as_ref();
504    if let Some(parent) = path
505        .parent()
506        .filter(|parent| !parent.as_os_str().is_empty())
507    {
508        fs::create_dir_all(parent)?;
509    }
510    let bytes = serde_json::to_vec_pretty(ledger)?;
511    fs::write(path, &bytes)?;
512    Ok(PerfArtifactRef {
513        label: "perf-evidence-ledger".to_string(),
514        path: path.display().to_string(),
515        kind: "json".to_string(),
516        sha256: Some(sha256_hex(&bytes)),
517    })
518}
519
520#[derive(Debug)]
521pub enum PerfEvidenceRecorderError {
522    ActivePhaseAlreadyRunning { active_phase: String },
523    NoActivePhase,
524    Validation(PerfEvidenceValidationError),
525}
526
527impl fmt::Display for PerfEvidenceRecorderError {
528    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
529        match self {
530            Self::ActivePhaseAlreadyRunning { active_phase } => {
531                write!(f, "perf evidence phase {active_phase:?} is already active")
532            }
533            Self::NoActivePhase => write!(f, "no perf evidence phase is active"),
534            Self::Validation(err) => {
535                write!(f, "perf evidence recorder produced invalid data: {err}")
536            }
537        }
538    }
539}
540
541impl Error for PerfEvidenceRecorderError {
542    fn source(&self) -> Option<&(dyn Error + 'static)> {
543        match self {
544            Self::Validation(err) => Some(err),
545            _ => None,
546        }
547    }
548}
549
550impl From<PerfEvidenceValidationError> for PerfEvidenceRecorderError {
551    fn from(err: PerfEvidenceValidationError) -> Self {
552        Self::Validation(err)
553    }
554}
555
/// A phase opened by `PerfEvidenceRecorder::begin_phase` and not yet finished.
#[derive(Debug)]
struct ActivePerfPhase {
    /// Name given to `begin_phase`.
    name: String,
    /// Kind given to `begin_phase`.
    kind: PerfPhaseKind,
    /// Monotonic instant captured when the phase began; used to compute elapsed time.
    started_at: Instant,
}
562
/// Incrementally records a [`PerfEvidenceLedger`] without coupling callers to
/// benchmark-only structs.
///
/// The recorder is intentionally small: callers provide workload identity and
/// optional snapshots, then append explicit phases or time `begin_phase` /
/// `finish_phase` spans. It never reads global process configuration.
#[derive(Debug)]
pub struct PerfEvidenceRecorder {
    /// Ledger being built; handed back by `finish` after validation.
    ledger: PerfEvidenceLedger,
    /// The single phase currently being timed, if any. At most one can be open.
    active_phase: Option<ActivePerfPhase>,
}
574
575impl PerfEvidenceRecorder {
576    pub fn new(run_id: impl Into<String>, workload: PerfWorkload, recorded_at_ms: i64) -> Self {
577        Self {
578            ledger: PerfEvidenceLedger::new(run_id, workload, recorded_at_ms),
579            active_phase: None,
580        }
581    }
582
583    pub fn start(run_id: impl Into<String>, workload: PerfWorkload) -> Self {
584        Self::new(run_id, workload, now_unix_ms())
585    }
586
587    pub fn ledger(&self) -> &PerfEvidenceLedger {
588        &self.ledger
589    }
590
591    pub fn machine(&mut self, machine: PerfMachineProfile) -> &mut Self {
592        self.ledger.machine = machine;
593        self
594    }
595
596    pub fn resource_snapshot(&mut self, resources: PerfResourceSnapshot) -> &mut Self {
597        self.ledger.resources = resources;
598        self
599    }
600
601    pub fn cache_snapshot(&mut self, cache: PerfCacheSnapshot) -> &mut Self {
602        self.ledger.cache = Some(cache);
603        self
604    }
605
606    pub fn search_snapshot(&mut self, search: PerfSearchSnapshot) -> &mut Self {
607        self.ledger.search = Some(search);
608        self
609    }
610
611    pub fn rebuild_snapshot(&mut self, rebuild: PerfRebuildSnapshot) -> &mut Self {
612        self.ledger.rebuild = Some(rebuild);
613        self
614    }
615
616    pub fn proof_summary(&mut self, proof: PerfProofSummary) -> &mut Self {
617        self.ledger.proof = proof;
618        self
619    }
620
621    pub fn env_kv(&mut self, key: impl Into<String>, value: impl Into<String>) -> &mut Self {
622        self.ledger.env.insert(key.into(), value.into());
623        self
624    }
625
626    pub fn artifact(&mut self, artifact: PerfArtifactRef) -> &mut Self {
627        self.ledger.artifacts.push(artifact);
628        self
629    }
630
631    pub fn record_phase(
632        &mut self,
633        phase: PerfPhaseTiming,
634    ) -> Result<&mut Self, PerfEvidenceRecorderError> {
635        validate_phase(&phase, self.ledger.phases.len())?;
636        self.ledger.phases.push(phase);
637        Ok(self)
638    }
639
640    pub fn begin_phase(
641        &mut self,
642        name: impl Into<String>,
643        kind: PerfPhaseKind,
644    ) -> Result<&mut Self, PerfEvidenceRecorderError> {
645        if let Some(active) = &self.active_phase {
646            return Err(PerfEvidenceRecorderError::ActivePhaseAlreadyRunning {
647                active_phase: active.name.clone(),
648            });
649        }
650        self.active_phase = Some(ActivePerfPhase {
651            name: name.into(),
652            kind,
653            started_at: Instant::now(),
654        });
655        Ok(self)
656    }
657
658    pub fn finish_phase(&mut self) -> Result<&mut Self, PerfEvidenceRecorderError> {
659        let Some(active) = self.active_phase.take() else {
660            return Err(PerfEvidenceRecorderError::NoActivePhase);
661        };
662        let elapsed_ms = active
663            .started_at
664            .elapsed()
665            .as_millis()
666            .min(u128::from(u64::MAX)) as u64;
667        self.record_phase(PerfPhaseTiming::new(active.name, active.kind, elapsed_ms))
668    }
669
670    pub fn finish(mut self) -> Result<PerfEvidenceLedger, PerfEvidenceRecorderError> {
671        if self.active_phase.is_some() {
672            self.finish_phase()?;
673        }
674        self.ledger.validate()?;
675        Ok(self.ledger)
676    }
677}
678
/// Overall or per-finding outcome of a replay.
///
/// Variants serialize with `snake_case` names. Only
/// [`PerfReplayVerdict::Failure`] makes `should_fail_build` return `true`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PerfReplayVerdict {
    Clean,
    Warning,
    Failure,
}
686
687impl PerfReplayVerdict {
688    pub fn should_fail_build(self) -> bool {
689        matches!(self, Self::Failure)
690    }
691
692    fn max(self, other: Self) -> Self {
693        match (self, other) {
694            (Self::Failure, _) | (_, Self::Failure) => Self::Failure,
695            (Self::Warning, _) | (_, Self::Warning) => Self::Warning,
696            _ => Self::Clean,
697        }
698    }
699}
700
/// Identifies which check produced a [`PerfReplayFinding`].
///
/// Variants serialize with `snake_case` names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PerfReplayMetric {
    /// A ledger failed validation.
    Validation,
    /// A ledger lacked phase timings and/or a proof summary.
    MeasurementCoverage,
    ProofStatus,
    ProofP99Regression,
    ComposedP99,
    TotalElapsed,
}
711
/// One result produced by a replay check.
///
/// The numeric fields are optional and default to `None` when absent.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayFinding {
    /// Severity of this finding; the report's verdict is the most severe one seen.
    pub verdict: PerfReplayVerdict,
    /// Check that produced the finding.
    pub metric: PerfReplayMetric,
    /// Human-readable description.
    pub message: String,
    /// Baseline-side value, when the check compares values.
    #[serde(default)]
    pub baseline_value: Option<i64>,
    /// Current-side value, when the check compares values.
    #[serde(default)]
    pub current_value: Option<i64>,
    /// Difference expressed in basis points, when computed.
    #[serde(default)]
    pub delta_basis_points: Option<i64>,
    /// Threshold in basis points the finding was judged against, when applicable.
    #[serde(default)]
    pub threshold_basis_points: Option<i64>,
}
726
/// Structured log entry attached to a [`PerfReplayReport`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayLogEvent {
    /// Level string; the replay gate emits `"info"`, `"warn"`, and `"error"`.
    pub level: String,
    /// Log message.
    pub message: String,
    /// Path of the current ledger artifact, when one was supplied.
    #[serde(default)]
    pub artifact_path: Option<String>,
    /// Run id of the current ledger.
    pub run_id: String,
    /// Command arguments copied from the current ledger's workload.
    #[serde(default)]
    pub command_args: Vec<String>,
    /// Reason for failure, present on failure events.
    #[serde(default)]
    pub failure_reason: Option<String>,
}
739
/// Outcome of replaying a current ledger, optionally against a baseline.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayReport {
    /// Run id of the current ledger.
    pub current_run_id: String,
    /// Run id of the baseline ledger, when one was supplied.
    #[serde(default)]
    pub baseline_run_id: Option<String>,
    /// Most severe verdict among `findings`; `Clean` when there are none.
    pub verdict: PerfReplayVerdict,
    /// Findings in the order they were added.
    #[serde(default)]
    pub findings: Vec<PerfReplayFinding>,
    /// Log events in the order they were emitted.
    #[serde(default)]
    pub logs: Vec<PerfReplayLogEvent>,
}
751
752impl PerfReplayReport {
753    pub fn should_fail_build(&self) -> bool {
754        self.verdict.should_fail_build()
755    }
756
757    fn new(current: &PerfEvidenceLedger, baseline: Option<&PerfEvidenceLedger>) -> Self {
758        Self {
759            current_run_id: current.run_id.clone(),
760            baseline_run_id: baseline.map(|ledger| ledger.run_id.clone()),
761            verdict: PerfReplayVerdict::Clean,
762            findings: Vec::new(),
763            logs: Vec::new(),
764        }
765    }
766
767    fn add_finding(&mut self, finding: PerfReplayFinding) {
768        self.verdict = self.verdict.max(finding.verdict);
769        self.findings.push(finding);
770    }
771
772    fn log(
773        &mut self,
774        level: &str,
775        message: &str,
776        current: &PerfEvidenceLedger,
777        artifact_path: Option<&Path>,
778        failure_reason: Option<String>,
779    ) {
780        self.logs.push(PerfReplayLogEvent {
781            level: level.to_string(),
782            message: message.to_string(),
783            artifact_path: artifact_path.map(|path| path.display().to_string()),
784            run_id: current.run_id.clone(),
785            command_args: current.workload.command_args.clone(),
786            failure_reason,
787        });
788    }
789}
790
/// Warning and failure thresholds, in basis points, used by [`PerfReplayGate`].
///
/// NOTE(review): how each threshold is compared against a regression is decided
/// by the gate's evaluation methods, which are not shown here — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerfReplayThresholds {
    /// p99 regression that produces a warning.
    pub warning_p99_regression_basis_points: i64,
    /// p99 regression that produces a failure.
    pub failure_p99_regression_basis_points: i64,
    /// Elapsed-time regression that produces a warning.
    pub warning_elapsed_regression_basis_points: i64,
    /// Elapsed-time regression that produces a failure.
    pub failure_elapsed_regression_basis_points: i64,
}
798
799impl PerfReplayThresholds {
800    pub fn defaults() -> Self {
801        Self {
802            warning_p99_regression_basis_points: 1_000,
803            failure_p99_regression_basis_points: 2_500,
804            warning_elapsed_regression_basis_points: 1_500,
805            failure_elapsed_regression_basis_points: 3_000,
806        }
807    }
808
809    pub fn try_new(
810        warning_p99_regression_basis_points: i64,
811        failure_p99_regression_basis_points: i64,
812        warning_elapsed_regression_basis_points: i64,
813        failure_elapsed_regression_basis_points: i64,
814    ) -> Result<Self, &'static str> {
815        validate_threshold_pair(
816            warning_p99_regression_basis_points,
817            failure_p99_regression_basis_points,
818            "p99",
819        )?;
820        validate_threshold_pair(
821            warning_elapsed_regression_basis_points,
822            failure_elapsed_regression_basis_points,
823            "elapsed",
824        )?;
825        Ok(Self {
826            warning_p99_regression_basis_points,
827            failure_p99_regression_basis_points,
828            warning_elapsed_regression_basis_points,
829            failure_elapsed_regression_basis_points,
830        })
831    }
832}
833
834impl Default for PerfReplayThresholds {
835    fn default() -> Self {
836        Self::defaults()
837    }
838}
839
/// Replays a current ledger, optionally against a baseline, and produces a
/// [`PerfReplayReport`] using the configured thresholds.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerfReplayGate {
    /// Thresholds supplied at construction.
    thresholds: PerfReplayThresholds,
}
844
845impl PerfReplayGate {
846    pub fn new(thresholds: PerfReplayThresholds) -> Self {
847        Self { thresholds }
848    }
849
850    pub fn replay(
851        &self,
852        current: &PerfEvidenceLedger,
853        baseline: Option<&PerfEvidenceLedger>,
854    ) -> PerfReplayReport {
855        self.replay_with_artifact(current, baseline, None)
856    }
857
858    pub fn replay_with_artifact(
859        &self,
860        current: &PerfEvidenceLedger,
861        baseline: Option<&PerfEvidenceLedger>,
862        current_artifact_path: Option<&Path>,
863    ) -> PerfReplayReport {
864        let mut report = PerfReplayReport::new(current, baseline);
865        report.log(
866            "info",
867            "perf evidence replay started",
868            current,
869            current_artifact_path,
870            None,
871        );
872
873        if let Err(err) = current.validate() {
874            let failure_reason = err.to_string();
875            report.add_finding(PerfReplayFinding {
876                verdict: PerfReplayVerdict::Failure,
877                metric: PerfReplayMetric::Validation,
878                message: "current perf evidence ledger failed validation".to_string(),
879                baseline_value: None,
880                current_value: None,
881                delta_basis_points: None,
882                threshold_basis_points: None,
883            });
884            report.log(
885                "error",
886                "perf evidence replay failed",
887                current,
888                current_artifact_path,
889                Some(failure_reason),
890            );
891            return report;
892        }
893
894        if let Some(baseline) = baseline
895            && let Err(err) = baseline.validate()
896        {
897            let failure_reason = err.to_string();
898            report.add_finding(PerfReplayFinding {
899                verdict: PerfReplayVerdict::Failure,
900                metric: PerfReplayMetric::Validation,
901                message: "baseline perf evidence ledger failed validation".to_string(),
902                baseline_value: None,
903                current_value: None,
904                delta_basis_points: None,
905                threshold_basis_points: None,
906            });
907            report.log(
908                "error",
909                "perf evidence replay failed",
910                current,
911                current_artifact_path,
912                Some(failure_reason),
913            );
914            return report;
915        }
916
917        self.evaluate_measurement_coverage(current, baseline, &mut report);
918        self.evaluate_proof_status(current, &mut report);
919        self.evaluate_proof_p99(current, &mut report);
920        if let Some(baseline) = baseline {
921            self.evaluate_composed_p99(current, baseline, &mut report);
922            self.evaluate_total_elapsed(current, baseline, &mut report);
923        } else {
924            report.log(
925                "info",
926                "perf evidence replay had no baseline; validated current ledger only",
927                current,
928                current_artifact_path,
929                None,
930            );
931        }
932
933        if report.verdict.should_fail_build() {
934            let reason = report
935                .findings
936                .iter()
937                .find(|finding| finding.verdict == PerfReplayVerdict::Failure)
938                .map(|finding| finding.message.clone())
939                .unwrap_or_else(|| "perf evidence replay failed".to_string());
940            report.log(
941                "error",
942                "perf evidence replay failed",
943                current,
944                current_artifact_path,
945                Some(reason),
946            );
947        } else if report.verdict == PerfReplayVerdict::Warning {
948            report.log(
949                "warn",
950                "perf evidence replay produced warnings",
951                current,
952                current_artifact_path,
953                None,
954            );
955        } else {
956            report.log(
957                "info",
958                "perf evidence replay passed",
959                current,
960                current_artifact_path,
961                None,
962            );
963        }
964
965        report
966    }
967
968    pub fn replay_files<P>(
969        &self,
970        current_path: P,
971        baseline_path: Option<P>,
972    ) -> Result<PerfReplayReport, PerfEvidenceIoError>
973    where
974        P: AsRef<Path>,
975    {
976        let current_path = current_path.as_ref();
977        let current = read_perf_evidence_ledger(current_path)?;
978        let baseline = match baseline_path {
979            Some(path) => Some(read_perf_evidence_ledger(path.as_ref())?),
980            None => None,
981        };
982        Ok(self.replay_with_artifact(&current, baseline.as_ref(), Some(current_path)))
983    }
984
985    fn evaluate_measurement_coverage(
986        &self,
987        current: &PerfEvidenceLedger,
988        baseline: Option<&PerfEvidenceLedger>,
989        report: &mut PerfReplayReport,
990    ) {
991        let current_has_phase_timings = !current.phases.is_empty();
992        let current_has_proof = current.proof.status != PerfProofStatus::NotMeasured
993            || current.proof.p99_regression_basis_points.is_some();
994        if !current_has_phase_timings && !current_has_proof {
995            report.add_finding(PerfReplayFinding {
996                verdict: PerfReplayVerdict::Warning,
997                metric: PerfReplayMetric::MeasurementCoverage,
998                message: "current perf evidence ledger has no phase timings or proof summary"
999                    .to_string(),
1000                baseline_value: None,
1001                current_value: None,
1002                delta_basis_points: None,
1003                threshold_basis_points: None,
1004            });
1005        }
1006
1007        if baseline.is_some_and(|ledger| ledger.phases.is_empty()) {
1008            report.add_finding(PerfReplayFinding {
1009                verdict: PerfReplayVerdict::Warning,
1010                metric: PerfReplayMetric::MeasurementCoverage,
1011                message:
1012                    "baseline perf evidence ledger has no phase timings; timing comparisons skipped"
1013                        .to_string(),
1014                baseline_value: None,
1015                current_value: None,
1016                delta_basis_points: None,
1017                threshold_basis_points: None,
1018            });
1019        }
1020    }
1021
1022    fn evaluate_proof_status(&self, current: &PerfEvidenceLedger, report: &mut PerfReplayReport) {
1023        match current.proof.status {
1024            PerfProofStatus::Failed => report.add_finding(PerfReplayFinding {
1025                verdict: PerfReplayVerdict::Failure,
1026                metric: PerfReplayMetric::ProofStatus,
1027                message: "perf evidence proof status is failed".to_string(),
1028                baseline_value: None,
1029                current_value: None,
1030                delta_basis_points: None,
1031                threshold_basis_points: None,
1032            }),
1033            PerfProofStatus::Inconclusive => report.add_finding(PerfReplayFinding {
1034                verdict: PerfReplayVerdict::Warning,
1035                metric: PerfReplayMetric::ProofStatus,
1036                message: "perf evidence proof status is inconclusive".to_string(),
1037                baseline_value: None,
1038                current_value: None,
1039                delta_basis_points: None,
1040                threshold_basis_points: None,
1041            }),
1042            PerfProofStatus::NotMeasured | PerfProofStatus::Passed => {}
1043        }
1044    }
1045
1046    fn evaluate_proof_p99(&self, current: &PerfEvidenceLedger, report: &mut PerfReplayReport) {
1047        let Some(delta_basis_points) = current.proof.p99_regression_basis_points else {
1048            return;
1049        };
1050        self.add_threshold_finding(
1051            report,
1052            PerfReplayMetric::ProofP99Regression,
1053            "proof-reported p99 regression",
1054            None,
1055            None,
1056            delta_basis_points,
1057            self.thresholds.warning_p99_regression_basis_points,
1058            self.thresholds.failure_p99_regression_basis_points,
1059        );
1060    }
1061
1062    fn evaluate_composed_p99(
1063        &self,
1064        current: &PerfEvidenceLedger,
1065        baseline: &PerfEvidenceLedger,
1066        report: &mut PerfReplayReport,
1067    ) {
1068        let Some(baseline_p99) = composed_p99_ms(baseline) else {
1069            return;
1070        };
1071        let Some(current_p99) = composed_p99_ms(current) else {
1072            return;
1073        };
1074        let Some(delta_basis_points) = basis_points_delta(baseline_p99, current_p99) else {
1075            return;
1076        };
1077        self.add_threshold_finding(
1078            report,
1079            PerfReplayMetric::ComposedP99,
1080            "composed phase p99 regression",
1081            Some(baseline_p99),
1082            Some(current_p99),
1083            delta_basis_points,
1084            self.thresholds.warning_p99_regression_basis_points,
1085            self.thresholds.failure_p99_regression_basis_points,
1086        );
1087    }
1088
1089    fn evaluate_total_elapsed(
1090        &self,
1091        current: &PerfEvidenceLedger,
1092        baseline: &PerfEvidenceLedger,
1093        report: &mut PerfReplayReport,
1094    ) {
1095        let baseline_elapsed = total_elapsed_ms(baseline);
1096        let current_elapsed = total_elapsed_ms(current);
1097        let Some(delta_basis_points) = basis_points_delta(baseline_elapsed, current_elapsed) else {
1098            return;
1099        };
1100        self.add_threshold_finding(
1101            report,
1102            PerfReplayMetric::TotalElapsed,
1103            "total elapsed phase time regression",
1104            Some(baseline_elapsed),
1105            Some(current_elapsed),
1106            delta_basis_points,
1107            self.thresholds.warning_elapsed_regression_basis_points,
1108            self.thresholds.failure_elapsed_regression_basis_points,
1109        );
1110    }
1111
1112    #[allow(clippy::too_many_arguments)]
1113    fn add_threshold_finding(
1114        &self,
1115        report: &mut PerfReplayReport,
1116        metric: PerfReplayMetric,
1117        label: &str,
1118        baseline_value: Option<i64>,
1119        current_value: Option<i64>,
1120        delta_basis_points: i64,
1121        warning_basis_points: i64,
1122        failure_basis_points: i64,
1123    ) {
1124        if delta_basis_points < warning_basis_points {
1125            return;
1126        }
1127        let (verdict, threshold_basis_points) = if delta_basis_points >= failure_basis_points {
1128            (PerfReplayVerdict::Failure, failure_basis_points)
1129        } else {
1130            (PerfReplayVerdict::Warning, warning_basis_points)
1131        };
1132        report.add_finding(PerfReplayFinding {
1133            verdict,
1134            metric,
1135            message: format!("{label}: +{delta_basis_points} bps"),
1136            baseline_value,
1137            current_value,
1138            delta_basis_points: Some(delta_basis_points),
1139            threshold_basis_points: Some(threshold_basis_points),
1140        });
1141    }
1142}
1143
1144fn validate_phase(
1145    phase: &PerfPhaseTiming,
1146    index: usize,
1147) -> Result<(), PerfEvidenceValidationError> {
1148    if phase.name.trim().is_empty() {
1149        return Err(PerfEvidenceValidationError::EmptyPhaseName { index });
1150    }
1151    if quantile_order_violated(phase.p50_ms, phase.p95_ms)
1152        || quantile_order_violated(phase.p95_ms, phase.p99_ms)
1153        || quantile_order_violated(phase.p50_ms, phase.p99_ms)
1154    {
1155        return Err(PerfEvidenceValidationError::PhaseQuantilesOutOfOrder { index });
1156    }
1157    Ok(())
1158}
1159
1160fn composed_p99_ms(ledger: &PerfEvidenceLedger) -> Option<i64> {
1161    let mut total = 0u64;
1162    let mut saw_phase = false;
1163    for phase in &ledger.phases {
1164        total = total.checked_add(phase.p99_ms?)?;
1165        saw_phase = true;
1166    }
1167    saw_phase.then_some(total.min(i64::MAX as u64) as i64)
1168}
1169
1170fn total_elapsed_ms(ledger: &PerfEvidenceLedger) -> i64 {
1171    ledger
1172        .phases
1173        .iter()
1174        .map(|phase| phase.elapsed_ms)
1175        .fold(0u64, u64::saturating_add)
1176        .min(i64::MAX as u64) as i64
1177}
1178
/// Computes the relative change from `baseline` to `current` in basis points
/// (1 bps = 0.01%), rounded half away from zero.
///
/// Returns `None` only when `baseline` is zero or negative, because a relative
/// change is undefined there.
///
/// A result that does not fit in `i64` saturates to `i64::MAX` / `i64::MIN`
/// instead of being dropped: an astronomically large regression must still
/// reach the threshold checks rather than look like "nothing to compare".
fn basis_points_delta(baseline: i64, current: i64) -> Option<i64> {
    if baseline <= 0 {
        return None;
    }
    let baseline = i128::from(baseline);
    let delta = i128::from(current) - baseline;
    // |delta| < 2^64, so scaling by 10_000 and adding half a baseline stays far
    // below the i128 range; plain arithmetic cannot overflow here.
    let scaled = delta * 10_000;
    let half_baseline = baseline / 2;
    let rounded = if delta >= 0 {
        scaled + half_baseline
    } else {
        scaled - half_baseline
    };
    let basis_points = rounded / baseline;
    // After clamping, the conversion is always lossless.
    let saturated = basis_points.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
    i64::try_from(saturated).ok()
}
1193
/// Checks one warning/failure threshold pair.
///
/// Both values must be non-negative and the warning threshold must be strictly
/// below the failure threshold. `metric` (`"p99"` or `"elapsed"`) only selects
/// which ordering message is returned; any other value gets a generic message.
///
/// # Errors
///
/// Returns a static description of the first violated rule.
fn validate_threshold_pair(
    warning_basis_points: i64,
    failure_basis_points: i64,
    metric: &'static str,
) -> Result<(), &'static str> {
    let has_negative = warning_basis_points.min(failure_basis_points) < 0;
    if has_negative {
        return Err("perf replay thresholds must be non-negative basis points");
    }
    if warning_basis_points < failure_basis_points {
        return Ok(());
    }

    let ordering_message = match metric {
        "p99" => {
            "warning_p99_regression_basis_points must be less than failure_p99_regression_basis_points"
        }
        "elapsed" => {
            "warning_elapsed_regression_basis_points must be less than failure_elapsed_regression_basis_points"
        }
        _ => "warning threshold must be less than failure threshold",
    };
    Err(ordering_message)
}
1215
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` when the system clock reads earlier than the epoch, and clamps to
/// `i64::MAX` if the millisecond count does not fit in `i64`.
fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => i64::try_from(since_epoch.as_millis()).unwrap_or(i64::MAX),
        Err(_) => 0,
    }
}
1222
1223fn sha256_hex(bytes: &[u8]) -> String {
1224    use sha2::{Digest, Sha256};
1225
1226    let digest = Sha256::digest(bytes);
1227    format!("{digest:x}")
1228}
1229
1230#[cfg(test)]
1231mod tests {
1232    use super::*;
1233    use serde_json::{Value, json};
1234
1235    fn representative_ledger() -> PerfEvidenceLedger {
1236        let mut ledger = PerfEvidenceLedger::new(
1237            "run-search-p99-001",
1238            PerfWorkload {
1239                kind: PerfWorkloadKind::Search,
1240                name: "hybrid-search-tail-latency".to_string(),
1241                description: Some("Representative hybrid search p99 probe".to_string()),
1242                command_args: vec![
1243                    "cass".to_string(),
1244                    "search".to_string(),
1245                    "wal conflict".to_string(),
1246                    "--json".to_string(),
1247                ],
1248                input_count: Some(PerfCount {
1249                    value: 1_000_000,
1250                    precision: PerfCountPrecision::LowerBound,
1251                }),
1252            },
1253            1_779_999_999_000,
1254        );
1255
1256        ledger.machine = PerfMachineProfile {
1257            logical_cpus: Some(64),
1258            reserved_cores: Some(8),
1259            available_memory_bytes: Some(256 * 1024 * 1024 * 1024),
1260            topology_class: Some("single_host_many_core".to_string()),
1261        };
1262        ledger.env = BTreeMap::from([("CASS_SEARCH_MODE".to_string(), "hybrid".to_string())]);
1263        ledger.phases = vec![
1264            phase("admission", PerfPhaseKind::Queueing, 2, 1, 2, 3),
1265            phase("bm25", PerfPhaseKind::Service, 18, 12, 16, 18),
1266            phase("semantic", PerfPhaseKind::Io, 35, 22, 31, 35),
1267            phase("merge", PerfPhaseKind::Synchronization, 7, 4, 6, 7),
1268            phase("retry-budget", PerfPhaseKind::Retries, 1, 0, 1, 1),
1269            phase("hydrate", PerfPhaseKind::Hydration, 9, 5, 8, 9),
1270            phase("emit-json", PerfPhaseKind::Output, 3, 2, 3, 3),
1271        ];
1272        ledger.resources = PerfResourceSnapshot {
1273            peak_rss_bytes: Some(2_147_483_648),
1274            avg_cpu_utilization_pct_x100: Some(5_250),
1275            max_inflight_bytes: Some(268_435_456),
1276            disk_read_bytes: Some(41_943_040),
1277            disk_write_bytes: Some(0),
1278            notes: vec!["warm lexical index".to_string()],
1279        };
1280        ledger.cache = Some(PerfCacheSnapshot {
1281            result_cache_hits: 42,
1282            result_cache_misses: 3,
1283            eviction_count: 1,
1284            approx_bytes: Some(64 * 1024 * 1024),
1285            byte_cap: Some(512 * 1024 * 1024),
1286        });
1287        ledger.search = Some(PerfSearchSnapshot {
1288            query_hash: "blake3:abc123".to_string(),
1289            limit: 20,
1290            matched_count: Some(PerfCount {
1291                value: 482,
1292                precision: PerfCountPrecision::Exact,
1293            }),
1294            returned_hits: 20,
1295            requested_mode: "hybrid".to_string(),
1296            realized_mode: "hybrid".to_string(),
1297            fallback_tier: None,
1298            timed_out: false,
1299        });
1300        ledger.proof = PerfProofSummary {
1301            status: PerfProofStatus::Passed,
1302            baseline_artifact: Some("tests/artifacts/perf/baseline.json".to_string()),
1303            comparison_artifact: Some("tests/artifacts/perf/candidate.json".to_string()),
1304            p99_regression_basis_points: Some(-250),
1305            notes: vec!["p99 improved by 2.5%".to_string()],
1306        };
1307        ledger.artifacts = vec![PerfArtifactRef {
1308            label: "candidate-ledger".to_string(),
1309            path: "tests/artifacts/perf/candidate.json".to_string(),
1310            kind: "json".to_string(),
1311            sha256: Some("0123456789abcdef".to_string()),
1312        }];
1313
1314        ledger
1315    }
1316
1317    fn phase(
1318        name: &str,
1319        kind: PerfPhaseKind,
1320        elapsed_ms: u64,
1321        p50_ms: u64,
1322        p95_ms: u64,
1323        p99_ms: u64,
1324    ) -> PerfPhaseTiming {
1325        PerfPhaseTiming {
1326            name: name.to_string(),
1327            kind,
1328            elapsed_ms,
1329            p50_ms: Some(p50_ms),
1330            p95_ms: Some(p95_ms),
1331            p99_ms: Some(p99_ms),
1332            samples: Some(PerfCount {
1333                value: 100,
1334                precision: PerfCountPrecision::Exact,
1335            }),
1336        }
1337    }
1338
1339    #[test]
1340    fn recorder_accumulates_phases_snapshots_and_artifacts() {
1341        let mut recorder = PerfEvidenceRecorder::new(
1342            "recorder-run",
1343            PerfWorkload {
1344                kind: PerfWorkloadKind::WatchOnce,
1345                name: "watch-once-ingest".to_string(),
1346                description: None,
1347                command_args: vec![
1348                    "cass".to_string(),
1349                    "index".to_string(),
1350                    "--watch-once".to_string(),
1351                    "/tmp/session.jsonl".to_string(),
1352                    "--json".to_string(),
1353                ],
1354                input_count: Some(PerfCount {
1355                    value: 64,
1356                    precision: PerfCountPrecision::Exact,
1357                }),
1358            },
1359            42,
1360        );
1361
1362        recorder
1363            .machine(PerfMachineProfile {
1364                logical_cpus: Some(64),
1365                reserved_cores: Some(4),
1366                available_memory_bytes: Some(256 * 1024 * 1024 * 1024),
1367                topology_class: Some("many_core".to_string()),
1368            })
1369            .env_kv("CASS_WATCH_ONCE_INGEST_CHUNK_CONVERSATIONS", "64")
1370            .cache_snapshot(PerfCacheSnapshot {
1371                result_cache_hits: 7,
1372                result_cache_misses: 2,
1373                eviction_count: 1,
1374                approx_bytes: Some(1_024),
1375                byte_cap: Some(2_048),
1376            })
1377            .artifact(PerfArtifactRef {
1378                label: "trace".to_string(),
1379                path: "tests/artifacts/perf/trace.json".to_string(),
1380                kind: "json".to_string(),
1381                sha256: None,
1382            });
1383        recorder
1384            .record_phase(phase("queue", PerfPhaseKind::Queueing, 3, 1, 2, 3))
1385            .unwrap()
1386            .begin_phase("emit-json", PerfPhaseKind::Output)
1387            .unwrap()
1388            .finish_phase()
1389            .unwrap();
1390
1391        let ledger = recorder.finish().unwrap();
1392
1393        ledger.validate().unwrap();
1394        assert_eq!(ledger.run_id, "recorder-run");
1395        assert_eq!(
1396            ledger.env["CASS_WATCH_ONCE_INGEST_CHUNK_CONVERSATIONS"],
1397            "64"
1398        );
1399        assert_eq!(ledger.phases.len(), 2);
1400        assert_eq!(ledger.phases[0].kind, PerfPhaseKind::Queueing);
1401        assert_eq!(ledger.phases[1].name, "emit-json");
1402        assert_eq!(ledger.artifacts[0].label, "trace");
1403    }
1404
1405    #[test]
1406    fn recorder_rejects_overlapping_or_missing_active_phase() {
1407        let mut recorder = PerfEvidenceRecorder::new(
1408            "active-phase-run",
1409            PerfWorkload::new(PerfWorkloadKind::Search, "search"),
1410            1,
1411        );
1412
1413        assert_eq!(
1414            recorder.finish_phase().unwrap_err().to_string(),
1415            "no perf evidence phase is active"
1416        );
1417
1418        recorder
1419            .begin_phase("service", PerfPhaseKind::Service)
1420            .unwrap();
1421        let err = recorder
1422            .begin_phase("io", PerfPhaseKind::Io)
1423            .unwrap_err()
1424            .to_string();
1425        assert!(err.contains("service"), "{err}");
1426    }
1427
1428    #[test]
1429    fn replay_gate_detects_p99_and_elapsed_regressions() {
1430        let baseline = representative_ledger();
1431        let mut current = representative_ledger();
1432        current.run_id = "current-regressed".to_string();
1433        current.phases = vec![
1434            phase("admission", PerfPhaseKind::Queueing, 4, 2, 3, 5),
1435            phase("bm25", PerfPhaseKind::Service, 30, 20, 24, 30),
1436            phase("semantic", PerfPhaseKind::Io, 45, 30, 40, 45),
1437            phase("merge", PerfPhaseKind::Synchronization, 12, 7, 10, 12),
1438            phase("retry-budget", PerfPhaseKind::Retries, 2, 1, 2, 2),
1439            phase("hydrate", PerfPhaseKind::Hydration, 18, 10, 15, 18),
1440            phase("emit-json", PerfPhaseKind::Output, 6, 3, 5, 6),
1441        ];
1442
1443        let gate =
1444            PerfReplayGate::new(PerfReplayThresholds::try_new(500, 1_000, 500, 1_000).unwrap());
1445        let report = gate.replay(&current, Some(&baseline));
1446
1447        assert_eq!(report.verdict, PerfReplayVerdict::Failure);
1448        assert!(report.should_fail_build());
1449        assert!(
1450            report
1451                .findings
1452                .iter()
1453                .any(|finding| finding.metric == PerfReplayMetric::ComposedP99
1454                    && finding.verdict == PerfReplayVerdict::Failure),
1455            "{report:#?}"
1456        );
1457        assert!(
1458            report
1459                .findings
1460                .iter()
1461                .any(|finding| finding.metric == PerfReplayMetric::TotalElapsed),
1462            "{report:#?}"
1463        );
1464    }
1465
1466    #[test]
1467    fn replay_gate_warns_on_inconclusive_proof_and_fails_on_failed_proof() {
1468        let mut current = representative_ledger();
1469        current.proof.status = PerfProofStatus::Inconclusive;
1470
1471        let gate = PerfReplayGate::new(PerfReplayThresholds::defaults());
1472        let report = gate.replay(&current, None);
1473
1474        assert_eq!(report.verdict, PerfReplayVerdict::Warning);
1475        assert!(
1476            report
1477                .findings
1478                .iter()
1479                .any(|finding| finding.metric == PerfReplayMetric::ProofStatus)
1480        );
1481
1482        current.proof.status = PerfProofStatus::Failed;
1483        let report = gate.replay(&current, None);
1484
1485        assert_eq!(report.verdict, PerfReplayVerdict::Failure);
1486        assert!(
1487            report
1488                .logs
1489                .iter()
1490                .any(|event| event.failure_reason.as_deref()
1491                    == Some("perf evidence proof status is failed")),
1492            "{report:#?}"
1493        );
1494    }
1495
1496    #[test]
1497    fn replay_gate_uses_proof_reported_p99_without_baseline() {
1498        let mut current = representative_ledger();
1499        current.proof.p99_regression_basis_points = Some(1_500);
1500
1501        let gate =
1502            PerfReplayGate::new(PerfReplayThresholds::try_new(500, 1_000, 500, 1_000).unwrap());
1503        let report = gate.replay(&current, None);
1504
1505        assert_eq!(report.verdict, PerfReplayVerdict::Failure);
1506        assert!(
1507            report.findings.iter().any(|finding| finding.metric
1508                == PerfReplayMetric::ProofP99Regression
1509                && finding.delta_basis_points == Some(1_500)),
1510            "{report:#?}"
1511        );
1512    }
1513
1514    #[test]
1515    fn replay_gate_warns_when_current_ledger_has_no_measurements() {
1516        let current = PerfEvidenceLedger::new(
1517            "empty-measurement-run",
1518            PerfWorkload::new(PerfWorkloadKind::Search, "empty-measurement"),
1519            1,
1520        );
1521
1522        let gate = PerfReplayGate::new(PerfReplayThresholds::defaults());
1523        let report = gate.replay(&current, None);
1524
1525        assert_eq!(report.verdict, PerfReplayVerdict::Warning);
1526        assert!(
1527            report
1528                .findings
1529                .iter()
1530                .any(|finding| finding.metric == PerfReplayMetric::MeasurementCoverage),
1531            "{report:#?}"
1532        );
1533    }
1534
1535    #[test]
1536    fn replay_thresholds_reject_unreachable_warning_bands() {
1537        assert_eq!(
1538            PerfReplayThresholds::try_new(1_000, 1_000, 500, 1_000),
1539            Err(
1540                "warning_p99_regression_basis_points must be less than failure_p99_regression_basis_points"
1541            )
1542        );
1543        assert_eq!(
1544            PerfReplayThresholds::try_new(500, 1_000, -1, 1_000),
1545            Err("perf replay thresholds must be non-negative basis points")
1546        );
1547    }
1548
1549    #[test]
1550    fn replay_log_events_include_command_shape_and_artifact_context() {
1551        let baseline = representative_ledger();
1552        let mut current = representative_ledger();
1553        current.run_id = "artifact-context".to_string();
1554        current.proof.status = PerfProofStatus::Failed;
1555
1556        let gate = PerfReplayGate::new(PerfReplayThresholds::defaults());
1557        let report = gate.replay_with_artifact(
1558            &current,
1559            Some(&baseline),
1560            Some(Path::new("tests/artifacts/perf/current.json")),
1561        );
1562
1563        let failure_log = report
1564            .logs
1565            .iter()
1566            .find(|event| event.level == "error")
1567            .expect("error log");
1568        assert_eq!(failure_log.run_id, "artifact-context");
1569        assert_eq!(
1570            failure_log.artifact_path.as_deref(),
1571            Some("tests/artifacts/perf/current.json")
1572        );
1573        assert_eq!(
1574            failure_log.command_args,
1575            ["cass", "search", "wal conflict", "--json"]
1576        );
1577        assert_eq!(
1578            failure_log.failure_reason.as_deref(),
1579            Some("perf evidence proof status is failed")
1580        );
1581    }
1582
1583    #[test]
1584    fn representative_ledger_validates_and_round_trips_json() {
1585        let ledger = representative_ledger();
1586
1587        ledger.validate().unwrap();
1588
1589        let encoded = serde_json::to_value(&ledger).unwrap();
1590        assert_eq!(encoded["schema_version"], PERF_EVIDENCE_SCHEMA_VERSION);
1591        assert_eq!(encoded["workload"]["kind"], "search");
1592        assert_eq!(encoded["phases"][0]["kind"], "queueing");
1593        assert_eq!(
1594            encoded["workload"]["input_count"]["precision"],
1595            "lower_bound"
1596        );
1597
1598        let decoded: PerfEvidenceLedger = serde_json::from_value(encoded).unwrap();
1599        assert_eq!(decoded, ledger);
1600    }
1601
1602    #[test]
1603    fn future_top_level_fields_are_ignored_by_old_readers() {
1604        let encoded = json!({
1605            "schema_version": PERF_EVIDENCE_SCHEMA_VERSION,
1606            "run_id": "run-with-future",
1607            "recorded_at_ms": 1,
1608            "workload": {
1609                "kind": "search",
1610                "name": "future-field-probe"
1611            },
1612            "future_controller_hint": {
1613                "new_field": true
1614            }
1615        });
1616
1617        let decoded: PerfEvidenceLedger = serde_json::from_value(encoded).unwrap();
1618
1619        assert_eq!(decoded.run_id, "run-with-future");
1620        decoded.validate().unwrap();
1621    }
1622
1623    #[test]
1624    fn validation_rejects_missing_identity_fields() {
1625        let mut ledger = representative_ledger();
1626        ledger.run_id = "  ".to_string();
1627
1628        assert_eq!(
1629            ledger.validate(),
1630            Err(PerfEvidenceValidationError::EmptyRunId)
1631        );
1632
1633        ledger = representative_ledger();
1634        ledger.workload.name.clear();
1635        assert_eq!(
1636            ledger.validate(),
1637            Err(PerfEvidenceValidationError::EmptyWorkloadName)
1638        );
1639    }
1640
1641    #[test]
1642    fn validation_rejects_unsupported_schema_and_negative_time() {
1643        let mut ledger = representative_ledger();
1644        ledger.schema_version = "2".to_string();
1645
1646        assert_eq!(
1647            ledger.validate(),
1648            Err(PerfEvidenceValidationError::UnsupportedSchemaVersion {
1649                expected: PERF_EVIDENCE_SCHEMA_VERSION,
1650                actual: "2".to_string(),
1651            })
1652        );
1653
1654        ledger = representative_ledger();
1655        ledger.recorded_at_ms = -1;
1656        assert_eq!(
1657            ledger.validate(),
1658            Err(PerfEvidenceValidationError::NegativeRecordedAtMs { recorded_at_ms: -1 })
1659        );
1660    }
1661
1662    #[test]
1663    fn validation_rejects_bad_phase_and_artifact_entries() {
1664        let mut ledger = representative_ledger();
1665        ledger.phases[0].name.clear();
1666
1667        assert_eq!(
1668            ledger.validate(),
1669            Err(PerfEvidenceValidationError::EmptyPhaseName { index: 0 })
1670        );
1671
1672        ledger = representative_ledger();
1673        ledger.phases[0].p50_ms = Some(10);
1674        ledger.phases[0].p95_ms = Some(5);
1675        assert_eq!(
1676            ledger.validate(),
1677            Err(PerfEvidenceValidationError::PhaseQuantilesOutOfOrder { index: 0 })
1678        );
1679
1680        ledger = representative_ledger();
1681        ledger.artifacts[0].label.clear();
1682        assert_eq!(
1683            ledger.validate(),
1684            Err(PerfEvidenceValidationError::EmptyArtifactLabel { index: 0 })
1685        );
1686
1687        ledger = representative_ledger();
1688        ledger.artifacts[0].path = " ".to_string();
1689        assert_eq!(
1690            ledger.validate(),
1691            Err(PerfEvidenceValidationError::EmptyArtifactPath { index: 0 })
1692        );
1693
1694        ledger = representative_ledger();
1695        ledger.artifacts[0].kind.clear();
1696        assert_eq!(
1697            ledger.validate(),
1698            Err(PerfEvidenceValidationError::EmptyArtifactKind { index: 0 })
1699        );
1700    }
1701
1702    #[test]
1703    fn validation_rejects_empty_nested_snapshot_fields() {
1704        let mut ledger = representative_ledger();
1705        ledger.search.as_mut().unwrap().query_hash.clear();
1706
1707        assert_eq!(
1708            ledger.validate(),
1709            Err(PerfEvidenceValidationError::EmptySearchQueryHash)
1710        );
1711
1712        ledger = representative_ledger();
1713        ledger.search.as_mut().unwrap().requested_mode = " ".to_string();
1714        assert_eq!(
1715            ledger.validate(),
1716            Err(PerfEvidenceValidationError::EmptySearchRequestedMode)
1717        );
1718
1719        ledger = representative_ledger();
1720        ledger.search.as_mut().unwrap().realized_mode.clear();
1721        assert_eq!(
1722            ledger.validate(),
1723            Err(PerfEvidenceValidationError::EmptySearchRealizedMode)
1724        );
1725
1726        ledger = representative_ledger();
1727        ledger.rebuild = Some(PerfRebuildSnapshot {
1728            execution_mode: " ".to_string(),
1729            workers: 1,
1730            shard_count: None,
1731            queued_items: None,
1732            indexed_items: None,
1733            checkpoint_count: None,
1734        });
1735        assert_eq!(
1736            ledger.validate(),
1737            Err(PerfEvidenceValidationError::EmptyRebuildExecutionMode)
1738        );
1739
1740        ledger = representative_ledger();
1741        ledger.rebuild = Some(PerfRebuildSnapshot {
1742            execution_mode: "flat_combining".to_string(),
1743            workers: 0,
1744            shard_count: None,
1745            queued_items: None,
1746            indexed_items: None,
1747            checkpoint_count: None,
1748        });
1749        assert_eq!(
1750            ledger.validate(),
1751            Err(PerfEvidenceValidationError::ZeroRebuildWorkers)
1752        );
1753    }
1754
1755    #[test]
1756    fn representative_ledger_covers_tail_decomposition_phase_kinds() {
1757        let ledger = representative_ledger();
1758        let phase_kinds = ledger
1759            .phases
1760            .iter()
1761            .map(|phase| phase.kind)
1762            .collect::<Vec<_>>();
1763
1764        for required in [
1765            PerfPhaseKind::Queueing,
1766            PerfPhaseKind::Service,
1767            PerfPhaseKind::Io,
1768            PerfPhaseKind::Synchronization,
1769            PerfPhaseKind::Retries,
1770            PerfPhaseKind::Hydration,
1771            PerfPhaseKind::Output,
1772        ] {
1773            assert!(
1774                phase_kinds.contains(&required),
1775                "missing required phase kind {required:?}"
1776            );
1777        }
1778    }
1779
1780    #[test]
1781    fn enum_serialization_is_stable_snake_case() {
1782        let encoded = serde_json::to_value(PerfEvidenceLedger {
1783            schema_version: PERF_EVIDENCE_SCHEMA_VERSION.to_string(),
1784            run_id: "enum-stability".to_string(),
1785            recorded_at_ms: 1,
1786            workload: PerfWorkload::new(PerfWorkloadKind::CacheWarm, "cache-warm"),
1787            machine: PerfMachineProfile::default(),
1788            env: BTreeMap::new(),
1789            phases: vec![PerfPhaseTiming::new("output", PerfPhaseKind::Output, 1)],
1790            resources: PerfResourceSnapshot::default(),
1791            cache: None,
1792            search: None,
1793            rebuild: None,
1794            proof: PerfProofSummary {
1795                status: PerfProofStatus::Inconclusive,
1796                ..PerfProofSummary::default()
1797            },
1798            artifacts: Vec::new(),
1799        })
1800        .unwrap();
1801
1802        assert_eq!(encoded["workload"]["kind"], "cache_warm");
1803        assert_eq!(encoded["phases"][0]["kind"], "output");
1804        assert_eq!(encoded["proof"]["status"], "inconclusive");
1805
1806        let precision: Value = serde_json::to_value(PerfCountPrecision::Unavailable).unwrap();
1807        assert_eq!(precision, "unavailable");
1808    }
1809}
coding_agent_search/perf_evidence.rs

coding_agent_search/
perf_evidence.rs