Skip to main content

mabi_runtime/
evidence.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use serde_json::Value as JsonValue;
4
5use crate::service::{RUNTIME_CONTRACT_VERSION, SNAPSHOT_METADATA_VERSION};
6use crate::session::RuntimeSessionSnapshot;
7
/// Stable run evidence schema version consumed by Forge and Trials.
///
/// `RunEvidenceBuilder::new` writes this value into
/// `RunEvidence::run_evidence_schema_version`.
pub const RUN_EVIDENCE_SCHEMA_VERSION: &str = "run-evidence-schema-v1";

/// Stable trial artifact contract version consumed by Forge and Trials.
///
/// `RunEvidenceBuilder::new` writes this value into
/// `RunEvidence::trial_artifact_contract_version`.
pub const TRIAL_ARTIFACT_CONTRACT_VERSION: &str = "trial-artifact-contract-v1";
13
14/// Protocol profile identity carried through from Unified Readiness and Trials.
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
16pub struct ProtocolProfileEvidence {
17    pub protocol: String,
18    pub profile_id: String,
19    #[serde(skip_serializing_if = "Option::is_none")]
20    pub capability_id: Option<String>,
21    #[serde(skip_serializing_if = "Option::is_none")]
22    pub lane: Option<String>,
23    #[serde(skip_serializing_if = "Option::is_none")]
24    pub coverage_status: Option<String>,
25}
26
27impl ProtocolProfileEvidence {
28    pub fn new(protocol: impl Into<String>, profile_id: impl Into<String>) -> Self {
29        Self {
30            protocol: protocol.into(),
31            profile_id: profile_id.into(),
32            capability_id: None,
33            lane: None,
34            coverage_status: None,
35        }
36    }
37
38    pub fn with_capability_id(mut self, capability_id: impl Into<String>) -> Self {
39        self.capability_id = Some(capability_id.into());
40        self
41    }
42
43    pub fn with_lane(mut self, lane: impl Into<String>) -> Self {
44        self.lane = Some(lane.into());
45        self
46    }
47
48    pub fn with_coverage_status(mut self, coverage_status: impl Into<String>) -> Self {
49        self.coverage_status = Some(coverage_status.into());
50        self
51    }
52}
53
54/// Trial-owned pass criteria carried through without scoring it in mabinogion.
55#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
56pub struct PassCriteriaEvidence {
57    pub owner: String,
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub criteria_id: Option<String>,
60    pub summary: String,
61    pub machine_conditions: Vec<JsonValue>,
62}
63
64impl PassCriteriaEvidence {
65    pub fn new(summary: impl Into<String>) -> Self {
66        Self {
67            owner: "mabinogion-trials".to_string(),
68            criteria_id: None,
69            summary: summary.into(),
70            machine_conditions: Vec::new(),
71        }
72    }
73
74    pub fn with_criteria_id(mut self, criteria_id: impl Into<String>) -> Self {
75        self.criteria_id = Some(criteria_id.into());
76        self
77    }
78
79    pub fn with_machine_condition(mut self, condition: JsonValue) -> Self {
80        self.machine_conditions.push(condition);
81        self
82    }
83}
84
/// Artifact visibility boundary for replay and diagnostic evidence.
///
/// Variants serialize in `snake_case` (for example `PrivateRaw` becomes
/// `"private_raw"`). Only `PublicSummary` artifacts are carried into
/// `RunEvidence::public_summary`; every other variant is filtered out there.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ArtifactVisibility {
    /// Artifact metadata that may appear in the public summary.
    PublicSummary,
    /// Excluded from the public summary. Presumably raw private diagnostics — confirm with owners.
    PrivateRaw,
    /// Excluded from the public summary. Presumably internal-use only — confirm with owners.
    InternalOnly,
}
93
94/// Failure replay artifact metadata. The artifact contents stay outside this struct.
95#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
96pub struct FailureReplayArtifact {
97    pub artifact_id: String,
98    pub kind: String,
99    #[serde(skip_serializing_if = "Option::is_none")]
100    pub path: Option<String>,
101    #[serde(skip_serializing_if = "Option::is_none")]
102    pub media_type: Option<String>,
103    #[serde(skip_serializing_if = "Option::is_none")]
104    pub digest: Option<String>,
105    pub visibility: ArtifactVisibility,
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub description: Option<String>,
108}
109
110impl FailureReplayArtifact {
111    pub fn new(
112        artifact_id: impl Into<String>,
113        kind: impl Into<String>,
114        visibility: ArtifactVisibility,
115    ) -> Self {
116        Self {
117            artifact_id: artifact_id.into(),
118            kind: kind.into(),
119            path: None,
120            media_type: None,
121            digest: None,
122            visibility,
123            description: None,
124        }
125    }
126
127    pub fn with_path(mut self, path: impl Into<String>) -> Self {
128        self.path = Some(path.into());
129        self
130    }
131
132    pub fn with_media_type(mut self, media_type: impl Into<String>) -> Self {
133        self.media_type = Some(media_type.into());
134        self
135    }
136
137    pub fn with_digest(mut self, digest: impl Into<String>) -> Self {
138        self.digest = Some(digest.into());
139        self
140    }
141
142    pub fn with_description(mut self, description: impl Into<String>) -> Self {
143        self.description = Some(description.into());
144        self
145    }
146
147    fn public_summary(&self) -> Option<PublicFailureReplayArtifact> {
148        if self.visibility != ArtifactVisibility::PublicSummary {
149            return None;
150        }
151        Some(PublicFailureReplayArtifact {
152            artifact_id: self.artifact_id.clone(),
153            kind: self.kind.clone(),
154            media_type: self.media_type.clone(),
155            visibility: self.visibility,
156            description: self.description.clone(),
157        })
158    }
159}
160
/// Public-safe failure replay artifact metadata.
///
/// Carries the same fields as `FailureReplayArtifact` except `path` and
/// `digest`, which have no counterpart here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PublicFailureReplayArtifact {
    /// Identifier of the artifact.
    pub artifact_id: String,
    /// Caller-defined artifact kind label.
    pub kind: String,
    /// Optional media type, omitted from the serialized form while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub media_type: Option<String>,
    /// Visibility class copied from the source artifact.
    pub visibility: ArtifactVisibility,
    /// Optional description, omitted from the serialized form while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
172
173/// Report-friendly metrics exported as evidence, not as Prometheus samples.
174#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
175pub struct RunEvidenceMetrics {
176    #[serde(skip_serializing_if = "Option::is_none")]
177    pub latency_ms: Option<f64>,
178    #[serde(skip_serializing_if = "Option::is_none")]
179    pub reconnect_count: Option<u64>,
180    #[serde(skip_serializing_if = "Option::is_none")]
181    pub error_count: Option<u64>,
182    pub recovery_events: Vec<RecoveryEvent>,
183    #[serde(skip_serializing_if = "Option::is_none")]
184    pub resource_usage: Option<ResourceUsageSummary>,
185}
186
187impl RunEvidenceMetrics {
188    pub fn with_latency_ms(mut self, latency_ms: f64) -> Self {
189        self.latency_ms = Some(latency_ms);
190        self
191    }
192
193    pub fn with_reconnect_count(mut self, reconnect_count: u64) -> Self {
194        self.reconnect_count = Some(reconnect_count);
195        self
196    }
197
198    pub fn with_error_count(mut self, error_count: u64) -> Self {
199        self.error_count = Some(error_count);
200        self
201    }
202
203    pub fn with_recovery_event(mut self, event: RecoveryEvent) -> Self {
204        self.recovery_events.push(event);
205        self
206    }
207
208    pub fn with_resource_usage(mut self, usage: ResourceUsageSummary) -> Self {
209        self.resource_usage = Some(usage);
210        self
211    }
212}
213
214/// Recovery event summary suitable for proof/report generation.
215#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
216pub struct RecoveryEvent {
217    pub event_id: String,
218    pub occurred_at: DateTime<Utc>,
219    pub kind: String,
220    pub summary: String,
221}
222
223impl RecoveryEvent {
224    pub fn new(
225        event_id: impl Into<String>,
226        kind: impl Into<String>,
227        summary: impl Into<String>,
228    ) -> Self {
229        Self {
230            event_id: event_id.into(),
231            occurred_at: Utc::now(),
232            kind: kind.into(),
233            summary: summary.into(),
234        }
235    }
236
237    pub fn occurred_at(mut self, occurred_at: DateTime<Utc>) -> Self {
238        self.occurred_at = occurred_at;
239        self
240    }
241}
242
/// Resource usage summary captured by a caller-provided runner.
///
/// Every field is optional and is left out of the serialized form while `None`.
/// Units below are read off the field names — confirm with the runner that fills them.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct ResourceUsageSummary {
    /// Peak memory; the field name indicates bytes.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub peak_memory_bytes: Option<u64>,
    /// Average CPU usage; the field name indicates a percentage.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub average_cpu_percent: Option<f64>,
    /// Maximum number of open file descriptors.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_open_file_descriptors: Option<u64>,
}
253
254/// Boundary between public proof summary and private raw diagnostics.
255#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
256pub struct PublicPrivateBoundary {
257    pub public_summary_fields: Vec<String>,
258    pub private_artifact_fields: Vec<String>,
259    pub private_artifact_policy: String,
260}
261
262impl Default for PublicPrivateBoundary {
263    fn default() -> Self {
264        Self {
265            public_summary_fields: vec![
266                "run_id".to_string(),
267                "engine_version".to_string(),
268                "protocol_profile".to_string(),
269                "trial_suite_version".to_string(),
270                "started_at".to_string(),
271                "ended_at".to_string(),
272                "feature_flags".to_string(),
273                "pass_criteria".to_string(),
274                "failure_replay_artifacts.public_summary".to_string(),
275                "metrics".to_string(),
276            ],
277            private_artifact_fields: vec![
278                "failure_replay_artifacts.path".to_string(),
279                "failure_replay_artifacts.digest".to_string(),
280                "raw_logs".to_string(),
281                "packet_captures".to_string(),
282            ],
283            private_artifact_policy: "private raw artifacts are referenced by metadata and are not embedded in public summaries".to_string(),
284        }
285    }
286}
287
288/// Full run evidence exported by mabinogion for Forge and Trials consumers.
289#[derive(Debug, Clone, Serialize, Deserialize)]
290pub struct RunEvidence {
291    pub run_evidence_schema_version: String,
292    pub trial_artifact_contract_version: String,
293    pub runtime_contract_version: String,
294    pub snapshot_metadata_version: String,
295    pub run_id: String,
296    pub engine_version: String,
297    pub protocol_profile: ProtocolProfileEvidence,
298    pub trial_suite_version: String,
299    pub started_at: DateTime<Utc>,
300    pub ended_at: DateTime<Utc>,
301    pub feature_flags: Vec<String>,
302    pub pass_criteria: PassCriteriaEvidence,
303    pub failure_replay_artifacts: Vec<FailureReplayArtifact>,
304    pub public_private_boundary: PublicPrivateBoundary,
305    pub runtime_snapshot: RuntimeSessionSnapshot,
306    #[serde(skip_serializing_if = "Option::is_none")]
307    pub metrics: Option<RunEvidenceMetrics>,
308}
309
310impl RunEvidence {
311    /// Returns a public-safe evidence summary for proof report input.
312    pub fn public_summary(&self) -> PublicRunEvidenceSummary {
313        PublicRunEvidenceSummary {
314            run_evidence_schema_version: self.run_evidence_schema_version.clone(),
315            run_id: self.run_id.clone(),
316            engine_version: self.engine_version.clone(),
317            protocol_profile: self.protocol_profile.clone(),
318            trial_suite_version: self.trial_suite_version.clone(),
319            started_at: self.started_at,
320            ended_at: self.ended_at,
321            feature_flags: self.feature_flags.clone(),
322            pass_criteria: self.pass_criteria.clone(),
323            failure_replay_artifacts: self
324                .failure_replay_artifacts
325                .iter()
326                .filter_map(FailureReplayArtifact::public_summary)
327                .collect(),
328            public_private_boundary: self.public_private_boundary.clone(),
329            metrics: self.metrics.clone(),
330        }
331    }
332}
333
/// Public-safe proof/report summary.
///
/// Produced by `RunEvidence::public_summary`. Compared with `RunEvidence` it
/// has no runtime snapshot, no trial-artifact / runtime-contract /
/// snapshot-metadata versions, and its artifacts use the public-safe type.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PublicRunEvidenceSummary {
    /// Schema version of the evidence record this summary was derived from.
    pub run_evidence_schema_version: String,
    /// Identifier of the run.
    pub run_id: String,
    /// Version of the engine that produced the run.
    pub engine_version: String,
    /// Protocol profile the run was executed against.
    pub protocol_profile: ProtocolProfileEvidence,
    /// Version of the trial suite.
    pub trial_suite_version: String,
    /// UTC start timestamp.
    pub started_at: DateTime<Utc>,
    /// UTC end timestamp.
    pub ended_at: DateTime<Utc>,
    /// Feature flags recorded for the run.
    pub feature_flags: Vec<String>,
    /// Pass criteria carried through from the trial owner.
    pub pass_criteria: PassCriteriaEvidence,
    /// Public views of the failure replay artifacts.
    pub failure_replay_artifacts: Vec<PublicFailureReplayArtifact>,
    /// Declared public/private boundary.
    pub public_private_boundary: PublicPrivateBoundary,
    /// Optional metrics, omitted from the serialized form while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metrics: Option<RunEvidenceMetrics>,
}
351
352/// Builder used by Forge/Trials runners to assemble evidence from runtime output.
353#[derive(Debug, Clone)]
354pub struct RunEvidenceBuilder {
355    evidence: RunEvidence,
356}
357
358impl RunEvidenceBuilder {
359    pub fn new(
360        run_id: impl Into<String>,
361        trial_suite_version: impl Into<String>,
362        protocol_profile: ProtocolProfileEvidence,
363        pass_criteria: PassCriteriaEvidence,
364        runtime_snapshot: RuntimeSessionSnapshot,
365    ) -> Self {
366        let now = Utc::now();
367        Self {
368            evidence: RunEvidence {
369                run_evidence_schema_version: RUN_EVIDENCE_SCHEMA_VERSION.to_string(),
370                trial_artifact_contract_version: TRIAL_ARTIFACT_CONTRACT_VERSION.to_string(),
371                runtime_contract_version: RUNTIME_CONTRACT_VERSION.to_string(),
372                snapshot_metadata_version: SNAPSHOT_METADATA_VERSION.to_string(),
373                run_id: run_id.into(),
374                engine_version: mabi_core::RELEASE_VERSION.to_string(),
375                protocol_profile,
376                trial_suite_version: trial_suite_version.into(),
377                started_at: now,
378                ended_at: now,
379                feature_flags: Vec::new(),
380                pass_criteria,
381                failure_replay_artifacts: Vec::new(),
382                public_private_boundary: PublicPrivateBoundary::default(),
383                runtime_snapshot,
384                metrics: None,
385            },
386        }
387    }
388
389    pub fn engine_version(mut self, engine_version: impl Into<String>) -> Self {
390        self.evidence.engine_version = engine_version.into();
391        self
392    }
393
394    pub fn started_at(mut self, started_at: DateTime<Utc>) -> Self {
395        self.evidence.started_at = started_at;
396        self
397    }
398
399    pub fn ended_at(mut self, ended_at: DateTime<Utc>) -> Self {
400        self.evidence.ended_at = ended_at;
401        self
402    }
403
404    pub fn feature_flags(mut self, feature_flags: Vec<String>) -> Self {
405        self.evidence.feature_flags = feature_flags;
406        self
407    }
408
409    pub fn add_feature_flag(mut self, feature_flag: impl Into<String>) -> Self {
410        self.evidence.feature_flags.push(feature_flag.into());
411        self
412    }
413
414    pub fn add_failure_replay_artifact(mut self, artifact: FailureReplayArtifact) -> Self {
415        self.evidence.failure_replay_artifacts.push(artifact);
416        self
417    }
418
419    pub fn metrics(mut self, metrics: RunEvidenceMetrics) -> Self {
420        self.evidence.metrics = Some(metrics);
421        self
422    }
423
424    pub fn public_private_boundary(mut self, boundary: PublicPrivateBoundary) -> Self {
425        self.evidence.public_private_boundary = boundary;
426        self
427    }
428
429    pub fn build(self) -> RunEvidence {
430        self.evidence
431    }
432}
433
#[cfg(test)]
mod tests {
    use serde_json::{json, Value as JsonValue};

    use crate::evidence::{
        ArtifactVisibility, FailureReplayArtifact, PassCriteriaEvidence, ProtocolProfileEvidence,
        PublicPrivateBoundary, RecoveryEvent, ResourceUsageSummary, RunEvidenceBuilder,
        RunEvidenceMetrics, RUN_EVIDENCE_SCHEMA_VERSION, TRIAL_ARTIFACT_CONTRACT_VERSION,
    };
    use crate::service::{ServiceSnapshot, ServiceState, RUNTIME_CONTRACT_VERSION};
    use crate::session::RuntimeSessionSnapshot;

    /// Runtime snapshot holding a single running, ready service with runtime metadata.
    fn snapshot() -> RuntimeSessionSnapshot {
        let mut modbus_service = ServiceSnapshot::new("evidence-modbus");
        modbus_service.status.state = ServiceState::Running;
        modbus_service.status.ready = true;
        modbus_service.ensure_runtime_metadata();
        RuntimeSessionSnapshot::new(vec![modbus_service])
    }

    /// Fully populated evidence fixture: one public artifact, one private artifact, and metrics.
    fn evidence() -> crate::evidence::RunEvidence {
        let profile = ProtocolProfileEvidence::new("modbus", "modbus.l1.function_code")
            .with_capability_id("modbus.function_code")
            .with_lane("deterministic");
        let criteria = PassCriteriaEvidence::new("All required Modbus function code checks pass")
            .with_criteria_id("modbus-l1-pass")
            .with_machine_condition(json!({"kind": "all_required_checks_pass"}));

        let public_artifact = FailureReplayArtifact::new(
            "public-summary",
            "failure_summary",
            ArtifactVisibility::PublicSummary,
        )
        .with_media_type("application/json")
        .with_description("Public replay summary");
        let private_artifact =
            FailureReplayArtifact::new("raw-log", "raw_log", ArtifactVisibility::PrivateRaw)
                .with_path("/private/raw.log")
                .with_digest("sha256:abc123")
                .with_media_type("text/plain");

        let builder =
            RunEvidenceBuilder::new("run-001", "trials-2026.05", profile, criteria, snapshot())
                .engine_version("1.2.3")
                .feature_flags(vec!["opcua-https-disabled".to_string()])
                .add_failure_replay_artifact(public_artifact)
                .add_failure_replay_artifact(private_artifact);

        // Metrics are assembled after the builder exists so the recovery event
        // timestamp is taken after the builder's own start timestamp.
        let recovery = RecoveryEvent::new(
            "recovery-001",
            "reconnect",
            "Client reconnected after injected disconnect",
        );
        let usage = ResourceUsageSummary {
            peak_memory_bytes: Some(2048),
            average_cpu_percent: Some(1.5),
            max_open_file_descriptors: None,
        };
        let metrics = RunEvidenceMetrics::default()
            .with_latency_ms(12.5)
            .with_reconnect_count(1)
            .with_error_count(0)
            .with_recovery_event(recovery)
            .with_resource_usage(usage);

        builder
            .metrics(metrics)
            .public_private_boundary(PublicPrivateBoundary::default())
            .build()
    }

    #[test]
    fn run_evidence_serializes_required_contract_fields() {
        const REQUIRED_FIELDS: [&str; 10] = [
            "run_id",
            "engine_version",
            "protocol_profile",
            "trial_suite_version",
            "started_at",
            "ended_at",
            "feature_flags",
            "pass_criteria",
            "failure_replay_artifacts",
            "public_private_boundary",
        ];

        let full = evidence();
        let value = serde_json::to_value(&full).expect("evidence serializes");

        for field in REQUIRED_FIELDS {
            assert!(value.get(field).is_some(), "{field} should serialize");
        }

        let expected_versions = [
            ("run_evidence_schema_version", RUN_EVIDENCE_SCHEMA_VERSION),
            (
                "trial_artifact_contract_version",
                TRIAL_ARTIFACT_CONTRACT_VERSION,
            ),
            ("runtime_contract_version", RUNTIME_CONTRACT_VERSION),
        ];
        for (key, expected) in expected_versions {
            assert_eq!(value[key], expected);
        }

        // Scoring belongs to Trials; the evidence must not carry a result.
        assert!(value.get("scoring_result").is_none());
    }

    #[test]
    fn run_evidence_builder_preserves_runtime_snapshot_and_metadata() {
        let full = evidence();

        assert_eq!(full.run_id, "run-001");
        assert_eq!(full.trial_suite_version, "trials-2026.05");
        assert_eq!(full.protocol_profile.protocol, "modbus");

        let services = &full.runtime_snapshot.services;
        assert_eq!(services.len(), 1);
        assert!(services[0].runtime_metadata().is_some());

        assert_eq!(full.failure_replay_artifacts.len(), 2);

        let error_count = full
            .metrics
            .as_ref()
            .and_then(|metrics| metrics.error_count);
        assert_eq!(error_count, Some(0));
    }

    #[test]
    fn public_summary_excludes_private_artifact_paths_and_raw_fields() {
        let summary = evidence().public_summary();
        let value = serde_json::to_value(&summary).expect("summary serializes");
        let text = serde_json::to_string(&value).expect("summary stringifies");

        assert_eq!(summary.failure_replay_artifacts.len(), 1);
        for private_fragment in ["/private/raw.log", "sha256:abc123"] {
            assert!(!text.contains(private_fragment));
        }
        assert!(text.contains("public-summary"));
        assert!(value.get("runtime_snapshot").is_none());
    }

    #[test]
    fn failure_replay_artifacts_support_public_and_private_visibility() {
        let full = evidence();
        let has_visibility = |wanted: ArtifactVisibility| {
            full.failure_replay_artifacts
                .iter()
                .any(|artifact| artifact.visibility == wanted)
        };

        assert!(has_visibility(ArtifactVisibility::PublicSummary));
        assert!(has_visibility(ArtifactVisibility::PrivateRaw));

        let value: JsonValue = serde_json::to_value(&full).expect("evidence serializes");
        assert_eq!(
            value["failure_replay_artifacts"][1]["visibility"],
            "private_raw"
        );
    }
}