use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use stack_ids::{AttemptId, ClaimId, EnvelopeId, TraceCtx, TrialId};
use crate::estimator::EstimatorMeta;
// String-backed identifier for an `EvidenceBundle`.
// It is a newtype over `String`, so the derived serde impls read and write it as a bare string.
// The inner field is public; no format is enforced on the wrapped value.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
pub struct EvidenceBundleId(pub String);
impl EvidenceBundleId {
    /// Creates an identifier from a freshly generated version-4 UUID, stored in
    /// the textual form produced by the UUID's `Display` implementation.
    pub fn generate() -> Self {
        let fresh = uuid::Uuid::new_v4();
        Self::new(fresh.to_string())
    }

    /// Wraps a caller-supplied value as an identifier.
    ///
    /// The value is converted with `Into<String>` and stored as-is; nothing is
    /// validated or normalised.
    pub fn new(id: impl Into<String>) -> Self {
        let raw: String = id.into();
        Self(raw)
    }

    /// Borrows the identifier as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl std::fmt::Display for EvidenceBundleId {
    /// Writes the wrapped identifier string verbatim.
    ///
    /// The string is emitted directly, so width, fill and alignment flags on
    /// the caller's format spec do not pad or truncate the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.0.as_str())
    }
}
// The causal question an evidence bundle is meant to answer.
// Both fields are free-form text; this type performs no validation.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct CausalQuestion {
// Statement of the question (the tests use e.g. "Does patch X fix bug Y?").
pub description: String,
// What counts as one unit of analysis (the tests use e.g. "code patch", "paired benchmark run").
pub unit_definition: String,
}
// Describes the treatment being evaluated and the baseline it is compared against.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct TreatmentSpec {
// Free-text description of the treatment (the tests use e.g. "Apply patch X").
pub description: String,
// Free-text description of the comparison baseline (the tests use e.g. "Original code").
pub baseline_description: String,
// Flag for a paired-trial design.
// NOTE(review): presumably each treated trial has a matching baseline trial — confirm with the
// producer; no non-test code visible in this file reads the flag.
pub paired_trials: bool,
}
// Describes the outcome being measured and how it is measured.
// All fields are free-form text; this type performs no validation.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct OutcomeSpec {
// What is being observed (the tests use e.g. "Test suite passes").
pub description: String,
// How the outcome is obtained (the tests use e.g. "test runner", "criterion benchmark").
pub measurement_method: String,
// Kind of outcome as an unconstrained string; the tests use "binary" and "continuous".
// NOTE(review): the full set of accepted values is not defined in this file.
pub outcome_type: String,
}
// One refutation check run against a bundle's estimate, together with its outcome.
// `estimator_kind` and `parameters` carry no serde attributes: with `serde_json` they are written
// as `null` when `None`, and (being `Option`s) may be absent on input.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct RefutationAttempt {
// Name of the refutation method as free text (the tests use "placebo_treatment", "random_cause",
// "placebo").
pub method: String,
// Outcome of the check; this is the only field the `EvidenceBundle` helper methods inspect.
pub result: RefutationResult,
// Optional estimator name associated with this attempt.
// NOTE(review): presumably the estimator the check was run with — confirm with the producer.
pub estimator_kind: Option<String>,
// Optional arbitrary JSON payload; its schema is not defined in this file.
pub parameters: Option<serde_json::Value>,
}
// Outcome of a single refutation check.
// No `tag` attribute is set, so serde's default externally tagged representation applies, with
// snake_case variant names — e.g. `{"passed": {"estimate_change": 0.01}}`.
// `EvidenceBundle::all_refutations_passed` accepts `Passed` and `Skipped` (so `Failed` and
// `Inconclusive` make it return false); `EvidenceBundle::has_failed_refutation` looks only for
// `Failed`.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum RefutationResult {
// The check passed.
// NOTE(review): `estimate_change` is presumably how far the estimate moved under the check; its
// sign convention and units are not established in this file.
Passed {
estimate_change: Option<f64>,
},
// The check failed; `reason` is free text explaining why.
Failed {
reason: String,
estimate_change: Option<f64>,
},
// The check ran but gave no clear verdict; `reason` is free text.
Inconclusive { reason: String },
// The check was not run; `reason` is free text.
Skipped { reason: String },
}
// Which arm of a verification comparison a trial belongs to.
// Unit variants with `rename_all = "snake_case"` serialize as the strings "baseline" / "patched".
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum VerificationTrialSide {
// The untreated / reference arm.
Baseline,
// The treated arm (the change under evaluation applied).
Patched,
}
// Record of one verification trial attached to an evidence bundle.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct VerificationTrialRecord {
// Identifier of this trial.
pub trial_id: TrialId,
// Identifier of the attempt the trial belongs to (the test gives the baseline and patched trials
// the same attempt id).
pub attempt_id: AttemptId,
// Which arm of the comparison this trial ran on.
pub side: VerificationTrialSide,
// Whether the trial completed.
// NOTE(review): whether `false` means "failed" or "still running" is not established here.
pub completed: bool,
// Opaque handles to receipts for this trial (the tests use e.g. "receipt:baseline").
// `#[serde(default)]`: a missing key deserializes to an empty vector.
#[serde(default)]
pub receipt_handles: Vec<String>,
}
// Snapshot of the run configuration (workload, backend, checks, timeout class, flags) plus an
// optional comparability verdict and any recorded violations.
// NOTE(review): presumably captured so baseline and patched trials can be checked for
// like-for-like conditions — the comparison logic itself is not in this file.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ComparabilitySnapshot {
// Workload identifier (the test uses "bench-a").
pub workload_id: String,
// Backend family name (the test uses "cargo").
pub backend_family: String,
// Names of the selected checks (the test uses ["cargo test"]).
pub selected_checks: Vec<String>,
// Timeout class label (the test uses "short").
pub timeout_class: String,
// Configuration flags (the test uses ["--all-features"]).
pub config_flags: Vec<String>,
// Comparability verdict. No serde attribute: written as `null` when `None`.
// NOTE(review): `None` presumably means "not evaluated" — confirm with the producer.
pub comparable: Option<bool>,
// Free-text violation descriptions.
// `#[serde(default)]`: a missing key deserializes to an empty vector.
#[serde(default)]
pub violations: Vec<String>,
}
// Record of an artifact produced by a refutation check.
// Every `Option` field here is both defaulted on input and omitted from the output when `None`.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct RefutationArtifactRecord {
// Identifier of the artifact (the test uses "placebo-1", the same value it places under
// "artifact_id" in the matching `RefutationAttempt::parameters`).
pub artifact_id: String,
// Artifact type as free text (the test uses "placebo").
pub artifact_type: String,
// Trial the artifact is associated with, if any.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub trial_id: Option<TrialId>,
// Attempt the artifact is associated with, if any.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub attempt_id: Option<AttemptId>,
// Outcome of the refutation check that produced this artifact.
pub result: RefutationResult,
// Optional numeric delta of the estimate.
// NOTE(review): overlaps with `estimate_change` inside `result` (the test sets both to 0.01);
// nothing in this file keeps the two in sync.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub estimate_delta: Option<f64>,
// Optional free-text details.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub details: Option<String>,
}
// Lifecycle state recorded in a `VerificationSummary`.
// Unit variants with `rename_all = "snake_case"` serialize as the strings "unverified",
// "verified", "contradicted" and "superseded".
// NOTE(review): the rules for moving between states are not defined in this file.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum VerificationLifecycleState {
Unverified,
Verified,
Contradicted,
Superseded,
}
// Promotion status recorded in a `VerificationSummary`.
// Internally tagged: the variant name is stored in snake_case under the "state" key next to the
// variant's own fields, e.g. `{"state": "eligible"}` or `{"state": "blocked", "reason": "..."}`.
// NOTE(review): what triggers each state is not defined in this file.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "state", rename_all = "snake_case")]
pub enum PromotionState {
NotPromoted,
Eligible,
// Promotion is blocked; `reason` is free text.
Blocked {
reason: String,
},
// Promotion happened. Both fields are optional, defaulted on input and omitted when `None`.
Promoted {
// Identifier of the promoted version, if recorded.
#[serde(default, skip_serializing_if = "Option::is_none")]
version_id: Option<String>,
// When the promotion happened, as a plain string.
// NOTE(review): the timestamp format is not established in this file.
#[serde(default, skip_serializing_if = "Option::is_none")]
promoted_at: Option<String>,
},
}
// Roll-up of a bundle's verification status.
// The counters are plain stored values: nothing in this file derives them from, or checks them
// against, the bundle's `verification_trials` / `refutations` vectors.
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct VerificationSummary {
// Current lifecycle state.
pub lifecycle_state: VerificationLifecycleState,
// Current promotion state.
pub promotion_state: PromotionState,
// Number of completed trials.
pub completed_trial_count: u32,
// Number of refutation checks that passed.
pub passed_refutation_count: u32,
// Number of refutation checks that failed.
pub failed_refutation_count: u32,
// Optional version label of the comparability snapshot (the test uses "bench-a:cargo:short").
// Defaulted on input and omitted from the output when `None`.
// NOTE(review): `EvidenceBundle` has a field of the same name; nothing here keeps them equal.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub comparability_snapshot_version: Option<String>,
// Free-text notes. `#[serde(default)]`: a missing key deserializes to an empty vector.
#[serde(default)]
pub notes: Vec<String>,
}
// Serializable record that bundles a causal question, the estimate answering it, and the
// verification / refutation evidence and provenance handles that support it.
//
// Deserialization (serde derive semantics):
//   * fields marked `#[serde(default)]` and all `Option` fields may be absent on input;
//   * every other field — including the `covariates`, `refutations` and `claim_ids` vectors —
//     must be present.
// Serialization: `Option` fields without `skip_serializing_if` are written as `null` when `None`.
//
// (Kept as `//`: `///` docs on a `JsonSchema`-derived item are copied into the generated schema.)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EvidenceBundle {
// Bundle identifier; `EvidenceBundle::new` assigns a freshly generated one.
pub id: EvidenceBundleId,
// The causal question being answered.
pub question: CausalQuestion,
// The treatment and its baseline.
pub treatment: TreatmentSpec,
// The measured outcome.
pub outcome: OutcomeSpec,
// Free-form covariate labels (the test uses `key:value` strings such as "workload:bench-a").
pub covariates: Vec<String>,
// Free-text rationale for identification; `new` leaves it empty.
pub identification_rationale: String,
// Estimator name as free text (the tests use "diff_in_diff", "ols", "iv", "before_after").
pub estimator_kind: String,
// Estimator version string (the tests use e.g. "1.0.0").
pub estimator_version: String,
// Optional structured estimator metadata; omitted from the output when `None`.
// NOTE(review): overlaps with `estimator_kind` / `estimator_version`; nothing here keeps them
// consistent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub estimator_meta: Option<EstimatorMeta>,
// The point estimate.
pub estimate: f64,
// Optional uncertainty attached to `estimate`.
// NOTE(review): which measure this is (standard error, interval half-width, ...) is not
// established in this file.
pub estimate_uncertainty: Option<f64>,
// Confidence score; `new` initialises it to 0.0.
// NOTE(review): presumably in [0, 1] — the range is not enforced here.
pub confidence: f32,
// Number of trials; `new` initialises it to 0. Stored independently of `verification_trials`.
pub trial_count: u32,
// Flag initialised to `false` by `new`.
// NOTE(review): presumably marks estimates that account for run-to-run variance — confirm.
pub variance_aware: bool,
// Per-trial verification records; a missing key deserializes to an empty vector.
#[serde(default)]
pub verification_trials: Vec<VerificationTrialRecord>,
// Optional comparability snapshot; omitted from the output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub comparability_snapshot: Option<ComparabilitySnapshot>,
// Refutation checks; this is the list inspected by `all_refutations_passed` and
// `has_failed_refutation`.
pub refutations: Vec<RefutationAttempt>,
// Artifacts produced by refutation checks; not consulted by the helper methods above.
// A missing key deserializes to an empty vector.
#[serde(default)]
pub refutation_artifacts: Vec<RefutationArtifactRecord>,
// Optional verification roll-up; omitted from the output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub verification_summary: Option<VerificationSummary>,
// Opaque handle to the raw receipt (the test uses "receipt:store:receipts:123").
pub raw_receipt_handle: Option<String>,
// Optional trace context.
pub trace_ctx: Option<TraceCtx>,
// Optional attempt this bundle is associated with.
pub attempt_id: Option<AttemptId>,
// Optional trial this bundle is associated with.
pub trial_id: Option<TrialId>,
// Opaque replay handle (the test uses "replay://attempt-family-1").
pub replay_handle: Option<String>,
// Optional identifier of the source envelope.
pub source_envelope_id: Option<EnvelopeId>,
// Claims this bundle is linked to.
pub claim_ids: Vec<ClaimId>,
// Creation timestamp as a plain string. `new` fills it with the current UTC time formatted as
// RFC 3339; because the type is `String`, the format is not checked on deserialization.
pub created_at: String,
// Optional version label of the comparability snapshot (the test uses "bench-a:cargo:short").
// NOTE(review): `VerificationSummary` has a field of the same name; nothing keeps them equal.
pub comparability_snapshot_version: Option<String>,
// Arbitrary JSON metadata; its schema is not defined in this file.
pub metadata: Option<serde_json::Value>,
}
impl EvidenceBundle {
    /// Builds a bundle from the question, treatment, outcome, estimator
    /// name/version and point estimate.
    ///
    /// A fresh id is generated and `created_at` is set to the current UTC time
    /// in RFC 3339 form. Everything else starts empty: collections are empty,
    /// optional fields are `None`, `identification_rationale` is an empty
    /// string, `confidence` is `0.0`, `trial_count` is `0` and
    /// `variance_aware` is `false`.
    pub fn new(
        question: CausalQuestion,
        treatment: TreatmentSpec,
        outcome: OutcomeSpec,
        estimator_kind: impl Into<String>,
        estimator_version: impl Into<String>,
        estimate: f64,
    ) -> Self {
        // The values with side effects or conversions are computed up front, in
        // the same order as the fields they feed.
        let id = EvidenceBundleId::generate();
        let estimator_kind: String = estimator_kind.into();
        let estimator_version: String = estimator_version.into();
        let created_at = chrono::Utc::now().to_rfc3339();

        Self {
            // Caller-supplied or freshly computed values.
            id,
            question,
            treatment,
            outcome,
            estimator_kind,
            estimator_version,
            estimate,
            created_at,

            // Scalars start at their neutral values.
            identification_rationale: String::new(),
            confidence: 0.0,
            trial_count: 0,
            variance_aware: false,

            // Collections start empty.
            covariates: Vec::new(),
            verification_trials: Vec::new(),
            refutations: Vec::new(),
            refutation_artifacts: Vec::new(),
            claim_ids: Vec::new(),

            // Optional fields start unset.
            estimator_meta: None,
            estimate_uncertainty: None,
            comparability_snapshot: None,
            verification_summary: None,
            raw_receipt_handle: None,
            trace_ctx: None,
            attempt_id: None,
            trial_id: None,
            replay_handle: None,
            source_envelope_id: None,
            comparability_snapshot_version: None,
            metadata: None,
        }
    }

    /// Returns `true` when every entry in `refutations` is `Passed` or
    /// `Skipped`.
    ///
    /// An empty list therefore yields `true`; any `Failed` or `Inconclusive`
    /// entry yields `false`.
    pub fn all_refutations_passed(&self) -> bool {
        self.refutations.iter().all(|attempt| match &attempt.result {
            RefutationResult::Passed { .. } | RefutationResult::Skipped { .. } => true,
            RefutationResult::Failed { .. } | RefutationResult::Inconclusive { .. } => false,
        })
    }

    /// Returns `true` when at least one entry in `refutations` is `Failed`.
    ///
    /// `Inconclusive` and `Skipped` entries do not count as failures here.
    pub fn has_failed_refutation(&self) -> bool {
        for attempt in &self.refutations {
            if let RefutationResult::Failed { .. } = &attempt.result {
                return true;
            }
        }
        false
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a [`CausalQuestion`] built from string slices.
    fn question(description: &str, unit_definition: &str) -> CausalQuestion {
        CausalQuestion {
            description: description.to_owned(),
            unit_definition: unit_definition.to_owned(),
        }
    }

    /// Shorthand for a [`TreatmentSpec`] built from string slices.
    fn treatment(
        description: &str,
        baseline_description: &str,
        paired_trials: bool,
    ) -> TreatmentSpec {
        TreatmentSpec {
            description: description.to_owned(),
            baseline_description: baseline_description.to_owned(),
            paired_trials,
        }
    }

    /// Shorthand for an [`OutcomeSpec`] built from string slices.
    fn outcome(description: &str, measurement_method: &str, outcome_type: &str) -> OutcomeSpec {
        OutcomeSpec {
            description: description.to_owned(),
            measurement_method: measurement_method.to_owned(),
            outcome_type: outcome_type.to_owned(),
        }
    }

    /// A [`RefutationAttempt`] carrying only a method name and a result.
    fn bare_refutation(method: &str, result: RefutationResult) -> RefutationAttempt {
        RefutationAttempt {
            method: method.to_owned(),
            result,
            estimator_kind: None,
            parameters: None,
        }
    }

    /// A freshly constructed bundle has an id, keeps the estimate, and has no
    /// evidence attached yet.
    #[test]
    fn evidence_bundle_creation() {
        let bundle = EvidenceBundle::new(
            question("Does patch X fix bug Y?", "code patch"),
            treatment("Apply patch X", "Original code", true),
            outcome("Test suite passes", "test runner", "binary"),
            "diff_in_diff",
            "1.0.0",
            0.85,
        );

        assert!(!bundle.id.as_str().is_empty());
        assert_eq!(bundle.estimate, 0.85);

        assert!(bundle.refutations.is_empty());
        assert!(bundle.verification_trials.is_empty());
        assert!(bundle.refutation_artifacts.is_empty());
        assert!(bundle.estimator_meta.is_none());
        assert!(bundle.verification_summary.is_none());

        // With no refutations recorded, "all passed" holds vacuously.
        assert!(bundle.all_refutations_passed());
        assert!(!bundle.has_failed_refutation());
    }

    /// The refutation helpers follow the contents of `refutations`.
    #[test]
    fn refutation_tracking() {
        let mut bundle = EvidenceBundle::new(
            question("test", "unit"),
            treatment("t", "b", false),
            outcome("o", "m", "binary"),
            "ols",
            "1.0.0",
            0.5,
        );

        bundle.refutations.push(bare_refutation(
            "placebo_treatment",
            RefutationResult::Passed {
                estimate_change: Some(0.01),
            },
        ));
        assert!(bundle.all_refutations_passed());
        assert!(!bundle.has_failed_refutation());

        bundle.refutations.push(bare_refutation(
            "random_cause",
            RefutationResult::Failed {
                reason: "estimate changed significantly".to_owned(),
                estimate_change: Some(0.45),
            },
        ));
        assert!(!bundle.all_refutations_passed());
        assert!(bundle.has_failed_refutation());
    }

    /// A minimal bundle survives a JSON string roundtrip.
    #[test]
    fn serde_roundtrip() {
        let original = EvidenceBundle::new(
            question("q", "u"),
            treatment("t", "b", true),
            outcome("o", "m", "continuous"),
            "iv",
            "2.0.0",
            1.23,
        );

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: EvidenceBundle = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.id.as_str(), original.id.as_str());
        assert_eq!(decoded.estimate, original.estimate);
        assert_eq!(decoded.estimator_kind, "iv");
    }

    /// A fully populated bundle serializes its verification data under the
    /// expected keys and deserializes back.
    #[test]
    fn verification_fields_roundtrip_as_first_class_artifacts() {
        let mut bundle = EvidenceBundle::new(
            question(
                "Does the patch improve benchmark latency?",
                "paired benchmark run",
            ),
            treatment("apply patch-123", "baseline checkout", true),
            outcome(
                "benchmark latency drops",
                "criterion benchmark",
                "continuous",
            ),
            "before_after",
            "3.1.4",
            0.27,
        );

        // Identification context.
        bundle.covariates = vec![
            String::from("workload:bench-a"),
            String::from("backend:cargo"),
            String::from("known_threat:cache_warmup"),
        ];
        bundle.identification_rationale = String::from("same workload, flags, and timeout class");

        // Estimator provenance.
        let environment = crate::estimator::EnvironmentFingerprint {
            python_version: None,
            package_versions: serde_json::json!({
                "checks": ["cargo test"],
            }),
            platform: Some("linux".into()),
            env_hash: Some("env-123".into()),
        };
        bundle.estimator_meta = Some(crate::estimator::EstimatorMeta {
            kind: crate::estimator::EstimatorKind::Custom(
                "living_memory_phase5_scorevector".into(),
            ),
            version: "3.1.4".into(),
            parameters: serde_json::json!({
                "weighted_total": 0.27,
            }),
            random_seed: Some(7),
            environment: Some(environment),
            timeout_secs: Some(60),
            failure_mode: None,
            request_schema_version: Some("living_memory.phase5.bundle.v1".into()),
            response_schema_version: Some("semantic_memory_forge.evidence_bundle.v3".into()),
        });

        // Estimate quality.
        bundle.estimate_uncertainty = Some(0.04);
        bundle.confidence = 0.91;
        bundle.trial_count = 2;
        bundle.variance_aware = true;

        // One baseline and one patched trial within the same attempt.
        let baseline_trial = VerificationTrialRecord {
            trial_id: TrialId::new("trial-baseline-1"),
            attempt_id: AttemptId::new("attempt-family-1"),
            side: VerificationTrialSide::Baseline,
            completed: true,
            receipt_handles: vec![String::from("receipt:baseline")],
        };
        let patched_trial = VerificationTrialRecord {
            trial_id: TrialId::new("trial-patched-1"),
            attempt_id: AttemptId::new("attempt-family-1"),
            side: VerificationTrialSide::Patched,
            completed: true,
            receipt_handles: vec![String::from("receipt:patched")],
        };
        bundle.verification_trials = vec![baseline_trial, patched_trial];

        bundle.comparability_snapshot = Some(ComparabilitySnapshot {
            workload_id: String::from("bench-a"),
            backend_family: String::from("cargo"),
            selected_checks: vec![String::from("cargo test")],
            timeout_class: String::from("short"),
            config_flags: vec![String::from("--all-features")],
            comparable: Some(true),
            violations: Vec::new(),
        });

        // Refutation evidence.
        bundle.refutations = vec![RefutationAttempt {
            method: String::from("placebo"),
            result: RefutationResult::Passed {
                estimate_change: Some(0.01),
            },
            estimator_kind: Some(String::from("living_memory_phase5_scorevector")),
            parameters: Some(serde_json::json!({
                "artifact_id": "placebo-1",
            })),
        }];
        bundle.refutation_artifacts = vec![RefutationArtifactRecord {
            artifact_id: String::from("placebo-1"),
            artifact_type: String::from("placebo"),
            trial_id: Some(TrialId::new("trial-baseline-1")),
            attempt_id: Some(AttemptId::new("attempt-family-1")),
            result: RefutationResult::Passed {
                estimate_change: Some(0.01),
            },
            estimate_delta: Some(0.01),
            details: Some(String::from("placebo preserved the null effect")),
        }];
        bundle.verification_summary = Some(VerificationSummary {
            lifecycle_state: VerificationLifecycleState::Verified,
            promotion_state: PromotionState::Eligible,
            completed_trial_count: 2,
            passed_refutation_count: 1,
            failed_refutation_count: 0,
            comparability_snapshot_version: Some(String::from("bench-a:cargo:short")),
            notes: vec![String::from("paired verification clean")],
        });

        // Provenance handles.
        bundle.raw_receipt_handle = Some(String::from("receipt:store:receipts:123"));
        bundle.trace_ctx = Some(TraceCtx::from_trace_id("trace-ver-001"));
        bundle.attempt_id = Some(AttemptId::new("attempt-family-1"));
        bundle.trial_id = Some(TrialId::new("trial-patched-1"));
        bundle.replay_handle = Some(String::from("replay://attempt-family-1"));
        bundle.claim_ids = vec![ClaimId::new("claim-ver-001")];
        bundle.comparability_snapshot_version = Some(String::from("bench-a:cargo:short"));

        // Serialized shape.
        let json = serde_json::to_value(&bundle).unwrap();
        assert_eq!(json["question"]["unit_definition"], "paired benchmark run");
        assert_eq!(json["treatment"]["paired_trials"], true);

        let trials = json["verification_trials"]
            .as_array()
            .expect("verification trials should serialize");
        assert_eq!(trials.len(), 2);

        let artifacts = json["refutation_artifacts"]
            .as_array()
            .expect("refutation artifacts should serialize");
        assert_eq!(artifacts.len(), 1);

        assert_eq!(
            json["verification_summary"]["promotion_state"]["state"],
            "eligible"
        );
        assert_eq!(json["comparability_snapshot"]["workload_id"], "bench-a");

        // Deserialized content.
        let back: EvidenceBundle = serde_json::from_value(json).unwrap();
        assert_eq!(back.question.description, bundle.question.description);
        assert_eq!(back.treatment.description, bundle.treatment.description);
        assert_eq!(back.outcome.description, bundle.outcome.description);
        assert_eq!(back.verification_trials.len(), 2);
        assert_eq!(back.refutation_artifacts.len(), 1);

        let summary = back
            .verification_summary
            .as_ref()
            .expect("verification summary should roundtrip");
        assert_eq!(
            summary.lifecycle_state,
            VerificationLifecycleState::Verified
        );
        assert_eq!(
            back.claim_ids[0].as_str(),
            "claim-ver-001",
            "claim bundles should remain first-class serialized artifacts"
        );
    }
}