use std::collections::BTreeMap;
use crate::api::{
AssertionRecord, AssertionResultRecord, AttachedArtifact, EvidenceEnvelope, EvidenceLevel,
FailureClassification, FileArtifactRef, ManifestPreconditions, MetricsSnapshot,
ProducerEvidenceRecord, ReplayArtifact, ReplayBundle, ReplayManifest, ReplayPayload,
RestartRequeryEvidence, RunSummary, ScenarioClass, ScenarioManifest, ScenarioOutcome,
TimelineEntry, TraceSummary, WorkloadIdentity, canonical_alloy_contract_record,
canonical_replay_schema_version, restart_requery_schema_id, scope,
};
/// `scope()` must report the phase-1 scope string.
#[test]
fn sprint_3_scope_is_exposed() {
    let exposed_scope = scope();
    assert_eq!(exposed_scope, "phase-1-v0.1");
}
/// Pins the exact artifact name produced for a fixed set of
/// `ReplayBundle::new` arguments.
#[test]
fn replay_bundle_uses_stable_artifact_name() {
    let expected_name =
        "alloy-assurance-abc1234-embedded-evaluator-bundle-embedded-restart-requery";
    let replay_bundle = ReplayBundle::new(
        "embedded-restart-requery",
        "canonical-fixture-v1",
        "abc1234",
        "embedded-evaluator",
    );
    assert_eq!(replay_bundle.artifact_name(), expected_name);
}
/// A release-blocking envelope must refuse release after a failing outcome is
/// recorded and permit it again once a passing outcome replaces it.
#[test]
fn evidence_release_gate_blocks_on_release_failures() {
    let mut envelope = EvidenceEnvelope::new(
        "embedded-restart-requery",
        "canonical-fixture-v1",
        "abc1234",
        "embedded-evaluator",
        EvidenceLevel::ReleaseBlocking,
    );

    // Phase 1: a recorded failure closes the gate.
    envelope.record_outcome(
        ScenarioOutcome::Fail,
        FailureClassification::RestartConsistency,
    );
    assert!(!envelope.permits_release());

    // Phase 2: a subsequent pass with no failure classification reopens it.
    envelope.record_outcome(ScenarioOutcome::Pass, FailureClassification::None);
    assert!(envelope.permits_release());
}
/// Pins the exact `summary_line()` text for a manifest built with two tags.
#[test]
fn manifest_summary_mentions_expected_tags() {
    let scenario_manifest = ScenarioManifest::new(
        "plexus-capability-mismatch",
        "embedded",
        &["plexus", "capability-gap"],
    );
    let summary = scenario_manifest.summary_line();
    assert_eq!(
        summary,
        "plexus-capability-mismatch [embedded] tags=plexus,capability-gap"
    );
}
/// The restart/re-query schema identifier is pinned to its v1 string.
#[test]
fn restart_requery_schema_identifier_is_stable() {
    let schema_id = restart_requery_schema_id();
    assert_eq!(schema_id, "sr.iridium.restart-requery.v1");
}
/// Evidence built from two passing assertion records must carry the v1 schema
/// id, report itself replay-ready, and leave its envelope permitting release.
#[test]
fn typed_restart_requery_payload_is_replay_ready() {
    let workload = WorkloadIdentity::new(
        "canonical-restart-requery",
        "canonical-fixture-v1",
        "hybrid-default",
        "default-embeddings",
    );
    let envelope = EvidenceEnvelope::new(
        "embedded-restart-requery",
        "canonical-fixture-v1",
        "abc1234",
        "embedded-evaluator",
        EvidenceLevel::ReleaseBlocking,
    );
    // Both records are marked `Pass`.
    let assertions = vec![
        AssertionRecord::new(
            "persisted-segment-count",
            "restart preserves persisted segment count",
            ScenarioOutcome::Pass,
            "segment-count=12",
            "segment-count=12",
        ),
        AssertionRecord::new(
            "query-top-hit",
            "re-query returns the same top document after restart",
            ScenarioOutcome::Pass,
            "doc-17",
            "doc-17",
        ),
    ];
    let attachments = vec![
        AttachedArtifact::new(
            "replay-bundle",
            "alloy-assurance-abc1234-embedded-evaluator-bundle-embedded-restart-requery",
            "application/json",
        ),
        AttachedArtifact::new("trace-summary", "trace-summary.md", "text/markdown"),
    ];
    let replay = ReplayPayload::new(
        "embedded-restart-requery",
        ReplayBundle::new(
            "embedded-restart-requery",
            "canonical-fixture-v1",
            "abc1234",
            "embedded-evaluator",
        ),
        "cargo run -p iridium -- replay embedded-restart-requery",
        "iridium-acceptance",
    );

    let payload = RestartRequeryEvidence::new(workload, envelope, assertions, attachments, replay);

    assert_eq!(payload.schema_id, "sr.iridium.restart-requery.v1");
    assert!(payload.is_replay_ready());
    assert!(payload.envelope.permits_release());
}
/// When one of the two assertion records is marked `Fail`, the resulting
/// envelope must report a `Mixed` outcome classified as `RestartConsistency`
/// and must not permit release.
#[test]
fn typed_restart_requery_payload_tracks_failed_assertions() {
    let workload = WorkloadIdentity::new(
        "canonical-restart-requery",
        "canonical-fixture-v1",
        "hybrid-default",
        "default-embeddings",
    );
    let envelope = EvidenceEnvelope::new(
        "embedded-restart-requery",
        "canonical-fixture-v1",
        "abc1234",
        "embedded-evaluator",
        EvidenceLevel::ReleaseBlocking,
    );
    // First record passes; the second is marked `Fail` with differing values.
    let assertions = vec![
        AssertionRecord::new(
            "persisted-segment-count",
            "restart preserves persisted segment count",
            ScenarioOutcome::Pass,
            "segment-count=12",
            "segment-count=12",
        ),
        AssertionRecord::new(
            "query-top-hit",
            "re-query returns the same top document after restart",
            ScenarioOutcome::Fail,
            "doc-17",
            "doc-42",
        ),
    ];
    let attachments = vec![AttachedArtifact::new(
        "replay-bundle",
        "alloy-assurance-abc1234-embedded-evaluator-bundle-embedded-restart-requery",
        "application/json",
    )];
    let replay = ReplayPayload::new(
        "embedded-restart-requery",
        ReplayBundle::new(
            "embedded-restart-requery",
            "canonical-fixture-v1",
            "abc1234",
            "embedded-evaluator",
        ),
        "cargo run -p iridium -- replay embedded-restart-requery",
        "iridium-acceptance",
    );

    let payload = RestartRequeryEvidence::new(workload, envelope, assertions, attachments, replay);

    assert_eq!(payload.envelope.outcome, ScenarioOutcome::Mixed);
    assert_eq!(
        payload.envelope.failure,
        FailureClassification::RestartConsistency
    );
    assert!(!payload.envelope.permits_release());
}
/// The contract record derived from a bundle, a manifest, and a mixed-outcome
/// envelope must expose the phase-1 scope, artifact names containing
/// "bundle" and "report", a report line naming the artifact, and a blocked
/// release flag.
#[test]
fn canonical_contract_record_uses_shared_metadata_helpers() {
    let bundle = ReplayBundle::new(
        "embedded-restart-requery",
        "canonical-graphrag",
        "deadbeef",
        "embedded-evaluator",
    );
    let scenario_manifest = ScenarioManifest::new(
        "sr.iridium.restart-requery.v1",
        "embedded",
        &["strontium", "iridium", "restart"],
    );
    let mut evidence = EvidenceEnvelope::new(
        "artifact-1",
        "canonical-graphrag",
        "deadbeef",
        "embedded-evaluator",
        EvidenceLevel::ReleaseBlocking,
    );
    evidence.record_outcome(
        ScenarioOutcome::Mixed,
        FailureClassification::RecoveryRegression,
    );

    let contract = canonical_alloy_contract_record(&bundle, &scenario_manifest, &evidence);

    // Scope and naming metadata.
    assert_eq!(contract.scope, "phase-1-v0.1");
    assert!(contract.replay_bundle_artifact_name.contains("bundle"));
    assert!(contract.evidence_artifact_name.contains("report"));
    assert!(contract.evidence_report_line.contains("artifact-1"));
    // Release gate reflects the recorded mixed outcome.
    assert!(!contract.permits_release);
}
/// The canonical replay schema version is pinned to the phase-1 string.
#[test]
fn canonical_schema_version_is_stable() {
    let schema_version = canonical_replay_schema_version();
    assert_eq!(schema_version, "phase-1-v0.1");
}
/// Builds a fully populated `ReplayArtifact` fixture: a restart-recovery
/// scenario with a `Mixed` result, one passing and one failing assertion
/// result, a single timeline entry, a trace summary, and a metrics snapshot.
// The fixture is essentially one large data literal, hence the length lint is
// silenced here.
#[allow(clippy::too_many_lines)]
fn sample_replay_artifact() -> ReplayArtifact {
    // Inputs for the embedded contract record.
    let bundle = ReplayBundle::new(
        "embedded-restart-requery",
        "canonical-graphrag",
        "deadbeef",
        "embedded-evaluator",
    );
    let scenario_manifest = ScenarioManifest::new(
        "sr.iridium.restart-requery.v1",
        "embedded",
        &["strontium", "iridium", "restart"],
    );
    let mut evidence = EvidenceEnvelope::new(
        "artifact-1",
        "canonical-graphrag",
        "deadbeef",
        "embedded-evaluator",
        EvidenceLevel::ReleaseBlocking,
    );
    evidence.record_outcome(
        ScenarioOutcome::Mixed,
        FailureClassification::RecoveryRegression,
    );
    let alloy_contract = canonical_alloy_contract_record(&bundle, &scenario_manifest, &evidence);

    // Declarative description of the scenario.
    let manifest = ReplayManifest {
        scenario_id: "sr.iridium.restart-requery.v1".to_owned(),
        title: "Restart re-query".to_owned(),
        phase: "phase-1".to_owned(),
        maturity_tier: "embedded-evaluator".to_owned(),
        scenario_class: ScenarioClass::RestartRecovery,
        producer_surfaces: vec!["iridium".to_owned(), "strontium".to_owned()],
        preconditions: ManifestPreconditions {
            dataset_family: "canonical-graphrag".to_owned(),
            plan_profile: "hybrid-default".to_owned(),
            cache_mode: "cache-on".to_owned(),
            transport: Some("quic".to_owned()),
        },
        stimulus: "restart the evaluator and repeat the same query".to_owned(),
        assertions: vec![
            "persisted state is reused after restart".to_owned(),
            "response class remains within tolerance".to_owned(),
        ],
        evidence_level: EvidenceLevel::ReleaseBlocking,
        required_artifacts: vec![
            "replay-bundle".to_owned(),
            "trace-summary".to_owned(),
            "metrics-snapshot".to_owned(),
        ],
        failure_classifications: vec![
            FailureClassification::RestartConsistency,
            FailureClassification::RecoveryRegression,
        ],
        reproduction_steps: vec![
            "cargo test ...".to_owned(),
            "attach acceptance contract report and trace summary".to_owned(),
        ],
        notes: vec!["phase-1 fixture".to_owned()],
    };

    // Outcome of the run: first assertion passed, second failed.
    let run_summary = RunSummary {
        result: ScenarioOutcome::Mixed,
        assertion_results: vec![
            AssertionResultRecord {
                assertion: "persisted state is reused after restart".to_owned(),
                passed: true,
                detail: None,
            },
            AssertionResultRecord {
                assertion: "response class remains within tolerance".to_owned(),
                passed: false,
                detail: Some("response class changed from pass to warn".to_owned()),
            },
        ],
        producer_evidence: vec![ProducerEvidenceRecord {
            producer_surface: "iridium".to_owned(),
            dataset_family: "canonical-graphrag".to_owned(),
            workload_id: "canonical-fixture".to_owned(),
            supporting_artifacts: vec![FileArtifactRef {
                kind: "trace_summary".to_owned(),
                path: "artifacts/trace.json".to_owned(),
                description: Some("restart trace".to_owned()),
            }],
            metadata: BTreeMap::new(),
        }],
        reproduction_steps: vec![
            "cargo test ...".to_owned(),
            "attach acceptance contract report and trace summary".to_owned(),
        ],
    };

    let timeline = vec![TimelineEntry {
        sequence: 1,
        label: "restart".to_owned(),
        detail: Some("process restarted cleanly".to_owned()),
        connection_id: Some("conn-1".to_owned()),
        stream_id: Some("stream-7".to_owned()),
        fault_class: Some("injected-reset".to_owned()),
    }];

    ReplayArtifact {
        artifact_id: "artifact-1".to_owned(),
        schema_version: canonical_replay_schema_version().to_owned(),
        producing_repo: "strontium".to_owned(),
        producing_commit: "deadbeef".to_owned(),
        scenario_id: "sr.iridium.restart-requery.v1".to_owned(),
        scenario_class: ScenarioClass::RestartRecovery,
        maturity_tier: "embedded-evaluator".to_owned(),
        dataset_family: "canonical-graphrag".to_owned(),
        workload_id: "canonical-fixture".to_owned(),
        result: ScenarioOutcome::Mixed,
        evidence_level: EvidenceLevel::ReleaseBlocking,
        failure_classification: Some(FailureClassification::RecoveryRegression),
        started_at: "2026-03-13T00:00:00Z".to_owned(),
        finished_at: "2026-03-13T00:00:01Z".to_owned(),
        supporting_artifacts: vec![FileArtifactRef {
            kind: "trace_summary".to_owned(),
            path: "artifacts/trace.json".to_owned(),
            description: Some("restart trace".to_owned()),
        }],
        alloy_contract,
        manifest,
        run_summary,
        timeline,
        trace_summary: Some(TraceSummary {
            trace_id: "trace-1".to_owned(),
            summary: "restart trace".to_owned(),
        }),
        metrics_snapshot: Some(MetricsSnapshot {
            counters: BTreeMap::from([(String::from("query_count"), 2)]),
        }),
        attachments: vec![FileArtifactRef {
            kind: "metrics_snapshot".to_owned(),
            path: "artifacts/metrics.json".to_owned(),
            description: None,
        }],
    }
}
/// Serializing the sample artifact to JSON and parsing it back must yield a
/// value equal to the original.
#[test]
fn portable_replay_artifact_round_trips_with_serde() {
    let original = sample_replay_artifact();
    let encoded = serde_json::to_string(&original).expect("serialize replay artifact");
    let decoded = serde_json::from_str::<ReplayArtifact>(&encoded)
        .expect("deserialize replay artifact");
    assert_eq!(decoded, original);
}
/// Validating a transport-fault artifact whose only timeline entry carries no
/// connection id, stream id, or fault class must report exactly one missing
/// section, `timeline.transport_correlation`, and an invalid report.
#[test]
fn scenario_specific_phase_1_requirements_are_enforced() {
    let bundle = ReplayBundle::new(
        "sr.rhodium.quic-fault.v1",
        "canonical-graphrag",
        "deadbeef",
        "embedded-evaluator",
    );
    let scenario_manifest = ScenarioManifest::new(
        "sr.rhodium.quic-fault.v1",
        "embedded",
        &["strontium", "rhodium", "fault"],
    );
    let mut evidence = EvidenceEnvelope::new(
        "artifact-transport",
        "canonical-graphrag",
        "deadbeef",
        "embedded-evaluator",
        EvidenceLevel::ReleaseBlocking,
    );
    evidence.record_outcome(ScenarioOutcome::Fail, FailureClassification::TransportFault);
    let alloy_contract = canonical_alloy_contract_record(&bundle, &scenario_manifest, &evidence);

    let manifest = ReplayManifest {
        scenario_id: "sr.rhodium.quic-fault.v1".to_owned(),
        title: "Transport fault".to_owned(),
        phase: "phase-1".to_owned(),
        maturity_tier: "embedded-evaluator".to_owned(),
        scenario_class: ScenarioClass::TransportFault,
        producer_surfaces: vec!["rhodium".to_owned(), "strontium".to_owned()],
        preconditions: ManifestPreconditions {
            dataset_family: "canonical-graphrag".to_owned(),
            plan_profile: "hybrid-default".to_owned(),
            cache_mode: "cache-on".to_owned(),
            transport: Some("quic".to_owned()),
        },
        stimulus: "inject a transport fault".to_owned(),
        assertions: vec!["connection recovers or fails clearly".to_owned()],
        evidence_level: EvidenceLevel::ReleaseBlocking,
        required_artifacts: vec!["replay-bundle".to_owned()],
        failure_classifications: vec![FailureClassification::TransportFault],
        reproduction_steps: vec!["run transport fault scenario".to_owned()],
        notes: vec![],
    };
    let run_summary = RunSummary {
        result: ScenarioOutcome::Fail,
        assertion_results: vec![AssertionResultRecord {
            assertion: "connection recovers or fails clearly".to_owned(),
            passed: false,
            detail: Some("unexpected hang".to_owned()),
        }],
        producer_evidence: vec![],
        reproduction_steps: vec!["run transport fault scenario".to_owned()],
    };
    // The single timeline entry deliberately leaves every optional field empty.
    let timeline = vec![TimelineEntry {
        sequence: 1,
        label: "fault".to_owned(),
        detail: None,
        connection_id: None,
        stream_id: None,
        fault_class: None,
    }];

    let transport_artifact = ReplayArtifact {
        artifact_id: "artifact-transport".to_owned(),
        schema_version: canonical_replay_schema_version().to_owned(),
        producing_repo: "strontium".to_owned(),
        producing_commit: "deadbeef".to_owned(),
        scenario_id: "sr.rhodium.quic-fault.v1".to_owned(),
        scenario_class: ScenarioClass::TransportFault,
        maturity_tier: "embedded-evaluator".to_owned(),
        dataset_family: "canonical-graphrag".to_owned(),
        workload_id: "canonical-fixture".to_owned(),
        result: ScenarioOutcome::Fail,
        evidence_level: EvidenceLevel::ReleaseBlocking,
        failure_classification: Some(FailureClassification::TransportFault),
        started_at: "2026-03-13T00:00:00Z".to_owned(),
        finished_at: "2026-03-13T00:00:01Z".to_owned(),
        supporting_artifacts: vec![],
        alloy_contract,
        manifest,
        run_summary,
        timeline,
        trace_summary: Some(TraceSummary {
            trace_id: "trace-1".to_owned(),
            summary: "fault trace".to_owned(),
        }),
        metrics_snapshot: None,
        attachments: vec![],
    };

    let validation = transport_artifact.validate_phase_1_requirements();
    assert_eq!(
        validation.missing_sections,
        vec!["timeline.transport_correlation".to_owned()]
    );
    assert!(!validation.is_valid());
}