use serde::{Deserialize, Serialize};
use std::path::Path;
/// Serde helpers that carry optional `u64` seeds through JSON as decimal strings.
///
/// Emitting the seed as a string keeps all 64 bits intact for JSON consumers
/// that would otherwise read large integers with reduced precision.
mod serde_seed {
    use serde::{Deserialize, Deserializer, Serializer};

    /// Serializes `Some(n)` as the decimal string form of `n` and `None` as a
    /// serde "none" value (`null` in JSON).
    ///
    /// Intended for use with `#[serde(serialize_with = "...")]`.
    pub fn serialize_opt_u64_as_str<S>(v: &Option<u64>, s: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match v {
            // `collect_str` writes the `Display` output as a string without
            // requiring an intermediate `String` allocation.
            Some(n) => s.collect_str(n),
            None => s.serialize_none(),
        }
    }

    /// Deserializes an optional seed from a decimal string, an unsigned integer
    /// number, or null.
    ///
    /// # Errors
    ///
    /// Fails when a string does not parse as `u64`, when a number is not
    /// representable as `u64`, or when the value has any other type.
    pub fn deserialize_opt_u64_from_str<'de, D>(d: D) -> Result<Option<u64>, D::Error>
    where
        D: Deserializer<'de>,
    {
        let opt: Option<serde_json::Value> = Option::deserialize(d)?;
        match opt {
            None | Some(serde_json::Value::Null) => Ok(None),
            Some(serde_json::Value::String(s)) => {
                let n = s.parse::<u64>().map_err(serde::de::Error::custom)?;
                Ok(Some(n))
            }
            Some(serde_json::Value::Number(num)) => {
                let n = num
                    .as_u64()
                    .ok_or_else(|| serde::de::Error::custom("seed number must be u64"))?;
                Ok(Some(n))
            }
            // Numbers are accepted above, so the message lists every accepted form.
            Some(other) => Err(serde::de::Error::custom(format!(
                "seed must be a string, an unsigned integer, or null, got: {other}"
            ))),
        }
    }
}
/// Value the `Summary` constructors store in `Summary::schema_version`.
pub const SCHEMA_VERSION: u32 = 1;
/// Value the `Summary` constructors store in `Summary::reason_code_version`.
pub const REASON_CODE_VERSION: u32 = 1;
/// Value `Seeds::default` stores in `Seeds::seed_version`.
pub const SEED_VERSION: u32 = 1;
/// Top-level run summary; `write_summary` serializes it as pretty-printed JSON.
///
/// Fields marked `skip_serializing_if = "Option::is_none"` are left out of the
/// serialized output entirely when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Summary {
/// Schema version; the constructors set this to `SCHEMA_VERSION`.
pub schema_version: u32,
/// Reason-code version; the constructors set this to `REASON_CODE_VERSION`.
pub reason_code_version: u32,
/// Exit code for the run: `0` from `Summary::success`, caller-supplied from
/// `Summary::failure`. Presumably mirrors the process exit code — confirm with callers.
pub exit_code: i32,
/// Reason code: empty string from `Summary::success`, caller-supplied from
/// `Summary::failure`.
pub reason_code: String,
/// Optional message describing the outcome.
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
/// Optional suggested follow-up; `Summary::failure` always sets it.
#[serde(skip_serializing_if = "Option::is_none")]
pub next_step: Option<String>,
/// Version, verification mode and digests describing how the run was produced.
pub provenance: Provenance,
/// Test counts, set via `Summary::with_results`.
#[serde(skip_serializing_if = "Option::is_none")]
pub results: Option<ResultsSummary>,
/// Timing and resource metrics, set via `Summary::with_duration` or
/// `Summary::with_performance`.
#[serde(skip_serializing_if = "Option::is_none")]
pub performance: Option<PerformanceMetrics>,
/// Seeds recorded for the run; always serialized.
pub seeds: Seeds,
/// Aggregated judge statistics, set via `Summary::with_judge_metrics`.
#[serde(skip_serializing_if = "Option::is_none")]
pub judge_metrics: Option<JudgeMetrics>,
/// SARIF output information; only set by `Summary::with_sarif_omitted` when
/// the omitted count is non-zero.
#[serde(skip_serializing_if = "Option::is_none")]
pub sarif: Option<SarifOutputInfo>,
}
/// SARIF-related information attached to a `Summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SarifOutputInfo {
/// Count passed to `Summary::with_sarif_omitted`.
/// NOTE(review): presumably the number of results left out of the SARIF
/// output — confirm with the SARIF writer.
pub omitted: u64,
}
/// Seeds recorded in a `Summary`.
///
/// `order_seed` and `judge_seed` are always present in the serialized output:
/// as a decimal string when set, as `null` when `None`. On input they are read
/// through `serde_seed::deserialize_opt_u64_from_str`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Seeds {
/// Seed format version; `Seeds::default` sets this to `SEED_VERSION`.
pub seed_version: u32,
/// Ordering seed, serialized as a string so the full `u64` value survives JSON.
#[serde(
serialize_with = "serde_seed::serialize_opt_u64_as_str",
deserialize_with = "serde_seed::deserialize_opt_u64_from_str"
)]
pub order_seed: Option<u64>,
/// Judge seed, serialized the same way as `order_seed`.
#[serde(
serialize_with = "serde_seed::serialize_opt_u64_as_str",
deserialize_with = "serde_seed::deserialize_opt_u64_from_str"
)]
pub judge_seed: Option<u64>,
/// Sampling seed; omitted from the output when `None`.
/// NOTE(review): unlike the other two seeds this one uses the default `u64`
/// encoding (a JSON number), not a string — confirm that is intended.
#[serde(skip_serializing_if = "Option::is_none")]
pub sampling_seed: Option<u64>,
}
impl Default for Seeds {
fn default() -> Self {
Self {
seed_version: SEED_VERSION,
order_seed: None,
judge_seed: None,
sampling_seed: None,
}
}
}
/// Aggregated judge statistics; every field is omitted from the output when `None`.
///
/// The descriptions below reflect how `judge_metrics_from_results` fills the
/// fields; other producers may differ.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JudgeMetrics {
/// Fraction of judged metric entries whose verdict is `"Abstain"`.
#[serde(skip_serializing_if = "Option::is_none")]
pub abstain_rate: Option<f64>,
/// Fraction of judged metric entries flagged `swapped` whose agreement lies
/// strictly between 0 and 1.
#[serde(skip_serializing_if = "Option::is_none")]
pub flip_rate: Option<f64>,
/// Fraction of judged metric entries whose agreement is exactly 0.0 or 1.0.
#[serde(skip_serializing_if = "Option::is_none")]
pub consensus_rate: Option<f64>,
/// Number of error-status results whose message matches one of the
/// unavailability markers (e.g. "timeout", "rate limit", "network").
#[serde(skip_serializing_if = "Option::is_none")]
pub unavailable_count: Option<u32>,
}
/// Provenance information attached to a `Summary`.
///
/// All optional fields are omitted from the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Provenance {
/// Version string supplied to the `Summary` constructors.
pub assay_version: String,
/// `"enabled"` or `"disabled"`, as produced by `Provenance::new`.
pub verify_mode: String,
/// Policy digest, set via `Summary::with_digests`.
#[serde(skip_serializing_if = "Option::is_none")]
pub policy_pack_digest: Option<String>,
/// Baseline digest, set via `Summary::with_digests`.
#[serde(skip_serializing_if = "Option::is_none")]
pub baseline_digest: Option<String>,
/// Trace digest, set via `Summary::with_digests`.
#[serde(skip_serializing_if = "Option::is_none")]
pub trace_digest: Option<String>,
/// Set to `Some(true)` by `Summary::with_replay_provenance`; `None` otherwise.
#[serde(skip_serializing_if = "Option::is_none")]
pub replay: Option<bool>,
/// Bundle digest, set via `Summary::with_replay_provenance`.
#[serde(skip_serializing_if = "Option::is_none")]
pub bundle_digest: Option<String>,
/// Replay mode string, set via `Summary::with_replay_provenance`.
#[serde(skip_serializing_if = "Option::is_none")]
pub replay_mode: Option<String>,
/// Optional source run identifier, set via `Summary::with_replay_provenance`.
#[serde(skip_serializing_if = "Option::is_none")]
pub source_run_id: Option<String>,
}
/// Test result counts attached to a `Summary`.
///
/// The counts are stored as supplied; nothing in this file checks that they
/// add up to `total`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultsSummary {
/// Number of passed tests.
pub passed: usize,
/// Number of failed tests.
pub failed: usize,
/// Number of warned tests; omitted when `None` (`Summary::with_results` leaves it unset).
#[serde(skip_serializing_if = "Option::is_none")]
pub warned: Option<usize>,
/// Number of skipped tests; omitted when `None` (`Summary::with_results` leaves it unset).
#[serde(skip_serializing_if = "Option::is_none")]
pub skipped: Option<usize>,
/// Total number of tests.
pub total: usize,
}
/// Performance metrics attached to a `Summary`.
///
/// Only `total_duration_ms` is required; every other field is omitted from the
/// output when `None`.
/// NOTE(review): the units below are inferred from the `_ms` / `_bytes`
/// suffixes of the field names — confirm with the code that fills them in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
/// Total run duration; `Summary::with_duration` stores its `duration_ms` argument here.
pub total_duration_ms: u64,
/// Time attributed to verification (presumably milliseconds).
#[serde(skip_serializing_if = "Option::is_none")]
pub verify_ms: Option<u64>,
/// Time attributed to linting (presumably milliseconds).
#[serde(skip_serializing_if = "Option::is_none")]
pub lint_ms: Option<u64>,
/// Time attributed to runner clones (presumably milliseconds).
#[serde(skip_serializing_if = "Option::is_none")]
pub runner_clone_ms: Option<u64>,
/// Number of runner clones.
#[serde(skip_serializing_if = "Option::is_none")]
pub runner_clone_count: Option<u64>,
/// Time attributed to the profile store (presumably milliseconds).
#[serde(skip_serializing_if = "Option::is_none")]
pub profile_store_ms: Option<u64>,
/// Memory attributed to run ids (presumably bytes).
#[serde(skip_serializing_if = "Option::is_none")]
pub run_id_memory_bytes: Option<u64>,
/// Cache hit rate; range and definition are not established in this file.
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_hit_rate: Option<f64>,
/// Per-test durations for the slowest tests.
#[serde(skip_serializing_if = "Option::is_none")]
pub slowest_tests: Option<Vec<SlowestTest>>,
/// Optional per-phase timing breakdown.
#[serde(skip_serializing_if = "Option::is_none")]
pub phase_timings: Option<PhaseTimings>,
}
/// One entry of `PerformanceMetrics::slowest_tests`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlowestTest {
/// Identifier of the test.
pub test_id: String,
/// Duration of the test (presumably milliseconds, per the field name).
pub duration_ms: u64,
}
/// Optional per-phase timing breakdown; each field is omitted when `None`.
/// NOTE(review): phase meanings and the millisecond unit are inferred from the
/// field names — confirm with the code that records them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PhaseTimings {
/// Time attributed to the ingest phase.
#[serde(skip_serializing_if = "Option::is_none")]
pub ingest_ms: Option<u64>,
/// Time attributed to the eval phase.
#[serde(skip_serializing_if = "Option::is_none")]
pub eval_ms: Option<u64>,
/// Time attributed to the judge phase.
#[serde(skip_serializing_if = "Option::is_none")]
pub judge_ms: Option<u64>,
/// Time attributed to the report phase.
#[serde(skip_serializing_if = "Option::is_none")]
pub report_ms: Option<u64>,
}
impl Provenance {
fn new(assay_version: &str, verify_enabled: bool) -> Self {
Self {
assay_version: assay_version.to_string(),
verify_mode: if verify_enabled {
"enabled".to_string()
} else {
"disabled".to_string()
},
policy_pack_digest: None,
baseline_digest: None,
trace_digest: None,
replay: None,
bundle_digest: None,
replay_mode: None,
source_run_id: None,
}
}
}
impl Summary {
pub fn success(assay_version: &str, verify_enabled: bool) -> Self {
Self {
schema_version: SCHEMA_VERSION,
reason_code_version: REASON_CODE_VERSION,
exit_code: 0,
reason_code: String::new(),
message: Some("All tests passed".to_string()),
next_step: None,
provenance: Provenance::new(assay_version, verify_enabled),
results: None,
performance: None,
seeds: Seeds::default(),
judge_metrics: None,
sarif: None,
}
}
pub fn failure(
exit_code: i32,
reason_code: &str,
message: &str,
next_step: &str,
assay_version: &str,
verify_enabled: bool,
) -> Self {
Self {
schema_version: SCHEMA_VERSION,
reason_code_version: REASON_CODE_VERSION,
exit_code,
reason_code: reason_code.to_string(),
message: Some(message.to_string()),
next_step: Some(next_step.to_string()),
provenance: Provenance::new(assay_version, verify_enabled),
results: None,
performance: None,
seeds: Seeds::default(),
judge_metrics: None,
sarif: None,
}
}
pub fn with_results(mut self, passed: usize, failed: usize, total: usize) -> Self {
self.results = Some(ResultsSummary {
passed,
failed,
warned: None,
skipped: None,
total,
});
self
}
pub fn with_duration(mut self, duration_ms: u64) -> Self {
self.performance = Some(PerformanceMetrics {
total_duration_ms: duration_ms,
verify_ms: None,
lint_ms: None,
runner_clone_ms: None,
runner_clone_count: None,
profile_store_ms: None,
run_id_memory_bytes: None,
cache_hit_rate: None,
slowest_tests: None,
phase_timings: None,
});
self
}
pub fn with_performance(mut self, performance: PerformanceMetrics) -> Self {
self.performance = Some(performance);
self
}
pub fn with_digests(
mut self,
policy_digest: Option<String>,
baseline_digest: Option<String>,
trace_digest: Option<String>,
) -> Self {
self.provenance.policy_pack_digest = policy_digest;
self.provenance.baseline_digest = baseline_digest;
self.provenance.trace_digest = trace_digest;
self
}
pub fn with_replay_provenance(
mut self,
bundle_digest: String,
replay_mode: &str,
source_run_id: Option<String>,
) -> Self {
self.provenance.replay = Some(true);
self.provenance.bundle_digest = Some(bundle_digest);
self.provenance.replay_mode = Some(replay_mode.to_string());
self.provenance.source_run_id = source_run_id;
self
}
pub fn with_seeds(mut self, order_seed: Option<u64>, judge_seed: Option<u64>) -> Self {
self.seeds.order_seed = order_seed;
self.seeds.judge_seed = judge_seed;
self
}
pub fn with_judge_metrics(mut self, metrics: JudgeMetrics) -> Self {
self.judge_metrics = Some(metrics);
self
}
pub fn with_sarif_omitted(mut self, omitted: u64) -> Self {
if omitted > 0 {
self.sarif = Some(SarifOutputInfo { omitted });
}
self
}
}
/// Aggregates judge statistics from the per-metric `details` of each result row.
///
/// Every entry of a row's `details["metrics"]` object that has a `details`
/// value carrying a string `verdict` and/or a numeric `agreement` counts as one
/// judged entry. The rates are fractions of that count:
///
/// - `abstain_rate`: the verdict is exactly `"Abstain"`.
/// - `consensus_rate`: the agreement is exactly `0.0` or `1.0`.
/// - `flip_rate`: `swapped` is `true` and the agreement lies strictly between
///   0 and 1.
///
/// `unavailable_count` is the number of rows with `TestStatus::Error` whose
/// lowercased message contains one of the marker substrings listed below.
///
/// Returns `None` when no judged entry is found.
pub fn judge_metrics_from_results(results: &[crate::model::TestResultRow]) -> Option<JudgeMetrics> {
    use crate::model::TestStatus;

    // Substrings (matched against the lowercased message) that mark an error
    // row as "judge unavailable".
    const UNAVAILABLE_MARKERS: [&str; 7] = [
        "timeout",
        "500",
        "502",
        "503",
        "504",
        "rate limit",
        "network",
    ];

    let mut judged = 0u32;
    let mut abstained = 0u32;
    let mut unanimous = 0u32;
    let mut flipped = 0u32;

    // Flatten rows -> metric entries -> their `details` objects, skipping
    // anything that lacks the expected shape.
    let judge_details = results
        .iter()
        .filter_map(|row| row.details.get("metrics").and_then(|m| m.as_object()))
        .flat_map(|metrics| metrics.values())
        .filter_map(|metric| metric.get("details"));

    for detail in judge_details {
        let verdict = detail.get("verdict").and_then(|v| v.as_str());
        let agreement = detail.get("agreement").and_then(|v| v.as_f64());
        if verdict.is_none() && agreement.is_none() {
            // Not a judge entry: neither verdict nor agreement present.
            continue;
        }

        judged += 1;
        if matches!(verdict, Some("Abstain")) {
            abstained += 1;
        }

        if let Some(score) = agreement {
            let was_swapped = detail.get("swapped").and_then(|v| v.as_bool()) == Some(true);
            if score == 0.0 || score == 1.0 {
                unanimous += 1;
            } else if was_swapped && score > 0.0 && score < 1.0 {
                flipped += 1;
            }
        }
    }

    if judged == 0 {
        return None;
    }

    let unavailable = results
        .iter()
        .filter(|row| matches!(row.status, TestStatus::Error))
        .filter(|row| {
            let lowered = row.message.to_lowercase();
            UNAVAILABLE_MARKERS
                .iter()
                .any(|marker| lowered.contains(*marker))
        })
        .count() as u32;

    let denominator = f64::from(judged);
    let rate = |count: u32| Some(f64::from(count) / denominator);

    Some(JudgeMetrics {
        abstain_rate: rate(abstained),
        flip_rate: rate(flipped),
        consensus_rate: rate(unanimous),
        unavailable_count: Some(unavailable),
    })
}
pub fn write_summary(summary: &Summary, out: &Path) -> anyhow::Result<()> {
let json = serde_json::to_string_pretty(summary)?;
std::fs::write(out, json)?;
Ok(())
}
// Unit tests for the summary constructors, JSON serialization of seeds, and
// judge metric aggregation.
#[cfg(test)]
mod tests {
use super::*;
// A success summary carries the current versions, exit code 0, an empty
// reason code and the "enabled" verify mode.
#[test]
fn test_success_summary() {
let summary = Summary::success("2.12.0", true)
.with_results(10, 0, 10)
.with_duration(1234);
assert_eq!(summary.schema_version, 1);
assert_eq!(summary.reason_code_version, 1);
assert_eq!(summary.exit_code, 0);
assert_eq!(summary.reason_code, "");
assert_eq!(summary.provenance.verify_mode, "enabled");
}
// A failure summary keeps the caller-supplied exit code and reason code and
// always has a next step.
#[test]
fn test_failure_summary() {
let summary = Summary::failure(
2,
"E_TRACE_NOT_FOUND",
"Trace file not found: traces/ci.jsonl",
"Run: assay doctor --config ci-eval.yaml",
"2.12.0",
true,
);
assert_eq!(summary.reason_code_version, 1);
assert_eq!(summary.exit_code, 2);
assert_eq!(summary.reason_code, "E_TRACE_NOT_FOUND");
assert!(summary.next_step.is_some());
}
// The serialized JSON contains the version fields, and the seed keys are
// present (as null) even when no seed was set.
#[test]
fn test_summary_serialization() {
let summary = Summary::success("2.12.0", true).with_results(5, 2, 7);
let json = serde_json::to_string_pretty(&summary).unwrap();
assert!(json.contains("\"schema_version\": 1"));
assert!(json.contains("\"reason_code_version\": 1"));
assert!(json.contains("\"assay_version\": \"2.12.0\""));
let v: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(
v["reason_code_version"], 1,
"reason_code_version must be present and integer"
);
assert_eq!(v["seeds"]["seed_version"], 1);
assert!(
v["seeds"].get("order_seed").is_some(),
"order_seed key must exist"
);
assert!(
v["seeds"].get("judge_seed").is_some(),
"judge_seed key must exist"
);
assert!(v["seeds"]["order_seed"].is_null());
assert!(v["seeds"]["judge_seed"].is_null());
}
// A large u64 seed is emitted as a JSON string holding its exact decimal
// digits; an unset seed stays null.
#[test]
fn test_seeds_serialize_as_string() {
let summary = Summary::success("2.12.0", true)
.with_results(1, 0, 1)
.with_seeds(Some(17390767342376325021), None);
let json = serde_json::to_string(&summary).unwrap();
let v: serde_json::Value = serde_json::from_str(&json).unwrap();
assert!(
v["seeds"]["order_seed"].is_string(),
"order_seed must be string to avoid precision loss"
);
assert_eq!(
v["seeds"]["order_seed"].as_str(),
Some("17390767342376325021")
);
assert!(v["seeds"]["judge_seed"].is_null());
}
// An "Abstain" verdict on a passing row counts toward abstain_rate but not
// toward unavailable_count, which only considers error-status rows.
#[test]
fn test_judge_metrics_abstain_not_counted_as_unavailable() {
use crate::model::{TestResultRow, TestStatus};
let results = vec![TestResultRow {
test_id: "t1".into(),
status: TestStatus::Pass,
score: Some(0.5),
cached: false,
message: String::new(),
details: serde_json::json!({
"metrics": {
"m1": { "details": { "verdict": "Abstain", "agreement": 0.5 } }
}
}),
duration_ms: None,
fingerprint: None,
skip_reason: None,
attempts: None,
error_policy_applied: None,
}];
let metrics = judge_metrics_from_results(&results).unwrap();
assert_eq!(metrics.abstain_rate, Some(1.0));
assert_eq!(metrics.unavailable_count, Some(0));
}
}