1use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9mod serde_seed {
11 use serde::{Deserialize, Deserializer, Serializer};
12
13 pub fn serialize_opt_u64_as_str<S>(v: &Option<u64>, s: S) -> Result<S::Ok, S::Error>
14 where
15 S: Serializer,
16 {
17 match v {
18 Some(n) => s.serialize_str(&n.to_string()),
19 None => s.serialize_none(),
20 }
21 }
22
23 pub fn deserialize_opt_u64_from_str<'de, D>(d: D) -> Result<Option<u64>, D::Error>
24 where
25 D: Deserializer<'de>,
26 {
27 let opt: Option<serde_json::Value> = Option::deserialize(d)?;
28 match opt {
29 None | Some(serde_json::Value::Null) => Ok(None),
30 Some(serde_json::Value::String(s)) => {
31 let n = s.parse::<u64>().map_err(serde::de::Error::custom)?;
32 Ok(Some(n))
33 }
34 Some(serde_json::Value::Number(num)) => {
35 let n = num
37 .as_u64()
38 .ok_or_else(|| serde::de::Error::custom("seed number must be u64"))?;
39 Ok(Some(n))
40 }
41 Some(other) => Err(serde::de::Error::custom(format!(
42 "seed must be string or null, got: {other}"
43 ))),
44 }
45 }
46}
47
/// Value written to `Summary::schema_version` by the `Summary` constructors.
pub const SCHEMA_VERSION: u32 = 1;

/// Value written to `Summary::reason_code_version` by the `Summary` constructors.
pub const REASON_CODE_VERSION: u32 = 1;

/// Value written to `Seeds::seed_version` by `Seeds::default()`.
pub const SEED_VERSION: u32 = 1;
57
/// Machine-readable outcome summary, written as pretty-printed JSON by
/// `write_summary`.
///
/// Create one with `Summary::success` or `Summary::failure`, then refine it
/// with the `with_*` builder methods. Fields marked
/// `skip_serializing_if = "Option::is_none"` are left out of the serialized
/// output while they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Summary {
    /// Summary schema version; the constructors set it to `SCHEMA_VERSION`.
    pub schema_version: u32,

    /// Reason-code registry version; the constructors set it to
    /// `REASON_CODE_VERSION`.
    pub reason_code_version: u32,

    /// Exit code: `0` from `Summary::success`, caller-supplied in
    /// `Summary::failure`.
    pub exit_code: i32,

    /// Reason code: the empty string from `Summary::success`, caller-supplied
    /// in `Summary::failure`.
    pub reason_code: String,

    /// Optional human-readable message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,

    /// Optional suggested follow-up; `Summary::failure` always sets it,
    /// `Summary::success` leaves it `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub next_step: Option<String>,

    /// Version, verification mode, digests and replay information.
    pub provenance: Provenance,

    /// Pass/fail counts; set by `with_results`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub results: Option<ResultsSummary>,

    /// Timing and resource metrics; set by `with_duration` or
    /// `with_performance`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub performance: Option<PerformanceMetrics>,

    /// Seeds section; always serialized (no `skip_serializing_if`).
    pub seeds: Seeds,

    /// Aggregated judge metrics; set by `with_judge_metrics`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub judge_metrics: Option<JudgeMetrics>,

    /// SARIF output information; only set by `with_sarif_omitted` when the
    /// omitted count is greater than zero.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sarif: Option<SarifOutputInfo>,
}
106
/// SARIF-related information attached to a `Summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SarifOutputInfo {
    /// Omitted count as passed to `Summary::with_sarif_omitted`
    /// (presumably the number of results left out of the SARIF output —
    /// TODO confirm with the producer).
    pub omitted: u64,
}
113
/// Seeds recorded in a `Summary`.
///
/// `order_seed` and `judge_seed` are always present in the serialized output:
/// as a decimal string when set, as `null` otherwise. The tests in this file
/// state the string form is used to avoid precision loss.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Seeds {
    /// Seed format version; `Seeds::default()` sets it to `SEED_VERSION`.
    pub seed_version: u32,
    /// Order seed; string-encoded on output, accepted as string, number or
    /// null on input (see `serde_seed`).
    #[serde(
        serialize_with = "serde_seed::serialize_opt_u64_as_str",
        deserialize_with = "serde_seed::deserialize_opt_u64_from_str"
    )]
    pub order_seed: Option<u64>,
    /// Judge seed; same encoding as `order_seed`.
    #[serde(
        serialize_with = "serde_seed::serialize_opt_u64_as_str",
        deserialize_with = "serde_seed::deserialize_opt_u64_from_str"
    )]
    pub judge_seed: Option<u64>,
    /// Sampling seed; omitted from the output when `None`.
    /// NOTE(review): unlike the two seeds above this one is serialized as a
    /// plain JSON number, not a string — confirm that is intended.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sampling_seed: Option<u64>,
}
135
136impl Default for Seeds {
137 fn default() -> Self {
138 Self {
139 seed_version: SEED_VERSION,
140 order_seed: None,
141 judge_seed: None,
142 sampling_seed: None,
143 }
144 }
145}
146
/// Aggregated judge statistics; see `judge_metrics_from_results` for how each
/// value is derived. Every field is omitted from the output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JudgeMetrics {
    /// Fraction of judge evaluations whose verdict is `"Abstain"`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub abstain_rate: Option<f64>,
    /// Fraction of judge evaluations flagged `swapped` whose agreement lies
    /// strictly between 0 and 1.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub flip_rate: Option<f64>,
    /// Fraction of judge evaluations whose agreement is exactly 0 or 1.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub consensus_rate: Option<f64>,
    /// Number of error-status results whose message matches one of the
    /// "unavailable" markers (timeout, 5xx codes, rate limit, network).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub unavailable_count: Option<u32>,
}
163
/// Provenance section of a `Summary`: tool version, verification mode and
/// optional digests / replay information. Optional fields are omitted from
/// the output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Provenance {
    /// Version string passed to the `Summary` constructors.
    pub assay_version: String,

    /// `"enabled"` or `"disabled"`, derived from the `verify_enabled` flag in
    /// `Provenance::new`.
    pub verify_mode: String,

    /// Policy pack digest; set by `Summary::with_digests`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub policy_pack_digest: Option<String>,

    /// Baseline digest; set by `Summary::with_digests`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub baseline_digest: Option<String>,

    /// Trace digest; set by `Summary::with_digests`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trace_digest: Option<String>,

    /// Set to `Some(true)` by `Summary::with_replay_provenance`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub replay: Option<bool>,

    /// Bundle digest; set by `Summary::with_replay_provenance`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bundle_digest: Option<String>,

    /// Replay mode string; set by `Summary::with_replay_provenance`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub replay_mode: Option<String>,

    /// Optional source run id; set by `Summary::with_replay_provenance`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_run_id: Option<String>,
}
201
/// Result counts attached to a `Summary`.
///
/// `Summary::with_results` fills `passed`, `failed` and `total` and leaves
/// `warned` / `skipped` as `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultsSummary {
    /// Number of passed tests.
    pub passed: usize,

    /// Number of failed tests.
    pub failed: usize,

    /// Number of warned tests; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub warned: Option<usize>,

    /// Number of skipped tests; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub skipped: Option<usize>,

    /// Total number of tests as supplied by the caller (this file does not
    /// check it against the other counts).
    pub total: usize,
}
222
/// Performance section of a `Summary`.
///
/// Only `total_duration_ms` is mandatory; every other field is omitted from
/// the output when `None`. The values are supplied by callers outside this
/// file, so the per-field notes below are hedged: the `_ms` suffix suggests
/// milliseconds — TODO confirm with the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Total duration; set from the argument of `Summary::with_duration`.
    pub total_duration_ms: u64,

    /// Presumably time spent in verification.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub verify_ms: Option<u64>,

    /// Presumably time spent linting.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lint_ms: Option<u64>,

    /// Presumably time spent cloning the runner.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runner_clone_ms: Option<u64>,

    /// Presumably the number of runner clones.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runner_clone_count: Option<u64>,

    /// Presumably time spent in the profile store.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub profile_store_ms: Option<u64>,

    /// Presumably memory used for run ids, in bytes.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub run_id_memory_bytes: Option<u64>,

    /// Cache hit rate (range not established in this file).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_hit_rate: Option<f64>,

    /// Optional list of the slowest tests.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub slowest_tests: Option<Vec<SlowestTest>>,

    /// Optional per-phase timing breakdown.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub phase_timings: Option<PhaseTimings>,
}
265
/// One entry of `PerformanceMetrics::slowest_tests`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlowestTest {
    /// Identifier of the test.
    pub test_id: String,
    /// Duration of the test (presumably milliseconds, per the `_ms` suffix).
    pub duration_ms: u64,
}
272
/// Optional per-phase timings used by `PerformanceMetrics::phase_timings`.
///
/// Every field is omitted from the output when `None`. Units are presumably
/// milliseconds, per the `_ms` suffix — TODO confirm with the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PhaseTimings {
    /// Ingest phase timing.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ingest_ms: Option<u64>,
    /// Eval phase timing.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_ms: Option<u64>,
    /// Judge phase timing.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub judge_ms: Option<u64>,
    /// Report phase timing.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub report_ms: Option<u64>,
}
285
286impl Provenance {
287 fn new(assay_version: &str, verify_enabled: bool) -> Self {
289 Self {
290 assay_version: assay_version.to_string(),
291 verify_mode: if verify_enabled {
292 "enabled".to_string()
293 } else {
294 "disabled".to_string()
295 },
296 policy_pack_digest: None,
297 baseline_digest: None,
298 trace_digest: None,
299 replay: None,
300 bundle_digest: None,
301 replay_mode: None,
302 source_run_id: None,
303 }
304 }
305}
306
307impl Summary {
308 pub fn success(assay_version: &str, verify_enabled: bool) -> Self {
310 Self {
311 schema_version: SCHEMA_VERSION,
312 reason_code_version: REASON_CODE_VERSION,
313 exit_code: 0,
314 reason_code: String::new(),
315 message: Some("All tests passed".to_string()),
316 next_step: None,
317 provenance: Provenance::new(assay_version, verify_enabled),
318 results: None,
319 performance: None,
320 seeds: Seeds::default(),
321 judge_metrics: None,
322 sarif: None,
323 }
324 }
325
326 pub fn failure(
328 exit_code: i32,
329 reason_code: &str,
330 message: &str,
331 next_step: &str,
332 assay_version: &str,
333 verify_enabled: bool,
334 ) -> Self {
335 Self {
336 schema_version: SCHEMA_VERSION,
337 reason_code_version: REASON_CODE_VERSION,
338 exit_code,
339 reason_code: reason_code.to_string(),
340 message: Some(message.to_string()),
341 next_step: Some(next_step.to_string()),
342 provenance: Provenance::new(assay_version, verify_enabled),
343 results: None,
344 performance: None,
345 seeds: Seeds::default(),
346 judge_metrics: None,
347 sarif: None,
348 }
349 }
350
351 pub fn with_results(mut self, passed: usize, failed: usize, total: usize) -> Self {
353 self.results = Some(ResultsSummary {
354 passed,
355 failed,
356 warned: None,
357 skipped: None,
358 total,
359 });
360 self
361 }
362
363 pub fn with_duration(mut self, duration_ms: u64) -> Self {
365 self.performance = Some(PerformanceMetrics {
366 total_duration_ms: duration_ms,
367 verify_ms: None,
368 lint_ms: None,
369 runner_clone_ms: None,
370 runner_clone_count: None,
371 profile_store_ms: None,
372 run_id_memory_bytes: None,
373 cache_hit_rate: None,
374 slowest_tests: None,
375 phase_timings: None,
376 });
377 self
378 }
379
380 pub fn with_performance(mut self, performance: PerformanceMetrics) -> Self {
382 self.performance = Some(performance);
383 self
384 }
385
386 pub fn with_digests(
388 mut self,
389 policy_digest: Option<String>,
390 baseline_digest: Option<String>,
391 trace_digest: Option<String>,
392 ) -> Self {
393 self.provenance.policy_pack_digest = policy_digest;
394 self.provenance.baseline_digest = baseline_digest;
395 self.provenance.trace_digest = trace_digest;
396 self
397 }
398
399 pub fn with_replay_provenance(
401 mut self,
402 bundle_digest: String,
403 replay_mode: &str,
404 source_run_id: Option<String>,
405 ) -> Self {
406 self.provenance.replay = Some(true);
407 self.provenance.bundle_digest = Some(bundle_digest);
408 self.provenance.replay_mode = Some(replay_mode.to_string());
409 self.provenance.source_run_id = source_run_id;
410 self
411 }
412
413 pub fn with_seeds(mut self, order_seed: Option<u64>, judge_seed: Option<u64>) -> Self {
415 self.seeds.order_seed = order_seed;
416 self.seeds.judge_seed = judge_seed;
417 self
418 }
419
420 pub fn with_judge_metrics(mut self, metrics: JudgeMetrics) -> Self {
422 self.judge_metrics = Some(metrics);
423 self
424 }
425
426 pub fn with_sarif_omitted(mut self, omitted: u64) -> Self {
428 if omitted > 0 {
429 self.sarif = Some(SarifOutputInfo { omitted });
430 }
431 self
432 }
433}
434
435pub fn judge_metrics_from_results(results: &[crate::model::TestResultRow]) -> Option<JudgeMetrics> {
439 use crate::model::TestStatus;
440
441 let mut total_judge = 0u32;
442 let mut abstain_count = 0u32;
443 let mut consensus_count = 0u32;
444 let mut flip_count = 0u32;
445
446 for r in results {
447 let Some(metrics) = r.details.get("metrics").and_then(|m| m.as_object()) else {
448 continue;
449 };
450 for (_name, metric_val) in metrics {
451 let Some(details) = metric_val.get("details") else {
452 continue;
453 };
454 let verdict = details.get("verdict").and_then(|v| v.as_str());
455 let agreement = details.get("agreement").and_then(|v| v.as_f64());
456 let swapped = details
457 .get("swapped")
458 .and_then(|v| v.as_bool())
459 .unwrap_or(false);
460
461 if verdict.is_none() && agreement.is_none() {
462 continue;
463 }
464 total_judge += 1;
465
466 if verdict == Some("Abstain") {
467 abstain_count += 1;
468 }
469 if let Some(a) = agreement {
470 if a == 0.0 || a == 1.0 {
471 consensus_count += 1;
472 }
473 if swapped && a > 0.0 && a < 1.0 {
479 flip_count += 1;
480 }
481 }
482 }
483 }
484
485 if total_judge == 0 {
486 return None;
487 }
488
489 let total = total_judge as f64;
490 Some(JudgeMetrics {
491 abstain_rate: Some(abstain_count as f64 / total),
492 flip_rate: Some(flip_count as f64 / total),
493 consensus_rate: Some(consensus_count as f64 / total),
494 unavailable_count: Some(
495 results
496 .iter()
497 .filter(|r| matches!(r.status, TestStatus::Error))
498 .filter(|r| {
499 let m = r.message.to_lowercase();
500 m.contains("timeout")
501 || m.contains("500")
502 || m.contains("502")
503 || m.contains("503")
504 || m.contains("504")
505 || m.contains("rate limit")
506 || m.contains("network")
507 })
508 .count() as u32,
509 ),
510 })
511}
512
513pub fn write_summary(summary: &Summary, out: &Path) -> anyhow::Result<()> {
515 let json = serde_json::to_string_pretty(summary)?;
516 std::fs::write(out, json)?;
517 Ok(())
518}
519
// Unit tests for the summary constructors, the JSON shape and the judge
// metrics aggregation.
#[cfg(test)]
mod tests {
    use super::*;

    // A success summary carries the current versions, exit code 0, an empty
    // reason code and the "enabled" verify mode.
    #[test]
    fn test_success_summary() {
        let summary = Summary::success("2.12.0", true)
            .with_results(10, 0, 10)
            .with_duration(1234);

        assert_eq!(summary.schema_version, 1);
        assert_eq!(summary.reason_code_version, 1);
        assert_eq!(summary.exit_code, 0);
        assert_eq!(summary.reason_code, "");
        assert_eq!(summary.provenance.verify_mode, "enabled");
    }

    // A failure summary keeps the caller-supplied exit code and reason code
    // and always has a next step.
    #[test]
    fn test_failure_summary() {
        let summary = Summary::failure(
            2,
            "E_TRACE_NOT_FOUND",
            "Trace file not found: traces/ci.jsonl",
            "Run: assay doctor --config ci-eval.yaml",
            "2.12.0",
            true,
        );

        assert_eq!(summary.reason_code_version, 1);
        assert_eq!(summary.exit_code, 2);
        assert_eq!(summary.reason_code, "E_TRACE_NOT_FOUND");
        assert!(summary.next_step.is_some());
    }

    // The serialized JSON contains the version fields as integers and always
    // includes the seed keys, as null when no seed is set.
    #[test]
    fn test_summary_serialization() {
        let summary = Summary::success("2.12.0", true).with_results(5, 2, 7);

        let json = serde_json::to_string_pretty(&summary).unwrap();
        assert!(json.contains("\"schema_version\": 1"));
        assert!(json.contains("\"reason_code_version\": 1"));
        assert!(json.contains("\"assay_version\": \"2.12.0\""));

        let v: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(
            v["reason_code_version"], 1,
            "reason_code_version must be present and integer"
        );

        assert_eq!(v["seeds"]["seed_version"], 1);
        assert!(
            v["seeds"].get("order_seed").is_some(),
            "order_seed key must exist"
        );
        assert!(
            v["seeds"].get("judge_seed").is_some(),
            "judge_seed key must exist"
        );
        assert!(v["seeds"]["order_seed"].is_null());
        assert!(v["seeds"]["judge_seed"].is_null());
    }

    // A set seed is emitted as a decimal string; the value used here is a
    // large u64 and must come back digit-for-digit.
    #[test]
    fn test_seeds_serialize_as_string() {
        let summary = Summary::success("2.12.0", true)
            .with_results(1, 0, 1)
            .with_seeds(Some(17390767342376325021), None);

        let json = serde_json::to_string(&summary).unwrap();
        let v: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert!(
            v["seeds"]["order_seed"].is_string(),
            "order_seed must be string to avoid precision loss"
        );
        assert_eq!(
            v["seeds"]["order_seed"].as_str(),
            Some("17390767342376325021")
        );
        assert!(v["seeds"]["judge_seed"].is_null());
    }

    // An "Abstain" verdict on a passing row raises the abstain rate but is
    // not counted as an unavailable judge.
    #[test]
    fn test_judge_metrics_abstain_not_counted_as_unavailable() {
        use crate::model::{TestResultRow, TestStatus};

        let results = vec![TestResultRow {
            test_id: "t1".into(),
            status: TestStatus::Pass,
            score: Some(0.5),
            cached: false,
            message: String::new(),
            details: serde_json::json!({
                "metrics": {
                    "m1": { "details": { "verdict": "Abstain", "agreement": 0.5 } }
                }
            }),
            duration_ms: None,
            fingerprint: None,
            skip_reason: None,
            attempts: None,
            error_policy_applied: None,
        }];
        let metrics = judge_metrics_from_results(&results).unwrap();
        assert_eq!(metrics.abstain_rate, Some(1.0));
        assert_eq!(metrics.unavailable_count, Some(0));
    }
}