1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use serde::{Deserialize, Serialize};
4use std::path::Path;
5use uuid::Uuid;
6
7#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9pub struct EvalCaseResultArtifact {
10 pub case_id: Uuid,
11 pub score: f32,
12 pub passed: bool,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
17pub struct EvalSummaryArtifact {
18 pub total_cases: usize,
19 pub passed_cases: usize,
20 pub pass_rate: f32,
21 pub overall_pass: bool,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
26pub struct EvalResultsArtifact {
27 pub schema_version: String,
28 pub generated_at: DateTime<Utc>,
29 pub suite_name: String,
30 pub suite_version: String,
31 pub suite_digest: String,
32 pub summary: EvalSummaryArtifact,
33 pub case_results: Vec<EvalCaseResultArtifact>,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
38pub struct DiffSummaryArtifact {
39 pub spec_changed_paths: Vec<String>,
40 pub spec_only_in_a: Vec<String>,
41 pub spec_only_in_b: Vec<String>,
42 pub run_events_a: usize,
43 pub run_events_b: usize,
44 pub run_added: usize,
45 pub run_removed: usize,
46 pub run_reordered: usize,
47 pub run_param_changed: usize,
48}
49
50pub fn write_eval_results_json(path: &Path, artifact: &EvalResultsArtifact) -> Result<()> {
52 let content = serde_json::to_string_pretty(artifact).context("serialize eval artifact")?;
53 std::fs::write(path, content).with_context(|| format!("write {:?}", path))?;
54 Ok(())
55}
56
57pub fn render_diff_summary_md(artifact: &DiffSummaryArtifact) -> String {
59 let mut out = String::new();
60 out.push_str("# Diff Summary\n\n");
61 out.push_str("## Spec\n");
62 out.push_str(&format!(
63 "- changed paths: {}\n- only in A: {}\n- only in B: {}\n\n",
64 artifact.spec_changed_paths.len(),
65 artifact.spec_only_in_a.len(),
66 artifact.spec_only_in_b.len()
67 ));
68
69 if !artifact.spec_changed_paths.is_empty() {
70 out.push_str("### Changed Paths\n");
71 for p in &artifact.spec_changed_paths {
72 out.push_str(&format!("- `{}`\n", p));
73 }
74 out.push('\n');
75 }
76
77 out.push_str("## Run\n");
78 out.push_str(&format!(
79 "- events A: {}\n- events B: {}\n- added tool calls: {}\n- removed tool calls: {}\n- reordered tool calls: {}\n- param changed: {}\n",
80 artifact.run_events_a,
81 artifact.run_events_b,
82 artifact.run_added,
83 artifact.run_removed,
84 artifact.run_reordered,
85 artifact.run_param_changed
86 ));
87 out
88}
89
90pub fn write_diff_summary_md(path: &Path, artifact: &DiffSummaryArtifact) -> Result<()> {
92 let md = render_diff_summary_md(artifact);
93 std::fs::write(path, md).with_context(|| format!("write {:?}", path))?;
94 Ok(())
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A serialized `EvalResultsArtifact` must expose every top-level key and
    /// carry the nested summary and case values through unchanged.
    #[test]
    fn eval_results_schema_has_expected_keys() {
        // Fixed inputs so the serialized form is fully deterministic.
        let generated_at = DateTime::parse_from_rfc3339("2026-01-01T00:00:00Z")
            .expect("parse RFC3339")
            .with_timezone(&Utc);
        let case_id =
            Uuid::parse_str("11111111-1111-1111-1111-111111111111").expect("valid UUID");

        let summary = EvalSummaryArtifact {
            total_cases: 2,
            passed_cases: 1,
            pass_rate: 0.5,
            overall_pass: false,
        };
        let only_case = EvalCaseResultArtifact {
            case_id,
            score: 1.0,
            passed: true,
        };
        let artifact = EvalResultsArtifact {
            schema_version: String::from("1.0"),
            generated_at,
            suite_name: String::from("smoke"),
            suite_version: String::from("0.1.0"),
            suite_digest: String::from("abc"),
            summary,
            case_results: vec![only_case],
        };

        let value = serde_json::to_value(&artifact).expect("serialize artifact");
        let top_level = value.as_object().expect("artifact object");

        for key in [
            "schema_version",
            "generated_at",
            "suite_name",
            "suite_version",
            "suite_digest",
            "summary",
            "case_results",
        ] {
            assert!(top_level.contains_key(key), "missing top-level key `{}`", key);
        }

        assert_eq!(value["summary"]["total_cases"], json!(2));
        assert_eq!(value["summary"]["passed_cases"], json!(1));
        assert_eq!(value["case_results"][0]["score"], json!(1.0));
    }

    /// The Markdown rendering is pinned character for character, so any
    /// change to the report layout fails this test.
    #[test]
    fn diff_summary_markdown_render_is_stable() {
        let artifact = DiffSummaryArtifact {
            spec_changed_paths: ["/model", "/routing/strategy"]
                .iter()
                .map(|p| p.to_string())
                .collect(),
            spec_only_in_a: vec![String::from("/legacy")],
            spec_only_in_b: Vec::new(),
            run_events_a: 12,
            run_events_b: 14,
            run_added: 1,
            run_removed: 0,
            run_reordered: 2,
            run_param_changed: 3,
        };

        // One literal per output line keeps the expected report readable.
        let expected = concat!(
            "# Diff Summary\n",
            "\n",
            "## Spec\n",
            "- changed paths: 2\n",
            "- only in A: 1\n",
            "- only in B: 0\n",
            "\n",
            "### Changed Paths\n",
            "- `/model`\n",
            "- `/routing/strategy`\n",
            "\n",
            "## Run\n",
            "- events A: 12\n",
            "- events B: 14\n",
            "- added tool calls: 1\n",
            "- removed tool calls: 0\n",
            "- reordered tool calls: 2\n",
            "- param changed: 3\n",
        );

        assert_eq!(render_diff_summary_md(&artifact), expected);
    }
}