Skip to main content

aivcs_core/
reporting.rs

1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use serde::{Deserialize, Serialize};
4use std::path::Path;
5use uuid::Uuid;
6
7/// Single eval case result in the persisted eval results artifact.
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9pub struct EvalCaseResultArtifact {
10    pub case_id: Uuid,
11    pub score: f32,
12    pub passed: bool,
13}
14
15/// Eval summary section persisted in eval_results.json.
16#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
17pub struct EvalSummaryArtifact {
18    pub total_cases: usize,
19    pub passed_cases: usize,
20    pub pass_rate: f32,
21    pub overall_pass: bool,
22}
23
24/// Canonical eval results artifact written for CI and PR reporting.
25#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
26pub struct EvalResultsArtifact {
27    pub schema_version: String,
28    pub generated_at: DateTime<Utc>,
29    pub suite_name: String,
30    pub suite_version: String,
31    pub suite_digest: String,
32    pub summary: EvalSummaryArtifact,
33    pub case_results: Vec<EvalCaseResultArtifact>,
34}
35
36/// Compact data model used to render diff_summary.md.
37#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
38pub struct DiffSummaryArtifact {
39    pub spec_changed_paths: Vec<String>,
40    pub spec_only_in_a: Vec<String>,
41    pub spec_only_in_b: Vec<String>,
42    pub run_events_a: usize,
43    pub run_events_b: usize,
44    pub run_added: usize,
45    pub run_removed: usize,
46    pub run_reordered: usize,
47    pub run_param_changed: usize,
48}
49
50/// Write eval_results.json in pretty JSON format.
51pub fn write_eval_results_json(path: &Path, artifact: &EvalResultsArtifact) -> Result<()> {
52    let content = serde_json::to_string_pretty(artifact).context("serialize eval artifact")?;
53    std::fs::write(path, content).with_context(|| format!("write {:?}", path))?;
54    Ok(())
55}
56
57/// Render markdown summary for PR/comment/check output.
58pub fn render_diff_summary_md(artifact: &DiffSummaryArtifact) -> String {
59    let mut out = String::new();
60    out.push_str("# Diff Summary\n\n");
61    out.push_str("## Spec\n");
62    out.push_str(&format!(
63        "- changed paths: {}\n- only in A: {}\n- only in B: {}\n\n",
64        artifact.spec_changed_paths.len(),
65        artifact.spec_only_in_a.len(),
66        artifact.spec_only_in_b.len()
67    ));
68
69    if !artifact.spec_changed_paths.is_empty() {
70        out.push_str("### Changed Paths\n");
71        for p in &artifact.spec_changed_paths {
72            out.push_str(&format!("- `{}`\n", p));
73        }
74        out.push('\n');
75    }
76
77    out.push_str("## Run\n");
78    out.push_str(&format!(
79        "- events A: {}\n- events B: {}\n- added tool calls: {}\n- removed tool calls: {}\n- reordered tool calls: {}\n- param changed: {}\n",
80        artifact.run_events_a,
81        artifact.run_events_b,
82        artifact.run_added,
83        artifact.run_removed,
84        artifact.run_reordered,
85        artifact.run_param_changed
86    ));
87    out
88}
89
90/// Write diff_summary.md.
91pub fn write_diff_summary_md(path: &Path, artifact: &DiffSummaryArtifact) -> Result<()> {
92    let md = render_diff_summary_md(artifact);
93    std::fs::write(path, md).with_context(|| format!("write {:?}", path))?;
94    Ok(())
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Top-level keys the serialized eval results artifact must expose.
    const TOP_LEVEL_KEYS: [&str; 7] = [
        "schema_version",
        "generated_at",
        "suite_name",
        "suite_version",
        "suite_digest",
        "summary",
        "case_results",
    ];

    /// Fixed eval artifact: a two-case summary with one recorded case result.
    fn sample_eval_results() -> EvalResultsArtifact {
        let generated_at = DateTime::parse_from_rfc3339("2026-01-01T00:00:00Z")
            .expect("parse RFC3339")
            .with_timezone(&Utc);
        let case_id =
            Uuid::parse_str("11111111-1111-1111-1111-111111111111").expect("valid UUID");

        EvalResultsArtifact {
            schema_version: "1.0".to_string(),
            generated_at,
            suite_name: "smoke".to_string(),
            suite_version: "0.1.0".to_string(),
            suite_digest: "abc".to_string(),
            summary: EvalSummaryArtifact {
                total_cases: 2,
                passed_cases: 1,
                pass_rate: 0.5,
                overall_pass: false,
            },
            case_results: vec![EvalCaseResultArtifact {
                case_id,
                score: 1.0,
                passed: true,
            }],
        }
    }

    /// Fixed diff summary with two changed paths and non-trivial run counters.
    fn sample_diff_summary() -> DiffSummaryArtifact {
        DiffSummaryArtifact {
            spec_changed_paths: vec!["/model".to_string(), "/routing/strategy".to_string()],
            spec_only_in_a: vec!["/legacy".to_string()],
            spec_only_in_b: vec![],
            run_events_a: 12,
            run_events_b: 14,
            run_added: 1,
            run_removed: 0,
            run_reordered: 2,
            run_param_changed: 3,
        }
    }

    /// The JSON form keeps every top-level key and the expected nested values.
    #[test]
    fn eval_results_schema_has_expected_keys() {
        let raw = serde_json::to_value(&sample_eval_results()).expect("serialize artifact");
        let obj = raw.as_object().expect("artifact object");

        for key in TOP_LEVEL_KEYS.iter() {
            assert!(obj.contains_key(*key), "missing top-level key {:?}", key);
        }

        let summary = &raw["summary"];
        assert_eq!(summary["total_cases"], json!(2));
        assert_eq!(summary["passed_cases"], json!(1));
        assert_eq!(raw["case_results"][0]["score"], json!(1.0));
    }

    /// The rendered markdown must match the pinned text byte for byte.
    #[test]
    fn diff_summary_markdown_render_is_stable() {
        let expected = "# Diff Summary\n\n## Spec\n- changed paths: 2\n- only in A: 1\n- only in B: 0\n\n### Changed Paths\n- `/model`\n- `/routing/strategy`\n\n## Run\n- events A: 12\n- events B: 14\n- added tool calls: 1\n- removed tool calls: 0\n- reordered tool calls: 2\n- param changed: 3\n";
        let actual = render_diff_summary_md(&sample_diff_summary());
        assert_eq!(actual, expected);
    }
}