use std::fmt::Write as _;

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

use crate::conversation::Transcript;
use crate::eval::EvalOutcome;
use crate::provider::Usage;
use crate::skill::Finding;
13
14#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
16pub struct CaseRun {
17 pub case: String,
19 pub skill: String,
21 pub platform: String,
23 pub model: String,
25 pub passed: bool,
27 pub turns: usize,
29 pub evals: Vec<EvalOutcome>,
31 pub transcript: Transcript,
33 #[serde(default, skip_serializing_if = "Option::is_none")]
38 pub usage: Option<Usage>,
39}
40
41#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
43pub struct Summary {
44 pub cases: usize,
46 pub runs: usize,
48 pub passed: usize,
50 pub failed: usize,
52 #[serde(default, skip_serializing_if = "Option::is_none")]
55 pub usage: Option<Usage>,
56}
57
58#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
60pub struct Report {
61 pub passed: bool,
63 pub summary: Summary,
65 pub runs: Vec<CaseRun>,
67}
68
69impl Report {
70 #[must_use]
72 pub fn new(runs: Vec<CaseRun>) -> Self {
73 let mut case_names: Vec<&str> = runs.iter().map(|r| r.case.as_str()).collect();
74 case_names.sort_unstable();
75 case_names.dedup();
76 let passed_runs = runs.iter().filter(|r| r.passed).count();
77 let mut total_usage = Usage::default();
78 for run in &runs {
79 if let Some(u) = &run.usage {
80 total_usage.add(u);
81 }
82 }
83 let usage = (!total_usage.is_empty()).then_some(total_usage);
84 let summary = Summary {
85 cases: case_names.len(),
86 runs: runs.len(),
87 passed: passed_runs,
88 failed: runs.len() - passed_runs,
89 usage,
90 };
91 Report {
92 passed: summary.failed == 0 && !runs.is_empty(),
93 summary,
94 runs,
95 }
96 }
97
98 pub fn to_json(&self) -> Result<String, serde_json::Error> {
104 serde_json::to_string_pretty(self)
105 }
106
107 #[must_use]
110 pub fn to_human(&self) -> String {
111 let mut out = String::new();
112 for run in &self.runs {
113 let mark = if run.passed { "PASS" } else { "FAIL" };
114 out.push_str(&format!(
115 "{mark} {} [{}/{}]\n",
116 run.case, run.platform, run.model
117 ));
118 for eval in &run.evals {
119 if !eval.passed {
120 out.push_str(&format!(
121 " - {}: {} ({})\n",
122 eval.label,
123 eval.detail.summary(),
124 eval.reason
125 ));
126 }
127 }
128 }
129 out.push_str(&format!(
130 "{}/{} runs passed\n",
131 self.summary.passed, self.summary.runs
132 ));
133 if let Some(usage) = &self.summary.usage {
134 let mut parts = Vec::new();
135 if let Some(cost) = usage.cost_usd {
136 parts.push(format!("${cost:.4}"));
137 }
138 if let (Some(i), Some(o)) = (usage.input_tokens, usage.output_tokens) {
139 parts.push(format!("{} in / {} out tokens", i, o));
140 } else {
141 if let Some(i) = usage.input_tokens {
142 parts.push(format!("{i} input tokens"));
143 }
144 if let Some(o) = usage.output_tokens {
145 parts.push(format!("{o} output tokens"));
146 }
147 }
148 if !parts.is_empty() {
149 out.push_str(&format!("usage: {}\n", parts.join(", ")));
150 }
151 }
152 out
153 }
154}
155
156#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
159pub struct ValidationFinding {
160 pub skill: String,
162 pub message: String,
164}
165
166#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
168pub struct ValidationReport {
169 pub valid: bool,
171 pub findings: Vec<ValidationFinding>,
173}
174
175impl ValidationReport {
176 #[must_use]
178 pub fn new(findings: &[Finding]) -> Self {
179 ValidationReport {
180 valid: findings.is_empty(),
181 findings: findings
182 .iter()
183 .map(|f| ValidationFinding {
184 skill: f.skill.to_string_lossy().into_owned(),
185 message: f.message.clone(),
186 })
187 .collect(),
188 }
189 }
190
191 pub fn to_json(&self) -> Result<String, serde_json::Error> {
197 serde_json::to_string_pretty(self)
198 }
199}