use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::conversation::Transcript;
use crate::eval::EvalOutcome;
use crate::provider::Usage;
use crate::skill::Finding;
/// Result of a single run of one case, with the metadata needed to report on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct CaseRun {
/// Name of the case; [`Report::new`] counts distinct values as `Summary::cases`.
pub case: String,
// NOTE(review): presumably the skill under evaluation; not read in this file — confirm.
pub skill: String,
/// Platform label, printed as `[platform/model]` by [`Report::to_human`].
pub platform: String,
/// Model label, printed as `[platform/model]` by [`Report::to_human`].
pub model: String,
/// Whether this run passed; drives the `PASS`/`FAIL` mark and the summary counts.
pub passed: bool,
// NOTE(review): presumably the number of conversation turns taken; not read in this file — confirm.
pub turns: usize,
/// Eval outcomes for this run; [`Report::to_human`] lists the ones that did not pass.
pub evals: Vec<EvalOutcome>,
/// Transcript attached to this run.
pub transcript: Transcript,
/// Usage recorded for this run, if any. A missing field deserializes to `None`,
/// and `None` is left out of serialized output.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub usage: Option<Usage>,
}
/// Aggregate counts for a [`Report`], as computed by [`Report::new`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct Summary {
/// Number of distinct case names across all runs.
pub cases: usize,
/// Total number of runs.
pub runs: usize,
/// Number of runs that passed.
pub passed: usize,
/// Number of runs that did not pass (`runs - passed`).
pub failed: usize,
/// Usage summed over the runs that carried any. A missing field deserializes to
/// `None`, and `None` is left out of serialized output.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub usage: Option<Usage>,
}
/// Full result of an evaluation: overall verdict, aggregate summary, and every run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct Report {
/// Overall verdict. [`Report::new`] sets this to `true` only when there is at
/// least one run and no run failed.
pub passed: bool,
/// Aggregate counts and usage derived from `runs`.
pub summary: Summary,
/// The individual runs, in the order they were supplied.
pub runs: Vec<CaseRun>,
}
impl Report {
    /// Builds a report from `runs`, computing the summary and the overall verdict.
    ///
    /// - `summary.cases` is the number of distinct [`CaseRun::case`] values.
    /// - `summary.usage` is the sum of every run's usage, or `None` when the
    ///   accumulated total reports itself empty via `Usage::is_empty`.
    /// - `passed` is `true` only when there is at least one run and none failed,
    ///   so a report with no runs never counts as a pass.
    #[must_use]
    pub fn new(runs: Vec<CaseRun>) -> Self {
        // NOTE(review): cases are de-duplicated by name only. If two skills can
        // share a case name they are counted once — confirm names are unique.
        let mut case_names: Vec<&str> = runs.iter().map(|run| run.case.as_str()).collect();
        case_names.sort_unstable();
        case_names.dedup();

        let passed_runs = runs.iter().filter(|run| run.passed).count();

        let mut total_usage = Usage::default();
        for usage in runs.iter().filter_map(|run| run.usage.as_ref()) {
            total_usage.add(usage);
        }
        let usage = (!total_usage.is_empty()).then_some(total_usage);

        let summary = Summary {
            cases: case_names.len(),
            runs: runs.len(),
            passed: passed_runs,
            failed: runs.len() - passed_runs,
            usage,
        };
        Self {
            passed: summary.failed == 0 && !runs.is_empty(),
            summary,
            runs,
        }
    }

    /// Serializes the report as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// Returns the [`serde_json::Error`] produced by `serde_json::to_string_pretty`
    /// if serialization fails.
    pub fn to_json(&self) -> Result<String, serde_json::Error> {
        serde_json::to_string_pretty(self)
    }

    /// Renders the report as plain text for terminal output.
    ///
    /// One `PASS`/`FAIL` line is written per run, followed by one line per eval
    /// that did not pass, then a `passed/total` line, and finally a `usage:`
    /// line when the summary carries a cost or token count.
    #[must_use]
    pub fn to_human(&self) -> String {
        // Scoped import: only this method needs the `write!` family on `String`.
        use std::fmt::Write as _;

        let mut out = String::new();
        for run in &self.runs {
            let mark = if run.passed { "PASS" } else { "FAIL" };
            // `fmt::Write` for `String` never fails, so the results are discarded.
            // Writing straight into `out` avoids a temporary `String` per line.
            let _ = writeln!(out, "{mark} {} [{}/{}]", run.case, run.platform, run.model);
            for eval in run.evals.iter().filter(|eval| !eval.passed) {
                let _ = writeln!(
                    out,
                    " - {}: {} ({})",
                    eval.label,
                    eval.detail.summary(),
                    eval.reason
                );
            }
        }
        let _ = writeln!(
            out,
            "{}/{} runs passed",
            self.summary.passed, self.summary.runs
        );

        if let Some(usage) = &self.summary.usage {
            let mut parts = Vec::new();
            if let Some(cost) = usage.cost_usd {
                parts.push(format!("${cost:.4}"));
            }
            // Both counts share one compact entry; a lone count is spelled out.
            match (usage.input_tokens, usage.output_tokens) {
                (Some(i), Some(o)) => parts.push(format!("{i} in / {o} out tokens")),
                (Some(i), None) => parts.push(format!("{i} input tokens")),
                (None, Some(o)) => parts.push(format!("{o} output tokens")),
                (None, None) => {}
            }
            // Skip the line entirely when neither cost nor tokens are known.
            if !parts.is_empty() {
                let _ = writeln!(out, "usage: {}", parts.join(", "));
            }
        }
        out
    }
}
/// One validation problem in serializable form, produced by [`ValidationReport::new`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ValidationFinding {
/// The finding's `skill` value rendered as text via `to_string_lossy`.
pub skill: String,
/// Message copied from the source finding.
pub message: String,
}
/// Outcome of validation: a verdict plus the findings behind it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ValidationReport {
/// `true` when there are no findings, as set by [`ValidationReport::new`].
pub valid: bool,
/// The findings, in the order they were supplied.
pub findings: Vec<ValidationFinding>,
}
impl ValidationReport {
    /// Converts `findings` into a serializable report.
    ///
    /// Each [`Finding`] becomes a [`ValidationFinding`]: its `skill` is rendered
    /// with `to_string_lossy` and its `message` is cloned. The report is `valid`
    /// exactly when `findings` is empty.
    #[must_use]
    pub fn new(findings: &[Finding]) -> Self {
        let mut converted = Vec::with_capacity(findings.len());
        for finding in findings {
            let skill = finding.skill.to_string_lossy().into_owned();
            let message = finding.message.clone();
            converted.push(ValidationFinding { skill, message });
        }
        Self {
            valid: findings.is_empty(),
            findings: converted,
        }
    }

    /// Serializes the report as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// Returns the [`serde_json::Error`] produced by `serde_json::to_string_pretty`
    /// if serialization fails.
    pub fn to_json(&self) -> Result<String, serde_json::Error> {
        serde_json::to_string_pretty(self)
    }
}