use std::fs::{self, File};
use std::io::Write;
use std::path::Path;
use crate::Result;
use crate::suite::{EvalResult, ScenarioStatus};
/// Writes every report artifact for `result` into `output_dir`.
///
/// The directory (including missing parents) is created first. Files are then
/// written in this order: `summary.json`, `summary.md`, `failures.md`,
/// `per_scenario.jsonl`, and, only when `write_junit` is `true`, `junit.xml`.
///
/// # Errors
///
/// Returns the first error raised while creating the directory, serializing the
/// result to JSON, or writing a file. Nothing written before the failure is
/// removed.
pub fn write_outputs(result: &EvalResult, output_dir: &Path, write_junit: bool) -> Result<()> {
    // Resolves an artifact file name inside the output directory.
    let artifact = |file_name: &str| output_dir.join(file_name);

    fs::create_dir_all(output_dir)?;

    let summary_json = serde_json::to_string_pretty(result)?;
    fs::write(artifact("summary.json"), summary_json)?;
    fs::write(artifact("summary.md"), render_summary(result))?;
    fs::write(artifact("failures.md"), render_failures(result))?;
    write_jsonl(result, &artifact("per_scenario.jsonl"))?;

    if !write_junit {
        return Ok(());
    }
    fs::write(artifact("junit.xml"), render_junit(result))?;
    Ok(())
}
/// Writes each scenario in `result` to `path` as one compact JSON document per line.
///
/// The file is created, or truncated if it already exists. Output goes through a
/// buffered writer so that a large suite does not issue a separate write to the
/// file for every line.
///
/// # Errors
///
/// Returns an error if the file cannot be created, a scenario cannot be
/// serialized, or any write (including the final flush) fails.
fn write_jsonl(result: &EvalResult, path: &Path) -> Result<()> {
    let mut writer = std::io::BufWriter::new(File::create(path)?);
    for scenario in &result.scenarios {
        writeln!(writer, "{}", serde_json::to_string(scenario)?)?;
    }
    // Flush explicitly: dropping a `BufWriter` silently discards any error from
    // writing out the remaining buffered bytes.
    writer.flush()?;
    Ok(())
}
/// Renders the Markdown summary report for `result`.
///
/// The report contains a title with the suite name, the agent, the
/// pass/fail/skip counts with a pass percentage (reported as `0.0` when there
/// are no scenarios), timing figures, and a table with one row per scenario
/// showing its id, status label, number of attempts and duration.
///
/// Scenario ids and status labels are sanitized before being placed in the
/// table, because a literal `|` or a line break inside a cell would otherwise
/// break the table layout.
fn render_summary(result: &EvalResult) -> String {
    /// Makes `text` safe for a single Markdown table cell: pipes are
    /// backslash-escaped and line breaks are replaced with spaces.
    fn table_cell(text: &str) -> String {
        let mut cell = String::with_capacity(text.len());
        for ch in text.chars() {
            match ch {
                '|' => cell.push_str("\\|"),
                '\n' | '\r' => cell.push(' '),
                other => cell.push(other),
            }
        }
        cell
    }

    // Guard the division so an empty suite reports 0.0% instead of NaN.
    let pass_rate = if result.total == 0 {
        0.0
    } else {
        result.passed as f64 / result.total as f64 * 100.0
    };

    let mut out = String::new();
    out.push_str(&format!("# Eval Results: {}\n\n", result.suite));
    out.push_str(&format!("Agent: `{}`\n\n", result.agent));
    out.push_str(&format!(
        "Passed: {}/{} ({:.1}%) | Failed: {} | Skipped: {}\n\n",
        result.passed, result.total, pass_rate, result.failed, result.skipped
    ));
    out.push_str(&format!(
        "Duration: {}ms | Avg latency: {:.1}ms/turn\n\n",
        result.duration_ms, result.metrics.avg_latency_ms
    ));
    out.push_str("## Scenarios\n\n| Scenario | Status | Attempts | Duration |\n|----------|--------|----------|----------|\n");
    for scenario in &result.scenarios {
        out.push_str(&format!(
            "| `{}` | {} | {} | {}ms |\n",
            table_cell(&scenario.id),
            table_cell(&status_label(&scenario.status)),
            scenario.attempts.len(),
            scenario.duration_ms
        ));
    }
    out
}
/// Renders the Markdown failure report for `result`.
///
/// Only scenarios whose status is failed or error get a section. Each section
/// shows the scenario id and status label, followed by the turns of the
/// scenario's last attempt that have at least one assertion that did not pass.
/// For such a turn the input, the response and every failed assertion
/// (name, expected, actual) are listed. Turn numbers are shown 1-based.
fn render_failures(result: &EvalResult) -> String {
    let mut report = format!("# Eval Failures: {}\n\n", result.suite);

    let unsuccessful = result
        .scenarios
        .iter()
        .filter(|scenario| scenario.status.is_failed() || scenario.status.is_error());

    for scenario in unsuccessful {
        report += &format!("## {}\n\n", scenario.id);
        report += &format!("Status: {}\n\n", status_label(&scenario.status));

        // Only the final attempt is reported; a scenario without attempts
        // contributes just its heading and status.
        let last_attempt_turns = scenario
            .attempts
            .last()
            .into_iter()
            .flat_map(|attempt| attempt.turns.iter());

        for turn in last_attempt_turns {
            let mut unmet = turn
                .assertion_results
                .iter()
                .filter(|check| !check.passed)
                .peekable();
            // Turns where every assertion passed are left out entirely.
            if unmet.peek().is_none() {
                continue;
            }

            report += &format!("### Turn {}\n\n", turn.index + 1);
            report += &format!("Input: `{}`\n\n", turn.input.value);
            report += &format!("Response: `{}`\n\n", turn.response.value);
            for check in unmet {
                report += &format!(
                    "- `{}` failed: expected `{}`, actual `{}`\n",
                    check.assertion, check.expected, check.actual
                );
            }
            report.push('\n');
        }
    }

    report
}
fn render_junit(result: &EvalResult) -> String {
let mut out = String::new();
out.push_str(&format!(
"<testsuite name=\"{}\" tests=\"{}\" failures=\"{}\" skipped=\"{}\">\n",
escape_xml(&result.suite),
result.total,
result.failed,
result.skipped
));
for scenario in &result.scenarios {
out.push_str(&format!(
" <testcase name=\"{}\">\n",
escape_xml(&scenario.id)
));
match &scenario.status {
ScenarioStatus::Passed => {}
ScenarioStatus::Skipped { reason } => {
out.push_str(&format!(
" <skipped message=\"{}\" />\n",
escape_xml(reason.as_deref().unwrap_or("skipped"))
));
}
ScenarioStatus::Failed { reason } => {
out.push_str(&format!(
" <failure message=\"{}\" />\n",
escape_xml(reason)
));
}
ScenarioStatus::Error { message } => {
out.push_str(&format!(
" <error message=\"{}\" />\n",
escape_xml(message)
));
}
}
out.push_str(" </testcase>\n");
}
out.push_str("</testsuite>\n");
out
}
fn status_label(status: &ScenarioStatus) -> String {
match status {
ScenarioStatus::Passed => "passed".to_string(),
ScenarioStatus::Failed { reason } => format!("failed: {}", reason),
ScenarioStatus::Skipped { reason } => {
format!("skipped: {}", reason.as_deref().unwrap_or("skipped"))
}
ScenarioStatus::Error { message } => format!("error: {}", message),
}
}
/// Escapes `input` so it can be embedded in XML text or in a double-quoted
/// attribute value.
///
/// The characters `&`, `<`, `>` and `"` are replaced with the predefined XML
/// entities `&amp;`, `&lt;`, `&gt;` and `&quot;`. Every other character is
/// copied unchanged. The input is processed in a single pass, so an ampersand
/// is escaped exactly once and the entities produced here are never escaped
/// again.
fn escape_xml(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::redaction::RedactedString;
    use crate::suite::{AttemptResult, EvalResult, ScenarioResult, ScenarioStatus, TurnResult};

    /// Builds turn evidence whose `response_metadata` and `context` both carry
    /// a marker string that the written JSON outputs must not contain.
    fn evidence_with_secret() -> crate::evidence::TurnEvidence {
        let secret = || serde_json::json!({"secret":"should-not-serialize"});
        crate::evidence::TurnEvidence {
            response_metadata: Some(secret()),
            state: None,
            state_history: Vec::new(),
            context: secret(),
            tool_executions: Vec::new(),
            skill: None,
            disambiguation: None,
            facts: None,
            relationship: None,
            persona: None,
            orchestration: None,
            observability: None,
        }
    }

    /// Builds the single turn of the fixture, with redacted input and response
    /// and no assertion results.
    fn redacted_turn() -> TurnResult {
        TurnResult {
            index: 0,
            input: RedactedString::redacted("[redacted]"),
            response: RedactedString::redacted("[redacted]"),
            state: None,
            metadata: None,
            evidence: evidence_with_secret(),
            assertion_results: Vec::new(),
            latency_ms: 1,
            observability_span_id: None,
        }
    }

    /// Builds a passed scenario consisting of one attempt with one turn.
    fn passing_scenario() -> ScenarioResult {
        let only_attempt = AttemptResult {
            attempt: 0,
            turns: vec![redacted_turn()],
            status: ScenarioStatus::Passed,
            duration_ms: 1,
        };
        ScenarioResult {
            id: "scenario".to_string(),
            name: None,
            tags: Vec::new(),
            language: None,
            status: ScenarioStatus::Passed,
            failure_category: None,
            flaky: false,
            attempts: vec![only_attempt],
            duration_ms: 1,
            retries_used: 0,
        }
    }

    /// Builds an eval result holding exactly one passed scenario.
    fn sample_result() -> EvalResult {
        EvalResult {
            schema_version: 1,
            suite: "suite".to_string(),
            agent: "agent.yaml".to_string(),
            total: 1,
            passed: 1,
            failed: 0,
            skipped: 0,
            duration_ms: 1,
            scenarios: vec![passing_scenario()],
            metrics: crate::metrics::EvalMetrics::default(),
            observability: None,
        }
    }

    /// Writes all outputs to a fresh temporary directory and checks that the
    /// JSON artifacts do not contain the evidence marker and that the JUnit
    /// file was produced.
    #[test]
    fn json_outputs_omit_raw_evidence() {
        // A random suffix keeps concurrent test runs from sharing a directory.
        let dir_name = format!("ai_agents_eval_output_test_{}", uuid::Uuid::new_v4());
        let out_dir = std::env::temp_dir().join(dir_name);

        write_outputs(&sample_result(), &out_dir, true).unwrap();

        let summary_json = std::fs::read_to_string(out_dir.join("summary.json")).unwrap();
        let scenario_lines =
            std::fs::read_to_string(out_dir.join("per_scenario.jsonl")).unwrap();

        assert!(!summary_json.contains("should-not-serialize"));
        assert!(!scenario_lines.contains("should-not-serialize"));
        assert!(out_dir.join("junit.xml").exists());

        // Best-effort cleanup; a failure to remove the directory is ignored.
        let _ = std::fs::remove_dir_all(out_dir);
    }
}