use agentcarousel_core::{CaseResult, CaseStatus, EvalScores, RubricScore, Run};
use console::style;
use serde_json::Value;
/// Character budget for a humanized error line and for the "rules" evaluator rationale.
const HUMAN_ERROR_MAX: usize = 280;
/// Character budget for the judge's overall rationale summary line.
const JUDGE_SUMMARY_MAX: usize = 160;
/// Character budget for a single per-rubric rationale snippet.
const RUBRIC_SNIPPET_MAX: usize = 100;
/// Returns the text after the last `/` in `case_id`, or the whole id when it contains no `/`.
fn case_label(case_id: &str) -> &str {
    match case_id.rsplit_once('/') {
        Some((_, leaf)) => leaf,
        None => case_id,
    }
}
fn header_skill_label(run: &Run) -> String {
if let Some(ref s) = run.skill_or_agent {
return s.clone();
}
run.cases
.first()
.map(|c| {
c.case_id
.0
.rsplit_once('/')
.map(|(p, _)| p.to_string())
.unwrap_or_else(|| c.case_id.0.clone())
})
.filter(|s| !s.is_empty())
.unwrap_or_else(|| "fixtures".to_string())
}
/// Builds the "Running <command> (<mode>)" subtitle line.
///
/// A run that is not mock-only is labelled `live`. A mock-only run lists whichever of
/// `offline` / `mock-strict` are enabled, joined by ` · `, or plain `mock` when neither is.
fn run_subtitle(run: &Run) -> String {
    let cmd = run.command.as_str();
    if !run.runner_mock_only {
        return format!("Running {cmd} (live)");
    }

    // Flag table in display order; only enabled flags make it into the subtitle.
    let flags = [
        (run.runner_offline, "offline"),
        (run.runner_mock_strict, "mock-strict"),
    ];
    let enabled: Vec<&str> = flags
        .iter()
        .filter(|(on, _)| *on)
        .map(|(_, name)| *name)
        .collect();

    let inner = match enabled.is_empty() {
        true => "mock".to_string(),
        false => enabled.join(" · "),
    };
    format!("Running {cmd} ({inner})")
}
/// Converts the case's `total_latency_ms` to seconds for display.
///
/// A non-positive duration is reported as `0.1` seconds instead.
fn case_duration_secs(case: &CaseResult) -> f64 {
    let secs = case.metrics.total_latency_ms as f64 / 1000.0;
    if secs <= 0.0 {
        return 0.1;
    }
    secs
}
/// Returns the file stem of the currently running executable, lossily converted to UTF-8.
///
/// Falls back to `"agentcarousel"` when the executable path is unavailable or has no stem.
fn cli_binary_name() -> String {
    if let Ok(exe) = std::env::current_exe() {
        if let Some(stem) = exe.file_stem() {
            return stem.to_string_lossy().into_owned();
        }
    }
    "agentcarousel".to_string()
}
/// Collapses whitespace in `s` and limits the result to at most `max_chars` characters.
///
/// Every run of whitespace (spaces, tabs, newlines, …) becomes a single space, and leading and
/// trailing whitespace is dropped. When the collapsed text is longer than `max_chars`
/// characters it is cut to `max_chars - 1` characters followed by `…`, so the returned string
/// never exceeds the budget. A budget of `0` yields an empty string.
///
/// The budget counts Unicode scalar values (`char`s), not bytes or display columns.
fn truncate_human(s: &str, max_chars: usize) -> String {
    // Join the words directly into one buffer instead of going through a temporary Vec.
    let mut collapsed = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }

    if max_chars == 0 {
        // There is no room even for the ellipsis; returning "…" would exceed the budget.
        return String::new();
    }

    // The text fits exactly when there is no character at index `max_chars`.
    if collapsed.chars().nth(max_chars).is_none() {
        return collapsed;
    }

    // Byte offset of the first character to drop; always a char boundary, so `truncate`
    // cannot panic. One slot is reserved for the ellipsis.
    let cut = collapsed
        .char_indices()
        .nth(max_chars - 1)
        .map_or(collapsed.len(), |(idx, _)| idx);
    collapsed.truncate(cut);
    collapsed.push('…');
    collapsed
}
fn extract_json_message(v: &Value) -> Option<String> {
if let Some(err) = v.get("error") {
if let Some(m) = err.get("message").and_then(|x| x.as_str()) {
return Some(m.to_string());
}
}
if let Some(m) = v.get("message").and_then(|x| x.as_str()) {
return Some(m.to_string());
}
None
}
/// Turns a raw error string into a short, single-line message for the terminal.
///
/// Tried in order:
/// 1. the whole (trimmed) string parses as JSON carrying a message — show that message;
/// 2. the text from the first `{` onward parses as JSON carrying a message — show the text
///    before the brace followed by the message;
/// 3. otherwise show the trimmed input itself.
///
/// Every result is passed through `truncate_human` with `HUMAN_ERROR_MAX`. Blank input yields
/// an empty string.
fn humanize_error_line(err: &str) -> String {
    let text = err.trim();
    if text.is_empty() {
        return String::new();
    }

    // Case 1: the entire payload is a JSON document.
    let whole = serde_json::from_str::<Value>(text)
        .ok()
        .and_then(|value| extract_json_message(&value));
    if let Some(msg) = whole {
        return truncate_human(&msg, HUMAN_ERROR_MAX);
    }

    // Case 2: free-text prefix followed by a JSON document starting at the first brace.
    let embedded = text.find('{').and_then(|brace| {
        let (prefix, json) = text.split_at(brace);
        let value = serde_json::from_str::<Value>(json.trim()).ok()?;
        let msg = extract_json_message(&value)?;
        Some((prefix.trim(), truncate_human(&msg, HUMAN_ERROR_MAX)))
    });

    match embedded {
        Some((prefix, core)) if prefix.is_empty() => core,
        Some((prefix, core)) => truncate_human(&format!("{prefix} {core}"), HUMAN_ERROR_MAX),
        // Case 3: nothing structured to extract.
        None => truncate_human(text, HUMAN_ERROR_MAX),
    }
}
/// Prints evaluator-specific explanation lines for a case whose evaluation did not pass.
///
/// * `judge` — handled by `print_judge_failure_summary`.
/// * `rules` — the rationale of the first rubric whose id is `rules`, if it has one.
/// * `golden` / `process` — up to three lowest-scoring rubrics below 1.0 (placeholder text when
///   a rationale is missing), or a generic line when none scored low but the result failed.
/// * anything else — up to two lowest-scoring rubrics, skipping those without a rationale.
fn print_eval_failure_rationale(scores: &EvalScores) {
    /// Rubrics scoring below 1.0 (minus epsilon), lowest score first; ties keep input order.
    fn imperfect_lowest_first(scores: &EvalScores) -> Vec<&RubricScore> {
        let mut imperfect: Vec<&RubricScore> = Vec::new();
        for rubric in scores.rubric_scores.iter() {
            if rubric.score < 1.0 - f32::EPSILON {
                imperfect.push(rubric);
            }
        }
        imperfect.sort_by(|lhs, rhs| {
            lhs.score
                .partial_cmp(&rhs.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        imperfect
    }

    match scores.evaluator.as_str() {
        "judge" => print_judge_failure_summary(scores),
        "rules" => {
            // Only the first "rules" rubric is considered, even if it has no rationale.
            let first_rules = scores
                .rubric_scores
                .iter()
                .find(|rs| rs.rubric_id == "rules");
            if let Some(rat) = first_rules.and_then(|rs| rs.rationale.as_ref()) {
                println!(
                    "    › rules: {}",
                    style(truncate_human(rat, HUMAN_ERROR_MAX)).dim()
                );
            }
        }
        "golden" | "process" => {
            let failing = imperfect_lowest_first(scores);
            for rs in failing.iter().take(3) {
                let truncated = rs
                    .rationale
                    .as_deref()
                    .map(|text| truncate_human(text, RUBRIC_SNIPPET_MAX));
                let snippet = match truncated {
                    Some(text) if !text.is_empty() => text,
                    _ => "no rationale".to_string(),
                };
                println!(
                    "    › {} · {} ({:.2}): {}",
                    scores.evaluator,
                    rs.rubric_id,
                    rs.score,
                    style(snippet).dim()
                );
            }
            if failing.is_empty() && !scores.passed {
                println!(
                    "    › {}: {}",
                    scores.evaluator,
                    style("below effectiveness threshold or aggregate failure").dim()
                );
            }
        }
        other => {
            let low = imperfect_lowest_first(scores);
            // The limit of two applies before rubrics without a rationale are skipped.
            for rs in low.iter().take(2) {
                let snippet = match rs.rationale.as_deref() {
                    Some(text) => truncate_human(text, RUBRIC_SNIPPET_MAX),
                    None => String::new(),
                };
                if !snippet.is_empty() {
                    println!(
                        "    › {} · {} ({:.2}): {}",
                        other,
                        rs.rubric_id,
                        rs.score,
                        style(snippet).dim()
                    );
                }
            }
        }
    }
}
/// Produces the one-line summary of the judge's overall rationale, if there is anything to show.
///
/// Returns `None` when the rationale is absent, blank, or equal to the placeholder text
/// `judge completed without rationale`. Otherwise returns the rationale shortened by
/// `truncate_human` to `JUDGE_SUMMARY_MAX` characters.
fn judge_overall_summary_line(judge_rationale: Option<&str>) -> Option<String> {
    judge_rationale
        .map(str::trim)
        .filter(|jr| !jr.is_empty())
        .map(|jr| truncate_human(jr, JUDGE_SUMMARY_MAX))
        .filter(|line| !line.is_empty() && line.as_str() != "judge completed without rationale")
}
/// Prints the judge evaluator's explanation for a failed case.
///
/// Emits the overall rationale summary (when one exists), then up to two of the lowest-scoring
/// rubrics below 1.0 with their rationale snippet (or `no rationale`). When no rubric scored
/// low but the evaluation still did not pass, prints a generic threshold message.
fn print_judge_failure_summary(scores: &EvalScores) {
    let overall = judge_overall_summary_line(scores.judge_rationale.as_deref());
    if let Some(line) = overall {
        println!("    › judge: {}", style(line).dim());
    }

    // Collect imperfect rubrics, then order them lowest score first (stable for ties).
    let mut weakest: Vec<&RubricScore> = Vec::new();
    for rubric in scores.rubric_scores.iter() {
        if rubric.score < 1.0 - f32::EPSILON {
            weakest.push(rubric);
        }
    }
    weakest.sort_by(|lhs, rhs| {
        lhs.score
            .partial_cmp(&rhs.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    for rubric in weakest.iter().take(2) {
        let truncated = match rubric.rationale.as_deref() {
            Some(text) => truncate_human(text, RUBRIC_SNIPPET_MAX),
            None => String::new(),
        };
        let snippet = if truncated.is_empty() {
            "no rationale".to_string()
        } else {
            truncated
        };
        println!(
            "    › judge · {} ({:.2}): {}",
            rubric.rubric_id,
            rubric.score,
            style(snippet).dim()
        );
    }

    if weakest.is_empty() && !scores.passed {
        println!(
            "    › judge: {}",
            style("scores did not meet pass threshold").dim()
        );
    }
}
fn print_case_failure_details(case: &CaseResult) {
if let Some(out) = case.trace.final_output.as_ref() {
if !out.is_empty() && matches!(case.status, CaseStatus::Failed | CaseStatus::Error) {
let one_line = out.replace('\n', " ").trim().to_string();
let esc = if one_line.chars().count() > 120 {
format!("{}…", one_line.chars().take(117).collect::<String>())
} else {
one_line
};
println!(" agent replied: \"{}\"", style(esc).dim());
}
}
if let Some(scores) = case.eval_scores.as_ref() {
let show_eval = matches!(case.status, CaseStatus::Failed)
|| (matches!(case.status, CaseStatus::Error | CaseStatus::TimedOut) && !scores.passed);
if show_eval {
print_eval_failure_rationale(scores);
}
}
if let Some(err) = case.error.as_ref() {
if !err.is_empty() {
let human = humanize_error_line(err);
if !human.is_empty() {
println!(" › {}", style(human).dim());
}
}
}
}
/// Prints the full human-readable report for `run` to stdout.
///
/// Output order: header and run-mode subtitle; one aligned status line per case, followed by
/// failure details for failed / errored / timed-out cases and spread statistics for cases that
/// were attempted more than once; then the results summary, optional effectiveness score and
/// token usage, the certificate verdict, the run id, and a follow-up command hint.
pub fn print_terminal(run: &Run) {
    println!(
        "🎠 AgentCarousel v{} · {} · {} cases",
        run.agentcarousel_version,
        header_skill_label(run),
        run.summary.total
    );
    println!();
    println!("{}", run_subtitle(run));
    println!();

    // Label column width: the longest case label, but never narrower than 40 characters.
    let col_w = run.cases.iter().fold(40_usize, |widest, c| {
        widest.max(case_label(&c.case_id.0).chars().count())
    });

    for case in &run.cases {
        let label = case_label(&case.case_id.0);
        let padding = " ".repeat(col_w.saturating_sub(label.chars().count()));
        let secs = case_duration_secs(case);

        match case.status {
            CaseStatus::Passed => println!("    ✅ PASS    {}{}   ({:.1}s)", label, padding, secs),
            CaseStatus::Failed => println!("    ❌ FAIL    {}{}   ({:.1}s)", label, padding, secs),
            CaseStatus::Error => println!(
                "    {} ERROR   {}{}   ({:.1}s)",
                style("✖").red(),
                label,
                padding,
                secs
            ),
            CaseStatus::TimedOut => println!(
                "    {} TIMEOUT {}{}   ({:.1}s)",
                style("⏱").red(),
                label,
                padding,
                secs
            ),
            CaseStatus::Flaky => println!(
                "    {} FLAKY   {}{}   ({:.1}s)",
                style("⚠").yellow(),
                label,
                padding,
                secs
            ),
            CaseStatus::Skipped => println!(
                "    {} SKIP    {}{}   ({:.1}s)",
                style("⏭").yellow(),
                label,
                padding,
                secs
            ),
        }

        let needs_details = matches!(
            case.status,
            CaseStatus::Failed | CaseStatus::Error | CaseStatus::TimedOut
        );
        if needs_details {
            println!();
            print_case_failure_details(case);
        }

        // Spread statistics are only shown for cases that were attempted more than once.
        if case.metrics.runs_attempted > 1 {
            let stat_parts: Vec<String> = case
                .metrics
                .latency_stddev_ms
                .map(|stddev| format!("latency σ={stddev:.0}ms"))
                .into_iter()
                .chain(
                    case.metrics
                        .effectiveness_stddev
                        .map(|stddev| format!("effectiveness σ={stddev:.3}")),
                )
                .collect();
            if !stat_parts.is_empty() {
                println!(
                    "       {}",
                    style(format!("stats: {}", stat_parts.join(", "))).dim()
                );
            }
        }
    }

    let summary = &run.summary;
    println!();
    println!("  ──────────────────────────────────────────────────────");
    if summary.failed > 0 {
        let noun = match summary.failed {
            1 => "failure",
            _ => "failures",
        };
        println!(
            "  Results    {} / {} passed    {} {}",
            summary.passed, summary.total, summary.failed, noun
        );
    } else {
        println!("  Results    {} / {} passed", summary.passed, summary.total);
    }
    if let Some(mean) = summary.mean_effectiveness_score {
        println!("  Effectiveness score: {:.2} / 1.00", mean);
    }

    // The token section appears as soon as either direction was recorded; a missing one is 0.
    match (summary.tokens_in, summary.tokens_out) {
        (None, None) => {}
        (tokens_in, tokens_out) => {
            let ti = tokens_in.unwrap_or(0);
            let to = tokens_out.unwrap_or(0);
            println!();
            println!("  {}", style("Token Consumption 🪙").bold());
            println!("   › total: {}", style(ti + to).cyan());
            println!("     ├─ in:  {}", ti);
            println!("     └─ out: {}", to);
            if let Some(m) = summary.mean_tokens_per_judged_case {
                println!("   › avg tokens/judged case: {}", m);
            }
        }
    }

    // Any failed, errored, timed-out, or flaky case blocks the certificate.
    let issues = summary.failed + summary.errored + summary.timed_out + summary.flaky;
    let verdict = if issues == 0 {
        style("Certificate: ISSUED — all checks passed").green()
    } else {
        style("Certificate: NOT ISSUED — agent quarantined until all cases pass cleanly").red()
    };
    println!("  {}", verdict);

    let id = run.id.0.as_str();
    println!("  run id: {}", id);
    println!("  Next: {} report show {}", cli_binary_name(), id);
    println!("  ──────────────────────────────────────────────────────");
}
/// Prints a compact summary of `run` to stdout.
///
/// Lines, in order: the header; the case count with pass rate; failure / error / timeout counts
/// (only when at least one is non-zero); provider error counts (only when any were recorded);
/// and token totals (only when input or output tokens were recorded).
pub fn print_terminal_summary(run: &Run) {
    let summary = &run.summary;

    println!(
        "🎠 AgentCarousel v{} · {} · {} cases",
        run.agentcarousel_version,
        header_skill_label(run),
        summary.total
    );
    println!(
        "{} {} cases (pass rate {:.0}%)",
        style("Run").bold(),
        summary.total,
        summary.pass_rate * 100.0
    );

    let any_unsuccessful = summary.failed > 0 || summary.errored > 0 || summary.timed_out > 0;
    if any_unsuccessful {
        println!(
            "{} failed, {} errored, {} timed out",
            style(summary.failed).red(),
            style(summary.errored).red(),
            style(summary.timed_out).red()
        );
    }

    if let Some(error_line) = format_provider_errors(&summary.provider_errors) {
        println!("{}", style(error_line).yellow());
    }

    // A missing direction counts as 0 once the other one is present.
    match (summary.tokens_in, summary.tokens_out) {
        (None, None) => {}
        (tokens_in, tokens_out) => {
            let ti = tokens_in.unwrap_or(0);
            let to = tokens_out.unwrap_or(0);
            let mut parts = vec![
                format!("total={}", ti + to),
                format!("in={}", ti),
                format!("out={}", to),
            ];
            parts.extend(
                summary
                    .mean_tokens_per_judged_case
                    .map(|m| format!("avg_per_judged={}", m)),
            );
            println!(
                "{}",
                style(format!("🪙 tokens: {}", parts.join(", "))).dim()
            );
        }
    }
}
/// Formats the non-zero provider HTTP error counters as `provider errors: 429=…, 500=…`.
///
/// Counters are listed in the fixed order 429, 500, 503, 504; zero counters are omitted.
/// Returns `None` when every counter is zero.
fn format_provider_errors(errors: &agentcarousel_core::ProviderErrorMetrics) -> Option<String> {
    // One optional entry per status code, in display order; `None` entries drop out below.
    let parts: Vec<String> = vec![
        (errors.status_429 > 0).then(|| format!("429={}", errors.status_429)),
        (errors.status_500 > 0).then(|| format!("500={}", errors.status_500)),
        (errors.status_503 > 0).then(|| format!("503={}", errors.status_503)),
        (errors.status_504 > 0).then(|| format!("504={}", errors.status_504)),
    ]
    .into_iter()
    .flatten()
    .collect();

    (!parts.is_empty()).then(|| format!("provider errors: {}", parts.join(", ")))
}
// Unit tests for the string helpers in this file; the printing functions are not exercised here.
#[cfg(test)]
mod tests {
use super::*;
// A text prefix followed by a pretty-printed JSON body: the nested `error.message` must be
// surfaced and the raw JSON keys must not appear in the result.
#[test]
fn humanize_extracts_gemini_error_message() {
let raw = r#"live generation failed: gemini generation failed (400 Bad Request): {
"error": {
"code": 400,
"message": "API key not valid. Please pass a valid API key.",
"status": "INVALID_ARGUMENT"
}
}"#;
let h = humanize_error_line(raw);
assert!(h.contains("API key not valid"));
assert!(!h.contains("\"error\""));
}
// The whole input is a JSON object: only the message text is returned.
#[test]
fn humanize_pure_json_error_object() {
let raw = r#"{"error":{"message":"Rate limited"}}"#;
assert_eq!(humanize_error_line(raw), "Rate limited");
}
// Plain text longer than HUMAN_ERROR_MAX is cut and ends with an ellipsis.
#[test]
fn humanize_fallback_truncates_long_plain_text() {
let raw = "x".repeat(400);
let h = humanize_error_line(&raw);
assert!(h.ends_with('…'));
assert!(h.chars().count() <= HUMAN_ERROR_MAX + 1);
}
// Leading/trailing whitespace is dropped and inner runs collapse to a single space.
#[test]
fn truncate_human_collapses_whitespace() {
assert_eq!(truncate_human(" hello world ", 100), "hello world");
}
// The placeholder rationale is suppressed; a real rationale is passed through.
#[test]
fn judge_overall_summary_omits_placeholder() {
assert_eq!(
judge_overall_summary_line(Some("judge completed without rationale")),
None
);
assert_eq!(
judge_overall_summary_line(Some("Missing registry URL in stub.")),
Some("Missing registry URL in stub.".to_string())
);
}
}