Skip to main content

swink_agent_eval/report/
console.rs

1//! Plain-text, line-oriented console reporter.
2//!
3//! Spec 043 clarification Q8 and §FR-041 pin terminal output to plain text:
4//! no ANSI color, no cursor control, no interactivity. The reporter emits
5//! one line per case verdict followed by indented per-evaluator detail, plus
6//! a trailing summary block.
7//!
8//! # Example output
9//!
10//! ```text
11//! Eval set: demo-set (3/4 passed)
12//! - case_a  PASS  (120ms)
13//!     helpfulness  score=0.82  threshold=0.50  PASS
14//!     correctness  score=0.91  threshold=0.60  PASS
15//! - case_b  FAIL  (150ms)
16//!     helpfulness  score=0.12  threshold=0.50  FAIL  reason: off-topic
17//! Summary: 3 passed, 1 failed, total_cost=$0.012345, duration=420ms
18//! ```
19
20use std::fmt::Write as _;
21
22use crate::{EvalCaseResult, EvalMetricResult, EvalSetResult, Verdict};
23
24use super::{Reporter, ReporterError, ReporterOutput};
25
/// Plain-text terminal reporter that is always available (spec 043 §FR-041, Q8).
///
/// This is a unit struct: it carries no configuration and no state, so every
/// instance is interchangeable and the type is trivially `Copy`. Output is
/// rendered the same way regardless of what the attached terminal supports.
#[derive(Clone, Copy, Debug, Default)]
pub struct ConsoleReporter;
33
34impl ConsoleReporter {
35    /// Create a new reporter. Present for API symmetry with peer reporters.
36    #[must_use]
37    pub const fn new() -> Self {
38        Self
39    }
40}
41
42impl Reporter for ConsoleReporter {
43    fn render(&self, result: &EvalSetResult) -> Result<ReporterOutput, ReporterError> {
44        let mut out = String::new();
45        writeln!(
46            out,
47            "Eval set: {id} ({passed}/{total} passed)",
48            id = result.eval_set_id,
49            passed = result.summary.passed,
50            total = result.summary.total_cases,
51        )
52        .map_err(|e| ReporterError::Format(e.to_string()))?;
53
54        for case in &result.case_results {
55            write_case_line(&mut out, case)?;
56            for metric in &case.metric_results {
57                write_metric_line(&mut out, metric)?;
58            }
59        }
60
61        writeln!(
62            out,
63            "Summary: {passed} passed, {failed} failed, total_cost=${cost:.6}, duration={dur}ms",
64            passed = result.summary.passed,
65            failed = result.summary.failed,
66            cost = result.summary.total_cost.total,
67            dur = result.summary.total_duration.as_millis(),
68        )
69        .map_err(|e| ReporterError::Format(e.to_string()))?;
70
71        Ok(ReporterOutput::Stdout(out))
72    }
73}
74
75fn write_case_line(out: &mut String, case: &EvalCaseResult) -> Result<(), ReporterError> {
76    writeln!(
77        out,
78        "- {id}  {verdict}  ({dur}ms)",
79        id = case.case_id,
80        verdict = verdict_label(case.verdict),
81        dur = case.invocation.total_duration.as_millis(),
82    )
83    .map_err(|e| ReporterError::Format(e.to_string()))
84}
85
86fn write_metric_line(out: &mut String, metric: &EvalMetricResult) -> Result<(), ReporterError> {
87    let verdict = metric.score.verdict();
88    write!(
89        out,
90        "    {name}  score={score:.2}  threshold={th:.2}  {verdict}",
91        name = metric.evaluator_name,
92        score = metric.score.value,
93        th = metric.score.threshold,
94        verdict = verdict_label(verdict),
95    )
96    .map_err(|e| ReporterError::Format(e.to_string()))?;
97    if let Some(details) = metric.details.as_ref().filter(|s| !s.is_empty()) {
98        // Single-line only; strip embedded newlines so terminal output stays
99        // strictly line-oriented per Q8.
100        let sanitized = details.replace(['\n', '\r'], " ");
101        write!(out, "  reason: {sanitized}").map_err(|e| ReporterError::Format(e.to_string()))?;
102    }
103    writeln!(out).map_err(|e| ReporterError::Format(e.to_string()))?;
104    Ok(())
105}
106
107const fn verdict_label(v: Verdict) -> &'static str {
108    match v {
109        Verdict::Pass => "PASS",
110        Verdict::Fail => "FAIL",
111    }
112}