Skip to main content

swink_agent_eval/report/
html.rs

//! Self-contained HTML reporter (feature `html-report`, spec 043 §FR-041).
//!
//! The HTML artifact is a single file with inline CSS and no external assets.
//! Case detail is presented via native `<details>` / `<summary>` elements, so
//! the report remains useful even with JavaScript disabled.
6
7use std::path::PathBuf;
8
9use askama::Template;
10
11use crate::{EvalMetricResult, EvalSetResult, Verdict};
12
13use super::{Reporter, ReporterError, ReporterOutput};
14
/// Default artifact filename for HTML reports.
///
/// [`HtmlReporter`] returns this as the (relative) artifact path of every
/// report it renders.
pub const DEFAULT_ARTIFACT_NAME: &str = "eval-report.html";
17
/// Reporter that emits eval results as one self-contained HTML file
/// (spec 043 §FR-041).
///
/// This is a field-less unit struct: it carries no configuration and is
/// trivially `Copy`.
#[derive(Debug, Default, Clone, Copy)]
pub struct HtmlReporter;

impl HtmlReporter {
    /// Build a reporter.
    ///
    /// Usable in `const` contexts; yields the same value as
    /// `HtmlReporter::default()`.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
29
30impl Reporter for HtmlReporter {
31    fn render(&self, result: &EvalSetResult) -> Result<ReporterOutput, ReporterError> {
32        let view = HtmlReportView::from(result);
33        let html = view
34            .render()
35            .map_err(|err| ReporterError::Format(err.to_string()))?;
36        Ok(ReporterOutput::Artifact {
37            path: PathBuf::from(DEFAULT_ARTIFACT_NAME),
38            bytes: html.into_bytes(),
39        })
40    }
41}
42
/// Template context for the whole report page.
///
/// The inline askama template renders a hero header, a summary grid, and one
/// collapsible `<details>` entry per case (only the first entry is emitted
/// with the `open` attribute). When `cases` is empty a placeholder section is
/// shown instead. The template is configured with the `html` escaper.
#[derive(Template)]
#[template(
    ext = "html",
    escape = "html",
    source = r#"<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{{ eval_set_id }} report</title>
  <style>
    :root {
      color-scheme: light;
      --bg: #f6f1e8;
      --panel: #fffaf1;
      --ink: #1f1a17;
      --muted: #6c5f55;
      --line: #d7cabd;
      --pass: #245d43;
      --fail: #8b2e24;
      --accent: #a86f2c;
    }
    * { box-sizing: border-box; }
    body {
      margin: 0;
      padding: 24px;
      background:
        radial-gradient(circle at top left, rgba(168, 111, 44, 0.12), transparent 28rem),
        linear-gradient(180deg, #fbf7f0 0%, var(--bg) 100%);
      color: var(--ink);
      font: 16px/1.5 Georgia, "Times New Roman", serif;
    }
    main {
      max-width: 1080px;
      margin: 0 auto;
    }
    .hero, .summary, .case {
      background: var(--panel);
      border: 1px solid var(--line);
      border-radius: 18px;
      box-shadow: 0 10px 30px rgba(52, 37, 22, 0.06);
    }
    .hero {
      padding: 24px;
      margin-bottom: 18px;
    }
    .eyebrow {
      margin: 0 0 8px;
      color: var(--accent);
      font-size: 0.8rem;
      letter-spacing: 0.12em;
      text-transform: uppercase;
    }
    h1 {
      margin: 0;
      font-size: clamp(2rem, 4vw, 3rem);
      line-height: 1.05;
    }
    .subtitle {
      margin: 10px 0 0;
      color: var(--muted);
    }
    .summary {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
      gap: 12px;
      padding: 18px;
      margin-bottom: 18px;
    }
    .summary-card {
      padding: 12px 14px;
      border-radius: 14px;
      background: rgba(255, 255, 255, 0.65);
      border: 1px solid rgba(215, 202, 189, 0.8);
    }
    .summary-card dt {
      margin: 0 0 4px;
      color: var(--muted);
      font-size: 0.8rem;
      text-transform: uppercase;
      letter-spacing: 0.08em;
    }
    .summary-card dd {
      margin: 0;
      font-size: 1.2rem;
      font-weight: 700;
    }
    .cases {
      display: grid;
      gap: 12px;
    }
    .case {
      padding: 0;
      overflow: hidden;
    }
    .case details {
      width: 100%;
    }
    .case summary {
      list-style: none;
      cursor: pointer;
      padding: 16px 18px;
      display: grid;
      grid-template-columns: minmax(0, 1fr) auto auto;
      gap: 12px;
      align-items: center;
    }
    .case summary::-webkit-details-marker { display: none; }
    .case-id {
      font-size: 1.1rem;
      font-weight: 700;
      overflow-wrap: anywhere;
    }
    .pill {
      border-radius: 999px;
      padding: 4px 10px;
      font-size: 0.82rem;
      font-weight: 700;
      letter-spacing: 0.04em;
      text-transform: uppercase;
    }
    .pass {
      color: var(--pass);
      background: rgba(36, 93, 67, 0.12);
    }
    .fail {
      color: var(--fail);
      background: rgba(139, 46, 36, 0.12);
    }
    .duration {
      color: var(--muted);
      white-space: nowrap;
    }
    .case-body {
      padding: 0 18px 18px;
      border-top: 1px solid var(--line);
    }
    table {
      width: 100%;
      border-collapse: collapse;
      margin-top: 12px;
      font-size: 0.95rem;
    }
    th, td {
      text-align: left;
      padding: 10px 8px;
      border-bottom: 1px solid var(--line);
      vertical-align: top;
    }
    th {
      color: var(--muted);
      font-size: 0.8rem;
      text-transform: uppercase;
      letter-spacing: 0.08em;
    }
    .reason {
      color: var(--muted);
      overflow-wrap: anywhere;
    }
    .empty {
      padding: 24px;
      text-align: center;
      color: var(--muted);
      border: 1px dashed var(--line);
      border-radius: 18px;
      background: rgba(255, 250, 241, 0.7);
    }
    @media (max-width: 700px) {
      body { padding: 14px; }
      .case summary {
        grid-template-columns: 1fr;
        align-items: start;
      }
    }
  </style>
</head>
<body>
  <main>
    <section class="hero">
      <p class="eyebrow">Eval Report</p>
      <h1>{{ eval_set_id }}</h1>
      <p class="subtitle">Generated at unix timestamp {{ timestamp }}. {{ passed }} / {{ total_cases }} cases passed.</p>
    </section>

    <section class="summary" aria-label="summary">
      <dl class="summary-card"><dt>Passed</dt><dd>{{ passed }} / {{ total_cases }}</dd></dl>
      <dl class="summary-card"><dt>Failed</dt><dd>{{ failed }}</dd></dl>
      <dl class="summary-card"><dt>Total Cost</dt><dd>${{ total_cost }}</dd></dl>
      <dl class="summary-card"><dt>Total Duration</dt><dd>{{ total_duration_ms }}ms</dd></dl>
      <dl class="summary-card"><dt>Total Tokens</dt><dd>{{ total_tokens }}</dd></dl>
    </section>

    {% if cases.len() == 0 %}
    <section class="empty">No case results were recorded.</section>
    {% else %}
    <section class="cases" aria-label="cases">
      {% for case in cases %}
      <article class="case">
        <details{% if loop.index0 == 0 %} open{% endif %}>
          <summary>
            <span class="case-id">{{ case.case_id }}</span>
            <span class="pill {{ case.verdict_class }}">{{ case.verdict_label }}</span>
            <span class="duration">{{ case.duration_ms }}ms</span>
          </summary>
          <div class="case-body">
            {% if case.metrics.len() == 0 %}
            <p class="reason">No evaluator metrics were recorded for this case.</p>
            {% else %}
            <table>
              <thead>
                <tr>
                  <th>Evaluator</th>
                  <th>Score</th>
                  <th>Threshold</th>
                  <th>Verdict</th>
                  <th>Reason</th>
                </tr>
              </thead>
              <tbody>
                {% for metric in case.metrics %}
                <tr>
                  <td>{{ metric.evaluator }}</td>
                  <td>{{ metric.score }}</td>
                  <td>{{ metric.threshold }}</td>
                  <td><span class="pill {{ metric.verdict_class }}">{{ metric.verdict_label }}</span></td>
                  <td class="reason">{{ metric.reason }}</td>
                </tr>
                {% endfor %}
              </tbody>
            </table>
            {% endif %}
          </div>
        </details>
      </article>
      {% endfor %}
    </section>
    {% endif %}
  </main>
</body>
</html>
"#
)]
struct HtmlReportView {
    /// Eval set identifier; used for the page `<title>` and the main heading.
    eval_set_id: String,
    /// Value printed after "Generated at unix timestamp" in the subtitle.
    timestamp: u64,
    /// Total number of cases; the denominator of the "passed" ratio.
    total_cases: usize,
    /// Number of cases that passed.
    passed: usize,
    /// Number of cases that failed.
    failed: usize,
    /// Total cost, already formatted as text; the template prefixes it with `$`.
    total_cost: String,
    /// Total duration in whole milliseconds; the template appends `ms`.
    total_duration_ms: u128,
    /// Value shown in the "Total Tokens" summary card.
    total_tokens: u64,
    /// One entry per case, rendered in iteration order.
    cases: Vec<HtmlCaseView>,
}
296
/// Template context for a single case entry (one `<details>` element).
struct HtmlCaseView {
    /// Case identifier shown in the summary row.
    case_id: String,
    /// Badge text for the case verdict (`"PASS"` or `"FAIL"`).
    verdict_label: &'static str,
    /// CSS class applied to the verdict badge (`"pass"` or `"fail"`).
    verdict_class: &'static str,
    /// Case duration in whole milliseconds; the template appends `ms`.
    duration_ms: u128,
    /// Rows of the per-evaluator table; when empty the template shows a
    /// placeholder paragraph instead of the table.
    metrics: Vec<HtmlMetricView>,
}
304
/// Template context for one row of a case's evaluator table.
///
/// Numeric values are stored pre-formatted as strings so the template only
/// has to print them.
struct HtmlMetricView {
    /// Evaluator name ("Evaluator" column).
    evaluator: String,
    /// Formatted score ("Score" column).
    score: String,
    /// Formatted threshold ("Threshold" column).
    threshold: String,
    /// Badge text for the metric verdict (`"PASS"` or `"FAIL"`).
    verdict_label: &'static str,
    /// CSS class applied to the verdict badge (`"pass"` or `"fail"`).
    verdict_class: &'static str,
    /// Free-text explanation ("Reason" column); may be empty.
    reason: String,
}
313
314impl From<&EvalSetResult> for HtmlReportView {
315    fn from(result: &EvalSetResult) -> Self {
316        Self {
317            eval_set_id: result.eval_set_id.clone(),
318            timestamp: result.timestamp,
319            total_cases: result.summary.total_cases,
320            passed: result.summary.passed,
321            failed: result.summary.failed,
322            total_cost: format!("{:.6}", result.summary.total_cost.total),
323            total_duration_ms: result.summary.total_duration.as_millis(),
324            total_tokens: result.summary.total_usage.total,
325            cases: result.case_results.iter().map(HtmlCaseView::from).collect(),
326        }
327    }
328}
329
330impl From<&crate::EvalCaseResult> for HtmlCaseView {
331    fn from(case: &crate::EvalCaseResult) -> Self {
332        Self {
333            case_id: case.case_id.clone(),
334            verdict_label: verdict_label(case.verdict),
335            verdict_class: verdict_class(case.verdict),
336            duration_ms: case.invocation.total_duration.as_millis(),
337            metrics: case
338                .metric_results
339                .iter()
340                .map(HtmlMetricView::from)
341                .collect(),
342        }
343    }
344}
345
346impl From<&EvalMetricResult> for HtmlMetricView {
347    fn from(metric: &EvalMetricResult) -> Self {
348        let verdict = metric.score.verdict();
349        Self {
350            evaluator: metric.evaluator_name.clone(),
351            score: format!("{:.2}", metric.score.value),
352            threshold: format!("{:.2}", metric.score.threshold),
353            verdict_label: verdict_label(verdict),
354            verdict_class: verdict_class(verdict),
355            reason: metric.details.clone().unwrap_or_default(),
356        }
357    }
358}
359
360const fn verdict_label(verdict: Verdict) -> &'static str {
361    match verdict {
362        Verdict::Pass => "PASS",
363        Verdict::Fail => "FAIL",
364    }
365}
366
367const fn verdict_class(verdict: Verdict) -> &'static str {
368    match verdict {
369        Verdict::Pass => "pass",
370        Verdict::Fail => "fail",
371    }
372}