1use std::path::PathBuf;
8
9use askama::Template;
10
11use crate::{EvalMetricResult, EvalSetResult, Verdict};
12
13use super::{Reporter, ReporterError, ReporterOutput};
14
/// File name used as the path of the HTML artifact produced by [`HtmlReporter`].
pub const DEFAULT_ARTIFACT_NAME: &str = "eval-report.html";
17
/// Reporter that turns an evaluation result into a single HTML document.
///
/// The type is a field-less unit struct, so it carries no configuration or state.
#[derive(Clone, Copy, Debug, Default)]
pub struct HtmlReporter;

impl HtmlReporter {
    /// Builds a reporter value; being a `const fn`, it can also initialise constants.
    #[must_use]
    pub const fn new() -> Self {
        HtmlReporter
    }
}
29
30impl Reporter for HtmlReporter {
31 fn render(&self, result: &EvalSetResult) -> Result<ReporterOutput, ReporterError> {
32 let view = HtmlReportView::from(result);
33 let html = view
34 .render()
35 .map_err(|err| ReporterError::Format(err.to_string()))?;
36 Ok(ReporterOutput::Artifact {
37 path: PathBuf::from(DEFAULT_ARTIFACT_NAME),
38 bytes: html.into_bytes(),
39 })
40 }
41}
42
/// Top-level view model rendered by the inline askama template below.
///
/// The template is a self-contained HTML page (styles are embedded in a `<style>`
/// element) made of a hero header, a summary grid and one collapsible `<details>`
/// entry per case. Only the first case is rendered with the `open` attribute; when
/// `cases` is empty a placeholder section is emitted instead. The template is
/// declared with `escape = "html"`.
#[derive(Template)]
#[template(
    ext = "html",
    escape = "html",
    source = r#"<!DOCTYPE html>
<html lang="en">
<head>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
 <title>{{ eval_set_id }} report</title>
 <style>
 :root {
 color-scheme: light;
 --bg: #f6f1e8;
 --panel: #fffaf1;
 --ink: #1f1a17;
 --muted: #6c5f55;
 --line: #d7cabd;
 --pass: #245d43;
 --fail: #8b2e24;
 --accent: #a86f2c;
 }
 * { box-sizing: border-box; }
 body {
 margin: 0;
 padding: 24px;
 background:
 radial-gradient(circle at top left, rgba(168, 111, 44, 0.12), transparent 28rem),
 linear-gradient(180deg, #fbf7f0 0%, var(--bg) 100%);
 color: var(--ink);
 font: 16px/1.5 Georgia, "Times New Roman", serif;
 }
 main {
 max-width: 1080px;
 margin: 0 auto;
 }
 .hero, .summary, .case {
 background: var(--panel);
 border: 1px solid var(--line);
 border-radius: 18px;
 box-shadow: 0 10px 30px rgba(52, 37, 22, 0.06);
 }
 .hero {
 padding: 24px;
 margin-bottom: 18px;
 }
 .eyebrow {
 margin: 0 0 8px;
 color: var(--accent);
 font-size: 0.8rem;
 letter-spacing: 0.12em;
 text-transform: uppercase;
 }
 h1 {
 margin: 0;
 font-size: clamp(2rem, 4vw, 3rem);
 line-height: 1.05;
 }
 .subtitle {
 margin: 10px 0 0;
 color: var(--muted);
 }
 .summary {
 display: grid;
 grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
 gap: 12px;
 padding: 18px;
 margin-bottom: 18px;
 }
 .summary-card {
 padding: 12px 14px;
 border-radius: 14px;
 background: rgba(255, 255, 255, 0.65);
 border: 1px solid rgba(215, 202, 189, 0.8);
 }
 .summary-card dt {
 margin: 0 0 4px;
 color: var(--muted);
 font-size: 0.8rem;
 text-transform: uppercase;
 letter-spacing: 0.08em;
 }
 .summary-card dd {
 margin: 0;
 font-size: 1.2rem;
 font-weight: 700;
 }
 .cases {
 display: grid;
 gap: 12px;
 }
 .case {
 padding: 0;
 overflow: hidden;
 }
 .case details {
 width: 100%;
 }
 .case summary {
 list-style: none;
 cursor: pointer;
 padding: 16px 18px;
 display: grid;
 grid-template-columns: minmax(0, 1fr) auto auto;
 gap: 12px;
 align-items: center;
 }
 .case summary::-webkit-details-marker { display: none; }
 .case-id {
 font-size: 1.1rem;
 font-weight: 700;
 overflow-wrap: anywhere;
 }
 .pill {
 border-radius: 999px;
 padding: 4px 10px;
 font-size: 0.82rem;
 font-weight: 700;
 letter-spacing: 0.04em;
 text-transform: uppercase;
 }
 .pass {
 color: var(--pass);
 background: rgba(36, 93, 67, 0.12);
 }
 .fail {
 color: var(--fail);
 background: rgba(139, 46, 36, 0.12);
 }
 .duration {
 color: var(--muted);
 white-space: nowrap;
 }
 .case-body {
 padding: 0 18px 18px;
 border-top: 1px solid var(--line);
 }
 table {
 width: 100%;
 border-collapse: collapse;
 margin-top: 12px;
 font-size: 0.95rem;
 }
 th, td {
 text-align: left;
 padding: 10px 8px;
 border-bottom: 1px solid var(--line);
 vertical-align: top;
 }
 th {
 color: var(--muted);
 font-size: 0.8rem;
 text-transform: uppercase;
 letter-spacing: 0.08em;
 }
 .reason {
 color: var(--muted);
 overflow-wrap: anywhere;
 }
 .empty {
 padding: 24px;
 text-align: center;
 color: var(--muted);
 border: 1px dashed var(--line);
 border-radius: 18px;
 background: rgba(255, 250, 241, 0.7);
 }
 @media (max-width: 700px) {
 body { padding: 14px; }
 .case summary {
 grid-template-columns: 1fr;
 align-items: start;
 }
 }
 </style>
</head>
<body>
 <main>
 <section class="hero">
 <p class="eyebrow">Eval Report</p>
 <h1>{{ eval_set_id }}</h1>
 <p class="subtitle">Generated at unix timestamp {{ timestamp }}. {{ passed }} / {{ total_cases }} cases passed.</p>
 </section>

 <section class="summary" aria-label="summary">
 <dl class="summary-card"><dt>Passed</dt><dd>{{ passed }} / {{ total_cases }}</dd></dl>
 <dl class="summary-card"><dt>Failed</dt><dd>{{ failed }}</dd></dl>
 <dl class="summary-card"><dt>Total Cost</dt><dd>${{ total_cost }}</dd></dl>
 <dl class="summary-card"><dt>Total Duration</dt><dd>{{ total_duration_ms }}ms</dd></dl>
 <dl class="summary-card"><dt>Total Tokens</dt><dd>{{ total_tokens }}</dd></dl>
 </section>

 {% if cases.len() == 0 %}
 <section class="empty">No case results were recorded.</section>
 {% else %}
 <section class="cases" aria-label="cases">
 {% for case in cases %}
 <article class="case">
 <details{% if loop.index0 == 0 %} open{% endif %}>
 <summary>
 <span class="case-id">{{ case.case_id }}</span>
 <span class="pill {{ case.verdict_class }}">{{ case.verdict_label }}</span>
 <span class="duration">{{ case.duration_ms }}ms</span>
 </summary>
 <div class="case-body">
 {% if case.metrics.len() == 0 %}
 <p class="reason">No evaluator metrics were recorded for this case.</p>
 {% else %}
 <table>
 <thead>
 <tr>
 <th>Evaluator</th>
 <th>Score</th>
 <th>Threshold</th>
 <th>Verdict</th>
 <th>Reason</th>
 </tr>
 </thead>
 <tbody>
 {% for metric in case.metrics %}
 <tr>
 <td>{{ metric.evaluator }}</td>
 <td>{{ metric.score }}</td>
 <td>{{ metric.threshold }}</td>
 <td><span class="pill {{ metric.verdict_class }}">{{ metric.verdict_label }}</span></td>
 <td class="reason">{{ metric.reason }}</td>
 </tr>
 {% endfor %}
 </tbody>
 </table>
 {% endif %}
 </div>
 </details>
 </article>
 {% endfor %}
 </section>
 {% endif %}
 </main>
</body>
</html>
"#
)]
struct HtmlReportView {
    /// Identifier shown in the page `<title>` and in the main heading.
    eval_set_id: String,
    /// Value printed after "Generated at unix timestamp" in the subtitle.
    timestamp: u64,
    /// Denominator of the "passed / total" figures.
    total_cases: usize,
    /// Number of passed cases shown in the subtitle and the summary grid.
    passed: usize,
    /// Number shown in the "Failed" summary card.
    failed: usize,
    /// Pre-formatted cost text; the template prints it right after a literal `$`.
    total_cost: String,
    /// Total duration in milliseconds; the template appends the `ms` suffix.
    total_duration_ms: u128,
    /// Number shown in the "Total Tokens" summary card.
    total_tokens: u64,
    /// One entry per rendered case, in display order.
    cases: Vec<HtmlCaseView>,
}
296
/// Display data for a single case entry in the HTML report.
struct HtmlCaseView {
    /// Case identifier shown in the collapsible summary row.
    case_id: String,
    /// Text shown inside the verdict pill (`"PASS"` or `"FAIL"`).
    verdict_label: &'static str,
    /// CSS class added to the verdict pill (`"pass"` or `"fail"`).
    verdict_class: &'static str,
    /// Case duration in milliseconds; the template appends the `ms` suffix.
    duration_ms: u128,
    /// Rows of the per-case metrics table; when empty, the template prints a
    /// "no evaluator metrics" note instead of the table.
    metrics: Vec<HtmlMetricView>,
}
304
/// Display data for one row of a case's metrics table.
struct HtmlMetricView {
    /// Evaluator name shown in the first column.
    evaluator: String,
    /// Score text, already formatted for display.
    score: String,
    /// Threshold text, already formatted for display.
    threshold: String,
    /// Text shown inside the verdict pill (`"PASS"` or `"FAIL"`).
    verdict_label: &'static str,
    /// CSS class added to the verdict pill (`"pass"` or `"fail"`).
    verdict_class: &'static str,
    /// Free-form explanation shown in the "Reason" column; may be empty.
    reason: String,
}
313
314impl From<&EvalSetResult> for HtmlReportView {
315 fn from(result: &EvalSetResult) -> Self {
316 Self {
317 eval_set_id: result.eval_set_id.clone(),
318 timestamp: result.timestamp,
319 total_cases: result.summary.total_cases,
320 passed: result.summary.passed,
321 failed: result.summary.failed,
322 total_cost: format!("{:.6}", result.summary.total_cost.total),
323 total_duration_ms: result.summary.total_duration.as_millis(),
324 total_tokens: result.summary.total_usage.total,
325 cases: result.case_results.iter().map(HtmlCaseView::from).collect(),
326 }
327 }
328}
329
330impl From<&crate::EvalCaseResult> for HtmlCaseView {
331 fn from(case: &crate::EvalCaseResult) -> Self {
332 Self {
333 case_id: case.case_id.clone(),
334 verdict_label: verdict_label(case.verdict),
335 verdict_class: verdict_class(case.verdict),
336 duration_ms: case.invocation.total_duration.as_millis(),
337 metrics: case
338 .metric_results
339 .iter()
340 .map(HtmlMetricView::from)
341 .collect(),
342 }
343 }
344}
345
346impl From<&EvalMetricResult> for HtmlMetricView {
347 fn from(metric: &EvalMetricResult) -> Self {
348 let verdict = metric.score.verdict();
349 Self {
350 evaluator: metric.evaluator_name.clone(),
351 score: format!("{:.2}", metric.score.value),
352 threshold: format!("{:.2}", metric.score.threshold),
353 verdict_label: verdict_label(verdict),
354 verdict_class: verdict_class(verdict),
355 reason: metric.details.clone().unwrap_or_default(),
356 }
357 }
358}
359
360const fn verdict_label(verdict: Verdict) -> &'static str {
361 match verdict {
362 Verdict::Pass => "PASS",
363 Verdict::Fail => "FAIL",
364 }
365}
366
367const fn verdict_class(verdict: Verdict) -> &'static str {
368 match verdict {
369 Verdict::Pass => "pass",
370 Verdict::Fail => "fail",
371 }
372}