swink_agent_eval/report/mod.rs
//! Reporters and export surfaces for eval results.
//!
//! Spec 043 §FR-041 requires always-on, plain-text reporters that render an
//! [`EvalSetResult`] to stdout strings or artifact bytes. The `Reporter` trait
//! is the common surface; concrete implementations live in sibling modules:
//!
//! * [`ConsoleReporter`] — plain-text, line-oriented (no ANSI, no cursor
//!   control, no interactivity per Q8 clarification).
//! * [`JsonReporter`] — self-contained JSON matching
//!   `specs/043-evals-adv-features/contracts/eval-result.schema.json`.
//! * [`MarkdownReporter`] — PR-comment-ready Markdown table.
//! * `HtmlReporter` — self-contained HTML artifact using native
//!   `<details>` / `<summary>` disclosure (behind `html-report`).
//!
//! LangSmith export lives in the `langsmith` module (behind the `langsmith`
//! feature) and is re-exported from here as `LangSmithExporter`.
//!
//! All reporters are deterministic: given the same `EvalSetResult` they
//! produce byte-identical output.
//!
//! [`EvalSetResult`]: crate::EvalSetResult
21
22use std::path::PathBuf;
23
24use thiserror::Error;
25
26use crate::EvalSetResult;
27
28pub mod console;
29#[cfg(feature = "html-report")]
30pub mod html;
31pub mod json;
32#[cfg(feature = "langsmith")]
33pub mod langsmith;
34pub mod markdown;
35
36pub use console::ConsoleReporter;
37#[cfg(feature = "html-report")]
38pub use html::HtmlReporter;
39pub use json::{JsonReporter, SCHEMA_VERSION};
40#[cfg(feature = "langsmith")]
41pub use langsmith::{LangSmithExportError, LangSmithExporter};
42pub use markdown::MarkdownReporter;
43
/// Location of the eval-result JSON schema published with spec 043.
///
/// The path is relative (presumably to the repository root — NOTE(review):
/// confirm against the workspace layout) and is kept stable so that
/// [`JsonReporter`] output can be validated against the published contract.
///
/// Consumers typically embed the schema with `include_str!`, which is what
/// the `JsonReporter` regression tests do. Note that `include_str!` expects a
/// string literal resolved relative to the invoking source file, so this
/// constant documents the canonical location rather than being passed to the
/// macro directly.
pub const JSON_SCHEMA_PATH: &str = "specs/043-evals-adv-features/contracts/eval-result.schema.json";
50
51/// Renders an [`EvalSetResult`] into a concrete output surface.
52///
53/// Per spec 043 §FR-041 the three always-on reporters
54/// ([`ConsoleReporter`], [`JsonReporter`], [`MarkdownReporter`]) are plain,
55/// deterministic, and side-effect-free. Reporters that target a remote
56/// backend (e.g. LangSmith) use [`ReporterOutput::Remote`] and may perform
57/// network I/O; consult each reporter's documentation.
58///
59/// [`EvalSetResult`]: crate::EvalSetResult
60pub trait Reporter: Send + Sync {
61 /// Render the given result.
62 ///
63 /// # Errors
64 ///
65 /// Returns [`ReporterError`] when formatting fails, when an artifact
66 /// cannot be written, or when a remote backend rejects the payload.
67 fn render(&self, result: &EvalSetResult) -> Result<ReporterOutput, ReporterError>;
68}
69
/// Output produced by a [`Reporter::render`] call.
///
/// The three variants cover the common delivery channels:
/// * [`Stdout`](Self::Stdout) — text intended for terminal display.
/// * [`Artifact`](Self::Artifact) — bytes + filesystem path the caller may
///   persist (e.g. `--out report.json`).
/// * [`Remote`](Self::Remote) — the payload was pushed to an external
///   backend; `identifier` is backend-specific (LangSmith run id, etc.).
///
/// Values compare structurally (`PartialEq` / `Eq`), so two renders of the
/// same input can be checked for identical output with a plain `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReporterOutput {
    /// Plain text suitable for stdout or a log line.
    Stdout(String),
    /// A byte artifact together with the path it is meant to be stored at.
    Artifact {
        /// Suggested destination path. The reporter does not write to it;
        /// the caller decides whether to persist.
        path: PathBuf,
        /// Raw bytes of the artifact.
        bytes: Vec<u8>,
    },
    /// A remote push result, identified by backend + opaque id.
    Remote {
        /// Human-readable backend name (e.g. `"langsmith"`).
        backend: String,
        /// Backend-specific identifier (e.g. LangSmith run id).
        identifier: String,
    },
}
98
99/// Error surface for [`Reporter`] implementations.
100#[derive(Debug, Error)]
101pub enum ReporterError {
102 /// Filesystem or stream I/O failed.
103 #[error("reporter I/O error: {0}")]
104 Io(#[from] std::io::Error),
105 /// Rendering/serialization failed (e.g. JSON encoding).
106 #[error("reporter formatting error: {0}")]
107 Format(String),
108 /// A remote backend push failed.
109 #[error("reporter network error: {0}")]
110 Network(String),
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// An `io::Error` converts into `ReporterError::Io` and keeps its message.
    #[test]
    fn reporter_error_from_io() {
        let converted = ReporterError::from(std::io::Error::other("boom"));
        let rendered = converted.to_string();
        assert!(rendered.contains("boom"));
        assert!(matches!(converted, ReporterError::Io(_)));
    }

    /// Every `ReporterOutput` variant can be built from plain std values.
    #[test]
    fn reporter_output_variants_are_constructible() {
        let _ = ReporterOutput::Stdout(String::from("hello"));

        let path = PathBuf::from("/tmp/out.json");
        let bytes = vec![0xDE, 0xAD, 0xBE, 0xEF];
        let _ = ReporterOutput::Artifact { path, bytes };

        let backend = String::from("langsmith");
        let identifier = String::from("run-1234");
        let _ = ReporterOutput::Remote {
            backend,
            identifier,
        };
    }

    /// Guards the published schema file name: reporters and their regression
    /// tests are expected to reference the same string.
    #[test]
    fn schema_path_constant_points_at_repo_contract() {
        let expected_file_name = "eval-result.schema.json";
        assert!(JSON_SCHEMA_PATH.ends_with(expected_file_name));
    }
}