1use std::fs;
2use std::io::{BufRead as _, BufReader, Write as _};
3use std::net::TcpStream;
4use std::path::{Path, PathBuf};
5use std::process::Command as ProcessCommand;
6use std::thread;
7use std::time::{Duration, Instant};
8
9use anyhow::{Context as _, Result, anyhow, bail};
10use serde::{Deserialize, Serialize};
11
12use super::static_server::StaticServer;
13use super::{AuditArgs, ImageArgs, ViewportArg, evaluate_image};
14
15#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq)]
17#[serde(rename_all = "snake_case")]
18pub enum AuditStatus {
19 Pass,
21 Fail,
23 Error,
25 Skipped,
27}
28
29#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
31pub struct AuditReport {
32 pub schema_version: u32,
34 pub aggregate_status: AuditStatus,
36 pub url: String,
38 pub elapsed_ms: u128,
40 pub options: AuditOptionsReport,
42 pub screenshots: Vec<ScreenshotReport>,
44}
45
46#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
48pub struct AuditOptionsReport {
49 pub question: String,
51 pub model: Option<String>,
53 pub effort: Option<String>,
55 pub system_prompt_provided: bool,
57 pub skip_ai: bool,
59 pub fake_pass: bool,
61}
62
63#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
65pub struct ScreenshotReport {
66 pub name: String,
68 pub width: u32,
70 pub height: u32,
72 pub path: PathBuf,
74 pub rubric: RubricReport,
76}
77
78#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
80#[serde(tag = "status", rename_all = "snake_case")]
81pub enum RubricReport {
82 Pass {
84 reason: String,
86 anomalies: Vec<String>,
88 },
89 Fail {
91 reason: String,
93 anomalies: Vec<String>,
95 },
96 Error {
98 message: String,
100 },
101 Skipped {
103 reason: String,
105 },
106}
107
108pub(super) fn run_audit(args: AuditArgs) -> Result<()> {
109 let started = Instant::now();
110 let question = args.questions.resolve().map_err(|e| anyhow!(e))?;
111 create_clean_dir(&args.screenshots)?;
112 let viewports = if args.viewports.is_empty() {
113 vec![
114 ViewportArg {
115 name: "desktop".into(),
116 width: 1440,
117 height: 1100,
118 },
119 ViewportArg {
120 name: "mobile".into(),
121 width: 390,
122 height: 1200,
123 },
124 ]
125 } else {
126 args.viewports.clone()
127 };
128 let server = StaticServer::start(args.root.clone(), 0)?;
129 let url = format!("{}{}", server.base_url(), args.path.trim_start_matches('/'));
130 ensure_hosted_path_ok(&url)?;
131 let mut screenshots = Vec::new();
132
133 for viewport in viewports {
134 let path = args.screenshots.join(format!("{}.png", viewport.name));
135 capture_screenshot(&args, &url, &viewport, &path)?;
136 let rubric = if args.fake_pass {
137 RubricReport::Pass {
138 reason: "fake pass requested".into(),
139 anomalies: Vec::new(),
140 }
141 } else if args.skip_ai {
142 RubricReport::Skipped {
143 reason: "AI rubric skipped by flag".into(),
144 }
145 } else {
146 evaluate_audit_image(&args, &path)
147 };
148 screenshots.push(ScreenshotReport {
149 name: viewport.name,
150 width: viewport.width,
151 height: viewport.height,
152 path,
153 rubric,
154 });
155 }
156
157 let aggregate_status = aggregate_status(&screenshots);
158 let report = AuditReport {
159 schema_version: 1,
160 aggregate_status,
161 url,
162 elapsed_ms: started.elapsed().as_millis(),
163 options: AuditOptionsReport {
164 question: question.clone(),
165 model: args.model.clone(),
166 effort: args.effort.clone(),
167 system_prompt_provided: args.system_prompt.is_some(),
168 skip_ai: args.skip_ai,
169 fake_pass: args.fake_pass,
170 },
171 screenshots,
172 };
173 write_report(&args.report, &report)?;
174 if args.fail_on_rubric && matches!(aggregate_status, AuditStatus::Fail | AuditStatus::Error) {
175 bail!("visual rubric audit finished with aggregate status {aggregate_status:?}");
176 }
177 Ok(())
178}
179
180fn evaluate_audit_image(args: &AuditArgs, image: &Path) -> RubricReport {
181 let image_args = ImageArgs {
182 image: image.to_path_buf(),
183 questions: args.questions.clone(),
184 system_prompt: args.system_prompt.clone(),
185 model: args.model.clone(),
186 effort: args.effort.clone(),
187 codex_acp: args.codex_acp.clone(),
188 name: image.display().to_string(),
189 json: false,
190 };
191 match evaluate_image(&image_args) {
192 Ok(verdict) if verdict.verdict.is_pass() => RubricReport::Pass {
193 reason: verdict.reason,
194 anomalies: verdict.anomalies,
195 },
196 Ok(verdict) => RubricReport::Fail {
197 reason: verdict.reason,
198 anomalies: verdict.anomalies,
199 },
200 Err(error) => RubricReport::Error {
201 message: error.to_string(),
202 },
203 }
204}
205
206fn create_clean_dir(path: &Path) -> Result<()> {
207 if path.exists() {
208 match fs::remove_dir_all(path) {
209 Ok(()) => {}
210 Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
211 Err(error) => return Err(error).with_context(|| format!("clean {}", path.display())),
212 }
213 }
214 fs::create_dir_all(path).with_context(|| format!("create {}", path.display()))
215}
216
217fn write_report(path: &Path, report: &AuditReport) -> Result<()> {
218 if let Some(parent) = path.parent() {
219 fs::create_dir_all(parent).with_context(|| format!("create {}", parent.display()))?;
220 }
221 let json = serde_json::to_string_pretty(report)?;
222 fs::write(path, json).with_context(|| format!("write {}", path.display()))
223}
224
225fn aggregate_status(screenshots: &[ScreenshotReport]) -> AuditStatus {
226 if screenshots
227 .iter()
228 .any(|screenshot| matches!(screenshot.rubric, RubricReport::Error { .. }))
229 {
230 AuditStatus::Error
231 } else if screenshots
232 .iter()
233 .any(|screenshot| matches!(screenshot.rubric, RubricReport::Fail { .. }))
234 {
235 AuditStatus::Fail
236 } else if screenshots
237 .iter()
238 .all(|screenshot| matches!(screenshot.rubric, RubricReport::Skipped { .. }))
239 {
240 AuditStatus::Skipped
241 } else {
242 AuditStatus::Pass
243 }
244}
245
246fn ensure_hosted_path_ok(url: &str) -> Result<()> {
247 let status = http_status(url).with_context(|| format!("check hosted path {url}"))?;
248 if status != 200 {
249 bail!("hosted path {url} returned HTTP {status}");
250 }
251 Ok(())
252}
253
254fn http_status(url: &str) -> Result<u16> {
255 let rest = url
256 .strip_prefix("http://127.0.0.1:")
257 .with_context(|| format!("audit URL {url} must start with http://127.0.0.1:"))?;
258 let (port, path) = rest
259 .split_once('/')
260 .with_context(|| format!("audit URL {url} is missing a path after the port"))?;
261 let port = port
262 .parse::<u16>()
263 .with_context(|| format!("audit URL {url} has invalid port {port:?}"))?;
264 let mut stream = TcpStream::connect(("127.0.0.1", port))
265 .with_context(|| format!("connect local audit server on 127.0.0.1:{port}"))?;
266 write!(
267 stream,
268 "GET /{path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n"
269 )?;
270 let mut status_line = String::new();
271 BufReader::new(stream)
272 .read_line(&mut status_line)
273 .with_context(|| format!("read HTTP status from local audit URL {url}"))?;
274 status_line
275 .split_whitespace()
276 .nth(1)
277 .with_context(|| format!("local audit URL {url} returned no HTTP status code"))?
278 .parse()
279 .with_context(|| format!("parse HTTP status from {status_line:?}"))
280}
281
282fn capture_screenshot(
283 args: &AuditArgs,
284 url: &str,
285 viewport: &ViewportArg,
286 output: &Path,
287) -> Result<()> {
288 let mut last_error = None;
289 for attempt in 0..=args.capture_retries {
290 if args.wait_ms > 0 {
291 thread::sleep(Duration::from_millis(args.wait_ms));
292 }
293 match capture_screenshot_once(args, url, viewport, output) {
294 Ok(()) => return Ok(()),
295 Err(error) => last_error = Some(error),
296 }
297 if attempt < args.capture_retries {
298 thread::sleep(Duration::from_millis(100));
299 }
300 }
301 Err(last_error.unwrap_or_else(|| anyhow!("browser capture failed")))
302}
303
304fn capture_screenshot_once(
305 args: &AuditArgs,
306 url: &str,
307 viewport: &ViewportArg,
308 output: &Path,
309) -> Result<()> {
310 let mut command = ProcessCommand::new(&args.browser);
311 command
312 .arg("--headless")
313 .arg("--disable-gpu")
314 .arg("--hide-scrollbars")
315 .arg("--no-sandbox")
316 .arg(format!(
317 "--window-size={},{}",
318 viewport.width, viewport.height
319 ));
320 if let Some(scale) = args.device_scale_factor {
321 command.arg(format!("--force-device-scale-factor={scale}"));
322 }
323 command
324 .args(&args.browser_args)
325 .arg(format!("--screenshot={}", output.display()))
326 .arg(url);
327 let status = command
328 .status()
329 .with_context(|| format!("run browser {}", args.browser.display()))?;
330 if !status.success() {
331 bail!(
332 "browser {} failed for {} with status {status}",
333 args.browser.display(),
334 viewport.name
335 );
336 }
337 if !output.exists() {
338 bail!("browser did not write {}", output.display());
339 }
340 Ok(())
341}