1use std::fs;
2use std::io::{BufRead as _, BufReader, Write as _};
3use std::net::TcpStream;
4use std::path::{Path, PathBuf};
5use std::process::Command as ProcessCommand;
6use std::thread;
7use std::time::{Duration, Instant};
8
9use anyhow::{Context as _, Result, anyhow, bail};
10use serde::{Deserialize, Serialize};
11
12use super::static_server::StaticServer;
13use super::{AuditArgs, ImageArgs, ViewportArg, evaluate_image};
14
15#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq)]
17#[serde(rename_all = "snake_case")]
18pub enum AuditStatus {
19 Pass,
21 Fail,
23 Error,
25 Skipped,
27}
28
29#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
31pub struct AuditReport {
32 pub schema_version: u32,
34 pub aggregate_status: AuditStatus,
36 pub url: String,
38 pub elapsed_ms: u128,
40 pub options: AuditOptionsReport,
42 pub screenshots: Vec<ScreenshotReport>,
44}
45
46#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
48pub struct AuditOptionsReport {
49 pub question: String,
51 pub model: Option<String>,
53 pub effort: Option<String>,
55 pub system_prompt_provided: bool,
57 pub skip_ai: bool,
59 pub fake_pass: bool,
61}
62
63#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
65pub struct ScreenshotReport {
66 pub name: String,
68 pub width: u32,
70 pub height: u32,
72 pub path: PathBuf,
74 pub rubric: RubricReport,
76}
77
78#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
80#[serde(tag = "status", rename_all = "snake_case")]
81pub enum RubricReport {
82 Pass {
84 reason: String,
86 anomalies: Vec<String>,
88 },
89 Fail {
91 reason: String,
93 anomalies: Vec<String>,
95 },
96 Error {
98 message: String,
100 },
101 Skipped {
103 reason: String,
105 },
106}
107
108pub(super) fn run_audit(args: AuditArgs) -> Result<()> {
109 let started = Instant::now();
110 create_clean_dir(&args.screenshots)?;
111 let viewports = if args.viewports.is_empty() {
112 vec![
113 ViewportArg {
114 name: "desktop".into(),
115 width: 1440,
116 height: 1100,
117 },
118 ViewportArg {
119 name: "mobile".into(),
120 width: 390,
121 height: 1200,
122 },
123 ]
124 } else {
125 args.viewports.clone()
126 };
127 let server = StaticServer::start(args.root.clone(), 0)?;
128 let url = format!("{}{}", server.base_url(), args.path.trim_start_matches('/'));
129 ensure_hosted_path_ok(&url)?;
130 let mut screenshots = Vec::new();
131
132 for viewport in viewports {
133 let path = args.screenshots.join(format!("{}.png", viewport.name));
134 capture_screenshot(&args, &url, &viewport, &path)?;
135 let rubric = if args.fake_pass {
136 RubricReport::Pass {
137 reason: "fake pass requested".into(),
138 anomalies: Vec::new(),
139 }
140 } else if args.skip_ai {
141 RubricReport::Skipped {
142 reason: "AI rubric skipped by flag".into(),
143 }
144 } else {
145 evaluate_audit_image(&args, &path)
146 };
147 screenshots.push(ScreenshotReport {
148 name: viewport.name,
149 width: viewport.width,
150 height: viewport.height,
151 path,
152 rubric,
153 });
154 }
155
156 let aggregate_status = aggregate_status(&screenshots);
157 let report = AuditReport {
158 schema_version: 1,
159 aggregate_status,
160 url,
161 elapsed_ms: started.elapsed().as_millis(),
162 options: AuditOptionsReport {
163 question: args.question.clone(),
164 model: args.model.clone(),
165 effort: args.effort.clone(),
166 system_prompt_provided: args.system_prompt.is_some(),
167 skip_ai: args.skip_ai,
168 fake_pass: args.fake_pass,
169 },
170 screenshots,
171 };
172 write_report(&args.report, &report)?;
173 if args.fail_on_rubric && matches!(aggregate_status, AuditStatus::Fail | AuditStatus::Error) {
174 bail!("visual rubric audit finished with aggregate status {aggregate_status:?}");
175 }
176 Ok(())
177}
178
179fn evaluate_audit_image(args: &AuditArgs, image: &Path) -> RubricReport {
180 let image_args = ImageArgs {
181 image: image.to_path_buf(),
182 question: args.question.clone(),
183 system_prompt: args.system_prompt.clone(),
184 model: args.model.clone(),
185 effort: args.effort.clone(),
186 codex_acp: args.codex_acp.clone(),
187 name: image.display().to_string(),
188 json: false,
189 };
190 match evaluate_image(&image_args) {
191 Ok(verdict) if verdict.verdict.is_pass() => RubricReport::Pass {
192 reason: verdict.reason,
193 anomalies: verdict.anomalies,
194 },
195 Ok(verdict) => RubricReport::Fail {
196 reason: verdict.reason,
197 anomalies: verdict.anomalies,
198 },
199 Err(error) => RubricReport::Error {
200 message: error.to_string(),
201 },
202 }
203}
204
205fn create_clean_dir(path: &Path) -> Result<()> {
206 if path.exists() {
207 match fs::remove_dir_all(path) {
208 Ok(()) => {}
209 Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
210 Err(error) => return Err(error).with_context(|| format!("clean {}", path.display())),
211 }
212 }
213 fs::create_dir_all(path).with_context(|| format!("create {}", path.display()))
214}
215
216fn write_report(path: &Path, report: &AuditReport) -> Result<()> {
217 if let Some(parent) = path.parent() {
218 fs::create_dir_all(parent).with_context(|| format!("create {}", parent.display()))?;
219 }
220 let json = serde_json::to_string_pretty(report)?;
221 fs::write(path, json).with_context(|| format!("write {}", path.display()))
222}
223
224fn aggregate_status(screenshots: &[ScreenshotReport]) -> AuditStatus {
225 if screenshots
226 .iter()
227 .any(|screenshot| matches!(screenshot.rubric, RubricReport::Error { .. }))
228 {
229 AuditStatus::Error
230 } else if screenshots
231 .iter()
232 .any(|screenshot| matches!(screenshot.rubric, RubricReport::Fail { .. }))
233 {
234 AuditStatus::Fail
235 } else if screenshots
236 .iter()
237 .all(|screenshot| matches!(screenshot.rubric, RubricReport::Skipped { .. }))
238 {
239 AuditStatus::Skipped
240 } else {
241 AuditStatus::Pass
242 }
243}
244
245fn ensure_hosted_path_ok(url: &str) -> Result<()> {
246 let status = http_status(url).with_context(|| format!("check hosted path {url}"))?;
247 if status != 200 {
248 bail!("hosted path {url} returned HTTP {status}");
249 }
250 Ok(())
251}
252
253fn http_status(url: &str) -> Result<u16> {
254 let rest = url
255 .strip_prefix("http://127.0.0.1:")
256 .context("only local audit URLs are supported")?;
257 let (port, path) = rest
258 .split_once('/')
259 .context("local audit URL missing path")?;
260 let port = port.parse::<u16>().context("local audit URL port")?;
261 let mut stream = TcpStream::connect(("127.0.0.1", port)).context("connect local server")?;
262 write!(
263 stream,
264 "GET /{path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n"
265 )?;
266 let mut status_line = String::new();
267 BufReader::new(stream)
268 .read_line(&mut status_line)
269 .context("read local server status")?;
270 status_line
271 .split_whitespace()
272 .nth(1)
273 .context("missing HTTP status")?
274 .parse()
275 .context("parse HTTP status")
276}
277
278fn capture_screenshot(
279 args: &AuditArgs,
280 url: &str,
281 viewport: &ViewportArg,
282 output: &Path,
283) -> Result<()> {
284 let mut last_error = None;
285 for attempt in 0..=args.capture_retries {
286 if args.wait_ms > 0 {
287 thread::sleep(Duration::from_millis(args.wait_ms));
288 }
289 match capture_screenshot_once(args, url, viewport, output) {
290 Ok(()) => return Ok(()),
291 Err(error) => last_error = Some(error),
292 }
293 if attempt < args.capture_retries {
294 thread::sleep(Duration::from_millis(100));
295 }
296 }
297 Err(last_error.unwrap_or_else(|| anyhow!("browser capture failed")))
298}
299
300fn capture_screenshot_once(
301 args: &AuditArgs,
302 url: &str,
303 viewport: &ViewportArg,
304 output: &Path,
305) -> Result<()> {
306 let mut command = ProcessCommand::new(&args.browser);
307 command
308 .arg("--headless")
309 .arg("--disable-gpu")
310 .arg("--hide-scrollbars")
311 .arg("--no-sandbox")
312 .arg(format!(
313 "--window-size={},{}",
314 viewport.width, viewport.height
315 ));
316 if let Some(scale) = args.device_scale_factor {
317 command.arg(format!("--force-device-scale-factor={scale}"));
318 }
319 command
320 .args(&args.browser_args)
321 .arg(format!("--screenshot={}", output.display()))
322 .arg(url);
323 let status = command
324 .status()
325 .with_context(|| format!("run browser {}", args.browser.display()))?;
326 if !status.success() {
327 bail!(
328 "browser {} failed for {} with status {status}",
329 args.browser.display(),
330 viewport.name
331 );
332 }
333 if !output.exists() {
334 bail!("browser did not write {}", output.display());
335 }
336 Ok(())
337}