1#![warn(missing_docs)]
6
7pub mod cli;
8mod errors;
9mod pool;
10mod typed_strings;
11
12use std::ffi::{OsStr, OsString};
13use std::io::{BufRead as _, BufReader, Write as _};
14use std::path::{Path, PathBuf};
15use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
16
17use base64::Engine as _;
18use serde::{Deserialize, Serialize};
19
20pub use cli::Cli;
21pub use errors::{PoolError, RateLimitEvent, RubricError};
22pub use pool::{PoolConfig, PoolStats, RubricPool};
23pub use typed_strings::{RubricEffort, RubricVerdictStatus};
24
25#[derive(Debug, Deserialize, Serialize)]
26pub struct RubricVerdict {
28 pub verdict: RubricVerdictStatus,
30 pub reason: String,
32 #[serde(default, deserialize_with = "deserialize_anomalies")]
34 pub anomalies: Vec<String>,
35}
36
37#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
39pub struct RubricOptions {
40 pub model: Option<String>,
42 pub effort: Option<RubricEffort>,
44 pub system_prompt: Option<String>,
46}
47
/// Process-level settings for launching the `codex-acp` child.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RubricRunConfig {
    /// Program that is spawned to serve the rubric request.
    pub codex_acp_binary: PathBuf,
    /// Extra environment variables set on the spawned process.
    pub extra_env: Vec<(OsString, OsString)>,
    /// Working directory for the spawned process and the ACP session.
    ///
    /// When `None`, the child inherits the current directory and the session
    /// is created with the caller's current directory.
    pub cwd: Option<PathBuf>,
}
58
59impl Default for RubricRunConfig {
60 fn default() -> Self {
61 Self {
62 codex_acp_binary: default_codex_acp_binary(),
63 extra_env: Vec::new(),
64 cwd: None,
65 }
66 }
67}
68
/// Instructions sent ahead of the question when no custom system prompt is
/// supplied: they define the auditor role, the strict JSON reply schema and
/// the criteria that count as a failure.
pub const DEFAULT_SYSTEM_PROMPT: &str = concat!(
    "You are a UI regression auditor. ",
    "You will be shown one screenshot and asked a specific question. Reply with strict ",
    "JSON matching this schema and nothing else:\n",
    "{ \"verdict\": \"pass\" | \"fail\", \"reason\": string, \"anomalies\": string[] }\n",
    "Fail criteria: text clipped or overflowing its container, overlapping interactive ",
    "elements, missing/blank regions where content should appear, illegible contrast, ",
    "visibly broken layout. Cosmetic differences from previous runs are NOT failures ",
    "unless they make the UI worse by the criteria above.",
);
79
/// Model name used when [`RubricOptions::model`] is `None`.
pub const DEFAULT_CODEX_ACP_MODEL: &'static str = "gpt-5.4-mini";
/// Reasoning effort used when [`RubricOptions::effort`] is `None`.
pub const DEFAULT_CODEX_ACP_REASONING_EFFORT: &'static str = "medium";
84
85#[must_use]
87pub fn default_options() -> RubricOptions {
88 RubricOptions {
89 model: Some(DEFAULT_CODEX_ACP_MODEL.to_string()),
90 effort: Some(DEFAULT_CODEX_ACP_REASONING_EFFORT.into()),
91 system_prompt: Some(DEFAULT_SYSTEM_PROMPT.to_string()),
92 }
93}
94
/// Returns the bare program name `codex-acp`, used when no explicit binary
/// path is configured.
#[must_use]
pub fn default_codex_acp_binary() -> PathBuf {
    Path::new("codex-acp").to_path_buf()
}
100
101pub fn encode_png(png_path: &Path) -> Result<String, PoolError> {
107 let bytes = std::fs::read(png_path)
108 .map_err(|e| PoolError::Rpc(format!("read png {}: {e}", png_path.display())))?;
109 Ok(base64::engine::general_purpose::STANDARD.encode(bytes))
110}
111
112pub fn assert_image_rubric(png_path: &Path, name: &str, question: &str) -> Result<(), RubricError> {
119 let verdict = evaluate_image_rubric(png_path, question)?;
120 assert_verdict(name, verdict)
121}
122
123pub fn evaluate_image_rubric(
129 png_path: &Path,
130 question: &str,
131) -> Result<RubricVerdict, RubricError> {
132 evaluate_image_rubric_with_options(png_path, question, default_options())
133}
134
135pub fn evaluate_image_rubric_with_options(
141 png_path: &Path,
142 question: &str,
143 opts: RubricOptions,
144) -> Result<RubricVerdict, RubricError> {
145 evaluate_image_rubric_with_config(png_path, question, opts, RubricRunConfig::default())
146}
147
148pub fn evaluate_image_rubric_with_config(
154 png_path: &Path,
155 question: &str,
156 opts: RubricOptions,
157 config: RubricRunConfig,
158) -> Result<RubricVerdict, RubricError> {
159 let bytes = std::fs::read(png_path).map_err(|source| RubricError::ReadPng {
160 path: png_path.to_path_buf(),
161 source,
162 })?;
163 let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
164 let text = run_codex_acp_rubric(
165 &b64,
166 question,
167 opts.model.as_deref().unwrap_or(DEFAULT_CODEX_ACP_MODEL),
168 opts.effort
169 .as_deref()
170 .unwrap_or(DEFAULT_CODEX_ACP_REASONING_EFFORT),
171 opts.system_prompt
172 .as_deref()
173 .unwrap_or(DEFAULT_SYSTEM_PROMPT),
174 &config,
175 )?;
176
177 parse_verdict(&text).map_err(|source| RubricError::ParseVerdict { text, source })
178}
179
180pub fn parse_verdict(text: &str) -> Result<RubricVerdict, serde_json::Error> {
187 match serde_json::from_str(text) {
188 Ok(verdict) => Ok(verdict),
189 Err(source) => match extract_json_object(text) {
190 Some(json) => serde_json::from_str(json),
191 None => Err(source),
192 },
193 }
194}
195
/// Returns the first balanced `{ ... }` span in `text`, or `None` when there
/// is no opening brace or the braces never balance.
///
/// Braces inside double-quoted strings are ignored, and a backslash inside a
/// string escapes the character that follows it.
fn extract_json_object(text: &str) -> Option<&str> {
    let open_at = text.find('{')?;
    let candidate = &text[open_at..];

    let mut nesting = 0usize;
    let mut inside_string = false;
    let mut skip_next = false;

    // Every delimiter of interest is ASCII and UTF-8 continuation bytes never
    // collide with ASCII, so scanning bytes sees the same delimiters as
    // scanning characters would.
    for (index, byte) in candidate.bytes().enumerate() {
        if inside_string {
            match byte {
                _ if skip_next => skip_next = false,
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => nesting = nesting.saturating_add(1),
            b'}' => {
                nesting = nesting.saturating_sub(1);
                if nesting == 0 {
                    // `index` points at an ASCII byte, so the inclusive end is
                    // always a character boundary.
                    return Some(&candidate[..=index]);
                }
            }
            _ => {}
        }
    }

    None
}
230
231fn deserialize_anomalies<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
232where
233 D: serde::Deserializer<'de>,
234{
235 let values = Vec::<serde_json::Value>::deserialize(deserializer)?;
236 Ok(values.into_iter().map(anomaly_to_string).collect())
237}
238
239fn anomaly_to_string(value: serde_json::Value) -> String {
240 match value {
241 serde_json::Value::String(text) => text,
242 serde_json::Value::Object(mut object) => {
243 let issue = object
244 .remove("issue")
245 .and_then(|value| value.as_str().map(str::to_owned));
246 let fix = object
247 .remove("fix")
248 .and_then(|value| value.as_str().map(str::to_owned));
249 match (issue, fix) {
250 (Some(issue), Some(fix)) => format!("{issue} Fix: {fix}"),
251 (Some(issue), None) => issue,
252 (None, Some(fix)) => fix,
253 (None, None) => serde_json::Value::Object(object).to_string(),
254 }
255 }
256 other => other.to_string(),
257 }
258}
259
260pub fn assert_verdict(name: &str, verdict: RubricVerdict) -> Result<(), RubricError> {
266 if verdict.verdict.is_pass() {
267 Ok(())
268 } else {
269 Err(RubricError::Assertion {
270 name: name.to_string(),
271 reason: verdict.reason,
272 anomalies: verdict.anomalies,
273 })
274 }
275}
276
277pub fn run(cli: Cli) -> anyhow::Result<()> {
283 cli::run(cli)
284}
285
286fn run_codex_acp_rubric(
287 b64_png: &str,
288 question: &str,
289 model: &str,
290 effort: &str,
291 system_prompt: &str,
292 config: &RubricRunConfig,
293) -> Result<String, PoolError> {
294 let mut acp = AcpClient::spawn(
295 &config.codex_acp_binary,
296 model,
297 effort,
298 &config.extra_env,
299 config.cwd.as_deref(),
300 )?;
301 acp.start_session(config.cwd.as_deref())?;
302
303 let prompt = format!("{system_prompt}\n\nQuestion: {question}");
304 acp.prompt_image(&prompt, b64_png)
305}
306
307struct AcpClient {
308 child: Child,
309 stdin: ChildStdin,
310 stdout: BufReader<ChildStdout>,
311 next_id: i64,
312 session_id: Option<String>,
313}
314
315impl AcpClient {
316 fn spawn(
317 binary: &Path,
318 model: &str,
319 effort: &str,
320 extra_env: &[(OsString, OsString)],
321 cwd: Option<&Path>,
322 ) -> Result<Self, PoolError> {
323 let mut command = Command::new(binary);
324 command
325 .arg("-c")
326 .arg(format!("model=\"{model}\""))
327 .arg("-c")
328 .arg(format!("model_reasoning_effort=\"{effort}\""))
329 .stdin(Stdio::piped())
330 .stdout(Stdio::piped())
331 .stderr(Stdio::piped());
332 if let Some(cwd) = cwd {
333 command.current_dir(cwd);
334 }
335 for (key, value) in extra_env {
336 command.env::<&OsStr, &OsStr>(key.as_os_str(), value.as_os_str());
337 }
338 let mut child = command
339 .spawn()
340 .map_err(|e| PoolError::Spawn(format!("spawn {}: {e}", binary.display())))?;
341
342 let stdin = child
343 .stdin
344 .take()
345 .ok_or_else(|| PoolError::Spawn("codex-acp stdin unavailable".to_string()))?;
346 let stdout = child
347 .stdout
348 .take()
349 .ok_or_else(|| PoolError::Spawn("codex-acp stdout unavailable".to_string()))?;
350
351 Ok(Self {
352 child,
353 stdin,
354 stdout: BufReader::new(stdout),
355 next_id: 1,
356 session_id: None,
357 })
358 }
359
360 fn start_session(&mut self, cwd: Option<&Path>) -> Result<(), PoolError> {
361 let init_id = self.claim_id();
362 self.request(
363 init_id,
364 "initialize",
365 serde_json::json!({
366 "protocolVersion": 1,
367 "clientCapabilities": {},
368 "clientInfo": {
369 "name": "cb-rubric",
370 "version": env!("CARGO_PKG_VERSION")
371 }
372 }),
373 )?;
374
375 let cwd = match cwd {
376 Some(cwd) => cwd.to_path_buf(),
377 None => {
378 std::env::current_dir().map_err(|e| PoolError::Rpc(format!("current dir: {e}")))?
379 }
380 }
381 .to_string_lossy()
382 .into_owned();
383 let session_request_id = self.claim_id();
384 let session_id = self.request(
385 session_request_id,
386 "session/new",
387 serde_json::json!({
388 "cwd": cwd,
389 "mcpServers": []
390 }),
391 )?["sessionId"]
392 .as_str()
393 .ok_or_else(|| PoolError::Rpc("unexpected session/new response shape".to_string()))?
394 .to_string();
395 self.session_id = Some(session_id);
396 Ok(())
397 }
398
399 fn prompt_image(&mut self, prompt: &str, b64_png: &str) -> Result<String, PoolError> {
400 let session_id = self
401 .session_id
402 .clone()
403 .ok_or_else(|| PoolError::Rpc("session not initialized".to_string()))?;
404 let prompt_id = self.claim_id();
405 self.prompt(
406 prompt_id,
407 &session_id,
408 serde_json::json!({
409 "sessionId": session_id,
410 "prompt": [
411 { "type": "text", "text": prompt },
412 { "type": "image", "data": b64_png, "mimeType": "image/png" }
413 ]
414 }),
415 )
416 }
417
418 fn claim_id(&mut self) -> i64 {
419 let id = self.next_id;
420 self.next_id += 1;
421 id
422 }
423
424 fn request(
425 &mut self,
426 id: i64,
427 method: &str,
428 params: serde_json::Value,
429 ) -> Result<serde_json::Value, PoolError> {
430 self.send(id, method, params)?;
431
432 loop {
433 let msg = self.read_message()?;
434 if msg["id"].as_i64() == Some(id) {
435 return rpc_result(msg);
436 }
437 }
438 }
439
440 fn prompt(
441 &mut self,
442 id: i64,
443 session_id: &str,
444 params: serde_json::Value,
445 ) -> Result<String, PoolError> {
446 self.send(id, "session/prompt", params)?;
447
448 let mut text = String::new();
449 loop {
450 let msg = self.read_message()?;
451 if msg["id"].as_i64() == Some(id) {
452 rpc_result(msg)?;
453 return Ok(text);
454 }
455
456 if msg["method"] == "session/update" && msg["params"]["sessionId"] == session_id {
457 let update = &msg["params"]["update"];
458 if update["sessionUpdate"] == "agent_message_chunk" {
459 if let Some(chunk) = update["content"]["text"].as_str() {
460 text.push_str(chunk);
461 }
462 }
463 }
464 }
465 }
466
467 fn send(&mut self, id: i64, method: &str, params: serde_json::Value) -> Result<(), PoolError> {
468 let msg = serde_json::json!({
469 "jsonrpc": "2.0",
470 "id": id,
471 "method": method,
472 "params": params,
473 });
474 serde_json::to_writer(&mut self.stdin, &msg)
475 .map_err(|e| PoolError::Rpc(format!("write codex-acp request: {e}")))?;
476 self.stdin
477 .write_all(b"\n")
478 .map_err(|e| PoolError::Rpc(format!("write codex-acp newline: {e}")))?;
479 self.stdin
480 .flush()
481 .map_err(|e| PoolError::Rpc(format!("flush codex-acp request: {e}")))
482 }
483
484 fn read_message(&mut self) -> Result<serde_json::Value, PoolError> {
485 let mut line = String::new();
486 let n = self
487 .stdout
488 .read_line(&mut line)
489 .map_err(|e| PoolError::Rpc(format!("read codex-acp response: {e}")))?;
490 if n == 0 {
491 let stderr = self
492 .child
493 .stderr
494 .take()
495 .map(|mut stderr| {
496 let mut buf = String::new();
497 let _ = std::io::Read::read_to_string(&mut stderr, &mut buf);
498 buf
499 })
500 .unwrap_or_default();
501 return Err(PoolError::WorkerCrashed {
502 worker_id: usize::MAX,
503 message: format!("codex-acp exited before response: {stderr}"),
504 });
505 }
506
507 serde_json::from_str(&line)
508 .map_err(|e| PoolError::Rpc(format!("parse codex-acp message {line:?}: {e}")))
509 }
510}
511
512impl Drop for AcpClient {
513 fn drop(&mut self) {
514 let _ = self.child.kill();
515 let _ = self.child.wait();
516 }
517}
518
519fn rpc_result(msg: serde_json::Value) -> Result<serde_json::Value, PoolError> {
520 if let Some(error) = msg.get("error") {
521 let message = error.to_string();
522 let lowered = message.to_ascii_lowercase();
523 if lowered.contains("usage limit") || lowered.contains("quota") {
524 Err(PoolError::QuotaExceeded)
525 } else if lowered.contains("rate limit") {
526 Err(PoolError::RateLimited {
527 retry_after: parse_retry_after(error),
528 })
529 } else {
530 Err(PoolError::Rpc(format!("codex-acp rpc error: {error}")))
531 }
532 } else {
533 Ok(msg["result"].clone())
534 }
535}
536
537fn parse_retry_after(error: &serde_json::Value) -> Option<std::time::Duration> {
538 let candidates = [
539 &error["retry_after"],
540 &error["retryAfter"],
541 &error["data"]["retry_after"],
542 &error["data"]["retryAfter"],
543 ];
544 for candidate in candidates {
545 if let Some(seconds) = candidate.as_u64() {
546 return Some(std::time::Duration::from_secs(seconds));
547 }
548 if let Some(seconds) = candidate.as_f64() {
549 if seconds.is_finite() && seconds >= 0.0 {
550 return Some(std::time::Duration::from_secs_f64(seconds));
551 }
552 }
553 if let Some(value) = candidate.as_str() {
554 if let Ok(seconds) = value.parse::<u64>() {
555 return Some(std::time::Duration::from_secs(seconds));
556 }
557 }
558 }
559 None
560}
561
562#[cfg(test)]
563mod tests;