Skip to main content

visual_rubric/
lib.rs

1//! Shared AI visual-rubric runner for screenshot review.
2//!
3//! This crate owns the Codex ACP plumbing so browser screenshots, offscreen
4//! renderer captures, and VM/VNC screenshots can use one rubric path.
5#![warn(missing_docs)]
6
7pub mod cli;
8mod errors;
9mod pool;
10mod typed_strings;
11
12use std::ffi::{OsStr, OsString};
13use std::io::{BufRead as _, BufReader, Write as _};
14use std::path::{Path, PathBuf};
15use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
16
17use base64::Engine as _;
18use serde::{Deserialize, Serialize};
19
20pub use cli::Cli;
21pub use errors::{PoolError, RateLimitEvent, RubricError};
22pub use pool::{PoolConfig, PoolStats, RubricPool};
23pub use typed_strings::{RubricEffort, RubricVerdictStatus};
24
25#[derive(Debug, Deserialize, Serialize)]
26/// Parsed rubric verdict returned by Codex ACP.
27pub struct RubricVerdict {
28    /// Machine-readable pass/fail status.
29    pub verdict: RubricVerdictStatus,
30    /// Human-readable reason for the verdict.
31    pub reason: String,
32    /// Optional anomalies observed in the screenshot.
33    #[serde(default, deserialize_with = "deserialize_anomalies")]
34    pub anomalies: Vec<String>,
35}
36
37/// Optional model settings for one rubric request.
38#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
39pub struct RubricOptions {
40    /// Codex model override.
41    pub model: Option<String>,
42    /// Reasoning effort override.
43    pub effort: Option<RubricEffort>,
44    /// System prompt override.
45    pub system_prompt: Option<String>,
46}
47
/// Process-level configuration used when calling Codex ACP directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RubricRunConfig {
    /// Location of the `codex-acp` executable.
    pub codex_acp_binary: PathBuf,
    /// Additional `(name, value)` environment variables set on the child process.
    pub extra_env: Vec<(OsString, OsString)>,
    /// Working directory handed to Codex ACP, if any.
    pub cwd: Option<PathBuf>,
}
58
59impl Default for RubricRunConfig {
60    fn default() -> Self {
61        Self {
62            codex_acp_binary: default_codex_acp_binary(),
63            extra_env: Vec::new(),
64            cwd: None,
65        }
66    }
67}
68
/// System prompt sent with a screenshot rubric request when the caller does
/// not supply one.
///
/// It asks for a strict JSON reply with `verdict`, `reason`, and `anomalies`
/// fields and lists the fail criteria.
pub const DEFAULT_SYSTEM_PROMPT: &str = "\
You are a UI regression auditor. \
You will be shown one screenshot and asked a specific question. Reply with strict \
JSON matching this schema and nothing else:
{ \"verdict\": \"pass\" | \"fail\", \"reason\": string, \"anomalies\": string[] }
Fail criteria: text clipped or overflowing its container, overlapping interactive \
elements, missing/blank regions where content should appear, illegible contrast, \
visibly broken layout. Cosmetic differences from previous runs are NOT failures \
unless they make the UI worse by the criteria above.";
79
/// Model name used when a request does not override it.
pub const DEFAULT_CODEX_ACP_MODEL: &str = "gpt-5.4-mini";
/// Reasoning effort used when a request does not override it.
pub const DEFAULT_CODEX_ACP_REASONING_EFFORT: &str = "medium";
84
85/// Returns the default rubric options.
86#[must_use]
87pub fn default_options() -> RubricOptions {
88    RubricOptions {
89        model: Some(DEFAULT_CODEX_ACP_MODEL.to_string()),
90        effort: Some(DEFAULT_CODEX_ACP_REASONING_EFFORT.into()),
91        system_prompt: Some(DEFAULT_SYSTEM_PROMPT.to_string()),
92    }
93}
94
/// Returns the bare executable name `codex-acp` as a path.
///
/// The name has no directory component.
#[must_use]
pub fn default_codex_acp_binary() -> PathBuf {
    "codex-acp".into()
}
100
101/// Reads and base64-encodes a PNG file.
102///
103/// # Errors
104///
105/// Returns [`PoolError::Rpc`] when the PNG cannot be read.
106pub fn encode_png(png_path: &Path) -> Result<String, PoolError> {
107    let bytes = std::fs::read(png_path)
108        .map_err(|e| PoolError::Rpc(format!("read png {}: {e}", png_path.display())))?;
109    Ok(base64::engine::general_purpose::STANDARD.encode(bytes))
110}
111
112/// Evaluates a PNG and returns an error when the verdict is not pass.
113///
114/// # Errors
115///
116/// Returns [`RubricError`] for PNG IO, Codex ACP, JSON parsing, or failed
117/// assertion errors.
118pub fn assert_image_rubric(png_path: &Path, name: &str, question: &str) -> Result<(), RubricError> {
119    let verdict = evaluate_image_rubric(png_path, question)?;
120    assert_verdict(name, verdict)
121}
122
123/// Evaluates a PNG with default options.
124///
125/// # Errors
126///
127/// Returns [`RubricError`] for PNG IO, Codex ACP, or verdict parsing failures.
128pub fn evaluate_image_rubric(
129    png_path: &Path,
130    question: &str,
131) -> Result<RubricVerdict, RubricError> {
132    evaluate_image_rubric_with_options(png_path, question, default_options())
133}
134
135/// Evaluates a PNG with caller-provided model options.
136///
137/// # Errors
138///
139/// Returns [`RubricError`] for PNG IO, Codex ACP, or verdict parsing failures.
140pub fn evaluate_image_rubric_with_options(
141    png_path: &Path,
142    question: &str,
143    opts: RubricOptions,
144) -> Result<RubricVerdict, RubricError> {
145    evaluate_image_rubric_with_config(png_path, question, opts, RubricRunConfig::default())
146}
147
148/// Evaluates a PNG with caller-provided model and runtime configuration.
149///
150/// # Errors
151///
152/// Returns [`RubricError`] for PNG IO, Codex ACP, or verdict parsing failures.
153pub fn evaluate_image_rubric_with_config(
154    png_path: &Path,
155    question: &str,
156    opts: RubricOptions,
157    config: RubricRunConfig,
158) -> Result<RubricVerdict, RubricError> {
159    let bytes = std::fs::read(png_path).map_err(|source| RubricError::ReadPng {
160        path: png_path.to_path_buf(),
161        source,
162    })?;
163    let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
164    let text = run_codex_acp_rubric(
165        &b64,
166        question,
167        opts.model.as_deref().unwrap_or(DEFAULT_CODEX_ACP_MODEL),
168        opts.effort
169            .as_deref()
170            .unwrap_or(DEFAULT_CODEX_ACP_REASONING_EFFORT),
171        opts.system_prompt
172            .as_deref()
173            .unwrap_or(DEFAULT_SYSTEM_PROMPT),
174        &config,
175    )?;
176
177    parse_verdict(&text).map_err(|source| RubricError::ParseVerdict { text, source })
178}
179
180/// Parses strict rubric JSON into a typed verdict.
181///
182/// # Errors
183///
184/// Returns the underlying JSON error when the text is malformed or contains an
185/// unsupported verdict status.
186pub fn parse_verdict(text: &str) -> Result<RubricVerdict, serde_json::Error> {
187    match serde_json::from_str(text) {
188        Ok(verdict) => Ok(verdict),
189        Err(source) => match extract_json_object(text) {
190            Some(json) => serde_json::from_str(json),
191            None => Err(source),
192        },
193    }
194}
195
/// Returns the first brace-balanced `{...}` span in `text`, if any.
///
/// Scanning starts at the first `{`. Braces inside double-quoted strings are
/// ignored, and a backslash inside a string escapes the next character.
/// Returns `None` when there is no `{` or the braces never balance.
fn extract_json_object(text: &str) -> Option<&str> {
    let start = text.find('{')?;
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Every delimiter of interest is ASCII, and no byte of a multi-byte UTF-8
    // sequence is ASCII, so scanning bytes matches scanning characters.
    for (index, byte) in text.bytes().enumerate().skip(start) {
        match (in_string, byte) {
            (true, _) if escaped => escaped = false,
            (true, b'\\') => escaped = true,
            (true, b'"') => in_string = false,
            (true, _) => {}
            (false, b'"') => in_string = true,
            (false, b'{') => depth = depth.saturating_add(1),
            (false, b'}') => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    // `index` is an ASCII `}`, so `index + 1` is a char boundary.
                    return Some(&text[start..=index]);
                }
            }
            (false, _) => {}
        }
    }

    None
}
230
231fn deserialize_anomalies<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
232where
233    D: serde::Deserializer<'de>,
234{
235    let values = Vec::<serde_json::Value>::deserialize(deserializer)?;
236    Ok(values.into_iter().map(anomaly_to_string).collect())
237}
238
239fn anomaly_to_string(value: serde_json::Value) -> String {
240    match value {
241        serde_json::Value::String(text) => text,
242        serde_json::Value::Object(mut object) => {
243            let issue = object
244                .remove("issue")
245                .and_then(|value| value.as_str().map(str::to_owned));
246            let fix = object
247                .remove("fix")
248                .and_then(|value| value.as_str().map(str::to_owned));
249            match (issue, fix) {
250                (Some(issue), Some(fix)) => format!("{issue} Fix: {fix}"),
251                (Some(issue), None) => issue,
252                (None, Some(fix)) => fix,
253                (None, None) => serde_json::Value::Object(object).to_string(),
254            }
255        }
256        other => other.to_string(),
257    }
258}
259
260/// Converts a verdict into an assertion-style result.
261///
262/// # Errors
263///
264/// Returns [`RubricError::Assertion`] when the verdict is not pass.
265pub fn assert_verdict(name: &str, verdict: RubricVerdict) -> Result<(), RubricError> {
266    if verdict.verdict.is_pass() {
267        Ok(())
268    } else {
269        Err(RubricError::Assertion {
270            name: name.to_string(),
271            reason: verdict.reason,
272            anomalies: verdict.anomalies,
273        })
274    }
275}
276
277/// Runs the CLI command.
278///
279/// # Errors
280///
281/// Returns command parsing, IO, Codex ACP, or audit failures as [`anyhow::Error`].
282pub fn run(cli: Cli) -> anyhow::Result<()> {
283    cli::run(cli)
284}
285
286fn run_codex_acp_rubric(
287    b64_png: &str,
288    question: &str,
289    model: &str,
290    effort: &str,
291    system_prompt: &str,
292    config: &RubricRunConfig,
293) -> Result<String, PoolError> {
294    let mut acp = AcpClient::spawn(
295        &config.codex_acp_binary,
296        model,
297        effort,
298        &config.extra_env,
299        config.cwd.as_deref(),
300    )?;
301    acp.start_session(config.cwd.as_deref())?;
302
303    let prompt = format!("{system_prompt}\n\nQuestion: {question}");
304    acp.prompt_image(&prompt, b64_png)
305}
306
/// Client for one `codex-acp` child process, speaking line-delimited JSON-RPC
/// over the child's stdin and stdout.
///
/// Field order is significant: fields are dropped in declaration order after
/// the `Drop` impl has run.
struct AcpClient {
    /// Handle to the child process; also owns its piped stderr.
    child: Child,
    /// Write side: one JSON request per line.
    stdin: ChildStdin,
    /// Read side: one JSON message per line.
    stdout: BufReader<ChildStdout>,
    /// Id that will be assigned to the next outgoing request.
    next_id: i64,
    /// Session id returned by `session/new`; `None` until a session is started.
    session_id: Option<String>,
}
314
315impl AcpClient {
316    fn spawn(
317        binary: &Path,
318        model: &str,
319        effort: &str,
320        extra_env: &[(OsString, OsString)],
321        cwd: Option<&Path>,
322    ) -> Result<Self, PoolError> {
323        let mut command = Command::new(binary);
324        command
325            .arg("-c")
326            .arg(format!("model=\"{model}\""))
327            .arg("-c")
328            .arg(format!("model_reasoning_effort=\"{effort}\""))
329            .stdin(Stdio::piped())
330            .stdout(Stdio::piped())
331            .stderr(Stdio::piped());
332        if let Some(cwd) = cwd {
333            command.current_dir(cwd);
334        }
335        for (key, value) in extra_env {
336            command.env::<&OsStr, &OsStr>(key.as_os_str(), value.as_os_str());
337        }
338        let mut child = command
339            .spawn()
340            .map_err(|e| PoolError::Spawn(format!("spawn {}: {e}", binary.display())))?;
341
342        let stdin = child
343            .stdin
344            .take()
345            .ok_or_else(|| PoolError::Spawn("codex-acp stdin unavailable".to_string()))?;
346        let stdout = child
347            .stdout
348            .take()
349            .ok_or_else(|| PoolError::Spawn("codex-acp stdout unavailable".to_string()))?;
350
351        Ok(Self {
352            child,
353            stdin,
354            stdout: BufReader::new(stdout),
355            next_id: 1,
356            session_id: None,
357        })
358    }
359
360    fn start_session(&mut self, cwd: Option<&Path>) -> Result<(), PoolError> {
361        let init_id = self.claim_id();
362        self.request(
363            init_id,
364            "initialize",
365            serde_json::json!({
366                "protocolVersion": 1,
367                "clientCapabilities": {},
368                "clientInfo": {
369                    "name": "cb-rubric",
370                    "version": env!("CARGO_PKG_VERSION")
371                }
372            }),
373        )?;
374
375        let cwd = match cwd {
376            Some(cwd) => cwd.to_path_buf(),
377            None => {
378                std::env::current_dir().map_err(|e| PoolError::Rpc(format!("current dir: {e}")))?
379            }
380        }
381        .to_string_lossy()
382        .into_owned();
383        let session_request_id = self.claim_id();
384        let session_id = self.request(
385            session_request_id,
386            "session/new",
387            serde_json::json!({
388                "cwd": cwd,
389                "mcpServers": []
390            }),
391        )?["sessionId"]
392            .as_str()
393            .ok_or_else(|| PoolError::Rpc("unexpected session/new response shape".to_string()))?
394            .to_string();
395        self.session_id = Some(session_id);
396        Ok(())
397    }
398
399    fn prompt_image(&mut self, prompt: &str, b64_png: &str) -> Result<String, PoolError> {
400        let session_id = self
401            .session_id
402            .clone()
403            .ok_or_else(|| PoolError::Rpc("session not initialized".to_string()))?;
404        let prompt_id = self.claim_id();
405        self.prompt(
406            prompt_id,
407            &session_id,
408            serde_json::json!({
409                "sessionId": session_id,
410                "prompt": [
411                    { "type": "text", "text": prompt },
412                    { "type": "image", "data": b64_png, "mimeType": "image/png" }
413                ]
414            }),
415        )
416    }
417
418    fn claim_id(&mut self) -> i64 {
419        let id = self.next_id;
420        self.next_id += 1;
421        id
422    }
423
424    fn request(
425        &mut self,
426        id: i64,
427        method: &str,
428        params: serde_json::Value,
429    ) -> Result<serde_json::Value, PoolError> {
430        self.send(id, method, params)?;
431
432        loop {
433            let msg = self.read_message()?;
434            if msg["id"].as_i64() == Some(id) {
435                return rpc_result(msg);
436            }
437        }
438    }
439
440    fn prompt(
441        &mut self,
442        id: i64,
443        session_id: &str,
444        params: serde_json::Value,
445    ) -> Result<String, PoolError> {
446        self.send(id, "session/prompt", params)?;
447
448        let mut text = String::new();
449        loop {
450            let msg = self.read_message()?;
451            if msg["id"].as_i64() == Some(id) {
452                rpc_result(msg)?;
453                return Ok(text);
454            }
455
456            if msg["method"] == "session/update" && msg["params"]["sessionId"] == session_id {
457                let update = &msg["params"]["update"];
458                if update["sessionUpdate"] == "agent_message_chunk" {
459                    if let Some(chunk) = update["content"]["text"].as_str() {
460                        text.push_str(chunk);
461                    }
462                }
463            }
464        }
465    }
466
467    fn send(&mut self, id: i64, method: &str, params: serde_json::Value) -> Result<(), PoolError> {
468        let msg = serde_json::json!({
469            "jsonrpc": "2.0",
470            "id": id,
471            "method": method,
472            "params": params,
473        });
474        serde_json::to_writer(&mut self.stdin, &msg)
475            .map_err(|e| PoolError::Rpc(format!("write codex-acp request: {e}")))?;
476        self.stdin
477            .write_all(b"\n")
478            .map_err(|e| PoolError::Rpc(format!("write codex-acp newline: {e}")))?;
479        self.stdin
480            .flush()
481            .map_err(|e| PoolError::Rpc(format!("flush codex-acp request: {e}")))
482    }
483
484    fn read_message(&mut self) -> Result<serde_json::Value, PoolError> {
485        let mut line = String::new();
486        let n = self
487            .stdout
488            .read_line(&mut line)
489            .map_err(|e| PoolError::Rpc(format!("read codex-acp response: {e}")))?;
490        if n == 0 {
491            let stderr = self
492                .child
493                .stderr
494                .take()
495                .map(|mut stderr| {
496                    let mut buf = String::new();
497                    let _ = std::io::Read::read_to_string(&mut stderr, &mut buf);
498                    buf
499                })
500                .unwrap_or_default();
501            return Err(PoolError::WorkerCrashed {
502                worker_id: usize::MAX,
503                message: format!("codex-acp exited before response: {stderr}"),
504            });
505        }
506
507        serde_json::from_str(&line)
508            .map_err(|e| PoolError::Rpc(format!("parse codex-acp message {line:?}: {e}")))
509    }
510}
511
512impl Drop for AcpClient {
513    fn drop(&mut self) {
514        let _ = self.child.kill();
515        let _ = self.child.wait();
516    }
517}
518
519fn rpc_result(msg: serde_json::Value) -> Result<serde_json::Value, PoolError> {
520    if let Some(error) = msg.get("error") {
521        let message = error.to_string();
522        let lowered = message.to_ascii_lowercase();
523        if lowered.contains("usage limit") || lowered.contains("quota") {
524            Err(PoolError::QuotaExceeded)
525        } else if lowered.contains("rate limit") {
526            Err(PoolError::RateLimited {
527                retry_after: parse_retry_after(error),
528            })
529        } else {
530            Err(PoolError::Rpc(format!("codex-acp rpc error: {error}")))
531        }
532    } else {
533        Ok(msg["result"].clone())
534    }
535}
536
537fn parse_retry_after(error: &serde_json::Value) -> Option<std::time::Duration> {
538    let candidates = [
539        &error["retry_after"],
540        &error["retryAfter"],
541        &error["data"]["retry_after"],
542        &error["data"]["retryAfter"],
543    ];
544    for candidate in candidates {
545        if let Some(seconds) = candidate.as_u64() {
546            return Some(std::time::Duration::from_secs(seconds));
547        }
548        if let Some(seconds) = candidate.as_f64() {
549            if seconds.is_finite() && seconds >= 0.0 {
550                return Some(std::time::Duration::from_secs_f64(seconds));
551            }
552        }
553        if let Some(value) = candidate.as_str() {
554            if let Ok(seconds) = value.parse::<u64>() {
555                return Some(std::time::Duration::from_secs(seconds));
556            }
557        }
558    }
559    None
560}
561
562#[cfg(test)]
563mod tests;