Skip to main content

visual_rubric/
lib.rs

1//! Shared AI visual-rubric runner for screenshot review.
2//!
3//! This crate owns the Codex ACP plumbing so browser screenshots, offscreen
4//! renderer captures, and VM/VNC screenshots can use one rubric path.
5//!
6//! It also provides a two-stage pipeline: vision model extraction via an
7//! OpenAI-compatible HTTP API, then rubric scoring via ACP.
8#![warn(missing_docs)]
9
10mod acp;
11mod batch;
12pub mod cli;
13mod errors;
14mod pool;
15pub mod presets;
16mod typed_strings;
17pub mod vision;
18
19use std::ffi::OsString;
20use std::path::{Path, PathBuf};
21
22use base64::Engine as _;
23use serde::{Deserialize, Serialize};
24
25use acp::AcpClient;
26use vision::VisionApiConfig;
27
28pub use acp::build_codex_acp_args;
29pub use batch::{
30    AggregateStatus, AssetChange, AssetRubricReport, AssetRubricResult, AssetSnapshot,
31    BatchRubricConfig, BatchRubricReport, BatchRubricRun, IssueClassificationInput,
32    IssueClassifier, IssueRecommendation, RecommendationSeverity, SelectionMode, diff_snapshots,
33    select_changed,
34};
35pub use cli::Cli;
36pub use errors::{PoolError, RateLimitEvent, RubricError};
37pub use pool::{LogCaptureConfig, LogPathMode, PoolConfig, PoolStats, RubricPool};
38pub use typed_strings::{RubricEffort, RubricVerdictStatus};
39
40/// Parsed rubric verdict returned by ACP.
41#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
42pub struct RubricVerdict {
43    /// Machine-readable pass/fail status.
44    pub verdict: RubricVerdictStatus,
45    /// Human-readable reason for the verdict.
46    pub reason: String,
47    /// Optional anomalies observed in the screenshot.
48    #[serde(default, deserialize_with = "deserialize_anomalies")]
49    pub anomalies: Vec<String>,
50}
51
52/// Optional model settings for one rubric request.
53#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
54pub struct RubricOptions {
55    /// ACP model override.
56    pub model: Option<String>,
57    /// Reasoning effort override.
58    pub effort: Option<RubricEffort>,
59    /// System prompt override.
60    pub system_prompt: Option<String>,
61}
62
/// Process-level settings used when calling ACP directly.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RubricRunConfig {
    /// Location of the ACP executable (for example `codex-acp` or `opencode`).
    pub codex_acp_binary: PathBuf,
    /// Additional command-line arguments handed to the ACP executable.
    ///
    /// The default value is the codex-acp form
    /// `["-c", "model=...", "-c", "model_reasoning_effort=..."]`.
    /// When running opencode, set this to `["acp"]`.
    pub acp_args: Vec<String>,
    /// Additional environment variables set for the child process.
    pub extra_env: Vec<(OsString, OsString)>,
    /// Working directory handed to ACP, if any.
    pub cwd: Option<PathBuf>,
}
77
78impl Default for RubricRunConfig {
79    fn default() -> Self {
80        Self {
81            codex_acp_binary: default_codex_acp_binary(),
82            acp_args: build_codex_acp_args(
83                DEFAULT_CODEX_ACP_MODEL,
84                DEFAULT_CODEX_ACP_REASONING_EFFORT,
85            ),
86            extra_env: Vec::new(),
87            cwd: None,
88        }
89    }
90}
91
92/// Default system prompt used for screenshot rubric requests.
93///
94/// Shared with the `ui-regression` question preset.
95pub const DEFAULT_SYSTEM_PROMPT: &str = presets::UI_REGRESSION_SYSTEM_PROMPT;
96
/// Model name used for Codex ACP when no override is given.
pub const DEFAULT_CODEX_ACP_MODEL: &str = "gpt-5.4-mini";
/// Reasoning effort used for Codex ACP when no override is given.
pub const DEFAULT_CODEX_ACP_REASONING_EFFORT: &str = "medium";
101
/// Prompt used for the vision extraction stage when the caller has no
/// custom one.
///
/// It instructs the vision model to emit a structured JSON description of
/// the screenshot, which lets a text-only rubric model (e.g. DeepSeek V4 via
/// opencode) do the scoring.
pub const DEFAULT_VISION_PROMPT: &str = concat!(
    "You are a UI description engine. Given a screenshot, produce a structured JSON ",
    "description of all visible user interface elements, their text content, layout, ",
    "and any visual issues (clipping, overlap, blank regions, contrast problems). ",
    "Output ONLY valid JSON with no additional text.",
);
111
112/// Returns the default rubric options.
113#[must_use]
114pub fn default_options() -> RubricOptions {
115    RubricOptions {
116        model: Some(DEFAULT_CODEX_ACP_MODEL.to_string()),
117        effort: Some(DEFAULT_CODEX_ACP_REASONING_EFFORT.into()),
118        system_prompt: Some(DEFAULT_SYSTEM_PROMPT.to_string()),
119    }
120}
121
/// Gives the executable name used for Codex ACP when none is configured.
///
/// The result is the bare name `codex-acp`, not an absolute path.
#[must_use]
pub fn default_codex_acp_binary() -> PathBuf {
    Path::new("codex-acp").to_path_buf()
}
127
128/// Reads and base64-encodes a PNG file.
129///
130/// # Errors
131///
132/// Returns [`PoolError::Rpc`] when the PNG cannot be read.
133pub fn encode_png(png_path: &Path) -> Result<String, PoolError> {
134    let bytes = std::fs::read(png_path)
135        .map_err(|e| PoolError::Rpc(format!("read png {}: {e}", png_path.display())))?;
136    Ok(base64::engine::general_purpose::STANDARD.encode(bytes))
137}
138
139/// Evaluates a PNG and returns an error when the verdict is not pass.
140///
141/// # Errors
142///
143/// Returns [`RubricError`] for PNG IO, ACP, JSON parsing, or failed
144/// assertion errors.
145pub fn assert_image_rubric(png_path: &Path, name: &str, question: &str) -> Result<(), RubricError> {
146    let verdict = evaluate_image_rubric(png_path, question)?;
147    assert_verdict(name, verdict)
148}
149
150/// Evaluates a PNG with default options.
151///
152/// # Errors
153///
154/// Returns [`RubricError`] for PNG IO, ACP, or verdict parsing failures.
155pub fn evaluate_image_rubric(
156    png_path: &Path,
157    question: &str,
158) -> Result<RubricVerdict, RubricError> {
159    evaluate_image_rubric_with_options(png_path, question, default_options())
160}
161
162/// Evaluates a PNG with caller-provided model options.
163///
164/// # Errors
165///
166/// Returns [`RubricError`] for PNG IO, ACP, or verdict parsing failures.
167pub fn evaluate_image_rubric_with_options(
168    png_path: &Path,
169    question: &str,
170    opts: RubricOptions,
171) -> Result<RubricVerdict, RubricError> {
172    evaluate_image_rubric_with_config(png_path, question, opts, RubricRunConfig::default())
173}
174
175/// Evaluates a PNG with caller-provided model and runtime configuration.
176///
177/// # Errors
178///
179/// Returns [`RubricError`] for PNG IO, ACP, or verdict parsing failures.
180pub fn evaluate_image_rubric_with_config(
181    png_path: &Path,
182    question: &str,
183    opts: RubricOptions,
184    config: RubricRunConfig,
185) -> Result<RubricVerdict, RubricError> {
186    let bytes = std::fs::read(png_path).map_err(|source| RubricError::ReadPng {
187        path: png_path.to_path_buf(),
188        source,
189    })?;
190    let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
191    let text = run_codex_acp_rubric(
192        &b64,
193        question,
194        opts.model
195            .as_deref()
196            .map_or(DEFAULT_CODEX_ACP_MODEL, |model| model),
197        opts.effort
198            .as_deref()
199            .map_or(DEFAULT_CODEX_ACP_REASONING_EFFORT, |effort| effort),
200        opts.system_prompt
201            .as_deref()
202            .map_or(DEFAULT_SYSTEM_PROMPT, |system_prompt| system_prompt),
203        &config,
204    )?;
205
206    parse_verdict(&text).map_err(|source| RubricError::ParseVerdict { text, source })
207}
208
209/// Two-stage pipeline evaluation: vision model → rubric model.
210///
211/// Stage 1: Sends the screenshot to an OpenAI-compatible vision API and
212/// returns a structured JSON description.
213///
214/// Stage 2: Sends the structured description (plus the rubric question) to
215/// the configured ACP backend for the final rubric verdict.
216///
217/// # Errors
218///
219/// Returns [`RubricError`] for PNG IO, vision API, ACP, or verdict parsing
220/// failures.
221pub fn evaluate_image_rubric_pipeline(
222    png_path: &Path,
223    question: &str,
224    vision_config: &VisionApiConfig,
225    vision_prompt: &str,
226    rubric_options: &RubricOptions,
227    rubric_config: &RubricRunConfig,
228) -> Result<RubricVerdict, RubricError> {
229    let bytes = std::fs::read(png_path).map_err(|source| RubricError::ReadPng {
230        path: png_path.to_path_buf(),
231        source,
232    })?;
233    let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
234
235    let structured =
236        vision::call_vision_api(&b64, vision_prompt, vision_config).map_err(RubricError::Pool)?;
237
238    let system_prompt = rubric_options
239        .system_prompt
240        .as_deref()
241        .map_or(DEFAULT_SYSTEM_PROMPT, |system_prompt| system_prompt);
242    let rubric_prompt =
243        format!("{system_prompt}\n\nUI description:\n{structured}\n\nQuestion: {question}");
244
245    let mut acp = AcpClient::spawn(
246        &rubric_config.codex_acp_binary,
247        &rubric_config.acp_args,
248        &rubric_config.extra_env,
249        rubric_config.cwd.as_deref(),
250    )
251    .map_err(RubricError::Pool)?;
252    acp.start_session(rubric_config.cwd.as_deref())
253        .map_err(RubricError::Pool)?;
254
255    let text = acp.prompt_text(&rubric_prompt).map_err(RubricError::Pool)?;
256
257    parse_verdict(&text).map_err(|source| RubricError::ParseVerdict { text, source })
258}
259
260/// Parses strict rubric JSON into a typed verdict.
261///
262/// # Errors
263///
264/// Returns the underlying JSON error when the text is malformed or contains an
265/// unsupported verdict status.
266pub fn parse_verdict(text: &str) -> Result<RubricVerdict, serde_json::Error> {
267    match serde_json::from_str(text) {
268        Ok(verdict) => Ok(verdict),
269        Err(source) => match extract_json_object(text) {
270            Some(json) => serde_json::from_str(json),
271            None => Err(source),
272        },
273    }
274}
275
/// Finds the first brace-balanced `{...}` span in `text`.
///
/// Scanning starts at the first `{`. Braces inside double-quoted strings are
/// ignored, and a backslash inside a string escapes the next character.
/// Returns `None` when there is no `{` or the braces never balance.
fn extract_json_object(text: &str) -> Option<&str> {
    let start = text.find('{')?;
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    for (offset, character) in text[start..].char_indices() {
        match (in_string, character) {
            // Inside a string: only escapes and the closing quote matter.
            (true, _) if escaped => escaped = false,
            (true, '\\') => escaped = true,
            (true, '"') => in_string = false,
            (true, _) => {}
            // Outside a string: track quotes and brace nesting.
            (false, '"') => in_string = true,
            (false, '{') => depth = depth.saturating_add(1),
            (false, '}') => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    let end = start + offset + character.len_utf8();
                    return Some(&text[start..end]);
                }
            }
            (false, _) => {}
        }
    }

    None
}
310
311fn deserialize_anomalies<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
312where
313    D: serde::Deserializer<'de>,
314{
315    let values = Vec::<serde_json::Value>::deserialize(deserializer)?;
316    Ok(values.into_iter().map(anomaly_to_string).collect())
317}
318
319fn anomaly_to_string(value: serde_json::Value) -> String {
320    match value {
321        serde_json::Value::String(text) => text,
322        serde_json::Value::Object(mut object) => {
323            let issue = object
324                .remove("issue")
325                .and_then(|value| value.as_str().map(str::to_owned));
326            let fix = object
327                .remove("fix")
328                .and_then(|value| value.as_str().map(str::to_owned));
329            match (issue, fix) {
330                (Some(issue), Some(fix)) => format!("{issue} Fix: {fix}"),
331                (Some(issue), None) => issue,
332                (None, Some(fix)) => fix,
333                (None, None) => serde_json::Value::Object(object).to_string(),
334            }
335        }
336        other => other.to_string(),
337    }
338}
339
340/// Converts a verdict into an assertion-style result.
341///
342/// # Errors
343///
344/// Returns [`RubricError::Assertion`] when the verdict is not pass.
345pub fn assert_verdict(name: &str, verdict: RubricVerdict) -> Result<(), RubricError> {
346    if verdict.verdict.is_pass() {
347        Ok(())
348    } else {
349        Err(RubricError::Assertion {
350            name: name.to_string(),
351            reason: verdict.reason,
352            anomalies: verdict.anomalies,
353        })
354    }
355}
356
357/// Runs the CLI command.
358///
359/// # Errors
360///
361/// Returns command parsing, IO, ACP, or audit failures as [`anyhow::Error`].
362pub fn run(cli: Cli) -> anyhow::Result<()> {
363    cli::run(cli)
364}
365
366fn run_codex_acp_rubric(
367    b64_png: &str,
368    question: &str,
369    model: &str,
370    effort: &str,
371    system_prompt: &str,
372    config: &RubricRunConfig,
373) -> Result<String, PoolError> {
374    let args = effective_acp_args(config, model, effort);
375    let mut acp = AcpClient::spawn(
376        &config.codex_acp_binary,
377        args.as_slice(),
378        &config.extra_env,
379        config.cwd.as_deref(),
380    )?;
381    acp.start_session(config.cwd.as_deref())?;
382
383    let prompt = format!("{system_prompt}\n\nQuestion: {question}");
384    acp.prompt_image(&prompt, b64_png)
385}
386
387fn effective_acp_args(config: &RubricRunConfig, model: &str, effort: &str) -> Vec<String> {
388    if config.acp_args
389        == build_codex_acp_args(DEFAULT_CODEX_ACP_MODEL, DEFAULT_CODEX_ACP_REASONING_EFFORT)
390    {
391        build_codex_acp_args(model, effort)
392    } else {
393        config.acp_args.clone()
394    }
395}
396
397#[cfg(test)]
398mod tests;