1#![warn(missing_docs)]
9
10mod acp;
11mod batch;
12pub mod cli;
13mod errors;
14mod pool;
15pub mod presets;
16mod typed_strings;
17pub mod vision;
18
19use std::ffi::OsString;
20use std::path::{Path, PathBuf};
21
22use base64::Engine as _;
23use serde::{Deserialize, Serialize};
24
25use acp::AcpClient;
26use vision::VisionApiConfig;
27
28pub use acp::build_codex_acp_args;
29pub use batch::{
30 AggregateStatus, AssetChange, AssetRubricReport, AssetRubricResult, AssetSnapshot,
31 BatchRubricConfig, BatchRubricReport, BatchRubricRun, IssueClassificationInput,
32 IssueClassifier, IssueRecommendation, RecommendationSeverity, SelectionMode, diff_snapshots,
33 select_changed,
34};
35pub use cli::Cli;
36pub use errors::{PoolError, RateLimitEvent, RubricError};
37pub use pool::{LogCaptureConfig, LogPathMode, PoolConfig, PoolStats, RubricPool};
38pub use typed_strings::{RubricEffort, RubricVerdictStatus};
39
/// Structured verdict parsed from the rubric model's reply (see [`parse_verdict`]).
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
pub struct RubricVerdict {
    /// Verdict status; [`assert_verdict`] succeeds only when `is_pass()` holds for it.
    pub verdict: RubricVerdictStatus,
    /// Reason text; copied into [`RubricError::Assertion`] when the verdict does not pass.
    pub reason: String,
    /// Anomaly descriptions. Defaults to an empty list when the key is absent;
    /// entries that are not plain strings are converted to strings while
    /// deserializing (see `deserialize_anomalies`).
    #[serde(default, deserialize_with = "deserialize_anomalies")]
    pub anomalies: Vec<String>,
}
51
/// Optional per-evaluation overrides; every `None` field falls back to a crate default.
#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RubricOptions {
    /// Model name; [`evaluate_image_rubric_with_config`] uses
    /// [`DEFAULT_CODEX_ACP_MODEL`] when this is `None`.
    pub model: Option<String>,
    /// Reasoning effort; [`evaluate_image_rubric_with_config`] uses
    /// [`DEFAULT_CODEX_ACP_REASONING_EFFORT`] when this is `None`.
    pub effort: Option<RubricEffort>,
    /// System prompt placed ahead of the question; [`DEFAULT_SYSTEM_PROMPT`] is
    /// used when this is `None`.
    pub system_prompt: Option<String>,
}
62
/// Settings describing how the codex-acp process is launched for a rubric run.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RubricRunConfig {
    /// Path of the executable handed to `AcpClient::spawn`.
    pub codex_acp_binary: PathBuf,
    /// Arguments for that executable. When they equal the arguments built from
    /// the default model and effort, [`evaluate_image_rubric_with_config`]
    /// rebuilds them from the requested model and effort; any other value is
    /// passed through unchanged.
    pub acp_args: Vec<String>,
    /// Extra `(name, value)` pairs forwarded to `AcpClient::spawn`
    /// (presumably environment variables for the child process — confirm in `acp`).
    pub extra_env: Vec<(OsString, OsString)>,
    /// Optional directory forwarded to both `AcpClient::spawn` and `start_session`.
    pub cwd: Option<PathBuf>,
}
77
78impl Default for RubricRunConfig {
79 fn default() -> Self {
80 Self {
81 codex_acp_binary: default_codex_acp_binary(),
82 acp_args: build_codex_acp_args(
83 DEFAULT_CODEX_ACP_MODEL,
84 DEFAULT_CODEX_ACP_REASONING_EFFORT,
85 ),
86 extra_env: Vec::new(),
87 cwd: None,
88 }
89 }
90}
91
/// System prompt used when [`RubricOptions::system_prompt`] is `None`; an alias
/// for [`presets::UI_REGRESSION_SYSTEM_PROMPT`].
pub const DEFAULT_SYSTEM_PROMPT: &str = presets::UI_REGRESSION_SYSTEM_PROMPT;
96
/// Model name used when [`RubricOptions::model`] is `None`, and for the default
/// [`RubricRunConfig`] arguments.
pub const DEFAULT_CODEX_ACP_MODEL: &str = "gpt-5.4-mini";
/// Reasoning effort used when [`RubricOptions::effort`] is `None`, and for the
/// default [`RubricRunConfig`] arguments.
pub const DEFAULT_CODEX_ACP_REASONING_EFFORT: &str = "medium";
101
/// Stock vision prompt: asks for a JSON-only, structured description of the UI
/// elements, text, layout, and visual issues visible in a screenshot.
pub const DEFAULT_VISION_PROMPT: &str = "\
You are a UI description engine. Given a screenshot, produce a structured JSON \
description of all visible user interface elements, their text content, layout, \
and any visual issues (clipping, overlap, blank regions, contrast problems). \
Output ONLY valid JSON with no additional text.";
111
112#[must_use]
114pub fn default_options() -> RubricOptions {
115 RubricOptions {
116 model: Some(DEFAULT_CODEX_ACP_MODEL.to_string()),
117 effort: Some(DEFAULT_CODEX_ACP_REASONING_EFFORT.into()),
118 system_prompt: Some(DEFAULT_SYSTEM_PROMPT.to_string()),
119 }
120}
121
/// Returns the default codex-acp executable: the bare command name
/// `codex-acp`, with no directory component.
#[must_use]
pub fn default_codex_acp_binary() -> PathBuf {
    Path::new("codex-acp").to_path_buf()
}
127
128pub fn encode_png(png_path: &Path) -> Result<String, PoolError> {
134 let bytes = std::fs::read(png_path)
135 .map_err(|e| PoolError::Rpc(format!("read png {}: {e}", png_path.display())))?;
136 Ok(base64::engine::general_purpose::STANDARD.encode(bytes))
137}
138
139pub fn assert_image_rubric(png_path: &Path, name: &str, question: &str) -> Result<(), RubricError> {
146 let verdict = evaluate_image_rubric(png_path, question)?;
147 assert_verdict(name, verdict)
148}
149
150pub fn evaluate_image_rubric(
156 png_path: &Path,
157 question: &str,
158) -> Result<RubricVerdict, RubricError> {
159 evaluate_image_rubric_with_options(png_path, question, default_options())
160}
161
162pub fn evaluate_image_rubric_with_options(
168 png_path: &Path,
169 question: &str,
170 opts: RubricOptions,
171) -> Result<RubricVerdict, RubricError> {
172 evaluate_image_rubric_with_config(png_path, question, opts, RubricRunConfig::default())
173}
174
175pub fn evaluate_image_rubric_with_config(
181 png_path: &Path,
182 question: &str,
183 opts: RubricOptions,
184 config: RubricRunConfig,
185) -> Result<RubricVerdict, RubricError> {
186 let bytes = std::fs::read(png_path).map_err(|source| RubricError::ReadPng {
187 path: png_path.to_path_buf(),
188 source,
189 })?;
190 let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
191 let text = run_codex_acp_rubric(
192 &b64,
193 question,
194 opts.model
195 .as_deref()
196 .map_or(DEFAULT_CODEX_ACP_MODEL, |model| model),
197 opts.effort
198 .as_deref()
199 .map_or(DEFAULT_CODEX_ACP_REASONING_EFFORT, |effort| effort),
200 opts.system_prompt
201 .as_deref()
202 .map_or(DEFAULT_SYSTEM_PROMPT, |system_prompt| system_prompt),
203 &config,
204 )?;
205
206 parse_verdict(&text).map_err(|source| RubricError::ParseVerdict { text, source })
207}
208
209pub fn evaluate_image_rubric_pipeline(
222 png_path: &Path,
223 question: &str,
224 vision_config: &VisionApiConfig,
225 vision_prompt: &str,
226 rubric_options: &RubricOptions,
227 rubric_config: &RubricRunConfig,
228) -> Result<RubricVerdict, RubricError> {
229 let bytes = std::fs::read(png_path).map_err(|source| RubricError::ReadPng {
230 path: png_path.to_path_buf(),
231 source,
232 })?;
233 let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
234
235 let structured =
236 vision::call_vision_api(&b64, vision_prompt, vision_config).map_err(RubricError::Pool)?;
237
238 let system_prompt = rubric_options
239 .system_prompt
240 .as_deref()
241 .map_or(DEFAULT_SYSTEM_PROMPT, |system_prompt| system_prompt);
242 let rubric_prompt =
243 format!("{system_prompt}\n\nUI description:\n{structured}\n\nQuestion: {question}");
244
245 let mut acp = AcpClient::spawn(
246 &rubric_config.codex_acp_binary,
247 &rubric_config.acp_args,
248 &rubric_config.extra_env,
249 rubric_config.cwd.as_deref(),
250 )
251 .map_err(RubricError::Pool)?;
252 acp.start_session(rubric_config.cwd.as_deref())
253 .map_err(RubricError::Pool)?;
254
255 let text = acp.prompt_text(&rubric_prompt).map_err(RubricError::Pool)?;
256
257 parse_verdict(&text).map_err(|source| RubricError::ParseVerdict { text, source })
258}
259
260pub fn parse_verdict(text: &str) -> Result<RubricVerdict, serde_json::Error> {
267 match serde_json::from_str(text) {
268 Ok(verdict) => Ok(verdict),
269 Err(source) => match extract_json_object(text) {
270 Some(json) => serde_json::from_str(json),
271 None => Err(source),
272 },
273 }
274}
275
/// Returns the first brace-balanced `{ ... }` span found in `text`, if any.
///
/// Candidates are tried from each `{` in order of appearance. A candidate is
/// accepted once its braces balance; braces inside double-quoted strings are
/// ignored and backslash escapes inside strings are honored. If a candidate
/// never balances (for example a stray `{` in surrounding prose), the search
/// resumes at the next `{` instead of giving up.
///
/// The span is not validated as JSON; callers must parse it. Each retry rescans
/// the rest of the text, so the worst case is quadratic in the number of
/// unbalanced `{` characters.
fn extract_json_object(text: &str) -> Option<&str> {
    let mut search_from = 0;

    while let Some(relative) = text[search_from..].find('{') {
        let start = search_from + relative;
        if let Some(length) = balanced_object_len(&text[start..]) {
            return Some(&text[start..start + length]);
        }
        // '{' is a single byte, so stepping by one stays on a char boundary.
        search_from = start + 1;
    }

    None
}

/// Returns the byte length of the balanced object at the start of `candidate`,
/// which must begin with `{`, or `None` when the braces never balance.
fn balanced_object_len(candidate: &str) -> Option<usize> {
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    for (offset, character) in candidate.char_indices() {
        if in_string {
            if escaped {
                // The character after a backslash never terminates the string.
                escaped = false;
            } else if character == '\\' {
                escaped = true;
            } else if character == '"' {
                in_string = false;
            }
            continue;
        }

        match character {
            '"' => in_string = true,
            '{' => depth += 1,
            '}' => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    return Some(offset + character.len_utf8());
                }
            }
            _ => {}
        }
    }

    None
}
310
311fn deserialize_anomalies<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
312where
313 D: serde::Deserializer<'de>,
314{
315 let values = Vec::<serde_json::Value>::deserialize(deserializer)?;
316 Ok(values.into_iter().map(anomaly_to_string).collect())
317}
318
319fn anomaly_to_string(value: serde_json::Value) -> String {
320 match value {
321 serde_json::Value::String(text) => text,
322 serde_json::Value::Object(mut object) => {
323 let issue = object
324 .remove("issue")
325 .and_then(|value| value.as_str().map(str::to_owned));
326 let fix = object
327 .remove("fix")
328 .and_then(|value| value.as_str().map(str::to_owned));
329 match (issue, fix) {
330 (Some(issue), Some(fix)) => format!("{issue} Fix: {fix}"),
331 (Some(issue), None) => issue,
332 (None, Some(fix)) => fix,
333 (None, None) => serde_json::Value::Object(object).to_string(),
334 }
335 }
336 other => other.to_string(),
337 }
338}
339
340pub fn assert_verdict(name: &str, verdict: RubricVerdict) -> Result<(), RubricError> {
346 if verdict.verdict.is_pass() {
347 Ok(())
348 } else {
349 Err(RubricError::Assertion {
350 name: name.to_string(),
351 reason: verdict.reason,
352 anomalies: verdict.anomalies,
353 })
354 }
355}
356
357pub fn run(cli: Cli) -> anyhow::Result<()> {
363 cli::run(cli)
364}
365
366fn run_codex_acp_rubric(
367 b64_png: &str,
368 question: &str,
369 model: &str,
370 effort: &str,
371 system_prompt: &str,
372 config: &RubricRunConfig,
373) -> Result<String, PoolError> {
374 let args = effective_acp_args(config, model, effort);
375 let mut acp = AcpClient::spawn(
376 &config.codex_acp_binary,
377 args.as_slice(),
378 &config.extra_env,
379 config.cwd.as_deref(),
380 )?;
381 acp.start_session(config.cwd.as_deref())?;
382
383 let prompt = format!("{system_prompt}\n\nQuestion: {question}");
384 acp.prompt_image(&prompt, b64_png)
385}
386
387fn effective_acp_args(config: &RubricRunConfig, model: &str, effort: &str) -> Vec<String> {
388 if config.acp_args
389 == build_codex_acp_args(DEFAULT_CODEX_ACP_MODEL, DEFAULT_CODEX_ACP_REASONING_EFFORT)
390 {
391 build_codex_acp_args(model, effort)
392 } else {
393 config.acp_args.clone()
394 }
395}
396
397#[cfg(test)]
398mod tests;