#![warn(missing_docs)]
pub mod cli;
mod errors;
mod pool;
mod typed_strings;
use std::ffi::{OsStr, OsString};
use std::io::{BufRead as _, BufReader, Write as _};
use std::path::{Path, PathBuf};
use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
use base64::Engine as _;
use serde::{Deserialize, Serialize};
pub use cli::Cli;
pub use errors::{PoolError, RateLimitEvent, RubricError};
pub use pool::{PoolConfig, PoolStats, RubricPool};
pub use typed_strings::{RubricEffort, RubricVerdictStatus};
#[derive(Debug, Deserialize, Serialize)]
pub struct RubricVerdict {
pub verdict: RubricVerdictStatus,
pub reason: String,
#[serde(default, deserialize_with = "deserialize_anomalies")]
pub anomalies: Vec<String>,
}
#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RubricOptions {
pub model: Option<String>,
pub effort: Option<RubricEffort>,
pub system_prompt: Option<String>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RubricRunConfig {
pub codex_acp_binary: PathBuf,
pub extra_env: Vec<(OsString, OsString)>,
pub cwd: Option<PathBuf>,
}
impl Default for RubricRunConfig {
fn default() -> Self {
Self {
codex_acp_binary: default_codex_acp_binary(),
extra_env: Vec::new(),
cwd: None,
}
}
}
pub const DEFAULT_SYSTEM_PROMPT: &str = "\
You are a UI regression auditor. \
You will be shown one screenshot and asked a specific question. Reply with strict \
JSON matching this schema and nothing else:
{ \"verdict\": \"pass\" | \"fail\", \"reason\": string, \"anomalies\": string[] }
Fail criteria: text clipped or overflowing its container, overlapping interactive \
elements, missing/blank regions where content should appear, illegible contrast, \
visibly broken layout. Cosmetic differences from previous runs are NOT failures \
unless they make the UI worse by the criteria above.";
pub const DEFAULT_CODEX_ACP_MODEL: &str = "gpt-5.4-mini";
pub const DEFAULT_CODEX_ACP_REASONING_EFFORT: &str = "medium";
#[must_use]
pub fn default_options() -> RubricOptions {
RubricOptions {
model: Some(DEFAULT_CODEX_ACP_MODEL.to_string()),
effort: Some(DEFAULT_CODEX_ACP_REASONING_EFFORT.into()),
system_prompt: Some(DEFAULT_SYSTEM_PROMPT.to_string()),
}
}
#[must_use]
pub fn default_codex_acp_binary() -> PathBuf {
PathBuf::from("codex-acp")
}
pub fn encode_png(png_path: &Path) -> Result<String, PoolError> {
let bytes = std::fs::read(png_path)
.map_err(|e| PoolError::Rpc(format!("read png {}: {e}", png_path.display())))?;
Ok(base64::engine::general_purpose::STANDARD.encode(bytes))
}
pub fn assert_image_rubric(png_path: &Path, name: &str, question: &str) -> Result<(), RubricError> {
let verdict = evaluate_image_rubric(png_path, question)?;
assert_verdict(name, verdict)
}
pub fn evaluate_image_rubric(
png_path: &Path,
question: &str,
) -> Result<RubricVerdict, RubricError> {
evaluate_image_rubric_with_options(png_path, question, default_options())
}
pub fn evaluate_image_rubric_with_options(
png_path: &Path,
question: &str,
opts: RubricOptions,
) -> Result<RubricVerdict, RubricError> {
evaluate_image_rubric_with_config(png_path, question, opts, RubricRunConfig::default())
}
pub fn evaluate_image_rubric_with_config(
png_path: &Path,
question: &str,
opts: RubricOptions,
config: RubricRunConfig,
) -> Result<RubricVerdict, RubricError> {
let bytes = std::fs::read(png_path).map_err(|source| RubricError::ReadPng {
path: png_path.to_path_buf(),
source,
})?;
let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
let text = run_codex_acp_rubric(
&b64,
question,
opts.model.as_deref().unwrap_or(DEFAULT_CODEX_ACP_MODEL),
opts.effort
.as_deref()
.unwrap_or(DEFAULT_CODEX_ACP_REASONING_EFFORT),
opts.system_prompt
.as_deref()
.unwrap_or(DEFAULT_SYSTEM_PROMPT),
&config,
)?;
parse_verdict(&text).map_err(|source| RubricError::ParseVerdict { text, source })
}
pub fn parse_verdict(text: &str) -> Result<RubricVerdict, serde_json::Error> {
match serde_json::from_str(text) {
Ok(verdict) => Ok(verdict),
Err(source) => match extract_json_object(text) {
Some(json) => serde_json::from_str(json),
None => Err(source),
},
}
}
fn extract_json_object(text: &str) -> Option<&str> {
let start = text.find('{')?;
let mut depth = 0usize;
let mut in_string = false;
let mut escaped = false;
for (offset, character) in text[start..].char_indices() {
if in_string {
if escaped {
escaped = false;
} else if character == '\\' {
escaped = true;
} else if character == '"' {
in_string = false;
}
continue;
}
match character {
'"' => in_string = true,
'{' => depth = depth.saturating_add(1),
'}' => {
depth = depth.saturating_sub(1);
if depth == 0 {
let end = start + offset + character.len_utf8();
return Some(&text[start..end]);
}
}
_ => {}
}
}
None
}
fn deserialize_anomalies<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
D: serde::Deserializer<'de>,
{
let values = Vec::<serde_json::Value>::deserialize(deserializer)?;
Ok(values.into_iter().map(anomaly_to_string).collect())
}
fn anomaly_to_string(value: serde_json::Value) -> String {
match value {
serde_json::Value::String(text) => text,
serde_json::Value::Object(mut object) => {
let issue = object
.remove("issue")
.and_then(|value| value.as_str().map(str::to_owned));
let fix = object
.remove("fix")
.and_then(|value| value.as_str().map(str::to_owned));
match (issue, fix) {
(Some(issue), Some(fix)) => format!("{issue} Fix: {fix}"),
(Some(issue), None) => issue,
(None, Some(fix)) => fix,
(None, None) => serde_json::Value::Object(object).to_string(),
}
}
other => other.to_string(),
}
}
pub fn assert_verdict(name: &str, verdict: RubricVerdict) -> Result<(), RubricError> {
if verdict.verdict.is_pass() {
Ok(())
} else {
Err(RubricError::Assertion {
name: name.to_string(),
reason: verdict.reason,
anomalies: verdict.anomalies,
})
}
}
pub fn run(cli: Cli) -> anyhow::Result<()> {
cli::run(cli)
}
fn run_codex_acp_rubric(
b64_png: &str,
question: &str,
model: &str,
effort: &str,
system_prompt: &str,
config: &RubricRunConfig,
) -> Result<String, PoolError> {
let mut acp = AcpClient::spawn(
&config.codex_acp_binary,
model,
effort,
&config.extra_env,
config.cwd.as_deref(),
)?;
acp.start_session(config.cwd.as_deref())?;
let prompt = format!("{system_prompt}\n\nQuestion: {question}");
acp.prompt_image(&prompt, b64_png)
}
struct AcpClient {
child: Child,
stdin: ChildStdin,
stdout: BufReader<ChildStdout>,
next_id: i64,
session_id: Option<String>,
}
impl AcpClient {
fn spawn(
binary: &Path,
model: &str,
effort: &str,
extra_env: &[(OsString, OsString)],
cwd: Option<&Path>,
) -> Result<Self, PoolError> {
let mut command = Command::new(binary);
command
.arg("-c")
.arg(format!("model=\"{model}\""))
.arg("-c")
.arg(format!("model_reasoning_effort=\"{effort}\""))
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped());
if let Some(cwd) = cwd {
command.current_dir(cwd);
}
for (key, value) in extra_env {
command.env::<&OsStr, &OsStr>(key.as_os_str(), value.as_os_str());
}
let mut child = command
.spawn()
.map_err(|e| PoolError::Spawn(format!("spawn {}: {e}", binary.display())))?;
let stdin = child
.stdin
.take()
.ok_or_else(|| PoolError::Spawn("codex-acp stdin unavailable".to_string()))?;
let stdout = child
.stdout
.take()
.ok_or_else(|| PoolError::Spawn("codex-acp stdout unavailable".to_string()))?;
Ok(Self {
child,
stdin,
stdout: BufReader::new(stdout),
next_id: 1,
session_id: None,
})
}
fn start_session(&mut self, cwd: Option<&Path>) -> Result<(), PoolError> {
let init_id = self.claim_id();
self.request(
init_id,
"initialize",
serde_json::json!({
"protocolVersion": 1,
"clientCapabilities": {},
"clientInfo": {
"name": "cb-rubric",
"version": env!("CARGO_PKG_VERSION")
}
}),
)?;
let cwd = match cwd {
Some(cwd) => cwd.to_path_buf(),
None => {
std::env::current_dir().map_err(|e| PoolError::Rpc(format!("current dir: {e}")))?
}
}
.to_string_lossy()
.into_owned();
let session_request_id = self.claim_id();
let session_id = self.request(
session_request_id,
"session/new",
serde_json::json!({
"cwd": cwd,
"mcpServers": []
}),
)?["sessionId"]
.as_str()
.ok_or_else(|| PoolError::Rpc("unexpected session/new response shape".to_string()))?
.to_string();
self.session_id = Some(session_id);
Ok(())
}
fn prompt_image(&mut self, prompt: &str, b64_png: &str) -> Result<String, PoolError> {
let session_id = self
.session_id
.clone()
.ok_or_else(|| PoolError::Rpc("session not initialized".to_string()))?;
let prompt_id = self.claim_id();
self.prompt(
prompt_id,
&session_id,
serde_json::json!({
"sessionId": session_id,
"prompt": [
{ "type": "text", "text": prompt },
{ "type": "image", "data": b64_png, "mimeType": "image/png" }
]
}),
)
}
fn claim_id(&mut self) -> i64 {
let id = self.next_id;
self.next_id += 1;
id
}
fn request(
&mut self,
id: i64,
method: &str,
params: serde_json::Value,
) -> Result<serde_json::Value, PoolError> {
self.send(id, method, params)?;
loop {
let msg = self.read_message()?;
if msg["id"].as_i64() == Some(id) {
return rpc_result(msg);
}
}
}
fn prompt(
&mut self,
id: i64,
session_id: &str,
params: serde_json::Value,
) -> Result<String, PoolError> {
self.send(id, "session/prompt", params)?;
let mut text = String::new();
loop {
let msg = self.read_message()?;
if msg["id"].as_i64() == Some(id) {
rpc_result(msg)?;
return Ok(text);
}
if msg["method"] == "session/update" && msg["params"]["sessionId"] == session_id {
let update = &msg["params"]["update"];
if update["sessionUpdate"] == "agent_message_chunk" {
if let Some(chunk) = update["content"]["text"].as_str() {
text.push_str(chunk);
}
}
}
}
}
fn send(&mut self, id: i64, method: &str, params: serde_json::Value) -> Result<(), PoolError> {
let msg = serde_json::json!({
"jsonrpc": "2.0",
"id": id,
"method": method,
"params": params,
});
serde_json::to_writer(&mut self.stdin, &msg)
.map_err(|e| PoolError::Rpc(format!("write codex-acp request: {e}")))?;
self.stdin
.write_all(b"\n")
.map_err(|e| PoolError::Rpc(format!("write codex-acp newline: {e}")))?;
self.stdin
.flush()
.map_err(|e| PoolError::Rpc(format!("flush codex-acp request: {e}")))
}
fn read_message(&mut self) -> Result<serde_json::Value, PoolError> {
let mut line = String::new();
let n = self
.stdout
.read_line(&mut line)
.map_err(|e| PoolError::Rpc(format!("read codex-acp response: {e}")))?;
if n == 0 {
let stderr = self
.child
.stderr
.take()
.map(|mut stderr| {
let mut buf = String::new();
let _ = std::io::Read::read_to_string(&mut stderr, &mut buf);
buf
})
.unwrap_or_default();
return Err(PoolError::WorkerCrashed {
worker_id: usize::MAX,
message: format!("codex-acp exited before response: {stderr}"),
});
}
serde_json::from_str(&line)
.map_err(|e| PoolError::Rpc(format!("parse codex-acp message {line:?}: {e}")))
}
}
impl Drop for AcpClient {
fn drop(&mut self) {
let _ = self.child.kill();
let _ = self.child.wait();
}
}
fn rpc_result(msg: serde_json::Value) -> Result<serde_json::Value, PoolError> {
if let Some(error) = msg.get("error") {
let message = error.to_string();
let lowered = message.to_ascii_lowercase();
if lowered.contains("usage limit") || lowered.contains("quota") {
Err(PoolError::QuotaExceeded)
} else if lowered.contains("rate limit") {
Err(PoolError::RateLimited {
retry_after: parse_retry_after(error),
})
} else {
Err(PoolError::Rpc(format!("codex-acp rpc error: {error}")))
}
} else {
Ok(msg["result"].clone())
}
}
fn parse_retry_after(error: &serde_json::Value) -> Option<std::time::Duration> {
let candidates = [
&error["retry_after"],
&error["retryAfter"],
&error["data"]["retry_after"],
&error["data"]["retryAfter"],
];
for candidate in candidates {
if let Some(seconds) = candidate.as_u64() {
return Some(std::time::Duration::from_secs(seconds));
}
if let Some(seconds) = candidate.as_f64() {
if seconds.is_finite() && seconds >= 0.0 {
return Some(std::time::Duration::from_secs_f64(seconds));
}
}
if let Some(value) = candidate.as_str() {
if let Ok(seconds) = value.parse::<u64>() {
return Some(std::time::Duration::from_secs(seconds));
}
}
}
None
}
#[cfg(test)]
mod tests;