visual_rubric/
cli.rs

1//! Command-line interface and audit report types.
2
3use std::path::PathBuf;
4
5use anyhow::{Context as _, Result, anyhow};
6use clap::{Parser, Subcommand};
7
8mod audit;
9mod static_server;
10#[cfg(test)]
11mod tests;
12
13#[cfg(test)]
14use audit::RubricReport;
15use audit::run_audit;
16pub use audit::{AuditReport, AuditStatus};
17use static_server::StaticServer;
18#[cfg(test)]
19use static_server::{content_type, resolve_static_path};
20
/// Parsed command-line interface for `visual-rubric`.
#[derive(Debug, Parser)]
#[command(name = "visual-rubric")]
pub struct Cli {
    // NOTE: field comments below are plain `//` comments on purpose; clap's
    // derive turns `///` doc comments into help text.

    // Explicit subcommand, when one was given on the command line.
    #[command(subcommand)]
    command: Option<Commands>,
    // Top-level image flags. `run` converts these into `ImageArgs` and
    // evaluates a single image when `command` is `None`.
    #[command(flatten)]
    image: LegacyImageArgs,
}
30
// Subcommands dispatched by `run`: `Image` -> `run_image`, `Audit` ->
// `run_audit`, `Serve` -> `run_serve`.
// The `///` comments on the variants are picked up by clap's derive as the
// subcommand descriptions, so they are user-facing text.
#[derive(Debug, Subcommand)]
enum Commands {
    /// Evaluate one screenshot.
    Image(ImageArgs),
    /// Host a local static site, capture screenshots, and evaluate them.
    Audit(AuditArgs),
    /// Serve a local static directory for manual browser testing.
    Serve(ServeArgs),
}
40
// Image flags accepted at the top level, without a subcommand (flattened into
// `Cli`). `image` and `question` are optional at parse time; the
// `TryFrom<LegacyImageArgs> for ImageArgs` impl rejects the arguments with
// "--image is required" / "--question is required" when they are missing.
// Presumably they are optional so that subcommand invocations still parse.
// Plain `//` comments are used so clap's generated help text is unchanged.
#[derive(Clone, Debug, Parser)]
struct LegacyImageArgs {
    // Path of the screenshot to evaluate.
    #[arg(long)]
    image: Option<PathBuf>,
    // Rubric question asked about the screenshot.
    #[arg(long)]
    question: Option<String>,
    // Optional system prompt forwarded through `RubricOptions`.
    #[arg(long)]
    system_prompt: Option<String>,
    // Optional model forwarded through `RubricOptions`.
    #[arg(long)]
    model: Option<String>,
    // Optional effort setting; converted with `Into` in `evaluate_image`.
    #[arg(long)]
    effort: Option<String>,
    // Optional codex-acp binary; when set, `evaluate_image` runs the
    // evaluation through a single-worker `RubricPool`.
    #[arg(long)]
    codex_acp: Option<PathBuf>,
    // Label passed to `crate::assert_verdict`. Note the default differs from
    // the `image` subcommand's default ("screenshot").
    #[arg(long, default_value = "vnc-screenshot")]
    name: String,
    // Print the verdict as JSON instead of asserting on it (see `run_image`).
    #[arg(long)]
    json: bool,
}
60
// Arguments for the `image` subcommand, consumed by `run_image` and
// `evaluate_image`. Also produced from `LegacyImageArgs` when no subcommand
// is given.
// Plain `//` comments are used so clap's generated help text is unchanged.
#[derive(Clone, Debug, Parser)]
struct ImageArgs {
    // Path of the screenshot to evaluate.
    #[arg(long)]
    image: PathBuf,
    // Rubric question asked about the screenshot.
    #[arg(long)]
    question: String,
    // Optional system prompt forwarded through `RubricOptions`.
    #[arg(long)]
    system_prompt: Option<String>,
    // Optional model forwarded through `RubricOptions`.
    #[arg(long)]
    model: Option<String>,
    // Optional effort setting; converted with `Into` in `evaluate_image`.
    #[arg(long)]
    effort: Option<String>,
    // Optional codex-acp binary; when set, `evaluate_image` runs the
    // evaluation through a single-worker `RubricPool`.
    #[arg(long)]
    codex_acp: Option<PathBuf>,
    // Label passed to `crate::assert_verdict`.
    #[arg(long, default_value = "screenshot")]
    name: String,
    // Print the verdict as JSON instead of asserting on it (see `run_image`).
    #[arg(long)]
    json: bool,
}
80
// Arguments for the `audit` subcommand; `run` hands the whole struct to
// `audit::run_audit`, which is defined outside this file.
// New notes below are plain `//` comments so clap's generated help text
// (built from the existing `///` comments) is unchanged.
#[derive(Clone, Debug, Parser)]
struct AuditArgs {
    /// Static site root to serve.
    #[arg(long)]
    root: PathBuf,
    /// Path under the hosted root to capture.
    #[arg(long, default_value = "/")]
    path: String,
    /// Output directory for screenshots.
    #[arg(long, default_value = "target/visual-rubric")]
    screenshots: PathBuf,
    /// JSON report path.
    #[arg(long, default_value = "target/visual-rubric/report.json")]
    report: PathBuf,
    /// Browser binary for headless screenshots.
    #[arg(long, env = "VISUAL_RUBRIC_BROWSER", default_value = "chromium")]
    browser: PathBuf,
    /// Extra argument passed to the browser. May be repeated.
    #[arg(long = "browser-arg")]
    browser_args: Vec<String>,
    /// Delay before each browser capture, in milliseconds.
    #[arg(long, default_value_t = 0)]
    wait_ms: u64,
    /// Device scale factor passed to Chromium.
    #[arg(long)]
    device_scale_factor: Option<f32>,
    /// Number of times to retry a failed browser capture.
    #[arg(long, default_value_t = 0)]
    capture_retries: u32,
    /// Return a non-zero exit when any rubric fails or errors.
    #[arg(long)]
    fail_on_rubric: bool,
    /// Viewports as name=WIDTHxHEIGHT. May be repeated.
    #[arg(long = "viewport")]
    viewports: Vec<ViewportArg>,
    // The five flags below mirror the flags of the same name on `ImageArgs`.
    // NOTE(review): presumably they carry the same meaning for each captured
    // screenshot; confirm against `audit::run_audit`.
    #[arg(long)]
    question: String,
    #[arg(long)]
    system_prompt: Option<String>,
    #[arg(long)]
    model: Option<String>,
    #[arg(long)]
    effort: Option<String>,
    #[arg(long)]
    codex_acp: Option<PathBuf>,
    /// Generate pass verdicts without starting codex-acp.
    #[arg(long)]
    fake_pass: bool,
    /// Capture screenshots and report deterministic data without model calls.
    #[arg(long)]
    skip_ai: bool,
}
133
// Arguments for the `serve` subcommand; `run_serve` passes both fields to
// `StaticServer::start`.
// Plain `//` comments are used so clap's generated help text is unchanged.
#[derive(Clone, Debug, Parser)]
struct ServeArgs {
    // Directory handed to the static server as its root.
    #[arg(long)]
    root: PathBuf,
    // Port handed to the static server; defaults to 1111.
    #[arg(long, default_value_t = 1111)]
    port: u16,
}
141
/// A named viewport size, parsed from `name=WIDTHxHEIGHT` text by this type's
/// `FromStr` implementation and collected by the audit `--viewport` flag.
#[derive(Clone, Debug)]
struct ViewportArg {
    /// Label taken from the text before the first `=`.
    name: String,
    /// Width parsed from the text between the `=` and the first following `x`.
    width: u32,
    /// Height parsed from the text after that `x`.
    height: u32,
}
148
149/// Runs a parsed CLI command.
150///
151/// # Errors
152///
153/// Returns command, IO, browser, Codex ACP, or rubric audit failures.
154pub fn run(cli: Cli) -> Result<()> {
155    match cli.command {
156        Some(Commands::Image(args)) => run_image(args),
157        Some(Commands::Audit(args)) => run_audit(args),
158        Some(Commands::Serve(args)) => run_serve(args),
159        None => run_image(cli.image.try_into()?),
160    }
161}
162
163fn run_image(args: ImageArgs) -> Result<()> {
164    let verdict = evaluate_image(&args)?;
165    if args.json {
166        println!("{}", serde_json::to_string(&verdict)?);
167        return Ok(());
168    }
169    crate::assert_verdict(&args.name, verdict)
170        .map(|()| println!("visual rubric passed"))
171        .map_err(|error| anyhow!(error))
172}
173
174fn run_serve(args: ServeArgs) -> Result<()> {
175    let server = StaticServer::start(args.root, args.port)?;
176    println!("{}", server.base_url());
177    server.wait_forever()
178}
179
180fn evaluate_image(args: &ImageArgs) -> Result<crate::RubricVerdict> {
181    let options = crate::RubricOptions {
182        model: args.model.clone(),
183        effort: args.effort.clone().map(Into::into),
184        system_prompt: args.system_prompt.clone(),
185    };
186    if let Some(codex_acp) = &args.codex_acp {
187        let pool = crate::RubricPool::new(crate::PoolConfig {
188            workers: 1,
189            codex_acp_binary: codex_acp.clone(),
190            default_options: merge_with_defaults(options),
191            ..crate::PoolConfig::default()
192        })?;
193        let verdict = pool.submit(&args.image, &args.question, crate::RubricOptions::default())?;
194        let _ = pool.shutdown();
195        Ok(verdict)
196    } else {
197        crate::evaluate_image_rubric_with_options(&args.image, &args.question, options)
198            .map_err(|error| anyhow!(error))
199    }
200}
201
202fn merge_with_defaults(mut options: crate::RubricOptions) -> crate::RubricOptions {
203    let defaults = crate::default_options();
204    if options.model.is_none() {
205        options.model = defaults.model;
206    }
207    if options.effort.is_none() {
208        options.effort = defaults.effort;
209    }
210    if options.system_prompt.is_none() {
211        options.system_prompt = defaults.system_prompt;
212    }
213    options
214}
215
216impl TryFrom<LegacyImageArgs> for ImageArgs {
217    type Error = anyhow::Error;
218
219    fn try_from(value: LegacyImageArgs) -> Result<Self> {
220        Ok(Self {
221            image: value.image.context("--image is required")?,
222            question: value.question.context("--question is required")?,
223            system_prompt: value.system_prompt,
224            model: value.model,
225            effort: value.effort,
226            codex_acp: value.codex_acp,
227            name: value.name,
228            json: value.json,
229        })
230    }
231}
232
233impl std::str::FromStr for ViewportArg {
234    type Err = anyhow::Error;
235
236    fn from_str(value: &str) -> Result<Self> {
237        let (name, size) = value
238            .split_once('=')
239            .context("viewport must be name=WIDTHxHEIGHT")?;
240        let (width, height) = size
241            .split_once('x')
242            .context("viewport size must be WIDTHxHEIGHT")?;
243        Ok(Self {
244            name: name.to_string(),
245            width: width.parse().context("viewport width must be an integer")?,
246            height: height
247                .parse()
248                .context("viewport height must be an integer")?,
249        })
250    }
251}
visual_rubric/cli.rs

visual_rubric/
cli.rs