Skip to main content

visual_rubric/
cli.rs

1//! Command-line interface and audit report types.
2
3use std::path::PathBuf;
4
5use anyhow::{Context as _, Result, anyhow};
6use clap::{Args, Parser, Subcommand};
7
8use crate::presets::PresetError;
9
10mod audit;
11pub mod configured;
12pub mod pipeline;
13mod static_server;
14#[cfg(test)]
15mod tests;
16
17#[cfg(test)]
18use audit::RubricReport;
19use audit::run_audit;
20pub use audit::{AuditReport, AuditStatus};
21use static_server::StaticServer;
22#[cfg(test)]
23use static_server::{content_type, resolve_static_path};
24
25/// Parsed command-line interface for `visual-rubric`.
26#[derive(Debug, Parser)]
27#[command(name = "visual-rubric")]
28pub struct Cli {
29    #[command(subcommand)]
30    command: Option<Commands>,
31    #[command(flatten)]
32    image: LegacyImageArgs,
33}
34
35#[derive(Debug, Subcommand)]
36enum Commands {
37    /// Evaluate one screenshot.
38    Image(ImageArgs),
39    /// Host a local static site, capture screenshots, and evaluate them.
40    Audit(AuditArgs),
41    /// Serve a local static directory for manual browser testing.
42    Serve(ServeArgs),
43    /// Two-stage pipeline: vision model → rubric model.
44    ///
45    /// Stage 1 sends the screenshot to an OpenAI-compatible vision API
46    /// (e.g. Qwen VL via llama-swap) producing structured JSON.
47    /// Stage 2 sends that description to an ACP backend (opencode with
48    /// DeepSeek V4, or codex-acp) for the final rubric verdict.
49    Pipeline(pipeline::PipelineArgs),
50    /// Pipeline configured via a TOML file written by the HM module.
51    ///
52    /// Reads configuration from `~/.config/visual-rubric/config.toml` so the
53    /// user only needs `--image` and `--question`. CLI flags override
54    /// individual TOML fields for one-off testing.
55    Configured(configured::ConfiguredArgs),
56}
57
58#[derive(Clone, Debug, Parser)]
59struct LegacyImageArgs {
60    #[arg(long)]
61    image: Option<PathBuf>,
62    #[arg(long)]
63    question: Option<String>,
64    #[arg(long)]
65    preset: Option<String>,
66    #[arg(long)]
67    system_prompt: Option<String>,
68    #[arg(long)]
69    model: Option<String>,
70    #[arg(long)]
71    effort: Option<String>,
72    #[arg(long)]
73    codex_acp: Option<PathBuf>,
74    #[arg(long, default_value = "vnc-screenshot")]
75    name: String,
76    #[arg(long)]
77    json: bool,
78}
79
80#[derive(Clone, Debug, Parser)]
81struct ImageArgs {
82    #[arg(long)]
83    image: PathBuf,
84    #[command(flatten)]
85    questions: QuestionSource,
86    #[arg(long)]
87    system_prompt: Option<String>,
88    #[arg(long)]
89    model: Option<String>,
90    #[arg(long)]
91    effort: Option<String>,
92    #[arg(long)]
93    codex_acp: Option<PathBuf>,
94    #[arg(long, default_value = "screenshot")]
95    name: String,
96    #[arg(long)]
97    json: bool,
98}
99
100#[derive(Clone, Debug, Parser)]
101struct AuditArgs {
102    /// Static site root to serve.
103    #[arg(long)]
104    root: PathBuf,
105    /// Path under the hosted root to capture.
106    #[arg(long, default_value = "/")]
107    path: String,
108    /// Output directory for screenshots.
109    #[arg(long, default_value = "target/visual-rubric")]
110    screenshots: PathBuf,
111    /// JSON report path.
112    #[arg(long, default_value = "target/visual-rubric/report.json")]
113    report: PathBuf,
114    /// Browser binary for headless screenshots.
115    #[arg(long, env = "VISUAL_RUBRIC_BROWSER", default_value = "chromium")]
116    browser: PathBuf,
117    /// Extra argument passed to the browser. May be repeated.
118    #[arg(long = "browser-arg")]
119    browser_args: Vec<String>,
120    /// Delay before each browser capture, in milliseconds.
121    #[arg(long, default_value_t = 0)]
122    wait_ms: u64,
123    /// Device scale factor passed to Chromium.
124    #[arg(long)]
125    device_scale_factor: Option<f32>,
126    /// Number of times to retry a failed browser capture.
127    #[arg(long, default_value_t = 0)]
128    capture_retries: u32,
129    /// Return a non-zero exit when any rubric fails or errors.
130    #[arg(long)]
131    fail_on_rubric: bool,
132    /// Viewports as name=WIDTHxHEIGHT. May be repeated.
133    #[arg(long = "viewport")]
134    viewports: Vec<ViewportArg>,
135    #[command(flatten)]
136    questions: QuestionSource,
137    #[arg(long)]
138    system_prompt: Option<String>,
139    #[arg(long)]
140    model: Option<String>,
141    #[arg(long)]
142    effort: Option<String>,
143    #[arg(long)]
144    codex_acp: Option<PathBuf>,
145    /// Generate pass verdicts without starting codex-acp.
146    #[arg(long)]
147    fake_pass: bool,
148    /// Capture screenshots and report deterministic data without model calls.
149    #[arg(long)]
150    skip_ai: bool,
151}
152
153#[derive(Clone, Debug, Parser)]
154struct ServeArgs {
155    #[arg(long)]
156    root: PathBuf,
157    #[arg(long, default_value_t = 1111)]
158    port: u16,
159}
160
/// One `--viewport` value, parsed from `name=WIDTHxHEIGHT` by its `FromStr` impl.
#[derive(Debug, Clone)]
struct ViewportArg {
    /// Viewport label; the parser rejects empty names and names with `/` or `\`.
    name: String,
    /// Width component of the size; the parser rejects zero.
    width: u32,
    /// Height component of the size; the parser rejects zero.
    height: u32,
}
167
168/// Source of the rubric question: either an explicit `--question` string
169/// or a named `--preset`.  At least one of the two must be provided.
170#[derive(Clone, Debug, Args)]
171pub struct QuestionSource {
172    /// Explicit rubric question (required if --preset is not set).
173    #[arg(long, required_unless_present = "preset")]
174    pub question: Option<String>,
175
176    /// Named question preset (required if --question is not set).
177    #[arg(long, required_unless_present = "question")]
178    pub preset: Option<String>,
179}
180
181impl QuestionSource {
182    /// Build a source from an already-known question string (no preset).
183    pub fn from_question(question: String) -> Self {
184        Self {
185            question: Some(question),
186            preset: None,
187        }
188    }
189
190    /// Resolves the effective question text.
191    ///
192    /// When `--preset` was provided, delegates to [`crate::presets::resolve`].
193    /// When `--question` was provided, returns it verbatim.
194    pub fn resolve(&self) -> Result<String, PresetError> {
195        match (&self.preset, &self.question) {
196            (Some(name), _) => {
197                let questions = crate::presets::resolve(name)?;
198                Ok(questions.join("\n"))
199            }
200            (None, Some(q)) => Ok(q.clone()),
201            (None, None) => unreachable!("clap enforces at least one of --question or --preset"),
202        }
203    }
204
205    /// Resolves the preset's standard system prompt.
206    ///
207    /// Returns `None` when no preset was selected or the preset does not
208    /// define a system prompt.  Callers should prefer an explicit
209    /// `--system-prompt` over this value.
210    pub fn resolve_system_prompt(&self) -> Result<Option<String>, PresetError> {
211        match &self.preset {
212            Some(name) => Ok(crate::presets::find(name)?
213                .system_prompt()
214                .map(str::to_owned)),
215            None => Ok(None),
216        }
217    }
218}
219
220/// Runs a parsed CLI command.
221///
222/// # Errors
223///
224/// Returns command, IO, browser, ACP, or rubric audit failures.
225pub fn run(cli: Cli) -> Result<()> {
226    match cli.command {
227        Some(Commands::Image(args)) => run_image(args),
228        Some(Commands::Audit(args)) => run_audit(args),
229        Some(Commands::Serve(args)) => run_serve(args),
230        Some(Commands::Pipeline(args)) => pipeline::run_pipeline(args),
231        Some(Commands::Configured(args)) => configured::run_configured(args),
232        None => run_image(cli.image.try_into()?),
233    }
234}
235
236fn run_image(args: ImageArgs) -> Result<()> {
237    let verdict = evaluate_image(&args)?;
238    if args.json {
239        println!("{}", serde_json::to_string(&verdict)?);
240        return Ok(());
241    }
242    crate::assert_verdict(&args.name, verdict)
243        .map(|()| println!("visual rubric passed"))
244        .map_err(|error| anyhow!(error))
245}
246
247fn run_serve(args: ServeArgs) -> Result<()> {
248    let server = StaticServer::start(args.root, args.port)?;
249    println!("{}", server.base_url());
250    server.wait_forever()
251}
252
253fn evaluate_image(args: &ImageArgs) -> Result<crate::RubricVerdict> {
254    let question = args.questions.resolve().map_err(|e| anyhow!(e))?;
255    let system_prompt = match args.system_prompt.clone() {
256        Some(prompt) => Some(prompt),
257        None => args
258            .questions
259            .resolve_system_prompt()
260            .map_err(|e| anyhow!(e))?,
261    };
262    let options = crate::RubricOptions {
263        model: args.model.clone(),
264        effort: args.effort.clone().map(Into::into),
265        system_prompt,
266    };
267    if let Some(codex_acp) = &args.codex_acp {
268        let pool = crate::RubricPool::new(crate::PoolConfig {
269            workers: 1,
270            codex_acp_binary: codex_acp.clone(),
271            default_options: merge_with_defaults(options),
272            ..crate::PoolConfig::default()
273        })?;
274        let verdict = pool.submit(&args.image, &question, crate::RubricOptions::default())?;
275        let _ = pool.shutdown();
276        Ok(verdict)
277    } else {
278        crate::evaluate_image_rubric_with_options(&args.image, &question, options)
279            .map_err(|error| anyhow!(error))
280    }
281}
282
283fn merge_with_defaults(mut options: crate::RubricOptions) -> crate::RubricOptions {
284    let defaults = crate::default_options();
285    if options.model.is_none() {
286        options.model = defaults.model;
287    }
288    if options.effort.is_none() {
289        options.effort = defaults.effort;
290    }
291    if options.system_prompt.is_none() {
292        options.system_prompt = defaults.system_prompt;
293    }
294    options
295}
296
297impl TryFrom<LegacyImageArgs> for ImageArgs {
298    type Error = anyhow::Error;
299
300    fn try_from(value: LegacyImageArgs) -> Result<Self> {
301        let questions = match (value.question, value.preset) {
302            (Some(q), _) => QuestionSource::from_question(q),
303            (None, Some(preset)) => QuestionSource {
304                question: None,
305                preset: Some(preset),
306            },
307            (None, None) => {
308                anyhow::bail!("either --question or --preset is required");
309            }
310        };
311        Ok(Self {
312            image: value.image.context("--image is required")?,
313            questions,
314            system_prompt: value.system_prompt,
315            model: value.model,
316            effort: value.effort,
317            codex_acp: value.codex_acp,
318            name: value.name,
319            json: value.json,
320        })
321    }
322}
323
324impl std::str::FromStr for ViewportArg {
325    type Err = anyhow::Error;
326
327    fn from_str(value: &str) -> Result<Self> {
328        let (name, size) = value
329            .split_once('=')
330            .context("viewport must be name=WIDTHxHEIGHT")?;
331        let (width, height) = size
332            .split_once('x')
333            .context("viewport size must be WIDTHxHEIGHT")?;
334        if name.is_empty() {
335            anyhow::bail!("viewport name must not be empty");
336        }
337        if name.contains('/') || name.contains('\\') {
338            anyhow::bail!("viewport name {name:?} must not contain path separators");
339        }
340        let width = width.parse().context("viewport width must be an integer")?;
341        let height = height
342            .parse()
343            .context("viewport height must be an integer")?;
344        if width == 0 || height == 0 {
345            anyhow::bail!("viewport dimensions must be greater than zero");
346        }
347        Ok(Self {
348            name: name.to_string(),
349            width,
350            height,
351        })
352    }
353}