Skip to main content

visual_rubric/cli/
configured.rs

1//! `configured` subcommand — applies the rubric using a TOML config file
2//! written by the Home Manager module.
3//!
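//! The default config path is `visual-rubric/config.toml` inside the user's
//! config directory: `$XDG_CONFIG_HOME` (or `~/.config` when that is unset) on
//! Linux, and the platform config directory elsewhere.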
5//! CLI flags override individual TOML fields for one-off testing.
6
7use std::path::PathBuf;
8
9use anyhow::{Context as _, Result, anyhow};
10use clap::ValueEnum;
11use serde::Deserialize;
12
13use crate::vision::VisionApiConfig;
14
15use super::QuestionSource;
16
/// Config file location, relative to the base directory returned by
/// `dirs_config_dir` (joined onto it in `load_config`).
const DEFAULT_CONFIG_PATH: &str = "visual-rubric/config.toml";
/// Rubric model used in direct mode when neither the CLI nor the TOML names one.
const DEFAULT_DIRECT_MODEL: &str = "gpt-5.5";
/// Reasoning effort used in direct mode when neither the CLI nor the TOML sets one.
const DEFAULT_DIRECT_EFFORT: &str = "medium";
/// Vision model used in pipeline mode when neither the CLI nor the TOML names one.
const DEFAULT_PIPELINE_VISION_MODEL: &str = "qwen3-vl-8b";
21
/// Arguments for the `configured` subcommand.
// NOTE: the `///` comments in this struct double as clap help text, so
// maintainer-only notes are written as plain `//` comments.
//
// Every `Option` override below is resolved by `run_configured` in the order:
// CLI flag, then the matching TOML field, then a built-in default (if any).
#[derive(Clone, Debug, clap::Parser)]
pub struct ConfiguredArgs {
    /// PNG screenshot path.
    #[arg(long)]
    pub image: PathBuf,

    /// Rubric question (required if --preset is not set).
    #[command(flatten)]
    pub questions: QuestionSource,

    /// Asset name for assertion messages.
    #[arg(long, default_value = "screenshot")]
    pub name: String,

    // When set, `run_configured` prints the verdict and returns before calling
    // `crate::assert_verdict`.
    /// Output verdict as JSON.
    #[arg(long)]
    pub json: bool,

    /// TOML config path (default: ~/.config/visual-rubric/config.toml).
    #[arg(long)]
    pub config: Option<PathBuf>,

    // --- CLI overrides (fall back to TOML, then built-in defaults) ---
    // Resolves to `ConfiguredMode::Pipeline` when set by neither CLI nor TOML.
    /// Backend mode: direct codex-acp image evaluation or two-stage pipeline.
    #[arg(long, value_enum)]
    pub mode: Option<ConfiguredMode>,

    // The four `vision_*` flags are only read in pipeline mode. The URL has no
    // built-in default: pipeline mode errors out when it is missing.
    /// Vision API base URL.
    #[arg(long)]
    pub vision_url: Option<String>,

    // Built-in default: `DEFAULT_PIPELINE_VISION_MODEL`.
    /// Vision model name.
    #[arg(long)]
    pub vision_model: Option<String>,

    /// Vision API key (Bearer token).
    #[arg(long)]
    pub vision_api_key: Option<String>,

    // Built-in default: `crate::DEFAULT_VISION_PROMPT`.
    /// Custom prompt for the vision extraction stage.
    #[arg(long)]
    pub vision_prompt: Option<String>,

    // Built-in default: "codex-acp" in direct mode, "opencode" in pipeline mode.
    /// ACP binary path (opencode or codex-acp).
    #[arg(long)]
    pub acp_binary: Option<String>,

    // Only consumed in pipeline mode (an empty list falls back to the TOML
    // `rubric.args`, then to `["acp"]`). Direct mode always launches the
    // binary with no extra arguments.
    /// Extra CLI arguments for the ACP binary. May be repeated.
    #[arg(long = "acp-arg")]
    pub acp_args: Vec<String>,

    // Built-in default exists only in direct mode (`DEFAULT_DIRECT_MODEL`).
    /// Rubric model name (passed to codex-acp; ignored for opencode).
    #[arg(long)]
    pub model: Option<String>,

    // Built-in default exists only in direct mode (`DEFAULT_DIRECT_EFFORT`).
    /// Rubric reasoning effort.
    #[arg(long)]
    pub effort: Option<String>,

    // When set by neither CLI nor TOML, `run_configured` asks
    // `QuestionSource::resolve_system_prompt` for one.
    /// Rubric system prompt override.
    #[arg(long)]
    pub system_prompt: Option<String>,
}
86
/// Configured backend mode.
// Deserialized from kebab-case strings, i.e. `mode = "direct"` or
// `mode = "pipeline"` in the TOML config. The variant doc comments below are
// also surfaced by clap's `ValueEnum` derive, so they are left as-is.
#[derive(Clone, Copy, Debug, Default, Deserialize, PartialEq, Eq, ValueEnum)]
#[serde(rename_all = "kebab-case")]
pub enum ConfiguredMode {
    /// Direct screenshot evaluation through codex-acp.
    Direct,
    /// Qwen3-VL vision extraction followed by ACP rubric scoring.
    // Selected when neither the CLI nor the TOML config sets a mode.
    #[default]
    Pipeline,
}
97
/// TOML config shape written by the HM module.
///
/// Every key is optional: `#[serde(default)]` lets a missing table or key
/// deserialize to `Default`, so an empty file is a valid config.
#[derive(Debug, Deserialize, Default)]
#[serde(default)]
struct TomlConfig {
    /// Backend mode (`"direct"` or `"pipeline"`); overridden by `--mode`.
    mode: Option<ConfiguredMode>,
    /// `[vision]` table: settings for the vision extraction stage.
    vision: TomlVision,
    /// `[rubric]` table: settings for the ACP rubric stage.
    rubric: TomlRubric,
}
106
/// `[vision]` table of the TOML config.
///
/// `run_configured` reads these fields only in pipeline mode; each one can be
/// overridden by the matching `--vision-*` CLI flag.
#[derive(Debug, Deserialize, Default)]
#[serde(default)]
struct TomlVision {
    /// Vision API base URL. Pipeline mode fails if neither this nor
    /// `--vision-url` is set.
    url: Option<String>,
    /// Vision model name; falls back to `DEFAULT_PIPELINE_VISION_MODEL`.
    model: Option<String>,
    /// Optional API key handed to `VisionApiConfig`.
    api_key: Option<String>,
    /// Prompt for the vision stage; falls back to `crate::DEFAULT_VISION_PROMPT`.
    prompt: Option<String>,
}
115
/// `[rubric]` table of the TOML config.
///
/// Each field can be overridden by the matching CLI flag.
#[derive(Debug, Deserialize, Default)]
#[serde(default)]
struct TomlRubric {
    /// ACP binary to launch (overridden by `--acp-binary`). When unset,
    /// `run_configured` uses `codex-acp` in direct mode and `opencode` in
    /// pipeline mode.
    backend: Option<String>,
    /// Arguments for the ACP binary, used in pipeline mode when no `--acp-arg`
    /// is given; the built-in fallback is `["acp"]`. Not read in direct mode.
    args: Option<Vec<String>>,
    /// Rubric model (overridden by `--model`). Only direct mode has a built-in
    /// default (`DEFAULT_DIRECT_MODEL`).
    model: Option<String>,
    /// Rubric reasoning effort (overridden by `--effort`). Only direct mode has
    /// a built-in default (`DEFAULT_DIRECT_EFFORT`).
    effort: Option<String>,
    /// Rubric system prompt (overridden by `--system-prompt`). When unset, the
    /// question source is asked for one.
    system_prompt: Option<String>,
}
125
126/// Runs the configured pipeline using TOML config + CLI overrides.
127///
128/// # Errors
129///
130/// Returns errors from the vision API, ACP, or verdict parsing.
131pub fn run_configured(args: ConfiguredArgs) -> Result<()> {
132    let question = args.questions.resolve().map_err(|e| anyhow!(e))?;
133    let toml = load_config(args.config.as_deref())?;
134    let mode = args.mode.or(toml.mode).unwrap_or_default();
135
136    let system_prompt = match args.system_prompt.or(toml.rubric.system_prompt) {
137        Some(prompt) => Some(prompt),
138        None => args
139            .questions
140            .resolve_system_prompt()
141            .map_err(|e| anyhow!(e))?,
142    };
143    let rubric_options = rubric_options_for_mode(
144        mode,
145        args.model,
146        toml.rubric.model,
147        args.effort,
148        toml.rubric.effort,
149        system_prompt,
150    );
151
152    let verdict = match mode {
153        ConfiguredMode::Direct => {
154            let rubric_config = direct_rubric_config(args.acp_binary, toml.rubric.backend);
155            crate::evaluate_image_rubric_with_config(
156                &args.image,
157                &question,
158                rubric_options,
159                rubric_config,
160            )
161            .with_context(|| format!("direct rubric for {} failed", args.image.display()))?
162        }
163        ConfiguredMode::Pipeline => {
164            let vision_url = args
165                .vision_url
166                .or(toml.vision.url)
167                .context("vision URL is not set. Set it in config.toml or pass --vision-url")?;
168
169            let vision_model = args
170                .vision_model
171                .or(toml.vision.model)
172                .unwrap_or_else(|| DEFAULT_PIPELINE_VISION_MODEL.to_string());
173
174            let vision_config = VisionApiConfig {
175                url: vision_url,
176                model: vision_model,
177                api_key: args.vision_api_key.or(toml.vision.api_key),
178            };
179
180            let vision_prompt = args
181                .vision_prompt
182                .or(toml.vision.prompt)
183                .unwrap_or_else(|| crate::DEFAULT_VISION_PROMPT.to_string());
184
185            let acp_args = if !args.acp_args.is_empty() {
186                args.acp_args
187            } else if let Some(toml_args) = &toml.rubric.args {
188                toml_args.clone()
189            } else {
190                vec!["acp".to_string()]
191            };
192
193            let acp_binary = args
194                .acp_binary
195                .or(toml.rubric.backend)
196                .unwrap_or_else(|| "opencode".to_string());
197
198            let rubric_config = crate::RubricRunConfig {
199                codex_acp_binary: acp_binary.into(),
200                acp_args,
201                extra_env: Vec::new(),
202                cwd: None,
203            };
204
205            crate::evaluate_image_rubric_pipeline(
206                &args.image,
207                &question,
208                &vision_config,
209                &vision_prompt,
210                &rubric_options,
211                &rubric_config,
212            )
213            .with_context(|| format!("pipeline for {} failed", args.image.display()))?
214        }
215    };
216
217    if args.json {
218        println!("{}", serde_json::to_string(&verdict)?);
219        return Ok(());
220    }
221
222    crate::assert_verdict(&args.name, verdict)
223        .map(|()| println!("visual rubric passed"))
224        .map_err(|error| anyhow::anyhow!(error))
225}
226
227fn rubric_options_for_mode(
228    mode: ConfiguredMode,
229    cli_model: Option<String>,
230    toml_model: Option<String>,
231    cli_effort: Option<String>,
232    toml_effort: Option<String>,
233    system_prompt: Option<String>,
234) -> crate::RubricOptions {
235    let (default_model, default_effort) = match mode {
236        ConfiguredMode::Direct => (Some(DEFAULT_DIRECT_MODEL), Some(DEFAULT_DIRECT_EFFORT)),
237        ConfiguredMode::Pipeline => (None, None),
238    };
239    crate::RubricOptions {
240        model: cli_model
241            .or(toml_model)
242            .or_else(|| default_model.map(str::to_string)),
243        effort: cli_effort
244            .or(toml_effort)
245            .or_else(|| default_effort.map(str::to_string))
246            .map(Into::into),
247        system_prompt,
248    }
249}
250
251fn direct_rubric_config(
252    cli_binary: Option<String>,
253    toml_binary: Option<String>,
254) -> crate::RubricRunConfig {
255    crate::RubricRunConfig {
256        codex_acp_binary: cli_binary
257            .or(toml_binary)
258            .unwrap_or_else(|| "codex-acp".to_string())
259            .into(),
260        acp_args: Vec::new(),
261        extra_env: Vec::new(),
262        cwd: None,
263    }
264}
265
266fn load_config(cli_path: Option<&std::path::Path>) -> Result<TomlConfig> {
267    let path = match cli_path {
268        Some(p) => p.to_path_buf(),
269        None => match dirs_config_dir() {
270            Some(base) => base.join(DEFAULT_CONFIG_PATH),
271            None => return Ok(TomlConfig::default()),
272        },
273    };
274
275    let content = match std::fs::read_to_string(&path) {
276        Ok(c) => c,
277        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(TomlConfig::default()),
278        Err(e) => return Err(e).context(format!("read config {}", path.display())),
279    };
280
281    toml::from_str(&content).context(format!("parse config {}", path.display()))
282}
283
/// Returns the base directory under which the default config file lives.
///
/// On Linux this follows the XDG Base Directory rules: `$XDG_CONFIG_HOME` is
/// used only when it holds an absolute path (an unset, empty, or relative
/// value is ignored); otherwise the result is `$HOME/.config`. `None` is
/// returned when `HOME` is unset or empty as well. On other platforms the
/// lookup is delegated to `dirs::config_dir()`.
fn dirs_config_dir() -> Option<PathBuf> {
    #[cfg(target_os = "linux")]
    {
        // `var_os` keeps directories whose names are not valid UTF-8 usable.
        std::env::var_os("XDG_CONFIG_HOME")
            .map(PathBuf::from)
            // An empty path is not absolute, so this also rejects "".
            .filter(|dir| dir.is_absolute())
            .or_else(|| {
                std::env::var_os("HOME")
                    .filter(|home| !home.is_empty())
                    .map(|home| PathBuf::from(home).join(".config"))
            })
    }
    #[cfg(not(target_os = "linux"))]
    {
        dirs::config_dir()
    }
}
301
// Unit tests for config parsing and the CLI > TOML > built-in default
// precedence implemented by the helpers above.
#[cfg(test)]
mod tests {
    use super::*;

    // A direct-mode config needs no `[vision]` table at all.
    #[test]
    fn parses_direct_mode_without_vision_config() {
        let config: TomlConfig = toml::from_str(
            r#"
mode = "direct"

[rubric]
backend = "codex-acp"
"#,
        )
        .unwrap();

        assert_eq!(config.mode, Some(ConfiguredMode::Direct));
        assert!(config.vision.url.is_none());
    }

    // With nothing configured, direct mode uses the built-in model, effort,
    // and binary, and passes no extra ACP arguments.
    #[test]
    fn direct_mode_defaults_to_gpt55_medium_codex_acp() {
        let options = rubric_options_for_mode(ConfiguredMode::Direct, None, None, None, None, None);
        let config = direct_rubric_config(None, None);

        assert_eq!(options.model.as_deref(), Some("gpt-5.5"));
        assert_eq!(options.effort.as_deref(), Some("medium"));
        assert_eq!(config.codex_acp_binary, PathBuf::from("codex-acp"));
        assert!(config.acp_args.is_empty());
    }

    // Pipeline mode has no built-in model/effort: unset stays `None`.
    #[test]
    fn pipeline_mode_does_not_add_direct_model_defaults() {
        let options =
            rubric_options_for_mode(ConfiguredMode::Pipeline, None, None, None, None, None);

        assert!(options.model.is_none());
        assert!(options.effort.is_none());
    }

    // Direct mode: CLI values beat TOML values, and TOML values beat the
    // built-in defaults.
    #[test]
    fn rubric_options_prefer_cli_then_toml_then_direct_defaults() {
        let options = rubric_options_for_mode(
            ConfiguredMode::Direct,
            Some("cli-model".to_string()),
            Some("toml-model".to_string()),
            Some("high".to_string()),
            Some("low".to_string()),
            Some("Use the exact rubric.".to_string()),
        );

        assert_eq!(options.model.as_deref(), Some("cli-model"));
        assert_eq!(options.effort.as_deref(), Some("high"));
        assert_eq!(
            options.system_prompt.as_deref(),
            Some("Use the exact rubric.")
        );

        let options = rubric_options_for_mode(
            ConfiguredMode::Direct,
            None,
            Some("toml-model".to_string()),
            None,
            Some("low".to_string()),
            None,
        );

        assert_eq!(options.model.as_deref(), Some("toml-model"));
        assert_eq!(options.effort.as_deref(), Some("low"));
    }

    // Pipeline mode: same CLI-over-TOML precedence, just without defaults.
    #[test]
    fn pipeline_options_use_cli_or_toml_without_direct_defaults() {
        let options = rubric_options_for_mode(
            ConfiguredMode::Pipeline,
            None,
            Some("rubric-model".to_string()),
            None,
            Some("medium".to_string()),
            None,
        );

        assert_eq!(options.model.as_deref(), Some("rubric-model"));
        assert_eq!(options.effort.as_deref(), Some("medium"));

        let options = rubric_options_for_mode(
            ConfiguredMode::Pipeline,
            Some("cli-model".to_string()),
            Some("toml-model".to_string()),
            Some("high".to_string()),
            Some("low".to_string()),
            None,
        );

        assert_eq!(options.model.as_deref(), Some("cli-model"));
        assert_eq!(options.effort.as_deref(), Some("high"));
    }

    // Binary selection for direct mode: CLI, then TOML, then `codex-acp`;
    // extra ACP arguments stay empty in every case.
    #[test]
    fn direct_rubric_config_prefers_cli_then_toml_then_default() {
        let config = direct_rubric_config(
            Some("cli-codex-acp".to_string()),
            Some("toml-codex-acp".to_string()),
        );
        assert_eq!(config.codex_acp_binary, PathBuf::from("cli-codex-acp"));
        assert!(config.acp_args.is_empty());

        let config = direct_rubric_config(None, Some("toml-codex-acp".to_string()));
        assert_eq!(config.codex_acp_binary, PathBuf::from("toml-codex-acp"));
        assert!(config.acp_args.is_empty());

        let config = direct_rubric_config(None, None);
        assert_eq!(config.codex_acp_binary, PathBuf::from("codex-acp"));
        assert!(config.acp_args.is_empty());
    }

    // Every key of the schema round-trips from TOML into `TomlConfig`.
    #[test]
    fn parses_full_configured_toml_schema() {
        let config: TomlConfig = toml::from_str(
            r#"
mode = "pipeline"

[vision]
url = "http://localhost:8013"
model = "qwen3-vl-8b"
api_key = "secret"
prompt = "Describe this UI."

[rubric]
backend = "opencode"
args = ["acp", "--debug"]
model = "deepseek-v4"
effort = "high"
system_prompt = "Return strict rubric JSON."
"#,
        )
        .unwrap();

        assert_eq!(config.mode, Some(ConfiguredMode::Pipeline));
        assert_eq!(config.vision.url.as_deref(), Some("http://localhost:8013"));
        assert_eq!(config.vision.model.as_deref(), Some("qwen3-vl-8b"));
        assert_eq!(config.vision.api_key.as_deref(), Some("secret"));
        assert_eq!(config.vision.prompt.as_deref(), Some("Describe this UI."));
        assert_eq!(config.rubric.backend.as_deref(), Some("opencode"));
        assert_eq!(
            config.rubric.args.as_deref(),
            Some(["acp".to_string(), "--debug".to_string()].as_slice())
        );
        assert_eq!(config.rubric.model.as_deref(), Some("deepseek-v4"));
        assert_eq!(config.rubric.effort.as_deref(), Some("high"));
        assert_eq!(
            config.rubric.system_prompt.as_deref(),
            Some("Return strict rubric JSON.")
        );
    }

    // Pipeline mode without any vision URL must fail before evaluation starts;
    // the image path deliberately points at a file that does not exist.
    #[test]
    fn pipeline_mode_requires_vision_url() {
        let temp = tempfile::TempDir::new().unwrap();
        let config_path = temp.path().join("config.toml");
        std::fs::write(&config_path, "mode = \"pipeline\"\n").unwrap();

        let err = run_configured(ConfiguredArgs {
            image: temp.path().join("missing.png"),
            questions: QuestionSource::from_question("Does it render?".to_string()),
            name: "screenshot".to_string(),
            json: false,
            config: Some(config_path),
            mode: None,
            vision_url: None,
            vision_model: None,
            vision_api_key: None,
            vision_prompt: None,
            acp_binary: None,
            acp_args: Vec::new(),
            model: None,
            effort: None,
            system_prompt: None,
        })
        .unwrap_err();

        assert!(err.to_string().contains("vision URL is not set"));
    }

    // An unknown mode string is a parse error rather than a silent default.
    #[test]
    fn invalid_mode_is_rejected_by_toml_parser() {
        let err = toml::from_str::<TomlConfig>("mode = \"gpt55\"").unwrap_err();

        assert!(err.to_string().contains("mode"));
    }
}