// noether_engine/llm/cli_provider.rs
1//! Generic subprocess-based LLM provider.
2//!
3//! Covers Claude Desktop / Claude CLI, Gemini CLI, Cursor Agent, and
4//! OpenCode — the four "subscription CLIs" a developer commonly has
5//! logged in on a workstation. Each has its own argv shape but they
6//! all share the same execution contract:
7//!
8//!   - spawn the binary with a fixed flag set + the prompt as argv;
9//!   - stdin is closed (these tools read their prompt from `-p`);
10//!   - exit 0 + non-empty stdout = success;
11//!   - anything else = `LlmError::Provider` with stderr text.
12//!
13//! ## Why a single generic provider
14//!
15//! caloron-noether already implements this multi-provider fallback in
16//! Python (`stages/phases/_llm.py`) and learned the hard edge cases —
17//! the 25-second timeout cap to stay under Nix's default 30-second
18//! kill, the `SKIP_CLI` escape hatch for sandboxed environments where
19//! CLI auth isn't mounted, the exact argv incantation per tool. This
20//! module ports those lessons into the Rust engine so noether-grid
21//! workers get the same behaviour, for free, with the same failure
22//! modes. See `docs/research/llm-here.md` for the long-term plan to
23//! unify all three implementations behind one shared tool.
24//!
25//! ## Sandbox handling
26//!
27//! When `NOETHER_LLM_SKIP_CLI=1` is set, every CLI provider refuses to
28//! advertise itself as available (`available() == false`). Intended
29//! for stages that run inside the Nix executor, which mounts a
30//! restricted `$HOME` that doesn't carry the operator's CLI auth
31//! state — without this gate, a subscription CLI stalls waiting for
32//! interactive login and gets SIGKILL'd by the runner.
33//!
34//! ## Timeout
35//!
36//! Default `timeout_secs = 25`. Deliberately under Nix's 30-second
37//! default stage kill so a stalled CLI reports `Provider(timeout)`
38//! instead of the stage runner's less useful "process killed" error.
39//! Callers outside the Nix executor can bump it up via
40//! `CliConfig::timeout_secs`.
41
42use super::{LlmConfig, LlmError, LlmProvider, Message, Role};
43
44// ── Per-CLI definitions ─────────────────────────────────────────────────────
45
/// A static description of one CLI tool: the binary name, the argv
/// template, and how system prompts are passed (if at all). Used by
/// [`CliProvider::new`] to pick a concrete tool. See [`specs`] for
/// the concrete instances.
#[derive(Debug, Clone, Copy)]
pub struct CliSpec {
    /// The executable name to look up on `PATH` (and to invoke).
    /// Overridable per-instance via `CliConfig::binary`.
    pub binary: &'static str,
    /// Provider slug the broker routes on (matches
    /// `Effect::Llm { model: "<slug>" }` exactly).
    pub provider_slug: &'static str,
    /// Default model slug the worker advertises when none is configured.
    /// `complete` skips the `--model` flag when the configured model
    /// equals this value.
    pub default_model: &'static str,
    /// How this CLI takes its prompt on the argv.
    pub prompt_style: PromptStyle,
    /// Shape of the system-prompt flag. `None` = the CLI has no system
    /// prompt support and any system-role messages are concatenated
    /// into the user prompt.
    pub system_flag: Option<&'static str>,
}
65
/// How a CLI takes its prompt on the argv. Determines the argv shape
/// `CliProvider::complete` builds.
//
// `PartialEq`/`Eq` derived so callers and tests can compare styles
// directly instead of via `matches!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PromptStyle {
    /// `-p <prompt>` positional flag (claude, gemini, cursor-agent).
    DashP,
    /// `run <prompt>` subcommand (opencode).
    RunSubcommand,
}
73
/// Concrete per-CLI specs. Argv shapes are identical to what
/// caloron's `_llm.py` uses — keep in sync when either side changes.
pub mod specs {
    use super::*;

    /// Claude Desktop / Claude CLI — `claude -p PROMPT`.
    ///
    /// The `--dangerously-skip-permissions` flag is what caloron uses
    /// to bypass the interactive tool-use prompt that otherwise
    /// appears even in non-interactive mode. Without it the CLI
    /// blocks waiting for "do you want to allow this?".
    pub const CLAUDE: CliSpec = CliSpec {
        binary: "claude",
        provider_slug: "anthropic-cli",
        default_model: "claude-desktop",
        prompt_style: PromptStyle::DashP,
        // The only CLI here with a dedicated system-prompt flag.
        system_flag: Some("--append-system-prompt"),
    };

    /// Google Gemini CLI — `gemini -y -p PROMPT`. `-y` auto-accepts
    /// the first-run consent prompt.
    pub const GEMINI: CliSpec = CliSpec {
        binary: "gemini",
        provider_slug: "google-cli",
        default_model: "gemini-desktop",
        prompt_style: PromptStyle::DashP,
        // No system flag: system messages get inlined into the prompt.
        system_flag: None,
    };

    /// Cursor Agent CLI — `cursor-agent -p PROMPT --output-format text`.
    pub const CURSOR: CliSpec = CliSpec {
        binary: "cursor-agent",
        provider_slug: "cursor-cli",
        default_model: "cursor-desktop",
        prompt_style: PromptStyle::DashP,
        system_flag: None,
    };

    /// OpenCode CLI — `opencode run PROMPT`.
    pub const OPENCODE: CliSpec = CliSpec {
        binary: "opencode",
        provider_slug: "opencode",
        default_model: "opencode-default",
        prompt_style: PromptStyle::RunSubcommand,
        system_flag: None,
    };

    /// All specs, in the fallback order caloron settled on.
    pub const ALL: &[CliSpec] = &[CLAUDE, GEMINI, CURSOR, OPENCODE];
}
124
125// ── Config ──────────────────────────────────────────────────────────────────
126
/// Tunables for one [`CliProvider`] instance. Defaults come from the
/// `Default` impl (no binary override, 25s timeout).
#[derive(Debug, Clone)]
pub struct CliConfig {
    /// Override the binary path. Defaults to `spec.binary` (PATH lookup).
    pub binary: Option<String>,
    /// Wall-clock timeout for a single completion. Default 25s so a
    /// stalled CLI reports a timeout before Nix's 30s stage kill.
    pub timeout_secs: u64,
}
136
137impl Default for CliConfig {
138    fn default() -> Self {
139        Self {
140            binary: None,
141            timeout_secs: 25,
142        }
143    }
144}
145
/// Check whether CLI providers are globally suppressed. Set
/// `NOETHER_LLM_SKIP_CLI=1` inside a sandboxed environment (the Nix
/// executor being the obvious one) where subscription CLIs would
/// stall waiting for auth state that isn't mounted.
///
/// Truthy values are `1`, `true`, `yes`, `on` — matched
/// case-insensitively with surrounding whitespace ignored, so
/// `NOETHER_LLM_SKIP_CLI=TRUE` also suppresses. Anything else, or an
/// unset variable, means "not suppressed".
pub fn cli_providers_suppressed() -> bool {
    std::env::var("NOETHER_LLM_SKIP_CLI")
        .map(|raw| {
            let v = raw.trim();
            ["1", "true", "yes", "on"]
                .iter()
                .any(|t| v.eq_ignore_ascii_case(t))
        })
        .unwrap_or(false)
}
155
156// ── The provider ────────────────────────────────────────────────────────────
157
/// LLM provider that delegates each completion to a subscription CLI.
/// Stateless — each call spawns a fresh subprocess.
pub struct CliProvider {
    // Static description of the CLI tool (binary, argv shape, flags).
    spec: CliSpec,
    // Per-instance tunables: optional binary override and timeout.
    config: CliConfig,
}
164
165impl CliProvider {
166    pub fn new(spec: CliSpec) -> Self {
167        Self::with_config(spec, CliConfig::default())
168    }
169
170    pub fn with_config(spec: CliSpec, config: CliConfig) -> Self {
171        Self { spec, config }
172    }
173
174    /// The binary this provider will invoke.
175    pub fn binary(&self) -> &str {
176        self.config.binary.as_deref().unwrap_or(self.spec.binary)
177    }
178
179    /// True when this CLI is installed on the host and CLI providers
180    /// aren't globally suppressed. Does NOT verify auth state — we
181    /// find that out at first dispatch.
182    pub fn available(&self) -> bool {
183        if cli_providers_suppressed() {
184            return false;
185        }
186        binary_runs(self.binary())
187    }
188
189    pub fn spec(&self) -> CliSpec {
190        self.spec
191    }
192}
193
impl LlmProvider for CliProvider {
    /// Run one completion by spawning the CLI with the composed prompt
    /// as argv (stdin stays closed — see module docs).
    ///
    /// Returns the CLI's trimmed stdout, or `LlmError::Provider` on
    /// suppression, spawn failure, non-zero exit, empty output, or
    /// timeout.
    fn complete(&self, messages: &[Message], config: &LlmConfig) -> Result<String, LlmError> {
        // Re-check suppression at dispatch time: `available()` may have
        // been consulted before the env var was set, or not at all.
        if cli_providers_suppressed() {
            return Err(LlmError::Provider(
                "CLI providers suppressed via NOETHER_LLM_SKIP_CLI".into(),
            ));
        }

        // System-role messages travel via the CLI's system flag when it
        // has one; otherwise compose_prompt inlines them into the body.
        let (system_text, dialogue) = split_system_from_dialogue(messages);
        let prompt = compose_prompt(&dialogue, &system_text, self.spec.system_flag);

        let mut cmd = std::process::Command::new(self.binary());
        match self.spec.prompt_style {
            PromptStyle::DashP => {
                // Tool-specific extra flags — keep aligned with
                // caloron's _llm.py; if either side changes, update both.
                if self.spec.binary == "claude" {
                    // Bypasses the interactive tool-use permission prompt
                    // that otherwise blocks even in non-interactive mode.
                    cmd.arg("--dangerously-skip-permissions");
                }
                if self.spec.binary == "gemini" {
                    // Auto-accept the first-run consent prompt.
                    cmd.arg("-y");
                }
                if let (Some(flag), Some(sys)) = (self.spec.system_flag, system_text.as_ref()) {
                    cmd.arg(flag).arg(sys);
                }
                // Only forward `--model` when the caller picked something
                // other than the spec default or a placeholder.
                if !config.model.is_empty()
                    && config.model != self.spec.default_model
                    && config.model != "unknown"
                {
                    cmd.arg("--model").arg(&config.model);
                }
                cmd.arg("-p").arg(&prompt);
                if self.spec.binary == "cursor-agent" {
                    // Plain-text output instead of cursor-agent's default.
                    cmd.arg("--output-format").arg("text");
                }
            }
            PromptStyle::RunSubcommand => {
                // `opencode run PROMPT`. NOTE(review): config.model is
                // not forwarded on this path — confirm that's intended.
                cmd.arg("run").arg(&prompt);
            }
        }

        run_with_timeout(cmd, self.config.timeout_secs)
    }
}
238
239// ── Helpers ────────────────────────────────────────────────────────────────
240
241fn split_system_from_dialogue(messages: &[Message]) -> (Option<String>, Vec<String>) {
242    let mut system_parts: Vec<String> = Vec::new();
243    let mut dialogue: Vec<String> = Vec::new();
244    for m in messages {
245        match m.role {
246            Role::System => system_parts.push(m.content.clone()),
247            Role::User => dialogue.push(format!("USER: {}", m.content)),
248            Role::Assistant => dialogue.push(format!("ASSISTANT: {}", m.content)),
249        }
250    }
251    let system = if system_parts.is_empty() {
252        None
253    } else {
254        Some(system_parts.join("\n\n"))
255    };
256    (system, dialogue)
257}
258
/// Final prompt string passed as the tool's last argv. Tools that can
/// carry a system prompt via flag (claude) get the dialogue only;
/// tools without one get `SYSTEM: …\n\n` prepended so the
/// instructions aren't lost.
fn compose_prompt(
    dialogue: &[String],
    system: &Option<String>,
    system_flag: Option<&str>,
) -> String {
    let body = dialogue.join("\n\n");
    if let (Some(sys), None) = (system, system_flag) {
        format!("SYSTEM: {sys}\n\n{body}")
    } else {
        body
    }
}
274
/// `binary --version` succeeds. Fast, cheap, doesn't need auth state.
/// Spawn failure (binary missing, not executable) counts as "doesn't
/// run".
fn binary_runs(binary: &str) -> bool {
    use std::process::{Command, Stdio};

    let result = Command::new(binary)
        .arg("--version")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    match result {
        Ok(status) => status.success(),
        Err(_) => false,
    }
}
285
286fn run_with_timeout(mut cmd: std::process::Command, timeout_secs: u64) -> Result<String, LlmError> {
287    let timeout = std::time::Duration::from_secs(timeout_secs);
288    let (tx, rx) = std::sync::mpsc::channel();
289    let child = std::thread::spawn(move || {
290        let out = cmd
291            .stdin(std::process::Stdio::null())
292            .stdout(std::process::Stdio::piped())
293            .stderr(std::process::Stdio::piped())
294            .output();
295        let _ = tx.send(out);
296    });
297
298    let out = match rx.recv_timeout(timeout) {
299        Ok(Ok(o)) => o,
300        Ok(Err(e)) => return Err(LlmError::Provider(format!("CLI spawn failed: {e}"))),
301        Err(_) => {
302            return Err(LlmError::Provider(format!(
303                "CLI exceeded {timeout_secs}s timeout"
304            )))
305        }
306    };
307    let _ = child.join();
308
309    if !out.status.success() {
310        let stderr = String::from_utf8_lossy(&out.stderr);
311        return Err(LlmError::Provider(format!(
312            "CLI exit {}: {}",
313            out.status.code().unwrap_or(-1),
314            stderr.trim()
315        )));
316    }
317    let stdout = String::from_utf8_lossy(&out.stdout).trim().to_string();
318    if stdout.is_empty() {
319        return Err(LlmError::Provider("CLI produced empty output".into()));
320    }
321    Ok(stdout)
322}
323
324// ── Back-compat shims ──────────────────────────────────────────────────────
325
/// Old name kept so existing call-sites still compile.
#[deprecated(note = "use CliProvider::new(specs::CLAUDE)")]
pub type ClaudeCliProvider = CliProvider;

/// Convenience constructor preserved for the call-site in providers.rs.
/// Equivalent to `CliProvider::new(specs::CLAUDE)`.
pub fn new_claude_cli() -> CliProvider {
    CliProvider::new(specs::CLAUDE)
}
334
335// ── Tests ────────────────────────────────────────────────────────────────────
336
#[cfg(test)]
mod tests {
    use super::*;

    // Provider with a forced (bogus) binary path and a short timeout
    // so these tests never spawn a real subscription CLI.
    fn provider_for(spec: CliSpec, binary_override: &str) -> CliProvider {
        CliProvider::with_config(
            spec,
            CliConfig {
                binary: Some(binary_override.into()),
                timeout_secs: 2,
            },
        )
    }

    // A nonexistent binary must make every spec report unavailable.
    #[test]
    fn missing_binary_is_not_available() {
        for spec in specs::ALL {
            let p = provider_for(*spec, "/nonexistent/never-here-xyz");
            assert!(!p.available(), "should be unavailable for {}", spec.binary);
        }
    }

    // complete() against a missing binary surfaces Provider, not a panic.
    #[test]
    fn missing_binary_completion_returns_provider_error() {
        let p = provider_for(specs::CLAUDE, "/nonexistent/never-here-xyz");
        let err = p
            .complete(
                &[Message::user("hi")],
                &LlmConfig {
                    model: "claude-desktop".into(),
                    ..Default::default()
                },
            )
            .unwrap_err();
        assert!(matches!(err, LlmError::Provider(_)));
    }

    // NOTE(review): mutates process-global env; Rust runs tests in
    // parallel, so other env-reading tests can observe the change —
    // benign today (they assert unavailability either way), but worth
    // a serial guard if that stops being true.
    #[test]
    fn skip_cli_env_suppresses_all_providers() {
        let prev = std::env::var("NOETHER_LLM_SKIP_CLI").ok();
        std::env::set_var("NOETHER_LLM_SKIP_CLI", "1");
        let p = provider_for(specs::CLAUDE, "/bin/true");
        assert!(!p.available());
        let err = p
            .complete(
                &[Message::user("hi")],
                &LlmConfig {
                    model: "claude-desktop".into(),
                    ..Default::default()
                },
            )
            .unwrap_err();
        match err {
            LlmError::Provider(m) => assert!(m.contains("suppressed")),
            _ => panic!("expected Provider(suppressed)"),
        }
        // Restore whatever was set before so other tests see it.
        match prev {
            Some(v) => std::env::set_var("NOETHER_LLM_SKIP_CLI", v),
            None => std::env::remove_var("NOETHER_LLM_SKIP_CLI"),
        }
    }

    // No system flag -> SYSTEM text must be inlined into the prompt.
    #[test]
    fn compose_prompt_inlines_system_when_no_flag() {
        let body = compose_prompt(&["USER: hello".into()], &Some("be terse".into()), None);
        assert!(body.contains("SYSTEM: be terse"));
        assert!(body.contains("USER: hello"));
    }

    // With a system flag, the system text travels via argv, not the body.
    #[test]
    fn compose_prompt_omits_inline_system_when_flag_exists() {
        let body = compose_prompt(
            &["USER: hi".into()],
            &Some("be terse".into()),
            Some("--append-system-prompt"),
        );
        assert!(!body.contains("SYSTEM:"));
        assert!(body.contains("USER: hi"));
    }

    // Guards against copy-paste slips when adding a new CliSpec.
    #[test]
    fn all_specs_have_distinct_binaries_and_slugs() {
        let binaries: std::collections::HashSet<_> = specs::ALL.iter().map(|s| s.binary).collect();
        let slugs: std::collections::HashSet<_> =
            specs::ALL.iter().map(|s| s.provider_slug).collect();
        assert_eq!(binaries.len(), specs::ALL.len());
        assert_eq!(slugs.len(), specs::ALL.len());
    }
}
425}