noether-engine 0.7.3

Noether composition engine: Lagrange graph AST, type checker, planner, executor, semantic index, LLM-backed composition agent
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
//! Generic subprocess-based LLM provider.
//!
//! Covers Claude Desktop / Claude CLI, Gemini CLI, Cursor Agent, and
//! OpenCode — the four "subscription CLIs" a developer commonly has
//! logged in on a workstation. Each has its own argv shape but they
//! all share the same execution contract:
//!
//!   - spawn the binary with a fixed flag set + the prompt as argv;
//!   - stdin is closed (these tools read their prompt from `-p`);
//!   - exit 0 + non-empty stdout = success;
//!   - anything else = `LlmError::Provider` with stderr text.
//!
//! ## Why a single generic provider
//!
//! caloron-noether already implements this multi-provider fallback in
//! Python (`stages/phases/_llm.py`) and learned the hard edge cases —
//! the 25-second timeout cap to stay under Nix's default 30-second
//! kill, the `SKIP_CLI` escape hatch for sandboxed environments where
//! CLI auth isn't mounted, the exact argv incantation per tool. This
//! module ports those lessons into the Rust engine so noether-grid
//! workers get the same behaviour, for free, with the same failure
//! modes. See `docs/research/llm-here.md` for the long-term plan to
//! unify all three implementations behind one shared tool.
//!
//! ## Sandbox handling
//!
//! When `NOETHER_LLM_SKIP_CLI=1` is set, every CLI provider refuses to
//! advertise itself as available (`available() == false`). Intended
//! for stages that run inside the Nix executor, which mounts a
//! restricted `$HOME` that doesn't carry the operator's CLI auth
//! state — without this gate, a subscription CLI stalls waiting for
//! interactive login and gets SIGKILL'd by the runner.
//!
//! ## Timeout
//!
//! Default `timeout_secs = 25`. Deliberately under Nix's 30-second
//! default stage kill so a stalled CLI reports `Provider(timeout)`
//! instead of the stage runner's less useful "process killed" error.
//! Callers outside the Nix executor can bump it up via
//! `CliConfig::timeout_secs`.

use super::{LlmConfig, LlmError, LlmProvider, Message, Role};

// ── Per-CLI definitions ─────────────────────────────────────────────────────

/// A static description of one CLI tool: the binary name, the argv
/// template, and how system prompts are passed (if at all). Used by
/// [`CliProvider::new`] to pick a concrete tool.
#[derive(Debug, Clone, Copy)]
pub struct CliSpec {
    /// The executable name to look up on `PATH` (and to invoke).
    pub binary: &'static str,
    /// Provider slug the broker routes on (matches
    /// `Effect::Llm { model: "<slug>" }` exactly).
    pub provider_slug: &'static str,
    /// Default model slug the worker advertises when none is configured.
    pub default_model: &'static str,
    /// How this CLI takes its prompt on the argv.
    pub prompt_style: PromptStyle,
    /// Shape of the system-prompt flag. `None` = the CLI has no system
    /// prompt support and any system-role messages are concatenated
    /// into the user prompt.
    pub system_flag: Option<&'static str>,
}

#[derive(Debug, Clone, Copy)]
pub enum PromptStyle {
    /// `-p <prompt>` positional flag (claude, gemini, cursor-agent).
    DashP,
    /// `run <prompt>` subcommand (opencode).
    RunSubcommand,
}

/// Concrete per-CLI specs. Argv shapes are identical to what
/// caloron's `_llm.py` uses — keep in sync when either side changes.
pub mod specs {
    use super::*;

    /// Claude Desktop / Claude CLI — `claude -p PROMPT`.
    ///
    /// The `--dangerously-skip-permissions` flag is what caloron uses
    /// to bypass the interactive tool-use prompt that otherwise
    /// appears even in non-interactive mode. Without it the CLI
    /// blocks waiting for "do you want to allow this?".
    pub const CLAUDE: CliSpec = CliSpec {
        binary: "claude",
        provider_slug: "anthropic-cli",
        default_model: "claude-desktop",
        prompt_style: PromptStyle::DashP,
        system_flag: Some("--append-system-prompt"),
    };

    /// Google Gemini CLI — `gemini -y -p PROMPT`. `-y` auto-accepts
    /// the first-run consent prompt.
    pub const GEMINI: CliSpec = CliSpec {
        binary: "gemini",
        provider_slug: "google-cli",
        default_model: "gemini-desktop",
        prompt_style: PromptStyle::DashP,
        system_flag: None,
    };

    /// Cursor Agent CLI — `cursor-agent -p PROMPT --output-format text`.
    pub const CURSOR: CliSpec = CliSpec {
        binary: "cursor-agent",
        provider_slug: "cursor-cli",
        default_model: "cursor-desktop",
        prompt_style: PromptStyle::DashP,
        system_flag: None,
    };

    /// OpenCode CLI — `opencode run PROMPT`.
    pub const OPENCODE: CliSpec = CliSpec {
        binary: "opencode",
        provider_slug: "opencode",
        default_model: "opencode-default",
        prompt_style: PromptStyle::RunSubcommand,
        system_flag: None,
    };

    /// All specs, in the fallback order caloron settled on.
    pub const ALL: &[CliSpec] = &[CLAUDE, GEMINI, CURSOR, OPENCODE];
}

// ── Config ──────────────────────────────────────────────────────────────────

/// Tunables for one [`CliProvider`] instance.
#[derive(Debug, Clone)]
pub struct CliConfig {
    /// Override the binary path. Defaults to `spec.binary` (PATH lookup).
    pub binary: Option<String>,
    /// Wall-clock timeout for a single completion. Default 25s so a
    /// stalled CLI reports a timeout before Nix's 30s stage kill.
    pub timeout_secs: u64,
}

impl Default for CliConfig {
    fn default() -> Self {
        Self {
            binary: None,
            timeout_secs: 25,
        }
    }
}

/// Check whether CLI providers are globally suppressed. Set
/// `NOETHER_LLM_SKIP_CLI=1` inside a sandboxed environment (the Nix
/// executor being the obvious one) where subscription CLIs would
/// stall waiting for auth state that isn't mounted.
pub fn cli_providers_suppressed() -> bool {
    std::env::var("NOETHER_LLM_SKIP_CLI")
        .map(|v| matches!(v.as_str(), "1" | "true" | "yes" | "on"))
        .unwrap_or(false)
}

// ── The provider ────────────────────────────────────────────────────────────

/// LLM provider that delegates each completion to a subscription CLI.
/// Stateless — each call spawns a fresh subprocess.
pub struct CliProvider {
    spec: CliSpec,
    config: CliConfig,
}

impl CliProvider {
    pub fn new(spec: CliSpec) -> Self {
        Self::with_config(spec, CliConfig::default())
    }

    pub fn with_config(spec: CliSpec, config: CliConfig) -> Self {
        Self { spec, config }
    }

    /// The binary this provider will invoke.
    pub fn binary(&self) -> &str {
        self.config.binary.as_deref().unwrap_or(self.spec.binary)
    }

    /// True when this CLI is installed on the host and CLI providers
    /// aren't globally suppressed. Does NOT verify auth state — we
    /// find that out at first dispatch.
    pub fn available(&self) -> bool {
        if cli_providers_suppressed() {
            return false;
        }
        binary_runs(self.binary())
    }

    pub fn spec(&self) -> CliSpec {
        self.spec
    }
}

impl LlmProvider for CliProvider {
    fn complete(&self, messages: &[Message], config: &LlmConfig) -> Result<String, LlmError> {
        if cli_providers_suppressed() {
            return Err(LlmError::Provider(
                "CLI providers suppressed via NOETHER_LLM_SKIP_CLI".into(),
            ));
        }

        let (system_text, dialogue) = split_system_from_dialogue(messages);
        let prompt = compose_prompt(&dialogue, &system_text, self.spec.system_flag);

        let mut cmd = std::process::Command::new(self.binary());
        match self.spec.prompt_style {
            PromptStyle::DashP => {
                // Tool-specific extra flags — keep aligned with
                // caloron's _llm.py; if either side changes, update both.
                if self.spec.binary == "claude" {
                    cmd.arg("--dangerously-skip-permissions");
                }
                if self.spec.binary == "gemini" {
                    cmd.arg("-y");
                }
                if let (Some(flag), Some(sys)) = (self.spec.system_flag, system_text.as_ref()) {
                    cmd.arg(flag).arg(sys);
                }
                if !config.model.is_empty()
                    && config.model != self.spec.default_model
                    && config.model != "unknown"
                {
                    cmd.arg("--model").arg(&config.model);
                }
                cmd.arg("-p").arg(&prompt);
                if self.spec.binary == "cursor-agent" {
                    cmd.arg("--output-format").arg("text");
                }
            }
            PromptStyle::RunSubcommand => {
                cmd.arg("run").arg(&prompt);
            }
        }

        run_with_timeout(cmd, self.config.timeout_secs)
    }
}

// ── Helpers ────────────────────────────────────────────────────────────────

fn split_system_from_dialogue(messages: &[Message]) -> (Option<String>, Vec<String>) {
    let mut system_parts: Vec<String> = Vec::new();
    let mut dialogue: Vec<String> = Vec::new();
    for m in messages {
        match m.role {
            Role::System => system_parts.push(m.content.clone()),
            Role::User => dialogue.push(format!("USER: {}", m.content)),
            Role::Assistant => dialogue.push(format!("ASSISTANT: {}", m.content)),
        }
    }
    let system = if system_parts.is_empty() {
        None
    } else {
        Some(system_parts.join("\n\n"))
    };
    (system, dialogue)
}

/// Final prompt string passed as the tool's last argv. Tools that can
/// carry a system prompt via flag (claude) get the dialogue only;
/// others get `SYSTEM: …\n\n` prepended so the instructions aren't
/// lost.
fn compose_prompt(
    dialogue: &[String],
    system: &Option<String>,
    system_flag: Option<&str>,
) -> String {
    let body = dialogue.join("\n\n");
    match (system, system_flag) {
        (Some(sys), None) => format!("SYSTEM: {sys}\n\n{body}"),
        _ => body,
    }
}

/// `binary --version` succeeds. Fast, cheap, doesn't need auth state.
fn binary_runs(binary: &str) -> bool {
    std::process::Command::new(binary)
        .arg("--version")
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map(|s| s.success())
        .unwrap_or(false)
}

fn run_with_timeout(mut cmd: std::process::Command, timeout_secs: u64) -> Result<String, LlmError> {
    let timeout = std::time::Duration::from_secs(timeout_secs);
    let (tx, rx) = std::sync::mpsc::channel();
    let child = std::thread::spawn(move || {
        let out = cmd
            .stdin(std::process::Stdio::null())
            .stdout(std::process::Stdio::piped())
            .stderr(std::process::Stdio::piped())
            .output();
        let _ = tx.send(out);
    });

    let out = match rx.recv_timeout(timeout) {
        Ok(Ok(o)) => o,
        Ok(Err(e)) => return Err(LlmError::Provider(format!("CLI spawn failed: {e}"))),
        Err(_) => {
            return Err(LlmError::Provider(format!(
                "CLI exceeded {timeout_secs}s timeout"
            )))
        }
    };
    let _ = child.join();

    if !out.status.success() {
        let stderr = String::from_utf8_lossy(&out.stderr);
        return Err(LlmError::Provider(format!(
            "CLI exit {}: {}",
            out.status.code().unwrap_or(-1),
            stderr.trim()
        )));
    }
    let stdout = String::from_utf8_lossy(&out.stdout).trim().to_string();
    if stdout.is_empty() {
        return Err(LlmError::Provider("CLI produced empty output".into()));
    }
    Ok(stdout)
}

// ── Back-compat shims ──────────────────────────────────────────────────────

/// Old name kept so existing call-sites still compile.
#[deprecated(note = "use CliProvider::new(specs::CLAUDE)")]
pub type ClaudeCliProvider = CliProvider;

/// Convenience constructor preserved for the call-site in providers.rs.
pub fn new_claude_cli() -> CliProvider {
    CliProvider::new(specs::CLAUDE)
}

// ── Tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    fn provider_for(spec: CliSpec, binary_override: &str) -> CliProvider {
        CliProvider::with_config(
            spec,
            CliConfig {
                binary: Some(binary_override.into()),
                timeout_secs: 2,
            },
        )
    }

    #[test]
    fn missing_binary_is_not_available() {
        for spec in specs::ALL {
            let p = provider_for(*spec, "/nonexistent/never-here-xyz");
            assert!(!p.available(), "should be unavailable for {}", spec.binary);
        }
    }

    #[test]
    fn missing_binary_completion_returns_provider_error() {
        let p = provider_for(specs::CLAUDE, "/nonexistent/never-here-xyz");
        let err = p
            .complete(
                &[Message::user("hi")],
                &LlmConfig {
                    model: "claude-desktop".into(),
                    ..Default::default()
                },
            )
            .unwrap_err();
        assert!(matches!(err, LlmError::Provider(_)));
    }

    #[test]
    fn skip_cli_env_suppresses_all_providers() {
        let prev = std::env::var("NOETHER_LLM_SKIP_CLI").ok();
        std::env::set_var("NOETHER_LLM_SKIP_CLI", "1");
        let p = provider_for(specs::CLAUDE, "/bin/true");
        assert!(!p.available());
        let err = p
            .complete(
                &[Message::user("hi")],
                &LlmConfig {
                    model: "claude-desktop".into(),
                    ..Default::default()
                },
            )
            .unwrap_err();
        match err {
            LlmError::Provider(m) => assert!(m.contains("suppressed")),
            _ => panic!("expected Provider(suppressed)"),
        }
        match prev {
            Some(v) => std::env::set_var("NOETHER_LLM_SKIP_CLI", v),
            None => std::env::remove_var("NOETHER_LLM_SKIP_CLI"),
        }
    }

    #[test]
    fn compose_prompt_inlines_system_when_no_flag() {
        let body = compose_prompt(&["USER: hello".into()], &Some("be terse".into()), None);
        assert!(body.contains("SYSTEM: be terse"));
        assert!(body.contains("USER: hello"));
    }

    #[test]
    fn compose_prompt_omits_inline_system_when_flag_exists() {
        let body = compose_prompt(
            &["USER: hi".into()],
            &Some("be terse".into()),
            Some("--append-system-prompt"),
        );
        assert!(!body.contains("SYSTEM:"));
        assert!(body.contains("USER: hi"));
    }

    #[test]
    fn all_specs_have_distinct_binaries_and_slugs() {
        let binaries: std::collections::HashSet<_> = specs::ALL.iter().map(|s| s.binary).collect();
        let slugs: std::collections::HashSet<_> =
            specs::ALL.iter().map(|s| s.provider_slug).collect();
        assert_eq!(binaries.len(), specs::ALL.len());
        assert_eq!(slugs.len(), specs::ALL.len());
    }
}