car-server-core 0.24.1

Transport-neutral library for the CAR daemon JSON-RPC dispatcher (used by car-server and tokhn-daemon)
//! Delegation to an external agentic CLI (Claude Code, Codex, Gemini).
//!
//! The CLI does the coding inside the worktree; **CAR keeps the verdict**:
//! after every invocation the runtime re-runs the outcome contract itself
//! through the policy-gated shell tool. A CLI claiming success doesn't matter
//! — the checks do.
//!
//! When the daemon's MCP listener is bound, its URL is threaded into
//! [`InvokeOptions::mcp_endpoint`] so the CLI's **CAR-namespace** tool calls
//! (`memory_*`, `verify`, `skill_*`) route back through car-server's policy +
//! memgine — gated and audited. The CLI's own **built-in** tools (Edit, Bash)
//! still run with the CLI's permissions inside the worktree: that is the
//! residual Phase 2 stage-4b upstream limitation. The pinned `cwd`, the
//! contract re-evaluation, and the merge approval gate remain the containment
//! for those built-ins until tool round-trip governance lands in
//! `car-external-agents`.

use std::sync::atomic::Ordering;
use std::sync::Arc;

use car_external_agents::{InvokeOptions, StreamEventEmitter};

use super::contract::{evaluate_contract, OutcomeContract};
use super::native_loop::LoopOutcome;
use super::session::{CancelFlag, CoderEventKind, EventSink};
use super::shell_tool::WorktreeExecutor;

/// Tuning knobs for delegating a task to an external agentic CLI.
///
/// Derives `PartialEq`/`Eq` so two configurations can be compared directly
/// (for example in tests, or to detect that a setting changed).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExternalLoopConfig {
    /// Per-invocation model-turn cap (maps to the CLI's `--max-turns`).
    /// `None` passes no cap on to the runner.
    pub max_turns: Option<u32>,
    /// Per-invocation wall-clock budget in seconds (runner clamps to 1h).
    /// `None` passes no budget on to the runner.
    pub timeout_secs: Option<u64>,
    /// Fresh repair invocations after a red first pass (the stream-json
    /// protocol has no session resume yet, so repairs re-state the task plus
    /// the failing-check output). The total number of invocations is this
    /// value plus one.
    pub repair_invokes: u32,
}

impl Default for ExternalLoopConfig {
    fn default() -> Self {
        Self {
            max_turns: Some(50),
            timeout_secs: Some(1800),
            repair_invokes: 1,
        }
    }
}

/// Assemble the prompt given to the CLI: the caller's intent, the rendered
/// outcome contract, and the fixed ground rules.
///
/// When `feedback` is `Some`, a trailing section is appended that tells the
/// CLI a previous attempt left checks failing, followed by the feedback text
/// verbatim. With `None` the prompt ends after the ground rules.
fn build_task(intent: &str, contract: &OutcomeContract, feedback: Option<&str>) -> String {
    let checks = contract.render();
    let mut prompt = format!(
        "{intent}\n\n\
         OUTCOME CONTRACT — your work is verified by running these checks at the repository \
         root; all must pass:\n{checks}\n\
         Ground rules:\n\
         - Work only inside the current directory (an isolated git worktree).\n\
         - Do NOT git commit, push, or touch remotes; the runtime owns version control.\n\
         - Run the checks yourself before finishing.\n"
    );

    // Repair invocations restate the task and add the failing-check output.
    if let Some(failures) = feedback {
        prompt.push_str(
            "\nA previous attempt left these checks FAILING — fix the code so they pass:\n",
        );
        prompt.push_str(failures);
    }

    prompt
}

/// The per-invocation options handed to the runner. The `mcp_endpoint`, when
/// set, routes the CLI's CAR-namespace tool calls through the daemon's policy +
/// memgine; `allowed_tools: None` leaves the CLI's own built-in tools on their
/// default (ungoverned) policy — see the module docs.
fn build_invoke_opts(
    executor: &WorktreeExecutor,
    cfg: &ExternalLoopConfig,
    mcp_endpoint: Option<&str>,
) -> InvokeOptions {
    InvokeOptions {
        cwd: Some(executor.worktree().to_path_buf()),
        allowed_tools: None, // the CLI's default policy; see module docs
        max_turns: cfg.max_turns,
        timeout_secs: cfg.timeout_secs,
        // Gate + audit the CLI's CAR-namespace tool calls through the daemon
        // when its MCP listener is bound; None degrades cleanly.
        mcp_endpoint: mcp_endpoint.map(String::from),
        ..Default::default()
    }
}

/// Run the external engine to completion, cancellation, or exhaustion.
///
/// Each attempt invokes the CLI identified by `agent_id` with a task built
/// from `intent` and `contract` (plus the previous attempt's failing-check
/// output on repair attempts), then re-evaluates the contract through
/// `executor`. The contract evaluation — not the CLI's own claim — decides
/// whether the attempt passed.
///
/// At most `cfg.repair_invokes + 1` attempts are made; the sum saturates at
/// `u32::MAX` rather than overflowing.
///
/// Cancellation is checked between invocations only — the runner does not yet
/// expose a kill handle for an in-flight CLI process (same limitation as
/// `agents.chat`); the invocation's own timeout bounds the wait.
///
/// # Returns
///
/// A [`LoopOutcome`] where:
/// - `passed` is `true` only when every contract check passed after an
///   attempt;
/// - `iterations` is the number of attempts started (the attempts completed
///   before a cancellation was observed);
/// - `last_results` holds the most recent contract evaluation, if any;
/// - `error` is `Some("cancelled")` on cancellation, a message naming the
///   agent when the invocation itself failed, and `None` otherwise —
///   including when all attempts were used up without passing.
pub async fn run_external_loop(
    agent_id: &str,
    intent: &str,
    contract: &OutcomeContract,
    executor: &WorktreeExecutor,
    sink: &Arc<EventSink>,
    cancel: &CancelFlag,
    cfg: &ExternalLoopConfig,
    // Daemon MCP URL, when bound. Routes the CLI's CAR-namespace tool calls
    // through the daemon's policy + memgine. `None` degrades cleanly.
    mcp_endpoint: Option<&str>,
) -> LoopOutcome {
    // Saturate: `1 + u32::MAX` would panic in debug builds and wrap to zero
    // attempts in release builds, silently skipping the CLI altogether.
    let attempts = cfg.repair_invokes.saturating_add(1);
    let mut feedback: Option<String> = None;
    let mut last_results = Vec::new();

    for attempt in 1..=attempts {
        if cancel.load(Ordering::SeqCst) {
            return LoopOutcome {
                passed: false,
                // The current attempt never started.
                iterations: attempt - 1,
                last_results,
                error: Some("cancelled".into()),
            };
        }
        sink.emit(CoderEventKind::IterationStarted {
            n: attempt,
            max: attempts,
        });

        let task = build_task(intent, contract, feedback.as_deref());
        let opts = build_invoke_opts(executor, cfg, mcp_endpoint);

        // Forward the CLI's stream events to the sink as raw JSON. Events that
        // fail to serialize are dropped (best-effort forwarding).
        let emitter_sink = sink.clone();
        let emitter: StreamEventEmitter = Arc::new(move |event| {
            if let Ok(raw) = serde_json::to_value(&event) {
                emitter_sink.emit(CoderEventKind::ExternalEvent { raw });
            }
        });

        match car_external_agents::invoke_with_emitter(agent_id, &task, opts, Some(emitter)).await
        {
            Ok(result) => {
                if result.is_error {
                    sink.emit(CoderEventKind::Error {
                        message: format!(
                            "external agent '{agent_id}' reported an error: {}",
                            result.error.as_deref().unwrap_or("unknown")
                        ),
                    });
                    // Fall through to evaluation anyway — partial work may
                    // already satisfy the contract.
                }
            }
            Err(e) => {
                // Spawn/transport failure: the caller decides whether to fall
                // back to the native loop.
                return LoopOutcome {
                    passed: false,
                    iterations: attempt,
                    last_results,
                    error: Some(format!("external agent '{agent_id}' failed: {e}")),
                };
            }
        }

        // CAR's verdict, not the CLI's.
        last_results = evaluate_contract(contract, executor, sink).await;
        if last_results.iter().all(|r| r.passed) {
            return LoopOutcome {
                passed: true,
                iterations: attempt,
                last_results,
                error: None,
            };
        }

        // Only a following repair attempt reads the feedback, so skip
        // building it after the final attempt.
        if attempt < attempts {
            feedback = Some(
                last_results
                    .iter()
                    .filter(|r| !r.passed)
                    .map(|r| {
                        format!(
                            "FAILED {} (exit {:?}):\n{}",
                            r.name, r.exit_code, r.output_tail
                        )
                    })
                    .collect::<Vec<_>>()
                    .join("\n\n"),
            );
        }
    }

    // Every attempt ran and the contract is still red.
    LoopOutcome {
        passed: false,
        iterations: attempts,
        last_results,
        error: None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::coder::contract::ContractCheck;
    use std::sync::atomic::AtomicBool;

    /// MCP URL used by the option-threading test.
    const ENDPOINT: &str = "http://127.0.0.1:9102/mcp";

    /// A contract with a single `cargo test` check, shared by the tests below.
    fn contract() -> OutcomeContract {
        let check = ContractCheck {
            name: "tests".into(),
            command: "cargo test".into(),
            expect_exit_zero: true,
            output_contains: None,
            timeout_secs: 300,
        };
        OutcomeContract {
            description: "x".into(),
            checks: vec![check],
        }
    }

    /// A cancel flag starting in the given state.
    fn cancel_flag(initial: bool) -> CancelFlag {
        Arc::new(AtomicBool::new(initial))
    }

    // A first-pass task has intent, checks, and ground rules, but no repair
    // section.
    #[test]
    fn task_carries_intent_contract_and_ground_rules() {
        let task = build_task("add a CLI flag", &contract(), None);
        for needle in ["add a CLI flag", "cargo test", "Do NOT git commit"] {
            assert!(task.contains(needle), "task is missing {needle:?}");
        }
        assert!(!task.contains("FAILING"));
    }

    // A repair task appends the failing-check feedback.
    #[test]
    fn repair_task_carries_failure_feedback() {
        let feedback = "FAILED tests (exit Some(1)):\nboom";
        let task = build_task("x", &contract(), Some(feedback));
        assert!(task.contains("previous attempt"));
        assert!(task.contains("boom"));
    }

    #[test]
    fn mcp_endpoint_is_threaded_into_invoke_opts() {
        let tmp = tempfile::tempdir().unwrap();
        let executor = WorktreeExecutor::new(tmp.path());
        let opts = build_invoke_opts(&executor, &ExternalLoopConfig::default(), Some(ENDPOINT));
        assert_eq!(opts.mcp_endpoint.as_deref(), Some(ENDPOINT));
        // The CLI's own built-in tools stay on the default policy.
        assert!(opts.allowed_tools.is_none());
    }

    #[test]
    fn absent_mcp_endpoint_degrades_to_none() {
        let tmp = tempfile::tempdir().unwrap();
        let executor = WorktreeExecutor::new(tmp.path());
        let opts = build_invoke_opts(&executor, &ExternalLoopConfig::default(), None);
        assert!(opts.mcp_endpoint.is_none());
    }

    // An agent id the runner cannot invoke yields a failed outcome whose error
    // names the agent.
    #[tokio::test]
    async fn unknown_agent_is_a_clean_error_outcome() {
        let tmp = tempfile::tempdir().unwrap();
        let executor = WorktreeExecutor::new(tmp.path());
        let sink = Arc::new(EventSink::test_sink());
        let cancel = cancel_flag(false);
        let cfg = ExternalLoopConfig::default();

        let outcome = run_external_loop(
            "no-such-cli",
            "x",
            &contract(),
            &executor,
            &sink,
            &cancel,
            &cfg,
            None,
        )
        .await;

        assert!(!outcome.passed);
        let err = outcome.error.expect("spawn failure must surface");
        assert!(err.contains("no-such-cli"), "{err}");
    }

    // A flag that is already set stops the loop before the first invocation.
    #[tokio::test]
    async fn pre_cancelled_loop_does_not_invoke() {
        let tmp = tempfile::tempdir().unwrap();
        let executor = WorktreeExecutor::new(tmp.path());
        let sink = Arc::new(EventSink::test_sink());
        let cancel = cancel_flag(true);
        let cfg = ExternalLoopConfig::default();

        let outcome = run_external_loop(
            "claude-code",
            "x",
            &contract(),
            &executor,
            &sink,
            &cancel,
            &cfg,
            None,
        )
        .await;

        assert_eq!(outcome.error.as_deref(), Some("cancelled"));
        assert_eq!(outcome.iterations, 0);
    }
}