car-external-agents 0.24.1

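//! Per-task invocation of external CLI agents.
//!
//! Phase 2 stage 3: spawn `claude -p --output-format stream-json
//! --input-format stream-json`, write the user's task to stdin,
//! consume the event stream, and return an aggregate
//! [`InvokeResult`]. The Codex (`codex exec --json`) and Gemini
//! (`gemini -p … --yolo`) runners further down in this file return
//! the same [`InvokeResult`] shape.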
//!
//! ## Cost discipline
//!
//! Every live invocation burns the user's subscription quota
//! (Claude Code uses ~30K cache-creation tokens per cold call).
//! Tests in this module use mock event streams via
//! [`process_stream`] — never spawn the real binary in CI.
//! Integration tests against a real CLI live behind an env-gated
//! `--ignored` flag so contributors opt in deliberately.
//!
//! ## Tool execution
//!
//! Phase 2 stage 3 (this file): observe-only. The agent uses its
//! own built-in tool set; we capture every `tool_use` block in
//! the assistant's content stream and report the count, but we
//! don't yet round-trip them through CAR's policy + eventlog.
//! That governance layer lands in stage 4.

use crate::protocol::{
    parse_line, AssistantEventData, StreamEvent, SystemEventData, ToolUseRequest,
};

/// Callback fired for each [`StreamEvent`] as it's parsed off the
/// child's stdout. The default invocation path (FFI callers, the
/// one-shot daemon path) passes `None` — `process_stream` runs
/// silently and the [`InvokeResult`] is the only output. The
/// streaming JSON-RPC path (`agents.invoke_external { stream: true }`)
/// passes a closure that fans each event out to the host's WS as
/// an `agents.chat.event` notification — same channel supervised
/// agents use, so host UIs render a single streaming surface for
/// both kinds of agent.
///
/// The stream processors call the emitter inline, once per event,
/// from the same loop that reads the child's stdout, and hand it an
/// owned event. A slow emitter therefore delays reading the next
/// line.
///
/// `Sync` so the closure can be held across `.await` in
/// `process_stream`; `'static` so it can be cloned into the
/// runner task without lifetime threading.
pub type StreamEventEmitter = Arc<dyn Fn(StreamEvent) + Send + Sync + 'static>;

use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use thiserror::Error;
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncWriteExt};
use tokio::process::Command;

/// Caller-supplied options for a single invocation.
///
/// Every field carries `#[serde(default)]`, so a payload may omit
/// any of them; `InvokeOptions::default()` leaves everything `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct InvokeOptions {
    /// Working directory the agent runs in. Inherits the daemon's
    /// cwd when `None`.
    #[serde(default)]
    pub cwd: Option<PathBuf>,
    /// Allowed tool names. When provided, passed through as
    /// `--allowed-tools "<list>"` (names joined with a single space)
    /// so the binary refuses anything outside the list. `None` = use
    /// the binary's default policy. `Some(vec![])` = deny every tool
    /// (text-only response).
    ///
    /// Only the Claude Code argv builder reads this field; the Codex
    /// and Gemini argv builders in this file do not.
    #[serde(default)]
    pub allowed_tools: Option<Vec<String>>,
    /// Cap on model turns. Maps to `--max-turns`. Cost-control
    /// guard for runaway loops.
    ///
    /// Only the Claude Code argv builder reads this field; the Codex
    /// and Gemini argv builders in this file do not.
    #[serde(default)]
    pub max_turns: Option<u32>,
    /// Hard deadline. Defaults to 5 minutes when `None`. Anything
    /// above an hour is clamped down to one hour to prevent
    /// stuck-process resource exhaustion. The Claude Code and Codex
    /// runners also raise `0` to one second.
    #[serde(default)]
    pub timeout_secs: Option<u64>,
    /// MCP server URL to hand to the agent — typically the daemon's
    /// own `/mcp` endpoint, so every tool call the agent issues
    /// against the CAR namespace flows through the daemon's policy +
    /// memgine. An empty string is treated the same as `None`
    /// ("opt out of MCP").
    ///
    /// How the URL reaches the agent depends on the adapter:
    /// - Claude Code: the runner writes a temporary
    ///   `car-mcp-config-*.json` file and passes it via
    ///   `--mcp-config`. The temp file lives in the OS temp dir for
    ///   the duration of the spawn and is removed on drop.
    /// - Codex: injected inline as a `-c mcp_servers.car=…`
    ///   override; no temp file is written.
    /// - Gemini: not supported; the argv builder ignores it and the
    ///   runner logs a warning.
    ///
    /// Phase 2 stage 4b first cut. When set with the daemon's MCP
    /// URL, claude has access to `memory_add_fact`,
    /// `memory_query`, `verify`, and the `skill_*` tools — all
    /// gated by car-server's runtime, all logged to the same
    /// memgine WS clients see.
    #[serde(default)]
    pub mcp_endpoint: Option<String>,
}

/// Deadline applied when `InvokeOptions::timeout_secs` is `None`
/// (5 minutes).
const DEFAULT_TIMEOUT_SECS: u64 = 300;
/// Upper bound on any caller-supplied deadline (1 hour); larger
/// values are clamped down to this.
const MAX_TIMEOUT_SECS: u64 = 3600;

/// Aggregate outcome of one external-agent invocation.
///
/// Self-contained shape so this crate doesn't pull `car-multi`
/// just for `AgentOutput`. The WS layer in `car-server-core`
/// re-shapes this when callers want the canonical AgentOutput.
///
/// An agent-level failure is reported *inside* this struct
/// (`is_error` + `error`); the runners reserve `Err(InvokeError)`
/// for spawn, I/O and timeout failures.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct InvokeResult {
    /// Final answer from the agent's `result` event. Empty when
    /// the run errored before producing one. For Codex this is the
    /// concatenation of every `agent_message` item's text.
    pub answer: String,
    /// Stable session id from the `system.init` event, when present.
    /// For Codex this is the `thread_id` of the first
    /// `thread.started` event.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    /// Number of model turns the agent took. For Claude Code the
    /// `result` event's `num_turns` overrides the running count of
    /// assistant events when present; for Codex it is the number of
    /// `turn.started` events.
    #[serde(default)]
    pub turns: u32,
    /// Count of `tool_use` blocks the assistant emitted across all
    /// turns. Useful for cost / behavior auditing.
    #[serde(default)]
    pub tool_calls: u32,
    /// Each `tool_use` block the assistant emitted, in stream order.
    /// Phase 2 stage 4a: observation only — the agent executes its
    /// own built-in tools internally (Read/Edit/Bash/etc.), and we
    /// record what it did. Stage 4b will add full policy gating
    /// via a CAR-managed MCP server route, which requires a
    /// different invocation mode.
    #[serde(default)]
    pub tool_uses: Vec<ToolUseRequest>,
    /// Wall-clock duration the agent reported in its result event.
    /// `0` when the agent didn't reach a result event. The Codex
    /// stream processor instead measures the time it spent reading
    /// the stream, since it has no such event to copy from.
    #[serde(default)]
    pub duration_ms: u64,
    /// Would-be API cost in USD (subscription users don't pay this;
    /// reported for transparency). The Codex stream processor never
    /// sets it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub total_cost_usd: Option<f64>,
    /// `true` when the agent reported `is_error` or we hit a
    /// transport error before a result. The runners also set it
    /// when the child exits with a non-zero status.
    #[serde(default)]
    pub is_error: bool,
    /// Human-readable error context — populated only when
    /// `is_error` is `true` or no result event arrived.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}

/// Failures that prevent a runner from producing an
/// [`InvokeResult`] at all. Errors the agent itself reports are not
/// represented here; they travel inside the result via `is_error`.
#[derive(Debug, Error)]
pub enum InvokeError {
    /// The child process could not be started. Also used by the
    /// dispatcher when no detected agent matches the requested id
    /// or the id has no runner.
    #[error("subprocess spawn failed: {0}")]
    Spawn(String),
    /// A pipe was missing, a read/write on the child's stdio
    /// failed, waiting on the child failed, or the MCP config temp
    /// file could not be created or written.
    #[error("subprocess I/O failed: {0}")]
    Io(String),
    /// The invocation exceeded its deadline. Carries the effective
    /// timeout in seconds (after defaulting and clamping).
    #[error("subprocess timed out after {0}s")]
    Timeout(u64),
    /// The stream contained JSON that could not be parsed.
    /// NOTE(review): nothing in the visible part of this file
    /// constructs this variant — the stream processors skip
    /// malformed lines instead. Confirm whether it is still used.
    #[error("stream produced malformed JSON: {0}")]
    BadJson(String),
}

/// Assemble the command-line arguments for a `claude` invocation.
///
/// Kept separate from the spawn path so flag combinations can be
/// asserted without launching a process. The fixed prefix selects
/// print mode with stream-json on both input and output plus
/// `--verbose`; the optional flags follow in this order:
/// `--allowed-tools`, `--max-turns`, `--mcp-config`.
///
/// `mcp_config_path`, when present, is forwarded as
/// `--mcp-config <path>` so the agent loads that MCP server file at
/// startup.
pub(crate) fn build_claude_args(
    opts: &InvokeOptions,
    mcp_config_path: Option<&std::path::Path>,
) -> Vec<String> {
    const BASE_FLAGS: [&str; 6] = [
        "-p",
        "--output-format",
        "stream-json",
        "--input-format",
        "stream-json",
        "--verbose",
    ];
    let mut argv: Vec<String> = BASE_FLAGS.iter().map(|flag| flag.to_string()).collect();

    if let Some(tools) = opts.allowed_tools.as_ref() {
        // An empty allowlist still emits the flag with an empty
        // value: that is how "deny every tool" is told apart from
        // "flag not given".
        argv.push("--allowed-tools".to_string());
        argv.push(tools.join(" "));
    }
    if let Some(limit) = opts.max_turns {
        argv.push("--max-turns".to_string());
        argv.push(limit.to_string());
    }
    if let Some(config) = mcp_config_path {
        argv.push("--mcp-config".to_string());
        argv.push(config.to_string_lossy().into_owned());
    }
    argv
}

/// Render the MCP config JSON Claude Code expects when
/// `--mcp-config` points at an HTTP-streamable endpoint. The server
/// name `"car"` is what shows up in claude's MCP listing — keep it
/// stable so users learn one name across the project.
pub(crate) fn build_mcp_config_json(endpoint: &str) -> String {
    let cfg = serde_json::json!({
        "mcpServers": {
            "car": {
                "type": "http",
                "url": endpoint,
            }
        }
    });
    cfg.to_string()
}

/// Build the single-line user message JSON to write on stdin.
pub(crate) fn build_user_message(task: &str) -> String {
    let payload = serde_json::json!({
        "type": "user",
        "message": {
            "role": "user",
            "content": task,
        },
    });
    payload.to_string()
}

/// Top-level dispatch by adapter id. Looks up the detected
/// binary path, then dispatches to the per-tool runner.
///
/// Convenience wrapper around [`invoke_with_emitter`] with no
/// emitter: nothing is streamed, and the returned [`InvokeResult`]
/// is the only output.
///
/// # Errors
///
/// Returns whatever [`invoke_with_emitter`] returns. In particular,
/// an `id` that matches no detected agent, or that has no runner,
/// yields `Err(InvokeError::Spawn)` — not an `InvokeResult` with
/// `is_error` set.
pub async fn invoke(
    id: &str,
    task: &str,
    opts: InvokeOptions,
) -> Result<InvokeResult, InvokeError> {
    invoke_with_emitter(id, task, opts, None).await
}

/// Dispatch to the runner for adapter `id`, optionally streaming
/// events through a [`StreamEventEmitter`]. All three adapters fire
/// events through it (car#213):
/// - `claude-code` forwards its native stream-json events as they
///   arrive.
/// - `codex` synthesizes Anthropic-Messages-shaped `Assistant`
///   events from its JSONL `item.completed` lines so the host's
///   chat renderer fills the bubble incrementally.
/// - `gemini` is text-only in 0.1.x (no event stream), so it emits
///   a single `Assistant` text event with the full reply once the
///   process exits — a single-pop reply rather than a token stream,
///   but the host sees the answer instead of an empty bubble.
///
/// # Errors
///
/// `InvokeError::Spawn` when detection found no agent with this
/// `id`, or when the id has no runner; otherwise whatever the
/// selected runner returns.
pub async fn invoke_with_emitter(
    id: &str,
    task: &str,
    opts: InvokeOptions,
    emitter: Option<StreamEventEmitter>,
) -> Result<InvokeResult, InvokeError> {
    let detected = crate::detect().await;
    let Some(spec) = detected.iter().find(|candidate| candidate.id == id) else {
        return Err(InvokeError::Spawn(format!(
            "no detected external agent with id `{id}`"
        )));
    };
    match id {
        "claude-code" => invoke_claude_code(&spec.binary_path, task, opts, emitter).await,
        "codex" => invoke_codex(&spec.binary_path, task, opts, emitter).await,
        "gemini" => invoke_gemini(&spec.binary_path, task, opts, emitter).await,
        // Detected, but no runner is wired up for it here.
        _ => Err(InvokeError::Spawn(format!("unknown adapter id: {id}"))),
    }
}

/// Spawn `binary_path` with `claude -p stream-json` flags, write
/// `task` as a user message, and consume the event stream until
/// the binary exits or the timeout fires.
///
/// `binary_path` is treated as already-validated — caller is
/// expected to use the path the detection module resolved from
/// `$PATH`, which has already passed the scratch-dir denylist.
pub async fn invoke_claude_code(
    binary_path: &Path,
    task: &str,
    opts: InvokeOptions,
    emitter: Option<StreamEventEmitter>,
) -> Result<InvokeResult, InvokeError> {
    tracing::info!(
        adapter = "claude-code",
        binary = %binary_path.display(),
        task_len = task.len(),
        "external agent invocation started"
    );
    let timeout_secs = opts
        .timeout_secs
        .unwrap_or(DEFAULT_TIMEOUT_SECS)
        .min(MAX_TIMEOUT_SECS)
        .max(1);
    let timeout = Duration::from_secs(timeout_secs);

    // Write the MCP config to a temp file when an endpoint was
    // supplied. Held in a `_mcp_config` binding for the lifetime of
    // the invocation so its `Drop` runs *after* claude exits — same
    // RAII shape as `tempfile::NamedTempFile`. An explicit empty
    // string is treated as "opt out of MCP" (the daemon caller uses
    // it to override the auto-fill).
    let mcp_config: Option<tempfile::NamedTempFile> = match opts.mcp_endpoint.as_deref() {
        Some(endpoint) if !endpoint.is_empty() => {
            let json = build_mcp_config_json(endpoint);
            let mut tmp = tempfile::Builder::new()
                .prefix("car-mcp-config-")
                .suffix(".json")
                .tempfile()
                .map_err(|e| InvokeError::Io(format!("mcp config tempfile: {e}")))?;
            std::io::Write::write_all(&mut tmp, json.as_bytes())
                .map_err(|e| InvokeError::Io(format!("mcp config write: {e}")))?;
            Some(tmp)
        }
        _ => None,
    };
    let mcp_config_path = mcp_config.as_ref().map(|t| t.path());

    let args = build_claude_args(&opts, mcp_config_path);
    let mut cmd = Command::new(binary_path);
    cmd.args(&args);
    if let Some(cwd) = &opts.cwd {
        cmd.current_dir(cwd);
    }
    cmd.stdin(std::process::Stdio::piped());
    cmd.stdout(std::process::Stdio::piped());
    cmd.stderr(std::process::Stdio::piped());
    cmd.kill_on_drop(true);

    let mut child = cmd.spawn().map_err(|e| InvokeError::Spawn(e.to_string()))?;

    // Write the task on stdin and close so the agent sees EOF.
    {
        let stdin = child
            .stdin
            .take()
            .ok_or_else(|| InvokeError::Io("stdin closed unexpectedly".to_string()))?;
        let user_message = build_user_message(task);
        let mut stdin = stdin;
        stdin
            .write_all(user_message.as_bytes())
            .await
            .map_err(|e| InvokeError::Io(format!("stdin write: {e}")))?;
        stdin
            .write_all(b"\n")
            .await
            .map_err(|e| InvokeError::Io(format!("stdin newline: {e}")))?;
        // Drop closes stdin; agent now sees EOF and starts processing.
    }

    let stdout = child
        .stdout
        .take()
        .ok_or_else(|| InvokeError::Io("stdout missing".to_string()))?;
    let stderr = child
        .stderr
        .take()
        .ok_or_else(|| InvokeError::Io("stderr missing".to_string()))?;
    let reader = tokio::io::BufReader::new(stdout);

    let process_fut = async {
        let mut result = process_stream(reader, emitter).await?;
        // Wait for the child so we don't orphan it.
        let exit = child
            .wait()
            .await
            .map_err(|e| InvokeError::Io(format!("wait: {e}")))?;
        if !exit.success() && !result.is_error {
            // Capture stderr for context — don't let a non-zero
            // exit pass silently when the stream itself didn't
            // surface an error.
            let mut stderr_buf = Vec::new();
            let mut stderr_reader = tokio::io::BufReader::new(stderr);
            let _ = tokio::io::AsyncReadExt::read_to_end(&mut stderr_reader, &mut stderr_buf).await;
            let stderr_text = String::from_utf8_lossy(&stderr_buf).to_string();
            result.is_error = true;
            result.error = Some(format!(
                "exit code {}: {}",
                exit.code().unwrap_or(-1),
                stderr_text.trim()
            ));
        }
        Ok::<_, InvokeError>(result)
    };

    match tokio::time::timeout(timeout, process_fut).await {
        Ok(Ok(res)) => Ok(res),
        Ok(Err(e)) => Err(e),
        Err(_) => Err(InvokeError::Timeout(timeout_secs)),
    }
}

/// Consume the `claude` stream-json output line-by-line and
/// aggregate it into an [`InvokeResult`]. Pulled out from
/// [`invoke_claude_code`] so it can be unit-tested with mock
/// readers (avoids burning subscription quota on every test run).
pub async fn process_stream<R>(
    reader: R,
    emitter: Option<StreamEventEmitter>,
) -> Result<InvokeResult, InvokeError>
where
    R: AsyncBufRead + Unpin,
{
    let mut result = InvokeResult::default();
    let mut lines = reader.lines();
    loop {
        let line = match lines.next_line().await {
            Ok(Some(line)) => line,
            Ok(None) => break,
            Err(e) => return Err(InvokeError::Io(format!("stdout read: {e}"))),
        };
        let event = match parse_line(&line) {
            Ok(Some(e)) => e,
            Ok(None) => continue,
            Err(e) => {
                // One bad line shouldn't kill the whole stream —
                // log it via the error field if it's the only thing
                // we got, but keep reading so a benign trailing
                // newline doesn't cost us the result.
                if result.answer.is_empty() && result.error.is_none() {
                    result.error = Some(format!("malformed JSON: {e}"));
                }
                continue;
            }
        };
        // Fire the emitter (if any) BEFORE apply_event consumes the
        // value into the running result. Clone so apply_event still
        // owns its copy — StreamEvent is small enough that the clone
        // cost is irrelevant next to the WS round-trip.
        if let Some(e) = &emitter {
            e(event.clone());
        }
        apply_event(&mut result, event);
    }
    Ok(result)
}

/// Fold one parsed stream event into the running aggregate.
///
/// - `System`: records the session id the first time one is seen.
/// - `Assistant`: counts a turn and records every `tool_use`
///   content block.
/// - `Result`: copies the final answer, duration, cost, error flag
///   and (when present) the agent's own turn count.
/// - `User`, `RateLimitEvent`, `Other`: ignored.
fn apply_event(result: &mut InvokeResult, event: StreamEvent) {
    match event {
        // Not load-bearing for the aggregate result.
        StreamEvent::User(_) | StreamEvent::RateLimitEvent(_) | StreamEvent::Other => {}
        StreamEvent::System(init) => {
            // First system event wins; later ones keep the original.
            let _ = result.session_id.get_or_insert(init.session_id);
        }
        StreamEvent::Assistant(turn) => {
            result.turns = result.turns.saturating_add(1);
            // `tool_uses` is the post-run audit trail; `tool_calls`
            // is the cheap count for cost-control summaries.
            let tool_blocks = turn
                .message
                .get("content")
                .and_then(|content| content.as_array())
                .into_iter()
                .flatten()
                .filter(|block| block.get("type").and_then(|t| t.as_str()) == Some("tool_use"));
            for block in tool_blocks {
                // Missing or non-string fields fall back to "".
                let text_field = |key: &str| {
                    block
                        .get(key)
                        .and_then(|v| v.as_str())
                        .unwrap_or("")
                        .to_string()
                };
                result.tool_calls = result.tool_calls.saturating_add(1);
                let id = text_field("id");
                let name = text_field("name");
                let input = block
                    .get("input")
                    .cloned()
                    .unwrap_or(serde_json::Value::Null);
                tracing::info!(
                    adapter = "claude-code",
                    tool_name = %name,
                    "external agent emitted tool_use (observation-only — \
                     policy gating via MCP route lands in Phase 2 stage 4b)"
                );
                result.tool_uses.push(ToolUseRequest { id, name, input });
            }
        }
        StreamEvent::Result(outcome) => {
            if outcome.is_error {
                result.error = Some(format!(
                    "agent reported error (subtype={}, terminal={:?})",
                    outcome.subtype, outcome.terminal_reason
                ));
            }
            result.is_error = outcome.is_error;
            // The agent's own turn count wins over our running
            // count when it reports one (it may count internal
            // turns we don't see).
            result.turns = outcome.num_turns.unwrap_or(result.turns);
            result.total_cost_usd = outcome.total_cost_usd;
            result.duration_ms = outcome.duration_ms.unwrap_or(0);
            result.answer = outcome.result.unwrap_or_default();
        }
    }
}

// --- Codex (OpenAI codex-cli) -----------------------------------------------

/// Build the argv for `codex exec --json`.
///
/// `codex exec --json --skip-git-repo-check --ephemeral` runs
/// non-interactively, emits NDJSON events on stdout, and doesn't
/// persist a session file (matching the per-task semantics this
/// runner expects). When `mcp_endpoint` is set and non-empty we
/// inject the CAR namespace via a `-c` override — Codex stores MCP
/// servers in TOML config under `mcp_servers.<name>` keys, and the
/// inline override syntax accepts the same dotted path.
///
/// The endpoint is emitted as an escaped TOML basic string, so a
/// URL containing `"`, `\` or control characters can neither break
/// the inline table nor smuggle extra keys into the override.
///
/// Argument order: the fixed flags, then `--cd <cwd>` when
/// `opts.cwd` is set, then the `-c` override, and finally `-` so
/// codex reads the prompt from stdin.
pub(crate) fn build_codex_args(opts: &InvokeOptions, mcp_endpoint: Option<&str>) -> Vec<String> {
    let mut args = vec![
        "exec".to_string(),
        "--json".to_string(),
        "--skip-git-repo-check".to_string(),
        "--ephemeral".to_string(),
    ];
    if let Some(cwd) = &opts.cwd {
        args.push("--cd".to_string());
        args.push(cwd.to_string_lossy().into_owned());
    }
    if let Some(endpoint) = mcp_endpoint.filter(|s| !s.is_empty()) {
        // Codex's `-c` flag takes `key=value` where the value is
        // parsed as TOML; an inline-table is the standard shape for
        // `mcp_servers.<name>`. The `type="http"` + URL form mirrors
        // Codex's own MCP config docs.
        let value = format!(r#"{{type="http",url={}}}"#, toml_basic_string(endpoint));
        args.push("-c".to_string());
        args.push(format!("mcp_servers.car={}", value));
    }
    // The prompt itself goes on stdin (we use `-` as the prompt
    // arg so codex reads stdin even when stdin is a pipe rather
    // than a TTY). Chosen over passing the prompt as a CLI arg so
    // long prompts don't trip the OS argv limit.
    args.push("-".to_string());
    args
}

/// Render `raw` as a double-quoted TOML basic string, escaping the
/// quote, the backslash and every control character.
fn toml_basic_string(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len() + 2);
    out.push('"');
    for ch in raw.chars() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if c.is_control() => out.push_str(&format!("\\u{:04X}", c as u32)),
            c => out.push(c),
        }
    }
    out.push('"');
    out
}

/// Run codex with `task` piped on stdin, parse the JSONL event
/// stream, return an aggregate [`InvokeResult`].
///
/// Codex events observed (2026-05, codex-cli 0.128.0):
/// - `thread.started` — session id
/// - `turn.started` / `turn.completed` — turn boundaries + usage
/// - `item.completed` with `item.type = "agent_message"` — answer
///   text. Multiple items may arrive; we concatenate.
/// - Other `item.completed` shapes (tool calls, etc.) get counted
///   in `tool_calls` and recorded under `tool_uses` so the audit
///   trail matches the Claude Code adapter's shape.
///
/// The deadline (`opts.timeout_secs`, default 300 s, clamped to
/// `1..=3600`) covers the whole exchange with the child: feeding
/// stdin, reading stdout, draining stderr and waiting for exit.
/// All three pipes are serviced concurrently so a child that fills
/// its stderr pipe (or writes output before it has read all of
/// stdin) cannot wedge the runner.
///
/// A non-zero exit status is not an `Err`: it is reported through
/// `InvokeResult::is_error` / `error`, with the tail of stderr as
/// context, unless the stream already reported an error.
///
/// # Errors
///
/// - [`InvokeError::Spawn`] — the binary could not be started.
/// - [`InvokeError::Io`] — a stdio pipe was missing, writing stdin
///   or reading stdout failed, or waiting on the child failed.
/// - [`InvokeError::Timeout`] — the deadline elapsed; the child is
///   killed when its handle is dropped (`kill_on_drop`).
pub async fn invoke_codex(
    binary_path: &Path,
    task: &str,
    opts: InvokeOptions,
    emitter: Option<StreamEventEmitter>,
) -> Result<InvokeResult, InvokeError> {
    // Only the tail of stderr is kept: enough for a useful error
    // message without letting a chatty child grow the buffer
    // without bound.
    const STDERR_TAIL_BYTES: usize = 64 * 1024;

    tracing::info!(
        adapter = "codex",
        binary = %binary_path.display(),
        task_len = task.len(),
        "external agent invocation started"
    );
    let timeout_secs = opts
        .timeout_secs
        .unwrap_or(DEFAULT_TIMEOUT_SECS)
        .min(MAX_TIMEOUT_SECS)
        .max(1);
    let timeout = Duration::from_secs(timeout_secs);

    let args = build_codex_args(&opts, opts.mcp_endpoint.as_deref());
    let mut cmd = Command::new(binary_path);
    cmd.args(&args);
    // NOTE(review): `cwd` is applied twice — as the process working
    // directory here and again through `--cd` in the argv. With a
    // relative path the second application would presumably resolve
    // against the first; confirm callers always pass absolute paths.
    if let Some(cwd) = &opts.cwd {
        cmd.current_dir(cwd);
    }
    cmd.stdin(std::process::Stdio::piped());
    cmd.stdout(std::process::Stdio::piped());
    cmd.stderr(std::process::Stdio::piped());
    cmd.kill_on_drop(true);

    let mut child = cmd.spawn().map_err(|e| InvokeError::Spawn(e.to_string()))?;

    let stdin = child
        .stdin
        .take()
        .ok_or_else(|| InvokeError::Io("stdin closed unexpectedly".to_string()))?;
    let stdout = child
        .stdout
        .take()
        .ok_or_else(|| InvokeError::Io("stdout missing".to_string()))?;
    let stderr = child
        .stderr
        .take()
        .ok_or_else(|| InvokeError::Io("stderr missing".to_string()))?;
    let reader = tokio::io::BufReader::new(stdout);

    // Pipe the prompt in, then drop the handle so codex sees EOF.
    let feed_stdin = async {
        let mut stdin = stdin;
        stdin
            .write_all(task.as_bytes())
            .await
            .map_err(|e| InvokeError::Io(format!("stdin write: {e}")))?;
        Ok::<(), InvokeError>(())
    };

    // Drain stderr while the stream is being consumed. If it were
    // only read after exit, a child that writes more than the pipe
    // buffer holds would block on stderr and never close stdout.
    let drain_stderr = async {
        let mut stderr = stderr;
        let mut tail: Vec<u8> = Vec::new();
        let mut chunk = vec![0u8; 8 * 1024];
        loop {
            match tokio::io::AsyncReadExt::read(&mut stderr, &mut chunk[..]).await {
                // EOF — or a read error, which is treated as
                // best-effort: stderr is diagnostic context only.
                Ok(0) | Err(_) => break,
                Ok(n) => {
                    tail.extend_from_slice(&chunk[..n]);
                    if tail.len() > STDERR_TAIL_BYTES {
                        let excess = tail.len() - STDERR_TAIL_BYTES;
                        tail.drain(..excess);
                    }
                }
            }
        }
        tail
    };

    let process_fut = async {
        let (fed, streamed, stderr_tail) = tokio::join!(
            feed_stdin,
            process_codex_stream(reader, emitter),
            drain_stderr
        );
        fed?;
        let mut result = streamed?;
        // Wait for the child so we don't orphan it.
        let exit = child
            .wait()
            .await
            .map_err(|e| InvokeError::Io(format!("wait: {e}")))?;
        if !exit.success() && !result.is_error {
            // Don't let a non-zero exit pass silently when the
            // stream itself didn't surface an error.
            let stderr_text = String::from_utf8_lossy(&stderr_tail);
            result.is_error = true;
            result.error = Some(format!(
                "exit code {}: {}",
                exit.code().unwrap_or(-1),
                stderr_text.trim()
            ));
        }
        Ok::<_, InvokeError>(result)
    };

    match tokio::time::timeout(timeout, process_fut).await {
        Ok(outcome) => outcome,
        Err(_) => Err(InvokeError::Timeout(timeout_secs)),
    }
}

/// Wrap a single content block in a synthetic `assistant`
/// [`StreamEvent`].
///
/// Codex and Gemini don't emit Anthropic-Messages-shaped frames
/// natively, but the host's chat renderer keys off
/// `Assistant.message.content[]` (`text` → `kind:"token"`,
/// `tool_use` → `kind:"tool_call"`). Producing that shape here keeps
/// the streaming surface uniform across all three external CLIs so
/// hosts render every adapter with one decoder (car#213).
///
/// `uuid` is left empty and `parent_tool_use_id` is `None`: these
/// synthesized events have no upstream uuid to carry, and no
/// emitter consumer reads it for them.
fn synth_assistant_event(session_id: &str, content: serde_json::Value) -> StreamEvent {
    let message = serde_json::json!({ "role": "assistant", "content": [content] });
    let frame = AssistantEventData {
        message,
        session_id: session_id.to_owned(),
        uuid: String::new(),
        parent_tool_use_id: None,
    };
    StreamEvent::Assistant(frame)
}

/// Aggregate a Codex JSONL event stream into an [`InvokeResult`];
/// split out from the runner so it can be driven by mock readers.
///
/// `emitter`, when present, fires per recognised JSONL line *in
/// addition to* the running aggregate — same emit-before-aggregate
/// ordering `process_stream` uses for claude-code. Pre-car#213 the
/// emitter was dropped here and the host only ever saw the
/// synthesized terminal `done` (empty bubble).
///
/// Blank lines and lines that are not valid JSON are skipped.
/// `duration_ms` is measured locally, from entry to end of stream.
/// A stream that yields no answer text is flagged as an error.
///
/// # Errors
///
/// [`InvokeError::Io`] when reading from `reader` fails.
pub async fn process_codex_stream<R>(
    reader: R,
    emitter: Option<StreamEventEmitter>,
) -> Result<InvokeResult, InvokeError>
where
    R: tokio::io::AsyncBufRead + Unpin,
{
    use serde_json::Value;

    let clock = std::time::Instant::now();
    let mut outcome = InvokeResult::default();
    let mut reply_chunks: Vec<String> = Vec::new();
    let mut stream = reader.lines();

    while let Some(raw) = stream
        .next_line()
        .await
        .map_err(|e| InvokeError::Io(format!("stdout read: {e}")))?
    {
        let payload = raw.trim();
        if payload.is_empty() {
            continue;
        }
        // codex prints a "Reading prompt from stdin..." banner
        // before the JSON stream, so non-JSON lines are skipped
        // silently instead of being treated as errors.
        let Ok(frame) = serde_json::from_str::<Value>(payload) else {
            continue;
        };

        match frame.get("type").and_then(Value::as_str).unwrap_or("") {
            "thread.started" => {
                // First thread id wins; later ones are ignored.
                if outcome.session_id.is_none() {
                    outcome.session_id = frame
                        .get("thread_id")
                        .and_then(Value::as_str)
                        .map(str::to_string);
                }
                if let Some(emit) = emitter.as_ref() {
                    emit(StreamEvent::System(SystemEventData {
                        subtype: "init".to_string(),
                        session_id: outcome.session_id.clone().unwrap_or_default(),
                        model: None,
                        cwd: None,
                        tools: Vec::new(),
                        permission_mode: None,
                        claude_code_version: None,
                        extra: serde_json::Map::new(),
                    }));
                }
            }
            "turn.started" => outcome.turns = outcome.turns.saturating_add(1),
            "item.completed" => {
                let Some(item) = frame.get("item") else {
                    continue;
                };
                let item_kind = item.get("type").and_then(Value::as_str).unwrap_or("");

                if item_kind == "agent_message" {
                    let Some(text) = item.get("text").and_then(Value::as_str) else {
                        continue;
                    };
                    if let Some(emit) = emitter.as_ref() {
                        let sid = outcome.session_id.clone().unwrap_or_default();
                        emit(synth_assistant_event(
                            &sid,
                            serde_json::json!({ "type": "text", "text": text }),
                        ));
                    }
                    reply_chunks.push(text.to_string());
                } else if item_kind.contains("tool") || item_kind.contains("call") {
                    outcome.tool_calls = outcome.tool_calls.saturating_add(1);
                    let id = item
                        .get("id")
                        .and_then(Value::as_str)
                        .unwrap_or_default()
                        .to_string();
                    // Tool-call items typically carry a `name` and
                    // structured `arguments`; the item type stands
                    // in for a missing name so the audit trail
                    // isn't blank.
                    let name = item
                        .get("name")
                        .and_then(Value::as_str)
                        .map_or_else(|| item_kind.to_string(), str::to_string);
                    let input = item
                        .get("arguments")
                        .or_else(|| item.get("input"))
                        .cloned()
                        .unwrap_or(Value::Null);
                    tracing::info!(
                        adapter = "codex",
                        tool_name = %name,
                        "external agent emitted tool_use (observation-only)"
                    );
                    if let Some(emit) = emitter.as_ref() {
                        let sid = outcome.session_id.clone().unwrap_or_default();
                        emit(synth_assistant_event(
                            &sid,
                            serde_json::json!({
                                "type": "tool_use",
                                "id": id,
                                "name": name,
                                "input": input,
                            }),
                        ));
                    }
                    outcome.tool_uses.push(ToolUseRequest { id, name, input });
                }
            }
            "turn.completed" => {
                // Nothing to aggregate today; cost accounting could
                // be threaded here if codex exposes USD cost in a
                // future release.
            }
            _ => {}
        }
    }

    outcome.answer = reply_chunks.concat();
    outcome.duration_ms = clock.elapsed().as_millis() as u64;
    if outcome.answer.is_empty() && outcome.error.is_none() {
        // The stream completed without any agent_message — flag it
        // so callers don't silently treat an empty string as
        // success.
        outcome.is_error = true;
        outcome.error = Some("codex produced no agent_message".to_string());
    }
    Ok(outcome)
}

// --- Gemini CLI -------------------------------------------------------------

/// Assemble the command-line arguments for a Gemini CLI run.
///
/// Gemini 0.1.x has no JSON-stream output mode, so this adapter is
/// text-only: `gemini -p "<task>" --yolo` runs non-interactively,
/// auto-accepts actions, and prints the final response to stdout.
/// `--yolo` is mandatory for unattended runs — without it the binary
/// blocks waiting for interactive approvals.
///
/// MCP injection is **not yet supported on Gemini** (this CLI version
/// has no `--mcp-config` equivalent). When `mcp_endpoint` is set, the
/// runner logs a tracing warning and carries on without MCP, so
/// Gemini only has its own built-in tools and none from the CAR
/// namespace.
pub(crate) fn build_gemini_args(opts: &InvokeOptions, task: &str) -> Vec<String> {
    // Gemini has no `--cd` flag: the working directory is applied
    // through tokio::process::Command::current_dir by the runner, so
    // `opts.cwd` never contributes an argv entry. Touching it here
    // only documents that gap.
    let _ = &opts.cwd;
    ["-p", task, "--yolo"]
        .iter()
        .map(|&arg| arg.to_owned())
        .collect()
}

/// Invoke the Gemini CLI as `gemini -p <task> --yolo` and fold its
/// captured output into an aggregate [`InvokeResult`].
///
/// Gemini's output is plain text, so only `answer`, `duration_ms`,
/// `is_error`, and `error` are filled in here; every other field
/// (including `tool_calls`, `tool_uses`, and `total_cost_usd`) keeps
/// its `Default` value because the agent's actions aren't visible at
/// the adapter boundary in this version.
///
/// The run is bounded by `opts.timeout_secs`, falling back to
/// `DEFAULT_TIMEOUT_SECS`, capped at `MAX_TIMEOUT_SECS`, and never
/// shorter than one second.
///
/// # Errors
///
/// * [`InvokeError::Spawn`] when the binary cannot be started.
/// * [`InvokeError::Io`] when waiting for the child's output fails.
/// * [`InvokeError::Timeout`] when the deadline elapses first.
///
/// A non-zero exit status or empty stdout is *not* an `Err`; both are
/// reported through `is_error` / `error` on the returned result.
pub async fn invoke_gemini(
    binary_path: &Path,
    task: &str,
    opts: InvokeOptions,
    emitter: Option<StreamEventEmitter>,
) -> Result<InvokeResult, InvokeError> {
    tracing::info!(
        adapter = "gemini",
        binary = %binary_path.display(),
        task_len = task.len(),
        "external agent invocation started"
    );

    // An empty endpoint string counts as "not supplied".
    let mcp_requested = opts
        .mcp_endpoint
        .as_deref()
        .map_or(false, |endpoint| !endpoint.is_empty());
    if mcp_requested {
        tracing::warn!(
            adapter = "gemini",
            "mcp_endpoint supplied but Gemini CLI v0.1.x doesn't support \
             --mcp-config; agent will run without CAR's MCP namespace"
        );
    }

    // Cap first, then floor at one second — same order as a
    // `.min(..).max(1)` chain, so the floor always wins.
    let requested_secs = opts.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS);
    let timeout_secs = std::cmp::max(1, std::cmp::min(requested_secs, MAX_TIMEOUT_SECS));
    let timeout = Duration::from_secs(timeout_secs);

    let started = std::time::Instant::now();
    let mut command = Command::new(binary_path);
    command
        .args(build_gemini_args(&opts, task))
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        // If the timeout below drops the future (and the child with
        // it), the process is killed rather than left running.
        .kill_on_drop(true);
    if let Some(dir) = opts.cwd.as_ref() {
        command.current_dir(dir);
    }

    let child = command
        .spawn()
        .map_err(|err| InvokeError::Spawn(err.to_string()))?;

    let run = async {
        let output = child
            .wait_with_output()
            .await
            .map_err(|err| InvokeError::Io(format!("wait: {err}")))?;
        let stdout_text = String::from_utf8_lossy(&output.stdout);
        let stderr_text = String::from_utf8_lossy(&output.stderr);

        let mut result = InvokeResult {
            answer: stdout_text.trim().to_string(),
            duration_ms: started.elapsed().as_millis() as u64,
            ..Default::default()
        };

        match (output.status.success(), result.answer.is_empty()) {
            // Failed exit: report the code (or -1 when none is
            // available) together with whatever stderr said.
            (false, _) => {
                result.is_error = true;
                result.error = Some(format!(
                    "exit code {}: {}",
                    output.status.code().unwrap_or(-1),
                    stderr_text.trim()
                ));
            }
            // Clean exit but nothing on stdout: flag it as an error.
            (true, true) => {
                result.is_error = true;
                result.error = Some("gemini produced no stdout output".to_string());
            }
            // Clean exit with an answer. Gemini 0.1.x has no
            // event-stream output mode (see build_gemini_args), so
            // there are no incremental tokens to forward; the whole
            // reply goes out as one synthetic Assistant text event so
            // the host bubble fills with the answer (single-pop, not
            // a token stream) instead of staying empty. car#213.
            (true, false) => {
                if let Some(emit) = emitter.as_ref() {
                    emit(synth_assistant_event(
                        "",
                        serde_json::json!({ "type": "text", "text": result.answer.clone() }),
                    ));
                }
            }
        }
        Ok::<_, InvokeError>(result)
    };

    // Outer `Err` means the deadline elapsed; otherwise pass the
    // inner result (success or I/O error) straight through.
    tokio::time::timeout(timeout, run)
        .await
        .unwrap_or_else(|_elapsed| Err(InvokeError::Timeout(timeout_secs)))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;
    use tokio::io::BufReader;

    /// Build an in-memory async reader over `lines` joined by `\n`
    /// (no trailing newline), standing in for a child's stdout.
    fn mock_reader(lines: &[&str]) -> BufReader<Cursor<Vec<u8>>> {
        let payload = lines.join("\n").into_bytes();
        BufReader::new(Cursor::new(payload))
    }

    /// Return the argv entry immediately following `flag`. Panics when
    /// the flag is absent or is the final argument.
    fn value_after<'a>(args: &'a [String], flag: &str) -> &'a str {
        let idx = args.iter().position(|a| a == flag).unwrap();
        &args[idx + 1]
    }

    // --- Claude adapter --------------------------------------------------

    #[tokio::test]
    async fn aggregates_simple_text_response() {
        let stream = [
            r#"{"type":"system","subtype":"init","session_id":"s1","model":"opus","tools":[]}"#,
            r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"ok"}],"usage":{}},"session_id":"s1","uuid":"u1"}"#,
            r#"{"type":"result","subtype":"success","is_error":false,"duration_ms":1500,"num_turns":1,"result":"ok","session_id":"s1","total_cost_usd":0.05,"usage":{},"modelUsage":{},"uuid":"r1"}"#,
        ];
        let outcome = process_stream(mock_reader(&stream), None).await.unwrap();

        // Success flags first, then the aggregated payload.
        assert!(!outcome.is_error);
        assert!(outcome.error.is_none());
        assert_eq!(outcome.answer, "ok");
        assert_eq!(outcome.session_id.as_deref(), Some("s1"));
        assert_eq!(outcome.turns, 1);
        assert_eq!(outcome.tool_calls, 0);
        assert_eq!(outcome.duration_ms, 1500);
        assert_eq!(outcome.total_cost_usd, Some(0.05));
    }

    #[tokio::test]
    async fn counts_tool_use_blocks_across_turns() {
        let stream = [
            r#"{"type":"system","subtype":"init","session_id":"s2","model":"opus","tools":[]}"#,
            r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t1","name":"Read","input":{"file_path":"/x"}}],"usage":{}},"session_id":"s2","uuid":"u1"}"#,
            r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t2","name":"Bash","input":{"command":"ls"}},{"type":"text","text":"done"}],"usage":{}},"session_id":"s2","uuid":"u2"}"#,
            r#"{"type":"result","subtype":"success","is_error":false,"duration_ms":3000,"num_turns":2,"result":"done","session_id":"s2","total_cost_usd":0.10,"usage":{},"modelUsage":{},"uuid":"r1"}"#,
        ];
        let outcome = process_stream(mock_reader(&stream), None).await.unwrap();

        assert_eq!(outcome.tool_calls, 2);
        assert_eq!(outcome.turns, 2);
        assert_eq!(outcome.answer, "done");

        // The audit trail keeps every tool_use in the order it was
        // streamed.
        assert_eq!(outcome.tool_uses.len(), 2);
        let first = &outcome.tool_uses[0];
        assert_eq!(first.id, "t1");
        assert_eq!(first.name, "Read");
        assert_eq!(first.input["file_path"].as_str(), Some("/x"));
        let second = &outcome.tool_uses[1];
        assert_eq!(second.id, "t2");
        assert_eq!(second.name, "Bash");
    }

    #[tokio::test]
    async fn surfaces_agent_reported_error() {
        let stream = [
            r#"{"type":"system","subtype":"init","session_id":"s3","model":"opus","tools":[]}"#,
            r#"{"type":"result","subtype":"error","is_error":true,"duration_ms":500,"session_id":"s3","total_cost_usd":0.0,"usage":{},"modelUsage":{},"terminal_reason":"timeout","uuid":"r1"}"#,
        ];
        let outcome = process_stream(mock_reader(&stream), None).await.unwrap();

        assert!(outcome.is_error);
        let message = outcome.error.as_deref().unwrap();
        assert!(message.contains("error"));
    }

    #[tokio::test]
    async fn empty_stream_produces_empty_result_with_no_panic() {
        let outcome = process_stream(mock_reader(&[]), None).await.unwrap();

        assert!(outcome.answer.is_empty());
        assert_eq!(outcome.turns, 0);
        // Without a result event the agent never reported an error;
        // callers can still spot the situation from the empty answer
        // plus the missing session id.
        assert!(outcome.session_id.is_none());
    }

    #[tokio::test]
    async fn malformed_line_logged_but_stream_continues() {
        let stream = [
            r#"{not valid"#,
            r#"{"type":"system","subtype":"init","session_id":"s4","model":"opus","tools":[]}"#,
            r#"{"type":"result","subtype":"success","is_error":false,"duration_ms":100,"num_turns":0,"result":"recovered","session_id":"s4","total_cost_usd":0.0,"usage":{},"modelUsage":{},"uuid":"r1"}"#,
        ];
        let outcome = process_stream(mock_reader(&stream), None).await.unwrap();

        assert_eq!(outcome.session_id.as_deref(), Some("s4"));
        assert_eq!(outcome.answer, "recovered");
    }

    #[test]
    fn build_claude_args_includes_required_format_flags() {
        let args = build_claude_args(&InvokeOptions::default(), None);

        assert!(args.iter().any(|a| a == "-p"));
        assert!(args.iter().any(|a| a == "stream-json"));
        // Defaults must not add any of the optional flags.
        for optional in ["--allowed-tools", "--max-turns", "--mcp-config"] {
            assert!(args.iter().all(|a| a != optional));
        }
    }

    #[test]
    fn build_claude_args_passes_allowed_tools() {
        let opts = InvokeOptions {
            allowed_tools: Some(vec!["Read".to_string(), "Bash".to_string()]),
            max_turns: Some(5),
            ..Default::default()
        };
        let args = build_claude_args(&opts, None);

        assert_eq!(value_after(&args, "--allowed-tools"), "Read Bash");
        assert_eq!(value_after(&args, "--max-turns"), "5");
    }

    #[test]
    fn build_claude_args_empty_allowed_tools_denies_everything() {
        let opts = InvokeOptions {
            allowed_tools: Some(vec![]),
            ..Default::default()
        };
        let args = build_claude_args(&opts, None);

        // An empty allow-list must still emit the flag (with an empty
        // value) so the binary reads it as "deny all" rather than
        // "use default".
        assert_eq!(value_after(&args, "--allowed-tools"), "");
    }

    #[test]
    fn build_claude_args_threads_mcp_config_path() {
        let config_path = std::path::Path::new("/tmp/mcp-config.json");
        let args = build_claude_args(&InvokeOptions::default(), Some(config_path));

        assert_eq!(value_after(&args, "--mcp-config"), "/tmp/mcp-config.json");
    }

    #[test]
    fn build_mcp_config_renders_http_server_entry() {
        let rendered = build_mcp_config_json("http://127.0.0.1:9102/mcp");
        let parsed: serde_json::Value = serde_json::from_str(&rendered).unwrap();

        let car = &parsed["mcpServers"]["car"];
        assert_eq!(car["type"], "http");
        assert_eq!(car["url"], "http://127.0.0.1:9102/mcp");
    }

    #[test]
    fn build_user_message_is_valid_json() {
        let raw = build_user_message("hello world");
        let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap();

        assert_eq!(parsed["type"], "user");
        let message = &parsed["message"];
        assert_eq!(message["role"], "user");
        assert_eq!(message["content"], "hello world");
    }

    // --- Codex adapter ---------------------------------------------------

    #[test]
    fn codex_args_use_exec_subcommand_and_json_output() {
        let args = build_codex_args(&InvokeOptions::default(), None);

        assert_eq!(args[0], "exec");
        for required in ["--json", "--skip-git-repo-check", "--ephemeral"] {
            assert!(args.iter().any(|a| a == required));
        }
        // The trailing `-` tells codex to read the prompt from stdin.
        assert_eq!(args.last().map(|last| last.as_str()), Some("-"));
    }

    #[test]
    fn codex_args_inject_mcp_via_inline_config_override() {
        let args = build_codex_args(
            &InvokeOptions::default(),
            Some("http://127.0.0.1:9102/mcp"),
        );

        let override_value = value_after(&args, "-c");
        assert!(override_value.starts_with("mcp_servers.car="));
        assert!(override_value.contains("http://127.0.0.1:9102/mcp"));
        assert!(override_value.contains("type=\"http\""));
    }

    #[test]
    fn codex_args_threads_cd_flag() {
        let opts = InvokeOptions {
            cwd: Some("/tmp/work".into()),
            ..Default::default()
        };
        let args = build_codex_args(&opts, None);

        assert_eq!(value_after(&args, "--cd"), "/tmp/work");
    }

    #[tokio::test]
    async fn codex_stream_aggregates_agent_message() {
        let stream = [
            r#"{"type":"thread.started","thread_id":"thread-abc"}"#,
            r#"{"type":"turn.started"}"#,
            r#"{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"hello world"}}"#,
            r#"{"type":"turn.completed","usage":{"input_tokens":5,"output_tokens":2}}"#,
        ];
        let outcome = process_codex_stream(mock_reader(&stream), None)
            .await
            .unwrap();

        assert!(!outcome.is_error);
        assert_eq!(outcome.session_id.as_deref(), Some("thread-abc"));
        assert_eq!(outcome.turns, 1);
        assert_eq!(outcome.answer, "hello world");
    }

    #[tokio::test]
    async fn codex_stream_concatenates_multiple_agent_messages() {
        let stream = [
            r#"{"type":"thread.started","thread_id":"t"}"#,
            r#"{"type":"turn.started"}"#,
            r#"{"type":"item.completed","item":{"type":"agent_message","text":"part 1 "}}"#,
            r#"{"type":"item.completed","item":{"type":"agent_message","text":"part 2"}}"#,
            r#"{"type":"turn.completed"}"#,
        ];
        let outcome = process_codex_stream(mock_reader(&stream), None)
            .await
            .unwrap();

        assert_eq!(outcome.answer, "part 1 part 2");
    }

    #[tokio::test]
    async fn codex_stream_records_tool_calls() {
        let stream = [
            r#"{"type":"thread.started","thread_id":"t"}"#,
            r#"{"type":"turn.started"}"#,
            r#"{"type":"item.completed","item":{"id":"call_1","type":"tool_call","name":"shell","arguments":{"cmd":"ls"}}}"#,
            r#"{"type":"item.completed","item":{"type":"agent_message","text":"done"}}"#,
            r#"{"type":"turn.completed"}"#,
        ];
        let outcome = process_codex_stream(mock_reader(&stream), None)
            .await
            .unwrap();

        assert_eq!(outcome.answer, "done");
        assert_eq!(outcome.tool_calls, 1);
        assert_eq!(outcome.tool_uses.len(), 1);
        assert_eq!(outcome.tool_uses[0].name, "shell");
    }

    #[tokio::test]
    async fn codex_stream_skips_non_json_banner_lines() {
        let stream = [
            "Reading prompt from stdin...",
            r#"{"type":"thread.started","thread_id":"t"}"#,
            r#"{"type":"turn.started"}"#,
            r#"{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}"#,
            r#"{"type":"turn.completed"}"#,
        ];
        let outcome = process_codex_stream(mock_reader(&stream), None)
            .await
            .unwrap();

        assert_eq!(outcome.answer, "ok");
    }

    #[tokio::test]
    async fn codex_stream_no_agent_message_marks_error() {
        let stream = [r#"{"type":"thread.started","thread_id":"t"}"#];
        let outcome = process_codex_stream(mock_reader(&stream), None)
            .await
            .unwrap();

        assert!(outcome.answer.is_empty());
        assert!(outcome.is_error);
    }

    #[tokio::test]
    async fn codex_stream_fires_assistant_events_to_emitter() {
        // Regression guard for car#213. Before the fix the dispatcher
        // dropped the emitter, so streaming codex runs surfaced no
        // events to the host and the chat bubble stayed empty. The
        // emitter now has to deliver an Anthropic-Messages-shaped
        // Assistant text event that the host's chat renderer can turn
        // into a `kind:"token"` frame.
        use std::sync::Mutex;

        let captured: Arc<Mutex<Vec<StreamEvent>>> = Arc::new(Mutex::new(Vec::new()));
        let recorder = Arc::clone(&captured);
        let emitter: StreamEventEmitter =
            Arc::new(move |event| recorder.lock().unwrap().push(event));

        let stream = [
            r#"{"type":"thread.started","thread_id":"t"}"#,
            r#"{"type":"turn.started"}"#,
            r#"{"type":"item.completed","item":{"type":"agent_message","text":"hi there"}}"#,
            r#"{"type":"turn.completed"}"#,
        ];
        let outcome = process_codex_stream(mock_reader(&stream), Some(emitter))
            .await
            .unwrap();
        assert_eq!(outcome.answer, "hi there");

        let events = captured.lock().unwrap();
        // Collect the text of every Assistant event whose first
        // content block is a `text` block.
        let texts: Vec<String> = events
            .iter()
            .filter_map(|event| {
                if let StreamEvent::Assistant(assistant) = event {
                    let block = assistant.message.get("content")?.as_array()?.first()?;
                    if block.get("type").and_then(|kind| kind.as_str()) != Some("text") {
                        return None;
                    }
                    block.get("text")?.as_str().map(str::to_string)
                } else {
                    None
                }
            })
            .collect();
        assert!(
            texts.contains(&"hi there".to_string()),
            "codex emitter must fire an Assistant text event (car#213); got {events:?}"
        );
    }

    // --- Gemini adapter --------------------------------------------------

    #[test]
    fn gemini_args_use_prompt_and_yolo() {
        let args = build_gemini_args(&InvokeOptions::default(), "say hi");

        // `-p` must be immediately followed by the task text.
        assert_eq!(args[..2], ["-p", "say hi"]);
        assert!(args.iter().any(|a| a == "--yolo"));
    }
}