car-engine 0.31.0

//! Execution substrate — the single environment an agent acts within.
//!
//! See `docs/execution-substrate.md`. A coherent agent acts within **one**
//! environment: every side-effecting tool (files, processes) resolves against
//! the same machine, while pure tools (`calculate`) are environment-free and
//! run in-process anywhere.
//!
//! The [`Substrate`] trait is deliberately minimal: exec + file I/O (+ optional
//! PTY). Convenience tools (`list_dir`/`find_files`/`grep_files`/`edit_file`)
//! are **composed on top** — they are not trait methods. The GUI is a sibling
//! surface on the same environment, not folded in here.
//!
//! ## Variants
//!
//! - [`LocalSubstrate`] — host fs/process. This is the lifted, byte-for-byte
//!   equivalent of the historic `agent_basics` host behavior (absolute paths
//!   pass through; relative paths join the host process `current_dir()`). It is
//!   the **default** for every existing consumer, so binding a substrate is
//!   backward compatible by construction.
//! - [`McpSubstrate`] — wraps an [`McpSession`] (e.g. the `vm` bridge). The
//!   bridge becomes *one implementation of the environment*, not a parallel tool
//!   surface. Routes substrate methods onto same-named bridge tools.

use crate::mcp::McpSession;
use serde_json::json;
use std::path::PathBuf;
use std::time::Duration;
use std::sync::Arc;
use tokio::sync::Mutex;

/// Result of a one-shot command execution on a substrate.
///
/// Returned by [`Substrate::run_command`]. A command that ran but failed is
/// still reported as a `CommandOutput` with a nonzero `exit_code`; `Err` is
/// reserved for failures to run the command at all (spawn error, timeout,
/// transport fault).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandOutput {
    /// Captured standard output as text.
    pub stdout: String,
    /// Captured standard error as text.
    pub stderr: String,
    /// Exit status of the command. `LocalSubstrate` reports `-1` when the
    /// process has no exit code; `McpSubstrate` reports `0` when the bridge
    /// response carries none.
    pub exit_code: i32,
}

/// The environment an agent acts within. Bound once to a [`Runtime`]; the agent
/// never chooses per-tool where execution lands.
///
/// Kept minimal on purpose — exec + file I/O, with PTY as an optional
/// capability that defaults to "unsupported". `list_dir`/`find`/`grep`/`edit`
/// are reducible to these primitives and live as convenience tools, mirroring
/// the `vm` MCP bridge.
///
/// All fallible methods report failure as a human-readable `String`.
#[async_trait::async_trait]
pub trait Substrate: Send + Sync {
    /// Environment name: `"local"`, `"vm"`, `"docker:..."`, etc.
    fn name(&self) -> &str;

    /// Run a one-shot command, returning stdout/stderr/exit_code.
    ///
    /// `timeout_s` is an optional time limit in seconds; how a missing or
    /// non-positive value is treated is up to the implementation.
    async fn run_command(&self, cmd: &str, timeout_s: Option<f64>)
        -> Result<CommandOutput, String>;

    /// Read a UTF-8 text file in full.
    async fn read_text(&self, path: &str) -> Result<String, String>;

    /// Write (truncate) a UTF-8 text file.
    async fn write_text(&self, path: &str, content: &str) -> Result<(), String>;

    /// Read raw bytes, optionally a window `[offset, offset+len)`.
    ///
    /// `offset` defaults to the start of the file and `len` to "through the
    /// end" when they are `None`.
    async fn read_bytes(
        &self,
        path: &str,
        offset: Option<u64>,
        len: Option<u64>,
    ) -> Result<Vec<u8>, String>;

    /// Write (truncate) raw bytes.
    async fn write_bytes(&self, path: &str, bytes: &[u8]) -> Result<(), String>;

    /// Whether this substrate is the host process itself. Convenience tools
    /// (`list_dir`/`find_files`/`grep_files`) that walk a directory tree use
    /// this to keep their byte-identical host-fs implementation on the local
    /// path; non-local environments fall back to composing on `run_command`.
    fn is_local(&self) -> bool {
        false
    }

    /// How a path should appear in tool-result metadata for this environment.
    /// Pure/synchronous; no I/O. The default echoes the input unchanged. The
    /// local environment overrides this to reproduce the historic behavior of
    /// reporting the resolved absolute path (relative paths joined to the host
    /// CWD). Not a capability — just display canonicalization.
    fn display_path(&self, path: &str) -> String {
        path.to_string()
    }

    // ─── Optional PTY capability (default: unsupported) ───────────────
    //
    // Every default below returns the same "pty not supported" error, so an
    // implementation opts in by overriding all five methods.

    /// Start `_cmd` under a pseudo-terminal and return an opaque session id
    /// that the other `pty_*` methods accept.
    async fn pty_start(&self, _cmd: &str) -> Result<String, String> {
        Err("pty not supported by this substrate".into())
    }
    /// Send `_data` as input to the PTY session `_id`.
    async fn pty_input(&self, _id: &str, _data: &str) -> Result<(), String> {
        Err("pty not supported by this substrate".into())
    }
    /// Read output from the PTY session `_id`.
    async fn pty_read(&self, _id: &str) -> Result<String, String> {
        Err("pty not supported by this substrate".into())
    }
    /// Resize the PTY session `_id` to `_rows` x `_cols`.
    async fn pty_resize(&self, _id: &str, _rows: u16, _cols: u16) -> Result<(), String> {
        Err("pty not supported by this substrate".into())
    }
    /// Terminate the PTY session `_id`.
    async fn pty_kill(&self, _id: &str) -> Result<(), String> {
        Err("pty not supported by this substrate".into())
    }
}

// ─────────────────────────────────────────────────────────────────────────
// LocalSubstrate
// ─────────────────────────────────────────────────────────────────────────

/// Substrate that acts directly on the host's filesystem and processes.
///
/// Mirrors the historic `agent_basics` host behavior: the whole host-locality
/// contract is a single rule — an absolute path is taken verbatim, a relative
/// path is joined onto `std::env::current_dir()`.
#[derive(Debug, Default, Clone)]
pub struct LocalSubstrate;

impl LocalSubstrate {
    /// Create the (stateless) host substrate.
    pub fn new() -> Self {
        LocalSubstrate
    }

    /// The one host-locality seam (formerly `agent_basics::resolve_path`).
    ///
    /// Returns `path` unchanged when it is absolute; otherwise returns it
    /// joined onto the host process's current working directory.
    ///
    /// # Errors
    /// Fails only when the current working directory cannot be determined.
    pub fn resolve_path(path: &str) -> Result<PathBuf, String> {
        let requested = PathBuf::from(path);
        if requested.is_absolute() {
            return Ok(requested);
        }
        match std::env::current_dir() {
            Ok(working_dir) => Ok(working_dir.join(requested)),
            Err(e) => Err(format!("failed to resolve working directory: {e}")),
        }
    }
}

#[async_trait::async_trait]
impl Substrate for LocalSubstrate {
    /// Always `"local"`.
    fn name(&self) -> &str {
        "local"
    }

    /// The local substrate is the host process itself.
    fn is_local(&self) -> bool {
        true
    }

    /// Run `cmd` through the host shell (`cmd /C` on Windows, `sh -c`
    /// elsewhere) and capture its output.
    ///
    /// A positive `timeout_s` bounds the wait; `None`, a non-positive value, or
    /// a value too large to represent as a `Duration` (e.g. infinity) means
    /// "no time limit".
    ///
    /// # Errors
    /// Returns an error when the shell cannot be spawned or the time limit
    /// expires. A command that runs and exits nonzero is `Ok`.
    async fn run_command(
        &self,
        cmd: &str,
        timeout_s: Option<f64>,
    ) -> Result<CommandOutput, String> {
        use tokio::process::Command;
        let mut command = if cfg!(target_os = "windows") {
            let mut c = Command::new("cmd");
            c.arg("/C").arg(cmd);
            c
        } else {
            let mut c = Command::new("sh");
            c.arg("-c").arg(cmd);
            c
        };
        // Without this, a timed-out (or cancelled) call drops the future but
        // leaves the shell process running on the host.
        command.kill_on_drop(true);

        // `from_secs_f64` panics on infinite/oversized input; the fallible
        // variant lets such values degrade to "no time limit" instead.
        let limit = timeout_s.filter(|secs| *secs > 0.0).and_then(|secs| {
            std::time::Duration::try_from_secs_f64(secs)
                .ok()
                .map(|dur| (secs, dur))
        });

        let fut = command.output();
        let output = match limit {
            Some((secs, dur)) => match tokio::time::timeout(dur, fut).await {
                Ok(res) => res.map_err(|e| format!("failed to run command: {e}"))?,
                Err(_) => return Err(format!("command timed out after {secs}s")),
            },
            None => fut
                .await
                .map_err(|e| format!("failed to run command: {e}"))?,
        };

        Ok(CommandOutput {
            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
            // No exit code (e.g. killed by a signal) is reported as -1.
            exit_code: output.status.code().unwrap_or(-1),
        })
    }

    /// Read the whole file at `path` (resolved via [`LocalSubstrate::resolve_path`])
    /// as UTF-8 text.
    async fn read_text(&self, path: &str) -> Result<String, String> {
        let full = Self::resolve_path(path)?;
        std::fs::read_to_string(&full)
            .map_err(|e| format!("failed to read file '{}': {e}", full.display()))
    }

    /// Write `content` to `path`, truncating any existing file and creating
    /// missing parent directories. Shares its implementation (and error
    /// messages) with [`Substrate::write_bytes`].
    async fn write_text(&self, path: &str, content: &str) -> Result<(), String> {
        self.write_bytes(path, content.as_bytes()).await
    }

    /// Read the bytes of `path`, optionally restricted to the window
    /// `[offset, offset + len)`.
    ///
    /// The window is clamped to the file: an `offset` at or past the end
    /// yields an empty vector, and a `len` that runs past the end (including
    /// values that would overflow `usize`) reads through the end of the file.
    async fn read_bytes(
        &self,
        path: &str,
        offset: Option<u64>,
        len: Option<u64>,
    ) -> Result<Vec<u8>, String> {
        let full = Self::resolve_path(path)?;
        let bytes = std::fs::read(&full)
            .map_err(|e| format!("failed to read file '{}': {e}", full.display()))?;

        // An offset that does not fit in `usize` is necessarily past the end.
        let start = match usize::try_from(offset.unwrap_or(0)) {
            Ok(start) if start < bytes.len() => start,
            _ => return Ok(Vec::new()),
        };
        // Saturating arithmetic: `start + len` must not wrap, otherwise the
        // slice below would be asked for `end < start` and panic.
        let end = match len {
            Some(l) => usize::try_from(l)
                .map_or(bytes.len(), |l| start.saturating_add(l).min(bytes.len())),
            None => bytes.len(),
        };
        Ok(bytes[start..end].to_vec())
    }

    /// Write `bytes` to `path`, truncating any existing file and creating
    /// missing parent directories first.
    async fn write_bytes(&self, path: &str, bytes: &[u8]) -> Result<(), String> {
        let full = Self::resolve_path(path)?;
        if let Some(parent) = full.parent() {
            std::fs::create_dir_all(parent)
                .map_err(|e| format!("failed to create parent dir '{}': {e}", parent.display()))?;
        }
        std::fs::write(&full, bytes)
            .map_err(|e| format!("failed to write file '{}': {e}", full.display()))
    }

    /// Report the resolved absolute path; falls back to echoing `path` when
    /// the working directory cannot be resolved.
    fn display_path(&self, path: &str) -> String {
        match Self::resolve_path(path) {
            Ok(p) => p.display().to_string(),
            Err(_) => path.to_string(),
        }
    }
}

// ─────────────────────────────────────────────────────────────────────────
// McpSubstrate
// ─────────────────────────────────────────────────────────────────────────

// NOTE: the paragraph below documents `McpSubstrate` (the struct declared
// further down, after the transport-error helpers), not the constant that
// follows. It is kept as a plain comment so rustdoc does not attach it to
// `SUBSTRATE_TRANSPORT_ERR_PREFIX`.
//
// Substrate backed by an MCP session (e.g. the `vm` bridge). Maps substrate
// methods onto same-named bridge tools (`run_command`, `read_text`,
// `write_text`, `read_bytes`, `write_bytes`, `pty_*`).
//
// The JSON wire arg names match the canonical sandbox bridge protocol (the
// shared `vm_mcp_server` used by ALE/OpenClaw): `run_command{command,timeout}`,
// `read_bytes{path,offset,length}`, `write_bytes{path,content_b64,append?}`,
// `pty_*{pid,...}` with a numeric pid. These differ from the Rust trait's
// parameter names (`cmd`/`timeout_s`/`len`/`id`), which are kept idiomatic.
//
// Holds the session as `Arc<Mutex<dyn McpSession>>` — the same handle type
// `car-connectors` and `crate::mcp::McpToolExecutor` already pass around —
// because `McpSession::call_tool` takes `&mut self`.

/// Reserved prefix marking an error that originated at the substrate's
/// *transport* boundary — the MCP session call failed to reach the bridge, or
/// the bridge returned `isError` with a transport-down signature (e.g. the
/// canonical `vm` bridge's "fetch failed" when its link to the VM dropped).
///
/// A legitimate command failure comes back as `Ok` with a nonzero `exit_code`,
/// and a tool-reported task error (e.g. "no such file") is left untagged, so a
/// caller can count genuine infra incidents by matching this prefix instead of
/// sniffing free-text. `car run-task` uses it for the `run_end.infra_tool_errors`
/// count and strips it before the text reaches a model.
pub const SUBSTRATE_TRANSPORT_ERR_PREFIX: &str = "[substrate-transport] ";

/// Heuristic classifier: is `msg` a transport/environment fault rather than a
/// task error reported by a tool?
///
/// The match is a case-insensitive (ASCII) substring test against a fixed
/// allowlist of signatures emitted by the canonical bridges and the MCP
/// transport.
fn looks_like_transport_error(msg: &str) -> bool {
    const SIGS: [&str; 12] = [
        "fetch failed",
        "connection",
        "econnrefused",
        "econnreset",
        "transport",
        "timed out",
        "timeout",
        "socket hang",
        "broken pipe",
        "stream closed",
        "channel closed",
        "unexpected eof",
    ];
    let lowered = msg.to_ascii_lowercase();
    for signature in SIGS {
        if lowered.contains(signature) {
            return true;
        }
    }
    false
}

/// Is `msg` a transport error that is SAFE to retry automatically?
///
/// Only connection-drop signatures qualify: they indicate the request never
/// reached the server, so sending it again cannot double-execute a
/// non-idempotent call (e.g. `run_command`). Matching is a case-insensitive
/// (ASCII) substring test.
///
/// Timeouts ("timed out" / "currently unavailable") are deliberately NOT in
/// the list: the call may still be running server-side, so a retry could run a
/// command twice. They are still tagged as transport faults (and surfaced to
/// the agent), just never retried silently.
///
/// Residual risk: a connection drop is *usually* pre-execution but not
/// guaranteed — a drop after the request was sent could re-run a
/// non-idempotent command. This is accepted because the retry is bounded
/// ([`SUBSTRATE_RETRY_ATTEMPTS`]) and a surfaced failure would make the agent
/// re-run the command anyway, so the net double-execution risk is no worse
/// than the status quo. Tighten to idempotent-only tools if this bites.
fn is_retryable_transport_error(msg: &str) -> bool {
    const RETRYABLE: [&str; 10] = [
        "fetch failed",
        "econnrefused",
        "econnreset",
        "connection refused",
        "connection reset",
        "broken pipe",
        "stream closed",
        "channel closed",
        "unexpected eof",
        "closed the connection",
    ];
    let lowered = msg.to_ascii_lowercase();
    RETRYABLE
        .iter()
        .copied()
        .find(|signature| lowered.contains(signature))
        .is_some()
}

/// Bounded attempts for a connection-drop retry in [`McpSubstrate::call_timed`].
///
/// This is the total number of calls, the first one included: a retryable
/// error is retried only while the attempt number is below this value, with a
/// pause of `300 ms * attempt` before the next try.
const SUBSTRATE_RETRY_ATTEMPTS: u32 = 3;

/// Substrate backed by an MCP session (e.g. the `vm` bridge): each substrate
/// method is forwarded to the same-named bridge tool through the wrapped
/// session.
pub struct McpSubstrate {
    // Shared session handle; locked for the duration of each tool call.
    session: Arc<Mutex<dyn McpSession>>,
    // Environment label returned by `Substrate::name`.
    name: String,
}

impl McpSubstrate {
    /// Build a substrate around an existing MCP session. `name` is the
    /// environment label (e.g. `"vm"`) and does not have to equal the
    /// session's own server name.
    pub fn new(session: Arc<Mutex<dyn McpSession>>, name: impl Into<String>) -> Self {
        let name = name.into();
        Self { session, name }
    }

    /// Invoke `tool` with no per-call timeout (the session backstop applies).
    async fn call(&self, tool: &str, args: serde_json::Value) -> Result<serde_json::Value, String> {
        self.call_timed(tool, args, None).await
    }

    /// Like [`call`], but the wait for the MCP response is bounded by
    /// `timeout` (or the session backstop when `None`). `run_command` uses it
    /// so CAR does not give up on a long command that is still executing on
    /// the VM.
    ///
    /// Connection-drop errors are retried up to [`SUBSTRATE_RETRY_ATTEMPTS`]
    /// total attempts with a linearly growing pause. The error finally
    /// returned is prefixed with [`SUBSTRATE_TRANSPORT_ERR_PREFIX`] when it
    /// looks like a transport fault and passed through untouched otherwise.
    async fn call_timed(
        &self,
        tool: &str,
        args: serde_json::Value,
        timeout: Option<Duration>,
    ) -> Result<serde_json::Value, String> {
        let mut attempt: u32 = 1;
        loop {
            // The session lock is taken per attempt and released before any
            // backoff sleep, so other callers are not blocked while we wait.
            let outcome = {
                let mut session = self.session.lock().await;
                session
                    .call_tool_with_timeout(tool, args.clone(), timeout)
                    .await
            };
            let failure = match outcome {
                Ok(value) => return Ok(value),
                Err(failure) => failure,
            };

            let retry_allowed =
                attempt < SUBSTRATE_RETRY_ATTEMPTS && is_retryable_transport_error(&failure);
            if !retry_allowed {
                // Attempts exhausted, or the error is not a connection drop.
                // Transport-class faults (timeouts included) get the reserved
                // prefix so callers can count infra incidents without parsing
                // free text; tool-reported task errors stay raw.
                return Err(if looks_like_transport_error(&failure) {
                    format!("{SUBSTRATE_TRANSPORT_ERR_PREFIX}{failure}")
                } else {
                    failure
                });
            }

            // Connection drop (the request did not reach the server): back off
            // and try again. A blip that heals here never reaches the model,
            // so it is not counted as an infra incident either.
            tokio::time::sleep(Duration::from_millis(300 * attempt as u64)).await;
            attempt += 1;
        }
    }
}

#[async_trait::async_trait]
impl Substrate for McpSubstrate {
    /// The environment label supplied to [`McpSubstrate::new`].
    fn name(&self) -> &str {
        &self.name
    }

    /// Run `cmd` through the bridge's `run_command` tool.
    ///
    /// `timeout_s`, when present, is forwarded to the bridge as `timeout` and
    /// also bounds CAR's own wait at `timeout_s + 30` seconds. A value that
    /// cannot be turned into a `Duration` (NaN, infinite, or so negative that
    /// the margin does not bring it above zero) falls back to the session
    /// backstop instead of panicking.
    ///
    /// A plain-string or otherwise unstructured response becomes `stdout` with
    /// `exit_code` 0; an object response supplies `stdout`/`stderr`/`exit_code`
    /// with missing fields defaulting to empty / 0.
    async fn run_command(
        &self,
        cmd: &str,
        timeout_s: Option<f64>,
    ) -> Result<CommandOutput, String> {
        let mut args = json!({ "command": cmd });
        if let Some(secs) = timeout_s {
            args["timeout"] = json!(secs);
        }
        // Bound the MCP await by the command's own timeout + a margin so the
        // bridge can return its server-side timeout result before CAR's await
        // fires; commands with no declared timeout fall back to the backstop.
        // `try_from_secs_f64` is used because `from_secs_f64` panics on
        // negative, NaN, or infinite input.
        let await_timeout =
            timeout_s.and_then(|s| Duration::try_from_secs_f64(s + 30.0).ok());
        let result = self.call_timed("run_command", args, await_timeout).await?;

        // The bridge may return a flattened text string (text-content blocks)
        // or a structured object with stdout/stderr/exit_code.
        match &result {
            serde_json::Value::String(s) => Ok(CommandOutput {
                stdout: s.clone(),
                stderr: String::new(),
                exit_code: 0,
            }),
            serde_json::Value::Object(_) => Ok(CommandOutput {
                stdout: result
                    .get("stdout")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string(),
                stderr: result
                    .get("stderr")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string(),
                // NOTE(review): `as i32` wraps codes outside the i32 range —
                // confirm the bridge only reports values that fit.
                exit_code: result
                    .get("exit_code")
                    .and_then(|v| v.as_i64())
                    .unwrap_or(0) as i32,
            }),
            other => Ok(CommandOutput {
                stdout: other.to_string(),
                stderr: String::new(),
                exit_code: 0,
            }),
        }
    }

    /// Read a text file via the bridge's `read_text` tool. Accepts a plain
    /// string response or an object with a string `content` field; anything
    /// else is returned as its JSON rendering.
    async fn read_text(&self, path: &str) -> Result<String, String> {
        let result = self.call("read_text", json!({ "path": path })).await?;
        match result {
            serde_json::Value::String(s) => Ok(s),
            other => Ok(other
                .get("content")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string())
                .unwrap_or_else(|| other.to_string())),
        }
    }

    /// Write a text file via the bridge's `write_text` tool; the response
    /// payload is ignored.
    async fn write_text(&self, path: &str, content: &str) -> Result<(), String> {
        self.call("write_text", json!({ "path": path, "content": content }))
            .await
            .map(|_| ())
    }

    /// Read bytes via the bridge's `read_bytes` tool. `offset` and `len` are
    /// sent as `offset` / `length` only when present. The response must carry
    /// base64 text, either as a plain string or under `data` / `base64`.
    async fn read_bytes(
        &self,
        path: &str,
        offset: Option<u64>,
        len: Option<u64>,
    ) -> Result<Vec<u8>, String> {
        let mut args = json!({ "path": path });
        if let Some(o) = offset {
            args["offset"] = json!(o);
        }
        if let Some(l) = len {
            args["length"] = json!(l);
        }
        let result = self.call("read_bytes", args).await?;
        let b64 = match &result {
            serde_json::Value::String(s) => s.clone(),
            other => other
                .get("data")
                .or_else(|| other.get("base64"))
                .and_then(|v| v.as_str())
                .map(|s| s.to_string())
                .ok_or_else(|| "read_bytes: no base64 data in response".to_string())?,
        };
        base64_decode(b64.trim()).map_err(|e| format!("read_bytes: invalid base64: {e}"))
    }

    /// Write bytes via the bridge's `write_bytes` tool, sending the payload
    /// base64-encoded under `content_b64`.
    async fn write_bytes(&self, path: &str, bytes: &[u8]) -> Result<(), String> {
        let b64 = base64_encode(bytes);
        self.call("write_bytes", json!({ "path": path, "content_b64": b64 }))
            .await
            .map(|_| ())
    }

    /// Start a PTY via the bridge's `pty_start` tool and return its pid as a
    /// decimal string.
    async fn pty_start(&self, cmd: &str) -> Result<String, String> {
        let result = self.call("pty_start", json!({ "command": cmd })).await?;
        // The bridge returns the pid either structured (`{"pid": N}`) or in a
        // text block like `"pid: 1234\ncols: 80\nrows: 24"`. Substrate ids are
        // strings; the numeric pid is round-tripped back as a string and parsed
        // to a number by the other pty_* methods (see `pty_pid`).
        if let Some(pid) = result.get("pid").and_then(|v| v.as_i64()) {
            return Ok(pid.to_string());
        }
        let text = match &result {
            serde_json::Value::String(s) => s.clone(),
            other => other.to_string(),
        };
        // Only the first `pid:` line is considered, and it must be all digits.
        text.lines()
            .find_map(|l| {
                l.trim_start()
                    .strip_prefix("pid:")
                    .map(|r| r.trim().to_string())
            })
            .filter(|p| !p.is_empty() && p.bytes().all(|b| b.is_ascii_digit()))
            .ok_or_else(|| format!("pty_start: no pid in response: {text}"))
    }

    /// Send `data` to the PTY identified by `id` (a numeric pid string).
    async fn pty_input(&self, id: &str, data: &str) -> Result<(), String> {
        self.call("pty_input", json!({ "pid": pty_pid(id)?, "data": data }))
            .await
            .map(|_| ())
    }

    /// Read from the PTY identified by `id`. Accepts a plain string response
    /// or an object with a string `data` field; anything else is returned as
    /// its JSON rendering.
    async fn pty_read(&self, id: &str) -> Result<String, String> {
        let result = self
            .call("pty_read", json!({ "pid": pty_pid(id)? }))
            .await?;
        match result {
            serde_json::Value::String(s) => Ok(s),
            other => Ok(other
                .get("data")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string())
                .unwrap_or_else(|| other.to_string())),
        }
    }

    /// Resize the PTY identified by `id` to `rows` x `cols`.
    async fn pty_resize(&self, id: &str, rows: u16, cols: u16) -> Result<(), String> {
        self.call(
            "pty_resize",
            json!({ "pid": pty_pid(id)?, "rows": rows, "cols": cols }),
        )
        .await
        .map(|_| ())
    }

    /// Kill the PTY identified by `id`.
    async fn pty_kill(&self, id: &str) -> Result<(), String> {
        self.call("pty_kill", json!({ "pid": pty_pid(id)? }))
            .await
            .map(|_| ())
    }
}

/// Convert a substrate pty id (a string) into the numeric `pid` that the
/// bridge's `pty_input`/`pty_read`/`pty_resize`/`pty_kill` tools expect.
///
/// Surrounding whitespace is ignored; anything that does not parse as an
/// `i64` is rejected with an error quoting the original id.
fn pty_pid(id: &str) -> Result<i64, String> {
    match id.trim().parse::<i64>() {
        Ok(pid) => Ok(pid),
        Err(_) => Err(format!("invalid pty id (expected numeric pid): {id:?}")),
    }
}

// ─────────────────────────────────────────────────────────────────────────
// Minimal standard-alphabet base64 (no padding-stripping leniency beyond the
// trailing '='). Used only on the MCP byte-transfer path so we don't pull a
// new crate dependency for phases 1-2 where the local path never touches it.
// ─────────────────────────────────────────────────────────────────────────

/// The standard base64 alphabet, indexed by 6-bit value.
const B64_ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encode `bytes` as standard-alphabet base64 with `=` padding.
///
/// Each group of up to three input bytes becomes four output characters; a
/// short final group is padded with `=`.
fn base64_encode(bytes: &[u8]) -> String {
    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack the group into a 24-bit word, treating missing bytes as zero.
        let mut word = 0u32;
        for slot in 0..3 {
            word = (word << 8) | u32::from(group.get(slot).copied().unwrap_or(0));
        }
        let sextet = |shift: u32| B64_ALPHABET[((word >> shift) & 0x3f) as usize] as char;

        encoded.push(sextet(18));
        encoded.push(sextet(12));
        encoded.push(if group.len() > 1 { sextet(6) } else { '=' });
        encoded.push(if group.len() > 2 { sextet(0) } else { '=' });
    }
    encoded
}

/// Decode standard-alphabet base64.
///
/// ASCII whitespace anywhere in the input is ignored, and trailing `=`
/// padding is optional. Everything else is strict:
///
/// # Errors
/// - a character outside the base64 alphabet, including `=` anywhere but at
///   the end (e.g. two padded payloads glued together);
/// - a length that leaves a single dangling symbol, which cannot encode a
///   whole byte and indicates a truncated payload.
fn base64_decode(s: &str) -> Result<Vec<u8>, String> {
    /// Map one alphabet character to its 6-bit value.
    fn val(c: u8) -> Result<u32, String> {
        match c {
            b'A'..=b'Z' => Ok((c - b'A') as u32),
            b'a'..=b'z' => Ok((c - b'a' + 26) as u32),
            b'0'..=b'9' => Ok((c - b'0' + 52) as u32),
            b'+' => Ok(62),
            b'/' => Ok(63),
            _ => Err(format!("invalid base64 char {:?}", c as char)),
        }
    }

    let mut clean: Vec<u8> = s.bytes().filter(|b| !b.is_ascii_whitespace()).collect();
    // Padding is only meaningful at the end; a '=' left in the middle is
    // rejected by `val` below rather than silently dropped.
    while clean.last() == Some(&b'=') {
        clean.pop();
    }
    // One leftover symbol carries only 6 bits — not enough for a byte — so the
    // input was truncated or corrupted.
    if clean.len() % 4 == 1 {
        return Err(format!(
            "invalid base64 length {} (dangling symbol)",
            clean.len()
        ));
    }

    let mut out = Vec::with_capacity(clean.len() / 4 * 3 + 2);
    for chunk in clean.chunks(4) {
        let mut n = 0u32;
        for &c in chunk {
            n = (n << 6) | val(c)?;
        }
        // Left-align the accumulated bits in a 24-bit word and emit the whole
        // bytes they contain (3 for a full chunk, 1 or 2 for a short tail).
        let bits = 6 * chunk.len() as u32;
        n <<= 24 - bits;
        out.extend((0..bits / 8).map(|i| (n >> (16 - 8 * i)) as u8));
    }
    Ok(out)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding then decoding must reproduce every sample, and the canonical
    /// "foobar" vector must match in both directions.
    #[test]
    fn base64_roundtrip() {
        let samples: [&[u8]; 8] = [
            b"",
            b"f",
            b"fo",
            b"foo",
            b"foob",
            b"fooba",
            b"foobar",
            &[0u8, 1, 2, 255],
        ];
        for case in samples {
            let enc = base64_encode(case);
            let dec = base64_decode(&enc).unwrap();
            assert_eq!(dec, case, "roundtrip failed for {case:?} (enc={enc})");
        }

        let known = "Zm9vYmFy";
        assert_eq!(base64_encode(b"foobar"), known);
        assert_eq!(base64_decode(known).unwrap(), b"foobar");
    }

    /// Exercises the local text and byte read/write paths, including a
    /// windowed byte read, against a scratch directory under the system temp
    /// dir.
    #[tokio::test]
    async fn local_read_write_roundtrip() {
        // Scratch directory named after the current time so runs don't share state.
        let dir = std::env::temp_dir().join(format!("car-substrate-{}", uuid_like()));
        std::fs::create_dir_all(&dir).unwrap();
        let path = dir.join("hello.txt");
        let path_str = path.to_string_lossy().to_string();

        let sub = LocalSubstrate::new();
        assert_eq!(sub.name(), "local");

        sub.write_text(&path_str, "hello world").await.unwrap();
        let read = sub.read_text(&path_str).await.unwrap();
        assert_eq!(read, "hello world");

        let bytes = sub.read_bytes(&path_str, None, None).await.unwrap();
        assert_eq!(bytes, b"hello world");

        // windowed byte read
        let window = sub.read_bytes(&path_str, Some(6), Some(5)).await.unwrap();
        assert_eq!(window, b"world");

        // Overwriting with raw bytes truncates the previous content.
        sub.write_bytes(&path_str, b"\x00\x01\x02").await.unwrap();
        let raw = sub.read_bytes(&path_str, None, None).await.unwrap();
        assert_eq!(raw, vec![0u8, 1, 2]);

        // Best-effort cleanup; not reached if an assertion above fails.
        std::fs::remove_dir_all(&dir).ok();
    }

    #[tokio::test]
    async fn local_run_command_captures_output() {
        let sub = LocalSubstrate::new();
        let out = sub.run_command("echo hi", None).await.unwrap();
        assert_eq!(out.stdout.trim(), "hi");
        assert_eq!(out.exit_code, 0);
    }

    /// The local substrate does not override the PTY capability, so the trait
    /// default ("unsupported") must surface as an error.
    #[tokio::test]
    async fn local_pty_defaults_to_unsupported() {
        let outcome = LocalSubstrate::new().pty_start("sh").await;
        assert!(outcome.is_err());
    }

    // ── McpSubstrate against a mock McpSession ──

    use crate::mcp::McpToolInfo;
    use serde_json::Value;
    use std::sync::Mutex as StdMutex;

    /// In-memory stand-in for an MCP session: records every tool call and
    /// keeps written text files in a map so they can be read back.
    struct MockSession {
        // Value returned by `McpSession::name`.
        name: String,
        // (tool, args) recorded for assertions
        calls: Arc<StdMutex<Vec<(String, Value)>>>,
        // path -> content, filled by `write_text` and served by `read_text`
        files: Arc<StdMutex<std::collections::HashMap<String, String>>>,
    }

    #[async_trait::async_trait]
    impl McpSession for MockSession {
        /// The mock advertises no tools; the substrate calls tools by name.
        async fn list_tools(&mut self) -> Result<Vec<McpToolInfo>, String> {
            Ok(vec![])
        }
        /// Record the call, then answer with a canned response per tool name.
        /// Unknown tools produce an error.
        async fn call_tool(&mut self, name: &str, arguments: Value) -> Result<Value, String> {
            self.calls
                .lock()
                .unwrap()
                .push((name.to_string(), arguments.clone()));
            match name {
                "write_text" => {
                    let p = arguments["path"].as_str().unwrap().to_string();
                    let c = arguments["content"].as_str().unwrap().to_string();
                    self.files.lock().unwrap().insert(p, c);
                    Ok(Value::String("ok".into()))
                }
                "read_text" => {
                    let p = arguments["path"].as_str().unwrap();
                    let c = self
                        .files
                        .lock()
                        .unwrap()
                        .get(p)
                        .cloned()
                        .ok_or_else(|| "not found".to_string())?;
                    // Bridge returns flattened text content as a String.
                    Ok(Value::String(c))
                }
                // Structured response shape (stdout/stderr/exit_code object).
                "run_command" => Ok(json!({
                    "stdout": "from-vm",
                    "stderr": "",
                    "exit_code": 0
                })),
                // The canonical bridge flattens read_bytes to a base64 text
                // block; "aGk=" is base64("hi").
                "read_bytes" => Ok(Value::String("aGk=".into())),
                "write_bytes" => Ok(Value::String("ok".into())),
                // pty_start returns a human-readable text block carrying the pid.
                "pty_start" => Ok(Value::String("pid: 4242\ncols: 80\nrows: 24".into())),
                "pty_read" => Ok(Value::String("pty-out".into())),
                "pty_input" | "pty_resize" | "pty_kill" => Ok(Value::String("ok".into())),
                _ => Err(format!("unknown tool {name}")),
            }
        }
        /// Session name as configured on the mock.
        fn name(&self) -> &str {
            &self.name
        }
    }

    /// Substrate calls must reach the session as same-named tools, in call
    /// order, and their responses must be mapped back to substrate results.
    #[tokio::test]
    async fn mcp_substrate_routes_to_session() {
        let calls = Arc::new(StdMutex::new(Vec::new()));
        let files = Arc::new(StdMutex::new(std::collections::HashMap::new()));
        let mock = MockSession {
            name: "vm".into(),
            calls: calls.clone(),
            files: files.clone(),
        };
        let session: Arc<Mutex<dyn McpSession>> = Arc::new(Mutex::new(mock));
        let sub = McpSubstrate::new(session, "vm");

        assert_eq!(sub.name(), "vm");

        // Text written through the substrate is readable back through it.
        sub.write_text("/tmp/a.txt", "vm-content").await.unwrap();
        let read = sub.read_text("/tmp/a.txt").await.unwrap();
        assert_eq!(read, "vm-content");

        // The mock's structured run_command response is unpacked field by field.
        let out = sub.run_command("ls", None).await.unwrap();
        assert_eq!(out.stdout, "from-vm");
        assert_eq!(out.exit_code, 0);

        // Verify the substrate routed to same-named bridge tools.
        let recorded = calls.lock().unwrap();
        let names: Vec<&str> = recorded.iter().map(|(n, _)| n.as_str()).collect();
        assert_eq!(names, vec!["write_text", "read_text", "run_command"]);
    }

    /// Pins the exact JSON wire arg keys McpSubstrate sends, which MUST match the
    /// canonical sandbox bridge (`vm_mcp_server`) zod schemas. A drift here ships
    /// a silently-broken substrate: the bridge rejects the call with MCP -32602
    /// ("invalid_type, received undefined") and every VM op fails. This exact
    /// drift (`cmd`/`timeout_s`/`len`/`data`/`id` vs the bridge's
    /// `command`/`timeout`/`length`/`content_b64`/`pid`) shipped once because the
    /// routing test above never asserted arg keys — only tool names.
    #[tokio::test]
    async fn mcp_substrate_wire_args_match_canonical_bridge() {
        let calls = Arc::new(StdMutex::new(Vec::new()));
        let files = Arc::new(StdMutex::new(std::collections::HashMap::new()));
        let mock = MockSession {
            name: "vm".into(),
            calls: calls.clone(),
            files: files.clone(),
        };
        let session: Arc<Mutex<dyn McpSession>> = Arc::new(Mutex::new(mock));
        let sub = McpSubstrate::new(session, "vm");

        // Drive every tool once so its recorded arguments can be inspected below.
        sub.run_command("ls", Some(5.0)).await.unwrap();
        let bytes = sub.read_bytes("/f", Some(2), Some(4)).await.unwrap();
        assert_eq!(bytes, b"hi"); // base64("hi") round-trips
        sub.write_bytes("/f", b"hi").await.unwrap();
        // pty_start parses the numeric pid out of the bridge's text block.
        let pid = sub.pty_start("bash").await.unwrap();
        assert_eq!(pid, "4242");
        sub.pty_input(&pid, "echo\n").await.unwrap();
        let out = sub.pty_read(&pid).await.unwrap();
        assert_eq!(out, "pty-out");
        sub.pty_resize(&pid, 40, 100).await.unwrap();
        sub.pty_kill(&pid).await.unwrap();

        let recorded = calls.lock().unwrap();
        // Arguments of the first recorded call to tool `t`; panics if the tool
        // was never called.
        let by_tool =
            |t: &str| -> Value { recorded.iter().find(|(n, _)| n == t).unwrap().1.clone() };

        assert_eq!(
            by_tool("run_command"),
            json!({"command": "ls", "timeout": 5.0})
        );
        assert_eq!(
            by_tool("read_bytes"),
            json!({"path": "/f", "offset": 2, "length": 4})
        );
        // content_b64 carries base64("hi"); assert the key, not the payload.
        let wb = by_tool("write_bytes");
        assert!(
            wb.get("content_b64").is_some(),
            "write_bytes must use content_b64: {wb}"
        );
        assert!(
            wb.get("data").is_none(),
            "write_bytes must not use legacy `data`: {wb}"
        );
        assert_eq!(wb["path"], json!("/f"));
        assert_eq!(by_tool("pty_start"), json!({"command": "bash"}));
        // pty_* must send a NUMERIC pid, not a string id.
        assert_eq!(by_tool("pty_input"), json!({"pid": 4242, "data": "echo\n"}));
        assert_eq!(by_tool("pty_read"), json!({"pid": 4242}));
        assert_eq!(
            by_tool("pty_resize"),
            json!({"pid": 4242, "rows": 40, "cols": 100})
        );
        assert_eq!(by_tool("pty_kill"), json!({"pid": 4242}));
    }

    /// Returns a token that is unique within this process.
    ///
    /// The token is `<nanoseconds since the Unix epoch>-<sequence number>`. The
    /// timestamp alone is not enough: clocks coarser than a nanosecond (or two
    /// test threads calling at the same instant) can yield identical readings,
    /// so a process-wide counter is appended to break ties.
    ///
    /// A system clock set before the Unix epoch contributes `0` for the
    /// timestamp part instead of panicking; the counter still keeps tokens
    /// distinct.
    fn uuid_like() -> String {
        use std::sync::atomic::{AtomicU64, Ordering};
        use std::time::{SystemTime, UNIX_EPOCH};

        // Shared by every caller in the process; only distinctness matters, so
        // relaxed ordering is sufficient.
        static NEXT_SEQ: AtomicU64 = AtomicU64::new(0);

        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map(|elapsed| elapsed.as_nanos())
            .unwrap_or(0);
        let seq = NEXT_SEQ.fetch_add(1, Ordering::Relaxed);
        format!("{nanos}-{seq}")
    }

    /// Records the per-call timeout the substrate threads to the session, so we
    /// can assert run_command carries its own timeout (+ margin) while other
    /// ops fall back to the backstop (None).
    struct TimeoutSpySession {
        /// Observation from the most recent tool call, shared with the test.
        ///
        /// - `None`: no call recorded yet (the state the test starts from).
        /// - `Some(None)`: a call arrived without an explicit timeout, either
        ///   through `call_tool` or through `call_tool_with_timeout(.., None)`.
        /// - `Some(Some(d))`: `call_tool_with_timeout` was given timeout `d`.
        last: Arc<StdMutex<Option<Option<Duration>>>>,
    }

    #[async_trait::async_trait]
    impl McpSession for TimeoutSpySession {
        /// The spy advertises no tools.
        async fn list_tools(&mut self) -> Result<Vec<McpToolInfo>, String> {
            Ok(Vec::new())
        }

        /// Timeout-less entry point: logged as "no timeout" so that a
        /// regression which skips `call_tool_with_timeout` shows up in the
        /// recorded value.
        async fn call_tool(&mut self, _tool: &str, _args: Value) -> Result<Value, String> {
            self.observe(None)
        }

        /// Timeout-aware entry point: logs exactly the timeout it was handed.
        async fn call_tool_with_timeout(
            &mut self,
            _tool: &str,
            _args: Value,
            timeout: Option<Duration>,
        ) -> Result<Value, String> {
            self.observe(timeout)
        }

        /// Session name reported to the substrate.
        fn name(&self) -> &str {
            "vm"
        }
    }

    impl TimeoutSpySession {
        /// Stores `seen` as the latest observation and returns the canned
        /// successful command reply shared by both call paths.
        fn observe(&self, seen: Option<Duration>) -> Result<Value, String> {
            let mut slot = self.last.lock().unwrap();
            *slot = Some(seen);
            Ok(json!({"stdout": "x", "stderr": "", "exit_code": 0}))
        }
    }

    /// Checks which timeout the substrate hands to the session for each kind of
    /// operation, using [`TimeoutSpySession`] to capture the value.
    ///
    /// The recorded slot is cleared before each later phase. Without that, the
    /// `Some(None)` left behind by one phase would satisfy the next phase's
    /// assertion even if that operation never reached the session.
    #[tokio::test]
    async fn run_command_threads_its_timeout_else_backstop() {
        let last = Arc::new(StdMutex::new(None));
        let sub = McpSubstrate::new(
            Arc::new(Mutex::new(TimeoutSpySession { last: last.clone() })),
            "vm",
        );

        // run_command with a declared timeout → MCP await = timeout + 30s margin.
        sub.run_command("echo hi", Some(120.0)).await.unwrap();
        assert_eq!(
            *last.lock().unwrap(),
            Some(Some(Duration::from_secs_f64(150.0))),
            "run_command must thread its own timeout (+margin) to the session",
        );

        // run_command with NO declared timeout → None (session backstop applies).
        *last.lock().unwrap() = None;
        sub.run_command("echo hi", None).await.unwrap();
        assert_eq!(*last.lock().unwrap(), Some(None));

        // A non-command op (read_text) → None (fast; backstop is fine). The
        // result is ignored: the spy's canned reply is shaped for run_command,
        // and only the recorded timeout matters here.
        *last.lock().unwrap() = None;
        let _ = sub.read_text("/p").await;
        assert_eq!(*last.lock().unwrap(), Some(None));
    }

    /// Fails with `err` until the `succeed_at`-th attempt; counts attempts.
    struct FlakySession {
        /// Error text returned verbatim from every failing attempt.
        err: String,
        /// 1-based attempt number from which calls succeed; `u32::MAX` is used
        /// by the tests to model a failure that never clears.
        succeed_at: u32,
        /// Number of tool calls received so far, shared with the test so it can
        /// assert how many attempts the substrate made.
        attempts: Arc<StdMutex<u32>>,
    }

    #[async_trait::async_trait]
    impl McpSession for FlakySession {
        /// The flaky session advertises no tools.
        async fn list_tools(&mut self) -> Result<Vec<McpToolInfo>, String> {
            Ok(Vec::new())
        }

        /// Routes through the timeout-aware path so both entry points share a
        /// single attempt counter.
        async fn call_tool(&mut self, tool: &str, args: Value) -> Result<Value, String> {
            self.call_tool_with_timeout(tool, args, None).await
        }

        /// Counts the attempt, then fails with `err` unless the attempt number
        /// has reached `succeed_at`.
        async fn call_tool_with_timeout(
            &mut self,
            _tool: &str,
            _args: Value,
            _timeout: Option<Duration>,
        ) -> Result<Value, String> {
            // Bump and read the counter inside its own scope so the guard is
            // released before the outcome is decided.
            let attempt = {
                let mut counter = self.attempts.lock().unwrap();
                *counter += 1;
                *counter
            };

            if attempt < self.succeed_at {
                return Err(self.err.clone());
            }
            Ok(json!({"stdout": "ok", "stderr": "", "exit_code": 0}))
        }

        /// Session name reported to the substrate.
        fn name(&self) -> &str {
            "vm"
        }
    }

    /// Builds an [`McpSubstrate`] backed by a [`FlakySession`] and returns it
    /// together with the shared attempt counter.
    ///
    /// `err` is the message every failing attempt returns; `succeed_at` is the
    /// 1-based attempt number from which calls start succeeding.
    fn flaky(err: &str, succeed_at: u32) -> (McpSubstrate, Arc<StdMutex<u32>>) {
        let counter = Arc::new(StdMutex::new(0));
        let session = FlakySession {
            err: err.to_string(),
            succeed_at,
            attempts: Arc::clone(&counter),
        };
        let substrate = McpSubstrate::new(Arc::new(Mutex::new(session)), "vm");
        (substrate, counter)
    }

    /// One "fetch failed" followed by a success: the call is retried and the
    /// caller sees the successful, untagged result.
    #[tokio::test]
    async fn transient_connection_drop_is_retried_and_recovers() {
        // Fail the first attempt only; the second one succeeds.
        let (substrate, attempt_count) = flaky("fetch failed", 2);

        let stdout = substrate.run_command("echo hi", None).await.unwrap().stdout;
        assert_eq!(stdout, "ok");

        let made = *attempt_count.lock().unwrap();
        assert_eq!(made, 2, "retried once then succeeded");
    }

    /// A connection drop that never clears: the substrate makes
    /// `SUBSTRATE_RETRY_ATTEMPTS` attempts, then returns an error carrying the
    /// transport prefix.
    #[tokio::test]
    async fn persistent_connection_drop_exhausts_and_is_tagged() {
        // `u32::MAX` means no attempt ever reaches the success threshold.
        let (substrate, attempt_count) = flaky("fetch failed", u32::MAX);

        let err = substrate.run_command("echo hi", None).await.unwrap_err();
        let made = *attempt_count.lock().unwrap();

        assert!(err.starts_with(SUBSTRATE_TRANSPORT_ERR_PREFIX), "tagged: {err}");
        assert_eq!(made, SUBSTRATE_RETRY_ATTEMPTS);
    }

    /// A timed-out request is surfaced as a tagged transport error after a
    /// single attempt.
    #[tokio::test]
    async fn timeout_is_not_retried_but_is_tagged() {
        // The command may still be running on the other side after a timeout,
        // so re-issuing it automatically is not safe.
        let (substrate, attempt_count) = flaky("MCP request 'tools/call' timed out", u32::MAX);

        let err = substrate.run_command("echo hi", None).await.unwrap_err();
        let made = *attempt_count.lock().unwrap();

        assert!(err.starts_with(SUBSTRATE_TRANSPORT_ERR_PREFIX), "tagged: {err}");
        assert_eq!(made, 1, "timeout must not be retried");
    }

    /// An error reported by the tool itself passes through after one attempt
    /// and without the transport prefix.
    #[tokio::test]
    async fn task_error_is_not_retried_or_tagged() {
        // "no such file" stands in for a failure of the task, not the transport.
        let (substrate, attempt_count) = flaky("no such file", u32::MAX);

        let err = substrate.run_command("cat /nope", None).await.unwrap_err();
        let made = *attempt_count.lock().unwrap();

        assert!(!err.starts_with(SUBSTRATE_TRANSPORT_ERR_PREFIX), "not tagged: {err}");
        assert_eq!(made, 1, "task errors must not be retried");
    }
}