car-ffi-common 0.6.0

//! Thin client to the singleton CAR daemon over WebSocket JSON-RPC.
//!
//! Closes the cross-process admission gap from #139. When the FFI is
//! in `RuntimeMode::Daemon` (the default for production embedders),
//! every non-callback-bearing method call is sent to the daemon
//! instead of running an embedded engine in the FFI process. That
//! way two Node consumers on the same machine share the daemon's
//! admission semaphore and model cache instead of each running
//! their own and over-subscribing the GPU.
//!
//! ## What proxies vs what stays embedded
//!
//! Per Linus's review (commit message of the FFI proxy refactor),
//! callback-bearing methods stay in-process by design:
//!
//! - `executeProposal` — needs the JS/Python tool callback ABI.
//!   Routing daemon-initiated `tool.execute` requests back to a
//!   user's JS callback over WebSocket means TWO ThreadsafeFunctions
//!   in the call chain, callback lifetimes spanning a network
//!   boundary, and 60s daemon timeouts that legit-slow tools blow
//!   through. The hazard ratio is wrong for the gain.
//! - `inferStream`, `transcribeStream*` — same argument plus the
//!   per-event TSF dispatch each frame. Use the daemon's WS API
//!   directly for those, or stay embedded.
//!
//! Everything else (state, memory, models, inference, registry,
//! verify, secrets, accounts, integrations, health) is a clean
//! request/response pair and proxies cleanly.
//!
//! ## Modes
//!
//! - [`RuntimeMode::Daemon`] (default) — `proxy_call` is the only
//!   path. If the daemon isn't reachable, FFI calls error with a
//!   clear message. No silent fallback to embedded — that would
//!   re-create the multi-tenant overcommit bug #139 is closing.
//! - [`RuntimeMode::Embedded`] — explicit opt-in via env or
//!   constructor. The FFI binding skips the proxy entirely and
//!   runs the embedded engine in-process. Documented as "you
//!   accept multi-tenant resource overcommit; you must coordinate."
//!
//! The CLI keeps its own auto-spawn-with-fallback pattern at
//! `car-cli/src/main.rs::try_infer_via_daemon` because CLI
//! ergonomics differ from library correctness contracts.

use futures_util::{SinkExt, StreamExt};
use serde::Deserialize;
use serde_json::Value;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tokio::net::TcpStream;
use tokio::sync::Mutex as AsyncMutex;
use tokio::time::timeout;
use tokio_tungstenite::{
    connect_async, tungstenite::Message, MaybeTlsStream, WebSocketStream,
};

type WsStream = WebSocketStream<MaybeTlsStream<TcpStream>>;

/// Upper bound on establishing the WebSocket connection to the daemon.
///
/// The daemon is local (127.0.0.1) by default; a 5s ceiling catches
/// half-open / hung-handler cases without blocking the caller
/// forever. Applied around `connect_async` in `DaemonClient::call`.
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);

/// Default per-call read timeout, in seconds. Override via
/// `CAR_DAEMON_TIMEOUT` (seconds, integer). Caller-tunable because
/// some calls (model pull, large infer) legitimately take long.
const DEFAULT_READ_TIMEOUT_SECS: u64 = 30;

/// Resolve the per-call read timeout.
///
/// Reads `CAR_DAEMON_TIMEOUT` as a whole number of seconds
/// (surrounding whitespace is ignored). Falls back to
/// [`DEFAULT_READ_TIMEOUT_SECS`] when the variable is unset, not a
/// valid `u64`, or `0`.
///
/// Zero is rejected on purpose: a zero-length timeout elapses before
/// the daemon can possibly answer, so every call would fail and drop
/// the connection.
fn read_timeout() -> Duration {
    let secs = std::env::var("CAR_DAEMON_TIMEOUT")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .filter(|&secs| secs > 0)
        .unwrap_or(DEFAULT_READ_TIMEOUT_SECS);
    Duration::from_secs(secs)
}

/// Whether this FFI process talks to the daemon or runs embedded.
///
/// Obtained either from the environment string alone
/// ([`RuntimeMode::from_env`]) or from the probe / auto-spawn
/// resolution ([`RuntimeMode::resolve_or_err`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuntimeMode {
    /// Default. All non-callback methods proxy to the daemon over
    /// WebSocket. If daemon is unreachable, calls error.
    Daemon,
    /// Run an embedded engine in this FFI process. Selected by
    /// explicit opt-in, and also what `resolve_or_err` returns when
    /// it falls back because the daemon could not be reached under a
    /// policy other than `DaemonOnly`.
    Embedded,
}

impl RuntimeMode {
    /// Resolve from env (`CAR_FFI_MODE=daemon|embedded`). Default
    /// is `daemon` — production embedders share the singleton's
    /// admission semaphore and model cache. Set
    /// `CAR_FFI_MODE=embedded` for notebook / dev / offline use
    /// where the caller knows they can't share a daemon. (An older
    /// version of this comment said the default was `embedded`,
    /// which never matched the code — closed in #146.)
    ///
    /// This is the **string-only** resolution — no probe, no spawn,
    /// no fallback. Use [`RuntimeMode::resolve`] (#139) when you
    /// want the full default-daemon contract: probe → auto-spawn
    /// car-server → fall back to embedded with a stderr warning if
    /// neither lands.
    pub fn from_env() -> Self {
        match std::env::var("CAR_FFI_MODE")
            .ok()
            .as_deref()
            .map(str::trim)
            .map(str::to_ascii_lowercase)
            .as_deref()
        {
            Some("embedded") => Self::Embedded,
            _ => Self::Daemon,
        }
    }

    /// Full default-daemon contract resolution per #139. Returns
    /// `Err(reason)` only when [`ResolutionPolicy::DaemonOnly`]
    /// is set AND the daemon is unreachable — the FFI binding
    /// turns that into a structured error on the embedder's
    /// first call. All other outcomes return `Ok(Daemon)` or
    /// `Ok(Embedded)`.
    ///
    /// The resolution policy comes from
    /// [`resolution_policy`] which reads `CAR_FFI_MODE` (the
    /// preferred matrix-form env var) and falls back to the
    /// pre-fu3 legacy flags (`CAR_FFI_REQUIRE_DAEMON=1`,
    /// `CAR_FFI_NO_AUTOSPAWN=1`) for one minor-version's grace.
    ///
    /// Sync-callable on purpose — works inside
    /// `#[napi(constructor)]` and PyO3's `#[new]`. Uses
    /// `std::net::TcpStream::connect_timeout` for the probe;
    /// `std::process::Command::spawn` for the daemon fork. No
    /// tokio runtime needed.
    pub fn resolve_or_err() -> Result<Self, String> {
        use ResolutionPolicy::*;
        let policy = resolution_policy();

        // No probe, no spawn — caller said embedded, we listen.
        if policy == Embedded {
            return Ok(Self::Embedded);
        }

        // Daemon-bearing policies all probe first.
        let probe_timeout = probe_timeout();
        if car_proto::daemon::probe_daemon_port(probe_timeout) {
            return Ok(Self::Daemon);
        }

        // DaemonOnly + DaemonPrefer try to spawn. DaemonNoSpawn
        // skips the spawn-and-wait (CI / sandboxed containers).
        let should_spawn = matches!(policy, DaemonOnly | DaemonPrefer);
        if should_spawn && car_proto::daemon::try_spawn_daemon().is_ok() {
            // Poll for the spawned daemon to come up. ~2s ceiling
            // is enough for car-server's TCP listener; the model
            // registry warm-up happens in the background and
            // doesn't block the WS handshake.
            for _ in 0..8 {
                std::thread::sleep(std::time::Duration::from_millis(250));
                if car_proto::daemon::probe_daemon_port(probe_timeout) {
                    return Ok(Self::Daemon);
                }
            }
        }

        // Daemon unavailable. DaemonOnly hard-fails; the others
        // soft-fall-back to embedded with a warning.
        if policy == DaemonOnly {
            return Err(format!(
                "CAR_FFI_MODE=daemon-only: daemon at {} unreachable and \
                 `car-server` could not be spawned. Start the daemon \
                 manually or set CAR_FFI_MODE=daemon-prefer to allow \
                 embedded fallback.",
                daemon_ws_url()
            ));
        }

        warn_embedded_fallback();
        Ok(Self::Embedded)
    }
}

/// Daemon-resolution policy from `CAR_FFI_MODE` (#139-fu3).
///
/// The four canonical values:
///
/// - `embedded`: no probe at all; runs the in-process engine.
///   Same shape as v0.6.x's `CAR_FFI_MODE=embedded`.
/// - `daemon-only`: probe and try-spawn. If the daemon is still
///   unreachable, return `Err` (FFI binding maps to typed
///   exception). Replaces `CAR_FFI_REQUIRE_DAEMON=1`.
/// - `daemon-prefer` (default): probe, try-spawn, fall back to
///   embedded with a stderr warning. Same shape as
///   v0.6.x's no-flags-set default.
/// - `daemon-no-spawn`: probe only — never fork `car-server`.
///   For CI / sandboxed containers / multi-process orchestration
///   where the spawn would race or be denied. Replaces
///   `CAR_FFI_NO_AUTOSPAWN=1`.
///
/// Verbosity (the `CAR_FFI_NO_DAEMON_WARNING=1` knob) stays
/// orthogonal — different concern, different env var.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolutionPolicy {
    /// `daemon-only`: probe, try to spawn, and fail hard if the
    /// daemon is still unreachable.
    DaemonOnly,
    /// `daemon-prefer`: probe, try to spawn, otherwise fall back to
    /// the embedded runtime. Used when nothing else is configured.
    DaemonPrefer,
    /// `daemon-no-spawn`: probe only; fall back to the embedded
    /// runtime without ever spawning `car-server`.
    DaemonNoSpawn,
    /// `embedded`: skip probing entirely and run in-process.
    Embedded,
}

/// Work out which [`ResolutionPolicy`] is in force.
///
/// Precedence:
///
/// 1. A canonical `CAR_FFI_MODE` value (`embedded`, `daemon-only`,
///    `daemon-prefer`, `daemon-no-spawn`; trimmed, case-insensitive)
///    wins outright.
/// 2. `CAR_FFI_MODE=daemon` (the v0.6.x synonym for "default
///    behavior"), an unset variable, or an unrecognized value (which
///    also prints a stderr warning) carries no policy of its own and
///    falls through to the legacy flags.
/// 3. Legacy flags, kept for one minor version's grace:
///    `CAR_FFI_REQUIRE_DAEMON=1` → `DaemonOnly`, then
///    `CAR_FFI_NO_AUTOSPAWN=1` → `DaemonNoSpawn`. Each mapping emits
///    a deprecation warning via `warn_legacy_flag`.
/// 4. Otherwise `DaemonPrefer`.
///
/// # Panics
///
/// Panics when a canonical `CAR_FFI_MODE` value is combined with
/// either legacy flag. Mid-migration embedders see the conflict at
/// startup instead of silently losing strictness.
pub fn resolution_policy() -> ResolutionPolicy {
    let mode = std::env::var("CAR_FFI_MODE")
        .ok()
        .map(|value| value.trim().to_ascii_lowercase());

    let explicit = mode.as_deref().and_then(|value| match value {
        "embedded" => Some(ResolutionPolicy::Embedded),
        "daemon-only" => Some(ResolutionPolicy::DaemonOnly),
        "daemon-prefer" => Some(ResolutionPolicy::DaemonPrefer),
        "daemon-no-spawn" => Some(ResolutionPolicy::DaemonNoSpawn),
        // Legacy v0.6.x spelling: recognized, but it defers to the
        // legacy flags / default below.
        "daemon" => None,
        other => {
            eprintln!(
                "warning: CAR_FFI_MODE={other:?} unrecognized; \
                 falling back to daemon-prefer (default). Valid values: \
                 embedded | daemon-only | daemon-prefer | daemon-no-spawn"
            );
            None
        }
    });

    let require_daemon = is_env_flag_set("CAR_FFI_REQUIRE_DAEMON");
    let no_autospawn = is_env_flag_set("CAR_FFI_NO_AUTOSPAWN");

    if let Some(policy) = explicit {
        // Canonical value plus a legacy flag is ambiguous; refuse to
        // guess which one the embedder meant.
        if require_daemon || no_autospawn {
            let mut conflicts: Vec<&str> = Vec::with_capacity(2);
            if require_daemon {
                conflicts.push("CAR_FFI_REQUIRE_DAEMON=1");
            }
            if no_autospawn {
                conflicts.push("CAR_FFI_NO_AUTOSPAWN=1");
            }
            // A loud failure at startup beats silent policy drift in
            // production: fix the env, redeploy.
            panic!(
                "CAR_FFI_MODE conflict: matrix value `{:?}` is set alongside \
                 legacy flag(s) {}. Pick one — the legacy flags are deprecated \
                 (mapped to daemon-only / daemon-no-spawn). Unset the legacy \
                 flag(s) and keep CAR_FFI_MODE.",
                mode.as_deref().unwrap_or(""),
                conflicts.join(", ")
            );
        }
        return policy;
    }

    // Pre-fu3 flags: REQUIRE_DAEMON takes priority over NO_AUTOSPAWN.
    if require_daemon {
        warn_legacy_flag("CAR_FFI_REQUIRE_DAEMON=1", "CAR_FFI_MODE=daemon-only");
        ResolutionPolicy::DaemonOnly
    } else if no_autospawn {
        warn_legacy_flag("CAR_FFI_NO_AUTOSPAWN=1", "CAR_FFI_MODE=daemon-no-spawn");
        ResolutionPolicy::DaemonNoSpawn
    } else {
        ResolutionPolicy::DaemonPrefer
    }
}

/// Print a deprecation warning for `legacy` at most once per process.
///
/// The flags already warned about are remembered in a process-wide
/// `OnceLock<Mutex<HashSet<String>>>`, so repeated
/// `resolution_policy()` calls stay quiet after the first warning.
/// Nothing is printed (and nothing is recorded) when
/// `CAR_FFI_NO_DEPRECATION_WARNING=1` is set, and a poisoned lock is
/// treated as "skip the warning".
///
/// * `legacy` — the deprecated setting, as the user would write it.
/// * `replacement` — the setting to recommend instead.
fn warn_legacy_flag(legacy: &str, replacement: &str) {
    use std::collections::HashSet;
    use std::sync::{Mutex, OnceLock};

    static ALREADY_WARNED: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();

    if is_env_flag_set("CAR_FFI_NO_DEPRECATION_WARNING") {
        return;
    }

    let registry = ALREADY_WARNED.get_or_init(|| Mutex::new(HashSet::new()));
    let Ok(mut warned) = registry.lock() else {
        return;
    };

    // `insert` returns true only the first time this flag is seen.
    let first_time = warned.insert(legacy.to_owned());
    if first_time {
        eprintln!(
            "warning: {legacy} is deprecated; use {replacement} instead. \
             Suppress this warning with CAR_FFI_NO_DEPRECATION_WARNING=1."
        );
    }
}

/// Daemon port — re-export of [`car_proto::daemon::daemon_port`].
///
/// Pure delegation: the port-resolution logic lives in `car_proto`,
/// this wrapper only exposes it from this module.
pub fn daemon_port() -> u16 {
    car_proto::daemon::daemon_port()
}

/// Probe timeout from `CAR_FFI_PROBE_TIMEOUT_MS`, default 100ms.
///
/// 100ms is the localhost ceiling (`127.0.0.1` connect to a
/// listening port returns sub-millisecond on Darwin / Linux); the
/// previous 500ms was sized for hung-daemon detection — a
/// different failure mode handled by the WS-handshake timeout
/// downstream, not here.
///
/// The value is parsed as whole milliseconds (surrounding whitespace
/// ignored). Unset, unparsable, and `0` all fall back to the
/// default: `std::net::TcpStream::connect_timeout` rejects a zero
/// duration, so a zero probe window could never see a live daemon.
fn probe_timeout() -> std::time::Duration {
    const DEFAULT_PROBE_TIMEOUT_MS: u64 = 100;

    let millis = std::env::var("CAR_FFI_PROBE_TIMEOUT_MS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .filter(|&ms| ms > 0)
        .unwrap_or(DEFAULT_PROBE_TIMEOUT_MS);
    std::time::Duration::from_millis(millis)
}


/// Report whether the env var `name` is switched on.
///
/// A flag counts as set only when its value, after trimming
/// whitespace, is exactly `"1"`. Unset variables, non-Unicode values
/// and anything else (`0`, `true`, empty string, …) count as off,
/// which avoids the `=0`-meaning-off-but-actually-on footgun of
/// treating mere presence as "set".
fn is_env_flag_set(name: &str) -> bool {
    match std::env::var(name) {
        Ok(value) => value.trim() == "1",
        Err(_) => false,
    }
}

/// One-time stderr warning so embedders that quietly fell back to
/// embedded mode notice the contract isn't being met.
///
/// Suppressed by `CAR_FFI_NO_DAEMON_WARNING=1` (trim-and-equal — set
/// to anything else and the warning still fires). A process-wide
/// `Once` keeps the message from repeating when several runtimes
/// are constructed in one process.
///
/// The hard-fail hint names `CAR_FFI_MODE=daemon-only` rather than
/// the deprecated `CAR_FFI_REQUIRE_DAEMON=1`: combining that legacy
/// flag with a canonical `CAR_FFI_MODE` value makes
/// `resolution_policy` panic, so recommending it would send
/// embedders straight into the conflict. The wording also covers the
/// `daemon-no-spawn` policy, where no spawn is attempted at all.
fn warn_embedded_fallback() {
    static WARNED: std::sync::Once = std::sync::Once::new();

    if is_env_flag_set("CAR_FFI_NO_DAEMON_WARNING") {
        return;
    }
    WARNED.call_once(|| {
        eprintln!(
            "warning: CAR daemon at {} unreachable and `car-server` \
             was not or could not be auto-spawned. Falling back to an embedded \
             runtime in this process — model loads, admission, and memgine are \
             NOT shared with other CarRuntime consumers on this host. Start the \
             daemon (`car-server`) for the singleton-daemon contract, set \
             CAR_FFI_MODE=daemon-only for hard-fail, or \
             CAR_FFI_NO_DAEMON_WARNING=1 to silence this warning.",
            daemon_ws_url()
        );
    });
}

/// Daemon WS URL — `CAR_DAEMON_URL` override, default
/// `ws://127.0.0.1:9100`. Re-exported from
/// [`car_proto::daemon::daemon_ws_url`] to keep the existing
/// `car_ffi_common::proxy::daemon_ws_url` import path valid.
///
/// Pure delegation: the override/default handling itself lives in
/// `car_proto`, not here.
pub fn daemon_ws_url() -> String {
    car_proto::daemon::daemon_ws_url()
}

/// The `error` member of a JSON-RPC response envelope.
///
/// `DaemonClient::call` folds both fields into its error string.
#[derive(Debug, Deserialize)]
struct JsonRpcErrorPayload {
    /// Numeric error code reported by the daemon.
    code: i64,
    /// Human-readable error description reported by the daemon.
    message: String,
}

#[derive(Debug, Deserialize)]
struct JsonRpcEnvelope {
    result: Option<Value>,
    error: Option<JsonRpcErrorPayload>,
    id: Option<Value>,
}

/// Persistent JSON-RPC client to the daemon. One per `CarRuntime`
/// instance — keeps a single WebSocket open so all calls land on
/// the same daemon session.
///
/// **Why persistent:** the daemon scopes sessions to the WebSocket
/// connection. State, registered tools, registered policies, the
/// per-session memgine, and the per-session skill graph all live on
/// `session.runtime` which is dropped when the WS closes. A
/// connection-per-call client would route every FFI method to a
/// fresh session — `state_set` followed by `state_get` would return
/// null, `register_tool` followed by `verify` would not find the
/// tool. The proxy contract ("state lifecycle parity with embedded")
/// requires the connection to outlive individual calls.
///
/// **Concurrency model:** one in-flight call at a time per client.
/// The mutex around the WS serializes calls — fine for FFI
/// dispatch where each call is already blocking the calling
/// thread. If we ever need true concurrent in-flight requests on a
/// single client, we'd switch to id-routed multiplexing with a
/// reader task.
///
/// **Errors:** if a call fails at the transport or protocol level
/// (send, recv, parse, id mismatch, timeout), the stream is dropped
/// so the next call reconnects. An RPC-level error returned by the
/// daemon keeps the connection. No automatic retry — caller policy.
pub struct DaemonClient {
    /// The live WebSocket, or `None` before the first call and after
    /// any failure that dropped the stream. The async mutex also
    /// serializes calls.
    conn: AsyncMutex<Option<WsStream>>,
    /// WebSocket URL used for every (re)connect.
    url: String,
    /// Next JSON-RPC request id; starts at 1 and increments per call.
    req_id: AtomicU64,
}

impl DaemonClient {
    /// Create a new client. Does **not** connect — the first
    /// `call()` lazy-connects.
    pub fn new() -> Arc<Self> {
        Arc::new(Self {
            conn: AsyncMutex::new(None),
            url: daemon_ws_url(),
            req_id: AtomicU64::new(1),
        })
    }

    /// Override the daemon URL (testing / non-default ports).
    pub fn with_url(url: impl Into<String>) -> Arc<Self> {
        Arc::new(Self {
            conn: AsyncMutex::new(None),
            url: url.into(),
            req_id: AtomicU64::new(1),
        })
    }

    /// Send a JSON-RPC call and await the matching response.
    /// Lazy-connects on first call. On any I/O or protocol error,
    /// drops the stream so the next call reconnects from scratch.
    pub async fn call(&self, method: &str, params: Value) -> Result<Value, String> {
        let id = self.req_id.fetch_add(1, Ordering::Relaxed);
        let mut guard = self.conn.lock().await;

        if guard.is_none() {
            // Connect with a hard timeout. A half-open daemon
            // (process alive, port accepting, hung handler) would
            // otherwise wedge the calling thread forever — and
            // FFI calls block the JS event loop tick.
            let connect_fut = connect_async(&self.url);
            let (socket, _) = match timeout(CONNECT_TIMEOUT, connect_fut).await {
                Ok(Ok(pair)) => pair,
                Ok(Err(e)) => return Err(format!("connect daemon at {}: {}", self.url, e)),
                Err(_) => {
                    return Err(format!(
                        "connect daemon at {} timed out after {}s",
                        self.url,
                        CONNECT_TIMEOUT.as_secs()
                    ));
                }
            };
            *guard = Some(socket);
        }
        let socket = guard.as_mut().expect("just connected");

        let rpc = serde_json::json!({
            "jsonrpc": "2.0",
            "id": id,
            "method": method,
            "params": params,
        });
        let payload = serde_json::to_string(&rpc)
            .map_err(|e| format!("serialize {method} request: {e}"))?;

        // Send. On error, drop the stream so the next call retries
        // from a fresh connection.
        if let Err(e) = socket.send(Message::Text(payload.into())).await {
            *guard = None;
            return Err(format!("send {method} request: {e}"))
        }

        // Read frames until we see a response with our id. Skip
        // server-initiated notifications (no id) and other ids
        // (shouldn't happen with our serialized model, but defend
        // anyway). On any read error or protocol violation, drop
        // the stream so the slot can't desync into the next call.
        let read_to = read_timeout();
        loop {
            let next = timeout(read_to, socket.next()).await;
            let msg = match next {
                Ok(Some(Ok(m))) => m,
                Ok(Some(Err(e))) => {
                    *guard = None;
                    return Err(format!("recv daemon response: {e}"));
                }
                Ok(None) => {
                    *guard = None;
                    return Err(format!(
                        "daemon stream ended without responding to {method}"
                    ));
                }
                Err(_) => {
                    *guard = None;
                    return Err(format!(
                        "daemon read timeout on {method} after {}s",
                        read_to.as_secs()
                    ));
                }
            };
            match msg {
                Message::Text(text) => {
                    let env: JsonRpcEnvelope = match serde_json::from_str(&text) {
                        Ok(v) => v,
                        Err(e) => {
                            *guard = None;
                            return Err(format!("parse daemon response: {e}"));
                        }
                    };
                    // Match on id. Server notifications (id=null)
                    // skip; any mismatched id is a bug or a stale
                    // response from a previous request — drop the
                    // connection rather than swallow.
                    let env_id = env.id.as_ref().and_then(|v| v.as_u64());
                    if env_id.is_none() {
                        // Notification — keep reading. Today the
                        // daemon doesn't push notifications on the
                        // RPC channel; if it ever does, audit this
                        // path for re-entrancy.
                        tracing::debug!(target: "car_ffi_common::proxy", method, "skipping notification frame");
                        continue;
                    }
                    if env_id != Some(id) {
                        *guard = None;
                        return Err(format!(
                            "id mismatch on {method}: expected {id}, got {env_id:?}"
                        ));
                    }
                    if let Some(err) = env.error {
                        return Err(format!("rpc {} {}: {}", err.code, method, err.message));
                    }
                    if let Some(result) = env.result {
                        return Ok(result);
                    }
                    // Matched id, no result, no error — protocol
                    // violation. Drop so the next call reconnects.
                    *guard = None;
                    return Err(format!("daemon response missing result for {method}"));
                }
                Message::Binary(b) => {
                    tracing::debug!(target: "car_ffi_common::proxy", method, len = b.len(), "skipping binary frame");
                    continue;
                }
                Message::Ping(_) | Message::Pong(_) => continue,
                Message::Close(_) => {
                    *guard = None;
                    return Err(format!("daemon closed connection mid-{method}"));
                }
                Message::Frame(_) => continue,
            }
        }
    }
}


// ---------------------------------------------------------------------------
// Per-method wrappers. NAPI/PyO3 bindings dispatch to these when in
// `RuntimeMode::Daemon`. Mechanical request/response shape — same
// param/return JSON the daemon's JSON-RPC handlers already accept.
// ---------------------------------------------------------------------------

// ---------------------------------------------------------------------------
// Registration: tools, policies, agent basics. These calls plant
// state on the daemon's per-session runtime — exactly the state
// `verify_proposal` and `executeProposal` (in Embedded mode)
// validate against, so per-session continuity matters as much as
// for `state.*` and `memory.*`.
// ---------------------------------------------------------------------------

/// `tools.register`. Daemon expects an array of ToolDefinition.
/// Schemaless registration uses `{ name }` only — empty
/// `parameters` triggers the daemon's legacy no-op validator path.
pub async fn proxy_tools_register(
    client: &DaemonClient,
    name: &str,
) -> Result<(), String> {
    let params = serde_json::json!([{ "name": name }]);
    client.call("tools.register", params).await.map(|_| ())
}

/// `tools.register` with a full ToolSchema. Caller passes the
/// already-serialized schema JSON; we wrap it in a single-element
/// array (the daemon's tools.register accepts `Vec<ToolDefinition>`
/// and `ToolDefinition` has the same shape as `ToolSchema` minus
/// the wire-protocol namespacing).
pub async fn proxy_tools_register_schema(
    client: &DaemonClient,
    schema_json: &str,
) -> Result<(), String> {
    let schema: Value = serde_json::from_str(schema_json)
        .map_err(|e| format!("invalid ToolSchema JSON: {e}"))?;
    let params = serde_json::json!([schema]);
    client.call("tools.register", params).await.map(|_| ())
}

/// Register a policy via `policy.register`.
///
/// `params_json` must be a single serialized `PolicyDefinition`
/// (`{ name, rule, target?, key?, value?, pattern? }`); it is parsed
/// and forwarded unchanged. Callback rules are NOT supported on the
/// daemon — the caller is expected to reject `deny_tool_callback`
/// in Daemon mode and route through Embedded mode instead.
pub async fn proxy_policy_register(
    client: &DaemonClient,
    params_json: &str,
) -> Result<(), String> {
    let definition = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(e) => return Err(format!("invalid policy params JSON: {e}")),
    };
    client.call("policy.register", definition).await?;
    Ok(())
}

/// Invoke `agents.register_basics` with null params. Mirrors
/// `Runtime::register_agent_basics` on the daemon's per-session
/// runtime. The reply payload is ignored.
pub async fn proxy_register_agent_basics(client: &DaemonClient) -> Result<(), String> {
    let _ack = client.call("agents.register_basics", Value::Null).await?;
    Ok(())
}

/// State store: write a JSON value under `key` via the `state.set`
/// JSON-RPC method.
///
/// `value_json` is parsed first, so it travels as structured JSON
/// rather than as a string; invalid JSON is rejected before anything
/// is sent.
pub async fn proxy_state_set(
    client: &DaemonClient,
    key: &str,
    value_json: &str,
) -> Result<(), String> {
    let value = serde_json::from_str::<Value>(value_json)
        .map_err(|e| format!("invalid value JSON for state.set: {e}"))?;
    let params = serde_json::json!({ "key": key, "value": value });
    client.call("state.set", params).await?;
    Ok(())
}

/// State store: read JSON value under `key`. Returns `"null"` when
/// the key is absent (matches the embedded `CarRuntime::state_get`
/// behavior — callers don't have to distinguish "absent" from
/// "set to null").
///
/// # Errors
///
/// Propagates any `state.get` call failure. A failure to re-encode
/// the daemon's reply is reported as an error, like every other
/// wrapper in this module, instead of being silently replaced by
/// `"null"` (which would be indistinguishable from an absent key).
pub async fn proxy_state_get(client: &DaemonClient, key: &str) -> Result<String, String> {
    let value = client
        .call("state.get", serde_json::json!({ "key": key }))
        .await?;
    serde_json::to_string(&value).map_err(|e| format!("serialize state.get result: {e}"))
}

// ---------------------------------------------------------------------------
// Inference: GPU-bound calls. These are the methods #139 most cares
// about — running them on an embedded engine in every FFI consumer
// is exactly the multi-tenant overcommit hazard the daemon's
// admission semaphore exists to prevent.
// ---------------------------------------------------------------------------

/// Plain `infer`: forward a serialized GenerateRequest and return
/// the daemon's full InferenceResult as a JSON string.
///
/// The caller picks what to surface (the embedded NAPI `infer`
/// collapses to `{"text": ...}` for back-compat; embedded PyO3
/// returns the raw text). Both are derivable from the full result
/// JSON.
pub async fn proxy_infer(client: &DaemonClient, request_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(request_json) {
        Ok(parsed) => parsed,
        Err(e) => return Err(format!("invalid GenerateRequest JSON: {e}")),
    };
    let reply = client.call("infer", request).await?;
    serde_json::to_string(&reply).map_err(|e| format!("serialize infer result: {e}"))
}

/// `embed`. Daemon expects `{ texts: [...], model?: "..." }`.
///
/// `texts_json` is the serialized list of inputs; `model` is added
/// to the request only when given. Returns the daemon's reply
/// (array-of-arrays, one embedding per input) as a JSON string.
pub async fn proxy_embed(
    client: &DaemonClient,
    texts_json: &str,
    model: Option<&str>,
) -> Result<String, String> {
    let texts = serde_json::from_str::<Value>(texts_json)
        .map_err(|e| format!("invalid texts JSON: {e}"))?;
    let params = match model {
        Some(name) => serde_json::json!({ "texts": texts, "model": name }),
        None => serde_json::json!({ "texts": texts }),
    };
    let reply = client.call("embed", params).await?;
    serde_json::to_string(&reply).map_err(|e| format!("serialize embed result: {e}"))
}

/// `classify`. Daemon expects `{ text, labels: [...], model?: "..." }`.
///
/// `labels_json` is the serialized label list; `model` is included
/// only when given. The daemon's reply (the chosen label string or
/// a fuller result, depending on daemon shape) is passed through as
/// a JSON string.
pub async fn proxy_classify(
    client: &DaemonClient,
    text: &str,
    labels_json: &str,
    model: Option<&str>,
) -> Result<String, String> {
    let labels = serde_json::from_str::<Value>(labels_json)
        .map_err(|e| format!("invalid labels JSON: {e}"))?;

    let mut fields = serde_json::Map::new();
    fields.insert("text".to_owned(), Value::String(text.to_owned()));
    fields.insert("labels".to_owned(), labels);
    if let Some(name) = model {
        fields.insert("model".to_owned(), Value::String(name.to_owned()));
    }

    let reply = client.call("classify", Value::Object(fields)).await?;
    serde_json::to_string(&reply).map_err(|e| format!("serialize classify result: {e}"))
}

/// `verify` (proposal). `params_json` is parsed and handed to the
/// daemon untouched — the same JSON-RPC shape the existing CLI / WS
/// callers use. The reply comes back as a JSON string.
pub async fn proxy_verify(client: &DaemonClient, params_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(e) => return Err(format!("invalid verify params JSON: {e}")),
    };
    let reply = client.call("verify", request).await?;
    serde_json::to_string(&reply).map_err(|e| format!("serialize verify result: {e}"))
}

/// `tokenize`. Sends `{ model, text }` to the daemon and returns
/// its reply (`{ tokens: [u32, ...] }`) as a JSON string.
pub async fn proxy_tokenize(
    client: &DaemonClient,
    model: &str,
    text: &str,
) -> Result<String, String> {
    let params = serde_json::json!({ "model": model, "text": text });
    let reply = client.call("tokenize", params).await?;
    serde_json::to_string(&reply).map_err(|e| format!("serialize tokenize result: {e}"))
}

/// `detokenize`. Sends `{ model, tokens: [u32, ...] }` to the
/// daemon and returns its reply (`{ text: "..." }`) as a JSON
/// string.
pub async fn proxy_detokenize(
    client: &DaemonClient,
    model: &str,
    tokens: &[u32],
) -> Result<String, String> {
    let params = serde_json::json!({ "model": model, "tokens": tokens });
    let reply = client.call("detokenize", params).await?;
    serde_json::to_string(&reply).map_err(|e| format!("serialize detokenize result: {e}"))
}

/// `skills.distill`. Wraps the parsed `events_json` as
/// `{ events: [...] }`; the daemon runs
/// `MemgineEngine::distill_skills` on its per-session engine.
/// Returns the array of `DistilledSkill` as a JSON string.
pub async fn proxy_skills_distill(
    client: &DaemonClient,
    events_json: &str,
) -> Result<String, String> {
    let events = serde_json::from_str::<Value>(events_json)
        .map_err(|e| format!("invalid events JSON: {e}"))?;
    let params = serde_json::json!({ "events": events });
    let reply = client.call("skills.distill", params).await?;
    serde_json::to_string(&reply).map_err(|e| format!("serialize skills.distill result: {e}"))
}

/// `memory.consolidate`. Called with null params; the daemon's
/// ConsolidationReport is returned as a JSON string.
pub async fn proxy_memory_consolidate(client: &DaemonClient) -> Result<String, String> {
    let report = client.call("memory.consolidate", Value::Null).await?;
    match serde_json::to_string(&report) {
        Ok(json) => Ok(json),
        Err(e) => Err(format!("serialize consolidate result: {e}")),
    }
}

/// `skills.ingest_distilled`. `skills_json` must be valid JSON; it
/// is sent to the daemon as `{ skills: <parsed> }`. The daemon
/// answers `{ ingested: N }` and `N` is returned.
///
/// Errors if `skills_json` doesn't parse, if the daemon call
/// fails, if the reply has no unsigned-integer `ingested` field,
/// or if the count does not fit in `u32` (previously such a value
/// was silently truncated by an `as` cast).
pub async fn proxy_skills_ingest_distilled(
    client: &DaemonClient,
    skills_json: &str,
) -> Result<u32, String> {
    let skills: Value = serde_json::from_str(skills_json)
        .map_err(|e| format!("invalid skills JSON: {e}"))?;
    let v = client
        .call(
            "skills.ingest_distilled",
            serde_json::json!({ "skills": skills }),
        )
        .await?;
    let n = v
        .get("ingested")
        .and_then(|x| x.as_u64())
        .ok_or_else(|| format!("ingest_distilled returned unexpected shape: {v}"))?;
    // Checked narrowing: surface an out-of-range count instead of
    // wrapping it into a plausible-looking small number.
    u32::try_from(n).map_err(|_| format!("ingest_distilled count {n} does not fit in u32"))
}

/// Proxies `skill.repair` for `skill_name`. The daemon answers
/// `{ code: "..." }` on success or `null` when the skill isn't
/// broken / repair failed, matching the `Option<String>` of the
/// embedded `repair_skill`. Any reply without a string `code`
/// field (including `null`) yields `Ok(None)`.
pub async fn proxy_skill_repair(
    client: &DaemonClient,
    skill_name: &str,
) -> Result<Option<String>, String> {
    let params = serde_json::json!({ "skill_name": skill_name });
    let reply = client.call("skill.repair", params).await?;
    // `get` on a non-object (e.g. `null`) is already `None`, so the
    // null reply needs no separate branch.
    let repaired = reply
        .get("code")
        .and_then(|code| code.as_str())
        .map(str::to_owned);
    Ok(repaired)
}

/// Proxies `skills.evolve`. `events_json` must be valid JSON; it
/// is sent together with `domain` as `{ events, domain }`. The
/// daemon's `DistilledSkill` array comes back as a JSON string.
pub async fn proxy_skills_evolve(
    client: &DaemonClient,
    events_json: &str,
    domain: &str,
) -> Result<String, String> {
    // Validate the caller's JSON up front so a typo never reaches
    // the wire.
    let parsed = serde_json::from_str::<Value>(events_json)
        .map_err(|e| format!("invalid events JSON: {e}"))?;
    let params = serde_json::json!({ "events": parsed, "domain": domain });
    let evolved = client.call("skills.evolve", params).await?;
    serde_json::to_string(&evolved).map_err(|e| format!("serialize skills.evolve result: {e}"))
}

/// Proxies `skills.domains_needing_evolution`. `threshold` is only
/// included in the params when provided; otherwise an empty object
/// is sent. The reply is decoded into the `Vec<String>` of
/// underperforming domains.
pub async fn proxy_skills_domains_needing_evolution(
    client: &DaemonClient,
    threshold: Option<f64>,
) -> Result<Vec<String>, String> {
    let params = match threshold {
        Some(t) => serde_json::json!({ "threshold": t }),
        None => serde_json::json!({}),
    };
    let reply = client
        .call("skills.domains_needing_evolution", params)
        .await?;
    serde_json::from_value(reply).map_err(|e| format!("parse domains: {e}"))
}

/// Proxies `rerank`. `request_json` is the complete
/// `RerankRequest` as JSON; it is parsed and forwarded verbatim as
/// the call params. The reply comes back as a JSON string.
pub async fn proxy_rerank(
    client: &DaemonClient,
    request_json: &str,
) -> Result<String, String> {
    let request = serde_json::from_str::<Value>(request_json)
        .map_err(|e| format!("invalid RerankRequest JSON: {e}"))?;
    client.call("rerank", request).await.and_then(|ranked| {
        serde_json::to_string(&ranked).map_err(|e| format!("serialize rerank result: {e}"))
    })
}

/// Proxies `transcribe`. `request_json` is the complete
/// `TranscribeRequest` as JSON, forwarded verbatim as the params.
///
/// **Important**: `audio_path` is resolved on the daemon's
/// filesystem, not the FFI caller's. If the daemon can't reach the
/// path, use Embedded mode or the streaming voice APIs instead.
pub async fn proxy_transcribe(
    client: &DaemonClient,
    request_json: &str,
) -> Result<String, String> {
    let request = serde_json::from_str::<Value>(request_json)
        .map_err(|e| format!("invalid TranscribeRequest JSON: {e}"))?;
    let transcript = client.call("transcribe", request).await?;
    serde_json::to_string(&transcript).map_err(|e| format!("serialize transcribe result: {e}"))
}

/// Proxies `synthesize`. `request_json` is parsed and forwarded
/// verbatim as the params. As with `transcribe`, paths are
/// daemon-side: `output_path` is written on the daemon's
/// filesystem.
pub async fn proxy_synthesize(
    client: &DaemonClient,
    request_json: &str,
) -> Result<String, String> {
    let request = serde_json::from_str::<Value>(request_json)
        .map_err(|e| format!("invalid SynthesizeRequest JSON: {e}"))?;
    client.call("synthesize", request).await.and_then(|audio| {
        serde_json::to_string(&audio).map_err(|e| format!("serialize synthesize result: {e}"))
    })
}

/// Proxies `speech.prepare` (null params). Whatever status value
/// the daemon emits is returned as a JSON string, mirroring the
/// shape of the embedded `prepare_speech_runtime`.
pub async fn proxy_speech_prepare(client: &DaemonClient) -> Result<String, String> {
    client
        .call("speech.prepare", Value::Null)
        .await
        .and_then(|status| {
            serde_json::to_string(&status)
                .map_err(|e| format!("serialize speech.prepare result: {e}"))
        })
}

/// Proxies `models.route` with `{ prompt }` and returns the
/// daemon's route decision as a JSON string.
pub async fn proxy_models_route(
    client: &DaemonClient,
    prompt: &str,
) -> Result<String, String> {
    let params = serde_json::json!({ "prompt": prompt });
    let decision = client.call("models.route", params).await?;
    serde_json::to_string(&decision).map_err(|e| format!("serialize models.route result: {e}"))
}

/// Proxies `models.stats` (null params) and returns the model
/// performance profiles as a JSON string.
pub async fn proxy_models_stats(client: &DaemonClient) -> Result<String, String> {
    client
        .call("models.stats", Value::Null)
        .await
        .and_then(|profiles| {
            serde_json::to_string(&profiles)
                .map_err(|e| format!("serialize models.stats result: {e}"))
        })
}

/// `events.count`. Sent with null params; returns the per-session
/// event log size.
///
/// Errors if the daemon call fails, if the reply is not an
/// unsigned integer, or if the count does not fit in `u32`
/// (previously such a value was silently truncated by an `as`
/// cast).
pub async fn proxy_events_count(client: &DaemonClient) -> Result<u32, String> {
    let v = client.call("events.count", Value::Null).await?;
    let n = v
        .as_u64()
        .ok_or_else(|| format!("events.count returned non-u64: {v}"))?;
    // Checked narrowing rather than a wrapping `as` cast.
    u32::try_from(n).map_err(|_| format!("events.count {n} does not fit in u32"))
}

/// Proxies `replan.set_config`. The three positional arguments of
/// the FFI's `set_replan_config` are sent as the flat object
/// `{ max_replans, delay_ms, verify_before_execute }`. The
/// daemon's reply body is discarded; only success/failure is
/// reported.
pub async fn proxy_replan_set_config(
    client: &DaemonClient,
    max_replans: u32,
    delay_ms: u64,
    verify_before_execute: bool,
) -> Result<(), String> {
    let params = serde_json::json!({
        "max_replans": max_replans,
        "delay_ms": delay_ms,
        "verify_before_execute": verify_before_execute,
    });
    client.call("replan.set_config", params).await?;
    Ok(())
}

// ---------------------------------------------------------------------------
// Memory: per-session graph memory in the daemon. Cross-process
// isolation is the same shape as state — caller's facts land on the
// daemon's per-session memgine and don't leak to other sessions
// unless the embedder shares one explicitly.
// ---------------------------------------------------------------------------

/// Proxies `memory.add_fact`. `subject` and `body` are always
/// sent; `kind` and `confidence` are added to the params only
/// when provided. The daemon replies with the new fact count as a
/// JSON unsigned integer; any other reply shape is an error.
pub async fn proxy_memory_add_fact(
    client: &DaemonClient,
    subject: &str,
    body: &str,
    kind: Option<&str>,
    confidence: Option<f64>,
) -> Result<u64, String> {
    let mut params = serde_json::json!({ "subject": subject, "body": body });
    // Optional fields are omitted entirely rather than sent as null.
    if let Some(fact_kind) = kind {
        params["kind"] = Value::String(fact_kind.to_owned());
    }
    if let Some(score) = confidence {
        params["confidence"] = serde_json::json!(score);
    }
    let v = client.call("memory.add_fact", params).await?;
    match v.as_u64() {
        Some(count) => Ok(count),
        None => Err(format!("memory.add_fact returned non-u64: {v}")),
    }
}

/// Proxies `memory.query` with `{ query, k? }`; `k` is left out
/// of the params when `None`. The reply (documented as an array of
/// `{ subject, body, activation }`) is returned as a JSON string.
pub async fn proxy_memory_query(
    client: &DaemonClient,
    query: &str,
    k: Option<u32>,
) -> Result<String, String> {
    let params = match k {
        Some(limit) => serde_json::json!({ "query": query, "k": limit }),
        None => serde_json::json!({ "query": query }),
    };
    let hits = client.call("memory.query", params).await?;
    serde_json::to_string(&hits).map_err(|e| format!("serialize memory.query result: {e}"))
}

/// `memory.fact_count`. Returns the daemon-side per-session
/// `valid_fact_count()`. No params. Mirrors the embedded
/// `CarRuntime::fact_count` so the FFI consumer in Daemon mode sees
/// the daemon's facts (#146 — silent zero from the embedded
/// fallback memgine was the bug).
///
/// Errors if the daemon call fails, if the reply is not an
/// unsigned integer, or if the count does not fit in `u32`
/// (previously such a value was silently truncated by an `as`
/// cast).
pub async fn proxy_memory_fact_count(client: &DaemonClient) -> Result<u32, String> {
    let v = client.call("memory.fact_count", Value::Null).await?;
    let n = v
        .as_u64()
        .ok_or_else(|| format!("memory.fact_count returned non-u64: {v}"))?;
    // Checked narrowing rather than a wrapping `as` cast.
    u32::try_from(n).map_err(|_| format!("memory.fact_count {n} does not fit in u32"))
}

/// Proxies `memory.build_context` with `{ query }` and returns the
/// assembled context string. A reply that is not a JSON string
/// yields an empty string rather than an error.
pub async fn proxy_memory_build_context(
    client: &DaemonClient,
    query: &str,
) -> Result<String, String> {
    let params = serde_json::json!({ "query": query });
    let reply = client.call("memory.build_context", params).await?;
    // NOTE(review): the non-string case is swallowed here; confirm
    // that is intentional before tightening it.
    Ok(reply.as_str().map(str::to_owned).unwrap_or_default())
}

// ---------------------------------------------------------------------------
// Skills: ingest/find/report — per-session skill graph in the
// daemon's memgine. Same isolation contract as memory + state.
// ---------------------------------------------------------------------------

/// Proxies `skill.ingest`. `params_json` carries the complete
/// param object (name, code, platform, persona, url_pattern,
/// task_keywords, description, supersedes?) and is forwarded
/// verbatim once it parses. The daemon's reply (typically a node
/// id or status JSON) is returned as a JSON string.
pub async fn proxy_skill_ingest(
    client: &DaemonClient,
    params_json: &str,
) -> Result<String, String> {
    let params = serde_json::from_str::<Value>(params_json)
        .map_err(|e| format!("invalid skill.ingest params JSON: {e}"))?;
    client.call("skill.ingest", params).await.and_then(|reply| {
        serde_json::to_string(&reply).map_err(|e| format!("serialize skill.ingest result: {e}"))
    })
}

/// Proxies `skill.find` with `{ persona, url, task, max_results? }`;
/// `max_results` is left out of the params when `None`. The array
/// of skill matches is returned as a JSON string.
pub async fn proxy_skill_find(
    client: &DaemonClient,
    persona: &str,
    url: &str,
    task: &str,
    max_results: Option<u32>,
) -> Result<String, String> {
    let mut params = serde_json::json!({ "persona": persona, "url": url, "task": task });
    if let Some(limit) = max_results {
        params["max_results"] = serde_json::json!(limit);
    }
    client.call("skill.find", params).await.and_then(|matches| {
        serde_json::to_string(&matches).map_err(|e| format!("serialize skill.find result: {e}"))
    })
}

/// Proxies `skill.report` with `{ skill_name, outcome }` and
/// returns the daemon's status response as a JSON string.
pub async fn proxy_skill_report(
    client: &DaemonClient,
    skill_name: &str,
    outcome: &str,
) -> Result<String, String> {
    let params = serde_json::json!({ "skill_name": skill_name, "outcome": outcome });
    let status = client.call("skill.report", params).await?;
    serde_json::to_string(&status).map_err(|e| format!("serialize skill.report result: {e}"))
}

/// `skills.list`. Returns the registered skills.
pub async fn proxy_skills_list(
    client: &DaemonClient,
    params_json: Option<&str>,
) -> Result<String, String> {
    let params = match params_json {
        Some(s) => serde_json::from_str(s)
            .map_err(|e| format!("invalid skills.list params JSON: {e}"))?,
        None => Value::Null,
    };
    let v = client.call("skills.list", params).await?;
    serde_json::to_string(&v).map_err(|e| format!("serialize skills.list result: {e}"))
}

// ---------------------------------------------------------------------------
// Models: list / list_unified / pull. Registry calls — these belong
// on the daemon because the daemon owns the model store and the
// admission accounting that depends on what's actually loaded.
// ---------------------------------------------------------------------------

/// Proxies `models.list` (null params) and returns the
/// curated/built-in model catalog as a JSON string.
pub async fn proxy_models_list(client: &DaemonClient) -> Result<String, String> {
    client
        .call("models.list", Value::Null)
        .await
        .and_then(|catalog| {
            serde_json::to_string(&catalog)
                .map_err(|e| format!("serialize models.list result: {e}"))
        })
}

/// Proxies `models.list_unified` (null params) and returns the
/// unified registry — built-in + runtime-discovered +
/// user-registered — as a JSON string.
pub async fn proxy_models_list_unified(client: &DaemonClient) -> Result<String, String> {
    client
        .call("models.list_unified", Value::Null)
        .await
        .and_then(|registry| {
            serde_json::to_string(&registry)
                .map_err(|e| format!("serialize models.list_unified result: {e}"))
        })
}

/// Proxies `models.pull` with `{ name }`. The reply (documented
/// shape: `{ path: "..." }`) is returned as a JSON string.
pub async fn proxy_models_pull(client: &DaemonClient, name: &str) -> Result<String, String> {
    let params = serde_json::json!({ "name": name });
    let pulled = client.call("models.pull", params).await?;
    serde_json::to_string(&pulled).map_err(|e| format!("serialize models.pull result: {e}"))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Tests that mutate process-wide env vars must serialize. Cargo
    /// runs unit tests in parallel by default, so without this gate
    /// two tests poking `CAR_FFI_MODE` simultaneously would race
    /// and the loser sees the winner's value mid-test (#139, Linus
    /// review). `Mutex` (not `RwLock`) — a write-conservative
    /// coordination is fine here.
    static ENV_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Acquires `ENV_TEST_LOCK`, recovering the guard if an earlier
    /// test panicked while holding it. The mutex protects no data
    /// of its own, so a poisoned lock carries no broken invariant;
    /// recovering keeps one failing test from cascading into a
    /// `PoisonError` panic in every other env test.
    fn lock_env() -> std::sync::MutexGuard<'static, ()> {
        ENV_TEST_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Snapshot of a set of env vars that is written back on drop —
    /// including when the test unwinds from a failed assertion — so
    /// a failing test can't leak its mutations into later tests.
    ///
    /// Declare it *after* the lock guard: locals drop in reverse
    /// order, so the restore then happens while the lock is held.
    struct EnvRestore {
        saved: Vec<(&'static str, Option<String>)>,
    }

    impl EnvRestore {
        /// Records the current value (or absence) of each variable.
        fn capture(names: &[&'static str]) -> Self {
            let saved = names
                .iter()
                .map(|&name| (name, std::env::var(name).ok()))
                .collect();
            Self { saved }
        }
    }

    impl Drop for EnvRestore {
        fn drop(&mut self) {
            for (name, value) in &self.saved {
                match value {
                    Some(v) => std::env::set_var(name, v),
                    None => std::env::remove_var(name),
                }
            }
        }
    }

    /// Mode resolution: empty env defaults to Daemon; `embedded`
    /// (case-insensitive, trimmed) flips to Embedded; anything else
    /// stays Daemon.
    #[test]
    fn mode_from_env_defaults_to_daemon() {
        let _guard = lock_env();
        let _env = EnvRestore::capture(&["CAR_FFI_MODE"]);

        std::env::remove_var("CAR_FFI_MODE");
        assert_eq!(RuntimeMode::from_env(), RuntimeMode::Daemon);

        let cases = [
            ("embedded", RuntimeMode::Embedded),
            ("EMBEDDED", RuntimeMode::Embedded),
            ("  embedded  ", RuntimeMode::Embedded),
            ("daemon", RuntimeMode::Daemon),
            ("garbage", RuntimeMode::Daemon),
        ];
        for (raw, expected) in cases {
            std::env::set_var("CAR_FFI_MODE", raw);
            assert_eq!(RuntimeMode::from_env(), expected, "CAR_FFI_MODE={raw:?}");
        }
    }

    /// Daemon URL: env override beats default; default matches the
    /// CLI's `daemon_ws_url`.
    #[test]
    fn daemon_url_resolution() {
        let _guard = lock_env();
        let _env = EnvRestore::capture(&["CAR_DAEMON_URL"]);

        std::env::remove_var("CAR_DAEMON_URL");
        assert_eq!(daemon_ws_url(), "ws://127.0.0.1:9100");

        std::env::set_var("CAR_DAEMON_URL", "ws://other:1234");
        assert_eq!(daemon_ws_url(), "ws://other:1234");
    }

    // parse_host_port + daemon_port + probe behavior live in
    // car-proto::daemon's own #[cfg(test)] block — see #139-fu1.
    // Keeping this dead-port probe test here because it exercises
    // the proxy module's _use_ of car_proto::daemon::probe_daemon_port
    // (i.e., the integration), not just the helper in isolation.

    /// `probe_daemon_port` returns false against a dead port within
    /// the configured timeout. Port 1 is almost certainly closed
    /// (root-only and unused) so this passes regardless of whether
    /// a daemon happens to be running on the test host.
    #[test]
    fn probe_dead_port_returns_false() {
        let _guard = lock_env();
        let _env = EnvRestore::capture(&["CAR_DAEMON_URL"]);

        std::env::set_var("CAR_DAEMON_URL", "ws://127.0.0.1:1");
        assert!(
            !car_proto::daemon::probe_daemon_port(std::time::Duration::from_millis(100)),
            "probe of port 1 should fail"
        );
    }

    /// `resolution_policy()` recognizes the canonical mode strings.
    #[test]
    fn resolution_policy_recognizes_canonical_modes() {
        let _guard = lock_env();
        let _env = EnvRestore::capture(&[
            "CAR_FFI_MODE",
            "CAR_FFI_REQUIRE_DAEMON",
            "CAR_FFI_NO_AUTOSPAWN",
            "CAR_FFI_NO_DEPRECATION_WARNING",
        ]);
        std::env::remove_var("CAR_FFI_REQUIRE_DAEMON");
        std::env::remove_var("CAR_FFI_NO_AUTOSPAWN");
        // Suppress the deprecation noise for legacy-flag tests.
        std::env::set_var("CAR_FFI_NO_DEPRECATION_WARNING", "1");

        let cases = [
            ("embedded", ResolutionPolicy::Embedded),
            ("daemon-only", ResolutionPolicy::DaemonOnly),
            ("daemon-prefer", ResolutionPolicy::DaemonPrefer),
            ("daemon-no-spawn", ResolutionPolicy::DaemonNoSpawn),
            // Trim + case-insensitive.
            ("  DAEMON-ONLY  ", ResolutionPolicy::DaemonOnly),
        ];
        for (raw, expected) in cases {
            std::env::set_var("CAR_FFI_MODE", raw);
            assert_eq!(resolution_policy(), expected, "CAR_FFI_MODE={raw:?}");
        }

        // No env var set → DaemonPrefer.
        std::env::remove_var("CAR_FFI_MODE");
        assert_eq!(resolution_policy(), ResolutionPolicy::DaemonPrefer);
    }

    /// Legacy flags map to the matching policy when CAR_FFI_MODE
    /// isn't set (or is set to the legacy `daemon` synonym). Tests
    /// the back-compat one-minor-version grace.
    #[test]
    fn legacy_flags_map_to_policy() {
        let _guard = lock_env();
        let _env = EnvRestore::capture(&[
            "CAR_FFI_MODE",
            "CAR_FFI_REQUIRE_DAEMON",
            "CAR_FFI_NO_AUTOSPAWN",
            "CAR_FFI_NO_DEPRECATION_WARNING",
        ]);
        std::env::remove_var("CAR_FFI_MODE");
        std::env::remove_var("CAR_FFI_NO_AUTOSPAWN");
        std::env::set_var("CAR_FFI_NO_DEPRECATION_WARNING", "1");

        std::env::set_var("CAR_FFI_REQUIRE_DAEMON", "1");
        assert_eq!(resolution_policy(), ResolutionPolicy::DaemonOnly);
        std::env::remove_var("CAR_FFI_REQUIRE_DAEMON");

        std::env::set_var("CAR_FFI_NO_AUTOSPAWN", "1");
        assert_eq!(resolution_policy(), ResolutionPolicy::DaemonNoSpawn);
        std::env::remove_var("CAR_FFI_NO_AUTOSPAWN");

        // CAR_FFI_MODE=daemon (legacy synonym) falls through to
        // the legacy-flag step — daemon-prefer if no legacy set.
        std::env::set_var("CAR_FFI_MODE", "daemon");
        assert_eq!(resolution_policy(), ResolutionPolicy::DaemonPrefer);
    }

    /// CAR_FFI_MODE + legacy-flag conflict panics with a clear
    /// message instead of silently picking one. Mid-migration
    /// embedders see the conflict in the env, fix it, redeploy.
    #[test]
    fn conflict_between_canonical_and_legacy_panics() {
        let _guard = lock_env();
        let _env = EnvRestore::capture(&["CAR_FFI_MODE", "CAR_FFI_REQUIRE_DAEMON"]);
        std::env::set_var("CAR_FFI_MODE", "daemon-prefer");
        std::env::set_var("CAR_FFI_REQUIRE_DAEMON", "1");

        // The expected panic is caught here, so it never unwinds
        // through `_guard` and does not poison the lock.
        let result = std::panic::catch_unwind(resolution_policy);
        assert!(result.is_err(), "expected panic on conflict");
    }

    /// `RuntimeMode::resolve_or_err` honors `CAR_FFI_MODE=embedded`
    /// without doing any probe / spawn / fallback work. Critical
    /// for notebook / CI environments where probe latency is
    /// unacceptable and the caller knows daemon isn't an option.
    #[test]
    fn resolve_embedded_short_circuits_probe() {
        let _guard = lock_env();
        let _env = EnvRestore::capture(&["CAR_FFI_MODE"]);
        std::env::set_var("CAR_FFI_MODE", "embedded");

        let started = std::time::Instant::now();
        assert_eq!(
            RuntimeMode::resolve_or_err().unwrap(),
            RuntimeMode::Embedded
        );
        assert!(
            started.elapsed() < std::time::Duration::from_millis(50),
            "embedded short-circuit shouldn't pay the probe cost"
        );
    }

    /// `DaemonClient::call` against a non-listening port surfaces
    /// the connection error. We pick port 1 (almost certainly
    /// closed) rather than the daemon default, so this passes
    /// whether or not a daemon is running on the test host.
    #[tokio::test]
    async fn client_call_against_dead_port_errors_clearly() {
        let client = DaemonClient::with_url("ws://127.0.0.1:1");
        let r = client.call("state.get", serde_json::json!({"key": "x"})).await;
        assert!(r.is_err(), "expected error against dead port");
        let msg = r.unwrap_err();
        assert!(
            msg.contains("connect daemon"),
            "expected connect-error wording, got: {msg}"
        );
    }

    /// After a failed call, the next call still tries to connect
    /// (we don't poison the slot). Same dead port — should still
    /// surface "connect daemon" error.
    #[tokio::test]
    async fn client_recovers_from_failure_to_retry_connect() {
        let client = DaemonClient::with_url("ws://127.0.0.1:1");
        let _ = client
            .call("state.get", serde_json::json!({"key": "x"}))
            .await;
        let r = client
            .call("state.get", serde_json::json!({"key": "y"}))
            .await;
        assert!(r.is_err());
        assert!(r.unwrap_err().contains("connect daemon"));
    }
}