car-ffi-common 0.24.1

//! Thin client to the singleton CAR daemon over WebSocket JSON-RPC.
//!
//! As of v0.8.0, every FFI binding is a thin daemon client — there is
//! no embedded-engine fallback. All non-callback-bearing method calls
//! travel over WebSocket to the singleton car-server daemon, which
//! shares one admission semaphore and one model cache across every
//! consumer on the host. (The pre-v0.8 embedded-fallback mode existed
//! to ease migration and re-created the multi-tenant overcommit
//! hazard #139 was opened to close; v0.8 retires it.)
//!
//! ## Server-initiated requests
//!
//! For methods that need tool callbacks (executeProposal,
//! registerAgentRunner, inferStream), the daemon's `WsToolExecutor`
//! sends `tools.execute` JSON-RPC requests back to the client over
//! the same WebSocket. [`DaemonClient::register_handler`] installs a
//! closure that the client's recv loop dispatches those requests to.
//! See `docs/websocket-protocol.md` for the wire shape.
//!
//! ## Connection lifetime
//!
//! One [`DaemonClient`] per FFI runtime instance — a single WebSocket
//! held open for the lifetime of the consumer. The daemon scopes
//! sessions to the WS connection (state.set, registered tools, the
//! per-session memgine), so a connection-per-call client would lose
//! state continuity. Lazy-connects on first call.
//!
//! The CLI keeps its own auto-spawn pattern at
//! `car-cli/src/main.rs::try_infer_via_daemon` because CLI
//! ergonomics differ from library correctness contracts.

use futures_util::stream::{SplitSink, SplitStream};
use futures_util::{SinkExt, StreamExt};
use serde::Deserialize;
use serde_json::Value;
use std::collections::HashMap;
use std::future::Future;
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex as StdMutex};
use std::time::Duration;
use tokio::net::TcpStream;
use tokio::sync::{oneshot, Mutex as AsyncMutex};
use tokio::time::timeout;
use tokio_tungstenite::{connect_async, tungstenite::Message, MaybeTlsStream, WebSocketStream};

/// Client-side WebSocket stream as returned by `connect_async`: a TCP
/// transport that may or may not be wrapped in TLS.
type WsStream = WebSocketStream<MaybeTlsStream<TcpStream>>;
/// Write (sink) half of a split [`WsStream`]; carries outbound [`Message`]s.
type WsWrite = SplitSink<WsStream, Message>;
/// Read (stream) half of a split [`WsStream`]; consumed by the recv loop.
type WsRead = SplitStream<WsStream>;

/// Result delivered to a pending `call()` waiter by the recv loop:
/// `Ok` carries the JSON-RPC `result` value, `Err` an error message that
/// `call()` surfaces as `rpc {method}: {message}`.
type RpcResult = Result<Value, String>;

/// Handler invoked when the daemon sends a server-initiated JSON-RPC
/// request (e.g. `tools.execute` from `WsToolExecutor`). Returns
/// either a result Value (sent back as `result`) or an error message
/// (sent back as `error.message`, code -32000).
///
/// The handler is type-erased (boxed future behind an `Arc`) so handlers
/// with different concrete future types can share one map and be cloned
/// out of it cheaply. The `Value` argument is presumably the request's
/// `params` — confirm against the recv loop's dispatch code.
pub type ServerRequestHandler =
    Arc<dyn Fn(Value) -> Pin<Box<dyn Future<Output = Result<Value, String>> + Send>> + Send + Sync>;

/// Handler invoked when the daemon sends a server-initiated JSON-RPC
/// notification (no id, no response expected) — e.g. `voice.event`,
/// `a2ui.event`. Fire-and-forget; failures are caller's problem.
///
/// Unlike [`ServerRequestHandler`] this is a plain synchronous closure
/// with no return value, so nothing is ever sent back to the daemon.
pub type NotificationHandler = Arc<dyn Fn(Value) + Send + Sync>;

/// Connect timeout. The daemon is local (127.0.0.1) by default; a
/// 5s ceiling catches half-open / hung-handler cases without
/// blocking the caller forever.
///
/// Bounds only the `connect_async` step in
/// `DaemonClient::ensure_connected`; the `session.auth` exchange that
/// follows is bounded separately by `read_timeout()`.
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);

/// Fallback per-call read timeout, in seconds, used when
/// `CAR_DAEMON_TIMEOUT` is unset or does not parse as an unsigned
/// integer. Some calls (model pull, large infer) legitimately run long,
/// which is why the value is overridable at runtime.
const DEFAULT_READ_TIMEOUT_SECS: u64 = 30;

/// Generic per-call read timeout.
///
/// Reads `CAR_DAEMON_TIMEOUT` (whole seconds; surrounding whitespace is
/// ignored) on every invocation and falls back to
/// [`DEFAULT_READ_TIMEOUT_SECS`] when the variable is missing, not valid
/// Unicode, or not a valid `u64`.
fn read_timeout() -> Duration {
    let secs = match std::env::var("CAR_DAEMON_TIMEOUT") {
        Ok(raw) => raw
            .trim()
            .parse::<u64>()
            .unwrap_or(DEFAULT_READ_TIMEOUT_SECS),
        Err(_) => DEFAULT_READ_TIMEOUT_SECS,
    };
    Duration::from_secs(secs)
}

/// Floor for the **blocking execute methods** (`proposal.submit`) client read
/// timeout, which legitimately wait on long-running tool callbacks. Far higher
/// than the generic [`DEFAULT_READ_TIMEOUT_SECS`] so the client doesn't cut off
/// a call the daemon's per-action timeout still permits (Parslee-ai/car#259).
///
/// As of Parslee-ai/car#265 this is no longer a *flat* ceiling: the read
/// timeout for `proposal.submit` is **derived from the proposal in hand**
/// (`proposal_submit_read_timeout`) so a legitimately-long, retried, or chained
/// in-budget call is not re-reaped client-side — the exact #259 reap class the
/// old flat 900s still left open. The derived deadline accounts for the
/// daemon-side worst case: the executor restarts a fresh per-attempt deadline
/// per retry (a `FailureBehavior::Retry` action runs `(max_retries + 1) ×
/// timeout_ms`, default 4 attempts; non-retry actions run once) and
/// sequential/chained levels add up (`Σ timeout_ms`). This value is the floor
/// used when the derived deadline is *smaller* (short proposals keep the
/// generous 900s), and the default when no proposal is parseable.
/// `CAR_EXECUTE_TIMEOUT` (seconds) overrides the floor and, when set, is the
/// hard ceiling — an explicit operator cap wins over the derivation, even when
/// it lowers the deadline below the derived-safe value (a `tracing::warn!`
/// fires in that case).
///
/// In short: with `CAR_EXECUTE_TIMEOUT` unset the effective deadline is
/// `max(derived budget, this floor)`; with it set, the env value is used
/// verbatim and this constant only decides whether the warning fires.
const DEFAULT_EXECUTE_TIMEOUT_SECS: u64 = 900;

/// Default per-action tool budget (ms) assumed when an action of a
/// `proposal.submit` declares no explicit `timeout_ms`. Mirrors the daemon's
/// `car_server_core::session::DEFAULT_TOOL_TIMEOUT_MS` (300s) so the derived
/// client deadline tracks the daemon-side `tool_callback_timeout(None)` bound.
/// Kept as a local constant rather than a cross-crate import because
/// `car-ffi-common` must not depend on `car-server-core`.
///
/// `derive_proposal_budget_secs` also falls back to this value when
/// `timeout_ms` is present but is not a non-negative JSON integer.
/// NOTE(review): being a hand-mirrored value, it must be updated together
/// with the daemon-side constant — nothing in this file enforces that.
const DEFAULT_TOOL_TIMEOUT_MS: u64 = 300_000;

/// Default `max_retries` assumed for a `FailureBehavior::Retry` action that
/// does not pin one. Mirrors `car_ir::actions::default_max_retries` (3). The
/// daemon executor runs `max_retries + 1` attempts for a retry action
/// (`car_engine::executor::execute_with_retry`), so the derived per-action
/// worst case is `max_retries + 1` = 4 attempts at this default — NOT 3. An
/// earlier revision assumed a flat 3-attempt ceiling, which undercounts the
/// real retry worst case and re-reaps a legitimately-retrying call (the exact
/// #265 reap class). Kept as a local constant rather than a cross-crate import
/// because `car-ffi-common` must not depend on `car-ir`.
///
/// This is the number of *retries*, not attempts: `action_attempts` adds the
/// initial try on top of it.
const DEFAULT_MAX_RETRIES: u64 = 3;

/// Transport grace added on top of the derived per-proposal budget so the
/// client read fires strictly *after* the daemon's own per-action waits — the
/// same "executor reaps first" ordering the daemon-side
/// `tool_callback_timeout` gives the WS callback wait, lifted to the
/// proposal level. (`tool_callback_timeout` is written as plain code rather
/// than an intra-doc link because it is not defined in the visible part of
/// this file.)
///
/// Added once per proposal by `derive_proposal_budget_secs`, not once per
/// action.
const PROPOSAL_TRANSPORT_GRACE_SECS: u64 = 30;

/// Pick the client read timeout for an outbound JSON-RPC `method`.
///
/// `proposal.submit` (and the `executeProposal` FFI path it backs) blocks on
/// the agent's tool callbacks, so it must not share the short generic read
/// timeout. For that method the request `params` (which carry the
/// `proposal`) are handed to [`proposal_submit_read_timeout`] so the deadline
/// is derived from the actual action budgets. Every other method — matched
/// by exact name — gets the generic [`read_timeout`], and `params` is not
/// inspected.
fn read_timeout_for(method: &str, params: &Value) -> Duration {
    match method {
        "proposal.submit" => proposal_submit_read_timeout(params),
        _ => read_timeout(),
    }
}

/// Client read timeout for a `proposal.submit`, derived from the proposal's
/// own action budgets (Parslee-ai/car#265).
///
/// Daemon-side worst case: every action may be retried (the executor restarts
/// a fresh `timeout(timeout_ms, dispatch)` per attempt; a
/// `FailureBehavior::Retry` action runs `max_retries + 1` attempts, every
/// other behavior runs once — see [`action_attempts`]), and sequential levels
/// (chained dependencies, or `FailureBehavior::Abort` which serializes) add
/// up. Independent same-level actions run concurrently, so a level really
/// costs the `max` of its actions — but the DAG is not reconstructed here;
/// the conservative `Σ(action budget × per-action attempts)` upper bound is
/// used instead, which never under-waits. A transport grace is added, and the
/// result is floored at [`DEFAULT_EXECUTE_TIMEOUT_SECS`] so short proposals
/// keep the generous default.
///
/// `CAR_EXECUTE_TIMEOUT` (seconds), when set and parseable as a `u64`, is an
/// explicit operator override: the returned duration is exactly that value,
/// never a larger derived one.
fn proposal_submit_read_timeout(params: &Value) -> Duration {
    // Budget-derived deadline, never below the generous default floor.
    let safe_secs = derive_proposal_budget_secs(params).max(DEFAULT_EXECUTE_TIMEOUT_SECS);

    let pinned = std::env::var("CAR_EXECUTE_TIMEOUT")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok());

    // No operator override: the derived (floored) deadline stands.
    let Some(cap) = pinned else {
        return Duration::from_secs(safe_secs);
    };

    // An explicit cap wins: the operator pinned the ceiling. This is a
    // *semantic reversal* from the pre-#265 meaning, where the env var only
    // ever *raised* the flat ceiling. It can now LOWER the deadline below the
    // derived-safe value — re-reaping a legitimately in-budget call. Warn when
    // that happens so an operator who set the knob for the old "raise the
    // ceiling" reason notices it is now cutting the derivation short.
    // (Over-waiting is safe; under-waiting is the bug.)
    if cap < safe_secs {
        tracing::warn!(
            target: "car_ffi_common::proxy",
            cap_secs = cap,
            derived_secs = safe_secs,
            "CAR_EXECUTE_TIMEOUT caps the proposal.submit deadline BELOW the \
             budget-derived safe value; a legitimately in-budget (retried/chained) \
             call may be re-reaped client-side. Since #265 this env var is a hard \
             ceiling, not a floor — re-check whether you still need it set"
        );
    }
    Duration::from_secs(cap)
}

/// Cumulative per-proposal daemon-side budget in seconds, derived from the
/// `proposal.actions[*].timeout_ms`. Returns 0 when no proposal/actions are
/// parseable (the caller then falls back to the flat floor).
fn derive_proposal_budget_secs(params: &Value) -> u64 {
    let Some(actions) = params
        .get("proposal")
        .and_then(|p| p.get("actions"))
        .and_then(|a| a.as_array())
    else {
        return 0;
    };
    if actions.is_empty() {
        return 0;
    }

    // Σ(action budget × per-action attempts). saturating throughout so a
    // pathological `timeout_ms` can't wrap the accumulator.
    let mut total_ms: u64 = 0;
    for action in actions {
        let budget_ms = action
            .get("timeout_ms")
            .and_then(|v| v.as_u64())
            .unwrap_or(DEFAULT_TOOL_TIMEOUT_MS);
        total_ms = total_ms.saturating_add(budget_ms.saturating_mul(action_attempts(action)));
    }

    // ms → secs, rounding up so a sub-second remainder never truncates the
    // budget below what the daemon permits, then add the transport grace.
    let secs = total_ms.div_ceil(1000);
    secs.saturating_add(PROPOSAL_TRANSPORT_GRACE_SECS)
}

/// Worst-case attempt count the daemon executor will run for a single action,
/// derived from the same `params` shape we already walk.
///
/// Mirrors `car_engine::executor::execute_with_retry`: an action with
/// `failure_behavior == "retry"` runs `max_retries + 1` attempts (the initial
/// try plus up to `max_retries` retries); every other behavior (`abort`,
/// `skip`, or unset → `abort`) runs exactly once. `max_retries` defaults to
/// [`DEFAULT_MAX_RETRIES`] (3 → 4 attempts), matching `car_ir`'s
/// `default_max_retries`. Returns `>= 1`.
///
/// This is the per-action piece of the #265 fix: a flat `× 3` undercounted the
/// retry worst case (`max_retries + 1` = 4 at the default), so the client
/// deadline could be shorter than the daemon's actual run and re-reap a
/// legitimately-retrying call. Over-counting here only widens the client
/// deadline — the safe direction.
fn action_attempts(action: &Value) -> u64 {
    let is_retry = action
        .get("failure_behavior")
        .and_then(|v| v.as_str())
        .map(|s| s.eq_ignore_ascii_case("retry"))
        .unwrap_or(false);

    if is_retry {
        let max_retries = action
            .get("max_retries")
            .and_then(|v| v.as_u64())
            .unwrap_or(DEFAULT_MAX_RETRIES);
        max_retries.saturating_add(1)
    } else {
        1
    }
}

/// Tombstone of the pre-v0.8 `RuntimeMode` enum, reduced to one variant.
///
/// FFI bindings only support the daemon: embedded engines inside the FFI
/// process were retired in v0.8.0 to close the multi-tenant
/// admission/cache overcommit hazard #139 was opened for. The type
/// survives so existing FFI source that names `RuntimeMode::Daemon`
/// keeps compiling through the migration; the now-dead
/// `if self.mode == Daemon` branches will be pruned in follow-up
/// commits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuntimeMode {
    /// Every non-callback method is proxied to the daemon over
    /// WebSocket. When the daemon is unreachable, the first FFI call
    /// surfaces a clear `connect daemon at ws://...` error.
    Daemon,
}

impl RuntimeMode {
    /// Returns [`RuntimeMode::Daemon`] unconditionally; no environment
    /// variable is consulted. Retained for source compatibility with
    /// v0.6/v0.7 FFI bindings that read `CAR_FFI_MODE`.
    pub fn from_env() -> Self {
        RuntimeMode::Daemon
    }

    /// Returns `Ok(RuntimeMode::Daemon)` unconditionally. Retained for
    /// source compatibility — the pre-v0.8 helper probed for the
    /// daemon, optionally spawned car-server, and fell back to embedded
    /// with a stderr warning. In v0.8 the daemon is the only path, and
    /// the unreachable-daemon error is deferred to the first `call()`
    /// so construction stays infallible.
    pub fn resolve_or_err() -> Result<Self, String> {
        Ok(Self::from_env())
    }
}

/// Daemon port — re-export of [`car_proto::daemon::daemon_port`].
///
/// Pure delegation: this function adds no logic of its own, so any
/// defaulting or environment override is whatever `car_proto` implements.
pub fn daemon_port() -> u16 {
    car_proto::daemon::daemon_port()
}

/// Daemon WS URL — `CAR_DAEMON_URL` override, default
/// `ws://127.0.0.1:9100`. Re-exported from
/// [`car_proto::daemon::daemon_ws_url`] to keep the existing
/// `car_ffi_common::proxy::daemon_ws_url` import path valid.
///
/// NOTE(review): the override variable and default quoted above are
/// implemented in `car_proto`, not here — confirm there before relying on
/// them. `DaemonClient::new` calls this once and stores the result.
pub fn daemon_ws_url() -> String {
    car_proto::daemon::daemon_ws_url()
}

/// The `error` member of an incoming JSON-RPC frame (see
/// [`IncomingFrame::error`]). Both fields are required when the member is
/// present; additional members such as `data` are not captured.
#[derive(Debug, Deserialize)]
struct JsonRpcErrorPayload {
    /// Numeric JSON-RPC error code. Not read by any code visible in this
    /// part of the file — presumably kept for `Debug` output; verify.
    code: i64,
    /// Human-readable error text; `send_auth` returns it inside
    /// `AuthOutcome::Rejected`.
    message: String,
}

/// Generic JSON-RPC frame as it arrives on the WebSocket. Covers
/// three shapes:
///
/// - **Response**: `id` + (`result` xor `error`).
/// - **Request**: `id` + `method` (+ optional `params`).
/// - **Notification**: `method` (+ optional `params`), no `id`.
///
/// All fields default to None so a single struct decodes any of the
/// three. The recv loop demuxes by inspecting which fields are set.
#[derive(Debug, Deserialize)]
struct IncomingFrame {
    /// Method name; set on requests and notifications, absent on
    /// responses.
    #[serde(default)]
    method: Option<String>,
    /// Arguments accompanying `method`, left as raw JSON.
    #[serde(default)]
    params: Option<Value>,
    /// Success payload of a response, left as raw JSON.
    #[serde(default)]
    result: Option<Value>,
    /// Failure payload of a response.
    #[serde(default)]
    error: Option<JsonRpcErrorPayload>,
    /// Correlation id, kept as raw JSON because it is not always a number:
    /// this client issues numeric ids, while server-initiated requests may
    /// use string ids (e.g. `"cb-1"`).
    #[serde(default)]
    id: Option<Value>,
}

/// Outcome of a `session.auth` handshake exchange. Used by
/// [`send_auth`] so the caller can decide whether to retry (e.g. on
/// `token mismatch` after re-reading the auth-token file).
///
/// Transport-level failures (send/recv errors, timeout, closed socket)
/// are not represented here; `send_auth` reports those through the `Err`
/// side of its `Result`.
enum AuthOutcome {
    /// The daemon's reply carried no JSON-RPC `error` member.
    Accepted,
    /// The daemon replied with a JSON-RPC error; carries its `message`.
    Rejected(String),
}

/// Send a single `session.auth` frame and await the response.
///
/// Splits out of [`DaemonClient::ensure_connected`] so the retry path
/// for token rotation (#244) can re-invoke the handshake on the same
/// not-yet-split socket with a freshly-read token.
///
/// # Parameters
///
/// - `socket`: the connected, not-yet-split WebSocket.
/// - `token`: auth token sent as `params.token`.
/// - `id`: JSON-RPC id stamped on the handshake request.
///
/// # Returns
///
/// - `Ok(AuthOutcome::Rejected(message))` when the reply is a text frame
///   that parses as JSON-RPC and carries an `error` member.
/// - `Ok(AuthOutcome::Accepted)` for any other text (or binary) reply.
///
/// # Errors
///
/// Returns `Err` when the request cannot be serialized or sent, when the
/// socket errors, when the daemon closes the connection (stream end *or*
/// a Close frame) before replying, or when no reply arrives within
/// [`read_timeout`].
///
/// WebSocket control frames are handled explicitly: Ping/Pong frames that
/// arrive before the reply are skipped rather than mistaken for an
/// acceptance, and a Close frame is reported as a closed connection rather
/// than as a successful handshake. The whole wait — including skipped
/// control frames — shares one deadline.
async fn send_auth(
    socket: &mut WsStream,
    token: &str,
    id: u64,
) -> Result<AuthOutcome, String> {
    let handshake = serde_json::json!({
        "jsonrpc": "2.0",
        "id": id,
        "method": "session.auth",
        "params": { "token": token },
    });
    let payload = serde_json::to_string(&handshake)
        .map_err(|e| format!("serialize session.auth: {e}"))?;
    socket
        .send(Message::Text(payload.into()))
        .await
        .map_err(|e| format!("send session.auth: {e}"))?;

    let auth_to = read_timeout();

    // Read frames until one actually answers the handshake. Wrapping the
    // loop (not each `next()`) in the timeout keeps a stream of control
    // frames from extending the deadline indefinitely.
    let await_reply = async move {
        loop {
            match socket.next().await {
                Some(Ok(Message::Text(text))) => {
                    if let Ok(env) = serde_json::from_str::<IncomingFrame>(&text) {
                        if let Some(err) = env.error {
                            return Ok(AuthOutcome::Rejected(err.message));
                        }
                    }
                    return Ok(AuthOutcome::Accepted);
                }
                // Keep-alive traffic is not an answer; keep waiting.
                Some(Ok(Message::Ping(_) | Message::Pong(_))) => continue,
                // The daemon hung up on us — never a successful handshake.
                Some(Ok(Message::Close(_))) | None => {
                    return Err("daemon closed during session.auth".to_string());
                }
                // Any other data frame keeps the historical lenient
                // behavior of counting as an acceptance.
                Some(Ok(_)) => return Ok(AuthOutcome::Accepted),
                Some(Err(e)) => return Err(format!("recv session.auth response: {e}")),
            }
        }
    };

    match timeout(auth_to, await_reply).await {
        Ok(outcome) => outcome,
        Err(_) => Err(format!(
            "session.auth timed out after {}s",
            auth_to.as_secs()
        )),
    }
}

/// Persistent JSON-RPC client to the daemon. One per `CarRuntime`
/// instance — keeps a single WebSocket open so all calls land on
/// the same daemon session.
///
/// **Why persistent:** the daemon scopes sessions to the WebSocket
/// connection. State, registered tools, registered policies, the
/// per-session memgine, and the per-session skill graph all live on
/// `session.runtime` which is dropped when the WS closes. A
/// connection-per-call client would route every FFI method to a
/// fresh session — `state_set` followed by `state_get` would return
/// null, `register_tool` followed by `verify` would not find the
/// tool. The proxy contract ("state lifecycle parity with embedded")
/// requires the connection to outlive individual calls.
///
/// **Concurrency model:** id-routed multiplexing. One persistent
/// recv task demuxes incoming frames into either pending response
/// waiters (matched by numeric id) or registered server-request
/// handlers (matched by method name). Multiple `call()` invocations
/// can be in-flight concurrently against the same connection — they
/// only contend on the single write half lock for the duration of
/// the actual `send`.
///
/// **Server-initiated requests:** the daemon's `WsToolExecutor`
/// sends `tools.execute` JSON-RPC requests back to the client when a
/// tool needs to fire. Register a handler with
/// [`DaemonClient::register_handler`] before calling
/// `proposal.execute` (or any method that may reach a tool dispatch
/// path) so those callbacks get routed to your tool implementation
/// instead of returning a "no handler" error to the daemon.
///
/// **Errors:** if a call fails (connect, send, recv, parse), the
/// connection state is reset so the next call reconnects from
/// scratch. No automatic retry — caller policy. Pending waiters at
/// reset time receive a clear "daemon connection at {url} closed
/// before response" error.
// NOTE(review): the timeout selection in this file special-cases
// `proposal.submit` as the method that blocks on tool callbacks, while the
// paragraph above names `proposal.execute` — confirm which method name is
// current and align the two.
pub struct DaemonClient {
    /// Connection state: write half + recv task abort handle. Lazy:
    /// `None` until the first `call()` connects. The async mutex
    /// serializes connect-vs-call races so two concurrent first
    /// calls don't both spawn recv tasks.
    state: AsyncMutex<Option<ConnState>>,
    /// Daemon WS URL. Captured at construction; reused on every
    /// reconnect.
    url: String,
    /// Monotonic id source for outbound requests. Numeric ids — the
    /// recv loop only matches numeric ids against `pending`, so
    /// server-initiated string ids (e.g. `"cb-1"` from
    /// `WsToolExecutor`) cannot collide. Starts at 1; ids 0 and 1 are
    /// also used by the pre-split `session.auth` handshake, whose
    /// replies are consumed before the recv loop starts.
    req_id: AtomicU64,
    /// Map of in-flight outbound request id → oneshot sender. Recv
    /// loop populates the response side; `call()` registers entries
    /// before sending.
    pending: Arc<StdMutex<HashMap<u64, oneshot::Sender<RpcResult>>>>,
    /// Map of method name → server-request handler. Persists across
    /// reconnects — register once, the new recv loop picks them up
    /// automatically. See [`register_handler`](Self::register_handler).
    handlers: Arc<StdMutex<HashMap<String, ServerRequestHandler>>>,
    /// Map of method name → notification handler. Notifications arrive
    /// from the daemon as JSON-RPC frames with `method` set and no
    /// `id` (e.g. `voice.event`, `a2ui.event`). Registered handlers are
    /// invoked by the recv loop — see the liveness note on
    /// [`register_notification_handler`](Self::register_notification_handler),
    /// which describes the invocation as synchronous — and their
    /// failures don't propagate. Persists across reconnects.
    notif_handlers: Arc<StdMutex<HashMap<String, NotificationHandler>>>,
}

/// Per-connection state. Held inside the `state` AsyncMutex.
///
/// Created by `ensure_connected` after a successful connect (and auth
/// handshake, when a token is available); taken out and discarded by
/// `reset`.
struct ConnState {
    /// Write half. Wrapped in its own AsyncMutex so concurrent
    /// callers can each take it briefly to send their request frame
    /// without holding the connection state lock for the entire
    /// send-and-await cycle. A clone of the same `Arc` is handed to the
    /// recv task so it can write on this connection too.
    write: Arc<AsyncMutex<WsWrite>>,
    /// Recv task abort handle. On `reset()`, abort the task so the
    /// read loop releases the read half cleanly; the task's drop
    /// also drains pending waiters.
    /// NOTE(review): the "task's drop drains pending waiters" part
    /// depends on recv-loop code outside this view; `reset()` itself
    /// also clears the pending map explicitly.
    recv_task: tokio::task::AbortHandle,
}

impl DaemonClient {
    /// Create a new client. Does **not** connect — the first
    /// `call()` lazy-connects.
    pub fn new() -> Arc<Self> {
        Arc::new(Self {
            state: AsyncMutex::new(None),
            url: daemon_ws_url(),
            req_id: AtomicU64::new(1),
            pending: Arc::new(StdMutex::new(HashMap::new())),
            handlers: Arc::new(StdMutex::new(HashMap::new())),
            notif_handlers: Arc::new(StdMutex::new(HashMap::new())),
        })
    }

    /// Override the daemon URL (testing / non-default ports).
    pub fn with_url(url: impl Into<String>) -> Arc<Self> {
        Arc::new(Self {
            state: AsyncMutex::new(None),
            url: url.into(),
            req_id: AtomicU64::new(1),
            pending: Arc::new(StdMutex::new(HashMap::new())),
            handlers: Arc::new(StdMutex::new(HashMap::new())),
            notif_handlers: Arc::new(StdMutex::new(HashMap::new())),
        })
    }

    /// Register a handler for a server-initiated JSON-RPC request
    /// method. The recv loop invokes this when the daemon sends a
    /// request (with `method` field) — most commonly
    /// `tools.execute` from `WsToolExecutor`. The handler runs on a
    /// fresh tokio task; its result/error is sent back as a JSON-RPC
    /// response with the matching id.
    ///
    /// Handlers persist across reconnects — register once at client
    /// construction, the new recv loop picks them up automatically.
    /// Registering a handler with the same method name replaces the
    /// previous one.
    ///
    /// Registration always takes effect: if the handler map's mutex was
    /// poisoned by a panic on another thread, the guard is recovered
    /// instead of the registration being silently discarded. The map only
    /// holds `Arc`'d closures, so there is no partially-updated state a
    /// panic could have left behind.
    pub fn register_handler<F, Fut>(&self, method: &str, handler: F)
    where
        F: Fn(Value) -> Fut + Send + Sync + 'static,
        Fut: Future<Output = Result<Value, String>> + Send + 'static,
    {
        // Erase the concrete future type so every handler fits one map.
        let boxed: ServerRequestHandler = Arc::new(move |params| Box::pin(handler(params)));
        self.handlers
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .insert(method.to_string(), boxed);
    }

    /// Remove a server-request handler. Subsequent server-initiated
    /// requests for that method will get a -32601 "no handler"
    /// error response. Removing a method that was never registered is
    /// a no-op.
    ///
    /// Removal always takes effect: a poisoned handler-map mutex is
    /// recovered rather than ignored, so a closure the caller believes
    /// is gone (and whose captured FFI callback may be about to be
    /// released) cannot stay installed.
    pub fn unregister_handler(&self, method: &str) {
        self.handlers
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .remove(method);
    }

    /// Register a handler for a server-initiated JSON-RPC
    /// **notification** (no `id`, no response expected) — e.g.
    /// `voice.event`, `a2ui.event`.
    ///
    /// **Single subscriber per method, by design.** Registering with
    /// the same method name replaces the previous handler. This
    /// mirrors `register_handler` and is intentional for the FFI
    /// bridging cases that exist today (one TSF / one PyObject per
    /// process). If a future caller needs multi-observer fanout
    /// (e.g. a renderer + an inspector both watching `a2ui.event`),
    /// layer a fanout dispatcher above this — register one handler
    /// here that broadcasts to a tokio::sync::broadcast or similar.
    ///
    /// **Liveness.** The recv loop invokes the closure synchronously
    /// after parsing the frame. A blocking handler stalls every
    /// other in-flight JSON-RPC response on this client. Keep the
    /// closure cheap — post into a queue, fire a TSF (NAPI's
    /// `NonBlocking` mode is safe), and return. Anything that
    /// acquires a Python GIL or takes a non-trivial lock must run on
    /// a dedicated task drained from a channel the handler writes
    /// to.
    ///
    /// **Poisoning.** Registration always takes effect: if the map's
    /// mutex was poisoned by a panic on another thread, the guard is
    /// recovered instead of the registration being silently discarded.
    /// The map only holds `Arc`'d closures, so a panic cannot have left
    /// it half-updated.
    ///
    /// Notification handlers persist across reconnects.
    pub fn register_notification_handler<F>(&self, method: &str, handler: F)
    where
        F: Fn(Value) + Send + Sync + 'static,
    {
        let shared: NotificationHandler = Arc::new(handler);
        self.notif_handlers
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .insert(method.to_string(), shared);
    }

    /// Remove a notification handler. Subsequent notifications for
    /// that method are logged at debug and dropped. Removing a method
    /// that was never registered is a no-op.
    ///
    /// Removal always takes effect: a poisoned handler-map mutex is
    /// recovered rather than ignored, so a closure the caller believes
    /// is gone cannot keep firing.
    pub fn unregister_notification_handler(&self, method: &str) {
        self.notif_handlers
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .remove(method);
    }

    /// Send a JSON-RPC call and await the matching response.
    /// Lazy-connects on first call. Multiple in-flight calls on the
    /// same client are supported — each gets its own oneshot waiter
    /// in `pending`. On send error, drops the connection so the next
    /// call reconnects.
    ///
    /// # Errors
    ///
    /// Returns a descriptive `Err(String)` when the connection cannot be
    /// established, the request cannot be serialized or sent, the daemon
    /// answers with a JSON-RPC error (`rpc {method}: …`), the waiter is
    /// dropped before a response arrives, or no response arrives within
    /// the read timeout chosen by `read_timeout_for`. A read timeout does
    /// **not** reset the connection.
    ///
    /// # Cancellation
    ///
    /// The waiter registered in `pending` is removed on every exit path,
    /// including the caller dropping this future mid-flight (for example
    /// from its own `select!`/timeout), so abandoned calls do not
    /// accumulate entries in the map.
    pub async fn call(&self, method: &str, params: Value) -> Result<Value, String> {
        /// Removes the waiter for `id` from the pending map when dropped.
        struct PendingCleanup<'a> {
            waiters: &'a StdMutex<HashMap<u64, oneshot::Sender<RpcResult>>>,
            id: u64,
        }

        impl Drop for PendingCleanup<'_> {
            fn drop(&mut self) {
                // Cleanup must not be skipped because of a poisoned lock;
                // the map itself is always structurally valid.
                self.waiters
                    .lock()
                    .unwrap_or_else(std::sync::PoisonError::into_inner)
                    .remove(&self.id);
            }
        }

        let write = self.ensure_connected().await?;

        let id = self.req_id.fetch_add(1, Ordering::Relaxed);

        // Derive the read timeout BEFORE `params` moves into the rpc frame —
        // for `proposal.submit` it reads the proposal's action budgets (#265).
        let read_to = read_timeout_for(method, &params);

        let rpc = serde_json::json!({
            "jsonrpc": "2.0",
            "id": id,
            "method": method,
            "params": params,
        });
        // Serialize before registering the waiter so a serialization
        // failure has nothing to clean up.
        let payload = serde_json::to_string(&rpc)
            .map_err(|e| format!("serialize {method} request: {e}"))?;

        let (tx, rx) = oneshot::channel::<RpcResult>();
        match self.pending.lock() {
            Ok(mut waiters) => {
                waiters.insert(id, tx);
            }
            // Without a registered waiter the response could never be
            // delivered, so fail before putting the request on the wire.
            Err(_) => {
                return Err(format!(
                    "register {method} waiter: pending-request map lock is poisoned"
                ));
            }
        }
        let _cleanup = PendingCleanup {
            waiters: &*self.pending,
            id,
        };

        // Hold the write-half lock only for the send itself, so it is
        // already released by the time `reset()` or the response wait runs.
        let send_result = {
            let mut sink = write.lock().await;
            sink.send(Message::Text(payload.into())).await
        };
        if let Err(e) = send_result {
            self.reset().await;
            return Err(format!("send {method} request: {e}"));
        }

        match timeout(read_to, rx).await {
            Ok(Ok(Ok(value))) => Ok(value),
            Ok(Ok(Err(e))) => Err(format!("rpc {method}: {e}")),
            Ok(Err(_)) => Err(format!("rpc channel closed for {method}")),
            Err(_) => Err(format!(
                "daemon read timeout on {method} after {}s",
                read_to.as_secs()
            )),
        }
    }

    /// Ensure we have a live connection. Returns the shared write
    /// half. Idempotent — concurrent first-callers serialize on
    /// `state` and only one performs the actual connect.
    ///
    /// The `state` lock is held for the whole connect + auth sequence,
    /// so other callers wait until this attempt either stores a
    /// connection or fails. On any failure nothing is stored, and the
    /// next call starts a fresh attempt.
    ///
    /// # Errors
    ///
    /// Returns `Err(String)` when the WebSocket connect fails or exceeds
    /// [`CONNECT_TIMEOUT`], when [`send_auth`] fails at the transport
    /// level, or when the daemon rejects the token — including after the
    /// single re-read-and-retry performed on a `token mismatch`.
    async fn ensure_connected(&self) -> Result<Arc<AsyncMutex<WsWrite>>, String> {
        let mut state = self.state.lock().await;
        // Fast path: already connected — hand out another handle to the
        // existing write half.
        if let Some(s) = state.as_ref() {
            return Ok(s.write.clone());
        }

        // Connect with a hard timeout. A half-open daemon (process
        // alive, port accepting, hung handler) would otherwise wedge
        // the calling thread forever — and FFI calls block the JS
        // event loop tick.
        let connect_fut = connect_async(&self.url);
        let (mut socket, _) = match timeout(CONNECT_TIMEOUT, connect_fut).await {
            Ok(Ok(pair)) => pair,
            Ok(Err(e)) => return Err(format!("connect daemon at {}: {}", self.url, e)),
            Err(_) => {
                return Err(format!(
                    "connect daemon at {} timed out after {}s",
                    self.url,
                    CONNECT_TIMEOUT.as_secs()
                ));
            }
        };

        // Auth handshake (Parslee-ai/car-releases#32). If the daemon
        // wrote an auth token, present it as the first frame on this
        // connection; otherwise skip — the daemon either has auth
        // disabled (in which case the gate is a no-op) or its token
        // write failed (in which case the gate would close us anyway,
        // with a clear error). Done before splitting so the response
        // round-trip is sequential — the recv loop only takes over
        // after auth succeeds.
        //
        // Token source: `read_for_client` (NOT `read`) so a cross-host
        // deployment can override via `$CAR_AUTH_TOKEN` — the FFI
        // process and the daemon may be on different machines, in
        // which case the local file path is the wrong source.
        // Parslee-ai/car#231 §8.0.3.
        //
        // On `token mismatch` we re-read once and retry. The daemon
        // rotates its in-memory token on every launch and rewrites the
        // well-known file; an FFI process that read the token before
        // that rotation would otherwise surface a terminal error (#244)
        // even though the current file contents would let it in. The
        // retry uses `read_for_client` too, so on the cross-host path
        // (where `$CAR_AUTH_TOKEN` is set) a "rotation" re-read returns
        // the same env-var value — the `fresh == token` guard then
        // correctly surfaces this as a true auth failure (the operator
        // needs to refresh their env var), not a rotation race that
        // would silently resolve.
        //
        // Note that an `Err` from the initial token read is treated the
        // same as "no token": the handshake is skipped.
        if let Ok(Some(token)) = crate::auth_token::read_for_client() {
            // First attempt uses handshake id 0; the retry below uses 1.
            match send_auth(&mut socket, &token, 0).await? {
                AuthOutcome::Accepted => {}
                AuthOutcome::Rejected(msg) => {
                    // Pin to the exact phrase the daemon's auth gate
                    // emits — `car-server-core/src/handler.rs:1595,1645`
                    // both produce "token mismatch". A bare `contains
                    // ("mismatch")` would mis-fire on unrelated daemon
                    // errors like "policy mismatch", "schema mismatch",
                    // or "version mismatch", and F2 makes the false
                    // positive worse: the cross-host retry path emits
                    // a "refresh $CAR_AUTH_TOKEN" hint that's actively
                    // misleading for non-auth errors. Pin to what the
                    // daemon actually says.
                    let looks_like_mismatch = msg.to_lowercase().contains("token mismatch");
                    if !looks_like_mismatch {
                        return Err(format!("session.auth rejected by daemon: {msg}"));
                    }
                    // Possible daemon rotation between our read and
                    // send. Re-read; only retry if the source now holds
                    // a different value (a stale-cache retry against
                    // the same value would just fail the same way).
                    let fresh = crate::auth_token::read_for_client()
                        .map_err(|e| {
                            format!("session.auth rejected (token mismatch); re-read failed: {e}")
                        })?
                        .ok_or_else(|| {
                            "session.auth rejected (token mismatch); token source vanished on re-read"
                                .to_string()
                        })?;
                    if fresh == token {
                        // Same value on both reads — not a rotation race.
                        // Guidance covers both token sources without
                        // pretending we know which one the caller used,
                        // since `read_for_client` doesn't surface that
                        // (and adding an API just for one error message
                        // isn't worth it).
                        return Err(format!(
                            "session.auth rejected by daemon: {msg} (re-read returned same token — \
                             true auth failure, not a rotation race. If using $CAR_AUTH_TOKEN, \
                             refresh it from the daemon's current value; if using the local token \
                             file, the daemon's in-memory token has diverged from disk — restart \
                             the daemon)"
                        ));
                    }
                    // Exactly one retry: a second rejection is terminal.
                    match send_auth(&mut socket, &fresh, 1).await? {
                        AuthOutcome::Accepted => {}
                        AuthOutcome::Rejected(msg2) => {
                            return Err(format!(
                                "session.auth rejected by daemon after rotation re-read: {msg2}"
                            ));
                        }
                    }
                }
            }
        }

        // From here on the socket is shared: the write half goes behind
        // its own async mutex, the read half moves into the recv task.
        let (write_half, read_half) = socket.split();
        let write_arc = Arc::new(AsyncMutex::new(write_half));

        // Clone the shared maps so the spawned task owns its own handles;
        // handlers registered later are visible to it through the same
        // underlying maps.
        let pending = self.pending.clone();
        let handlers = self.handlers.clone();
        let notif_handlers = self.notif_handlers.clone();
        let write_for_task = write_arc.clone();
        let url_for_task = self.url.clone();

        let task = tokio::spawn(async move {
            recv_loop(
                read_half,
                write_for_task,
                pending,
                handlers,
                notif_handlers,
                url_for_task,
            )
            .await;
        });

        // Keep only the abort handle; the join handle itself is dropped,
        // which detaches the task.
        *state = Some(ConnState {
            write: write_arc.clone(),
            recv_task: task.abort_handle(),
        });

        Ok(write_arc)
    }

    /// Tear down the connection: abort the recv task and fail every
    /// pending waiter. Next `call()` reconnects.
    ///
    /// Each waiter that is still registered receives an explicit
    /// "daemon connection at {url} closed before response" error — the
    /// message the type-level documentation promises — instead of merely
    /// having its sender dropped, which callers would only see as an
    /// opaque "rpc channel closed" error.
    async fn reset(&self) {
        let mut state = self.state.lock().await;
        if let Some(conn) = state.take() {
            conn.recv_task.abort();
        }

        // Move the waiters out under the lock and notify them after it is
        // released. A poisoned lock is recovered: stranded waiters must be
        // woken regardless of a panic elsewhere.
        let orphaned = match self.pending.lock() {
            Ok(mut waiters) => std::mem::take(&mut *waiters),
            Err(poisoned) => std::mem::take(&mut *poisoned.into_inner()),
        };
        if orphaned.is_empty() {
            return;
        }

        let reason = format!("daemon connection at {} closed before response", self.url);
        for (_, waiter) in orphaned {
            // The receiver may already be gone (caller timed out or was
            // cancelled); that is not an error here.
            let _ = waiter.send(Err(reason.clone()));
        }
    }
}

/// Permanent recv loop. Reads frames until the read half ends or a
/// Close arrives. Demuxes:
///
/// - **Response** (no `method`, has `id`+numeric): take pending\[id\],
///   deliver result/error.
/// - **Request** (has `method`+`id`): look up handler\[method\],
///   spawn task to invoke, send response back over the shared write
///   half. Returns -32601 if no handler is registered.
/// - **Notification** (has `method`, no `id`): look up
///   notif_handlers\[method\] and invoke it synchronously.
///   Notifications without a registered handler are logged at debug
///   and dropped.
///
/// On loop exit (read error, Close, or end of stream), all remaining
/// pending senders are dropped so awaiting `call()`s unblock with a
/// closed-channel error.
///
/// Arguments:
///
/// - `read`: the stream this loop consumes frames from.
/// - `write`: shared, mutex-guarded sink used to send responses to
///   server-initiated requests.
/// - `pending`: request id → oneshot sender for calls awaiting a
///   response.
/// - `handlers` / `notif_handlers`: per-method callbacks for
///   server-initiated requests and notifications respectively.
/// - `url`: only used as a field in log events.
async fn recv_loop(
    mut read: WsRead,
    write: Arc<AsyncMutex<WsWrite>>,
    pending: Arc<StdMutex<HashMap<u64, oneshot::Sender<RpcResult>>>>,
    handlers: Arc<StdMutex<HashMap<String, ServerRequestHandler>>>,
    notif_handlers: Arc<StdMutex<HashMap<String, NotificationHandler>>>,
    url: String,
) {
    while let Some(frame) = read.next().await {
        // A transport-level read error ends the loop; the drain at the
        // bottom of the function then releases every waiter.
        let msg = match frame {
            Ok(m) => m,
            Err(e) => {
                tracing::warn!(
                    target: "car_ffi_common::proxy",
                    url = %url,
                    error = %e,
                    "recv loop read error; closing"
                );
                break;
            }
        };
        match msg {
            Message::Text(text) => {
                // A frame that fails to parse is logged and skipped; it
                // does not terminate the loop.
                let parsed: IncomingFrame = match serde_json::from_str(&text) {
                    Ok(f) => f,
                    Err(e) => {
                        tracing::warn!(
                            target: "car_ffi_common::proxy",
                            error = %e,
                            "parse incoming frame failed"
                        );
                        continue;
                    }
                };

                if let Some(method) = parsed.method.as_deref() {
                    // Server-initiated request or notification.
                    let Some(id) = parsed.id.clone() else {
                        // Notification: no id, no response expected.
                        // Look up handler; invoke synchronously (handler
                        // is responsible for keeping work cheap — fire
                        // a TSF, push into a channel, etc.).
                        //
                        // Wrap the call in `catch_unwind`. The recv
                        // loop is the single demuxer for every
                        // in-flight call on this client; if a buggy
                        // handler panics, unwinding here would kill
                        // the loop and force every pending caller to
                        // reconnect. The cost of the wrap is
                        // negligible compared to that blast radius.
                        //
                        // The handler is cloned out of the map so the
                        // std mutex guard is released before it runs.
                        let h = notif_handlers
                            .lock()
                            .ok()
                            .and_then(|g| g.get(method).cloned());
                        let params = parsed.params.unwrap_or(Value::Null);
                        if let Some(h) = h {
                            let method_owned = method.to_string();
                            if let Err(e) =
                                std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| h(params)))
                            {
                                // Panic payloads are `&'static str` or
                                // `String` for the common `panic!` forms;
                                // anything else gets a placeholder.
                                let panic_msg = e
                                    .downcast_ref::<&'static str>()
                                    .map(|s| (*s).to_string())
                                    .or_else(|| e.downcast_ref::<String>().cloned())
                                    .unwrap_or_else(|| "<non-string panic payload>".to_string());
                                tracing::error!(
                                    target: "car_ffi_common::proxy",
                                    method = %method_owned,
                                    panic = %panic_msg,
                                    "notification handler panicked; recv loop continues"
                                );
                            }
                        } else {
                            tracing::debug!(
                                target: "car_ffi_common::proxy",
                                method = %method,
                                "no notification handler registered; dropping"
                            );
                        }
                        continue;
                    };
                    // Server-initiated request: it carries an id, so a
                    // response is sent back with that same id. A
                    // poisoned handler map is treated the same as "no
                    // handler registered".
                    let handler = handlers.lock().ok().and_then(|g| g.get(method).cloned());
                    let params = parsed.params.unwrap_or(Value::Null);
                    let write_for_resp = write.clone();
                    let method_owned = method.to_string();
                    if let Some(h) = handler {
                        // The handler runs in its own task, so this loop
                        // goes back to reading frames while the handler
                        // future is awaited.
                        tokio::spawn(async move {
                            let resp = match h(params).await {
                                Ok(v) => serde_json::json!({
                                    "jsonrpc": "2.0",
                                    "id": id,
                                    "result": v,
                                }),
                                // Handler failures are reported with
                                // code -32000 and the handler's error as
                                // the message.
                                Err(e) => serde_json::json!({
                                    "jsonrpc": "2.0",
                                    "id": id,
                                    "error": { "code": -32000, "message": e },
                                }),
                            };
                            // Serialization and send errors are
                            // discarded; no retry is attempted.
                            if let Ok(payload) = serde_json::to_string(&resp) {
                                let _ = write_for_resp
                                    .lock()
                                    .await
                                    .send(Message::Text(payload.into()))
                                    .await;
                            }
                        });
                    } else {
                        // No handler — return method-not-found so the
                        // daemon's pending oneshot resolves quickly
                        // instead of timing out at 60s.
                        let resp = serde_json::json!({
                            "jsonrpc": "2.0",
                            "id": id,
                            "error": {
                                "code": -32601,
                                "message": format!(
                                    "no handler registered on FFI client for `{method_owned}`"
                                ),
                            },
                        });
                        // Sent inline (not spawned): the loop waits for
                        // the write lock and the send before reading the
                        // next frame. Send errors are discarded.
                        if let Ok(payload) = serde_json::to_string(&resp) {
                            let _ = write_for_resp
                                .lock()
                                .await
                                .send(Message::Text(payload.into()))
                                .await;
                        }
                    }
                } else {
                    // Response — match by numeric id. Server-initiated
                    // requests use string ids ("cb-N") which can't
                    // shadow our numeric ids.
                    let Some(id) = parsed.id.as_ref().and_then(|v| v.as_u64()) else {
                        tracing::warn!(
                            target: "car_ffi_common::proxy",
                            "response with non-numeric id; ignoring"
                        );
                        continue;
                    };
                    // Removing the sender here means each response is
                    // delivered at most once.
                    let tx = pending.lock().ok().and_then(|mut g| g.remove(&id));
                    let Some(tx) = tx else {
                        tracing::debug!(
                            target: "car_ffi_common::proxy",
                            id,
                            "no pending request for response (likely timed out)"
                        );
                        continue;
                    };
                    // An `error` member takes precedence over `result`;
                    // a missing `result` is delivered as JSON null.
                    let result: RpcResult = if let Some(err) = parsed.error {
                        Err(format!("{} {}", err.code, err.message))
                    } else {
                        Ok(parsed.result.unwrap_or(Value::Null))
                    };
                    // The send fails only if the receiver was already
                    // dropped; that error is discarded.
                    let _ = tx.send(result);
                }
            }
            Message::Binary(b) => {
                tracing::debug!(
                    target: "car_ffi_common::proxy",
                    len = b.len(),
                    "skipping binary frame"
                );
            }
            // Ping, Pong and raw Frame messages are ignored by this loop.
            Message::Ping(_) | Message::Pong(_) | Message::Frame(_) => {}
            Message::Close(_) => {
                tracing::info!(
                    target: "car_ffi_common::proxy",
                    url = %url,
                    "daemon closed connection"
                );
                break;
            }
        }
    }
    // Loop ended — drain pending so awaiters fail fast.
    if let Ok(mut g) = pending.lock() {
        let count = g.len();
        if count > 0 {
            tracing::warn!(
                target: "car_ffi_common::proxy",
                url = %url,
                count,
                "recv loop ended with pending requests; dropping waiters"
            );
        }
        g.clear();
    }
}

// ---------------------------------------------------------------------------
// Per-method wrappers. NAPI/PyO3 bindings dispatch to these for every
// non-callback method. Mechanical request/response shape — same
// param/return JSON the daemon's JSON-RPC handlers already accept.
// ---------------------------------------------------------------------------

// ---------------------------------------------------------------------------
// Registration: tools, policies, agent basics. These calls plant
// state on the daemon's per-session runtime — exactly the state
// `verify_proposal` and `executeProposal` validate against, so
// per-session continuity matters as much as for `state.*` and
// `memory.*`.
// ---------------------------------------------------------------------------

/// Schemaless `tools.register`. The daemon takes an array of
/// ToolDefinition, so the lone `{ name }` entry is sent as a
/// one-element array; the absent `parameters` selects the daemon's
/// legacy no-op validator path.
pub async fn proxy_tools_register(client: &DaemonClient, name: &str) -> Result<(), String> {
    let definitions = serde_json::json!([{ "name": name }]);
    client.call("tools.register", definitions).await?;
    Ok(())
}

/// `tools.register` with a full ToolSchema. Caller passes the
/// already-serialized schema JSON; we wrap it in a single-element
/// array (the daemon's tools.register accepts `Vec<ToolDefinition>`
/// and `ToolDefinition` has the same shape as `ToolSchema` minus
/// the wire-protocol namespacing).
pub async fn proxy_tools_register_schema(
    client: &DaemonClient,
    schema_json: &str,
) -> Result<(), String> {
    let schema: Value =
        serde_json::from_str(schema_json).map_err(|e| format!("invalid ToolSchema JSON: {e}"))?;
    let params = serde_json::json!([schema]);
    client.call("tools.register", params).await.map(|_| ())
}

/// `policy.register`. `params_json` is one serialized
/// `PolicyDefinition` (`{ name, rule, target?, key?, value?, pattern? }`)
/// and is forwarded as-is once parsed. Callback rules
/// (`deny_tool_callback`) cannot travel over the wire; the FFI binding
/// rejects them with a structured error before this helper is reached.
pub async fn proxy_policy_register(client: &DaemonClient, params_json: &str) -> Result<(), String> {
    let definition = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid policy params JSON: {err}")),
    };
    client.call("policy.register", definition).await?;
    Ok(())
}

/// `foreman.plan` — decompose a coding `goal` into a footprint-annotated,
/// scheduled subtask plan via the daemon's inference engine. `repo` defaults to
/// the daemon's cwd; `max_attempts` bounds the parse/conflict repair loop.
/// Returns the serialized `ForemanPlanReport` JSON.
pub async fn proxy_foreman_plan(
    client: &DaemonClient,
    goal: &str,
    repo: Option<&str>,
    max_attempts: Option<u32>,
) -> Result<String, String> {
    let mut params = serde_json::json!({ "goal": goal });
    if let Some(r) = repo {
        params["repo"] = serde_json::json!(r);
    }
    if let Some(m) = max_attempts {
        params["max_attempts"] = serde_json::json!(m);
    }
    let result = client.call("foreman.plan", params).await?;
    serde_json::to_string(&result).map_err(|e| format!("serialize plan: {e}"))
}

/// `foreman.run` — plan, then farm the subtasks to an external coding CLI in
/// isolated worktrees and gate each worktree + the integrated union. `adapter`
/// defaults to `claude-code`; `verify_command` is the build/test the gate runs.
/// **Spends real agent quota.** Returns the serialized `{ plan, ran, run? }` JSON.
#[allow(clippy::too_many_arguments)]
pub async fn proxy_foreman_run(
    client: &DaemonClient,
    goal: &str,
    repo: Option<&str>,
    adapter: Option<&str>,
    verify_command: Option<Vec<String>>,
    union_verify_command: Option<Vec<String>>,
    max_attempts: Option<u32>,
) -> Result<String, String> {
    let mut params = serde_json::json!({ "goal": goal });
    if let Some(r) = repo {
        params["repo"] = serde_json::json!(r);
    }
    if let Some(a) = adapter {
        params["adapter"] = serde_json::json!(a);
    }
    if let Some(v) = verify_command {
        params["verify_command"] = serde_json::json!(v);
    }
    if let Some(v) = union_verify_command {
        params["union_verify_command"] = serde_json::json!(v);
    }
    if let Some(m) = max_attempts {
        params["max_attempts"] = serde_json::json!(m);
    }
    let result = client.call("foreman.run", params).await?;
    serde_json::to_string(&result).map_err(|e| format!("serialize run: {e}"))
}

/// `runs.start` (agent run tracing, U1). Opens an agent run on the
/// daemon: a durable `run_id` is minted, tagged as the session's current
/// run, and `RunStarted` is recorded. The harness MUST await this ack
/// before submitting any proposal, so that the per-turn recorder (U2)
/// reads the run_id set by this bracket (KTD3). `params_json` is a
/// serialized `RunStartRequest` (`{ intent, agent_id?, agent_name?,
/// outcome_description? }`). Returns the resolved `{ run_id, agent_id }`
/// as a JSON string.
pub async fn proxy_runs_start(
    client: &DaemonClient,
    params_json: &str,
) -> Result<String, String> {
    let request = serde_json::from_str::<Value>(params_json)
        .map_err(|err| format!("invalid runs.start params JSON: {err}"))?;
    client.call("runs.start", request).await.and_then(|ack| {
        serde_json::to_string(&ack)
            .map_err(|err| format!("serialize runs.start response: {err}"))
    })
}

/// `runs.complete` (agent run tracing, U1). Stores the terminal
/// `AgentOutcome` of a run and acks. The harness MUST await this ack
/// before it lets its connection close; otherwise a healthy run can be
/// raced into `Incomplete` by the daemon's disconnect grace window (R5).
/// `params_json` is a serialized `RunCompleteRequest`
/// (`{ run_id, outcome }`). Returns the `{ run_id, ok }` ack as a JSON
/// string.
pub async fn proxy_runs_complete(
    client: &DaemonClient,
    params_json: &str,
) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid runs.complete params JSON: {err}")),
    };
    let ack = client.call("runs.complete", request).await?;
    match serde_json::to_string(&ack) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize runs.complete response: {err}")),
    }
}

/// `agents.register_basics`. The daemon-side counterpart of
/// `Runtime::register_agent_basics`, applied to the per-session runtime.
/// Sends no parameters and discards the response body.
pub async fn proxy_register_agent_basics(client: &DaemonClient) -> Result<(), String> {
    client.call("agents.register_basics", Value::Null).await?;
    Ok(())
}

/// `voice.prepare_parakeet`. Asks the daemon to load the Parakeet model.
/// The load has to happen in the daemon rather than the FFI process:
/// the listener built by `voice.transcribe_stream.start` is also
/// daemon-side and picks the prepared provider up from the daemon's
/// OnceLock. The daemon's JSON response (`{"ready": true, ...}`) is
/// returned as a string so the FFI standalone can hand it on unchanged.
pub async fn proxy_prepare_parakeet(client: &DaemonClient) -> Result<String, String> {
    client
        .call("voice.prepare_parakeet", Value::Null)
        .await
        .map(|status| status.to_string())
}

/// `voice.prepare_diarizer`. Asks the daemon to load the WeSpeaker ONNX
/// model; this must happen daemon-side, not in the FFI process. If the
/// daemon has not prepared it, `current_prepared_diarizer()` yields
/// `None` at listener construction, transcripts fall back to
/// `TranscriptRole::Unknown`, and per-speaker clustering is silently
/// lost (root cause of the v0.8.x diarizer regression). The daemon's
/// JSON response (`{"ready": true}`) is returned as a string.
pub async fn proxy_prepare_diarizer(client: &DaemonClient) -> Result<String, String> {
    client
        .call("voice.prepare_diarizer", Value::Null)
        .await
        .map(|status| status.to_string())
}

/// `state.set` — stores a JSON value under `key` in the state store.
/// `value_json` must be valid JSON text; it is parsed locally first, so
/// malformed input fails before any request is sent.
pub async fn proxy_state_set(
    client: &DaemonClient,
    key: &str,
    value_json: &str,
) -> Result<(), String> {
    let parsed = match serde_json::from_str::<Value>(value_json) {
        Ok(value) => value,
        Err(err) => return Err(format!("invalid value JSON for state.set: {err}")),
    };
    let request = serde_json::json!({ "key": key, "value": parsed });
    client.call("state.set", request).await?;
    Ok(())
}

/// `state.get` — fetches the JSON value stored under `key`, returned
/// as JSON text. An absent key comes back as `"null"`, matching the
/// embedded `CarRuntime::state_get` behavior, so callers need not tell
/// "absent" apart from "set to null". A failure to re-serialize the
/// response also yields `"null"` rather than an error.
pub async fn proxy_state_get(client: &DaemonClient, key: &str) -> Result<String, String> {
    let request = serde_json::json!({ "key": key });
    let stored = client.call("state.get", request).await?;
    let encoded = match serde_json::to_string(&stored) {
        Ok(text) => text,
        Err(_) => "null".to_string(),
    };
    Ok(encoded)
}

/// `state.exists` — true if `key` is set in the daemon session's
/// state store.
pub async fn proxy_state_exists(client: &DaemonClient, key: &str) -> Result<bool, String> {
    let v = client
        .call("state.exists", serde_json::json!({ "key": key }))
        .await?;
    Ok(v.as_bool().unwrap_or(false))
}

/// `state.keys` — returns all keys held in the daemon session's state
/// store. Fails if the response is not an array of strings.
pub async fn proxy_state_keys(client: &DaemonClient) -> Result<Vec<String>, String> {
    client.call("state.keys", Value::Null).await.and_then(|listing| {
        serde_json::from_value::<Vec<String>>(listing)
            .map_err(|err| format!("parse state.keys: {err}"))
    })
}

/// `state.snapshot` — dumps the whole state store as a JSON-encoded
/// `{ key: value, ... }` object string. The shape mirrors the v0.7
/// embedded one, so existing caller-side parsing keeps working.
pub async fn proxy_state_snapshot(client: &DaemonClient) -> Result<String, String> {
    let snapshot = client.call("state.snapshot", Value::Null).await?;
    match serde_json::to_string(&snapshot) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize state.snapshot: {err}")),
    }
}

/// `memory.build_context_fast` — Fast-mode context assembly. Same
/// param shape as `proxy_memory_build_context`. Returns the assembled
/// context string.
pub async fn proxy_memory_build_context_fast(
    client: &DaemonClient,
    query: &str,
    model_context_window: Option<u32>,
) -> Result<String, String> {
    let mut params = serde_json::json!({ "query": query });
    if let Some(w) = model_context_window {
        params["model_context_window"] = serde_json::json!(w);
    }
    let v = client.call("memory.build_context_fast", params).await?;
    Ok(v.as_str().unwrap_or("").to_string())
}

// ---------------------------------------------------------------------------
// Inference: GPU-bound calls. These are the methods #139 most cares
// about — running them on an embedded engine in every FFI consumer
// is exactly the multi-tenant overcommit hazard the daemon's
// admission semaphore exists to prevent.
// ---------------------------------------------------------------------------

/// Plain `infer`. `request_json` is parsed and forwarded unchanged;
/// the daemon's complete InferenceResult comes back as JSON text.
/// What to surface is the caller's choice (the embedded NAPI `infer`
/// collapses to `{"text": ...}` for back-compat; embedded PyO3 returns
/// the raw text) — both can be derived from the full result.
pub async fn proxy_infer(client: &DaemonClient, request_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(request_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid GenerateRequest JSON: {err}")),
    };
    client.call("infer", request).await.and_then(|outcome| {
        serde_json::to_string(&outcome).map_err(|err| format!("serialize infer result: {err}"))
    })
}

/// `builder.build` — turns natural language into a validated workflow.
/// `request_json` has the shape `{ goal, existing?, max_attempts? }`. The
/// work runs on the daemon, whose catalog (registered tools + models) is
/// authoritative. The `{ valid, workflow, issues, warnings, attempts }`
/// result is returned as JSON text.
pub async fn proxy_builder_build(
    client: &DaemonClient,
    request_json: &str,
) -> Result<String, String> {
    let request = serde_json::from_str::<Value>(request_json)
        .map_err(|err| format!("invalid builder.build request JSON: {err}"))?;
    let built = client.call("builder.build", request).await?;
    match serde_json::to_string(&built) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize builder result: {err}")),
    }
}

/// `embed`. Sends `{ texts: [...], model?: "..." }`; `texts_json` is the
/// JSON text for the `texts` field and `model` is included only when
/// given. The array-of-arrays result (one embedding per input) is
/// returned as JSON text.
pub async fn proxy_embed(
    client: &DaemonClient,
    texts_json: &str,
    model: Option<&str>,
) -> Result<String, String> {
    let inputs = match serde_json::from_str::<Value>(texts_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid texts JSON: {err}")),
    };
    let mut request = serde_json::Map::new();
    request.insert("texts".to_string(), inputs);
    if let Some(model_name) = model {
        request.insert("model".to_string(), Value::from(model_name));
    }
    client
        .call("embed", Value::Object(request))
        .await
        .and_then(|vectors| {
            serde_json::to_string(&vectors)
                .map_err(|err| format!("serialize embed result: {err}"))
        })
}

/// `classify`. Sends `{ text, labels: [...], model?: "..." }`;
/// `labels_json` is the JSON text for the `labels` field and `model` is
/// included only when given. Whatever the daemon answers (the chosen
/// label string, or a fuller result, depending on daemon shape) is
/// passed through as JSON text.
pub async fn proxy_classify(
    client: &DaemonClient,
    text: &str,
    labels_json: &str,
    model: Option<&str>,
) -> Result<String, String> {
    let candidates = serde_json::from_str::<Value>(labels_json)
        .map_err(|err| format!("invalid labels JSON: {err}"))?;
    let mut request = serde_json::Map::new();
    request.insert("text".to_string(), Value::from(text));
    request.insert("labels".to_string(), candidates);
    if let Some(model_name) = model {
        request.insert("model".to_string(), Value::from(model_name));
    }
    let verdict = client.call("classify", Value::Object(request)).await?;
    match serde_json::to_string(&verdict) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize classify result: {err}")),
    }
}

/// `verify` (proposal). A pure pass-through: `params_json` carries
/// whatever the daemon expects, in the same JSON-RPC shape the existing
/// CLI / WS callers use, and the response is returned as JSON text.
pub async fn proxy_verify(client: &DaemonClient, params_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid verify params JSON: {err}")),
    };
    client.call("verify", request).await.and_then(|verdict| {
        serde_json::to_string(&verdict).map_err(|err| format!("serialize verify result: {err}"))
    })
}

/// `tokenize`. Sends `{ model, text }` and returns the daemon's
/// `{ tokens: [u32, ...] }` response as JSON text.
pub async fn proxy_tokenize(
    client: &DaemonClient,
    model: &str,
    text: &str,
) -> Result<String, String> {
    let request = serde_json::json!({ "model": model, "text": text });
    let reply = client.call("tokenize", request).await?;
    match serde_json::to_string(&reply) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize tokenize result: {err}")),
    }
}

/// `detokenize`. Sends `{ model, tokens: [u32, ...] }` and returns the
/// daemon's `{ text: "..." }` response as JSON text.
pub async fn proxy_detokenize(
    client: &DaemonClient,
    model: &str,
    tokens: &[u32],
) -> Result<String, String> {
    let request = serde_json::json!({ "model": model, "tokens": tokens });
    client.call("detokenize", request).await.and_then(|reply| {
        serde_json::to_string(&reply)
            .map_err(|err| format!("serialize detokenize result: {err}"))
    })
}

/// `skills.distill`. `events_json` is the JSON text for the `events`
/// field of the `{ events: [...] }` request; the daemon runs
/// `MemgineEngine::distill_skills` on its per-session engine. The
/// resulting array of `DistilledSkill` is returned as JSON text.
pub async fn proxy_skills_distill(
    client: &DaemonClient,
    events_json: &str,
) -> Result<String, String> {
    let trace = match serde_json::from_str::<Value>(events_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid events JSON: {err}")),
    };
    let request = serde_json::json!({ "events": trace });
    let distilled = client.call("skills.distill", request).await?;
    match serde_json::to_string(&distilled) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize skills.distill result: {err}")),
    }
}

/// `memory.consolidate`. Takes no parameters; the daemon's
/// ConsolidationReport is returned as JSON text.
pub async fn proxy_memory_consolidate(client: &DaemonClient) -> Result<String, String> {
    client
        .call("memory.consolidate", Value::Null)
        .await
        .and_then(|report| {
            serde_json::to_string(&report)
                .map_err(|err| format!("serialize consolidate result: {err}"))
        })
}

/// `memory.persist`. Daemon writes its memgine snapshot to `path`
/// (resolved under `~/.car/memory/`) and returns the number of
/// records written as a JSON number.
pub async fn proxy_memory_persist(client: &DaemonClient, path: &str) -> Result<u32, String> {
    let v = client
        .call("memory.persist", serde_json::json!({ "path": path }))
        .await?;
    let n = v
        .as_u64()
        .ok_or_else(|| format!("memory.persist returned non-numeric: {v}"))?;
    Ok(n as u32)
}

/// `memory.load`. Daemon reads a snapshot from `path` (resolved under
/// `~/.car/memory/`) and replaces its memgine; returns the number of
/// records loaded as a JSON number.
pub async fn proxy_memory_load(client: &DaemonClient, path: &str) -> Result<u32, String> {
    let v = client
        .call("memory.load", serde_json::json!({ "path": path }))
        .await?;
    let n = v
        .as_u64()
        .ok_or_else(|| format!("memory.load returned non-numeric: {v}"))?;
    Ok(n as u32)
}

/// `skills.ingest_distilled`. Daemon expects `{ skills: [...] }`.
/// Returns `{ ingested: N }`.
pub async fn proxy_skills_ingest_distilled(
    client: &DaemonClient,
    skills_json: &str,
) -> Result<u32, String> {
    let skills: Value =
        serde_json::from_str(skills_json).map_err(|e| format!("invalid skills JSON: {e}"))?;
    let v = client
        .call(
            "skills.ingest_distilled",
            serde_json::json!({ "skills": skills }),
        )
        .await?;
    let n = v
        .get("ingested")
        .and_then(|x| x.as_u64())
        .ok_or_else(|| format!("ingest_distilled returned unexpected shape: {v}"))?;
    Ok(n as u32)
}

/// `skill.repair`. Returns `{ code: "..." }` on success or
/// `null` if the skill isn't broken / repair failed. Mirrors the
/// embedded `repair_skill` `Option<String>` return.
pub async fn proxy_skill_repair(
    client: &DaemonClient,
    skill_name: &str,
) -> Result<Option<String>, String> {
    let v = client
        .call(
            "skill.repair",
            serde_json::json!({ "skill_name": skill_name }),
        )
        .await?;
    if v.is_null() {
        return Ok(None);
    }
    Ok(v.get("code")
        .and_then(|c| c.as_str())
        .map(|s| s.to_string()))
}

/// `skills.evolve`. Sends `{ events: [...], domain }`, where
/// `events_json` is the JSON text for the `events` field. The
/// resulting `DistilledSkill` array is returned as JSON text.
pub async fn proxy_skills_evolve(
    client: &DaemonClient,
    events_json: &str,
    domain: &str,
) -> Result<String, String> {
    let trace = match serde_json::from_str::<Value>(events_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid events JSON: {err}")),
    };
    let request = serde_json::json!({ "events": trace, "domain": domain });
    client.call("skills.evolve", request).await.and_then(|evolved| {
        serde_json::to_string(&evolved)
            .map_err(|err| format!("serialize skills.evolve result: {err}"))
    })
}

/// `skills.domains_needing_evolution`. Returns the JSON
/// `Vec<String>` of underperforming domains.
pub async fn proxy_skills_domains_needing_evolution(
    client: &DaemonClient,
    threshold: Option<f64>,
) -> Result<Vec<String>, String> {
    let mut params = serde_json::json!({});
    if let Some(t) = threshold {
        params["threshold"] = serde_json::json!(t);
    }
    let v = client
        .call("skills.domains_needing_evolution", params)
        .await?;
    serde_json::from_value(v).map_err(|e| format!("parse domains: {e}"))
}

/// `skills.ingest_provisional`. Ingests skills as validation-gated PROVISIONAL
/// candidates (vs `skills.ingest_distilled` which trusts them active). Daemon
/// expects `{ skills: [...], tenant? }`. Returns the count ingested.
pub async fn proxy_skills_ingest_provisional(
    client: &DaemonClient,
    skills_json: &str,
    tenant: Option<&str>,
) -> Result<u32, String> {
    let skills: Value =
        serde_json::from_str(skills_json).map_err(|e| format!("invalid skills JSON: {e}"))?;
    let mut params = serde_json::json!({ "skills": skills });
    if let Some(t) = tenant {
        params["tenant"] = serde_json::json!(t);
    }
    let v = client.call("skills.ingest_provisional", params).await?;
    let n = v
        .get("ingested")
        .and_then(|x| x.as_u64())
        .ok_or_else(|| format!("ingest_provisional returned unexpected shape: {v}"))?;
    Ok(n as u32)
}

/// `skills.gate`. Runs the promotion gate. Returns the JSON
/// `{ promoted: [...], rejected: [...] }` of resolved candidate keys.
pub async fn proxy_skills_gate(client: &DaemonClient) -> Result<String, String> {
    let v = client.call("skills.gate", serde_json::json!({})).await?;
    serde_json::to_string(&v).map_err(|e| format!("serialize skills.gate result: {e}"))
}

/// `skill.meta`. Looks up the `SkillMeta` for `key` (incl. lifecycle
/// `status`/`incumbent`/`version`/`stats`) and returns it as JSON text;
/// an absent key yields the string `"null"`.
pub async fn proxy_skill_meta(client: &DaemonClient, key: &str) -> Result<String, String> {
    let request = serde_json::json!({ "key": key });
    let meta = client.call("skill.meta", request).await?;
    match serde_json::to_string(&meta) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize skill.meta result: {err}")),
    }
}

/// `skill.export`. Returns the portable markdown document for a VALIDATED skill,
/// or `None` if the key is absent / not exportable (provisional or degraded).
pub async fn proxy_skill_export(client: &DaemonClient, key: &str) -> Result<Option<String>, String> {
    let v = client
        .call("skill.export", serde_json::json!({ "key": key }))
        .await?;
    if v.is_null() {
        return Ok(None);
    }
    v.as_str()
        .map(|s| Some(s.to_string()))
        .ok_or_else(|| format!("skill.export returned unexpected shape: {v}"))
}

/// `skill.import`. Imports a skill from a portable markdown document
/// (digest-verified). Returns `true` on success; the daemon returns a JSON-RPC
/// error if the document is malformed or its digest doesn't verify.
pub async fn proxy_skill_import(client: &DaemonClient, markdown: &str) -> Result<bool, String> {
    let v = client
        .call("skill.import", serde_json::json!({ "markdown": markdown }))
        .await?;
    Ok(v.get("imported").and_then(|b| b.as_bool()).unwrap_or(false))
}

/// `rerank`. `request_json` must be a complete `RerankRequest`; it is
/// forwarded as-is and the daemon's response is returned as JSON text.
pub async fn proxy_rerank(client: &DaemonClient, request_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(request_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid RerankRequest JSON: {err}")),
    };
    client.call("rerank", request).await.and_then(|ranking| {
        serde_json::to_string(&ranking).map_err(|err| format!("serialize rerank result: {err}"))
    })
}

/// `transcribe`. `request_json` must be a complete `TranscribeRequest`;
/// the daemon's response is returned as JSON text.
/// **Important**: the daemon resolves `audio_path` against its own
/// filesystem, not the FFI caller's. When the daemon can't reach a
/// path, use the streaming voice APIs, which ship audio bytes inline.
pub async fn proxy_transcribe(client: &DaemonClient, request_json: &str) -> Result<String, String> {
    let request = serde_json::from_str::<Value>(request_json)
        .map_err(|err| format!("invalid TranscribeRequest JSON: {err}"))?;
    let transcript = client.call("transcribe", request).await?;
    match serde_json::to_string(&transcript) {
        Ok(encoded) => Ok(encoded),
        Err(err) => Err(format!("serialize transcribe result: {err}")),
    }
}

/// `search` — web search (Parslee-hosted when signed in, otherwise a
/// bring-your-own Tavily key). Provider selection happens on the daemon from
/// its environment; the client only forwards `{ query, max_results? }`.
pub async fn proxy_search(client: &DaemonClient, request_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(request_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid SearchRequest JSON: {err}")),
    };
    client.call("search", request).await.and_then(|hits| {
        serde_json::to_string(&hits).map_err(|err| format!("serialize search result: {err}"))
    })
}

/// `web_fetch` — fetch a URL and extract its readable text. Keyless; the
/// companion to `search`. The client forwards `{ url }`.
pub async fn proxy_web_fetch(client: &DaemonClient, request_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(request_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid FetchRequest JSON: {err}")),
    };
    client.call("web_fetch", request).await.and_then(|page| {
        serde_json::to_string(&page).map_err(|err| format!("serialize web_fetch result: {err}"))
    })
}

/// `synthesize`. `request_json` must be a complete `SynthesizeRequest`.
/// Shares the filesystem caveat of `transcribe`: `output_path` refers to a
/// location on the daemon side.
pub async fn proxy_synthesize(client: &DaemonClient, request_json: &str) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(request_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid SynthesizeRequest JSON: {err}")),
    };
    let reply = client.call("synthesize", request).await?;
    match serde_json::to_string(&reply) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize synthesize result: {err}")),
    }
}

/// `speech.prepare`. Takes no params. Returns the daemon's status payload as
/// a JSON string — the same shape the embedded `prepare_speech_runtime`
/// produced.
pub async fn proxy_speech_prepare(client: &DaemonClient) -> Result<String, String> {
    client
        .call("speech.prepare", Value::Null)
        .await
        .and_then(|status| {
            serde_json::to_string(&status)
                .map_err(|err| format!("serialize speech.prepare result: {err}"))
        })
}

/// `models.route`. Sends `{ prompt }` and returns the daemon's route
/// decision as a JSON string.
pub async fn proxy_models_route(client: &DaemonClient, prompt: &str) -> Result<String, String> {
    let params = serde_json::json!({ "prompt": prompt });
    let decision = client.call("models.route", params).await?;
    match serde_json::to_string(&decision) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize models.route result: {err}")),
    }
}

/// `models.stats`. Takes no params. Returns the model performance profiles
/// as a JSON string.
pub async fn proxy_models_stats(client: &DaemonClient) -> Result<String, String> {
    let profiles = client.call("models.stats", Value::Null).await?;
    match serde_json::to_string(&profiles) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize models.stats result: {err}")),
    }
}

/// `events.count`. Returns the per-session event log size.
///
/// # Errors
///
/// * the underlying `call` fails (transport or JSON-RPC error);
/// * the daemon's reply is not an unsigned integer;
/// * the count does not fit in `u32` — reported explicitly rather than
///   silently wrapped by an `as` cast.
pub async fn proxy_events_count(client: &DaemonClient) -> Result<u32, String> {
    let v = client.call("events.count", Value::Null).await?;
    let count = v
        .as_u64()
        .ok_or_else(|| format!("events.count returned non-u64: {v}"))?;
    // Checked narrowing: a plain `as u32` would wrap values above u32::MAX.
    u32::try_from(count).map_err(|_| format!("events.count {count} exceeds u32 range"))
}

/// `events.stats`. Takes no params. Returns counts and approximate
/// serialized bytes for the per-session event log, as a JSON string.
pub async fn proxy_events_stats(client: &DaemonClient) -> Result<String, String> {
    client
        .call("events.stats", Value::Null)
        .await
        .and_then(|stats| {
            serde_json::to_string(&stats)
                .map_err(|err| format!("serialize events.stats result: {err}"))
        })
}

/// `events.truncate`. Keeps only the newest `max_events`/`max_spans`
/// entries for the daemon session.
pub async fn proxy_events_truncate(
    client: &DaemonClient,
    max_events: Option<u32>,
    max_spans: Option<u32>,
) -> Result<String, String> {
    let mut params = serde_json::json!({});
    if let Some(max) = max_events {
        params["maxEvents"] = serde_json::json!(max);
    }
    if let Some(max) = max_spans {
        params["maxSpans"] = serde_json::json!(max);
    }
    let v = client.call("events.truncate", params).await?;
    serde_json::to_string(&v).map_err(|e| format!("serialize events.truncate result: {e}"))
}

/// `events.clear`. Takes no params. Empties the daemon session's event log
/// and returns the daemon's reply as a JSON string.
pub async fn proxy_events_clear(client: &DaemonClient) -> Result<String, String> {
    let reply = client.call("events.clear", Value::Null).await?;
    match serde_json::to_string(&reply) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize events.clear result: {err}")),
    }
}

/// `replan.set_config`. The daemon takes a flat
/// `{ max_replans, delay_ms, verify_before_execute, replan_on_rejected }`
/// object, mirroring the positional arguments of the FFI's
/// `set_replan_config`. The daemon's reply payload is discarded; only
/// success or failure is reported.
pub async fn proxy_replan_set_config(
    client: &DaemonClient,
    max_replans: u32,
    delay_ms: u64,
    verify_before_execute: bool,
    replan_on_rejected: bool,
) -> Result<(), String> {
    let config = serde_json::json!({
        "max_replans": max_replans,
        "delay_ms": delay_ms,
        "verify_before_execute": verify_before_execute,
        "replan_on_rejected": replan_on_rejected,
    });
    client.call("replan.set_config", config).await?;
    Ok(())
}

// ---------------------------------------------------------------------------
// Memory: per-session graph memory in the daemon. Cross-process
// isolation is the same shape as state — caller's facts land on the
// daemon's per-session memgine and don't leak to other sessions
// unless the embedder shares one explicitly.
// ---------------------------------------------------------------------------

/// `memory.add_fact`. Sends `{ subject, body, kind?, confidence? }`;
/// optional fields are left out of the params when `None`. The daemon
/// replies with the new fact count, which must be a JSON unsigned integer.
pub async fn proxy_memory_add_fact(
    client: &DaemonClient,
    subject: &str,
    body: &str,
    kind: Option<&str>,
    confidence: Option<f64>,
) -> Result<u64, String> {
    let mut fact = serde_json::Map::new();
    fact.insert("subject".to_string(), Value::from(subject));
    fact.insert("body".to_string(), Value::from(body));
    if let Some(label) = kind {
        fact.insert("kind".to_string(), Value::from(label));
    }
    if let Some(score) = confidence {
        fact.insert("confidence".to_string(), Value::from(score));
    }
    let reply = client.call("memory.add_fact", Value::Object(fact)).await?;
    match reply.as_u64() {
        Some(total) => Ok(total),
        None => Err(format!("memory.add_fact returned non-u64: {reply}")),
    }
}

/// `memory.query`. Sends `{ query, k? }` (`k` omitted when `None`). The
/// daemon answers with an array of `{ subject, body, activation }`, returned
/// here as a JSON string.
pub async fn proxy_memory_query(
    client: &DaemonClient,
    query: &str,
    k: Option<u32>,
) -> Result<String, String> {
    let params = match k {
        Some(limit) => serde_json::json!({ "query": query, "k": limit }),
        None => serde_json::json!({ "query": query }),
    };
    let matches = client.call("memory.query", params).await?;
    match serde_json::to_string(&matches) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize memory.query result: {err}")),
    }
}

/// `memory.fact_count`. Returns the daemon-side per-session
/// `valid_fact_count()`. No params. Mirrors the embedded
/// `CarRuntime::fact_count` so the FFI consumer in Daemon mode sees
/// the daemon's facts (#146 — silent zero from the embedded
/// fallback memgine was the bug).
///
/// # Errors
///
/// * the underlying `call` fails (transport or JSON-RPC error);
/// * the daemon's reply is not an unsigned integer;
/// * the count does not fit in `u32` — reported explicitly rather than
///   silently wrapped by an `as` cast.
pub async fn proxy_memory_fact_count(client: &DaemonClient) -> Result<u32, String> {
    let v = client.call("memory.fact_count", Value::Null).await?;
    let count = v
        .as_u64()
        .ok_or_else(|| format!("memory.fact_count returned non-u64: {v}"))?;
    // Checked narrowing: a plain `as u32` would wrap values above u32::MAX.
    u32::try_from(count).map_err(|_| format!("memory.fact_count {count} exceeds u32 range"))
}

/// `memory.build_context`. Returns the assembled context string.
pub async fn proxy_memory_build_context(
    client: &DaemonClient,
    query: &str,
) -> Result<String, String> {
    let v = client
        .call(
            "memory.build_context",
            serde_json::json!({ "query": query }),
        )
        .await?;
    Ok(v.as_str().unwrap_or("").to_string())
}

// ---------------------------------------------------------------------------
// Skills: ingest/find/report — per-session skill graph in the
// daemon's memgine. Same isolation contract as memory + state.
// ---------------------------------------------------------------------------

/// `skill.ingest`. `params_json` carries the complete param object (name,
/// code, platform, persona, url_pattern, task_keywords, description,
/// supersedes?). It is parsed locally and forwarded as-is. The daemon's
/// response (typically a node id or status JSON) is returned as a JSON
/// string.
pub async fn proxy_skill_ingest(
    client: &DaemonClient,
    params_json: &str,
) -> Result<String, String> {
    let params = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid skill.ingest params JSON: {err}")),
    };
    let reply = client.call("skill.ingest", params).await?;
    match serde_json::to_string(&reply) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize skill.ingest result: {err}")),
    }
}

/// `skill.find`. Sends `{ persona, url, task, max_results? }`
/// (`max_results` omitted when `None`). Returns the array of skill matches
/// as a JSON string.
pub async fn proxy_skill_find(
    client: &DaemonClient,
    persona: &str,
    url: &str,
    task: &str,
    max_results: Option<u32>,
) -> Result<String, String> {
    let params = match max_results {
        Some(limit) => serde_json::json!({
            "persona": persona,
            "url": url,
            "task": task,
            "max_results": limit,
        }),
        None => serde_json::json!({
            "persona": persona,
            "url": url,
            "task": task,
        }),
    };
    let matches = client.call("skill.find", params).await?;
    match serde_json::to_string(&matches) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize skill.find result: {err}")),
    }
}

/// `skill.report`. Sends `{ skill_name, outcome }` and returns the daemon's
/// status response as a JSON string.
pub async fn proxy_skill_report(
    client: &DaemonClient,
    skill_name: &str,
    outcome: &str,
) -> Result<String, String> {
    let params = serde_json::json!({ "skill_name": skill_name, "outcome": outcome });
    client.call("skill.report", params).await.and_then(|status| {
        serde_json::to_string(&status)
            .map_err(|err| format!("serialize skill.report result: {err}"))
    })
}

/// `skills.list`. Returns the registered skills.
pub async fn proxy_skills_list(
    client: &DaemonClient,
    params_json: Option<&str>,
) -> Result<String, String> {
    let params = match params_json {
        Some(s) => {
            serde_json::from_str(s).map_err(|e| format!("invalid skills.list params JSON: {e}"))?
        }
        None => Value::Null,
    };
    let v = client.call("skills.list", params).await?;
    serde_json::to_string(&v).map_err(|e| format!("serialize skills.list result: {e}"))
}

// ---------------------------------------------------------------------------
// Models: list / list_unified / pull. Registry calls — these belong
// on the daemon because the daemon owns the model store and the
// admission accounting that depends on what's actually loaded.
// ---------------------------------------------------------------------------

/// `models.list`. Takes no params. Returns the curated / built-in model
/// catalog as a JSON string.
pub async fn proxy_models_list(client: &DaemonClient) -> Result<String, String> {
    client
        .call("models.list", Value::Null)
        .await
        .and_then(|catalog| {
            serde_json::to_string(&catalog)
                .map_err(|err| format!("serialize models.list result: {err}"))
        })
}

/// `models.list_unified`. Takes no params. Returns the unified registry
/// (built-in + runtime-discovered + user-registered) as a JSON string.
pub async fn proxy_models_list_unified(client: &DaemonClient) -> Result<String, String> {
    let registry = client.call("models.list_unified", Value::Null).await?;
    match serde_json::to_string(&registry) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize models.list_unified result: {err}")),
    }
}

/// `models.pull`. Sends `{ name }`. The daemon answers with
/// `{ path: "..." }`, returned here as a JSON string.
pub async fn proxy_models_pull(client: &DaemonClient, name: &str) -> Result<String, String> {
    let params = serde_json::json!({ "name": name });
    let pulled = client.call("models.pull", params).await?;
    match serde_json::to_string(&pulled) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize models.pull result: {err}")),
    }
}

// ---------------------------------------------------------------------------
// Meeting — multi-track recording, transcription, summarization
//
// The daemon owns one shared MeetingRegistry + voice session pool.
// Routing here means two FFI consumers (CLI script + Python notebook)
// see the same in-flight meetings, and the post-meeting summarizer
// runs on the daemon's inference engine instead of being skipped
// because the FFI process has no engine.
//
// `voice.event` notifications stream back over the same WebSocket as
// JSON-RPC notifications; FFI bindings register a notification handler
// via [`DaemonClient::register_notification_handler`] to surface them
// to JS / Python callbacks.
// ---------------------------------------------------------------------------

/// `meeting.start` — begin a meeting capture on the daemon. `request_json`
/// is a serialized `StartMeetingRequest`; it is parsed locally and forwarded
/// as the call params. Returns the meeting status JSON emitted by the
/// daemon.
pub async fn proxy_meeting_start(
    client: &DaemonClient,
    request_json: &str,
) -> Result<String, String> {
    let request = match serde_json::from_str::<Value>(request_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid meeting.start request JSON: {err}")),
    };
    let status = client.call("meeting.start", request).await?;
    match serde_json::to_string(&status) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize meeting.start result: {err}")),
    }
}

/// `meeting.stop` — end an in-flight meeting on the daemon. Sends
/// `{ meeting_id, summarize }`. With `summarize` set to true (the default),
/// the daemon runs the post-meeting summarizer on its own inference engine.
pub async fn proxy_meeting_stop(
    client: &DaemonClient,
    meeting_id: &str,
    summarize: bool,
) -> Result<String, String> {
    let params = serde_json::json!({
        "meeting_id": meeting_id,
        "summarize": summarize,
    });
    client.call("meeting.stop", params).await.and_then(|outcome| {
        serde_json::to_string(&outcome)
            .map_err(|err| format!("serialize meeting.stop result: {err}"))
    })
}

/// `meeting.list` — enumerate every meeting on disk under `root`. When
/// `root` is `None` an empty params object is sent and the daemon falls
/// back to its cwd `.car/meetings`.
pub async fn proxy_meeting_list(
    client: &DaemonClient,
    root: Option<&str>,
) -> Result<String, String> {
    let params = match root {
        Some(dir) => serde_json::json!({ "root": dir }),
        None => serde_json::json!({}),
    };
    let meetings = client.call("meeting.list", params).await?;
    match serde_json::to_string(&meetings) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize meeting.list result: {err}")),
    }
}

/// `meeting.get` — look up a single meeting by id. Sends
/// `{ meeting_id, root? }` (`root` omitted when `None`). Returns the meeting
/// JSON, or propagates the daemon's "not found" error.
pub async fn proxy_meeting_get(
    client: &DaemonClient,
    meeting_id: &str,
    root: Option<&str>,
) -> Result<String, String> {
    let params = match root {
        Some(dir) => serde_json::json!({ "meeting_id": meeting_id, "root": dir }),
        None => serde_json::json!({ "meeting_id": meeting_id }),
    };
    let meeting = client.call("meeting.get", params).await?;
    match serde_json::to_string(&meeting) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize meeting.get result: {err}")),
    }
}

// ---------------------------------------------------------------------------
// A2A — Agent-to-Agent peer surface
//
// These four methods wrap the daemon's `a2a.*` JSON-RPC namespace so
// FFI callers in v0.8 daemon-only mode can drive the A2A peer state
// the same way they drove the in-process car-ffi-common::a2a helpers
// in v0.7. Daemon-shared state means two FFI consumers on the same
// host see the same listener / inbox / dispatcher.
// ---------------------------------------------------------------------------

/// `a2a.start` — bring up the A2A HTTP listener. `params_json` carries
/// `{ bind, public_url?, agent_name?, agent_description?,
///   organization?, organization_url? }`. The daemon answers with
/// `{ "bound": "..." }`, returned here as a JSON string.
pub async fn proxy_a2a_start(client: &DaemonClient, params_json: &str) -> Result<String, String> {
    let params = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid a2a.start params JSON: {err}")),
    };
    let bound = client.call("a2a.start", params).await?;
    match serde_json::to_string(&bound) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2a.start result: {err}")),
    }
}

/// `a2a.stop` — tear down the A2A listener. Takes no params. The daemon
/// answers `{ "stopped": true }`, or an error when no listener is running.
pub async fn proxy_a2a_stop(client: &DaemonClient) -> Result<String, String> {
    client.call("a2a.stop", Value::Null).await.and_then(|ack| {
        serde_json::to_string(&ack).map_err(|err| format!("serialize a2a.stop result: {err}"))
    })
}

/// `a2a.status` — query the listener status. Takes no params. The daemon
/// method always answers with a JSON object rather than erroring, so polling
/// code need not tell "not running" apart from a failure. A failed `call`
/// itself is still propagated as `Err`.
pub async fn proxy_a2a_status(client: &DaemonClient) -> Result<String, String> {
    let status = client.call("a2a.status", Value::Null).await?;
    match serde_json::to_string(&status) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2a.status result: {err}")),
    }
}

/// `a2a.send` — deliver a message to a remote A2A peer. `params_json`
/// carries
/// `{ endpoint, message, blocking?, ingest_a2ui?, route_auth?, allow_untrusted_endpoint? }`.
/// The daemon's reply is returned as a JSON string.
pub async fn proxy_a2a_send(client: &DaemonClient, params_json: &str) -> Result<String, String> {
    let params = match serde_json::from_str::<Value>(params_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid a2a.send params JSON: {err}")),
    };
    let reply = client.call("a2a.send", params).await?;
    match serde_json::to_string(&reply) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2a.send result: {err}")),
    }
}

// ---------------------------------------------------------------------------
// A2UI — Agent-to-UI surface store
//
// Wraps the daemon's `a2ui.*` JSON-RPC namespace. In v0.8 the daemon
// owns the A2UI surface store; FFI bindings proxy here so multiple
// consumers (web dashboard, host app, agent process) see the same
// surfaces.
// ---------------------------------------------------------------------------

/// `a2ui.capabilities` — fetch the renderer capabilities advertised by the
/// daemon (component catalog version, max payload size, etc.). Takes no
/// params.
pub async fn proxy_a2ui_capabilities(client: &DaemonClient) -> Result<String, String> {
    client
        .call("a2ui.capabilities", Value::Null)
        .await
        .and_then(|caps| {
            serde_json::to_string(&caps)
                .map_err(|err| format!("serialize a2ui.capabilities result: {err}"))
        })
}

/// `a2ui.apply` — apply one A2UI envelope to the daemon's surface store.
/// `envelope_json` is parsed locally and forwarded as the call params.
/// Returns the apply result envelope as a JSON string.
pub async fn proxy_a2ui_apply(
    client: &DaemonClient,
    envelope_json: &str,
) -> Result<String, String> {
    let envelope = match serde_json::from_str::<Value>(envelope_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid A2UI envelope: {err}")),
    };
    let applied = client.call("a2ui.apply", envelope).await?;
    match serde_json::to_string(&applied) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2ui.apply result: {err}")),
    }
}

/// `a2ui.ingest` — have the daemon extract A2UI envelopes from a generic
/// A2A carrier payload (`a2ui` key, `data` parts, `artifact` payloads) and
/// apply each in order. The daemon answers with
/// `{ "applied": [A2uiApplyResult] }`, returned here as a JSON string.
pub async fn proxy_a2ui_ingest(
    client: &DaemonClient,
    payload_json: &str,
) -> Result<String, String> {
    let payload = match serde_json::from_str::<Value>(payload_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid A2UI payload: {err}")),
    };
    let applied = client.call("a2ui.ingest", payload).await?;
    match serde_json::to_string(&applied) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2ui.ingest result: {err}")),
    }
}

/// `a2ui.surfaces` — enumerate every surface the daemon currently holds.
/// Takes no params.
pub async fn proxy_a2ui_surfaces(client: &DaemonClient) -> Result<String, String> {
    let surfaces = client.call("a2ui.surfaces", Value::Null).await?;
    match serde_json::to_string(&surfaces) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2ui.surfaces result: {err}")),
    }
}

/// `a2ui.get` — look up a single surface by id. Sends `{ surface_id }`.
/// Returns the surface JSON, or the JSON text `null` when the surface is
/// absent.
pub async fn proxy_a2ui_get(client: &DaemonClient, surface_id: &str) -> Result<String, String> {
    let params = serde_json::json!({ "surface_id": surface_id });
    let surface = client.call("a2ui.get", params).await?;
    match serde_json::to_string(&surface) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2ui.get result: {err}")),
    }
}

/// `a2ui.reap` — discard expired surfaces. Takes no params. The daemon
/// answers with `{ "removed": [surface_id, ...] }`, returned here as a JSON
/// string.
pub async fn proxy_a2ui_reap(client: &DaemonClient) -> Result<String, String> {
    client.call("a2ui.reap", Value::Null).await.and_then(|removed| {
        serde_json::to_string(&removed)
            .map_err(|err| format!("serialize a2ui.reap result: {err}"))
    })
}

/// `a2ui.action` — relay a user action (button click, form submit) from the
/// renderer to the agent owning the surface. `action_json` is the full
/// `ClientAction` shape: `{ surface_id, name, source_component_id, ... }`.
/// The daemon answers with `{ event, route }`, returned here as a JSON
/// string.
pub async fn proxy_a2ui_action(client: &DaemonClient, action_json: &str) -> Result<String, String> {
    let action = match serde_json::from_str::<Value>(action_json) {
        Ok(parsed) => parsed,
        Err(err) => return Err(format!("invalid A2UI action JSON: {err}")),
    };
    let routed = client.call("a2ui.action", action).await?;
    match serde_json::to_string(&routed) {
        Ok(json) => Ok(json),
        Err(err) => Err(format!("serialize a2ui.action result: {err}")),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Tests that mutate process-wide env vars serialize on the
    // crate-wide [`crate::env_test_lock`] (shared with
    // `auth_token::tests` / `memory_path::tests`), so env mutations in
    // any module can't race env reads in another, and a panic can't
    // poison a per-module lock and cascade.
    //
    // Every `std::env::set_var` / `remove_var` call below sits in an
    // `unsafe` block: those functions are `unsafe fn` from edition 2024
    // on, and wrapping them uniformly keeps the module compiling there.

    /// `proposal.submit` gets the long execute timeout by default, other
    /// methods keep the short generic one, and `CAR_EXECUTE_TIMEOUT`
    /// overrides the execute path.
    #[test]
    fn execute_method_gets_a_generous_read_timeout() {
        let _g = crate::env_test_lock();
        // Clear overrides so we observe the defaults.
        let prev_exec = std::env::var_os("CAR_EXECUTE_TIMEOUT");
        let prev_daemon = std::env::var_os("CAR_DAEMON_TIMEOUT");
        // SAFETY: env mutation is serialized with other env-touching tests
        // by the `env_test_lock` guard held in `_g`.
        unsafe {
            std::env::remove_var("CAR_EXECUTE_TIMEOUT");
            std::env::remove_var("CAR_DAEMON_TIMEOUT");
        }
        // proposal.submit blocks on tool callbacks -> the long execute
        // timeout, NOT the 30s generic default that reaped it (#259). With no
        // proposal/actions parseable it falls back to the flat floor.
        let empty = serde_json::json!({});
        assert_eq!(
            read_timeout_for("proposal.submit", &empty),
            Duration::from_secs(DEFAULT_EXECUTE_TIMEOUT_SECS)
        );
        assert!(DEFAULT_EXECUTE_TIMEOUT_SECS > DEFAULT_READ_TIMEOUT_SECS);
        // Other methods keep the short generic default.
        assert_eq!(
            read_timeout_for("memory.add_fact", &empty),
            Duration::from_secs(DEFAULT_READ_TIMEOUT_SECS)
        );
        // Env override caps the execute path.
        // SAFETY: `_g` (env_test_lock) is still held.
        unsafe { std::env::set_var("CAR_EXECUTE_TIMEOUT", "1234") };
        assert_eq!(
            read_timeout_for("proposal.submit", &empty),
            Duration::from_secs(1234)
        );
        // Restore. CAR_DAEMON_TIMEOUT was removed above, so it only needs
        // re-setting when it had a value.
        // SAFETY: `_g` (env_test_lock) is still held.
        unsafe {
            match prev_exec {
                Some(v) => std::env::set_var("CAR_EXECUTE_TIMEOUT", v),
                None => std::env::remove_var("CAR_EXECUTE_TIMEOUT"),
            }
            if let Some(v) = prev_daemon {
                std::env::set_var("CAR_DAEMON_TIMEOUT", v);
            }
        }
    }

    /// #265: the `proposal.submit` client read timeout is derived from the
    /// proposal's action budgets so a legitimately-long / retried / chained
    /// in-budget call is not re-reaped client-side.
    ///
    /// Attempt count is now per-action: a non-retry action (the harness
    /// default, `Abort`) is single-attempt; only `failure_behavior == "retry"`
    /// actions get `max_retries + 1` (see `retry_proposal_deadline_*`).
    #[test]
    fn proposal_submit_read_timeout_derives_from_budgets() {
        let _g = crate::env_test_lock();
        let prev_exec = std::env::var_os("CAR_EXECUTE_TIMEOUT");
        // SAFETY: env mutation is serialized with other env-touching tests
        // by the `env_test_lock` guard held in `_g`.
        unsafe { std::env::remove_var("CAR_EXECUTE_TIMEOUT") };

        // A single 1200s (20 min) non-retry action. Old flat 900s would
        // re-reap it; the derived deadline must exceed the action budget.
        // Σ = 1_200_000ms × 1 attempt = 1_200_000ms = 1200s, +30 grace.
        let long = serde_json::json!({
            "proposal": { "actions": [{ "timeout_ms": 1_200_000u64 }] }
        });
        let dl = read_timeout_for("proposal.submit", &long);
        assert!(
            dl >= Duration::from_secs(1200),
            "derived deadline {dl:?} must cover the 1200s in-budget action"
        );
        assert_eq!(dl, Duration::from_secs(1_200 + 30));

        // Several chained non-retry actions sum (Σ), not max — the conservative
        // upper bound. 3 × 200s = 600s, +30 grace = 630s. Floor (900s) bites,
        // so the floored value is what we get.
        let chained = serde_json::json!({
            "proposal": { "actions": [
                { "timeout_ms": 200_000u64 },
                { "timeout_ms": 200_000u64 },
                { "timeout_ms": 200_000u64 },
            ] }
        });
        assert_eq!(
            read_timeout_for("proposal.submit", &chained),
            // 600s + 30 grace = 630s < 900s floor.
            Duration::from_secs(DEFAULT_EXECUTE_TIMEOUT_SECS)
        );

        // A short proposal keeps the generous flat floor (derived < floor).
        let short = serde_json::json!({
            "proposal": { "actions": [{ "timeout_ms": 5_000u64 }] }
        });
        assert_eq!(
            read_timeout_for("proposal.submit", &short),
            Duration::from_secs(DEFAULT_EXECUTE_TIMEOUT_SECS)
        );

        // An action with no timeout_ms assumes the daemon default (300s).
        // Non-retry → single attempt → 300s + 30 grace = 330s < 900s floor.
        let defaulted = serde_json::json!({
            "proposal": { "actions": [{}] }
        });
        assert_eq!(
            read_timeout_for("proposal.submit", &defaulted),
            Duration::from_secs(DEFAULT_EXECUTE_TIMEOUT_SECS)
        );

        // CAR_EXECUTE_TIMEOUT is an explicit cap: it wins even over a larger
        // derived deadline (operator pinned the ceiling).
        // SAFETY: `_g` (env_test_lock) is still held.
        unsafe { std::env::set_var("CAR_EXECUTE_TIMEOUT", "100") };
        assert_eq!(
            read_timeout_for("proposal.submit", &long),
            Duration::from_secs(100)
        );

        // SAFETY: `_g` (env_test_lock) is still held.
        unsafe {
            match prev_exec {
                Some(v) => std::env::set_var("CAR_EXECUTE_TIMEOUT", v),
                None => std::env::remove_var("CAR_EXECUTE_TIMEOUT"),
            }
        }
    }

    /// #265 (retry worst case): a `failure_behavior: "retry"` action's derived
    /// deadline must cover `max_retries + 1` attempts — the daemon executor
    /// runs `action.max_retries + 1` attempts for a retry action
    /// (`car_engine::executor::execute_with_retry`). An earlier flat `× 3`
    /// undercounted the default (3 → 4) and re-reaped a legitimately-retrying
    /// call.
    #[test]
    fn retry_proposal_deadline_covers_max_retries_plus_one() {
        let _g = crate::env_test_lock();
        let prev_exec = std::env::var_os("CAR_EXECUTE_TIMEOUT");
        // SAFETY: env mutation is serialized with other env-touching tests
        // by the `env_test_lock` guard held in `_g`.
        unsafe { std::env::remove_var("CAR_EXECUTE_TIMEOUT") };

        // Default max_retries (3) on a retry action → 4 attempts, NOT 3.
        // 500s × 4 = 2000s, +30 grace = 2030s. Floor (900s) doesn't bite.
        let retry_default = serde_json::json!({
            "proposal": { "actions": [
                { "timeout_ms": 500_000u64, "failure_behavior": "retry" }
            ] }
        });
        let dl = read_timeout_for("proposal.submit", &retry_default);
        assert_eq!(
            dl,
            Duration::from_secs(500 * 4 + 30),
            "retry action at default max_retries must cover 4 attempts (max_retries+1)"
        );
        // Regression guard: the old `× 3` undercount would have been 1530s.
        assert!(
            dl > Duration::from_secs(500 * 3 + 30),
            "deadline {dl:?} must exceed the old 3-attempt undercount"
        );

        // Explicit max_retries is honored: 2 retries → 3 attempts.
        let retry_explicit = serde_json::json!({
            "proposal": { "actions": [
                { "timeout_ms": 400_000u64, "failure_behavior": "retry", "max_retries": 2 }
            ] }
        });
        assert_eq!(
            read_timeout_for("proposal.submit", &retry_explicit),
            Duration::from_secs(400 * 3 + 30)
        );

        // A retry action with no explicit timeout_ms uses the daemon default
        // (300s) × 4 attempts = 1200s + 30 grace = 1230s (> 900s floor).
        let retry_defaulted = serde_json::json!({
            "proposal": { "actions": [{ "failure_behavior": "retry" }] }
        });
        assert_eq!(
            read_timeout_for("proposal.submit", &retry_defaulted),
            Duration::from_secs(DEFAULT_TOOL_TIMEOUT_MS / 1000 * 4 + 30)
        );

        // Non-retry behaviors stay single-attempt.
        for behavior in ["abort", "skip"] {
            let p = serde_json::json!({
                "proposal": { "actions": [
                    { "timeout_ms": 1_000_000u64, "failure_behavior": behavior }
                ] }
            });
            assert_eq!(
                read_timeout_for("proposal.submit", &p),
                Duration::from_secs(1_000 + 30),
                "{behavior} action must be single-attempt"
            );
        }

        // SAFETY: `_g` (env_test_lock) is still held.
        unsafe {
            match prev_exec {
                Some(v) => std::env::set_var("CAR_EXECUTE_TIMEOUT", v),
                None => std::env::remove_var("CAR_EXECUTE_TIMEOUT"),
            }
        }
    }

    /// `RuntimeMode::from_env` and `resolve_or_err` always return
    /// `Daemon` in v0.8 — the embedded fallback was retired. The
    /// pre-v0.8 env knobs (`CAR_FFI_MODE=embedded`, `daemon-only`,
    /// `daemon-no-spawn`) are silently ignored; setting them does
    /// not flip behavior.
    #[test]
    fn runtime_mode_is_daemon_only() {
        let _guard = crate::env_test_lock();
        let prev = std::env::var("CAR_FFI_MODE").ok();

        // SAFETY: env mutation is serialized with other env-touching tests
        // by the `env_test_lock` guard held in `_guard`.
        unsafe { std::env::remove_var("CAR_FFI_MODE") };
        assert_eq!(RuntimeMode::from_env(), RuntimeMode::Daemon);
        assert_eq!(RuntimeMode::resolve_or_err().unwrap(), RuntimeMode::Daemon);

        // Pre-v0.8 embedded knob is ignored — daemon-only.
        // SAFETY: `_guard` (env_test_lock) is still held.
        unsafe { std::env::set_var("CAR_FFI_MODE", "embedded") };
        assert_eq!(RuntimeMode::from_env(), RuntimeMode::Daemon);
        assert_eq!(RuntimeMode::resolve_or_err().unwrap(), RuntimeMode::Daemon);

        // SAFETY: `_guard` (env_test_lock) is still held.
        unsafe {
            match prev {
                Some(v) => std::env::set_var("CAR_FFI_MODE", v),
                None => std::env::remove_var("CAR_FFI_MODE"),
            }
        }
    }

    /// Daemon URL: env override beats default; default matches the
    /// CLI's `daemon_ws_url`.
    #[test]
    fn daemon_url_resolution() {
        let _guard = crate::env_test_lock();
        let prev = std::env::var("CAR_DAEMON_URL").ok();
        // SAFETY: env mutation is serialized with other env-touching tests
        // by the `env_test_lock` guard held in `_guard`.
        unsafe { std::env::remove_var("CAR_DAEMON_URL") };
        assert_eq!(daemon_ws_url(), "ws://127.0.0.1:9100");

        // SAFETY: `_guard` (env_test_lock) is still held.
        unsafe { std::env::set_var("CAR_DAEMON_URL", "ws://other:1234") };
        assert_eq!(daemon_ws_url(), "ws://other:1234");

        // SAFETY: `_guard` (env_test_lock) is still held.
        unsafe {
            match prev {
                Some(v) => std::env::set_var("CAR_DAEMON_URL", v),
                None => std::env::remove_var("CAR_DAEMON_URL"),
            }
        }
    }

    /// `probe_daemon_port` returns false against a dead port within
    /// the configured timeout. Port 1 is almost certainly closed
    /// (root-only and unused) so this passes regardless of whether
    /// a daemon happens to be running on the test host.
    #[test]
    fn probe_dead_port_returns_false() {
        let _guard = crate::env_test_lock();
        let prev = std::env::var("CAR_DAEMON_URL").ok();
        // SAFETY: env mutation is serialized with other env-touching tests
        // by the `env_test_lock` guard held in `_guard`.
        unsafe { std::env::set_var("CAR_DAEMON_URL", "ws://127.0.0.1:1") };
        assert!(
            !car_proto::daemon::probe_daemon_port(std::time::Duration::from_millis(100)),
            "probe of port 1 should fail"
        );
        // SAFETY: `_guard` (env_test_lock) is still held.
        unsafe {
            match prev {
                Some(v) => std::env::set_var("CAR_DAEMON_URL", v),
                None => std::env::remove_var("CAR_DAEMON_URL"),
            }
        }
    }

    /// `DaemonClient::call` against a non-listening port surfaces the
    /// connection error. We pick port 1 (almost certainly closed)
    /// rather than the daemon default, so this passes whether or
    /// not a daemon is running on the test host.
    #[tokio::test]
    async fn client_call_against_dead_port_errors_clearly() {
        let client = DaemonClient::with_url("ws://127.0.0.1:1");
        let r = client
            .call("state.get", serde_json::json!({"key": "x"}))
            .await;
        assert!(r.is_err(), "expected error against dead port");
        let msg = r.unwrap_err();
        assert!(
            msg.contains("connect daemon"),
            "expected connect-error wording, got: {msg}"
        );
    }

    /// After a failed call, the next call still tries to connect
    /// (we don't poison the slot). Same dead port — should still
    /// surface "connect daemon" error.
    #[tokio::test]
    async fn client_recovers_from_failure_to_retry_connect() {
        let client = DaemonClient::with_url("ws://127.0.0.1:1");
        let _ = client
            .call("state.get", serde_json::json!({"key": "x"}))
            .await;
        let r = client
            .call("state.get", serde_json::json!({"key": "y"}))
            .await;
        assert!(r.is_err());
        assert!(r.unwrap_err().contains("connect daemon"));
    }
}