// car-server-core 0.15.1

//! Server-side session state — shared across all connections.

use car_engine::{Runtime, ToolExecutor};
use car_eventlog::EventLog;
use car_proto::{ToolExecuteRequest, ToolExecuteResponse};
use futures::Sink;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use tokio::sync::{oneshot, Mutex};
use tokio_tungstenite::tungstenite::{Error as WsError, Message};

/// Type-erased write half of a WebSocket connection.
///
/// The dispatch loop accepts either a
/// `WebSocketStream<TcpStream>` (the legacy car-server TCP listener)
/// or a `WebSocketStream<UnixStream>` (the daemon-as-default UDS
/// listener) — both implement `Sink<Message, Error = WsError>` after
/// the tungstenite handshake. Erasing the type here avoids cascading
/// a generic parameter through every WsChannel / Session / ServerState
/// touchpoint in the dispatcher.
///
/// The trait object is bounded `Send + Unpin + 'static` and pinned in a
/// `Box`; in this file it is only ever used behind the
/// `tokio::sync::Mutex` in [`WsChannel::write`].
pub type WsSink = Pin<Box<dyn Sink<Message, Error = WsError> + Send + Unpin + 'static>>;

/// Server-side credentials for continuing an A2A-owned A2UI surface.
///
/// This intentionally lives outside `car_a2ui::A2uiSurfaceOwner` so
/// renderers can inspect surface ownership without receiving secrets.
///
/// Serialized as an internally tagged enum: the variant name is written
/// in camelCase under the `"type"` key (`"none"`, `"bearer"`,
/// `"header"`), alongside the variant's own fields.
///
/// NOTE(review): the derived `Debug` prints `token` / `value` verbatim.
/// Avoid `{:?}`-logging values of this type, or consider a redacting
/// `Debug` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "type")]
pub enum A2aRouteAuth {
    /// No credentials attached to the route.
    None,
    /// A bearer token. Presumably sent as an `Authorization: Bearer`
    /// header by the continuation code — confirm against the caller.
    Bearer { token: String },
    /// A single name/value pair. Presumably sent as a custom HTTP
    /// header (e.g. an API key) — confirm against the caller.
    Header { name: String, value: String },
}

/// Shared write half of the WebSocket, plus pending callback channels.
/// `write` is type-erased via [`WsSink`] so the dispatcher can run
/// against any transport-specific WebSocketStream (TCP or UDS today;
/// axum-bridged in future) without templatizing every consumer.
pub struct WsChannel {
    /// Outgoing half of the socket. Senders in this file lock it for the
    /// duration of one `send`, so whole messages from concurrent tasks
    /// never interleave.
    pub write: Mutex<WsSink>,
    /// Pending tool execution callbacks: request_id → oneshot sender.
    /// `WsToolExecutor` inserts an entry before it sends a
    /// `tools.execute` request; presumably the connection's read loop
    /// removes the entry and completes the sender when the matching
    /// response arrives (not visible in this part of the file).
    pub pending: Mutex<HashMap<String, oneshot::Sender<ToolExecuteResponse>>>,
    /// Counter backing [`WsChannel::next_request_id`].
    pub next_id: AtomicU64,
}

impl WsChannel {
    /// Returns a fresh callback-routing id of the form `cb-<n>`.
    ///
    /// `<n>` is the value `next_id` held before being atomically
    /// incremented, so successive calls on the same channel yield
    /// distinct ids (until the `u64` counter wraps).
    pub fn next_request_id(&self) -> String {
        let seq = self.next_id.fetch_add(1, Ordering::SeqCst);
        format!("cb-{seq}")
    }

    /// Test-only stub: a `WsChannel` whose write sink discards
    /// everything sent to it. Used by `host.rs` tests that need a real
    /// `Arc<WsChannel>` in the subscribers map (to exercise membership
    /// checks like the cross-session resolve fan-out) without
    /// performing a tungstenite handshake. No writes are expected
    /// against this stub; if anything does write, the drain sink
    /// quietly absorbs the message.
    #[cfg(test)]
    pub fn test_stub() -> Self {
        use futures::sink::SinkExt;

        // `drain()` cannot fail; the error mapping exists only to
        // satisfy the `Error = WsError` bound on `WsSink`.
        let discard = futures::sink::drain().sink_map_err(|_| WsError::ConnectionClosed);
        let write: WsSink = Box::pin(discard);
        Self {
            write: Mutex::new(write),
            pending: Mutex::new(HashMap::new()),
            next_id: AtomicU64::new(0),
        }
    }
}

/// Bookkeeping record for one in-flight `agents.chat` session.
///
/// Created when a host client calls `agents.chat`, removed when the
/// agent emits a terminal `agent.chat.event` (`kind: "done"` or
/// `"error"`), when either side disconnects, or when the host cancels
/// via `agents.chat.cancel`.
///
/// The session_id is host-supplied (or server-generated when omitted)
/// and threads through every `agent.chat.event` notification so the
/// server can route streamed deltas back to the originating host
/// without needing per-session subscriptions. The id itself is not
/// stored here; it is the key of `ServerState::chat_sessions`. See
/// `docs/proposals/agent-chat-surface.md` for the wire contract.
#[derive(Debug, Clone)]
pub struct ChatSession {
    /// Agent that owns this chat — populated from
    /// `attached_agents` at `agents.chat` dispatch time.
    pub agent_id: String,
    /// Client id of the host that issued `agents.chat`. The server
    /// forwards `agent.chat.event` notifications back to *this* host
    /// only, so two CarHost windows chatting with the same agent are
    /// independent streams.
    pub host_client_id: String,
    /// Unix-seconds creation time — used by the future stale-session
    /// sweeper to drop sessions whose agent died without emitting a
    /// terminal event.
    pub created_at: u64,
}

/// Tool executor that sends callbacks to the client over WebSocket.
///
/// Each call is forwarded to the connected client as a `tools.execute`
/// JSON-RPC request; the result is whatever arrives on the oneshot
/// channel registered in `WsChannel::pending` for that request.
pub struct WsToolExecutor {
    /// Connection to the client that executes the tools.
    pub channel: Arc<WsChannel>,
}

#[async_trait::async_trait]
impl ToolExecutor for WsToolExecutor {
    /// Executes `tool` on the client without a proposal-level action id.
    ///
    /// Delegates to [`Self::execute_with_action`] with an empty
    /// `action_id`.
    async fn execute(&self, tool: &str, params: &Value) -> Result<Value, String> {
        // Legacy callers that don't have a proposal-level Action.id
        // (e.g. internal `executor.execute` chains in tests) — emit an
        // empty action_id so the client-side handler can still see the
        // payload shape and decide whether to fail loudly.
        self.execute_with_action(tool, params, "").await
    }

    /// Sends a `tools.execute` JSON-RPC request to the client and waits
    /// up to 60 seconds for its response.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a human-readable message when the request
    /// cannot be serialized or sent, when no response arrives within
    /// 60 seconds, when the response channel is closed before a
    /// response is delivered, or when the client's response carries an
    /// `error`.
    ///
    /// The entry registered in `WsChannel::pending` is removed again on
    /// the send-failure and timeout paths, so failed callbacks do not
    /// accumulate in the map for the lifetime of the connection.
    async fn execute_with_action(
        &self,
        tool: &str,
        params: &Value,
        action_id: &str,
    ) -> Result<Value, String> {
        use futures::SinkExt;

        // The JSON-RPC request id is the daemon's callback-routing key
        // (used by the pending-response map below). The `action_id`
        // FIELD on the payload is the originating proposal Action.id
        // surfaced to the host so process-wide handlers can route
        // concurrent callbacks back to per-call dispatchers
        // (Parslee-ai/car-releases#43 follow-up). They serve different
        // purposes and must stay distinct: routing id is daemon-side,
        // action id is host-side.
        let request_id = self.channel.next_request_id();

        let callback = ToolExecuteRequest {
            action_id: action_id.to_string(),
            tool: tool.to_string(),
            parameters: params.clone(),
            timeout_ms: None,
            attempt: 1,
        };

        // Build the callback as a JSON-RPC request.
        let rpc_request = serde_json::json!({
            "jsonrpc": "2.0",
            "method": "tools.execute",
            "params": callback,
            "id": request_id,
        });

        // Serialize BEFORE registering the pending entry, so a
        // serialization failure cannot strand a sender in the map.
        let text = serde_json::to_string(&rpc_request).map_err(|e| e.to_string())?;

        // Register the response channel before sending, so a fast
        // response can never arrive ahead of its registration.
        let (tx, rx) = oneshot::channel();
        self.channel
            .pending
            .lock()
            .await
            .insert(request_id.clone(), tx);

        // The error is turned into a `String` right away so nothing
        // transport-specific is held across the cleanup `.await` below.
        let sent = self
            .channel
            .write
            .lock()
            .await
            .send(Message::Text(text.into()))
            .await
            .map_err(|e| format!("failed to send tool callback: {}", e));
        if let Err(message) = sent {
            // The request never left, so no response will ever claim
            // this entry — drop it ourselves.
            self.channel.pending.lock().await.remove(&request_id);
            return Err(message);
        }

        // Wait for the client to respond (with a timeout).
        let waited = tokio::time::timeout(std::time::Duration::from_secs(60), rx).await;
        let response = match waited {
            Ok(Ok(response)) => response,
            // The sender was dropped, which means it has already left
            // the pending map; there is nothing to clean up.
            Ok(Err(_)) => return Err(format!("tool '{}' callback channel closed", tool)),
            Err(_) => {
                // Nobody is listening any more; remove the stale
                // sender. A no-op if a late response raced us and
                // already took it.
                self.channel.pending.lock().await.remove(&request_id);
                return Err(format!("tool '{}' callback timed out (60s)", tool));
            }
        };

        match response.error {
            Some(err) => Err(err),
            None => Ok(response.output.unwrap_or(Value::Null)),
        }
    }
}

/// Voice event sink that forwards events to a specific WebSocket client
/// as `voice.event` JSON-RPC notifications.
///
/// Each `voice.transcribe_stream.start` call constructs one of these
/// bound to the originating client's [`WsChannel`], so a client only
/// receives events for sessions it started.
pub struct WsVoiceEventSink {
    /// Channel of the client that receives the notifications.
    pub channel: Arc<WsChannel>,
}

impl car_voice::VoiceEventSink for WsVoiceEventSink {
    fn send(&self, session_id: &str, event_json: String) {
        use futures::SinkExt;
        let channel = self.channel.clone();
        let session_id = session_id.to_string();
        tokio::spawn(async move {
            let payload: Value = serde_json::from_str(&event_json)
                .unwrap_or_else(|_| Value::String(event_json.clone()));
            let notification = serde_json::json!({
                "jsonrpc": "2.0",
                "method": "voice.event",
                "params": {
                    "session_id": session_id,
                    "event": payload,
                },
            });
            let Ok(text) = serde_json::to_string(&notification) else {
                return;
            };
            let _ = channel
                .write
                .lock()
                .await
                .send(Message::Text(text.into()))
                .await;
        });
    }
}

/// Per-meeting fanout sink that ingests transcript text into a
/// session-scoped memgine using the `Arc<tokio::sync::Mutex<...>>`
/// wrapper, then forwards every event upstream untouched.
///
/// Lives here (not in `car-ffi-common`) because the engine handle uses
/// `tokio::sync::Mutex` per the "one-wrapper rule" — the FFI-common
/// `MeetingMemgineFanout` still uses `std::sync::Mutex` for the NAPI/
/// PyO3 bindings, which keep their sync wrappers. Each binding owns the
/// fanout that matches its lock primitive; the parsing/formatting logic
/// itself is shared via [`car_meeting::extract_transcript_for_ingest`].
///
/// `send` is called from the voice drain task and must be non-blocking,
/// so the lock acquisition is shipped to a `tokio::spawn`. Transcript
/// events are independent so reordering across spawned tasks is fine.
pub struct WsMemgineIngestSink {
    /// Meeting id handed to `extract_transcript_for_ingest` for every
    /// event.
    pub meeting_id: String,
    /// Memgine that receives the extracted transcript text.
    pub engine: Arc<Mutex<car_memgine::MemgineEngine>>,
    /// Sink that receives every event after the ingest step, whether or
    /// not a transcript was extracted from it.
    pub upstream: Arc<dyn car_voice::VoiceEventSink>,
}

impl car_voice::VoiceEventSink for WsMemgineIngestSink {
    fn send(&self, voice_session_id: &str, event_json: String) {
        if let Ok(value) = serde_json::from_str::<Value>(&event_json) {
            if let Some((speaker, text)) = car_meeting::extract_transcript_for_ingest(
                &value,
                &self.meeting_id,
                voice_session_id,
            ) {
                let engine = self.engine.clone();
                tokio::spawn(async move {
                    let mut guard = engine.lock().await;
                    guard.ingest_conversation(&speaker, &text, chrono::Utc::now());
                });
            }
        }
        self.upstream.send(voice_session_id, event_json);
    }
}

/// Per-client session: the state attached to one connected client.
pub struct ClientSession {
    /// Identifier of this client. Presumably the key under which the
    /// session is stored in `ServerState::sessions` — confirm against
    /// `create_session`.
    pub client_id: String,
    /// Engine runtime used by this session.
    pub runtime: Arc<Runtime>,
    /// Write half of this client's WebSocket plus its pending
    /// tool-callback map.
    pub channel: Arc<WsChannel>,
    /// Host state handle. `ServerState` holds a field of the same type;
    /// presumably both refer to the same instance — confirm against
    /// `create_session`.
    pub host: Arc<crate::host::HostState>,
    /// Memgine handle. Wrapped in `tokio::sync::Mutex` so dispatcher
    /// handlers can hold the lock across `.await` points without
    /// risking poisoning. Migrated from `std::sync::Mutex` in the
    /// car-server-core extraction (U1) per the "one-wrapper rule".
    pub memgine: Arc<Mutex<car_memgine::MemgineEngine>>,
    /// Lazy browser session — first `browser.run` call launches Chromium,
    /// subsequent calls reuse it so element IDs resolve across invocations
    /// within the same WebSocket connection.
    pub browser: car_ffi_common::browser::BrowserSessionSlot,
    /// Per-connection auth state. Starts `false`; flips to `true`
    /// after a successful `session.auth` handshake. Always considered
    /// authenticated when `ServerState::auth_token` is unset (auth
    /// disabled). Closes Parslee-ai/car-releases#32.
    pub authenticated: std::sync::atomic::AtomicBool,
    /// Bound agent identity (#169). `Some(id)` once a lifecycle-agent
    /// child has called `session.auth { token, agent_id }` and the
    /// supervisor confirmed `agent_id` is supervised + token matches.
    /// Used by `agents.list` to surface which managed agents have
    /// actually attached vs. just being marked `Running` at the
    /// process level. Cleared at disconnect by `remove_session`.
    pub agent_id: tokio::sync::Mutex<Option<String>>,
    /// Bound persistent memgine (#170). `Some` after `session.auth`
    /// successfully attaches the connection to a daemon-owned
    /// per-agent memgine (paired with `agent_id`). Memory handlers
    /// route through [`ClientSession::effective_memgine`] which
    /// returns this when set, falling back to the ephemeral
    /// `memgine` field for browser/host/CLI connections.
    pub bound_memgine: tokio::sync::Mutex<Option<Arc<Mutex<car_memgine::MemgineEngine>>>>,
}

impl ClientSession {
    /// Picks the memgine the memory.* handlers should operate on.
    ///
    /// Yields the bound per-agent memgine when this session attached
    /// via `session.auth { agent_id }` (#169 + #170); otherwise yields
    /// the ephemeral per-WS memgine. Cheap: one async lock plus an
    /// `Arc` clone.
    pub async fn effective_memgine(&self) -> Arc<Mutex<car_memgine::MemgineEngine>> {
        let bound = self.bound_memgine.lock().await;
        match bound.as_ref() {
            Some(engine) => Arc::clone(engine),
            None => Arc::clone(&self.memgine),
        }
    }
}

/// Approval-gate policy for high-risk WS methods.
///
/// Every method in `methods` must be acknowledged via
/// `host.resolve_approval` before the dispatcher will route the
/// request to its handler. The dispatcher waits up to `timeout` for
/// a resolution; on timeout (or any non-`approve` resolution) the
/// request fails with JSON-RPC error `-32003`.
///
/// Default: gate enabled, the macOS-automation surface
/// (`automation.run_applescript`, `automation.shortcuts.run`,
/// `messages.send`, `mail.send`, `vision.ocr`), 60-second timeout.
/// `car-server --no-approvals` (or embedders calling
/// [`ServerStateConfig::with_approval_gate`] with `enabled=false`)
/// turns it off — only appropriate when no untrusted caller can
/// reach the WS port.
#[derive(Debug, Clone)]
pub struct ApprovalGate {
    /// Master switch. When `false`, every method dispatches without
    /// raising an approval — the pre-2026-05 behaviour.
    pub enabled: bool,
    /// Methods that require approval. Match is by exact method-name
    /// string against the JSON-RPC `method` field.
    pub methods: std::collections::HashSet<String>,
    /// How long to wait for the user to resolve the approval before
    /// timing out and surfacing an error to the caller.
    pub timeout: std::time::Duration,
}

impl Default for ApprovalGate {
    /// Gate enabled, the five macOS-automation methods gated, 60-second
    /// timeout.
    fn default() -> Self {
        let methods = [
            "automation.run_applescript",
            "automation.shortcuts.run",
            "messages.send",
            "mail.send",
            "vision.ocr",
        ]
        .iter()
        .map(|s| s.to_string())
        .collect();
        Self {
            enabled: true,
            methods,
            timeout: std::time::Duration::from_secs(60),
        }
    }
}

impl ApprovalGate {
    /// Disable the gate entirely. Equivalent to passing
    /// `car-server --no-approvals`. Only appropriate when no
    /// untrusted caller can reach the WS port.
    ///
    /// The returned gate has an empty method set; its `timeout` is
    /// still populated (60 seconds).
    pub fn disabled() -> Self {
        Self {
            enabled: false,
            methods: std::collections::HashSet::new(),
            timeout: std::time::Duration::from_secs(60),
        }
    }

    /// `true` if this method must be acknowledged before dispatch:
    /// the gate is enabled and `method` is an exact member of `methods`.
    pub fn requires_approval(&self, method: &str) -> bool {
        self.enabled && self.methods.contains(method)
    }
}

/// Builder for constructing a [`ServerState`] with embedder-supplied
/// dependencies. Embedders (e.g. `tokhn-daemon`) use this to inject
/// their own memgine handle and other shared infrastructure; the
/// standalone `car-server` binary uses [`ServerState::standalone`]
/// which calls `with_config` under the hood.
pub struct ServerStateConfig {
    /// Journal directory; the only setting without a default.
    pub journal_dir: PathBuf,
    /// Optional pre-constructed memgine engine. When `None`, each
    /// `create_session` call builds a fresh engine; embedders that want
    /// to share a single engine across sessions can supply a clone of
    /// their `Arc<Mutex<MemgineEngine>>` here.
    pub shared_memgine: Option<Arc<Mutex<car_memgine::MemgineEngine>>>,
    /// Optional pre-constructed inference engine.
    pub inference: Option<Arc<car_inference::InferenceEngine>>,
    /// Optional embedder-supplied A2A runtime. Used by the in-core
    /// `A2aDispatcher` to execute peer-driven proposals. When `None`,
    /// the dispatcher uses a fresh `Runtime` with `register_agent_basics`
    /// — peer agents see CAR's built-in tools and nothing else,
    /// matching the behaviour of the standalone `start_a2a_listener`.
    pub a2a_runtime: Option<Arc<car_engine::Runtime>>,
    /// Optional embedder-supplied A2A task store. When `None`,
    /// defaults to `InMemoryTaskStore`. tokhn-style embedders that
    /// want a polling-friendly persistent store plug it in here.
    pub a2a_store: Option<Arc<dyn car_a2a::TaskStore>>,
    /// Optional embedder-supplied agent card factory. When `None`,
    /// the dispatcher serves a card built from the A2A runtime's
    /// tool schemas at construction time, advertising its public URL
    /// as `ws://127.0.0.1:9100/` (the WS surface the dispatcher itself
    /// is reachable on).
    pub a2a_card_source: Option<Arc<car_a2a::AgentCardSource>>,
    /// Approval-gate policy. When `None`, the dispatcher uses
    /// [`ApprovalGate::default`] (gate ON, the macOS-automation
    /// surface gated, 60s timeout). Pass
    /// [`ApprovalGate::disabled`] to opt out — only appropriate
    /// when no untrusted caller can reach the WS port.
    pub approval_gate: Option<ApprovalGate>,
}

impl ServerStateConfig {
    /// Creates the minimal config used by the standalone car-server
    /// binary. Only the journal dir has to be supplied; every optional
    /// dependency starts out as `None` and is lazily constructed at
    /// first use.
    pub fn new(journal_dir: PathBuf) -> Self {
        Self {
            journal_dir,
            shared_memgine: None,
            inference: None,
            a2a_runtime: None,
            a2a_store: None,
            a2a_card_source: None,
            approval_gate: None,
        }
    }

    /// Supplies a memgine engine to be shared by every session.
    pub fn with_shared_memgine(self, engine: Arc<Mutex<car_memgine::MemgineEngine>>) -> Self {
        Self {
            shared_memgine: Some(engine),
            ..self
        }
    }

    /// Supplies a pre-constructed inference engine.
    pub fn with_inference(self, engine: Arc<car_inference::InferenceEngine>) -> Self {
        Self {
            inference: Some(engine),
            ..self
        }
    }

    /// Supplies the runtime the A2A dispatcher executes against.
    /// Use case: tokhn-daemon wants peers to see its OPA preflight
    /// tooling, not just CAR's `register_agent_basics` defaults.
    pub fn with_a2a_runtime(self, runtime: Arc<car_engine::Runtime>) -> Self {
        Self {
            a2a_runtime: Some(runtime),
            ..self
        }
    }

    /// Supplies the task store used by the A2A dispatcher. Use case:
    /// tokhn's polling-friendly persistent store keyed by their
    /// session id.
    pub fn with_a2a_store(self, store: Arc<dyn car_a2a::TaskStore>) -> Self {
        Self {
            a2a_store: Some(store),
            ..self
        }
    }

    /// Supplies the agent card factory. The factory is invoked on
    /// every `agent/getAuthenticatedExtendedCard` dispatch, so
    /// embedders can reflect runtime tool changes.
    pub fn with_a2a_card_source(self, source: Arc<car_a2a::AgentCardSource>) -> Self {
        Self {
            a2a_card_source: Some(source),
            ..self
        }
    }

    /// Replaces the approval-gate policy. Pass
    /// [`ApprovalGate::disabled`] to skip the gate entirely (only
    /// appropriate when no untrusted caller can reach the WS port);
    /// pass a customised [`ApprovalGate`] to add or remove methods
    /// or to change the timeout.
    pub fn with_approval_gate(self, gate: ApprovalGate) -> Self {
        Self {
            approval_gate: Some(gate),
            ..self
        }
    }
}

/// Global server state shared across all connections.
pub struct ServerState {
    /// Journal directory. Presumably copied from
    /// [`ServerStateConfig::journal_dir`] at construction — confirm in
    /// `with_config`.
    pub journal_dir: PathBuf,
    /// Live client sessions. Presumably keyed by
    /// `ClientSession::client_id` — confirm in `create_session`.
    pub sessions: Mutex<HashMap<String, Arc<ClientSession>>>,
    /// Inference engine. Pre-populated by `with_config` when the
    /// embedder supplied one in [`ServerStateConfig::inference`];
    /// otherwise left empty to be set later.
    pub inference: std::sync::OnceLock<Arc<car_inference::InferenceEngine>>,
    /// Host state handle; see `crate::host::HostState`.
    pub host: Arc<crate::host::HostState>,
    /// When `Some`, `create_session` clones this handle into every new
    /// `ClientSession.memgine` — embedders that want a single shared
    /// memgine across all WS sessions set this. Standalone car-server
    /// leaves it `None`, which gives each session its own engine
    /// (preserving today's behavior).
    pub shared_memgine: Option<Arc<Mutex<car_memgine::MemgineEngine>>>,
    /// Process-wide voice session registry. Each
    /// `voice.transcribe_stream.start` call registers its own per-client
    /// [`WsVoiceEventSink`] so events route back to the originating WS
    /// connection only.
    pub voice_sessions: Arc<car_voice::VoiceSessionRegistry>,
    /// Process-wide meeting registry. Meeting ids are global; each
    /// meeting binds to the originating client's WS for upstream
    /// events but persists transcripts to the resolved
    /// `.car/meetings/<id>/` regardless of which client started it.
    pub meetings: Arc<car_meeting::MeetingRegistry>,
    /// Process-wide A2UI surface store. Agent-produced surfaces are
    /// visible to every host UI subscriber, independent of the
    /// WebSocket session that applied the update.
    pub a2ui: car_a2ui::A2uiSurfaceStore,
    /// In-process UI-improvement agent. Invoked from
    /// `handle_a2ui_render_report` with each inbound report; returned
    /// `Decision::Patch` envelopes are applied via the standard
    /// `apply_a2ui_envelope` path so all subscribers see the patch.
    /// `Arc` so the agent's interior `DashMap` state survives across
    /// handler calls even when `ServerState` is cheap-cloned.
    pub ui_agent: Arc<car_ui_agent::UIImprovementAgent>,
    /// Per-surface oscillation detector for the UI-improvement
    /// loop. Sits between the agent's `Decision::Patch` and the
    /// apply path so A→B→A patch cycles get cooled down without
    /// the agent itself having to track history. neo's review:
    /// "controllers use workqueue backoff; reconcilers stay
    /// stateless."
    pub ui_agent_oscillation: Arc<crate::ui_agent_loop::OscillationDetector>,
    /// Per-surface iteration budget. Backstop against runaway
    /// loops the oscillation detector misses — caps total agent-
    /// driven patches per surface at `DEFAULT_MAX_ITERATIONS`.
    pub ui_agent_budget: Arc<crate::ui_agent_loop::IterationBudget>,
    /// Process-wide concurrency gate for inference RPC handlers. Sized
    /// from host RAM at startup, overridable via
    /// [`crate::admission::ENV_MAX_CONCURRENT`]. Without this, N
    /// concurrent users multiply KV-cache and activation memory and
    /// take the host out (#114-adjacent: filed alongside the daemon
    /// always-on rework). The semaphore lives on `ServerState` so it
    /// is shared across every WebSocket session in the same process.
    pub admission: Arc<crate::admission::InferenceAdmission>,
    /// Server-side A2A continuation auth keyed by A2UI surface id.
    /// Kept out of `A2uiSurface.owner` so host renderers never see
    /// bearer/API-key material.
    pub a2ui_route_auth: Mutex<HashMap<String, A2aRouteAuth>>,
    /// Lifecycle-managed agents — declarative manifest at
    /// `~/.car/agents.json` driving spawn/restart/stop. Closes
    /// Parslee-ai/car-releases#27. Lazy-initialized so embedders that
    /// don't want process supervision don't pay the disk-touch cost
    /// at server start.
    pub supervisor: std::sync::OnceLock<Arc<car_registry::supervisor::Supervisor>>,
    /// Manifest path this daemon is *observing* but does NOT own.
    /// Set by `car-server` when boot-time supervisor construction
    /// fails with [`car_registry::supervisor::SupervisorError::AlreadyRunning`]
    /// — another car-server process on the host holds the exclusive
    /// lock on this manifest. In that state, `supervisor()` returns a
    /// clear "observe-only" error so mutation handlers refuse
    /// (preventing the duplicate-spawn bug from
    /// Parslee-ai/car-releases#44), while read-only handlers
    /// (`agents.list`, `agents.health`) fall back to
    /// [`car_registry::supervisor::Supervisor::list_from_manifest`] /
    /// [`car_registry::supervisor::Supervisor::health_from_manifest`]
    /// so operators can still inspect what the primary daemon is
    /// supervising.
    pub observer_manifest_path: std::sync::OnceLock<PathBuf>,
    /// In-core A2A dispatcher — embedders that consume `car-server-core`
    /// get A2A reachability "for free" without standing up a separate
    /// HTTP listener. Closes Parslee-ai/car-releases#28. Lazy-init so
    /// the embedder can override the runtime / task store / agent card
    /// via [`ServerStateConfig::with_a2a_runtime`] etc. before the
    /// first dispatch.
    pub a2a_dispatcher: std::sync::OnceLock<Arc<car_a2a::A2aDispatcher>>,
    /// WS clients subscribed to A2UI envelope events. After every
    /// successful `a2ui.apply` / `a2ui.ingest`, the resulting
    /// `A2uiApplyResult` is broadcast to every subscriber as an
    /// `a2ui.event` JSON-RPC notification. Closes
    /// Parslee-ai/car-releases#29. Subscribers register via the
    /// `a2ui/subscribe` method and are auto-cleaned on WS disconnect.
    pub a2ui_subscribers: Mutex<HashMap<String, Arc<WsChannel>>>,
    /// Per-launch auth token. When `Some`, the WS dispatcher rejects
    /// non-auth methods on unauthenticated sessions until the client
    /// calls `session.auth` with the matching value. When `None`,
    /// auth is disabled and every connection works as before. Set
    /// at startup by `car-server` unless `--no-auth` is passed
    /// (default flipped 2026-05); embedders that want to enable
    /// auth call [`ServerState::install_auth_token`]. Closes
    /// Parslee-ai/car-releases#32.
    pub auth_token: std::sync::OnceLock<String>,
    /// Parslee cloud identity loaded from the user's OS keychain at
    /// daemon startup when `car auth login` has been completed.
    pub parslee_session: std::sync::OnceLock<crate::parslee_auth::ParsleeSession>,
    /// `agent_id -> client_id` map of currently-attached lifecycle
    /// agents (#169). Populated by the `session.auth` handler when a
    /// supervised child presents its `agent_id` + per-agent token;
    /// drained on disconnect by `remove_session`. Single-claim:
    /// a second connection presenting the same `agent_id` is
    /// rejected so the daemon-side per-agent state stays unambiguous.
    pub attached_agents: Mutex<HashMap<String, String>>,
    /// `agent_id -> persistent memgine` map (#170). Lazy-loaded on
    /// first connection per id from `~/.car/memory/agents/<id>.jsonl`,
    /// retained across daemon restart, surviving any single
    /// disconnect/reconnect of the supervised child. Connections
    /// that auth without an `agent_id` (browser, host, ad-hoc CLI)
    /// keep the per-WS ephemeral memgine on `ClientSession.memgine`
    /// — no behaviour change.
    pub agent_memgines: Mutex<HashMap<String, Arc<Mutex<car_memgine::MemgineEngine>>>>,
    /// In-flight `agents.chat` sessions keyed by `session_id`. See
    /// [`ChatSession`] for shape. Populated by `agents.chat`,
    /// cleared on terminal `agent.chat.event` or
    /// `agents.chat.cancel`. Disconnect cleanup happens in
    /// `remove_session` — any in-flight session bound to either the
    /// disconnecting host or agent client is dropped so subsequent
    /// stray notifications from a respawned agent fall on the floor
    /// rather than racing into a stale stream.
    pub chat_sessions: Mutex<HashMap<String, ChatSession>>,
    /// Bound MCP HTTP-streamable URL (e.g.
    /// `"http://127.0.0.1:9102/mcp"`) — `car-server` installs this
    /// after binding the listener. Used by the
    /// `agents.invoke_external` handler to default
    /// `InvokeOptions.mcp_endpoint` so external agents
    /// (Claude Code today) load the daemon's CAR namespace via
    /// `--mcp-config` automatically. `None` when MCP isn't bound
    /// (e.g. `--mcp-bind disabled`).
    pub mcp_url: std::sync::OnceLock<String>,
    /// Registry of connected MCP SSE sessions. Populated alongside
    /// [`ServerState::mcp_url`] when `car-server` boots the MCP
    /// listener. Public so handlers can call
    /// `crate::mcp::push_to_session` to send server-initiated
    /// requests to a specific MCP-connected client (MCP-3
    /// foundation; MCP-3b will wire host-owned tool dispatch through
    /// this).
    pub mcp_sessions: std::sync::OnceLock<Arc<crate::mcp::SessionMap>>,
    /// Approval gate for high-risk WS methods (audit 2026-05). The
    /// gate intercepts `automation.run_applescript`,
    /// `automation.shortcuts.run`, `messages.send`, `mail.send`, and
    /// `vision.ocr` before they dispatch, raises a
    /// `host.create_approval` for the user to act on, and waits
    /// (with a timeout) for `host.resolve_approval`. Approve →
    /// dispatch continues; deny / timeout → JSON-RPC error code
    /// `-32003`. The set of gated methods and the wait timeout are
    /// embedder-overridable via
    /// [`ServerStateConfig::with_approval_gate`].
    pub approval_gate: ApprovalGate,
    /// Embedder-supplied A2A runtime carried over from the
    /// embedder's [`ServerStateConfig`]. Consumed lazily on first
    /// `a2a_dispatcher()` call so embedders can construct
    /// `ServerState` without paying the runtime spin-up cost when
    /// they don't actually use the A2A surface.
    ///
    /// Unlike most locks on this struct, this field and the two below
    /// use `std::sync::Mutex`.
    pub(crate) a2a_runtime: std::sync::Mutex<Option<Arc<car_engine::Runtime>>>,
    /// Embedder-supplied A2A task store, carried over from
    /// [`ServerStateConfig`] and consumed the same way as
    /// `a2a_runtime`.
    pub(crate) a2a_store: std::sync::Mutex<Option<Arc<dyn car_a2a::TaskStore>>>,
    /// Embedder-supplied agent card factory, carried over from
    /// [`ServerStateConfig`] and consumed the same way as
    /// `a2a_runtime`.
    pub(crate) a2a_card_source: std::sync::Mutex<Option<Arc<car_a2a::AgentCardSource>>>,
}

impl ServerState {
    /// Build the state used by the standalone `car-server` binary.
    ///
    /// No shared memgine is supplied, so every WS connection ends up
    /// with its own per-session memgine (see `create_session`). That
    /// matches the pre-extraction default and is the right shape for
    /// a single-process daemon serving one user at a time.
    ///
    /// **Not for embedders.** Going through this constructor leaves
    /// `shared_memgine = None`, which brings back the dual-memgine
    /// bug U7 was created to prevent: one engine inside the embedder
    /// and a fresh one inside every WS session. Embedders should call
    /// [`ServerState::embedded`], where the shared engine handle is a
    /// mandatory argument and therefore cannot be forgotten.
    pub fn standalone(journal_dir: PathBuf) -> Self {
        let cfg = ServerStateConfig::new(journal_dir);
        Self::with_config(cfg)
    }

    /// Build the state used by embedders (e.g. `tokhn-daemon`).
    ///
    /// `shared_memgine` is **mandatory**: it is stored on the state
    /// and handed to every WS session this state creates, so all
    /// sessions and the embedder operate on one engine and the
    /// dual-memgine bug cannot occur.
    ///
    /// Embedders that need more than this (a pre-warmed inference
    /// engine, A2A overrides, ...) should assemble a
    /// [`ServerStateConfig`] themselves and pass it to
    /// [`ServerState::with_config`].
    pub fn embedded(
        journal_dir: PathBuf,
        shared_memgine: Arc<Mutex<car_memgine::MemgineEngine>>,
    ) -> Self {
        let cfg = ServerStateConfig::new(journal_dir).with_shared_memgine(shared_memgine);
        Self::with_config(cfg)
    }

    /// Assemble a `ServerState` from a fully specified
    /// [`ServerStateConfig`].
    ///
    /// This is the general entry point behind the convenience
    /// constructors: use it when a shared memgine *and* a pre-warmed
    /// inference engine (or any other advanced wiring those
    /// constructors don't expose) must be injected together.
    ///
    /// Side effect: starts the voice-session sweeper on the freshly
    /// created registry.
    pub fn with_config(cfg: ServerStateConfig) -> Self {
        use std::sync::Mutex as StdMutex;
        use std::sync::OnceLock;

        // A pre-warmed engine, when supplied, pre-fills the slot;
        // otherwise the slot starts empty.
        let inference = match cfg.inference {
            Some(engine) => OnceLock::from(engine),
            None => OnceLock::new(),
        };

        // The sweeper reaps sessions whose clients vanished without
        // calling voice.transcribe_stream.stop (WS disconnect,
        // process exit, ...); without it listener handles would leak
        // for the daemon's lifetime.
        // NOTE(review): the previous comment here said this function
        // is always reached from the `#[tokio::main]` entry point so
        // that `Handle::try_current()` inside `start_sweeper` finds a
        // runtime. The unit tests in this file call it from plain
        // `#[test]` functions — presumably `start_sweeper` tolerates
        // a missing runtime; confirm.
        let voice_sessions = Arc::new(car_voice::VoiceSessionRegistry::new());
        voice_sessions.start_sweeper();

        // The UI-improvement agent is pure decision logic: no I/O and
        // no persistence handle. Feeding strategy outcomes into the
        // memgine is left to the caller (handler.rs, after a
        // successful Decision::Patch), which keeps the agent crate
        // independent of the mutex flavor (std vs tokio) in use.
        let ui_agent = Arc::new(car_ui_agent::UIImprovementAgent::with_default_strategies());
        let ui_agent_oscillation = Arc::new(crate::ui_agent_loop::OscillationDetector::new());
        let ui_agent_budget = Arc::new(crate::ui_agent_loop::IterationBudget::new());

        Self {
            journal_dir: cfg.journal_dir,
            sessions: Mutex::new(HashMap::new()),
            inference,
            host: Arc::new(crate::host::HostState::new()),
            shared_memgine: cfg.shared_memgine,
            voice_sessions,
            meetings: Arc::new(car_meeting::MeetingRegistry::new()),
            a2ui: car_a2ui::A2uiSurfaceStore::new(),
            ui_agent,
            ui_agent_oscillation,
            ui_agent_budget,
            admission: Arc::new(crate::admission::InferenceAdmission::new()),
            a2ui_route_auth: Mutex::new(HashMap::new()),
            // Install-once slots: all start empty and are filled by
            // the `install_*` / lazy-init methods.
            supervisor: OnceLock::new(),
            observer_manifest_path: OnceLock::new(),
            a2a_dispatcher: OnceLock::new(),
            // Embedder A2A overrides, parked until the first
            // `a2a_dispatcher()` call takes them.
            a2a_runtime: StdMutex::new(cfg.a2a_runtime),
            a2a_store: StdMutex::new(cfg.a2a_store),
            a2a_card_source: StdMutex::new(cfg.a2a_card_source),
            a2ui_subscribers: Mutex::new(HashMap::new()),
            auth_token: OnceLock::new(),
            parslee_session: OnceLock::new(),
            attached_agents: Mutex::new(HashMap::new()),
            agent_memgines: Mutex::new(HashMap::new()),
            chat_sessions: Mutex::new(HashMap::new()),
            mcp_url: OnceLock::new(),
            mcp_sessions: OnceLock::new(),
            approval_gate: cfg.approval_gate.unwrap_or_default(),
        }
    }

    /// Enable the per-launch auth handshake. After this call, every
    /// new WS connection must call `session.auth` with `token` as
    /// the first frame; otherwise the connection is closed. Called
    /// by `car-server` at startup unless `--no-auth` is set
    /// (default flipped 2026-05); embedders supply their own token
    /// if they want the same posture.
    ///
    /// # Errors
    ///
    /// The token slot is write-once. If a token was already
    /// installed, the existing token is kept and the rejected
    /// `token` is handed back as `Err(token)`.
    pub fn install_auth_token(&self, token: String) -> Result<(), String> {
        self.auth_token.set(token)
    }

    /// Store the Parslee session on this state. The slot is
    /// write-once: only the first call takes effect.
    ///
    /// # Errors
    ///
    /// If a session was already installed, the existing one is kept
    /// and the rejected `session` is handed back in `Err`.
    pub fn install_parslee_session(
        &self,
        session: crate::parslee_auth::ParsleeSession,
    ) -> Result<(), crate::parslee_auth::ParsleeSession> {
        self.parslee_session.set(session)
    }

    /// Install the bound MCP URL after car-server's listener is up.
    /// The slot is write-once (same install idiom as the supervisor
    /// and observer-manifest slots): only the first call takes
    /// effect.
    ///
    /// # Errors
    ///
    /// If an MCP URL was already installed, the existing value is
    /// kept and the rejected `url` is handed back as `Err(url)`.
    /// Embedders should treat this as "another component beat us to
    /// it" and use whichever value is now set.
    pub fn install_mcp_url(&self, url: String) -> Result<(), String> {
        self.mcp_url.set(url)
    }

    /// Install the MCP SSE session registry. Pairs with
    /// [`install_mcp_url`] — both come from the same `start_mcp`
    /// call and either both get installed or neither does (the
    /// daemon binds them together).
    ///
    /// # Errors
    ///
    /// The slot is write-once. If a registry was already installed,
    /// the existing one is kept and the rejected `sessions` handle is
    /// handed back in `Err`.
    pub fn install_mcp_sessions(
        &self,
        sessions: Arc<crate::mcp::SessionMap>,
    ) -> Result<(), Arc<crate::mcp::SessionMap>> {
        self.mcp_sessions.set(sessions)
    }

    /// Return the agent supervisor, constructing it on first use.
    ///
    /// Resolution order:
    ///
    /// 1. A supervisor that is already installed (by an earlier call
    ///    or via [`ServerState::install_supervisor`]) is returned
    ///    as-is.
    /// 2. In observer mode (set via [`install_observer_manifest`])
    ///    the call fails with an "observe-only" message naming the
    ///    manifest path the primary daemon owns. This keeps a second
    ///    daemon from re-attempting `user_default()` (which would
    ///    also fail with `AlreadyRunning`) on every WS call and gives
    ///    mutation handlers a stable refusal path.
    /// 3. Otherwise a [`car_registry::supervisor::Supervisor`] is
    ///    built via `user_default()` — backed by `~/.car/agents.json`
    ///    + `~/.car/logs/` — and installed. Embedders that need a
    ///    non-default location should call
    ///    [`ServerState::install_supervisor`] before any handler runs.
    ///
    /// Read-only handlers (`agents.list`, `agents.health`) should
    /// check [`Self::observer_manifest_path`] first and fall back to
    /// [`car_registry::supervisor::Supervisor::list_from_manifest`] /
    /// `health_from_manifest` when it is set. Closes
    /// Parslee-ai/car-releases#44.
    ///
    /// # Errors
    ///
    /// Returns the observe-only message in observer mode, or the
    /// stringified `user_default()` error when construction fails.
    pub fn supervisor(&self) -> Result<Arc<car_registry::supervisor::Supervisor>, String> {
        if let Some(existing) = self.supervisor.get() {
            return Ok(Arc::clone(existing));
        }

        if let Some(manifest) = self.observer_manifest_path.get() {
            let refusal = format!(
                "this car-server is observe-only — another car-server process \
                 holds the supervisor lock for {}. Mutations refuse here; route \
                 them to the primary daemon, or stop the other car-server first.",
                manifest.display()
            );
            return Err(refusal);
        }

        let fresh = match car_registry::supervisor::Supervisor::user_default() {
            Ok(built) => Arc::new(built),
            Err(e) => return Err(e.to_string()),
        };

        // If a concurrent caller installed its supervisor first,
        // `set` rejects ours; the result is ignored on purpose and
        // whichever instance is in the slot is returned.
        let _ = self.supervisor.set(fresh);
        let installed = self.supervisor.get().expect("set or pre-existing");
        Ok(Arc::clone(installed))
    }

    /// Pre-empt the lazy default with a caller-supplied supervisor.
    /// The slot is write-once, so this only takes effect when no
    /// supervisor has been installed or lazily constructed yet; it
    /// never replaces an existing one.
    /// Used by the standalone `car-server` binary to call
    /// `start_all()` on a known-good handle without paying the
    /// lazy-init lookup cost.
    ///
    /// # Errors
    ///
    /// If a supervisor was already installed, the existing one is
    /// kept and the rejected `supervisor` handle is handed back in
    /// `Err`.
    pub fn install_supervisor(
        &self,
        supervisor: Arc<car_registry::supervisor::Supervisor>,
    ) -> Result<(), Arc<car_registry::supervisor::Supervisor>> {
        self.supervisor.set(supervisor)
    }

    /// Peek at the supervisor slot without ever filling it.
    ///
    /// In contrast to [`supervisor`](Self::supervisor), this never
    /// falls through to `user_default()`: when nothing is installed
    /// it yields `None` rather than constructing a fresh `Supervisor`
    /// and acquiring the `<manifest>.lock` as a side effect. Intended
    /// for read-only metadata paths (`host.subscribe` identity,
    /// status surfaces), where observing must not turn into lock
    /// acquisition — asking "do you own the lock?" must not be the
    /// act of taking it.
    pub fn supervisor_if_installed(&self) -> Option<Arc<car_registry::supervisor::Supervisor>> {
        let slot = self.supervisor.get();
        slot.map(Arc::clone)
    }

    /// Mark this daemon as *observing* a manifest owned by another
    /// car-server process. After this call, `supervisor()` returns
    /// an "observe-only" error and read-only handlers
    /// (`agents.list`, `agents.health`) fall back to the static
    /// `Supervisor::list_from_manifest` / `health_from_manifest`
    /// paths. Closes Parslee-ai/car-releases#44.
    ///
    /// Idempotent per path: installing the path that is already
    /// recorded is a no-op and returns `Ok(())`.
    ///
    /// # Errors
    ///
    /// If a *different* path is already recorded, the recorded path
    /// is kept and the rejected `path` is handed back as `Err(path)`.
    pub fn install_observer_manifest(&self, path: PathBuf) -> Result<(), PathBuf> {
        match self.observer_manifest_path.set(path) {
            Ok(()) => Ok(()),
            // The slot is write-once, so `set` rejects every second
            // call. A repeat of the recorded path is the documented
            // no-op rather than a collision.
            Err(rejected) if self.observer_manifest_path.get() == Some(&rejected) => Ok(()),
            Err(rejected) => Err(rejected),
        }
    }

    /// Path of the manifest this daemon is observing but not
    /// supervising. `None` when this daemon owns the supervisor
    /// (the normal case) or when no manifest is configured at all
    /// (no `HOME`, embedder didn't install one).
    ///
    /// This is a plain read of the slot filled by
    /// `install_observer_manifest`; it never initializes anything.
    pub fn observer_manifest_path(&self) -> Option<&PathBuf> {
        self.observer_manifest_path.get()
    }

    /// Lazy-initialize and return the in-core A2A dispatcher. The
    /// first call constructs an [`car_a2a::A2aDispatcher`] from
    /// either the embedder's overrides (set via
    /// [`ServerStateConfig::with_a2a_runtime`] / `with_a2a_store` /
    /// `with_a2a_card_source`) or sensible defaults: a fresh
    /// `Runtime` with `register_agent_basics` registered, an
    /// `InMemoryTaskStore`, and a card built from the runtime's
    /// tool schemas advertising `ws://127.0.0.1:9100/` as the
    /// public URL. Closes Parslee-ai/car-releases#28.
    ///
    /// Later calls return a clone of the same `Arc`.
    ///
    /// # Panics
    ///
    /// Panics if any of the three override mutexes is poisoned.
    ///
    /// NOTE(review): initialization is not serialized. Two callers
    /// that both pass the fast-path check before either has stored a
    /// dispatcher will each build one. The first caller takes the
    /// embedder overrides; the second finds the slots empty and
    /// builds defaults. If the second caller stores its dispatcher
    /// first, the embedder overrides are silently dropped. Confirm
    /// whether callers can race here (e.g. first use always happens
    /// on a single task) or guard the init with an async once-cell.
    pub async fn a2a_dispatcher(&self) -> Arc<car_a2a::A2aDispatcher> {
        // Fast path: already built.
        if let Some(d) = self.a2a_dispatcher.get() {
            return d.clone();
        }

        // Embedder overrides take precedence; fall back to defaults
        // for each slot independently (so an embedder that only
        // wants a custom card can leave the runtime + store at
        // defaults). `take()` moves each override out of its slot,
        // leaving `None` behind, so an override is consumed by at
        // most one caller. Each std mutex guard is a temporary that
        // is released at the end of its statement, i.e. before any
        // `.await` below.
        let runtime = self
            .a2a_runtime
            .lock()
            .expect("a2a_runtime mutex poisoned")
            .take();
        let runtime = match runtime {
            Some(r) => r,
            None => {
                // Default runtime: fresh instance with the basic
                // agent tools registered.
                let r = Arc::new(car_engine::Runtime::new());
                r.register_agent_basics().await;
                r
            }
        };

        let store = self
            .a2a_store
            .lock()
            .expect("a2a_store mutex poisoned")
            .take()
            .unwrap_or_else(|| Arc::new(car_a2a::InMemoryTaskStore::new()));

        let card_source = self
            .a2a_card_source
            .lock()
            .expect("a2a_card_source mutex poisoned")
            .take();
        let card_source = match card_source {
            Some(c) => c,
            None => {
                // Default card: built once from the runtime chosen
                // above; the source closure hands out clones of it.
                let card = car_a2a::build_default_agent_card(
                    &runtime,
                    car_a2a::AgentCardConfig::minimal(
                        "Common Agent Runtime",
                        "Embedded CAR daemon — A2A v1.0 reachable over WebSocket JSON-RPC.",
                        "ws://127.0.0.1:9100/",
                        car_a2a::AgentProvider {
                            organization: "Parslee".into(),
                            url: Some("https://github.com/Parslee-ai/car".into()),
                        },
                    ),
                )
                .await;
                Arc::new(move || card.clone()) as Arc<car_a2a::AgentCardSource>
            }
        };

        let dispatcher = Arc::new(car_a2a::A2aDispatcher::new(runtime, store, card_source));
        // OnceLock::set returns Err on race — accept whichever
        // dispatcher won and clone-return that one.
        let _ = self.a2a_dispatcher.set(dispatcher);
        self.a2a_dispatcher
            .get()
            .expect("a2a_dispatcher set or pre-existing")
            .clone()
    }

    /// Create the per-connection [`ClientSession`] for `client_id`
    /// and register it in `sessions`.
    ///
    /// The session gets its own `Runtime`, journaled to
    /// `<journal_dir>/<client_id>.jsonl`, whose tool executor is a
    /// `WsToolExecutor` holding `channel`. The memgine is the shared
    /// one when the embedder supplied it, otherwise a fresh
    /// per-session engine.
    ///
    /// Inserting under a `client_id` that is already registered
    /// replaces the previous map entry.
    pub async fn create_session(
        &self,
        client_id: &str,
        channel: Arc<WsChannel>,
    ) -> Arc<ClientSession> {
        let journal_file = self.journal_dir.join(format!("{client_id}.jsonl"));
        let journal = EventLog::with_journal(journal_file);

        let ws_executor = Arc::new(WsToolExecutor {
            channel: Arc::clone(&channel),
        });

        let runtime = Runtime::new()
            .with_event_log(journal)
            .with_executor(ws_executor);

        // Shared engine when the embedder provided one; otherwise a
        // dedicated engine per session (the pre-extraction behavior).
        let memgine = self
            .shared_memgine
            .as_ref()
            .map(Arc::clone)
            .unwrap_or_else(|| Arc::new(Mutex::new(car_memgine::MemgineEngine::new(None))));

        let session = Arc::new(ClientSession {
            client_id: client_id.to_owned(),
            runtime: Arc::new(runtime),
            channel,
            host: Arc::clone(&self.host),
            memgine,
            browser: car_ffi_common::browser::BrowserSessionSlot::new(),
            // Every session starts with the flag cleared. Per the
            // note carried over from the previous revision, the
            // dispatcher's gate decides whether to enforce the flag
            // at all by checking whether `ServerState::auth_token`
            // is set — so with auth disabled (no token installed) the
            // pre-#32 "everyone is allowed" behaviour is preserved
            // regardless of this initial value.
            authenticated: std::sync::atomic::AtomicBool::new(false),
            agent_id: Mutex::new(None),
            bound_memgine: Mutex::new(None),
        });

        let mut registry = self.sessions.lock().await;
        registry.insert(client_id.to_owned(), Arc::clone(&session));
        drop(registry);

        session
    }

    /// Remove a per-client session from the registry on disconnect.
    /// Returns the removed session if present so callers can drop any
    /// remaining strong refs (e.g. drain pending tool callbacks). Fix
    /// for MULTI-4 / WS-3 — without this, `state.sessions` retains
    /// `Arc<ClientSession>` for every connection that ever existed.
    ///
    /// When a session was removed, two further cleanups run:
    ///
    /// * the `agent_id → client_id` entry in `attached_agents` is
    ///   dropped, but only if it still points at this `client_id`;
    /// * every `chat_sessions` entry whose host is this client, or
    ///   whose agent is the agent bound to this session, is dropped.
    ///
    /// Returns `None` (and touches nothing else) when no session was
    /// registered under `client_id`.
    pub async fn remove_session(&self, client_id: &str) -> Option<Arc<ClientSession>> {
        // The registry guard is a temporary, released at the end of
        // this statement.
        let session = self.sessions.lock().await.remove(client_id)?;

        // Read the bound agent id exactly once so both cleanup steps
        // below act on the same snapshot.
        let bound_agent = session.agent_id.lock().await.clone();

        // #169: drop the agent_id → client_id binding so a
        // disconnected lifecycle agent can reconnect (or its
        // supervisor-respawned replacement can take the slot)
        // without colliding with the stale claim. The entry is only
        // removed while it still names this client, so a replacement
        // that already re-attached is left alone.
        if let Some(agent_id) = &bound_agent {
            let mut attached = self.attached_agents.lock().await;
            if attached.get(agent_id).map(String::as_str) == Some(client_id) {
                attached.remove(agent_id);
            }
            // `attached` is released here, before `chat_sessions` is
            // locked, so the two registry locks are never held
            // together.
        }

        // Drop any in-flight `agents.chat` sessions bound to this
        // client — either side disconnecting orphans the stream,
        // and a respawned agent's stray `agent.chat.event`
        // notifications must not race into a stale routing entry.
        // See `docs/proposals/agent-chat-surface.md`.
        self.chat_sessions.lock().await.retain(|_, chat| {
            let host_gone = chat.host_client_id == client_id;
            let agent_gone = bound_agent
                .as_ref()
                .is_some_and(|agent_id| &chat.agent_id == agent_id);
            !(host_gone || agent_gone)
        });

        Some(session)
    }
}

#[cfg(test)]
mod observer_mode_tests {
    use super::*;

    /// Create a scratch journal directory under the cargo target
    /// dir (`CARGO_TARGET_DIR`, or `<manifest>/../../target` when
    /// that is unset) and return its path. The directory is
    /// intentionally leaked so it outlives this helper.
    fn journal_dir() -> PathBuf {
        let target = match std::env::var_os("CARGO_TARGET_DIR") {
            Some(dir) => PathBuf::from(dir),
            None => {
                let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
                dir.extend(["..", "..", "target"]);
                dir
            }
        };
        // Best effort: if this fails, the tempdir creation below
        // reports the problem.
        let _ = std::fs::create_dir_all(&target);
        let target = std::fs::canonicalize(&target).unwrap_or(target);
        let scratch = tempfile::TempDir::new_in(&target).unwrap();
        let path = scratch.path().to_path_buf();
        // Forget the guard so its Drop impl never deletes the dir
        // while the test is still using it.
        std::mem::forget(scratch);
        path
    }

    /// Closes Parslee-ai/car-releases#44: the second car-server on a
    /// host installs the observer marker after `with_paths` returns
    /// AlreadyRunning. From then on `state.supervisor()` must return
    /// a clear "observe-only" error that names the manifest path —
    /// it must NOT retry user_default() (which would re-acquire the
    /// lock and likely also fail).
    #[test]
    fn supervisor_returns_observer_error_when_marker_set() {
        let state = ServerState::standalone(journal_dir());
        let manifest = PathBuf::from("/tmp/fake-manifest-for-test.json");

        let installed = state.install_observer_manifest(manifest.clone());
        installed.expect("install_observer_manifest succeeds on fresh state");
        assert_eq!(state.observer_manifest_path(), Some(&manifest));

        // Discard the Ok payload before `unwrap_err` so only the
        // error string is involved in the assertion below.
        let message = state.supervisor().map(|_| ()).unwrap_err();
        assert!(
            message.contains("observe-only"),
            "error must mention observe-only mode: {message}"
        );
        assert!(
            message.contains("fake-manifest-for-test.json"),
            "error must surface the manifest path so operators know which daemon owns it: {message}"
        );
    }

    /// A second install with a *different* path is rejected: the
    /// rejected path comes back in `Err` and the first one stays
    /// recorded.
    #[test]
    fn install_observer_manifest_is_idempotent_per_path_collision() {
        let state = ServerState::standalone(journal_dir());
        let first = PathBuf::from("/tmp/manifest-a.json");
        let second = PathBuf::from("/tmp/manifest-b.json");

        state.install_observer_manifest(first.clone()).unwrap();

        let rejected = state
            .install_observer_manifest(second.clone())
            .unwrap_err();
        assert_eq!(rejected, second);
        assert_eq!(state.observer_manifest_path(), Some(&first));
    }

    /// The Heisenberg-subscribe guard: `host.subscribe`'s identity
    /// path must use the non-acquiring read so a purely
    /// observational client can't cause the daemon to claim
    /// `<manifest>.lock` as a side effect of asking about it.
    #[test]
    fn supervisor_if_installed_does_not_lazy_init() {
        let state = ServerState::standalone(journal_dir());
        // Fresh state: nothing installed, and peeking must not
        // install anything.
        assert!(state.supervisor_if_installed().is_none());
        // The observer marker must stay unset too — no implicit init.
        assert!(state.observer_manifest_path().is_none());
    }
}