Skip to main content

car_server_core/
session.rs

1//! Server-side session state — shared across all connections.
2
3use car_engine::{Runtime, ToolExecutor};
4use car_eventlog::EventLog;
5use car_proto::{ToolExecuteRequest, ToolExecuteResponse};
6use futures::Sink;
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::pin::Pin;
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::Arc;
14use tokio::sync::{oneshot, Mutex};
15use tokio_tungstenite::tungstenite::{Error as WsError, Message};
16
/// Type-erased write half of a WebSocket connection.
///
/// The dispatch loop accepts either a
/// `WebSocketStream<TcpStream>` (the legacy car-server TCP listener)
/// or a `WebSocketStream<UnixStream>` (the daemon-as-default UDS
/// listener) — both implement `Sink<Message, Error = WsError>` after
/// the tungstenite handshake. Erasing the type here avoids cascading
/// a generic parameter through every WsChannel / Session / ServerState
/// touchpoint in the dispatcher.
///
/// The alias is a pinned, boxed trait object; any sink stored in it
/// must be `Send + Unpin + 'static`.
pub type WsSink = Pin<Box<dyn Sink<Message, Error = WsError> + Send + Unpin + 'static>>;
25
/// Server-side credentials for continuing an A2A-owned A2UI surface.
///
/// This intentionally lives outside `car_a2ui::A2uiSurfaceOwner` so
/// renderers can inspect surface ownership without receiving secrets.
///
/// Serialized as an internally tagged enum: the variant name is
/// written in camelCase under the `"type"` key, alongside the
/// variant's own fields.
///
/// NOTE(review): `Debug` is derived, so formatting a value with
/// `{:?}` prints `token` / `value` verbatim — avoid logging it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "type")]
pub enum A2aRouteAuth {
    /// No credentials are attached to the route.
    None,
    /// A bearer token.
    /// NOTE(review): presumably sent as `Authorization: Bearer <token>`
    /// — confirm against the code that consumes this enum.
    Bearer { token: String },
    /// A single named header and its value.
    Header { name: String, value: String },
}
37
/// Shared write half of the WebSocket, plus pending callback channels.
/// `write` is type-erased via [`WsSink`] so the dispatcher can run
/// against any transport-specific WebSocketStream (TCP or UDS today;
/// axum-bridged in future) without templatizing every consumer.
pub struct WsChannel {
    /// Outbound half of the socket. Guarded by an async mutex so
    /// several tasks can take turns sending frames.
    pub write: Mutex<WsSink>,
    /// Pending tool execution callbacks: request_id → oneshot sender
    pub pending: Mutex<HashMap<String, oneshot::Sender<ToolExecuteResponse>>>,
    /// Counter backing [`WsChannel::next_request_id`]; incremented
    /// once per allocated id.
    pub next_id: AtomicU64,
}
48
49impl WsChannel {
50    pub fn next_request_id(&self) -> String {
51        let id = self.next_id.fetch_add(1, Ordering::SeqCst);
52        format!("cb-{}", id)
53    }
54}
55
/// Bookkeeping for one in-flight `agents.chat` session.
///
/// Created when a host
/// client calls `agents.chat`, removed when the agent emits a terminal
/// `agent.chat.event` (`kind: "done"` or `"error"`), when either side
/// disconnects, or when the host cancels via `agents.chat.cancel`.
///
/// The session_id is host-supplied (or server-generated when omitted)
/// and threads through every `agent.chat.event` notification so the
/// server can route streamed deltas back to the originating host
/// without needing per-session subscriptions. See
/// `docs/proposals/agent-chat-surface.md` for the wire contract.
///
/// The session id itself is not stored here; this struct only holds
/// the agent, the host client and the creation time.
#[derive(Debug, Clone)]
pub struct ChatSession {
    /// Agent that owns this chat — populated from
    /// `attached_agents` at `agents.chat` dispatch time.
    pub agent_id: String,
    /// Client id of the host that issued `agents.chat`. The server
    /// forwards `agent.chat.event` notifications back to *this* host
    /// only, so two CarHost windows chatting with the same agent are
    /// independent streams.
    pub host_client_id: String,
    /// Unix-seconds creation time — used by the future stale-session
    /// sweeper to drop sessions whose agent died without emitting a
    /// terminal event.
    pub created_at: u64,
}
81
/// Tool executor that sends callbacks to the client over WebSocket.
pub struct WsToolExecutor {
    /// Channel of the client that will run the tool: callbacks are
    /// written to its `write` sink and replies are awaited through
    /// its `pending` map.
    pub channel: Arc<WsChannel>,
}
86
87#[async_trait::async_trait]
88impl ToolExecutor for WsToolExecutor {
89    async fn execute(&self, tool: &str, params: &Value) -> Result<Value, String> {
90        // Legacy callers that don't have a proposal-level Action.id
91        // (e.g. internal `executor.execute` chains in tests) — emit an
92        // empty action_id so the client-side handler can still see the
93        // payload shape and decide whether to fail loudly.
94        self.execute_with_action(tool, params, "").await
95    }
96
97    async fn execute_with_action(
98        &self,
99        tool: &str,
100        params: &Value,
101        action_id: &str,
102    ) -> Result<Value, String> {
103        use futures::SinkExt;
104
105        // The JSON-RPC request id is the daemon's callback-routing key
106        // (used by the pending-response map below). The `action_id`
107        // FIELD on the payload is the originating proposal Action.id
108        // surfaced to the host so process-wide handlers can route
109        // concurrent callbacks back to per-call dispatchers
110        // (Parslee-ai/car-releases#43 follow-up). They serve different
111        // purposes and must stay distinct: routing id is daemon-side,
112        // action id is host-side.
113        let request_id = self.channel.next_request_id();
114
115        let callback = ToolExecuteRequest {
116            action_id: action_id.to_string(),
117            tool: tool.to_string(),
118            parameters: params.clone(),
119            timeout_ms: None,
120            attempt: 1,
121        };
122
123        // Create a oneshot channel for the response
124        let (tx, rx) = oneshot::channel();
125        self.channel
126            .pending
127            .lock()
128            .await
129            .insert(request_id.clone(), tx);
130
131        // Send the callback to the client as a JSON-RPC request
132        let rpc_request = serde_json::json!({
133            "jsonrpc": "2.0",
134            "method": "tools.execute",
135            "params": callback,
136            "id": request_id,
137        });
138
139        let msg = Message::Text(
140            serde_json::to_string(&rpc_request)
141                .map_err(|e| e.to_string())?
142                .into(),
143        );
144        self.channel
145            .write
146            .lock()
147            .await
148            .send(msg)
149            .await
150            .map_err(|e| format!("failed to send tool callback: {}", e))?;
151
152        // Wait for the client to respond (with a timeout)
153        let response = tokio::time::timeout(std::time::Duration::from_secs(60), rx)
154            .await
155            .map_err(|_| format!("tool '{}' callback timed out (60s)", tool))?
156            .map_err(|_| format!("tool '{}' callback channel closed", tool))?;
157
158        if let Some(err) = response.error {
159            Err(err)
160        } else {
161            Ok(response.output.unwrap_or(Value::Null))
162        }
163    }
164}
165
/// Voice event sink that forwards events to a specific WebSocket client
/// as `voice.event` JSON-RPC notifications.
///
/// Each `voice.transcribe_stream.start` call constructs one of these
/// bound to the originating client's [`WsChannel`], so a client only
/// receives events for sessions it started.
pub struct WsVoiceEventSink {
    /// Channel of the client that receives the notifications.
    pub channel: Arc<WsChannel>,
}
175
176impl car_voice::VoiceEventSink for WsVoiceEventSink {
177    fn send(&self, session_id: &str, event_json: String) {
178        use futures::SinkExt;
179        let channel = self.channel.clone();
180        let session_id = session_id.to_string();
181        tokio::spawn(async move {
182            let payload: Value = serde_json::from_str(&event_json)
183                .unwrap_or_else(|_| Value::String(event_json.clone()));
184            let notification = serde_json::json!({
185                "jsonrpc": "2.0",
186                "method": "voice.event",
187                "params": {
188                    "session_id": session_id,
189                    "event": payload,
190                },
191            });
192            let Ok(text) = serde_json::to_string(&notification) else {
193                return;
194            };
195            let _ = channel
196                .write
197                .lock()
198                .await
199                .send(Message::Text(text.into()))
200                .await;
201        });
202    }
203}
204
/// Per-meeting fanout sink that ingests transcript text into a
/// session-scoped memgine using the `Arc<tokio::sync::Mutex<...>>`
/// wrapper, then forwards every event upstream untouched.
///
/// Lives here (not in `car-ffi-common`) because the engine handle uses
/// `tokio::sync::Mutex` per the "one-wrapper rule" — the FFI-common
/// `MeetingMemgineFanout` still uses `std::sync::Mutex` for the NAPI/
/// PyO3 bindings, which keep their sync wrappers. Each binding owns the
/// fanout that matches its lock primitive; the parsing/formatting logic
/// itself is shared via [`car_meeting::extract_transcript_for_ingest`].
///
/// `send` is called from the voice drain task and must be non-blocking,
/// so the lock acquisition is shipped to a `tokio::spawn`. Transcript
/// events are independent so reordering across spawned tasks is fine.
pub struct WsMemgineIngestSink {
    /// Meeting id passed to `extract_transcript_for_ingest` for every
    /// event.
    pub meeting_id: String,
    /// Memgine that receives the extracted `(speaker, text)` pairs.
    pub engine: Arc<Mutex<car_memgine::MemgineEngine>>,
    /// Next sink in the chain; receives every event unchanged,
    /// whether or not anything was ingested from it.
    pub upstream: Arc<dyn car_voice::VoiceEventSink>,
}
224
225impl car_voice::VoiceEventSink for WsMemgineIngestSink {
226    fn send(&self, voice_session_id: &str, event_json: String) {
227        if let Ok(value) = serde_json::from_str::<Value>(&event_json) {
228            if let Some((speaker, text)) = car_meeting::extract_transcript_for_ingest(
229                &value,
230                &self.meeting_id,
231                voice_session_id,
232            ) {
233                let engine = self.engine.clone();
234                tokio::spawn(async move {
235                    let mut guard = engine.lock().await;
236                    guard.ingest_conversation(&speaker, &text, chrono::Utc::now());
237                });
238            }
239        }
240        self.upstream.send(voice_session_id, event_json);
241    }
242}
243
/// Per-client session.
pub struct ClientSession {
    /// Identifier of this client connection.
    /// NOTE(review): presumably the key under which the session is
    /// stored in `ServerState::sessions` — confirm against
    /// `create_session`.
    pub client_id: String,
    /// Engine runtime handle for this session.
    pub runtime: Arc<Runtime>,
    /// This connection's WebSocket write half and pending
    /// tool-callback map.
    pub channel: Arc<WsChannel>,
    /// Host state handle.
    /// NOTE(review): presumably the same `Arc` as `ServerState::host`
    /// — confirm against `create_session`.
    pub host: Arc<crate::host::HostState>,
    /// Memgine handle. Wrapped in `tokio::sync::Mutex` so dispatcher
    /// handlers can hold the lock across `.await` points without
    /// risking poisoning. Migrated from `std::sync::Mutex` in the
    /// car-server-core extraction (U1) per the "one-wrapper rule".
    pub memgine: Arc<Mutex<car_memgine::MemgineEngine>>,
    /// Lazy browser session — first `browser.run` call launches Chromium,
    /// subsequent calls reuse it so element IDs resolve across invocations
    /// within the same WebSocket connection.
    pub browser: car_ffi_common::browser::BrowserSessionSlot,
    /// Per-connection auth state. Starts `false`; flips to `true`
    /// after a successful `session.auth` handshake. Always considered
    /// authenticated when `ServerState::auth_token` is unset (auth
    /// disabled). Closes Parslee-ai/car-releases#32.
    pub authenticated: std::sync::atomic::AtomicBool,
    /// Bound agent identity (#169). `Some(id)` once a lifecycle-agent
    /// child has called `session.auth { token, agent_id }` and the
    /// supervisor confirmed `agent_id` is supervised + token matches.
    /// Used by `agents.list` to surface which managed agents have
    /// actually attached vs. just being marked `Running` at the
    /// process level. Cleared at disconnect by `remove_session`.
    pub agent_id: tokio::sync::Mutex<Option<String>>,
    /// Bound persistent memgine (#170). `Some` after `session.auth`
    /// successfully attaches the connection to a daemon-owned
    /// per-agent memgine (paired with `agent_id`). Memory handlers
    /// route through [`ClientSession::effective_memgine`] which
    /// returns this when set, falling back to the ephemeral
    /// `memgine` field for browser/host/CLI connections.
    pub bound_memgine: tokio::sync::Mutex<Option<Arc<Mutex<car_memgine::MemgineEngine>>>>,
}
279
280impl ClientSession {
281    /// Returns the memgine handle the memory.* handlers should use:
282    /// the bound per-agent memgine when this session attached via
283    /// `session.auth { agent_id }` (#169 + #170), otherwise the
284    /// ephemeral per-WS memgine. Cheap (one async lock + Arc clone).
285    pub async fn effective_memgine(&self) -> Arc<Mutex<car_memgine::MemgineEngine>> {
286        if let Some(eng) = self.bound_memgine.lock().await.as_ref() {
287            return eng.clone();
288        }
289        self.memgine.clone()
290    }
291}
292
/// Approval-gate policy for high-risk WS methods.
///
/// Every method in `methods` must be acknowledged via
/// `host.resolve_approval` before the dispatcher will route the
/// request to its handler. The dispatcher waits up to `timeout` for
/// a resolution; on timeout (or any non-`approve` resolution) the
/// request fails with JSON-RPC error `-32003`.
///
/// Default: gate enabled, the macOS-automation surface
/// (`automation.run_applescript`, `automation.shortcuts.run`,
/// `messages.send`, `mail.send`, `vision.ocr`), 60-second timeout.
/// `car-server --no-approvals` (or embedders calling
/// [`ServerStateConfig::with_approval_gate`] with `enabled=false`)
/// turns it off — only appropriate when no untrusted caller can
/// reach the WS port.
#[derive(Debug, Clone)]
pub struct ApprovalGate {
    /// Master switch. When `false`, every method dispatches without
    /// raising an approval — the pre-2026-05 behaviour.
    pub enabled: bool,
    /// Methods that require approval. Match is by exact method-name
    /// string against the JSON-RPC `method` field.
    pub methods: std::collections::HashSet<String>,
    /// How long to wait for the user to resolve the approval before
    /// timing out and surfacing an error to the caller.
    pub timeout: std::time::Duration,
}

impl Default for ApprovalGate {
    /// Gate enabled, the five macOS-automation methods gated, and a
    /// 60-second resolution timeout.
    fn default() -> Self {
        let methods = [
            "automation.run_applescript",
            "automation.shortcuts.run",
            "messages.send",
            "mail.send",
            "vision.ocr",
        ]
        .iter()
        .map(|method| (*method).to_owned())
        .collect();
        Self {
            enabled: true,
            methods,
            timeout: Self::DEFAULT_TIMEOUT,
        }
    }
}

impl ApprovalGate {
    /// Resolution timeout shared by [`ApprovalGate::default`] and
    /// [`ApprovalGate::disabled`], kept in one place so the two
    /// constructors cannot drift apart.
    const DEFAULT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);

    /// Disable the gate entirely. Equivalent to passing
    /// `car-server --no-approvals`. Only appropriate when no
    /// untrusted caller can reach the WS port.
    ///
    /// The returned gate has an empty method set and keeps the
    /// 60-second timeout.
    pub fn disabled() -> Self {
        Self {
            enabled: false,
            methods: std::collections::HashSet::new(),
            timeout: Self::DEFAULT_TIMEOUT,
        }
    }

    /// `true` if this method must be acknowledged before dispatch:
    /// the gate is enabled and `method` is an exact member of
    /// `methods`.
    pub fn requires_approval(&self, method: &str) -> bool {
        self.enabled && self.methods.contains(method)
    }
}
361
/// Builder for constructing a [`ServerState`] with embedder-supplied
/// dependencies. Embedders (e.g. `tokhn-daemon`) use this to inject
/// their own memgine handle and other shared infrastructure; the
/// standalone `car-server` binary uses [`ServerState::standalone`]
/// which calls `with_config` under the hood.
pub struct ServerStateConfig {
    /// Journal directory; the only required setting (see
    /// [`ServerStateConfig::new`]).
    pub journal_dir: PathBuf,
    /// Optional pre-constructed memgine engine. When `None`, each
    /// `create_session` call builds a fresh engine; embedders that want
    /// to share a single engine across sessions can supply a clone of
    /// their `Arc<Mutex<MemgineEngine>>` here.
    pub shared_memgine: Option<Arc<Mutex<car_memgine::MemgineEngine>>>,
    /// Optional pre-constructed inference engine.
    pub inference: Option<Arc<car_inference::InferenceEngine>>,
    /// Optional embedder-supplied A2A runtime. Used by the in-core
    /// `A2aDispatcher` to execute peer-driven proposals. When `None`,
    /// the dispatcher uses a fresh `Runtime` with `register_agent_basics`
    /// — peer agents see CAR's built-in tools and nothing else,
    /// matching the behaviour of the standalone `start_a2a_listener`.
    pub a2a_runtime: Option<Arc<car_engine::Runtime>>,
    /// Optional embedder-supplied A2A task store. When `None`,
    /// defaults to `InMemoryTaskStore`. tokhn-style embedders that
    /// want a polling-friendly persistent store plug it in here.
    pub a2a_store: Option<Arc<dyn car_a2a::TaskStore>>,
    /// Optional embedder-supplied agent card factory. When `None`,
    /// the dispatcher serves a card built from the A2A runtime's
    /// tool schemas at construction time, advertising its public URL
    /// as `ws://127.0.0.1:9100/` (the WS surface the dispatcher itself
    /// is reachable on).
    pub a2a_card_source: Option<Arc<car_a2a::AgentCardSource>>,
    /// Approval-gate policy. When `None`, the dispatcher uses
    /// [`ApprovalGate::default`] (gate ON, the macOS-automation
    /// surface gated, 60s timeout). Pass
    /// [`ApprovalGate::disabled`] to opt out — only appropriate
    /// when no untrusted caller can reach the WS port.
    pub approval_gate: Option<ApprovalGate>,
}
396
397impl ServerStateConfig {
398    /// Minimal config suitable for the standalone car-server binary:
399    /// only the journal dir is required; everything else is lazily
400    /// constructed at first use.
401    pub fn new(journal_dir: PathBuf) -> Self {
402        Self {
403            journal_dir,
404            shared_memgine: None,
405            inference: None,
406            a2a_runtime: None,
407            a2a_store: None,
408            a2a_card_source: None,
409            approval_gate: None,
410        }
411    }
412
413    pub fn with_shared_memgine(mut self, engine: Arc<Mutex<car_memgine::MemgineEngine>>) -> Self {
414        self.shared_memgine = Some(engine);
415        self
416    }
417
418    pub fn with_inference(mut self, engine: Arc<car_inference::InferenceEngine>) -> Self {
419        self.inference = Some(engine);
420        self
421    }
422
423    /// Plug in an embedder-supplied runtime for the A2A dispatcher.
424    /// Use case: tokhn-daemon wants peers to see its OPA preflight
425    /// tooling, not just CAR's `register_agent_basics` defaults.
426    pub fn with_a2a_runtime(mut self, runtime: Arc<car_engine::Runtime>) -> Self {
427        self.a2a_runtime = Some(runtime);
428        self
429    }
430
431    /// Plug in an embedder-supplied task store for the A2A
432    /// dispatcher. Use case: tokhn's polling-friendly persistent
433    /// store keyed by their session id.
434    pub fn with_a2a_store(mut self, store: Arc<dyn car_a2a::TaskStore>) -> Self {
435        self.a2a_store = Some(store);
436        self
437    }
438
439    /// Plug in an embedder-supplied agent card factory. The factory
440    /// is invoked on every `agent/getAuthenticatedExtendedCard`
441    /// dispatch, so embedders can reflect runtime tool changes.
442    pub fn with_a2a_card_source(mut self, source: Arc<car_a2a::AgentCardSource>) -> Self {
443        self.a2a_card_source = Some(source);
444        self
445    }
446
447    /// Override the approval-gate policy. Pass
448    /// [`ApprovalGate::disabled`] to skip the gate entirely (only
449    /// appropriate when no untrusted caller can reach the WS port);
450    /// pass a customised [`ApprovalGate`] to add or remove methods
451    /// or to change the timeout.
452    pub fn with_approval_gate(mut self, gate: ApprovalGate) -> Self {
453        self.approval_gate = Some(gate);
454        self
455    }
456}
457
/// Global server state shared across all connections.
pub struct ServerState {
    /// Journal directory, taken from [`ServerStateConfig::journal_dir`].
    pub journal_dir: PathBuf,
    /// Live client sessions.
    /// NOTE(review): presumably keyed by `ClientSession::client_id` —
    /// confirm against `create_session` / `remove_session`.
    pub sessions: Mutex<HashMap<String, Arc<ClientSession>>>,
    /// Inference engine slot. Filled at construction when the config
    /// supplies a pre-built engine; otherwise starts empty.
    /// NOTE(review): presumably populated lazily on first use when
    /// empty — confirm against the accessor.
    pub inference: std::sync::OnceLock<Arc<car_inference::InferenceEngine>>,
    /// Host state, constructed fresh by `with_config`.
    pub host: Arc<crate::host::HostState>,
    /// When `Some`, `create_session` clones this handle into every new
    /// `ClientSession.memgine` — embedders that want a single shared
    /// memgine across all WS sessions set this. Standalone car-server
    /// leaves it `None`, which gives each session its own engine
    /// (preserving today's behavior).
    pub shared_memgine: Option<Arc<Mutex<car_memgine::MemgineEngine>>>,
    /// Process-wide voice session registry. Each
    /// `voice.transcribe_stream.start` call registers its own per-client
    /// [`WsVoiceEventSink`] so events route back to the originating WS
    /// connection only.
    pub voice_sessions: Arc<car_voice::VoiceSessionRegistry>,
    /// Process-wide meeting registry. Meeting ids are global; each
    /// meeting binds to the originating client's WS for upstream
    /// events but persists transcripts to the resolved
    /// `.car/meetings/<id>/` regardless of which client started it.
    pub meetings: Arc<car_meeting::MeetingRegistry>,
    /// Process-wide A2UI surface store. Agent-produced surfaces are
    /// visible to every host UI subscriber, independent of the
    /// WebSocket session that applied the update.
    pub a2ui: car_a2ui::A2uiSurfaceStore,
    /// In-process UI-improvement agent. Invoked from
    /// `handle_a2ui_render_report` with each inbound report; returned
    /// `Decision::Patch` envelopes are applied via the standard
    /// `apply_a2ui_envelope` path so all subscribers see the patch.
    /// `Arc` so the agent's interior `DashMap` state survives across
    /// handler calls even when `ServerState` is cheap-cloned.
    pub ui_agent: Arc<car_ui_agent::UIImprovementAgent>,
    /// Per-surface oscillation detector for the UI-improvement
    /// loop. Sits between the agent's `Decision::Patch` and the
    /// apply path so A→B→A patch cycles get cooled down without
    /// the agent itself having to track history. neo's review:
    /// "controllers use workqueue backoff; reconcilers stay
    /// stateless."
    pub ui_agent_oscillation: Arc<crate::ui_agent_loop::OscillationDetector>,
    /// Per-surface iteration budget. Backstop against runaway
    /// loops the oscillation detector misses — caps total agent-
    /// driven patches per surface at `DEFAULT_MAX_ITERATIONS`.
    pub ui_agent_budget: Arc<crate::ui_agent_loop::IterationBudget>,
    /// Process-wide concurrency gate for inference RPC handlers. Sized
    /// from host RAM at startup, overridable via
    /// [`crate::admission::ENV_MAX_CONCURRENT`]. Without this, N
    /// concurrent users multiply KV-cache and activation memory and
    /// take the host out (#114-adjacent: filed alongside the daemon
    /// always-on rework). The semaphore lives on `ServerState` so it
    /// is shared across every WebSocket session in the same process.
    pub admission: Arc<crate::admission::InferenceAdmission>,
    /// Server-side A2A continuation auth keyed by A2UI surface id.
    /// Kept out of `A2uiSurface.owner` so host renderers never see
    /// bearer/API-key material.
    pub a2ui_route_auth: Mutex<HashMap<String, A2aRouteAuth>>,
    /// Lifecycle-managed agents — declarative manifest at
    /// `~/.car/agents.json` driving spawn/restart/stop. Closes
    /// Parslee-ai/car-releases#27. Lazy-initialized so embedders that
    /// don't want process supervision don't pay the disk-touch cost
    /// at server start.
    pub supervisor: std::sync::OnceLock<Arc<car_registry::supervisor::Supervisor>>,
    /// Manifest path this daemon is *observing* but does NOT own.
    /// Set by `car-server` when boot-time supervisor construction
    /// fails with [`car_registry::supervisor::SupervisorError::AlreadyRunning`]
    /// — another car-server process on the host holds the exclusive
    /// lock on this manifest. In that state, `supervisor()` returns a
    /// clear "observe-only" error so mutation handlers refuse
    /// (preventing the duplicate-spawn bug from
    /// Parslee-ai/car-releases#44), while read-only handlers
    /// (`agents.list`, `agents.health`) fall back to
    /// [`car_registry::supervisor::Supervisor::list_from_manifest`] /
    /// [`car_registry::supervisor::Supervisor::health_from_manifest`]
    /// so operators can still inspect what the primary daemon is
    /// supervising.
    pub observer_manifest_path: std::sync::OnceLock<PathBuf>,
    /// In-core A2A dispatcher — embedders that consume `car-server-core`
    /// get A2A reachability "for free" without standing up a separate
    /// HTTP listener. Closes Parslee-ai/car-releases#28. Lazy-init so
    /// the embedder can override the runtime / task store / agent card
    /// via [`ServerStateConfig::with_a2a_runtime`] etc. before the
    /// first dispatch.
    pub a2a_dispatcher: std::sync::OnceLock<Arc<car_a2a::A2aDispatcher>>,
    /// WS clients subscribed to A2UI envelope events. After every
    /// successful `a2ui.apply` / `a2ui.ingest`, the resulting
    /// `A2uiApplyResult` is broadcast to every subscriber as an
    /// `a2ui.event` JSON-RPC notification. Closes
    /// Parslee-ai/car-releases#29. Subscribers register via the
    /// `a2ui/subscribe` method and are auto-cleaned on WS disconnect.
    pub a2ui_subscribers: Mutex<HashMap<String, Arc<WsChannel>>>,
    /// Per-launch auth token. When `Some`, the WS dispatcher rejects
    /// non-auth methods on unauthenticated sessions until the client
    /// calls `session.auth` with the matching value. When `None`,
    /// auth is disabled and every connection works as before. Set
    /// at startup by `car-server` unless `--no-auth` is passed
    /// (default flipped 2026-05); embedders that want to enable
    /// auth call [`ServerState::install_auth_token`]. Closes
    /// Parslee-ai/car-releases#32.
    pub auth_token: std::sync::OnceLock<String>,
    /// Parslee cloud identity loaded from the user's OS keychain at
    /// daemon startup when `car auth login` has been completed.
    pub parslee_session: std::sync::OnceLock<crate::parslee_auth::ParsleeSession>,
    /// `agent_id -> client_id` map of currently-attached lifecycle
    /// agents (#169). Populated by the `session.auth` handler when a
    /// supervised child presents its `agent_id` + per-agent token;
    /// drained on disconnect by `remove_session`. Single-claim:
    /// a second connection presenting the same `agent_id` is
    /// rejected so the daemon-side per-agent state stays unambiguous.
    pub attached_agents: Mutex<HashMap<String, String>>,
    /// `agent_id -> persistent memgine` map (#170). Lazy-loaded on
    /// first connection per id from `~/.car/memory/agents/<id>.jsonl`,
    /// retained across daemon restart, surviving any single
    /// disconnect/reconnect of the supervised child. Connections
    /// that auth without an `agent_id` (browser, host, ad-hoc CLI)
    /// keep the per-WS ephemeral memgine on `ClientSession.memgine`
    /// — no behaviour change.
    pub agent_memgines: Mutex<HashMap<String, Arc<Mutex<car_memgine::MemgineEngine>>>>,
    /// In-flight `agents.chat` sessions keyed by `session_id`. See
    /// [`ChatSession`] for shape. Populated by `agents.chat`,
    /// cleared on terminal `agent.chat.event` or
    /// `agents.chat.cancel`. Disconnect cleanup happens in
    /// `remove_session` — any in-flight session bound to either the
    /// disconnecting host or agent client is dropped so subsequent
    /// stray notifications from a respawned agent fall on the floor
    /// rather than racing into a stale stream.
    pub chat_sessions: Mutex<HashMap<String, ChatSession>>,
    /// Bound MCP HTTP-streamable URL (e.g.
    /// `"http://127.0.0.1:9102/mcp"`) — `car-server` installs this
    /// after binding the listener. Used by the
    /// `agents.invoke_external` handler to default
    /// `InvokeOptions.mcp_endpoint` so external agents
    /// (Claude Code today) load the daemon's CAR namespace via
    /// `--mcp-config` automatically. `None` when MCP isn't bound
    /// (e.g. `--mcp-bind disabled`).
    pub mcp_url: std::sync::OnceLock<String>,
    /// Registry of connected MCP SSE sessions. Populated alongside
    /// [`mcp_url`] when `car-server` boots the MCP listener. Public
    /// so handlers can call `crate::mcp::push_to_session` to send
    /// server-initiated requests to a specific MCP-connected
    /// client (MCP-3 foundation; MCP-3b will wire host-owned tool
    /// dispatch through this).
    pub mcp_sessions: std::sync::OnceLock<Arc<crate::mcp::SessionMap>>,
    /// Approval gate for high-risk WS methods (audit 2026-05). The
    /// gate intercepts `automation.run_applescript`,
    /// `automation.shortcuts.run`, `messages.send`, `mail.send`, and
    /// `vision.ocr` before they dispatch, raises a
    /// `host.create_approval` for the user to act on, and waits
    /// (with a timeout) for `host.resolve_approval`. Approve →
    /// dispatch continues; deny / timeout → JSON-RPC error code
    /// `-32003`. The set of gated methods and the wait timeout are
    /// embedder-overridable via
    /// [`ServerStateConfig::with_approval_gate`].
    pub approval_gate: ApprovalGate,
    /// A2A-runtime / store / card factory carried over from the
    /// embedder's [`ServerStateConfig`]. Consumed lazily on first
    /// `a2a_dispatcher()` call so embedders can construct
    /// `ServerState` without paying the runtime spin-up cost when
    /// they don't actually use the A2A surface.
    pub(crate) a2a_runtime: std::sync::Mutex<Option<Arc<car_engine::Runtime>>>,
    /// Embedder-supplied A2A task store, carried over from
    /// [`ServerStateConfig::a2a_store`]; handled the same way as
    /// `a2a_runtime`.
    pub(crate) a2a_store: std::sync::Mutex<Option<Arc<dyn car_a2a::TaskStore>>>,
    /// Embedder-supplied agent card factory, carried over from
    /// [`ServerStateConfig::a2a_card_source`]; handled the same way
    /// as `a2a_runtime`.
    pub(crate) a2a_card_source: std::sync::Mutex<Option<Arc<car_a2a::AgentCardSource>>>,
}
620
621impl ServerState {
622    /// Constructor for the standalone `car-server` binary. Each WS
623    /// connection gets its own per-session memgine — matches the
624    /// pre-extraction default and is correct for a single-process
625    /// daemon serving one user at a time.
626    ///
627    /// **Embedders must not call this.** It silently leaves
628    /// `shared_memgine = None`, which re-introduces the dual-memgine
629    /// bug U7 was created to prevent (one engine in the embedder, a
630    /// fresh one inside every WS session). Embedders use
631    /// [`ServerState::embedded`] instead, which makes the shared
632    /// engine handle a required argument so it cannot be forgotten.
633    pub fn standalone(journal_dir: PathBuf) -> Self {
634        Self::with_config(ServerStateConfig::new(journal_dir))
635    }
636
637    /// Constructor for embedders (e.g. `tokhn-daemon`). The shared
638    /// memgine handle is **required**: every WS session created by
639    /// this state will reuse the same engine, preventing the
640    /// dual-memgine bug.
641    ///
642    /// For embedders that also want to inject a pre-warmed inference
643    /// engine or other advanced wiring, build a [`ServerStateConfig`]
644    /// directly and call [`ServerState::with_config`].
645    pub fn embedded(
646        journal_dir: PathBuf,
647        shared_memgine: Arc<Mutex<car_memgine::MemgineEngine>>,
648    ) -> Self {
649        Self::with_config(ServerStateConfig::new(journal_dir).with_shared_memgine(shared_memgine))
650    }
651
652    /// Build a `ServerState` from a [`ServerStateConfig`] — the path
653    /// embedders use when they need to inject a shared memgine *and*
654    /// a pre-warmed inference engine, or any other advanced wiring
655    /// the convenience constructors don't cover.
656    pub fn with_config(cfg: ServerStateConfig) -> Self {
657        let inference = std::sync::OnceLock::new();
658        if let Some(eng) = cfg.inference {
659            // OnceLock::set returns Err if already set — fresh OnceLock
660            // means it's empty, so this is infallible here.
661            let _ = inference.set(eng);
662        }
663        let voice_sessions = Arc::new(car_voice::VoiceSessionRegistry::new());
664        // Reap sessions whose clients dropped without calling
665        // voice.transcribe_stream.stop (WS disconnect, process exit,
666        // etc.). Listener handles otherwise leak for the daemon's
667        // lifetime. `with_config` is sync but always called from the
668        // `#[tokio::main]` entry point, so `Handle::try_current()`
669        // inside `start_sweeper` finds the runtime.
670        voice_sessions.start_sweeper();
671        // UI-improvement agent is pure decision logic — no I/O, no
672        // persistence handle. Memgine ingest of strategy outcomes is
673        // the caller's responsibility (handler.rs after a successful
674        // Decision::Patch). Keeps the agent crate Mutex-flavor
675        // agnostic so it can compose with std/tokio mutex callers.
676        let ui_agent = Arc::new(car_ui_agent::UIImprovementAgent::with_default_strategies());
677        let ui_agent_oscillation = Arc::new(crate::ui_agent_loop::OscillationDetector::new());
678        let ui_agent_budget = Arc::new(crate::ui_agent_loop::IterationBudget::new());
679        Self {
680            journal_dir: cfg.journal_dir,
681            sessions: Mutex::new(HashMap::new()),
682            inference,
683            host: Arc::new(crate::host::HostState::new()),
684            shared_memgine: cfg.shared_memgine,
685            voice_sessions,
686            meetings: Arc::new(car_meeting::MeetingRegistry::new()),
687            a2ui: car_a2ui::A2uiSurfaceStore::new(),
688            ui_agent,
689            ui_agent_oscillation,
690            ui_agent_budget,
691            admission: Arc::new(crate::admission::InferenceAdmission::new()),
692            a2ui_route_auth: Mutex::new(HashMap::new()),
693            supervisor: std::sync::OnceLock::new(),
694            observer_manifest_path: std::sync::OnceLock::new(),
695            a2a_dispatcher: std::sync::OnceLock::new(),
696            a2a_runtime: std::sync::Mutex::new(cfg.a2a_runtime),
697            a2a_store: std::sync::Mutex::new(cfg.a2a_store),
698            a2a_card_source: std::sync::Mutex::new(cfg.a2a_card_source),
699            a2ui_subscribers: Mutex::new(HashMap::new()),
700            auth_token: std::sync::OnceLock::new(),
701            parslee_session: std::sync::OnceLock::new(),
702            attached_agents: Mutex::new(HashMap::new()),
703            agent_memgines: Mutex::new(HashMap::new()),
704            chat_sessions: Mutex::new(HashMap::new()),
705            mcp_url: std::sync::OnceLock::new(),
706            mcp_sessions: std::sync::OnceLock::new(),
707            approval_gate: cfg.approval_gate.unwrap_or_default(),
708        }
709    }
710
711    /// Enable the per-launch auth handshake. After this call, every
712    /// new WS connection must call `session.auth` with `token` as
713    /// the first frame; otherwise the connection is closed. Called
714    /// by `car-server` at startup unless `--no-auth` is set
715    /// (default flipped 2026-05); embedders supply their own token
716    /// if they want the same posture. Returns `Err(token)` when
717    /// auth was already installed.
718    pub fn install_auth_token(&self, token: String) -> Result<(), String> {
719        self.auth_token.set(token)
720    }
721
722    pub fn install_parslee_session(
723        &self,
724        session: crate::parslee_auth::ParsleeSession,
725    ) -> Result<(), crate::parslee_auth::ParsleeSession> {
726        self.parslee_session.set(session)
727    }
728
729    /// Install the bound MCP URL after car-server's listener is up.
730    /// Idempotent on the first call; subsequent calls are accepted
731    /// silently (matches the supervisor / a2a_dispatcher install
732    /// idiom). Returns `Err(())` when an MCP URL was already
733    /// installed — embedders should treat this as "another
734    /// component beat us to it" and use whichever value is now set.
735    pub fn install_mcp_url(&self, url: String) -> Result<(), String> {
736        self.mcp_url.set(url)
737    }
738
739    /// Install the MCP SSE session registry. Pairs with
740    /// [`install_mcp_url`] — both come from the same `start_mcp`
741    /// call and either both get installed or neither does (the
742    /// daemon binds them together).
743    pub fn install_mcp_sessions(
744        &self,
745        sessions: Arc<crate::mcp::SessionMap>,
746    ) -> Result<(), Arc<crate::mcp::SessionMap>> {
747        self.mcp_sessions.set(sessions)
748    }
749
750    /// Lazy-initialize and return the agent supervisor. The first
751    /// call constructs a [`car_registry::supervisor::Supervisor`] backed by
752    /// `~/.car/agents.json` + `~/.car/logs/`. Embedders that need a
753    /// non-default location should call
754    /// [`ServerState::install_supervisor`] before any handler runs.
755    ///
756    /// In observer mode (set via [`install_observer_manifest`]),
757    /// returns a clear error mentioning the manifest path the
758    /// primary daemon owns. This prevents the second daemon from
759    /// re-attempting `user_default()` (which would also fail with
760    /// `AlreadyRunning`) on every WS call, and gives mutation
761    /// handlers a stable refusal path. Read-only handlers
762    /// (`agents.list`, `agents.health`) should call
763    /// [`Self::observer_manifest_path`] first and fall back to
764    /// [`car_registry::supervisor::Supervisor::list_from_manifest`] /
765    /// `health_from_manifest` when set. Closes
766    /// Parslee-ai/car-releases#44.
767    pub fn supervisor(&self) -> Result<Arc<car_registry::supervisor::Supervisor>, String> {
768        if let Some(s) = self.supervisor.get() {
769            return Ok(s.clone());
770        }
771        if let Some(p) = self.observer_manifest_path.get() {
772            return Err(format!(
773                "this car-server is observe-only — another car-server process \
774                 holds the supervisor lock for {}. Mutations refuse here; route \
775                 them to the primary daemon, or stop the other car-server first.",
776                p.display()
777            ));
778        }
779        let s = car_registry::supervisor::Supervisor::user_default()
780            .map(Arc::new)
781            .map_err(|e| e.to_string())?;
782        // OnceLock::set returns the original arg back on collision —
783        // a concurrent caller racing through user_default. Take
784        // whichever wins.
785        let _ = self.supervisor.set(s);
786        Ok(self.supervisor.get().expect("set or pre-existing").clone())
787    }
788
789    /// Replace the lazy default with a caller-supplied supervisor.
790    /// Returns `Err(())` when a supervisor was already installed.
791    /// Used by the standalone `car-server` binary to call
792    /// `start_all()` on a known-good handle without paying the
793    /// lazy-init lookup cost.
794    pub fn install_supervisor(
795        &self,
796        supervisor: Arc<car_registry::supervisor::Supervisor>,
797    ) -> Result<(), Arc<car_registry::supervisor::Supervisor>> {
798        self.supervisor.set(supervisor)
799    }
800
801    /// Non-acquiring read of the currently-installed supervisor.
802    /// Unlike [`supervisor`](Self::supervisor), this does NOT lazy-
803    /// init via `user_default()` — it returns `None` instead of
804    /// constructing a fresh `Supervisor` and acquiring the
805    /// `<manifest>.lock` as a side effect. Use this from read-only
806    /// metadata paths (`host.subscribe` identity, status surfaces)
807    /// where causing lock acquisition on observation would be a
808    /// Heisenberg subscribe — the act of asking "do you own the
809    /// lock?" must not be the act of taking it.
810    pub fn supervisor_if_installed(&self) -> Option<Arc<car_registry::supervisor::Supervisor>> {
811        self.supervisor.get().cloned()
812    }
813
814    /// Mark this daemon as *observing* a manifest owned by another
815    /// car-server process. After this call, `supervisor()` returns
816    /// an "observe-only" error and read-only handlers
817    /// (`agents.list`, `agents.health`) fall back to the static
818    /// `Supervisor::list_from_manifest` / `health_from_manifest`
819    /// paths. Idempotent — subsequent calls with the same path are
820    /// no-ops; a different path returns `Err(())`. Closes
821    /// Parslee-ai/car-releases#44.
822    pub fn install_observer_manifest(&self, path: PathBuf) -> Result<(), PathBuf> {
823        self.observer_manifest_path.set(path)
824    }
825
826    /// Path of the manifest this daemon is observing but not
827    /// supervising. `None` when this daemon owns the supervisor
828    /// (the normal case) or when no manifest is configured at all
829    /// (no `HOME`, embedder didn't install one).
830    pub fn observer_manifest_path(&self) -> Option<&PathBuf> {
831        self.observer_manifest_path.get()
832    }
833
834    /// Lazy-initialize and return the in-core A2A dispatcher. The
835    /// first call constructs an [`car_a2a::A2aDispatcher`] from
836    /// either the embedder's overrides (set via
837    /// [`ServerStateConfig::with_a2a_runtime`] / `with_a2a_store` /
838    /// `with_a2a_card_source`) or sensible defaults: a fresh
839    /// `Runtime` with `register_agent_basics` registered, an
840    /// `InMemoryTaskStore`, and a card built from the runtime's
841    /// tool schemas advertising `ws://127.0.0.1:9100/` as the
842    /// public URL. Closes Parslee-ai/car-releases#28.
843    pub async fn a2a_dispatcher(&self) -> Arc<car_a2a::A2aDispatcher> {
844        if let Some(d) = self.a2a_dispatcher.get() {
845            return d.clone();
846        }
847
848        // Embedder overrides take precedence; fall back to defaults
849        // for each slot independently (so an embedder that only
850        // wants a custom card can leave the runtime + store at
851        // defaults). `Mutex::take()` consumes the slot so the
852        // defaults aren't reconstructed on a racing init that loses
853        // the OnceLock::set call below.
854        let runtime = self
855            .a2a_runtime
856            .lock()
857            .expect("a2a_runtime mutex poisoned")
858            .take();
859        let runtime = match runtime {
860            Some(r) => r,
861            None => {
862                let r = Arc::new(car_engine::Runtime::new());
863                r.register_agent_basics().await;
864                r
865            }
866        };
867
868        let store = self
869            .a2a_store
870            .lock()
871            .expect("a2a_store mutex poisoned")
872            .take()
873            .unwrap_or_else(|| Arc::new(car_a2a::InMemoryTaskStore::new()));
874
875        let card_source = self
876            .a2a_card_source
877            .lock()
878            .expect("a2a_card_source mutex poisoned")
879            .take();
880        let card_source = match card_source {
881            Some(c) => c,
882            None => {
883                let card = car_a2a::build_default_agent_card(
884                    &runtime,
885                    car_a2a::AgentCardConfig::minimal(
886                        "Common Agent Runtime",
887                        "Embedded CAR daemon — A2A v1.0 reachable over WebSocket JSON-RPC.",
888                        "ws://127.0.0.1:9100/",
889                        car_a2a::AgentProvider {
890                            organization: "Parslee".into(),
891                            url: Some("https://github.com/Parslee-ai/car".into()),
892                        },
893                    ),
894                )
895                .await;
896                Arc::new(move || card.clone()) as Arc<car_a2a::AgentCardSource>
897            }
898        };
899
900        let dispatcher = Arc::new(car_a2a::A2aDispatcher::new(runtime, store, card_source));
901        // OnceLock::set returns Err on race — accept whichever
902        // dispatcher won and clone-return that one.
903        let _ = self.a2a_dispatcher.set(dispatcher);
904        self.a2a_dispatcher
905            .get()
906            .expect("a2a_dispatcher set or pre-existing")
907            .clone()
908    }
909
910    pub async fn create_session(
911        &self,
912        client_id: &str,
913        channel: Arc<WsChannel>,
914    ) -> Arc<ClientSession> {
915        let journal_path = self.journal_dir.join(format!("{}.jsonl", client_id));
916        let event_log = EventLog::with_journal(journal_path);
917
918        let executor = Arc::new(WsToolExecutor {
919            channel: channel.clone(),
920        });
921
922        let runtime = Runtime::new()
923            .with_event_log(event_log)
924            .with_executor(executor);
925
926        // If the embedder supplied a shared memgine, every session uses it.
927        // Otherwise each session gets its own — matches pre-extraction behavior.
928        let memgine = match &self.shared_memgine {
929            Some(eng) => eng.clone(),
930            None => Arc::new(Mutex::new(car_memgine::MemgineEngine::new(None))),
931        };
932
933        let session = Arc::new(ClientSession {
934            client_id: client_id.to_string(),
935            runtime: Arc::new(runtime),
936            channel,
937            host: self.host.clone(),
938            memgine,
939            browser: car_ffi_common::browser::BrowserSessionSlot::new(),
940            // When auth is disabled (no token installed), every
941            // session is "authenticated" by default — preserves the
942            // pre-#32 behaviour. When auth is enabled, the value is
943            // ignored on creation; the dispatcher's gate checks
944            // `ServerState::auth_token.is_some()` to decide whether
945            // to enforce.
946            authenticated: std::sync::atomic::AtomicBool::new(false),
947            agent_id: tokio::sync::Mutex::new(None),
948            bound_memgine: tokio::sync::Mutex::new(None),
949        });
950
951        self.sessions
952            .lock()
953            .await
954            .insert(client_id.to_string(), session.clone());
955
956        session
957    }
958
959    /// Remove a per-client session from the registry on disconnect.
960    /// Returns the removed session if present so callers can drop any
961    /// remaining strong refs (e.g. drain pending tool callbacks). Fix
962    /// for MULTI-4 / WS-3 — without this, `state.sessions` retains
963    /// `Arc<ClientSession>` for every connection that ever existed.
964    pub async fn remove_session(&self, client_id: &str) -> Option<Arc<ClientSession>> {
965        let removed = self.sessions.lock().await.remove(client_id);
966        if let Some(session) = &removed {
967            // #169: drop the agent_id → client_id binding so a
968            // disconnected lifecycle agent can reconnect (or its
969            // supervisor-respawned replacement can take the slot)
970            // without colliding with the stale claim.
971            let bound = session.agent_id.lock().await.clone();
972            if let Some(id) = bound {
973                let mut attached = self.attached_agents.lock().await;
974                if attached.get(&id).map(String::as_str) == Some(client_id) {
975                    attached.remove(&id);
976                }
977            }
978            // Drop any in-flight `agents.chat` sessions bound to this
979            // client — either side disconnecting orphans the stream,
980            // and a respawned agent's stray `agent.chat.event`
981            // notifications must not race into a stale routing entry.
982            // See `docs/proposals/agent-chat-surface.md`.
983            let bound_agent = session.agent_id.lock().await.clone();
984            let mut chats = self.chat_sessions.lock().await;
985            chats.retain(|_, s| {
986                if s.host_client_id == client_id {
987                    return false;
988                }
989                if let Some(agent_id) = &bound_agent {
990                    if &s.agent_id == agent_id {
991                        return false;
992                    }
993                }
994                true
995            });
996        }
997        removed
998    }
999}
1000
#[cfg(test)]
mod observer_mode_tests {
    use super::*;

    /// Creates a fresh journal directory under the cargo target dir
    /// (`CARGO_TARGET_DIR`, or `<manifest>/../../target` when unset)
    /// and returns its path. The directory is deliberately leaked so
    /// it outlives the helper for the duration of the test.
    fn journal_dir() -> PathBuf {
        let target = match std::env::var_os("CARGO_TARGET_DIR") {
            Some(dir) => std::path::PathBuf::from(dir),
            None => std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
                .join("..")
                .join("..")
                .join("target"),
        };
        // Best effort: the directory usually exists already.
        let _ = std::fs::create_dir_all(&target);
        let target = std::fs::canonicalize(&target).unwrap_or(target);
        let scratch = tempfile::TempDir::new_in(&target).unwrap();
        let path = scratch.path().to_path_buf();
        // Skip the TempDir destructor so the directory is not removed.
        std::mem::forget(scratch);
        path
    }

    /// Closes Parslee-ai/car-releases#44. The second car-server on a
    /// host installs the observer marker after `with_paths` returns
    /// AlreadyRunning. From then on `state.supervisor()` must answer
    /// with a clear "observe-only" error that names the manifest
    /// path, and must NOT retry `user_default()` (which would try to
    /// re-acquire the lock and likely fail as well).
    #[test]
    fn supervisor_returns_observer_error_when_marker_set() {
        let state = ServerState::standalone(journal_dir());
        let manifest = PathBuf::from("/tmp/fake-manifest-for-test.json");

        state
            .install_observer_manifest(manifest.clone())
            .expect("install_observer_manifest succeeds on fresh state");
        assert_eq!(state.observer_manifest_path(), Some(&manifest));

        let err = state.supervisor().map(drop).unwrap_err();
        assert!(
            err.contains("observe-only"),
            "error must mention observe-only mode: {err}"
        );
        assert!(
            err.contains("fake-manifest-for-test.json"),
            "error must surface the manifest path so operators know which daemon owns it: {err}"
        );
    }

    /// Installing a second, different manifest path is rejected: the
    /// rejected path comes back in `Err` and the first path stays
    /// recorded.
    #[test]
    fn install_observer_manifest_is_idempotent_per_path_collision() {
        let state = ServerState::standalone(journal_dir());
        let first = PathBuf::from("/tmp/manifest-a.json");
        let second = PathBuf::from("/tmp/manifest-b.json");

        state.install_observer_manifest(first.clone()).unwrap();

        let rejected = state
            .install_observer_manifest(second.clone())
            .unwrap_err();
        assert_eq!(rejected, second);
        assert_eq!(state.observer_manifest_path(), Some(&first));
    }

    /// The Heisenberg-subscribe guard: `host.subscribe`'s identity
    /// path must use the non-acquiring read, so a purely
    /// observational client cannot make the daemon claim
    /// `<manifest>.lock` merely by asking about it.
    #[test]
    fn supervisor_if_installed_does_not_lazy_init() {
        let state = ServerState::standalone(journal_dir());
        // Fresh state: nothing installed, and the read must not
        // install anything.
        assert!(state.supervisor_if_installed().is_none());
        // No implicit init of the observer marker either.
        assert!(state.observer_manifest_path().is_none());
    }
}