car_server_core/session.rs
1//! Server-side session state — shared across all connections.
2
3use car_engine::{Runtime, ToolExecutor};
4use car_eventlog::EventLog;
5use car_proto::{ToolExecuteRequest, ToolExecuteResponse};
6use futures::Sink;
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::pin::Pin;
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::Arc;
14use tokio::sync::{oneshot, Mutex};
15use tokio_tungstenite::tungstenite::{Error as WsError, Message};
16
/// Type-erased WebSocket sink. The dispatch loop accepts either a
/// `WebSocketStream<TcpStream>` (the legacy car-server TCP listener)
/// or a `WebSocketStream<UnixStream>` (the daemon-as-default UDS
/// listener) — both implement `Sink<Message, Error = WsError>` after
/// the tungstenite handshake. Erasing the type here avoids cascading
/// a generic parameter through every WsChannel / Session / ServerState
/// touchpoint in the dispatcher.
///
/// The extra bounds are load-bearing for the users in this module:
/// `Unpin` lets callers drive the sink through `futures::SinkExt::send`,
/// and `Send + 'static` lets a channel holding it be moved into
/// `tokio::spawn`ed tasks.
pub type WsSink = Pin<Box<dyn Sink<Message, Error = WsError> + Send + Unpin + 'static>>;
25
/// Server-side credentials for continuing an A2A-owned A2UI surface.
///
/// This intentionally lives outside `car_a2ui::A2uiSurfaceOwner` so
/// renderers can inspect surface ownership without receiving secrets.
///
/// Serialized as an internally tagged object: the `type` field carries
/// the camelCased variant name and the variant's fields sit alongside it.
///
/// NOTE(review): `Debug` is derived, so formatting a value with `{:?}`
/// prints the token / header value verbatim — keep instances out of logs.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "type")]
pub enum A2aRouteAuth {
    /// No credentials attached.
    None,
    /// Bearer-token credential.
    /// NOTE(review): presumably sent as `Authorization: Bearer <token>` —
    /// confirm at the call site.
    Bearer { token: String },
    /// Arbitrary header credential given as a name/value pair.
    Header { name: String, value: String },
}
37
/// Shared write half of the WebSocket, plus pending callback channels.
/// `write` is type-erased via [`WsSink`] so the dispatcher can run
/// against any transport-specific WebSocketStream (TCP or UDS today;
/// axum-bridged in future) without templatizing every consumer.
pub struct WsChannel {
    /// Outbound half of the socket. Behind an async mutex so each
    /// sender takes the lock for the duration of one `send`.
    pub write: Mutex<WsSink>,
    /// Pending tool execution callbacks: request_id → oneshot sender
    pub pending: Mutex<HashMap<String, oneshot::Sender<ToolExecuteResponse>>>,
    /// Counter backing [`WsChannel::next_request_id`]; incremented by
    /// one on every id handed out.
    pub next_id: AtomicU64,
}
48
49impl WsChannel {
50 pub fn next_request_id(&self) -> String {
51 let id = self.next_id.fetch_add(1, Ordering::SeqCst);
52 format!("cb-{}", id)
53 }
54}
55
/// In-flight `agents.chat` session bookkeeping. Created when a host
/// client calls `agents.chat`, removed when the agent emits a terminal
/// `agent.chat.event` (`kind: "done"` or `"error"`), when either side
/// disconnects, or when the host cancels via `agents.chat.cancel`.
///
/// The session_id is host-supplied (or server-generated when omitted)
/// and threads through every `agent.chat.event` notification so the
/// server can route streamed deltas back to the originating host
/// without needing per-session subscriptions. See
/// `docs/proposals/agent-chat-surface.md` for the wire contract.
///
/// The session id itself is not stored here; it is the key of the map
/// that holds these values (`ServerState::chat_sessions`).
#[derive(Debug, Clone)]
pub struct ChatSession {
    /// Agent that owns this chat — populated from
    /// `attached_agents` at `agents.chat` dispatch time.
    pub agent_id: String,
    /// Client id of the host that issued `agents.chat`. The server
    /// forwards `agent.chat.event` notifications back to *this* host
    /// only, so two CarHost windows chatting with the same agent are
    /// independent streams.
    pub host_client_id: String,
    /// Unix-seconds creation time — used by the future stale-session
    /// sweeper to drop sessions whose agent died without emitting a
    /// terminal event.
    pub created_at: u64,
}
81
/// Tool executor that sends callbacks to the client over WebSocket.
///
/// Each execution is written to `channel` as a `tools.execute` JSON-RPC
/// request and completes when the matching response is delivered through
/// the channel's `pending` map.
pub struct WsToolExecutor {
    /// Connection the callbacks are written to and answered on.
    pub channel: Arc<WsChannel>,
}
86
87#[async_trait::async_trait]
88impl ToolExecutor for WsToolExecutor {
89 async fn execute(&self, tool: &str, params: &Value) -> Result<Value, String> {
90 // Legacy callers that don't have a proposal-level Action.id
91 // (e.g. internal `executor.execute` chains in tests) — emit an
92 // empty action_id so the client-side handler can still see the
93 // payload shape and decide whether to fail loudly.
94 self.execute_with_action(tool, params, "").await
95 }
96
97 async fn execute_with_action(
98 &self,
99 tool: &str,
100 params: &Value,
101 action_id: &str,
102 ) -> Result<Value, String> {
103 use futures::SinkExt;
104
105 // The JSON-RPC request id is the daemon's callback-routing key
106 // (used by the pending-response map below). The `action_id`
107 // FIELD on the payload is the originating proposal Action.id
108 // surfaced to the host so process-wide handlers can route
109 // concurrent callbacks back to per-call dispatchers
110 // (Parslee-ai/car-releases#43 follow-up). They serve different
111 // purposes and must stay distinct: routing id is daemon-side,
112 // action id is host-side.
113 let request_id = self.channel.next_request_id();
114
115 let callback = ToolExecuteRequest {
116 action_id: action_id.to_string(),
117 tool: tool.to_string(),
118 parameters: params.clone(),
119 timeout_ms: None,
120 attempt: 1,
121 };
122
123 // Create a oneshot channel for the response
124 let (tx, rx) = oneshot::channel();
125 self.channel
126 .pending
127 .lock()
128 .await
129 .insert(request_id.clone(), tx);
130
131 // Send the callback to the client as a JSON-RPC request
132 let rpc_request = serde_json::json!({
133 "jsonrpc": "2.0",
134 "method": "tools.execute",
135 "params": callback,
136 "id": request_id,
137 });
138
139 let msg = Message::Text(
140 serde_json::to_string(&rpc_request)
141 .map_err(|e| e.to_string())?
142 .into(),
143 );
144 self.channel
145 .write
146 .lock()
147 .await
148 .send(msg)
149 .await
150 .map_err(|e| format!("failed to send tool callback: {}", e))?;
151
152 // Wait for the client to respond (with a timeout)
153 let response = tokio::time::timeout(std::time::Duration::from_secs(60), rx)
154 .await
155 .map_err(|_| format!("tool '{}' callback timed out (60s)", tool))?
156 .map_err(|_| format!("tool '{}' callback channel closed", tool))?;
157
158 if let Some(err) = response.error {
159 Err(err)
160 } else {
161 Ok(response.output.unwrap_or(Value::Null))
162 }
163 }
164}
165
/// Voice event sink that forwards events to a specific WebSocket client
/// as `voice.event` JSON-RPC notifications.
///
/// Each `voice.transcribe_stream.start` call constructs one of these
/// bound to the originating client's [`WsChannel`], so a client only
/// receives events for sessions it started.
pub struct WsVoiceEventSink {
    /// Connection the `voice.event` notifications are written to.
    pub channel: Arc<WsChannel>,
}
175
176impl car_voice::VoiceEventSink for WsVoiceEventSink {
177 fn send(&self, session_id: &str, event_json: String) {
178 use futures::SinkExt;
179 let channel = self.channel.clone();
180 let session_id = session_id.to_string();
181 tokio::spawn(async move {
182 let payload: Value = serde_json::from_str(&event_json)
183 .unwrap_or_else(|_| Value::String(event_json.clone()));
184 let notification = serde_json::json!({
185 "jsonrpc": "2.0",
186 "method": "voice.event",
187 "params": {
188 "session_id": session_id,
189 "event": payload,
190 },
191 });
192 let Ok(text) = serde_json::to_string(¬ification) else {
193 return;
194 };
195 let _ = channel
196 .write
197 .lock()
198 .await
199 .send(Message::Text(text.into()))
200 .await;
201 });
202 }
203}
204
/// Per-meeting fanout sink that ingests transcript text into a
/// session-scoped memgine using the `Arc<tokio::sync::Mutex<...>>`
/// wrapper, then forwards every event upstream untouched.
///
/// Lives here (not in `car-ffi-common`) because the engine handle uses
/// `tokio::sync::Mutex` per the "one-wrapper rule" — the FFI-common
/// `MeetingMemgineFanout` still uses `std::sync::Mutex` for the NAPI/
/// PyO3 bindings, which keep their sync wrappers. Each binding owns the
/// fanout that matches its lock primitive; the parsing/formatting logic
/// itself is shared via [`car_meeting::extract_transcript_for_ingest`].
///
/// `send` is called from the voice drain task and must be non-blocking,
/// so the lock acquisition is shipped to a `tokio::spawn`. Transcript
/// events are independent so reordering across spawned tasks is fine.
pub struct WsMemgineIngestSink {
    /// Meeting the transcripts belong to; passed to
    /// `extract_transcript_for_ingest` on every event.
    pub meeting_id: String,
    /// Memgine that receives the extracted `(speaker, text)` pairs.
    pub engine: Arc<Mutex<car_memgine::MemgineEngine>>,
    /// Next sink in the chain; receives every event, whether or not it
    /// produced an ingest.
    pub upstream: Arc<dyn car_voice::VoiceEventSink>,
}
224
225impl car_voice::VoiceEventSink for WsMemgineIngestSink {
226 fn send(&self, voice_session_id: &str, event_json: String) {
227 if let Ok(value) = serde_json::from_str::<Value>(&event_json) {
228 if let Some((speaker, text)) = car_meeting::extract_transcript_for_ingest(
229 &value,
230 &self.meeting_id,
231 voice_session_id,
232 ) {
233 let engine = self.engine.clone();
234 tokio::spawn(async move {
235 let mut guard = engine.lock().await;
236 guard.ingest_conversation(&speaker, &text, chrono::Utc::now());
237 });
238 }
239 }
240 self.upstream.send(voice_session_id, event_json);
241 }
242}
243
/// Per-client session.
pub struct ClientSession {
    /// Identifier of this client connection.
    pub client_id: String,
    /// Runtime handle for this session.
    /// NOTE(review): whether this is per-session or shared is decided by
    /// `create_session`, which is not visible here — confirm.
    pub runtime: Arc<Runtime>,
    /// Write half and pending-callback map for this connection.
    pub channel: Arc<WsChannel>,
    /// Host state handle (same type as `ServerState::host`).
    pub host: Arc<crate::host::HostState>,
    /// Memgine handle. Wrapped in `tokio::sync::Mutex` so dispatcher
    /// handlers can hold the lock across `.await` points without
    /// risking poisoning. Migrated from `std::sync::Mutex` in the
    /// car-server-core extraction (U1) per the "one-wrapper rule".
    pub memgine: Arc<Mutex<car_memgine::MemgineEngine>>,
    /// Lazy browser session — first `browser.run` call launches Chromium,
    /// subsequent calls reuse it so element IDs resolve across invocations
    /// within the same WebSocket connection.
    pub browser: car_ffi_common::browser::BrowserSessionSlot,
    /// Per-connection auth state. Starts `false`; flips to `true`
    /// after a successful `session.auth` handshake. Always considered
    /// authenticated when `ServerState::auth_token` is unset (auth
    /// disabled). Closes Parslee-ai/car-releases#32.
    pub authenticated: std::sync::atomic::AtomicBool,
    /// Bound agent identity (#169). `Some(id)` once a lifecycle-agent
    /// child has called `session.auth { token, agent_id }` and the
    /// supervisor confirmed `agent_id` is supervised + token matches.
    /// Used by `agents.list` to surface which managed agents have
    /// actually attached vs. just being marked `Running` at the
    /// process level. Cleared at disconnect by `remove_session`.
    pub agent_id: tokio::sync::Mutex<Option<String>>,
    /// Bound persistent memgine (#170). `Some` after `session.auth`
    /// successfully attaches the connection to a daemon-owned
    /// per-agent memgine (paired with `agent_id`). Memory handlers
    /// route through [`ClientSession::effective_memgine`] which
    /// returns this when set, falling back to the ephemeral
    /// `memgine` field for browser/host/CLI connections.
    pub bound_memgine: tokio::sync::Mutex<Option<Arc<Mutex<car_memgine::MemgineEngine>>>>,
}
279
280impl ClientSession {
281 /// Returns the memgine handle the memory.* handlers should use:
282 /// the bound per-agent memgine when this session attached via
283 /// `session.auth { agent_id }` (#169 + #170), otherwise the
284 /// ephemeral per-WS memgine. Cheap (one async lock + Arc clone).
285 pub async fn effective_memgine(&self) -> Arc<Mutex<car_memgine::MemgineEngine>> {
286 if let Some(eng) = self.bound_memgine.lock().await.as_ref() {
287 return eng.clone();
288 }
289 self.memgine.clone()
290 }
291}
292
/// Approval-gate policy for high-risk WS methods.
///
/// Every method in `methods` must be acknowledged via
/// `host.resolve_approval` before the dispatcher will route the
/// request to its handler. The dispatcher waits up to `timeout` for
/// a resolution; on timeout (or any non-`approve` resolution) the
/// request fails with JSON-RPC error `-32003`.
///
/// Default: gate enabled, the macOS-automation surface
/// (`automation.run_applescript`, `automation.shortcuts.run`,
/// `messages.send`, `mail.send`, `vision.ocr`), 60-second timeout.
/// `car-server --no-approvals` (or embedders calling
/// [`ServerStateConfig::with_approval_gate`] with `enabled=false`)
/// turns it off — only appropriate when no untrusted caller can
/// reach the WS port.
#[derive(Debug, Clone)]
pub struct ApprovalGate {
    /// Master switch. When `false`, every method dispatches without
    /// raising an approval — the pre-2026-05 behaviour.
    pub enabled: bool,
    /// Methods that require approval. Match is by exact method-name
    /// string against the JSON-RPC `method` field.
    pub methods: std::collections::HashSet<String>,
    /// How long to wait for the user to resolve the approval before
    /// timing out and surfacing an error to the caller.
    pub timeout: std::time::Duration,
}

impl Default for ApprovalGate {
    /// Gate enabled, the five methods listed below gated, 60-second
    /// timeout.
    fn default() -> Self {
        let methods = [
            "automation.run_applescript",
            "automation.shortcuts.run",
            "messages.send",
            "mail.send",
            "vision.ocr",
        ]
        .iter()
        .map(|s| s.to_string())
        .collect();
        Self {
            enabled: true,
            methods,
            timeout: std::time::Duration::from_secs(60),
        }
    }
}

impl ApprovalGate {
    /// Disable the gate entirely. Equivalent to passing
    /// `car-server --no-approvals`. Only appropriate when no
    /// untrusted caller can reach the WS port.
    ///
    /// The method set is left empty, so flipping `enabled` back to
    /// `true` on the returned value gates nothing until methods are
    /// added.
    pub fn disabled() -> Self {
        Self {
            enabled: false,
            methods: std::collections::HashSet::new(),
            timeout: std::time::Duration::from_secs(60),
        }
    }

    /// `true` if this method must be acknowledged before dispatch:
    /// the gate is enabled and `method` is an exact member of `methods`.
    pub fn requires_approval(&self, method: &str) -> bool {
        self.enabled && self.methods.contains(method)
    }
}

/// Builder for constructing a [`ServerState`] with embedder-supplied
/// dependencies. Embedders (e.g. `tokhn-daemon`) use this to inject
/// their own memgine handle and other shared infrastructure; the
/// standalone `car-server` binary uses [`ServerState::standalone`]
/// which calls `with_config` under the hood.
pub struct ServerStateConfig {
    /// Journal directory; copied into `ServerState::journal_dir`.
    pub journal_dir: PathBuf,
    /// Optional pre-constructed memgine engine. When `None`, each
    /// `create_session` call builds a fresh engine; embedders that want
    /// to share a single engine across sessions can supply a clone of
    /// their `Arc<Mutex<MemgineEngine>>` here.
    pub shared_memgine: Option<Arc<Mutex<car_memgine::MemgineEngine>>>,
    /// Optional pre-constructed inference engine.
    pub inference: Option<Arc<car_inference::InferenceEngine>>,
    /// Optional embedder-supplied A2A runtime. Used by the in-core
    /// `A2aDispatcher` to execute peer-driven proposals. When `None`,
    /// the dispatcher uses a fresh `Runtime` with `register_agent_basics`
    /// — peer agents see CAR's built-in tools and nothing else,
    /// matching the behaviour of the standalone `start_a2a_listener`.
    pub a2a_runtime: Option<Arc<car_engine::Runtime>>,
    /// Optional embedder-supplied A2A task store. When `None`,
    /// defaults to `InMemoryTaskStore`. tokhn-style embedders that
    /// want a polling-friendly persistent store plug it in here.
    pub a2a_store: Option<Arc<dyn car_a2a::TaskStore>>,
    /// Optional embedder-supplied agent card factory. When `None`,
    /// the dispatcher serves a card built from the A2A runtime's
    /// tool schemas at construction time, advertising its public URL
    /// as `ws://127.0.0.1:9100/` (the WS surface the dispatcher itself
    /// is reachable on).
    pub a2a_card_source: Option<Arc<car_a2a::AgentCardSource>>,
    /// Approval-gate policy. When `None`, the dispatcher uses
    /// [`ApprovalGate::default`] (gate ON, the macOS-automation
    /// surface gated, 60s timeout). Pass
    /// [`ApprovalGate::disabled`] to opt out — only appropriate
    /// when no untrusted caller can reach the WS port.
    pub approval_gate: Option<ApprovalGate>,
}
396
397impl ServerStateConfig {
398 /// Minimal config suitable for the standalone car-server binary:
399 /// only the journal dir is required; everything else is lazily
400 /// constructed at first use.
401 pub fn new(journal_dir: PathBuf) -> Self {
402 Self {
403 journal_dir,
404 shared_memgine: None,
405 inference: None,
406 a2a_runtime: None,
407 a2a_store: None,
408 a2a_card_source: None,
409 approval_gate: None,
410 }
411 }
412
413 pub fn with_shared_memgine(mut self, engine: Arc<Mutex<car_memgine::MemgineEngine>>) -> Self {
414 self.shared_memgine = Some(engine);
415 self
416 }
417
418 pub fn with_inference(mut self, engine: Arc<car_inference::InferenceEngine>) -> Self {
419 self.inference = Some(engine);
420 self
421 }
422
423 /// Plug in an embedder-supplied runtime for the A2A dispatcher.
424 /// Use case: tokhn-daemon wants peers to see its OPA preflight
425 /// tooling, not just CAR's `register_agent_basics` defaults.
426 pub fn with_a2a_runtime(mut self, runtime: Arc<car_engine::Runtime>) -> Self {
427 self.a2a_runtime = Some(runtime);
428 self
429 }
430
431 /// Plug in an embedder-supplied task store for the A2A
432 /// dispatcher. Use case: tokhn's polling-friendly persistent
433 /// store keyed by their session id.
434 pub fn with_a2a_store(mut self, store: Arc<dyn car_a2a::TaskStore>) -> Self {
435 self.a2a_store = Some(store);
436 self
437 }
438
439 /// Plug in an embedder-supplied agent card factory. The factory
440 /// is invoked on every `agent/getAuthenticatedExtendedCard`
441 /// dispatch, so embedders can reflect runtime tool changes.
442 pub fn with_a2a_card_source(mut self, source: Arc<car_a2a::AgentCardSource>) -> Self {
443 self.a2a_card_source = Some(source);
444 self
445 }
446
447 /// Override the approval-gate policy. Pass
448 /// [`ApprovalGate::disabled`] to skip the gate entirely (only
449 /// appropriate when no untrusted caller can reach the WS port);
450 /// pass a customised [`ApprovalGate`] to add or remove methods
451 /// or to change the timeout.
452 pub fn with_approval_gate(mut self, gate: ApprovalGate) -> Self {
453 self.approval_gate = Some(gate);
454 self
455 }
456}
457
/// Global server state shared across all connections.
pub struct ServerState {
    /// Journal directory, copied from `ServerStateConfig::journal_dir`.
    pub journal_dir: PathBuf,
    /// Live client sessions.
    /// NOTE(review): presumably keyed by `ClientSession::client_id` —
    /// confirm in `create_session`.
    pub sessions: Mutex<HashMap<String, Arc<ClientSession>>>,
    /// Inference engine slot. Pre-filled by `with_config` when the
    /// config supplies an engine; otherwise left empty for a later
    /// one-time initialization.
    pub inference: std::sync::OnceLock<Arc<car_inference::InferenceEngine>>,
    /// Host state, constructed fresh in `with_config`.
    pub host: Arc<crate::host::HostState>,
    /// When `Some`, `create_session` clones this handle into every new
    /// `ClientSession.memgine` — embedders that want a single shared
    /// memgine across all WS sessions set this. Standalone car-server
    /// leaves it `None`, which gives each session its own engine
    /// (preserving today's behavior).
    pub shared_memgine: Option<Arc<Mutex<car_memgine::MemgineEngine>>>,
    /// Process-wide voice session registry. Each
    /// `voice.transcribe_stream.start` call registers its own per-client
    /// [`WsVoiceEventSink`] so events route back to the originating WS
    /// connection only.
    pub voice_sessions: Arc<car_voice::VoiceSessionRegistry>,
    /// Process-wide meeting registry. Meeting ids are global; each
    /// meeting binds to the originating client's WS for upstream
    /// events but persists transcripts to the resolved
    /// `.car/meetings/<id>/` regardless of which client started it.
    pub meetings: Arc<car_meeting::MeetingRegistry>,
    /// Process-wide A2UI surface store. Agent-produced surfaces are
    /// visible to every host UI subscriber, independent of the
    /// WebSocket session that applied the update.
    pub a2ui: car_a2ui::A2uiSurfaceStore,
    /// In-process UI-improvement agent. Invoked from
    /// `handle_a2ui_render_report` with each inbound report; returned
    /// `Decision::Patch` envelopes are applied via the standard
    /// `apply_a2ui_envelope` path so all subscribers see the patch.
    /// `Arc` so the agent's interior `DashMap` state survives across
    /// handler calls even when `ServerState` is cheap-cloned.
    pub ui_agent: Arc<car_ui_agent::UIImprovementAgent>,
    /// Per-surface oscillation detector for the UI-improvement
    /// loop. Sits between the agent's `Decision::Patch` and the
    /// apply path so A→B→A patch cycles get cooled down without
    /// the agent itself having to track history. neo's review:
    /// "controllers use workqueue backoff; reconcilers stay
    /// stateless."
    pub ui_agent_oscillation: Arc<crate::ui_agent_loop::OscillationDetector>,
    /// Per-surface iteration budget. Backstop against runaway
    /// loops the oscillation detector misses — caps total agent-
    /// driven patches per surface at `DEFAULT_MAX_ITERATIONS`.
    pub ui_agent_budget: Arc<crate::ui_agent_loop::IterationBudget>,
    /// Process-wide concurrency gate for inference RPC handlers. Sized
    /// from host RAM at startup, overridable via
    /// [`crate::admission::ENV_MAX_CONCURRENT`]. Without this, N
    /// concurrent users multiply KV-cache and activation memory and
    /// take the host out (#114-adjacent: filed alongside the daemon
    /// always-on rework). The semaphore lives on `ServerState` so it
    /// is shared across every WebSocket session in the same process.
    pub admission: Arc<crate::admission::InferenceAdmission>,
    /// Server-side A2A continuation auth keyed by A2UI surface id.
    /// Kept out of `A2uiSurface.owner` so host renderers never see
    /// bearer/API-key material.
    pub a2ui_route_auth: Mutex<HashMap<String, A2aRouteAuth>>,
    /// Lifecycle-managed agents — declarative manifest at
    /// `~/.car/agents.json` driving spawn/restart/stop. Closes
    /// Parslee-ai/car-releases#27. Lazy-initialized so embedders that
    /// don't want process supervision don't pay the disk-touch cost
    /// at server start.
    pub supervisor: std::sync::OnceLock<Arc<car_registry::supervisor::Supervisor>>,
    /// Manifest path this daemon is *observing* but does NOT own.
    /// Set by `car-server` when boot-time supervisor construction
    /// fails with [`car_registry::supervisor::SupervisorError::AlreadyRunning`]
    /// — another car-server process on the host holds the exclusive
    /// lock on this manifest. In that state, `supervisor()` returns a
    /// clear "observe-only" error so mutation handlers refuse
    /// (preventing the duplicate-spawn bug from
    /// Parslee-ai/car-releases#44), while read-only handlers
    /// (`agents.list`, `agents.health`) fall back to
    /// [`car_registry::supervisor::Supervisor::list_from_manifest`] /
    /// [`car_registry::supervisor::Supervisor::health_from_manifest`]
    /// so operators can still inspect what the primary daemon is
    /// supervising.
    pub observer_manifest_path: std::sync::OnceLock<PathBuf>,
    /// In-core A2A dispatcher — embedders that consume `car-server-core`
    /// get A2A reachability "for free" without standing up a separate
    /// HTTP listener. Closes Parslee-ai/car-releases#28. Lazy-init so
    /// the embedder can override the runtime / task store / agent card
    /// via [`ServerStateConfig::with_a2a_runtime`] etc. before the
    /// first dispatch.
    pub a2a_dispatcher: std::sync::OnceLock<Arc<car_a2a::A2aDispatcher>>,
    /// WS clients subscribed to A2UI envelope events. After every
    /// successful `a2ui.apply` / `a2ui.ingest`, the resulting
    /// `A2uiApplyResult` is broadcast to every subscriber as an
    /// `a2ui.event` JSON-RPC notification. Closes
    /// Parslee-ai/car-releases#29. Subscribers register via the
    /// `a2ui/subscribe` method and are auto-cleaned on WS disconnect.
    pub a2ui_subscribers: Mutex<HashMap<String, Arc<WsChannel>>>,
    /// Per-launch auth token. When `Some`, the WS dispatcher rejects
    /// non-auth methods on unauthenticated sessions until the client
    /// calls `session.auth` with the matching value. When `None`,
    /// auth is disabled and every connection works as before. Set
    /// at startup by `car-server` unless `--no-auth` is passed
    /// (default flipped 2026-05); embedders that want to enable
    /// auth call [`ServerState::install_auth_token`]. Closes
    /// Parslee-ai/car-releases#32.
    pub auth_token: std::sync::OnceLock<String>,
    /// Parslee cloud identity loaded from the user's OS keychain at
    /// daemon startup when `car auth login` has been completed.
    pub parslee_session: std::sync::OnceLock<crate::parslee_auth::ParsleeSession>,
    /// `agent_id -> client_id` map of currently-attached lifecycle
    /// agents (#169). Populated by the `session.auth` handler when a
    /// supervised child presents its `agent_id` + per-agent token;
    /// drained on disconnect by `remove_session`. Single-claim:
    /// a second connection presenting the same `agent_id` is
    /// rejected so the daemon-side per-agent state stays unambiguous.
    pub attached_agents: Mutex<HashMap<String, String>>,
    /// `agent_id -> persistent memgine` map (#170). Lazy-loaded on
    /// first connection per id from `~/.car/memory/agents/<id>.jsonl`,
    /// retained across daemon restart, surviving any single
    /// disconnect/reconnect of the supervised child. Connections
    /// that auth without an `agent_id` (browser, host, ad-hoc CLI)
    /// keep the per-WS ephemeral memgine on `ClientSession.memgine`
    /// — no behaviour change.
    pub agent_memgines: Mutex<HashMap<String, Arc<Mutex<car_memgine::MemgineEngine>>>>,
    /// In-flight `agents.chat` sessions keyed by `session_id`. See
    /// [`ChatSession`] for shape. Populated by `agents.chat`,
    /// cleared on terminal `agent.chat.event` or
    /// `agents.chat.cancel`. Disconnect cleanup happens in
    /// `remove_session` — any in-flight session bound to either the
    /// disconnecting host or agent client is dropped so subsequent
    /// stray notifications from a respawned agent fall on the floor
    /// rather than racing into a stale stream.
    pub chat_sessions: Mutex<HashMap<String, ChatSession>>,
    /// Bound MCP HTTP-streamable URL (e.g.
    /// `"http://127.0.0.1:9102/mcp"`) — `car-server` installs this
    /// after binding the listener. Used by the
    /// `agents.invoke_external` handler to default
    /// `InvokeOptions.mcp_endpoint` so external agents
    /// (Claude Code today) load the daemon's CAR namespace via
    /// `--mcp-config` automatically. `None` when MCP isn't bound
    /// (e.g. `--mcp-bind disabled`).
    pub mcp_url: std::sync::OnceLock<String>,
    /// Registry of connected MCP SSE sessions. Populated alongside
    /// [`mcp_url`] when `car-server` boots the MCP listener. Public
    /// so handlers can call `crate::mcp::push_to_session` to send
    /// server-initiated requests to a specific MCP-connected
    /// client (MCP-3 foundation; MCP-3b will wire host-owned tool
    /// dispatch through this).
    pub mcp_sessions: std::sync::OnceLock<Arc<crate::mcp::SessionMap>>,
    /// Approval gate for high-risk WS methods (audit 2026-05). The
    /// gate intercepts `automation.run_applescript`,
    /// `automation.shortcuts.run`, `messages.send`, `mail.send`, and
    /// `vision.ocr` before they dispatch, raises a
    /// `host.create_approval` for the user to act on, and waits
    /// (with a timeout) for `host.resolve_approval`. Approve →
    /// dispatch continues; deny / timeout → JSON-RPC error code
    /// `-32003`. The set of gated methods and the wait timeout are
    /// embedder-overridable via
    /// [`ServerStateConfig::with_approval_gate`].
    pub approval_gate: ApprovalGate,
    /// A2A-runtime / store / card factory carried over from the
    /// embedder's [`ServerStateConfig`]. Consumed lazily on first
    /// `a2a_dispatcher()` call so embedders can construct
    /// `ServerState` without paying the runtime spin-up cost when
    /// they don't actually use the A2A surface.
    pub(crate) a2a_runtime: std::sync::Mutex<Option<Arc<car_engine::Runtime>>>,
    /// Embedder-supplied A2A task store carried over from
    /// `ServerStateConfig::a2a_store`; `None` when not supplied.
    pub(crate) a2a_store: std::sync::Mutex<Option<Arc<dyn car_a2a::TaskStore>>>,
    /// Embedder-supplied agent card factory carried over from
    /// `ServerStateConfig::a2a_card_source`; `None` when not supplied.
    pub(crate) a2a_card_source: std::sync::Mutex<Option<Arc<car_a2a::AgentCardSource>>>,
}
620
621impl ServerState {
622 /// Constructor for the standalone `car-server` binary. Each WS
623 /// connection gets its own per-session memgine — matches the
624 /// pre-extraction default and is correct for a single-process
625 /// daemon serving one user at a time.
626 ///
627 /// **Embedders must not call this.** It silently leaves
628 /// `shared_memgine = None`, which re-introduces the dual-memgine
629 /// bug U7 was created to prevent (one engine in the embedder, a
630 /// fresh one inside every WS session). Embedders use
631 /// [`ServerState::embedded`] instead, which makes the shared
632 /// engine handle a required argument so it cannot be forgotten.
633 pub fn standalone(journal_dir: PathBuf) -> Self {
634 Self::with_config(ServerStateConfig::new(journal_dir))
635 }
636
637 /// Constructor for embedders (e.g. `tokhn-daemon`). The shared
638 /// memgine handle is **required**: every WS session created by
639 /// this state will reuse the same engine, preventing the
640 /// dual-memgine bug.
641 ///
642 /// For embedders that also want to inject a pre-warmed inference
643 /// engine or other advanced wiring, build a [`ServerStateConfig`]
644 /// directly and call [`ServerState::with_config`].
645 pub fn embedded(
646 journal_dir: PathBuf,
647 shared_memgine: Arc<Mutex<car_memgine::MemgineEngine>>,
648 ) -> Self {
649 Self::with_config(ServerStateConfig::new(journal_dir).with_shared_memgine(shared_memgine))
650 }
651
652 /// Build a `ServerState` from a [`ServerStateConfig`] — the path
653 /// embedders use when they need to inject a shared memgine *and*
654 /// a pre-warmed inference engine, or any other advanced wiring
655 /// the convenience constructors don't cover.
656 pub fn with_config(cfg: ServerStateConfig) -> Self {
657 let inference = std::sync::OnceLock::new();
658 if let Some(eng) = cfg.inference {
659 // OnceLock::set returns Err if already set — fresh OnceLock
660 // means it's empty, so this is infallible here.
661 let _ = inference.set(eng);
662 }
663 let voice_sessions = Arc::new(car_voice::VoiceSessionRegistry::new());
664 // Reap sessions whose clients dropped without calling
665 // voice.transcribe_stream.stop (WS disconnect, process exit,
666 // etc.). Listener handles otherwise leak for the daemon's
667 // lifetime. `with_config` is sync but always called from the
668 // `#[tokio::main]` entry point, so `Handle::try_current()`
669 // inside `start_sweeper` finds the runtime.
670 voice_sessions.start_sweeper();
671 // UI-improvement agent is pure decision logic — no I/O, no
672 // persistence handle. Memgine ingest of strategy outcomes is
673 // the caller's responsibility (handler.rs after a successful
674 // Decision::Patch). Keeps the agent crate Mutex-flavor
675 // agnostic so it can compose with std/tokio mutex callers.
676 let ui_agent = Arc::new(car_ui_agent::UIImprovementAgent::with_default_strategies());
677 let ui_agent_oscillation = Arc::new(crate::ui_agent_loop::OscillationDetector::new());
678 let ui_agent_budget = Arc::new(crate::ui_agent_loop::IterationBudget::new());
679 Self {
680 journal_dir: cfg.journal_dir,
681 sessions: Mutex::new(HashMap::new()),
682 inference,
683 host: Arc::new(crate::host::HostState::new()),
684 shared_memgine: cfg.shared_memgine,
685 voice_sessions,
686 meetings: Arc::new(car_meeting::MeetingRegistry::new()),
687 a2ui: car_a2ui::A2uiSurfaceStore::new(),
688 ui_agent,
689 ui_agent_oscillation,
690 ui_agent_budget,
691 admission: Arc::new(crate::admission::InferenceAdmission::new()),
692 a2ui_route_auth: Mutex::new(HashMap::new()),
693 supervisor: std::sync::OnceLock::new(),
694 observer_manifest_path: std::sync::OnceLock::new(),
695 a2a_dispatcher: std::sync::OnceLock::new(),
696 a2a_runtime: std::sync::Mutex::new(cfg.a2a_runtime),
697 a2a_store: std::sync::Mutex::new(cfg.a2a_store),
698 a2a_card_source: std::sync::Mutex::new(cfg.a2a_card_source),
699 a2ui_subscribers: Mutex::new(HashMap::new()),
700 auth_token: std::sync::OnceLock::new(),
701 parslee_session: std::sync::OnceLock::new(),
702 attached_agents: Mutex::new(HashMap::new()),
703 agent_memgines: Mutex::new(HashMap::new()),
704 chat_sessions: Mutex::new(HashMap::new()),
705 mcp_url: std::sync::OnceLock::new(),
706 mcp_sessions: std::sync::OnceLock::new(),
707 approval_gate: cfg.approval_gate.unwrap_or_default(),
708 }
709 }
710
711 /// Enable the per-launch auth handshake. After this call, every
712 /// new WS connection must call `session.auth` with `token` as
713 /// the first frame; otherwise the connection is closed. Called
714 /// by `car-server` at startup unless `--no-auth` is set
715 /// (default flipped 2026-05); embedders supply their own token
716 /// if they want the same posture. Returns `Err(token)` when
717 /// auth was already installed.
718 pub fn install_auth_token(&self, token: String) -> Result<(), String> {
719 self.auth_token.set(token)
720 }
721
722 pub fn install_parslee_session(
723 &self,
724 session: crate::parslee_auth::ParsleeSession,
725 ) -> Result<(), crate::parslee_auth::ParsleeSession> {
726 self.parslee_session.set(session)
727 }
728
729 /// Install the bound MCP URL after car-server's listener is up.
730 /// Idempotent on the first call; subsequent calls are accepted
731 /// silently (matches the supervisor / a2a_dispatcher install
732 /// idiom). Returns `Err(())` when an MCP URL was already
733 /// installed — embedders should treat this as "another
734 /// component beat us to it" and use whichever value is now set.
735 pub fn install_mcp_url(&self, url: String) -> Result<(), String> {
736 self.mcp_url.set(url)
737 }
738
739 /// Install the MCP SSE session registry. Pairs with
740 /// [`install_mcp_url`] — both come from the same `start_mcp`
741 /// call and either both get installed or neither does (the
742 /// daemon binds them together).
743 pub fn install_mcp_sessions(
744 &self,
745 sessions: Arc<crate::mcp::SessionMap>,
746 ) -> Result<(), Arc<crate::mcp::SessionMap>> {
747 self.mcp_sessions.set(sessions)
748 }
749
750 /// Lazy-initialize and return the agent supervisor. The first
751 /// call constructs a [`car_registry::supervisor::Supervisor`] backed by
752 /// `~/.car/agents.json` + `~/.car/logs/`. Embedders that need a
753 /// non-default location should call
754 /// [`ServerState::install_supervisor`] before any handler runs.
755 ///
756 /// In observer mode (set via [`install_observer_manifest`]),
757 /// returns a clear error mentioning the manifest path the
758 /// primary daemon owns. This prevents the second daemon from
759 /// re-attempting `user_default()` (which would also fail with
760 /// `AlreadyRunning`) on every WS call, and gives mutation
761 /// handlers a stable refusal path. Read-only handlers
762 /// (`agents.list`, `agents.health`) should call
763 /// [`Self::observer_manifest_path`] first and fall back to
764 /// [`car_registry::supervisor::Supervisor::list_from_manifest`] /
765 /// `health_from_manifest` when set. Closes
766 /// Parslee-ai/car-releases#44.
767 pub fn supervisor(&self) -> Result<Arc<car_registry::supervisor::Supervisor>, String> {
768 if let Some(s) = self.supervisor.get() {
769 return Ok(s.clone());
770 }
771 if let Some(p) = self.observer_manifest_path.get() {
772 return Err(format!(
773 "this car-server is observe-only — another car-server process \
774 holds the supervisor lock for {}. Mutations refuse here; route \
775 them to the primary daemon, or stop the other car-server first.",
776 p.display()
777 ));
778 }
779 let s = car_registry::supervisor::Supervisor::user_default()
780 .map(Arc::new)
781 .map_err(|e| e.to_string())?;
782 // OnceLock::set returns the original arg back on collision —
783 // a concurrent caller racing through user_default. Take
784 // whichever wins.
785 let _ = self.supervisor.set(s);
786 Ok(self.supervisor.get().expect("set or pre-existing").clone())
787 }
788
789 /// Replace the lazy default with a caller-supplied supervisor.
790 /// Returns `Err(())` when a supervisor was already installed.
791 /// Used by the standalone `car-server` binary to call
792 /// `start_all()` on a known-good handle without paying the
793 /// lazy-init lookup cost.
794 pub fn install_supervisor(
795 &self,
796 supervisor: Arc<car_registry::supervisor::Supervisor>,
797 ) -> Result<(), Arc<car_registry::supervisor::Supervisor>> {
798 self.supervisor.set(supervisor)
799 }
800
801 /// Non-acquiring read of the currently-installed supervisor.
802 /// Unlike [`supervisor`](Self::supervisor), this does NOT lazy-
803 /// init via `user_default()` — it returns `None` instead of
804 /// constructing a fresh `Supervisor` and acquiring the
805 /// `<manifest>.lock` as a side effect. Use this from read-only
806 /// metadata paths (`host.subscribe` identity, status surfaces)
807 /// where causing lock acquisition on observation would be a
808 /// Heisenberg subscribe — the act of asking "do you own the
809 /// lock?" must not be the act of taking it.
810 pub fn supervisor_if_installed(&self) -> Option<Arc<car_registry::supervisor::Supervisor>> {
811 self.supervisor.get().cloned()
812 }
813
814 /// Mark this daemon as *observing* a manifest owned by another
815 /// car-server process. After this call, `supervisor()` returns
816 /// an "observe-only" error and read-only handlers
817 /// (`agents.list`, `agents.health`) fall back to the static
818 /// `Supervisor::list_from_manifest` / `health_from_manifest`
819 /// paths. Idempotent — subsequent calls with the same path are
820 /// no-ops; a different path returns `Err(())`. Closes
821 /// Parslee-ai/car-releases#44.
822 pub fn install_observer_manifest(&self, path: PathBuf) -> Result<(), PathBuf> {
823 self.observer_manifest_path.set(path)
824 }
825
826 /// Path of the manifest this daemon is observing but not
827 /// supervising. `None` when this daemon owns the supervisor
828 /// (the normal case) or when no manifest is configured at all
829 /// (no `HOME`, embedder didn't install one).
830 pub fn observer_manifest_path(&self) -> Option<&PathBuf> {
831 self.observer_manifest_path.get()
832 }
833
834 /// Lazy-initialize and return the in-core A2A dispatcher. The
835 /// first call constructs an [`car_a2a::A2aDispatcher`] from
836 /// either the embedder's overrides (set via
837 /// [`ServerStateConfig::with_a2a_runtime`] / `with_a2a_store` /
838 /// `with_a2a_card_source`) or sensible defaults: a fresh
839 /// `Runtime` with `register_agent_basics` registered, an
840 /// `InMemoryTaskStore`, and a card built from the runtime's
841 /// tool schemas advertising `ws://127.0.0.1:9100/` as the
842 /// public URL. Closes Parslee-ai/car-releases#28.
843 pub async fn a2a_dispatcher(&self) -> Arc<car_a2a::A2aDispatcher> {
844 if let Some(d) = self.a2a_dispatcher.get() {
845 return d.clone();
846 }
847
848 // Embedder overrides take precedence; fall back to defaults
849 // for each slot independently (so an embedder that only
850 // wants a custom card can leave the runtime + store at
851 // defaults). `Mutex::take()` consumes the slot so the
852 // defaults aren't reconstructed on a racing init that loses
853 // the OnceLock::set call below.
854 let runtime = self
855 .a2a_runtime
856 .lock()
857 .expect("a2a_runtime mutex poisoned")
858 .take();
859 let runtime = match runtime {
860 Some(r) => r,
861 None => {
862 let r = Arc::new(car_engine::Runtime::new());
863 r.register_agent_basics().await;
864 r
865 }
866 };
867
868 let store = self
869 .a2a_store
870 .lock()
871 .expect("a2a_store mutex poisoned")
872 .take()
873 .unwrap_or_else(|| Arc::new(car_a2a::InMemoryTaskStore::new()));
874
875 let card_source = self
876 .a2a_card_source
877 .lock()
878 .expect("a2a_card_source mutex poisoned")
879 .take();
880 let card_source = match card_source {
881 Some(c) => c,
882 None => {
883 let card = car_a2a::build_default_agent_card(
884 &runtime,
885 car_a2a::AgentCardConfig::minimal(
886 "Common Agent Runtime",
887 "Embedded CAR daemon — A2A v1.0 reachable over WebSocket JSON-RPC.",
888 "ws://127.0.0.1:9100/",
889 car_a2a::AgentProvider {
890 organization: "Parslee".into(),
891 url: Some("https://github.com/Parslee-ai/car".into()),
892 },
893 ),
894 )
895 .await;
896 Arc::new(move || card.clone()) as Arc<car_a2a::AgentCardSource>
897 }
898 };
899
900 let dispatcher = Arc::new(car_a2a::A2aDispatcher::new(runtime, store, card_source));
901 // OnceLock::set returns Err on race — accept whichever
902 // dispatcher won and clone-return that one.
903 let _ = self.a2a_dispatcher.set(dispatcher);
904 self.a2a_dispatcher
905 .get()
906 .expect("a2a_dispatcher set or pre-existing")
907 .clone()
908 }
909
910 pub async fn create_session(
911 &self,
912 client_id: &str,
913 channel: Arc<WsChannel>,
914 ) -> Arc<ClientSession> {
915 let journal_path = self.journal_dir.join(format!("{}.jsonl", client_id));
916 let event_log = EventLog::with_journal(journal_path);
917
918 let executor = Arc::new(WsToolExecutor {
919 channel: channel.clone(),
920 });
921
922 let runtime = Runtime::new()
923 .with_event_log(event_log)
924 .with_executor(executor);
925
926 // If the embedder supplied a shared memgine, every session uses it.
927 // Otherwise each session gets its own — matches pre-extraction behavior.
928 let memgine = match &self.shared_memgine {
929 Some(eng) => eng.clone(),
930 None => Arc::new(Mutex::new(car_memgine::MemgineEngine::new(None))),
931 };
932
933 let session = Arc::new(ClientSession {
934 client_id: client_id.to_string(),
935 runtime: Arc::new(runtime),
936 channel,
937 host: self.host.clone(),
938 memgine,
939 browser: car_ffi_common::browser::BrowserSessionSlot::new(),
940 // When auth is disabled (no token installed), every
941 // session is "authenticated" by default — preserves the
942 // pre-#32 behaviour. When auth is enabled, the value is
943 // ignored on creation; the dispatcher's gate checks
944 // `ServerState::auth_token.is_some()` to decide whether
945 // to enforce.
946 authenticated: std::sync::atomic::AtomicBool::new(false),
947 agent_id: tokio::sync::Mutex::new(None),
948 bound_memgine: tokio::sync::Mutex::new(None),
949 });
950
951 self.sessions
952 .lock()
953 .await
954 .insert(client_id.to_string(), session.clone());
955
956 session
957 }
958
959 /// Remove a per-client session from the registry on disconnect.
960 /// Returns the removed session if present so callers can drop any
961 /// remaining strong refs (e.g. drain pending tool callbacks). Fix
962 /// for MULTI-4 / WS-3 — without this, `state.sessions` retains
963 /// `Arc<ClientSession>` for every connection that ever existed.
964 pub async fn remove_session(&self, client_id: &str) -> Option<Arc<ClientSession>> {
965 let removed = self.sessions.lock().await.remove(client_id);
966 if let Some(session) = &removed {
967 // #169: drop the agent_id → client_id binding so a
968 // disconnected lifecycle agent can reconnect (or its
969 // supervisor-respawned replacement can take the slot)
970 // without colliding with the stale claim.
971 let bound = session.agent_id.lock().await.clone();
972 if let Some(id) = bound {
973 let mut attached = self.attached_agents.lock().await;
974 if attached.get(&id).map(String::as_str) == Some(client_id) {
975 attached.remove(&id);
976 }
977 }
978 // Drop any in-flight `agents.chat` sessions bound to this
979 // client — either side disconnecting orphans the stream,
980 // and a respawned agent's stray `agent.chat.event`
981 // notifications must not race into a stale routing entry.
982 // See `docs/proposals/agent-chat-surface.md`.
983 let bound_agent = session.agent_id.lock().await.clone();
984 let mut chats = self.chat_sessions.lock().await;
985 chats.retain(|_, s| {
986 if s.host_client_id == client_id {
987 return false;
988 }
989 if let Some(agent_id) = &bound_agent {
990 if &s.agent_id == agent_id {
991 return false;
992 }
993 }
994 true
995 });
996 }
997 removed
998 }
999}
1000
#[cfg(test)]
mod observer_mode_tests {
    use super::*;

    /// Create a fresh scratch directory under the cargo target dir and
    /// return its path.
    ///
    /// The target dir comes from `CARGO_TARGET_DIR` when set, else
    /// `<CARGO_MANIFEST_DIR>/../../target`. The `TempDir` guard is
    /// deliberately leaked so the directory is not deleted while the
    /// test runs.
    fn journal_dir() -> PathBuf {
        let target_root = match std::env::var_os("CARGO_TARGET_DIR") {
            Some(dir) => PathBuf::from(dir),
            None => PathBuf::from(env!("CARGO_MANIFEST_DIR"))
                .join("..")
                .join("..")
                .join("target"),
        };
        // Best effort: a failure here surfaces in `new_in` below.
        let _ = std::fs::create_dir_all(&target_root);
        let target_root = std::fs::canonicalize(&target_root).unwrap_or(target_root);

        let scratch = tempfile::TempDir::new_in(&target_root).unwrap();
        let dir = scratch.path().to_path_buf();
        std::mem::forget(scratch); // keep the dir alive for the test
        dir
    }

    /// Closes Parslee-ai/car-releases#44: the second car-server on a
    /// host installs the observer marker after `with_paths` returns
    /// AlreadyRunning. From then on `state.supervisor()` must return a
    /// clear "observe-only" error that names the manifest path, and
    /// must NOT retry `user_default()` (which would try to re-acquire
    /// the lock and likely fail as well).
    #[test]
    fn supervisor_returns_observer_error_when_marker_set() {
        let state = ServerState::standalone(journal_dir());
        let manifest = PathBuf::from("/tmp/fake-manifest-for-test.json");

        state
            .install_observer_manifest(manifest.clone())
            .expect("install_observer_manifest succeeds on fresh state");
        assert_eq!(state.observer_manifest_path(), Some(&manifest));

        // Discard the `Ok` payload so `unwrap_err` doesn't need the
        // supervisor type to be `Debug`.
        let err = state.supervisor().map(drop).unwrap_err();
        assert!(
            err.contains("observe-only"),
            "error must mention observe-only mode: {err}"
        );
        assert!(
            err.contains("fake-manifest-for-test.json"),
            "error must surface the manifest path so operators know which daemon owns it: {err}"
        );
    }

    /// A second install with a *different* path is rejected: the new
    /// path comes back in `Err` and the first path stays in place.
    #[test]
    fn install_observer_manifest_is_idempotent_per_path_collision() {
        let state = ServerState::standalone(journal_dir());
        let first = PathBuf::from("/tmp/manifest-a.json");
        let second = PathBuf::from("/tmp/manifest-b.json");

        state.install_observer_manifest(first.clone()).unwrap();

        // The rejected value is handed back on collision.
        let rejected = state
            .install_observer_manifest(second.clone())
            .unwrap_err();
        assert_eq!(rejected, second);
        assert_eq!(state.observer_manifest_path(), Some(&first));
    }

    /// The Heisenberg-subscribe guard: `host.subscribe`'s identity
    /// path must use the non-acquiring read, so a purely observational
    /// client can't make the daemon claim `<manifest>.lock` just by
    /// asking about it.
    #[test]
    fn supervisor_if_installed_does_not_lazy_init() {
        // Fresh state has no supervisor installed.
        let state = ServerState::standalone(journal_dir());
        assert!(state.supervisor_if_installed().is_none());
        // The observer marker stays unset too — no implicit init.
        assert!(state.observer_manifest_path().is_none());
    }
}