trusty_memory/lib.rs
1//! MCP server (stdio + HTTP/SSE) for trusty-memory.
2//!
3//! Why: Claude Code and other MCP-aware clients integrate with trusty-memory
4//! through the standardized Model Context Protocol; we expose memory + KG
5//! tools so they can be called by name.
6//! What: Provides `run_stdio` (JSON-RPC 2.0 over stdin/stdout) and `run_http`
7//! (axum HTTP/SSE stub), plus an `AppState` that carries the shared
8//! `PalaceRegistry`, on-disk data root, and a lazily-initialized embedder.
9//! Test: `cargo test -p trusty-memory-mcp` validates handshake + dispatch.
10
11use anyhow::Result;
12use serde_json::{json, Value};
13use std::net::SocketAddr;
14use std::path::{Path, PathBuf};
15use std::sync::{Arc, OnceLock, RwLock};
16use tokio::sync::{broadcast, OnceCell};
17use tracing::info;
18use trusty_common::mcp::{error_codes, initialize_response, Request, Response};
19use trusty_common::memory_core::embed::FastEmbedder;
20use trusty_common::memory_core::store::ChatSessionStore;
21use trusty_common::memory_core::PalaceRegistry;
22use trusty_common::ChatProvider;
23
24pub mod bootstrap;
25pub mod commands;
26pub mod discovery;
27pub mod openrpc;
28pub mod prompt_facts;
29pub mod service;
30pub mod tools;
31pub mod web;
32
33pub use service::MemoryMcpService;
34pub use tools::MemoryMcpServer;
35
/// Pick the directory that directly contains the per-palace subdirectories.
///
/// Why: two on-disk layouts exist. Current monorepo code keeps palaces
/// directly under the registry directory (`<dir>/<id>/palace.json`), while the
/// legacy standalone `trusty-memory` repo nested them one level deeper
/// (`<data_dir>/palaces/<id>/palace.json`) — which is where existing installs
/// keep their data. Pointing the registry at the bare data dir in the legacy
/// layout finds zero palaces (the "palaces lost on restart" bug).
/// What: returns `<data_dir>/palaces` when that path is an existing directory,
/// otherwise hands `data_dir` back unchanged. Resolving this once and using
/// the result as `AppState::data_root` keeps every call site consistent
/// without forcing a data migration.
/// Test: `tests::resolve_palace_registry_dir_prefers_palaces_subdir` and
/// `resolve_palace_registry_dir_falls_back_to_data_dir`.
pub fn resolve_palace_registry_dir(data_dir: PathBuf) -> PathBuf {
    let legacy_layout = data_dir.join("palaces");
    if !legacy_layout.is_dir() {
        // Flat layout (or a fresh install): the data dir is the registry.
        return data_dir;
    }
    legacy_layout
}
63
/// Live daemon events broadcast to connected SSE subscribers.
///
/// Why: The dashboard needs push-driven updates so palace creation, drawer
/// add/delete, dream cycles, and aggregate status changes are visible without
/// polling. A single broadcast channel fans out to every connected browser.
/// What: Internally tagged enum; each variant serializes as
/// `{"type": "<snake_case variant name>", ...fields}` (e.g.
/// `{"type": "palace_created", "id": "...", "name": "..."}`).
/// Test: `web::tests::sse_stream_emits_events` subscribes, triggers a
/// mutation, and asserts the frame arrives.
#[derive(Clone, Debug, serde::Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum DaemonEvent {
    /// A palace was created.
    PalaceCreated {
        /// Identifier of the new palace.
        id: String,
        /// Human-readable name of the new palace.
        name: String,
    },
    /// A drawer was added to a palace.
    DrawerAdded {
        /// Identifier of the palace that received the drawer.
        palace_id: String,
        /// Friendly palace name (Palace.name) at write time. Why: lets SSE
        /// consumers (the dashboard activity feed) render the human-readable
        /// label without a separate id→name lookup. Empty string if the
        /// emitter could not resolve the name.
        // NOTE(review): `serde(default)` only influences deserialization and
        // this enum derives `Serialize` only — confirm whether it is needed.
        #[serde(default)]
        palace_name: String,
        /// Drawer count reported by the emitter (presumably the palace's
        /// total after the add — confirm against the emitting handler).
        drawer_count: usize,
        /// Wall-clock timestamp when the drawer was added. Why: SSE
        /// receivers want to render "just now / 2m ago" relative to the
        /// daemon's clock, not the time the SSE frame happens to arrive.
        timestamp: chrono::DateTime<chrono::Utc>,
    },
    /// A drawer was deleted from a palace.
    DrawerDeleted {
        /// Identifier of the palace the drawer was removed from.
        palace_id: String,
        /// Drawer count reported by the emitter (presumably the palace's
        /// total after the delete — confirm against the emitting handler).
        drawer_count: usize,
    },
    /// A dream cycle finished. The counters are supplied by the emitter;
    /// their exact semantics are defined where the cycle runs, not here.
    DreamCompleted {
        /// Palace the cycle ran against. Presumably `None` when the cycle
        /// was not scoped to a single palace — TODO confirm with the emitter.
        palace_id: Option<String>,
        merged: usize,
        pruned: usize,
        compacted: usize,
        closets_updated: usize,
        /// Duration of the cycle in milliseconds.
        duration_ms: u64,
    },
    /// Aggregate totals changed.
    StatusChanged {
        total_drawers: usize,
        total_vectors: usize,
        total_kg_triples: usize,
    },
}
111
/// Shared application state passed to every request handler.
///
/// Why: The stdio loop and HTTP server need the same handles to the registry,
/// data root, and embedder so MCP tools can perform real reads/writes against
/// the live trusty-memory core. The embedder is heavy (loads ONNX weights) so
/// we hold it behind a `OnceCell` and initialize lazily on first use.
/// What: `Clone`-able via `Arc` fields. The registry / data root are eager;
/// `embedder` is `Arc<OnceCell<Arc<FastEmbedder>>>` so concurrent first-use
/// races resolve to a single shared instance.
/// Test: `app_state_default_constructs` confirms construction without panic.
#[derive(Clone)]
pub struct AppState {
    /// Crate version string, captured from `CARGO_PKG_VERSION` at compile
    /// time by `AppState::new`.
    pub version: String,
    /// In-memory registry of open palaces. Starts empty in `AppState::new`
    /// and is populated by `load_palaces_from_disk`.
    pub registry: Arc<PalaceRegistry>,
    /// On-disk directory that holds the per-palace subdirectories
    /// (`session_store` joins a palace id directly onto it).
    pub data_root: PathBuf,
    /// Lazily-initialized shared embedder; filled on the first successful
    /// `AppState::embedder` call.
    pub embedder: Arc<OnceCell<Arc<FastEmbedder>>>,
    /// Optional default palace applied to MCP tool calls when the caller
    /// omits the `palace` argument. Set via `trusty-memory serve --palace`.
    pub default_palace: Option<String>,
    /// Active chat provider selected at startup. `None` means no upstream is
    /// configured (no Ollama detected and no OpenRouter key) — callers must
    /// degrade gracefully (chat endpoint returns 412).
    pub chat_provider: Arc<OnceCell<Option<Arc<dyn ChatProvider>>>>,
    /// Per-palace chat-session stores, opened lazily so cold-start cost is
    /// paid only when chat-history endpoints are hit.
    pub session_stores: Arc<dashmap::DashMap<String, Arc<ChatSessionStore>>>,
    /// Broadcast sender for live `DaemonEvent` pushes to SSE subscribers.
    ///
    /// Why: Lets mutating handlers emit events that any connected dashboard
    /// receives instantly. Cap of 128 buffers transient slow readers; if a
    /// receiver lags it gets `RecvError::Lagged` and we emit a `lag` frame.
    pub events: Arc<broadcast::Sender<DaemonEvent>>,
    /// Instant the daemon started, used to compute `uptime_secs` on `/health`.
    ///
    /// Why (issue #35): `GET /health` reports how long the daemon has been
    /// up. Capturing a monotonic `Instant` at `AppState` construction lets the
    /// handler compute the elapsed seconds cheaply and without a clock-skew
    /// hazard.
    /// What: a monotonic `Instant`; `AppState::new` stamps it at startup.
    /// Test: `health_endpoint_includes_resource_fields`.
    pub started_at: std::time::Instant,
    /// In-memory ring buffer of recent tracing log lines (issue #35).
    ///
    /// Why: the `GET /api/v1/logs/tail` endpoint serves the last N log lines
    /// so operators can inspect a running daemon without tailing a file. The
    /// buffer is shared between the tracing `LogBufferLayer` (writer) and the
    /// HTTP handler (reader).
    /// What: a cheap `Arc`-backed clone of the buffer the subscriber writes
    /// to. Defaults to an empty buffer for states that never install the
    /// layer (tests, the stdio path).
    /// Test: `logs_tail_returns_recent_lines`.
    pub log_buffer: trusty_common::log_buffer::LogBuffer,
    /// Most recent on-disk footprint of `data_root`, in bytes (issue #35).
    ///
    /// Why: `GET /health` reports `disk_bytes`. Walking the data directory on
    /// every health request would make a frequent health poll do unbounded
    /// I/O; a background task recomputes it every 10 s and stores it here so
    /// the handler reads it lock-free.
    /// What: an `AtomicU64` updated by the ticker spawned in `run_http_on`.
    /// `0` until the first walk completes.
    /// Test: `health_endpoint_includes_resource_fields`.
    pub disk_bytes: Arc<std::sync::atomic::AtomicU64>,
    /// Per-process RSS + CPU sampler, refreshed on each `/health` request
    /// (issue #35).
    ///
    /// Why: CPU usage is a delta between two `sysinfo` refreshes, so the
    /// sampler must persist between requests — hence the shared `Mutex`.
    /// What: a `tokio::sync::Mutex<SysMetrics>` so the async health handler
    /// can sample without blocking the runtime.
    /// Test: `health_endpoint_includes_resource_fields`.
    pub sys_metrics: Arc<tokio::sync::Mutex<trusty_common::sys_metrics::SysMetrics>>,
    /// HTTP listener address the daemon bound to, once `run_http_on` is running.
    ///
    /// Why: clients (and `/health` responses) need to advertise the live
    /// `host:port` even though port selection happens dynamically (7070–7079
    /// walk + OS fallback). Stashing it on `AppState` lets request handlers
    /// surface the discovery value without re-querying the listener.
    /// What: a `OnceLock<SocketAddr>` so `run_http_on` writes it exactly once
    /// at bind time and every handler reads it lock-free thereafter. Empty
    /// (`None` from `get()`) on the stdio path where no listener exists.
    /// Test: `health_endpoint_reports_bound_addr` (added below).
    pub bound_addr: Arc<OnceLock<SocketAddr>>,
    /// Cached prompt-facts surface served by the MCP `get_prompt_context`
    /// tool (issue #42).
    ///
    /// Why: The original session-init `prompts/get` design loaded context
    /// once per connection; switching to a per-message tool lets the model
    /// pull fresh, query-filtered context on demand. The cache holds both
    /// the raw triples (for filtered lookups) and a pre-formatted Markdown
    /// block (for the unfiltered hot path) so neither code path re-walks
    /// the KG. The cache is rebuilt by
    /// `prompt_facts::rebuild_prompt_cache` after any write that touches a
    /// hot predicate (`kg_assert`, `add_alias`, `remove_prompt_fact`).
    /// What: An `Arc<RwLock<PromptFactsCache>>` so the hot read path takes
    /// a brief read lock and clones the cache; rebuilds take a write lock
    /// for the assignment only. An empty `triples` vec ↔ "no context
    /// stored yet" (the tool handler renders a hint).
    /// Test: `get_prompt_context_returns_cached_or_hint`,
    /// `get_prompt_context_filters_by_query`.
    pub prompt_context_cache: Arc<RwLock<prompt_facts::PromptFactsCache>>,
}
213
214impl AppState {
215 /// Construct an `AppState` rooted at the given on-disk data directory.
216 ///
217 /// Why: The CLI (`serve`) and integration tests need to point the MCP
218 /// server at different roots — production at `dirs::data_dir`, tests at a
219 /// `tempfile::tempdir()`.
220 /// What: Builds an empty `PalaceRegistry`, captures the version, and
221 /// allocates an empty `OnceCell` for the embedder. `default_palace` is
222 /// `None`; use `with_default_palace` to set it.
223 /// Test: `tools::tests::dispatch_palace_create_persists` constructs an
224 /// AppState pointed at a tempdir and round-trips a palace through it.
225 pub fn new(data_root: PathBuf) -> Self {
226 let (events_tx, _) = broadcast::channel::<DaemonEvent>(128);
227 Self {
228 version: env!("CARGO_PKG_VERSION").to_string(),
229 registry: Arc::new(PalaceRegistry::new()),
230 data_root,
231 embedder: Arc::new(OnceCell::new()),
232 default_palace: None,
233 chat_provider: Arc::new(OnceCell::new()),
234 session_stores: Arc::new(dashmap::DashMap::new()),
235 events: Arc::new(events_tx),
236 started_at: std::time::Instant::now(),
237 // Default to an empty buffer — `with_log_buffer` overrides this
238 // when the daemon installs the `LogBufferLayer` (HTTP mode).
239 log_buffer: trusty_common::log_buffer::LogBuffer::new(
240 trusty_common::log_buffer::DEFAULT_LOG_CAPACITY,
241 ),
242 disk_bytes: Arc::new(std::sync::atomic::AtomicU64::new(0)),
243 sys_metrics: Arc::new(tokio::sync::Mutex::new(
244 trusty_common::sys_metrics::SysMetrics::new(),
245 )),
246 bound_addr: Arc::new(OnceLock::new()),
247 prompt_context_cache: Arc::new(RwLock::new(prompt_facts::PromptFactsCache::default())),
248 }
249 }
250
251 /// Scan the palace registry directory and re-register every persisted
252 /// palace into the in-memory [`PalaceRegistry`].
253 ///
254 /// Why: `AppState::new` builds an *empty* registry, so after a daemon
255 /// restart `palace_list` / the dashboard reported zero palaces even though
256 /// dozens existed on disk — palace metadata was persisted by
257 /// `palace_create` but never re-hydrated on startup. This method closes
258 /// that gap by walking the on-disk layout (each subdirectory holding a
259 /// `palace.json` is one palace) and rebuilding a live `PalaceHandle` for
260 /// each, so recall paths see the full set immediately after a restart.
261 /// What: runs the blocking filesystem walk + per-palace `PalaceHandle::open`
262 /// on a `spawn_blocking` thread (so it never stalls the async runtime),
263 /// registers each successfully opened palace via `register_arc`, logs every
264 /// load at `debug!`, and returns the count loaded. A palace that fails to
265 /// open (corrupt index, unreadable `kg.db`, etc.) is logged at `warn!` and
266 /// skipped — one bad palace must not abort startup or crash the daemon.
267 /// `data_root` is expected to already be the palace registry directory —
268 /// `main.rs` resolves it via [`resolve_palace_registry_dir`] before
269 /// constructing the `AppState`, so the flat / legacy-`palaces/` layout
270 /// difference is handled exactly once.
271 /// Test: `tests::load_palaces_from_disk_rehydrates_registry` writes two
272 /// palaces into a tempdir, constructs an `AppState`, calls this method, and
273 /// asserts the returned count and registry contents.
274 pub async fn load_palaces_from_disk(&self) -> Result<usize> {
275 let registry_dir = self.data_root.clone();
276 let registry = self.registry.clone();
277 // The directory walk and each `PalaceHandle::open` perform blocking
278 // filesystem + redb/usearch I/O — run the whole hydration on the
279 // blocking pool so it never parks an async worker thread.
280 let count = tokio::task::spawn_blocking(move || -> Result<usize> {
281 let palaces = PalaceRegistry::list_palaces(®istry_dir)?;
282 let total = palaces.len();
283 let mut loaded = 0usize;
284 let mut skipped = 0usize;
285 for palace in palaces {
286 match trusty_common::memory_core::PalaceHandle::open(&palace) {
287 Ok(handle) => {
288 tracing::debug!(
289 palace = %palace.id,
290 data_dir = %palace.data_dir.display(),
291 "loaded palace from disk"
292 );
293 registry.register_arc(handle);
294 loaded += 1;
295 }
296 Err(e) => {
297 // Why: a single bad palace (corrupt kg.db, stale WAL,
298 // permissions) must never abort startup or block the
299 // HTTP server from binding. Log per-palace and keep
300 // going; the summary below tells operators how many
301 // were skipped without trawling the log.
302 tracing::warn!(
303 palace = %palace.id,
304 data_dir = %palace.data_dir.display(),
305 "skipping palace during startup hydration: {e:#}"
306 );
307 skipped += 1;
308 }
309 }
310 }
311 tracing::info!(
312 "palace hydration summary: loaded {loaded}/{total} ({skipped} skipped due to errors)"
313 );
314 Ok(loaded)
315 })
316 .await
317 .map_err(|e| anyhow::anyhow!("join load_palaces_from_disk: {e}"))??;
318 Ok(count)
319 }
320
321 /// Builder-style: attach the daemon's shared [`LogBuffer`] so the
322 /// `GET /api/v1/logs/tail` endpoint serves the same lines the tracing
323 /// subscriber captures (issue #35).
324 ///
325 /// Why: `main` builds the buffer (via `init_tracing_with_buffer`) before
326 /// constructing the `AppState`, then hands a clone here so the HTTP
327 /// handler and the tracing layer observe the same ring.
328 /// What: replaces the empty default buffer with the supplied one.
329 /// Test: `logs_tail_returns_recent_lines`.
330 #[must_use]
331 pub fn with_log_buffer(mut self, buffer: trusty_common::log_buffer::LogBuffer) -> Self {
332 self.log_buffer = buffer;
333 self
334 }
335
336 /// Send a `DaemonEvent` to all connected SSE subscribers.
337 ///
338 /// Why: Mutating handlers call this after a successful write so the
339 /// dashboard can update without polling. The send is best-effort —
340 /// `broadcast::Sender::send` returns `Err` only when there are no live
341 /// receivers, which is fine (no listeners == no work to do).
342 /// What: Drops the result, so callers don't need to care whether anyone
343 /// is listening.
344 /// Test: `web::tests::sse_stream_receives_palace_created` confirms a
345 /// subscriber observes the emitted event.
346 pub fn emit(&self, event: DaemonEvent) {
347 let _ = self.events.send(event);
348 }
349
350 /// Open (or return cached) the chat-session store for a palace.
351 ///
352 /// Why: Chat session persistence lives in a dedicated SQLite file under
353 /// the palace's data dir (`chat_sessions.db`) so it doesn't intermingle
354 /// with the KG's transactional load. The store is cheap to clone via
355 /// `Arc` but the underlying r2d2 pool should be reused, so cache by id.
356 /// What: Creates the palace data dir if missing, opens (or reuses) a
357 /// `ChatSessionStore` and stashes an `Arc` in the DashMap.
358 /// Test: Indirectly via the session HTTP handlers in `web::tests`.
359 pub fn session_store(&self, palace_id: &str) -> Result<Arc<ChatSessionStore>> {
360 if let Some(entry) = self.session_stores.get(palace_id) {
361 return Ok(entry.clone());
362 }
363 let dir = self.data_root.join(palace_id);
364 std::fs::create_dir_all(&dir)
365 .map_err(|e| anyhow::anyhow!("create palace dir {}: {e}", dir.display()))?;
366 let store = Arc::new(ChatSessionStore::open(&dir.join("chat_sessions.db"))?);
367 self.session_stores
368 .insert(palace_id.to_string(), store.clone());
369 Ok(store)
370 }
371
372 /// Builder-style setter for the default palace name.
373 ///
374 /// Why: `serve --palace <name>` wants to bind every tool call to a
375 /// project-scoped namespace without forcing every MCP request to repeat
376 /// the palace argument.
377 /// What: Returns `self` with `default_palace = Some(name)`.
378 /// Test: `default_palace_used_when_arg_omitted` covers the resolution
379 /// path; this setter is exercised there.
380 pub fn with_default_palace(mut self, name: Option<String>) -> Self {
381 self.default_palace = name;
382 self
383 }
384
385 /// Resolve (or initialize) the shared embedder.
386 ///
387 /// Why: FastEmbedder load is expensive — we share one instance across all
388 /// tool calls; the `OnceCell` ensures concurrent first-use races collapse
389 /// to a single load.
390 /// What: Returns `Arc<FastEmbedder>` on success. Errors propagate from the
391 /// underlying ONNX load.
392 /// Test: Indirectly via `dispatch_remember_then_recall`.
393 /// Resolve the active chat provider, auto-detecting on first call.
394 ///
395 /// Why: Provider selection depends on filesystem-loaded config plus a
396 /// network probe (Ollama liveness), so it must be lazily initialised at
397 /// runtime. Caching the choice in a `OnceCell` keeps it stable across
398 /// concurrent requests without re-probing on every chat call.
399 /// What: On first use loads `~/.trusty-memory/config.toml`, prefers an
400 /// auto-detected Ollama instance (when `local_model.enabled`), and falls
401 /// back to OpenRouter when an API key is set. Returns `Ok(None)` when
402 /// neither is available so the caller can emit a 412.
403 /// Test: `web::tests::providers_endpoint_returns_payload` covers the
404 /// detection path indirectly through `/api/v1/chat/providers`.
405 pub async fn chat_provider(&self) -> Option<Arc<dyn ChatProvider>> {
406 self.chat_provider
407 .get_or_init(|| async {
408 let cfg = crate::web::load_user_config().unwrap_or_default();
409 if cfg.local_model.enabled {
410 if let Some(mut p) =
411 trusty_common::auto_detect_local_provider(&cfg.local_model.base_url).await
412 {
413 // auto_detect returns an empty model id; callers must
414 // set the configured model name themselves.
415 p.model = cfg.local_model.model.clone();
416 return Some(Arc::new(p) as Arc<dyn ChatProvider>);
417 }
418 }
419 if !cfg.openrouter_api_key.is_empty() {
420 return Some(Arc::new(trusty_common::OpenRouterProvider::new(
421 cfg.openrouter_api_key,
422 cfg.openrouter_model,
423 )) as Arc<dyn ChatProvider>);
424 }
425 None
426 })
427 .await
428 .clone()
429 }
430
431 /// Spawn a fire-and-forget background task that auto-discovers project
432 /// aliases under `project_root` and asserts new ones into `palace`.
433 ///
434 /// Why (issue #42): Projects carry implicit shorthand — cargo package
435 /// names that differ from their directory, binary names that differ
436 /// from packages, first-letter abbreviations — that should be surfaced
437 /// without a user ever calling `add_alias`. Running discovery as a
438 /// detached task on palace-open keeps startup latency unchanged: the
439 /// daemon binds and starts serving immediately while the discovery scan
440 /// completes in the background, and any newly-asserted aliases land in
441 /// the prompt cache before the model's next `get_prompt_context` call.
442 /// What: clones `self` (cheap; `Arc`-backed), spawns a tokio task that
443 /// invokes the `discover_aliases` tool handler directly so the
444 /// dedup + cache-rebuild logic runs exactly the same path as the MCP
445 /// tool call. Errors are logged at `warn!`; one failed discovery never
446 /// destabilises the daemon.
447 /// Test: not unit-tested (timing-dependent fire-and-forget); the
448 /// underlying `discover_aliases` dispatch is covered by
449 /// `dispatch_discover_aliases_inserts_new_and_dedupes` in `tools::tests`.
450 pub fn spawn_alias_discovery(&self, palace: String, project_root: PathBuf) {
451 let state = self.clone();
452 tokio::spawn(async move {
453 let args = serde_json::json!({
454 "palace": palace,
455 "project_root": project_root.to_string_lossy(),
456 });
457 match tools::dispatch_tool(&state, "discover_aliases", args).await {
458 Ok(result) => tracing::info!(
459 new = ?result.get("new"),
460 already_known = ?result.get("already_known"),
461 "alias discovery complete"
462 ),
463 Err(e) => tracing::warn!("alias discovery failed: {e:#}"),
464 }
465 });
466 }
467
468 pub async fn embedder(&self) -> Result<Arc<FastEmbedder>> {
469 let cell = self.embedder.clone();
470 let embedder = cell
471 .get_or_try_init(|| async {
472 let e = FastEmbedder::new().await?;
473 Ok::<Arc<FastEmbedder>, anyhow::Error>(Arc::new(e))
474 })
475 .await?
476 .clone();
477 Ok(embedder)
478 }
479}
480
481impl std::fmt::Debug for AppState {
482 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
483 f.debug_struct("AppState")
484 .field("version", &self.version)
485 .field("data_root", &self.data_root)
486 .field("registry_len", &self.registry.len())
487 .finish()
488 }
489}
490
491/// Handle a single MCP JSON-RPC message and produce its response.
492///
493/// Why: Pulled out of the stdio loop so unit tests can drive every method
494/// without touching real stdin/stdout.
495/// What: Routes `initialize`, `tools/list`, `tools/call`, `ping`, and the
496/// `notifications/initialized` notification (which returns `Value::Null`).
497/// Test: See unit tests below — initialize/list/call all return expected
498/// JSON-RPC envelopes; notifications return `Null` (no response written).
499pub async fn handle_message(state: &AppState, msg: Value) -> Value {
500 let id = msg.get("id").cloned().unwrap_or(Value::Null);
501 let method = msg.get("method").and_then(|m| m.as_str()).unwrap_or("");
502
503 match method {
504 "initialize" => {
505 let extra = state
506 .default_palace
507 .as_ref()
508 .map(|dp| json!({ "default_palace": dp }));
509 let result = initialize_response("trusty-memory", &state.version, extra);
510 // Why (issue #42): prompt-facts now flow through the
511 // per-message `get_prompt_context` tool rather than MCP
512 // prompts, so we no longer advertise the `prompts` capability.
513 json!({
514 "jsonrpc": "2.0",
515 "id": id,
516 "result": result,
517 })
518 }
519 // Notifications must NOT receive a response.
520 "notifications/initialized" | "notifications/cancelled" => Value::Null,
521 "tools/list" => json!({
522 "jsonrpc": "2.0",
523 "id": id,
524 "result": tools::tool_definitions_with(state.default_palace.is_some())
525 }),
526 // OpenRPC 1.3.2 discovery — see `openrpc.rs`. Returns the full
527 // service description so orchestrators (open-mpm, etc.) can
528 // introspect every tool and its required `memory.read`/`memory.write`
529 // scope without bespoke per-server adapters.
530 "rpc.discover" => json!({
531 "jsonrpc": "2.0",
532 "id": id,
533 "result": openrpc::build_discover_response(
534 &state.version,
535 state.default_palace.is_some(),
536 ),
537 }),
538 "tools/call" => {
539 let params = msg.get("params").cloned().unwrap_or_default();
540 let tool_name = params
541 .get("name")
542 .and_then(|n| n.as_str())
543 .unwrap_or("")
544 .to_string();
545 let args = params.get("arguments").cloned().unwrap_or_default();
546 match tools::dispatch_tool(state, &tool_name, args).await {
547 Ok(content) => {
548 // Why: tools that return a bare JSON string (e.g.
549 // `get_prompt_context` returning the formatted
550 // Markdown block) should surface as plain text in the
551 // MCP `content[0].text` field — wrapping in
552 // `Value::to_string()` would re-quote the payload and
553 // force every caller to strip outer quotes.
554 let text = match &content {
555 Value::String(s) => s.clone(),
556 other => other.to_string(),
557 };
558 json!({
559 "jsonrpc": "2.0",
560 "id": id,
561 "result": {
562 "content": [{"type": "text", "text": text}]
563 }
564 })
565 }
566 Err(e) => json!({
567 "jsonrpc": "2.0",
568 "id": id,
569 // Why: anyhow's `{:#}` alternate format walks the full
570 // `Caused by:` chain so MCP clients see actionable
571 // detail (e.g. "PalaceHandle::remember_with_options:
572 // filter rejected: too short") instead of just the
573 // outermost context label.
574 "error": {"code": -32603, "message": format!("{e:#}")}
575 }),
576 }
577 }
578 "ping" => json!({"jsonrpc": "2.0", "id": id, "result": {}}),
579 _ => json!({
580 "jsonrpc": "2.0",
581 "id": id,
582 "error": {
583 "code": -32601,
584 "message": format!("Method not found: {method}")
585 }
586 }),
587 }
588}
589
590/// Run the MCP stdio JSON-RPC 2.0 server loop.
591///
592/// Why: Claude Code launches MCP servers as child processes and speaks
593/// JSON-RPC over stdin/stdout — this is the primary integration path.
594/// What: Delegates to `trusty_mcp_core::run_stdio_loop`, adapting each
595/// shared `Request` back into the JSON `Value` shape `handle_message`
596/// expects, and translating the returned `Value` into a `Response`.
597/// Notifications (where `handle_message` returns `Value::Null`) become
598/// suppressed responses so the loop emits nothing on the wire.
599/// Test: `handle_message` covers protocol behaviour in unit tests.
600pub async fn run_stdio(state: AppState) -> Result<()> {
601 info!("trusty-memory MCP stdio server starting");
602 let state = Arc::new(state);
603 trusty_common::mcp::run_stdio_loop(move |req: Request| {
604 let state = state.clone();
605 async move {
606 // Re-serialise the Request into the JSON shape handle_message expects.
607 // (handle_message predates the shared types and reads loose Values.)
608 let msg = json!({
609 "jsonrpc": req.jsonrpc.unwrap_or_else(|| "2.0".to_string()),
610 "id": req.id.clone().unwrap_or(Value::Null),
611 "method": req.method,
612 "params": req.params.unwrap_or(Value::Null),
613 });
614 let resp_value = handle_message(&state, msg).await;
615 // handle_message returns Value::Null for notifications.
616 if resp_value.is_null() {
617 return Response::suppressed();
618 }
619 // Otherwise it returns the full JSON-RPC envelope as a Value;
620 // re-encode into the shared Response struct so the loop can serialise.
621 let id = resp_value.get("id").cloned();
622 if let Some(result) = resp_value.get("result").cloned() {
623 Response::ok(id, result)
624 } else if let Some(err) = resp_value.get("error") {
625 let code =
626 err.get("code")
627 .and_then(|c| c.as_i64())
628 .unwrap_or(error_codes::INTERNAL_ERROR as i64) as i32;
629 let message = err
630 .get("message")
631 .and_then(|m| m.as_str())
632 .unwrap_or("internal error")
633 .to_string();
634 Response::err(id, code, message)
635 } else {
636 Response::err(
637 id,
638 error_codes::INTERNAL_ERROR,
639 "malformed handler response",
640 )
641 }
642 }
643 })
644 .await
645}
646
/// Preferred starting port for the trusty-memory HTTP daemon.
///
/// Why: keeps the well-known default stable for clients that have hard-coded
/// `127.0.0.1:7070` in their configuration, while still allowing dynamic
/// walking when the port is in use (`DYNAMIC_PORT_RANGE` ports starting here).
/// What: `7070` — historic default, matches the launchd plist's prior value.
/// Test: covered indirectly by `bind_dynamic_port_returns_listener`.
pub const DEFAULT_HTTP_PORT: u16 = 7070;

/// Number of consecutive ports `bind_dynamic_port` walks before falling back
/// to the OS-assigned port. Matches the trusty-search convention.
///
/// What: `10`, so together with `DEFAULT_HTTP_PORT` the preferred range is
/// 7070 through 7079 inclusive.
const DYNAMIC_PORT_RANGE: u16 = 10;
659
660/// Path to `~/.trusty-memory/http_addr` — the canonical address-discovery file.
661///
662/// Why: clients (CLI, MCP tools, dashboards) need to find the running daemon
663/// without configuration when the port was selected dynamically. Mirrors
664/// `trusty-search`'s `~/.trusty-search/http_addr` contract so the two tools
665/// share a single discovery convention.
666/// What: returns `$HOME/.trusty-memory/http_addr`, or `None` if `$HOME` is
667/// unresolvable (locked-down container, no passwd entry).
668/// Test: `http_addr_path_uses_dot_trusty_memory`.
669pub fn http_addr_path() -> Option<PathBuf> {
670 dirs::home_dir().map(|h| h.join(".trusty-memory").join("http_addr"))
671}
672
673/// Bind a `TcpListener` to `127.0.0.1`, dynamically selecting a port.
674///
675/// Why: the historic default `7070` is convenient for clients but a stale
676/// process or a second daemon must not produce a noisy failure. Walking
677/// `DEFAULT_HTTP_PORT..DEFAULT_HTTP_PORT+DYNAMIC_PORT_RANGE` first preserves
678/// backwards compatibility for the common case; OS-assigned fallback (`:0`)
679/// guarantees the daemon always comes up even when every preferred port is
680/// busy.
681/// What: returns the first successful `TcpListener`. Tries 7070..=7079
682/// in order, then falls back to OS-assigned. Caller inspects
683/// `local_addr()` to learn the chosen port.
684/// Test: `bind_dynamic_port_returns_listener` confirms it always binds *some*
685/// port even after another listener occupies the preferred one.
686pub async fn bind_dynamic_port() -> Result<tokio::net::TcpListener> {
687 let preferred: SocketAddr = SocketAddr::from(([127, 0, 0, 1], DEFAULT_HTTP_PORT));
688 // First: walk the preferred range (7070..=7079).
689 if let Ok(listener) =
690 trusty_common::bind_with_auto_port(preferred, DYNAMIC_PORT_RANGE - 1).await
691 {
692 return Ok(listener);
693 }
694 // Last resort: ask the kernel for any free port. `bind_with_auto_port`
695 // with `:0` resolves immediately to the OS-assigned port.
696 tracing::warn!(
697 "all ports {DEFAULT_HTTP_PORT}..{} in use; requesting OS-assigned port",
698 DEFAULT_HTTP_PORT + DYNAMIC_PORT_RANGE - 1
699 );
700 let any: SocketAddr = SocketAddr::from(([127, 0, 0, 1], 0));
701 trusty_common::bind_with_auto_port(any, 0).await
702}
703
/// Write the bound `host:port` to the address-discovery file atomically.
///
/// Why: clients must be able to read the file mid-write without observing a
/// partial value. Writing to a `.tmp` sibling and renaming over the target
/// gives POSIX atomicity, matching the trusty-search implementation.
/// What: creates the parent directory if missing; writes `addr` followed by a
/// trailing newline (avoids the "no newline at end of file" warnings from
/// `cat`) into `<path>.addr.tmp`, syncs it, then renames it over `path`. If
/// any of those steps fails, the temporary file is removed best-effort so a
/// failed attempt does not leave a stale `.tmp` behind.
/// Errors: the first I/O error is returned to the caller so `run_http_on`
/// can log without panicking.
/// Test: `http_addr_file_round_trip_via_helpers`.
fn write_http_addr_file(path: &Path, addr: &SocketAddr) -> std::io::Result<()> {
    use std::io::Write;

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let tmp = path.with_extension("addr.tmp");

    let publish = || -> std::io::Result<()> {
        {
            // Scope the handle so it is closed before the rename.
            let mut f = std::fs::File::create(&tmp)?;
            writeln!(f, "{addr}")?;
            f.sync_all()?;
        }
        std::fs::rename(&tmp, path)
    };

    let outcome = publish();
    if outcome.is_err() {
        // Best-effort cleanup; the original error is what the caller needs.
        let _ = std::fs::remove_file(&tmp);
    }
    outcome
}
728
729/// Run the optional HTTP/SSE + web admin server.
730///
731/// Why: A long-running daemon mode lets non-stdio clients (browsers, curl,
732/// future remote agents) hit `/health`, the `/api/v1/*` REST surface, and the
733/// embedded admin SPA.
734/// What: axum router built from `web::router()` plus a `/sse` stub for the
735/// existing MCP-over-SSE clients. Caller provides a pre-bound listener so
736/// port auto-detection lives at the call site. Before accepting connections
737/// the daemon stamps the bound `host:port` onto `AppState.bound_addr` and
738/// writes `~/.trusty-memory/http_addr` so clients can discover the live port.
739/// On shutdown the file is removed best-effort (a stale file with the wrong
740/// port is worse than a missing one).
741/// Test: `cargo test -p trusty-memory web::tests` exercises the router shape;
742/// manual: `curl http://127.0.0.1:<port>/health` returns `ok` with `addr`.
743pub async fn run_http_on(state: AppState, listener: tokio::net::TcpListener) -> Result<()> {
744 use axum::routing::get;
745
746 // Issue #35: recompute the `data_root` disk footprint every 10 s on a
747 // background task so `GET /health` reports `disk_bytes` without doing a
748 // recursive directory walk on the request path.
749 spawn_disk_size_ticker(state.clone());
750
751 // Capture and advertise the bound address BEFORE serving so the first
752 // request handler — and the http_addr discovery file — see the real port
753 // even if `local_addr()` would otherwise be racy.
754 let local = listener.local_addr().ok();
755 let written_path = if let Some(a) = local {
756 // Stash on state for handlers (e.g. /health) to surface.
757 let _ = state.bound_addr.set(a);
758 info!("HTTP server listening on http://{a}");
759 eprintln!("HTTP server listening on http://{a}");
760 // Best-effort: a missing $HOME or read-only fs is non-fatal — the
761 // /health endpoint still advertises `addr`. Logging the failure
762 // helps operators diagnose discovery problems.
763 match http_addr_path() {
764 Some(p) => match write_http_addr_file(&p, &a) {
765 Ok(()) => {
766 info!("wrote daemon address to {}", p.display());
767 Some(p)
768 }
769 Err(e) => {
770 tracing::warn!("could not write {}: {e}", p.display());
771 None
772 }
773 },
774 None => {
775 tracing::warn!("no $HOME — skipping http_addr discovery file");
776 None
777 }
778 }
779 } else {
780 None
781 };
782
783 let app = web::router()
784 .route("/sse", get(sse_handler))
785 .with_state(state);
786
787 let serve_result = axum::serve(listener, app).await;
788
789 // Best-effort cleanup: remove `http_addr` so stale clients fail fast
790 // instead of timing out against a dead port.
791 if let Some(p) = written_path.as_ref() {
792 let _ = std::fs::remove_file(p);
793 }
794
795 serve_result?;
796 Ok(())
797}
798
799/// Convenience: bind `addr` and serve via [`run_http_on`].
800pub async fn run_http(state: AppState, addr: std::net::SocketAddr) -> Result<()> {
801 let listener = tokio::net::TcpListener::bind(addr).await?;
802 run_http_on(state, listener).await
803}
804
805/// Convenience: bind dynamically (7070..=7079, OS fallback) and serve.
806///
807/// Why: `trusty-memory serve` with no `--http` flag is the canonical
808/// launchd-managed daemon entry point. Dynamic binding lets a stale daemon
809/// or a hand-spawned `serve --http 127.0.0.1:7070` coexist without breaking
810/// the launchd-managed instance.
811/// What: calls [`bind_dynamic_port`] then [`run_http_on`].
812/// Test: integration via `trusty-memory serve` + `cat ~/.trusty-memory/http_addr`.
813pub async fn run_http_dynamic(state: AppState) -> Result<()> {
814 let listener = bind_dynamic_port().await?;
815 run_http_on(state, listener).await
816}
817
818/// Spawn a background ticker that recomputes the `data_root` disk footprint
819/// every 10 seconds and stores it in `state.disk_bytes` (issue #35).
820///
821/// Why: `GET /health` reports `disk_bytes`. Walking the data directory on
822/// every health request would turn a frequent health poll into unbounded
823/// recursive I/O. Computing it off the request path on a fixed cadence keeps
824/// `/health` cheap and bounds the staleness to ~10 s — fine for an
825/// at-a-glance footprint figure.
826/// What: spawns a detached tokio task. `AppState` is cheap to `Clone` (all
827/// `Arc` fields), so the task holds a full clone; the daemon process lives
828/// for the lifetime of the server anyway, so no `Weak` downgrade is needed.
829/// Each tick runs the blocking directory walk on `spawn_blocking` so it never
830/// stalls the async runtime, then stores the byte total atomically.
831/// Test: `health_endpoint_includes_resource_fields` asserts the field shape;
832/// the ticker cadence is not unit-tested (timing-dependent).
833fn spawn_disk_size_ticker(state: AppState) {
834 tokio::spawn(async move {
835 let mut interval = tokio::time::interval(std::time::Duration::from_secs(10));
836 loop {
837 interval.tick().await;
838 let dir = state.data_root.clone();
839 // The directory walk is blocking filesystem I/O — run it on the
840 // blocking pool so it never parks an async worker thread.
841 let bytes = tokio::task::spawn_blocking(move || {
842 trusty_common::sys_metrics::dir_size_bytes(&dir)
843 })
844 .await
845 .unwrap_or(0);
846 state
847 .disk_bytes
848 .store(bytes, std::sync::atomic::Ordering::Relaxed);
849 }
850 });
851}
852
853/// Live SSE event stream — pushes `DaemonEvent` frames to dashboard clients.
854///
855/// Why: The dashboard subscribes once and reacts to live pushes (palace
856/// created, drawer added/deleted, dream completed, status changed) instead of
857/// polling `/api/v1/*` endpoints.
858/// What: Subscribes to `state.events`, emits an initial `connected` frame,
859/// then forwards every `DaemonEvent` as `data: <json>\n\n`. Lagged
860/// subscribers receive a `lag` frame indicating skipped events; channel
861/// closure ends the stream.
862/// Test: `web::tests::sse_stream_emits_palace_created` (covers subscribe +
863/// emit + receive); manual: `curl -N http://.../sse`.
864pub(crate) async fn sse_handler(
865 axum::extract::State(state): axum::extract::State<AppState>,
866) -> impl axum::response::IntoResponse {
867 use futures::StreamExt;
868 use tokio_stream::wrappers::BroadcastStream;
869
870 let rx = state.events.subscribe();
871 let initial = futures::stream::once(async {
872 Ok::<axum::body::Bytes, std::io::Error>(axum::body::Bytes::from(
873 "data: {\"type\":\"connected\"}\n\n",
874 ))
875 });
876 let events = BroadcastStream::new(rx).map(|res| {
877 let frame = match res {
878 Ok(event) => match serde_json::to_string(&event) {
879 Ok(json) => format!("data: {json}\n\n"),
880 Err(e) => format!("data: {{\"type\":\"error\",\"message\":\"{e}\"}}\n\n"),
881 },
882 Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged(n)) => {
883 format!("data: {{\"type\":\"lag\",\"skipped\":{n}}}\n\n")
884 }
885 };
886 Ok::<axum::body::Bytes, std::io::Error>(axum::body::Bytes::from(frame))
887 });
888 let stream = initial.chain(events);
889
890 axum::response::Response::builder()
891 .header("Content-Type", "text/event-stream")
892 .header("Cache-Control", "no-cache")
893 .header("X-Accel-Buffering", "no")
894 .body(axum::body::Body::from_stream(stream))
895 .expect("valid SSE response")
896}
897
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `AppState` rooted at a fresh temporary directory.
    fn test_state() -> AppState {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path().to_path_buf();
        // Leak the tempdir so it lives for the test process; tests are short.
        std::mem::forget(tmp);
        AppState::new(root)
    }

    /// Why: the `initialize` handshake is the first thing every MCP client
    /// sends; its shape must stay stable.
    #[tokio::test]
    async fn initialize_returns_protocol_version_and_capabilities() {
        let state = test_state();
        let req = json!({
            "jsonrpc": "2.0",
            "id": 1,
            "method": "initialize",
            "params": {
                "protocolVersion": "2024-11-05",
                "capabilities": {},
                "clientInfo": {"name": "test", "version": "0"}
            }
        });
        let resp = handle_message(&state, req).await;
        assert_eq!(resp["jsonrpc"], "2.0");
        assert_eq!(resp["id"], 1);
        assert_eq!(resp["result"]["protocolVersion"], "2024-11-05");
        assert!(resp["result"]["capabilities"]["tools"].is_object());
        assert_eq!(resp["result"]["serverInfo"]["name"], "trusty-memory");
    }

    /// Why: notifications carry no `id`, so the handler must produce no
    /// response body (`null`).
    #[tokio::test]
    async fn initialized_notification_returns_null() {
        let state = test_state();
        let req = json!({
            "jsonrpc": "2.0",
            "method": "notifications/initialized",
            "params": {}
        });
        let resp = handle_message(&state, req).await;
        assert!(resp.is_null());
    }

    /// Why: pins the number of tools advertised by `tools/list`.
    #[tokio::test]
    async fn tools_list_returns_all_tools() {
        let state = test_state();
        let req = json!({"jsonrpc": "2.0", "id": 2, "method": "tools/list"});
        let resp = handle_message(&state, req).await;
        let tools = resp["result"]["tools"].as_array().expect("tools array");
        assert_eq!(tools.len(), 20);
    }

    /// Why: unknown methods must map to JSON-RPC "method not found" (-32601).
    #[tokio::test]
    async fn unknown_method_returns_error() {
        let state = test_state();
        let req = json!({"jsonrpc": "2.0", "id": 4, "method": "wat"});
        let resp = handle_message(&state, req).await;
        assert_eq!(resp["error"]["code"], -32601);
    }

    /// Why: `ping` must answer with an (object) result.
    #[tokio::test]
    async fn ping_returns_empty_result() {
        let state = test_state();
        let req = json!({"jsonrpc": "2.0", "id": 5, "method": "ping"});
        let resp = handle_message(&state, req).await;
        assert!(resp["result"].is_object());
    }

    /// Why: a freshly constructed `AppState` has a version string, an empty
    /// registry, and no default palace.
    #[tokio::test]
    async fn app_state_default_constructs() {
        let s = test_state();
        assert!(!s.version.is_empty());
        assert!(s.registry.is_empty());
        assert!(s.default_palace.is_none());
    }

    /// Why: Issue #26 — when `serve --palace <name>` is set, the MCP server
    /// must (a) report the default in the `initialize` `serverInfo`, (b)
    /// drop `palace` from the required schema in `tools/list`, and (c) let
    /// `tools/call` use the default when the caller omits `palace`.
    /// Test: Construct an AppState with a default palace, create that palace
    /// on disk via the registry, then call `memory_remember` without a
    /// `palace` argument and confirm it resolves to the default.
    #[tokio::test]
    async fn default_palace_used_when_arg_omitted() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path().to_path_buf();

        // Pre-create the default palace so remember has somewhere to land.
        let registry = trusty_common::memory_core::PalaceRegistry::new();
        let palace = trusty_common::memory_core::Palace {
            id: trusty_common::memory_core::PalaceId::new("default-pal"),
            name: "default-pal".to_string(),
            description: None,
            created_at: chrono::Utc::now(),
            data_dir: root.join("default-pal"),
        };
        registry
            .create_palace(&root, palace)
            .expect("create_palace");

        let state = AppState::new(root).with_default_palace(Some("default-pal".to_string()));

        // (a) initialize advertises the default.
        let init = handle_message(
            &state,
            json!({"jsonrpc": "2.0", "id": 1, "method": "initialize"}),
        )
        .await;
        assert_eq!(
            init["result"]["serverInfo"]["default_palace"], "default-pal",
            "initialize must echo default_palace in serverInfo"
        );

        // (b) tools/list drops `palace` from required when default is set.
        let list = handle_message(
            &state,
            json!({"jsonrpc": "2.0", "id": 2, "method": "tools/list"}),
        )
        .await;
        let tools = list["result"]["tools"].as_array().expect("tools array");
        let remember = tools
            .iter()
            .find(|t| t["name"] == "memory_remember")
            .expect("memory_remember tool");
        let required: Vec<&str> = remember["inputSchema"]["required"]
            .as_array()
            .expect("required array")
            .iter()
            .filter_map(|v| v.as_str())
            .collect();
        assert!(
            !required.contains(&"palace"),
            "palace must not be required when default is configured; got {required:?}"
        );
        assert!(required.contains(&"text"));

        // (c) tools/call resolves the default when arg is omitted.
        let call = handle_message(
            &state,
            json!({
                "jsonrpc": "2.0",
                "id": 3,
                "method": "tools/call",
                "params": {
                    "name": "memory_remember",
                    "arguments": {"text": "default palace test memory content with several tokens"},
                },
            }),
        )
        .await;
        // Successful dispatch returns `result.content[0].text` JSON.
        let text = call["result"]["content"][0]["text"]
            .as_str()
            .unwrap_or_else(|| panic!("expected success result, got {call}"));
        let parsed: Value = serde_json::from_str(text).expect("parse content json");
        assert_eq!(parsed["palace"], "default-pal");
        assert_eq!(parsed["status"], "stored");
        assert!(parsed["drawer_id"].as_str().is_some());
    }

    /// Why: When no default is set, `tools/call` for a palace-bound tool
    /// without a `palace` argument should error helpfully rather than panic.
    #[tokio::test]
    async fn missing_palace_without_default_errors() {
        let state = test_state();
        let resp = handle_message(
            &state,
            json!({
                "jsonrpc": "2.0",
                "id": 7,
                "method": "tools/call",
                "params": {
                    "name": "memory_recall",
                    "arguments": {"query": "anything"},
                },
            }),
        )
        .await;
        assert_eq!(resp["error"]["code"], -32603);
        let msg = resp["error"]["message"].as_str().unwrap_or("");
        assert!(
            msg.contains("missing 'palace'"),
            "expected helpful error, got: {msg}"
        );
    }

    /// Why: regression for the "palaces lost on restart" bug — `AppState::new`
    /// builds an empty registry, so the daemon must call
    /// `load_palaces_from_disk` on startup to re-register palaces persisted by
    /// a previous run. Without that call the registry stays empty even though
    /// `palace.json` files exist on disk.
    /// What: persists two palaces under a tempdir (via the same
    /// `create_palace` path the `palace_create` tool uses), constructs a fresh
    /// `AppState` rooted there, calls `load_palaces_from_disk`, and asserts the
    /// returned count and registry contents.
    /// Test: this test itself.
    #[tokio::test]
    async fn load_palaces_from_disk_rehydrates_registry() {
        use trusty_common::memory_core::{Palace, PalaceId, PalaceRegistry};

        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path().to_path_buf();

        // Phase 1: persist two palaces to disk, then drop the writer registry
        // so nothing is held in memory — simulating a prior daemon run.
        {
            let writer = PalaceRegistry::new();
            for id in ["alpha", "beta"] {
                let palace = Palace {
                    id: PalaceId::new(id),
                    name: id.to_string(),
                    description: None,
                    created_at: chrono::Utc::now(),
                    data_dir: root.join(id),
                };
                writer
                    .create_palace(&root, palace)
                    .expect("persist palace to disk");
            }
        }

        // Add a stray non-palace subdirectory; the walker must ignore it.
        std::fs::create_dir_all(root.join("not-a-palace")).expect("mkdir");

        // Phase 2: fresh AppState starts with an empty registry (the bug).
        let state = AppState::new(root);
        assert!(
            state.registry.is_empty(),
            "AppState::new must start with an empty registry"
        );

        // The fix: hydrate from disk.
        let count = state
            .load_palaces_from_disk()
            .await
            .expect("load_palaces_from_disk");

        assert_eq!(count, 2, "both persisted palaces should be loaded");
        assert_eq!(state.registry.len(), 2, "registry should hold both palaces");
        let ids: Vec<String> = state.registry.list().into_iter().map(|p| p.0).collect();
        assert!(ids.contains(&"alpha".to_string()));
        assert!(ids.contains(&"beta".to_string()));
    }

    /// Why: existing installs (and the legacy standalone `trusty-memory` repo)
    /// nest palaces one level deeper under a `palaces/` subdirectory. When that
    /// subdirectory exists, `resolve_palace_registry_dir` must descend into it
    /// so the daemon scans the level that actually holds the `palace.json`
    /// files — otherwise it finds zero palaces, which is the restart bug.
    /// What: creates `<dir>/palaces/`, resolves, and asserts the nested path is
    /// returned.
    /// Test: this test itself.
    #[test]
    fn resolve_palace_registry_dir_prefers_palaces_subdir() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let data_dir = tmp.path().to_path_buf();
        std::fs::create_dir_all(data_dir.join("palaces")).expect("mkdir palaces");

        let resolved = resolve_palace_registry_dir(data_dir.clone());
        assert_eq!(resolved, data_dir.join("palaces"));
    }

    /// Why: a fresh install with no `palaces/` subdirectory must fall back to
    /// the data dir itself (the current flat monorepo layout).
    #[test]
    fn resolve_palace_registry_dir_falls_back_to_data_dir() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let data_dir = tmp.path().to_path_buf();

        let resolved = resolve_palace_registry_dir(data_dir.clone());
        assert_eq!(resolved, data_dir);
    }

    /// Why: end-to-end check that the nested-`palaces/` layout hydrates — the
    /// daemon resolves the registry dir via `resolve_palace_registry_dir`, so
    /// an `AppState` rooted there must load palaces persisted one level below
    /// the bare data dir.
    /// What: persists two palaces under `<root>/palaces/<id>/`, constructs an
    /// `AppState` rooted at the resolved registry dir, and asserts hydration
    /// finds both.
    /// Test: this test itself.
    #[tokio::test]
    async fn load_palaces_from_disk_handles_palaces_subdir() {
        use trusty_common::memory_core::{Palace, PalaceId, PalaceRegistry};

        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path().to_path_buf();
        let nested = root.join("palaces");

        {
            let writer = PalaceRegistry::new();
            for id in ["cto", "engineering"] {
                let palace = Palace {
                    id: PalaceId::new(id),
                    name: id.to_string(),
                    description: None,
                    created_at: chrono::Utc::now(),
                    data_dir: nested.join(id),
                };
                // create_palace anchors data_dir under the passed root, so
                // pass `nested` here to land palaces under `<root>/palaces/`.
                writer
                    .create_palace(&nested, palace)
                    .expect("persist palace under palaces/ subdir");
            }
        }

        // Mirror main.rs: resolve the registry dir, then root AppState there.
        let registry_dir = resolve_palace_registry_dir(root);
        assert_eq!(registry_dir, nested, "must resolve into palaces/ subdir");
        let state = AppState::new(registry_dir);
        let count = state
            .load_palaces_from_disk()
            .await
            .expect("load_palaces_from_disk");

        assert_eq!(count, 2, "both nested palaces should be loaded");
        assert_eq!(state.registry.len(), 2);
        let ids: Vec<String> = state.registry.list().into_iter().map(|p| p.0).collect();
        assert!(ids.contains(&"cto".to_string()));
        assert!(ids.contains(&"engineering".to_string()));
    }

    /// Why: an empty (or missing) palace registry directory must not error — a
    /// brand-new install has nothing to hydrate and should report zero.
    #[tokio::test]
    async fn load_palaces_from_disk_empty_root_returns_zero() {
        let state = test_state();
        let count = state
            .load_palaces_from_disk()
            .await
            .expect("load_palaces_from_disk on empty root");
        assert_eq!(count, 0);
        assert!(state.registry.is_empty());
    }

    /// Why: initialize without a default palace must omit `default_palace`
    /// from `serverInfo` so clients can detect the unbound mode.
    #[tokio::test]
    async fn initialize_without_default_palace_omits_field() {
        let state = test_state();
        let init = handle_message(
            &state,
            json!({"jsonrpc": "2.0", "id": 1, "method": "initialize"}),
        )
        .await;
        assert!(init["result"]["serverInfo"]["default_palace"].is_null());
    }

    /// Why: every `~/.trusty-memory/http_addr` consumer (CLI, dashboard,
    /// future trusty-mpm wiring) must agree on the path. A regression that
    /// moves this file to e.g. `$XDG_DATA_HOME/trusty-memory/http_addr` would
    /// silently break every client.
    /// What: under a real `$HOME`, the path ends in `.trusty-memory/http_addr`.
    #[test]
    fn http_addr_path_uses_dot_trusty_memory() {
        if let Some(p) = http_addr_path() {
            assert!(
                p.ends_with(".trusty-memory/http_addr"),
                "unexpected http_addr path: {}",
                p.display()
            );
        }
        // CI containers with no $HOME return None — that's fine; the writer
        // logs and falls back gracefully.
    }

    /// Why: write+read round-trip pins the disk format: a single line of
    /// `host:port\n`. Clients (cat, sh `$(cat ...)`) trim whitespace, so the
    /// trailing newline is invisible — but anything else (extra whitespace,
    /// multi-line) would break callers.
    /// What: asserts the exact bytes (not a trimmed comparison, which would
    /// let extra whitespace slip through) and that the atomic-write temp
    /// sibling is gone after a successful write.
    #[test]
    fn http_addr_file_round_trip_via_helpers() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("http_addr");
        let addr: SocketAddr = "127.0.0.1:7073".parse().unwrap();
        write_http_addr_file(&path, &addr).unwrap();

        let raw = std::fs::read_to_string(&path).unwrap();
        // Exactly one line; the trailing newline keeps `cat` and editors happy.
        assert_eq!(raw, "127.0.0.1:7073\n");

        // The rename must have consumed the temporary file.
        let leftovers: Vec<_> = std::fs::read_dir(dir.path())
            .unwrap()
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name())
            .filter(|name| name.as_os_str() != std::ffi::OsStr::new("http_addr"))
            .collect();
        assert!(
            leftovers.is_empty(),
            "unexpected files next to http_addr: {leftovers:?}"
        );
    }

    /// Why: dynamic binding must succeed even when the preferred port is
    /// already in use. Walking 7070..=7079 + OS fallback guarantees the
    /// daemon never fails to come up just because another process holds 7070.
    /// What: pre-binds the preferred port best-effort (if the bind fails the
    /// port is presumably already held by something else on the host, which
    /// exercises the same path), then calls `bind_dynamic_port` and asserts we
    /// got *some* loopback listener back — and, when this test itself holds
    /// the preferred port, that a different port was chosen.
    #[tokio::test]
    async fn bind_dynamic_port_returns_listener() {
        let preferred = SocketAddr::from(([127, 0, 0, 1], DEFAULT_HTTP_PORT));
        // Keep the guard alive until the end of the test so the preferred
        // port stays occupied while `bind_dynamic_port` runs.
        let occupied = tokio::net::TcpListener::bind(preferred).await.ok();

        let listener = bind_dynamic_port().await.expect("bind_dynamic_port");
        let addr = listener.local_addr().expect("local_addr");
        assert_eq!(addr.ip().to_string(), "127.0.0.1");
        assert!(addr.port() > 0, "port must be non-zero after bind");
        if occupied.is_some() {
            assert_ne!(
                addr.port(),
                DEFAULT_HTTP_PORT,
                "preferred port is held by this test, so another port must be chosen"
            );
        }
        drop(occupied);
    }

    /// Why: Issue #42 — prompt-facts are now served by the per-message
    /// `get_prompt_context` tool rather than the MCP prompts surface, so the
    /// `initialize` handshake must NOT advertise a `prompts` capability and
    /// `prompts/list` / `prompts/get` must fall through to the "method not
    /// found" path.
    #[tokio::test]
    async fn initialize_does_not_advertise_prompts_capability() {
        let state = test_state();
        let init = handle_message(
            &state,
            json!({"jsonrpc": "2.0", "id": 1, "method": "initialize"}),
        )
        .await;
        assert!(
            init["result"]["capabilities"]["prompts"].is_null(),
            "initialize must NOT advertise the prompts capability; got {init}"
        );

        // Both prompts/* dispatchers should now report method-not-found.
        for method in ["prompts/list", "prompts/get"] {
            let resp =
                handle_message(&state, json!({"jsonrpc": "2.0", "id": 2, "method": method})).await;
            assert_eq!(
                resp["error"]["code"], -32601,
                "{method} should return method-not-found; got {resp}"
            );
        }
    }

    /// Why: `AppState::new` must initialise `bound_addr` to an empty
    /// `OnceLock` so `/health` reports `addr: None` on the stdio path. A
    /// regression that pre-populates this field would advertise a bogus
    /// address from a stale clone.
    #[test]
    fn app_state_starts_with_empty_bound_addr() {
        let state = test_state();
        assert!(state.bound_addr.get().is_none());
    }
}