trusty-memory 0.18.1

MCP server (stdio + HTTP/SSE) for trusty-memory
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
//! Handler for `trusty-memory prompt-context`.
//!
//! Why: Claude Code's `UserPromptSubmit` hooks inject any stdout produced by
//! the hook command as additional context for the model. The trusty-memory
//! setup command installs a hook that runs `trusty-memory prompt-context`
//! before every prompt, so the model gets a freshly-rendered block of
//! palace context (drawer recall + KG triples + global hot facts) without
//! paying the per-message MCP tool-call tax. This handler is the actual
//! command the hook invokes.
//!
//! What: a side-effect-only command that talks to the running trusty-memory
//! HTTP daemon and prints a formatted Markdown injection block to stdout.
//! Composition (issue #134):
//!
//!   1. Workspace hot facts (existing `GET /api/v1/kg/prompt-context`).
//!   2. Drawers recalled from the cwd-resolved palace
//!      (`GET /api/v1/palaces/{slug}/recall?q=<prompt>`).
//!   3. Knowledge-graph triples whose subject appears in the prompt
//!      (`GET /api/v1/palaces/{slug}/kg/all?limit=200`).
//!
//! Failures on any branch are isolated — each fetch is bounded by
//! `HTTP_TIMEOUT` and individual errors are skipped without failing the
//! hook. If everything is empty, the existing placeholder is emitted so
//! the empty-palace fallback behaviour is preserved.
//!
//! Note on MPM sub-agents: unlike `trusty-mpm hook`, this command is
//! **intentionally NOT** gated on the `CLAUDE_MPM_SUB_AGENT` environment
//! variable. Sub-agents benefit from the parent palace's prompt-fact block
//! just as much as the PM does — withholding it would force every nested
//! agent to rediscover project conventions from scratch. The token cost is
//! a single rendered fact list and the signal payoff (consistent style,
//! vocabulary, and architectural facts across the agent tree) is high. The
//! suppression of nested hook traffic happens at the `trusty-mpm hook`
//! layer instead, where doubled audit events are the real failure mode.
//!
//! Test: daemon-touching paths are exercised via integration tests in this
//! module (`prompt_context_recalls_palace_drawers`,
//! `prompt_context_empty_palace_falls_back_to_global`,
//! `prompt_context_returns_ok_without_daemon`).

mod fetch;
mod filter;
mod format;

use anyhow::Result;
use serde_json::Value;
use std::time::{Duration, Instant};

use crate::hook_emit::{post_hook_event, HookEventPayload};
use crate::prompt_log::{PromptLogEntry, PromptLogger};
use crate::{hook_prompt_excerpt, HookType, InjectionKind};

use fetch::{fetch_global_prompt_context, fetch_palace_kg_triples, fetch_palace_recall};
use filter::{filter_drawers_by_deny_tags, select_relevant_triples};
use format::{compose_injection, count_facts};

/// HTTP path for the global hot-facts block.
///
/// The module-level composition list names this as
/// `GET /api/v1/kg/prompt-context`. It is relative to the daemon base URL
/// (`http://host:port`) that `build_injection_body` assembles.
/// NOTE(review): presumably consumed by `fetch::fetch_global_prompt_context`
/// — that module is not visible here; confirm.
pub(super) const PROMPT_CONTEXT_PATH: &str = "/api/v1/kg/prompt-context";

/// HTTP path template for per-palace recall. Substitute `{slug}`.
///
/// `{slug}` is a literal placeholder, not a format argument: callers must
/// replace it with the resolved palace slug before issuing the request.
pub(super) const PALACE_RECALL_PATH: &str = "/api/v1/palaces/{slug}/recall";

/// HTTP path template for per-palace KG list. Substitute `{slug}`.
///
/// Same `{slug}` placeholder convention as [`PALACE_RECALL_PATH`].
pub(super) const PALACE_KG_ALL_PATH: &str = "/api/v1/palaces/{slug}/kg/all";

/// Connect + total request timeout. Kept short so a slow/dead daemon can
/// never block a Claude Code prompt for more than a couple seconds.
///
/// `build_injection_body` installs this value as both `timeout` and
/// `connect_timeout` on its `reqwest` client, so the bound applies to each
/// request individually rather than to the hook run as a whole.
pub(super) const HTTP_TIMEOUT: Duration = Duration::from_millis(2500);

/// Default top-K for drawer recall and KG triple selection.
///
/// Why: 5 + 5 keeps the injection focused on the strongest signal without
/// flooding the prompt. With a 4 KB cap on total output, this leaves ample
/// budget for hot facts and per-bullet content.
/// What: a `usize` constant returned by `configured_top_k` whenever
/// [`ENV_TOP_K`] is unset or does not parse as a `usize`. The same value
/// is handed to both the recall fetch and the triple selection.
/// Test: `prompt_context_recalls_palace_drawers` uses the default.
pub(super) const DEFAULT_TOP_K: usize = 5;

/// Hard byte cap on the rendered injection.
///
/// Why: hook-injection budgets in Claude Code are small (~few KB) and
/// every byte we emit is a token the model has to spend reading. 4 KB is
/// a comfortable ceiling above the typical render and well under any
/// downstream limit.
/// What: `4 * 1024` bytes. Sections are appended until the cap is hit;
/// truncation emits an explicit `…` marker so downstream readers know
/// the block was cut.
/// NOTE(review): the cap is not enforced in this file — presumably
/// `format::compose_injection` applies it; confirm against that module.
/// Test: `prompt_context_recalls_palace_drawers` exercises the budget
/// implicitly by asserting a real (non-placeholder) injection.
pub(super) const INJECTION_BYTE_CAP: usize = 4 * 1024;

/// Per-drawer-content preview cap inside the injection.
///
/// Why: dumping the full drawer body would burn the byte budget on a
/// single entry; a short single-line preview is enough to remind the model
/// what's available and lets it pull more via MCP recall if needed.
/// What: `220` characters of the whitespace-collapsed content.
/// NOTE(review): nothing in this file reads the constant — presumably the
/// `format` submodule applies it while rendering drawers; confirm.
/// Test: indirectly via `prompt_context_recalls_palace_drawers`.
pub(super) const DRAWER_PREVIEW_CHARS: usize = 220;

/// Env override for the top-K used by both recall and KG walks.
///
/// Why: gives operators an emergency knob without re-deploying. Optional
/// — when unset or unparseable, [`DEFAULT_TOP_K`] is used.
/// What: a string env var, whitespace-trimmed and parsed as a `usize`,
/// then clamped to `[1, 20]` to keep the byte budget meaningful. Note that
/// clamping means a value of `0` yields `1`, not the default, and anything
/// above `20` yields `20`.
/// Test: not unit-tested (env mutation across parallel tests is hostile).
pub const ENV_TOP_K: &str = "TRUSTY_MEMORY_PROMPT_TOP_K";

/// Env override for the deny-listed drawer tags filtered out of recall.
///
/// Why (issue #139): operators need a way to widen or narrow the noise
/// filter without a rebuild — e.g. add project-specific synthetic tags
/// that have polluted a palace from an upstream hook source. Optional —
/// when unset, the recall path uses [`DEFAULT_DENY_TAGS`].
/// What: a comma-separated list of tag strings. Whitespace around each
/// entry is trimmed; entries are lowercased; empty entries are ignored;
/// matching is case-insensitive against the drawer's tag list. A value
/// that yields at least one entry REPLACES the default list rather than
/// being merged into it, so include the default tags explicitly if they
/// should stay denied. A value that yields no entries (empty, or only
/// whitespace/commas) falls back to [`DEFAULT_DENY_TAGS`].
/// Test: `prompt_context_recall_env_override_extends_deny_list` exercises
/// the env-driven path with a synthetic noise tag.
pub const ENV_RECALL_DENY_TAGS: &str = "TRUSTY_MEMORY_PROMPT_RECALL_DENY_TAGS";

/// Default deny list applied to recalled drawer tags before composition.
///
/// Why (issue #139): live evidence in the user's palace showed that the
/// auto-capture hook (`trusty-memory hooks fire claude.user-prompt`,
/// wired by `trusty-mpm-core::session_launch`) persists every user prompt
/// as a drawer tagged `claude-session` + `user-prompt`. These drawers
/// dominate recall and crowd out signal content — three sample sessions
/// returned the literal token "yes" five times across semantically
/// distinct prompts. Filtering by tag is cheap, safe (empty tag lists
/// pass through unchanged), and reversible via [`ENV_RECALL_DENY_TAGS`].
/// What: a `&[&str]` of tag names. A drawer is filtered when ANY of its
/// tags matches (case-insensitive) ANY entry in this list. The entries
/// are lowercased by `configured_deny_tags` before being handed to the
/// filter, and are used only when the env override yields no entries.
/// Test: `prompt_context_recall_filters_deny_tags` covers the default
/// path; `prompt_context_recall_all_filtered_falls_back_to_global` covers
/// the all-filtered fallback.
const DEFAULT_DENY_TAGS: &[&str] = &["claude-session", "user-prompt"];

/// Placeholder body emitted when the composed injection is empty, i.e.
/// every fetch returned nothing useful.
///
/// Why: kept verbatim from the pre-#134 behaviour so the empty-palace
/// case is byte-identical for downstream tooling. The non-empty palace
/// path now overrides it with real content.
/// What: a static string. It is substituted by `build_injection_body`
/// only after the fetch/compose stage ran and produced an empty result
/// (which includes a daemon whose address is recorded but whose requests
/// all fail). When the daemon address cannot be read at all, or the HTTP
/// client cannot be built, `build_injection_body` returns an empty string
/// instead of this placeholder.
/// Test: `prompt_context_empty_palace_falls_back_to_global`.
pub(super) const EMPTY_PLACEHOLDER: &str = "No prompt facts stored yet.";

/// Entry point for `trusty-memory prompt-context`.
///
/// Why: every error path in this handler must result in a clean exit 0 — the
/// `UserPromptSubmit` hook is wired into every Claude Code prompt the user
/// types, so any non-zero exit (or panic) would either block the prompt or
/// inject a confusing error into the model's context. Logging to stderr is
/// fine because Claude Code only ingests stdout from hook commands.
/// What:
///   1. Read stdin (the UserPromptSubmit JSON payload) — extract `cwd` and
///      `prompt`.
///   2. Resolve the palace slug from the stdin `cwd` (fall back to process
///      cwd).
///   3. Fetch the global prompt-context block + per-palace recall + per-
///      palace KG triples (each best-effort, bounded by [`HTTP_TIMEOUT`]).
///   4. Compose a single Markdown injection capped at
///      [`INJECTION_BYTE_CAP`] bytes and print it.
///   5. Log a [`PromptLogEntry`] for the hook event (failure-isolated).
///
/// Sub-agent behaviour: deliberately unguarded. MPM-spawned sub-agents inject
/// the same prompt-context block as the PM because the marginal token cost
/// is small and the convention/style signal is high — see the module-level
/// note for the full rationale.
/// Test: `prompt_context_returns_ok_without_daemon` covers the no-daemon
/// branch; live-daemon paths are exercised by
/// `prompt_context_recalls_palace_drawers` and
/// `prompt_context_empty_palace_falls_back_to_global`.
pub async fn handle_prompt_context() -> Result<()> {
    let start = Instant::now();
    let trigger_payload = read_stdin_best_effort();
    let body = build_injection_body(&trigger_payload).await;
    if body.ends_with('\n') {
        print!("{body}");
    } else {
        println!("{body}");
    }

    // Submission-logging Part A: emit a `HookFired` activity event so the
    // dashboard / TUI feed shows this prompt-context invocation. Best-effort
    // — failures are swallowed inside `post_hook_event` so the hook never
    // fails because of activity-emit problems.
    emit_hook_event(&trigger_payload, &body, start).await;

    Ok(())
}

/// Report this hook invocation to the daemon as a `HookFired` activity event.
///
/// Why: without this, sessions whose only daemon traffic is hook traffic
/// leave the TUI / dashboard activity feed empty.
/// What: derives the user prompt and the palace slug from the raw stdin
/// payload, packs them into a `HookEventPayload` together with the byte
/// length of the rendered injection, a short prompt excerpt and the wall-
/// clock time elapsed since `start`, then hands the payload to
/// `post_hook_event`. The resolved slug is used for both the palace id and
/// the palace name.
/// Test: `hook_fired_activity_emit_smoke` in this module.
async fn emit_hook_event(trigger_payload: &str, injection: &str, start: Instant) {
    let prompt_text = parse_user_prompt(trigger_payload);
    let slug = resolve_palace_slug(trigger_payload);

    let injection_length = injection.len() as u64;
    let trigger_prompt_excerpt = hook_prompt_excerpt(&prompt_text);

    let event = HookEventPayload {
        palace_id: slug.clone(),
        palace_name: slug,
        hook_type: HookType::UserPromptSubmit,
        injection_kind: InjectionKind::PromptContext,
        injection_length,
        trigger_prompt_excerpt,
        // Sampled last so the duration covers everything up to the emit.
        duration_ms: start.elapsed().as_millis() as u64,
    };
    post_hook_event(event).await;
}

/// Build the prompt-context injection body for a given stdin payload.
///
/// Why: factored out of [`handle_prompt_context`] so integration tests can
/// drive the full enrichment pipeline against a real HTTP daemon without
/// trampling the process' stdout. Production code wraps this with
/// [`handle_prompt_context`] which prints the result and returns `Ok(())`.
/// What: resolve daemon → build HTTP client → resolve palace → fetch
/// global facts, palace recall and palace KG triples concurrently →
/// filter → compose injection → log. Returns the rendered body verbatim.
///
/// Return value:
///   * an empty string when the daemon address cannot be read or the HTTP
///     client cannot be built (the hook should inject nothing);
///   * [`EMPTY_PLACEHOLDER`] when the fetch/compose stage produced nothing;
///   * the composed Markdown block otherwise.
///
/// No failure path returns an error; a log entry is written on every
/// path via `log_entry`.
/// Test: `prompt_context_recalls_palace_drawers`,
/// `prompt_context_empty_palace_falls_back_to_global`.
pub(crate) async fn build_injection_body(trigger_payload: &str) -> String {
    let start = Instant::now();
    let user_prompt = parse_user_prompt(trigger_payload);

    // 1. Discover the running daemon. Missing file → daemon not running →
    //    return empty so the caller has nothing to inject.
    let Ok(Some(addr)) = trusty_common::read_daemon_addr("trusty-memory") else {
        log_entry(trigger_payload, "", 0, start);
        return String::new();
    };

    // The shared helper persists the bare `host:port`. The web daemon binds
    // HTTP, so prepend the scheme when callers haven't already.
    let base = if addr.starts_with("http://") || addr.starts_with("https://") {
        addr
    } else {
        format!("http://{addr}")
    };

    // 2. Tightly-bounded HTTP client. Any failure → return empty silently so
    //    the Claude Code prompt is never blocked by a degraded daemon.
    let Ok(client) = reqwest::Client::builder()
        .timeout(HTTP_TIMEOUT)
        .connect_timeout(HTTP_TIMEOUT)
        .build()
    else {
        log_entry(trigger_payload, "", 0, start);
        return String::new();
    };

    // 3. Resolve the palace slug from the stdin `cwd` first, then fall back
    //    to the process cwd. `None` means the palace-specific sections are
    //    skipped.
    let palace_slug = resolve_palace_slug(trigger_payload);

    // 4. Fan out the fetches. Each is best-effort; failures are skipped.
    //    The global block and the palace-specific pair are awaited together
    //    so the worst-case wall-clock cost is one `HTTP_TIMEOUT`, not two.
    let palace_sections = async {
        match &palace_slug {
            Some(slug) => {
                let top_k = configured_top_k();
                let (drawers, kg_all) = tokio::join!(
                    fetch_palace_recall(&client, &base, slug, &user_prompt, top_k),
                    fetch_palace_kg_triples(&client, &base, slug),
                );
                // Issue #139: drop low-signal drawers (e.g. `claude-session` /
                // `user-prompt` auto-captures) before composition. When this
                // filter empties the recall set, `compose_injection` falls
                // back to global hot facts.
                let deny_tags = configured_deny_tags();
                let drawers = filter_drawers_by_deny_tags(drawers, &deny_tags);
                let kg_filtered = select_relevant_triples(&kg_all, &user_prompt, top_k);
                (drawers, kg_filtered)
            }
            None => (Vec::new(), Vec::new()),
        }
    };
    let (global_facts, (drawers, kg_triples)) = tokio::join!(
        fetch_global_prompt_context(&client, &base),
        palace_sections,
    );

    // 5. Compose the injection. If every section is empty, emit the legacy
    //    placeholder so downstream consumers see byte-identical behaviour
    //    on a brand-new install.
    let composed = compose_injection(
        global_facts.as_deref(),
        &drawers,
        &kg_triples,
        palace_slug.as_deref(),
    );
    let body = if composed.is_empty() {
        EMPTY_PLACEHOLDER.to_string()
    } else {
        composed
    };

    // Best-effort log entry — `count_facts` approximates the number of
    // bulleted facts in the rendered Markdown block. Errors are swallowed
    // inside the logger.
    let facts_count = count_facts(&body);
    log_entry(trigger_payload, &body, facts_count, start);

    body
}

/// Read the hook's stdin into a string, capped at 64 KiB.
///
/// Why (issue #105): the UserPromptSubmit hook delivers the user prompt as
/// stdin so we capture it for the enriched-prompt log. Stdin may be empty
/// (e.g. when the daemon is probed manually). The cap defends against an
/// adversarial prompt the size of a novel from inflating the log file.
/// What: returns an empty string when stdin is a terminal; otherwise
/// synchronously reads raw bytes to EOF (or 64 KiB) and decodes them with
/// `decode_capped_utf8`. The payload is returned as read — it is NOT
/// trimmed; callers trim where they need to. Read failures degrade to
/// whatever was read so far (possibly nothing) — the hook continues either
/// way.
/// Test: not unit-tested (process stdin is hard to mock); the decoding
/// step is pure and can be tested through `decode_capped_utf8`.
fn read_stdin_best_effort() -> String {
    use std::io::Read;
    const STDIN_CAP_BYTES: u64 = 64 * 1024;
    // `is_terminal()` lets us bail when stdin is the controlling TTY — there
    // is no prompt to read in that case and the read would block.
    let stdin = std::io::stdin();
    if std::io::IsTerminal::is_terminal(&stdin) {
        return String::new();
    }
    // Read bytes rather than a `String`: `read_to_string` rejects the whole
    // input when the byte cap lands in the middle of a multi-byte character,
    // which would silently discard an oversized non-ASCII prompt.
    let mut raw = Vec::new();
    let _ = stdin.lock().take(STDIN_CAP_BYTES).read_to_end(&mut raw);
    decode_capped_utf8(raw)
}

/// Decode possibly-truncated bytes into a `String` without failing.
///
/// What: valid UTF-8 is returned unchanged. When the only problem is an
/// incomplete character at the very end (the signature of a byte-cap cut),
/// that partial character is dropped. Any other invalid sequence is
/// replaced with U+FFFD so the rest of the payload survives.
fn decode_capped_utf8(raw: Vec<u8>) -> String {
    match String::from_utf8(raw) {
        Ok(text) => text,
        Err(err) => {
            let utf8_err = err.utf8_error();
            if utf8_err.error_len().is_none() {
                // Unexpected end of input: keep the valid prefix only.
                let valid_len = utf8_err.valid_up_to();
                let mut bytes = err.into_bytes();
                bytes.truncate(valid_len);
                String::from_utf8(bytes).unwrap_or_default()
            } else {
                String::from_utf8_lossy(err.as_bytes()).into_owned()
            }
        }
    }
}

/// Extract the user prompt string from the stdin JSON payload.
///
/// Why (issue #134): the recall query against the palace's vectors needs
/// the actual prompt text the user typed. Claude Code's UserPromptSubmit
/// hook payload carries `"prompt": "..."`; without it we'd have to recall
/// against an empty string and return generic results.
/// What: best-effort `serde_json` parse — on success and when the JSON has
/// a string `prompt` field, returns it trimmed. On any failure (non-JSON,
/// missing field) returns the raw stdin payload trimmed, so a manually-
/// piped prompt still drives recall.
/// Test: `parse_user_prompt_prefers_prompt_field`.
fn parse_user_prompt(stdin_payload: &str) -> String {
    if stdin_payload.trim().is_empty() {
        return String::new();
    }
    if let Ok(value) = serde_json::from_str::<Value>(stdin_payload) {
        if let Some(p) = value.get("prompt").and_then(|v| v.as_str()) {
            return p.trim().to_string();
        }
    }
    stdin_payload.trim().to_string()
}

/// Resolve the effective top-K from [`ENV_TOP_K`], bounded to a sane range.
///
/// Why: operators get an escape hatch, but a hard ceiling keeps a stray
/// large value from blowing the byte budget.
/// What: when the env var is unset (or not valid Unicode) or its trimmed
/// value does not parse as a `usize`, returns [`DEFAULT_TOP_K`]. Otherwise
/// returns the parsed value clamped to `[1, 20]` — so `0` becomes `1`.
/// Test: not unit-tested in the crate (env mutation races); covered by the
/// default path through `prompt_context_recalls_palace_drawers`.
fn configured_top_k() -> usize {
    let Ok(raw) = std::env::var(ENV_TOP_K) else {
        return DEFAULT_TOP_K;
    };
    match raw.trim().parse::<usize>() {
        Ok(requested) => requested.clamp(1, 20),
        Err(_) => DEFAULT_TOP_K,
    }
}

/// Compute the deny-list of drawer tags used by prompt-context recall.
///
/// Why (issue #139): keeping the env-override + default decision in one
/// place leaves the filter call site small and the list easy to reason
/// about.
/// What: splits [`ENV_RECALL_DENY_TAGS`] on commas, trims each entry, drops
/// empty entries and lowercases the rest. If that produces at least one
/// tag, those tags are returned on their own (the defaults are not merged
/// in). If the env var is unset, empty, or holds only whitespace/commas,
/// the lowercased [`DEFAULT_DENY_TAGS`] are returned instead.
/// Test: not unit-tested directly in the crate (env mutation races);
/// covered indirectly via
/// `prompt_context_recall_env_override_extends_deny_list` and
/// `prompt_context_recall_filters_deny_tags`.
fn configured_deny_tags() -> Vec<String> {
    let from_env: Vec<String> = std::env::var(ENV_RECALL_DENY_TAGS)
        .map(|raw| {
            raw.split(',')
                .map(str::trim)
                .filter(|tag| !tag.is_empty())
                .map(str::to_lowercase)
                .collect::<Vec<String>>()
        })
        .unwrap_or_default();

    if from_env.is_empty() {
        DEFAULT_DENY_TAGS
            .iter()
            .map(|tag| tag.to_lowercase())
            .collect()
    } else {
        from_env
    }
}

/// Work out which palace this hook invocation belongs to.
///
/// Why (issue #125 + #134): recall and KG enrichment must target the
/// palace that owns the directory the user is working in, which is not
/// necessarily the directory the hook process was launched from. The
/// `cwd` carried in the stdin payload is therefore the source of truth.
/// What: tries `palace_slug_from_stdin_cwd` first; if that yields nothing,
/// falls back to [`crate::messaging::cwd_palace_slug`] for the process
/// cwd. Returns `None` only when both attempts fail.
/// Test: `resolve_palace_for_log_prefers_stdin_cwd` (the log helper uses
/// the same chain).
fn resolve_palace_slug(stdin_payload: &str) -> Option<String> {
    palace_slug_from_stdin_cwd(stdin_payload)
        .or_else(|| crate::messaging::cwd_palace_slug().ok())
}

/// Palace identifier to record in the enriched-prompt log.
///
/// Why (issue #125): same resolution chain as [`resolve_palace_slug`], but
/// the log keeps the legacy `"<unknown>"` sentinel so its shape stays
/// stable when no palace can be resolved.
/// What: returns the resolved slug, or the literal `"<unknown>"`.
/// Test: `resolve_palace_for_log_prefers_stdin_cwd`.
fn resolve_palace_for_log(stdin_payload: &str) -> String {
    match resolve_palace_slug(stdin_payload) {
        Some(slug) => slug,
        None => String::from("<unknown>"),
    }
}

/// Parse `stdin_payload` as JSON and, when it carries a `cwd` string, derive
/// the palace slug from that path.
///
/// Why: factored out so the unit test can exercise the stdin-override path
/// without manipulating the process cwd.
/// What: returns `Some(slug)` only when the payload parses as a JSON object,
/// contains a non-empty string `cwd`, and slug derivation succeeds for that
/// path. Returns `None` on every failure mode so the caller can fall back.
/// Test: `resolve_palace_for_log_prefers_stdin_cwd`.
fn palace_slug_from_stdin_cwd(stdin_payload: &str) -> Option<String> {
    if stdin_payload.trim().is_empty() {
        return None;
    }
    let value: Value = serde_json::from_str(stdin_payload).ok()?;
    let cwd = value.get("cwd")?.as_str()?;
    if cwd.is_empty() {
        return None;
    }
    crate::messaging::cwd_palace_slug_at(std::path::Path::new(cwd)).ok()
}

/// Record one hook invocation in the enriched-prompt log.
///
/// Why: prompt logging is best-effort — a write failure must never keep
/// the hook from completing.
/// What: obtains a `PromptLogger` from the environment, resolves the palace
/// label from the raw stdin payload (`"<unknown>"` when unresolved), and
/// logs a `PromptLogEntry` tagged `"UserPromptSubmit"` /
/// `"prompt-context-facts"` that carries the payload, the injection, the
/// fact count and the milliseconds elapsed since `start`.
/// NOTE(review): failure handling lives inside `PromptLogger::log`, which is
/// not visible here — confirm it never panics.
/// Test: `prompt_context_logs_attempt_without_daemon`.
fn log_entry(trigger_prompt: &str, injection: &str, facts_count: usize, start: Instant) {
    let logger = PromptLogger::from_env();
    let palace = resolve_palace_for_log(trigger_prompt);

    let entry = PromptLogEntry::new(
        "UserPromptSubmit",
        "prompt-context-facts",
        palace,
        trigger_prompt,
        injection,
    );
    let entry = entry.with_palace_facts_count(facts_count);

    // Sample the clock as late as possible so the duration includes the
    // palace resolution above.
    let elapsed_ms = start.elapsed().as_millis() as u64;
    logger.log(entry.with_duration_ms(elapsed_ms));
}

#[cfg(test)]
mod tests;