trusty-memory 0.15.4

MCP server (stdio + HTTP/SSE) for trusty-memory
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
//! `GET /health` handler — liveness probe with optional store/recall smoke test.
//!
//! Why: Provides an unauthenticated round-trip check for operator and
//! orchestrator polling. Issues #35, #71, and #185 progressively enriched
//! the endpoint with metrics, round-trip semantics, and a dedicated probe
//! palace. Issue #1101 makes the expensive ONNX round-trip opt-in (via
//! `?probe=true`) so the default path remains cheap enough for 1 s LB polling.
//! Issue #1142 adds self-healing: `seed_probe_sentinel_if_absent` seeds a
//! persistent sentinel drawer when the probe palace has none (e.g. after a
//! redb v2→v3 migration wipes the vector store) so the palace re-populates
//! itself on the next deep-probe request without operator intervention.
//! What: `health()` axum handler, `HealthQuery` params struct,
//! `HealthResponse` wire struct, `HealthProbeError`, `PROBE_SENTINEL_CONTENT`,
//! `ensure_health_probe_palace`, `seed_probe_sentinel_if_absent`,
//! `run_health_round_trip`, and the testable `run_health_round_trip_inner`
//! helper.
//! Test: `health_endpoint_*` and `health_probe_*` tests in
//! `web::tests::health_tests`.

use axum::{
    extract::{Query, State},
    Json,
};
use trusty_common::memory_core::palace::{Palace, PalaceId, RoomType};
use trusty_common::memory_core::retrieval::recall_with_default_embedder;
use uuid::Uuid;

/// Persistent content stored in the probe palace as an always-present
/// sentinel (issue #1142). A migration (e.g. redb v2→v3) may wipe the
/// vector store but leave the palace directory intact. The sentinel is
/// re-seeded automatically on the first deep probe after such an event.
///
/// Why: gives `seed_probe_sentinel_if_absent` a drawer it can check for
/// existence to determine whether the palace data was lost — and, if lost,
/// to re-plant it so the next probe round-trip has a healthy baseline.
/// What: a fixed string that is recognisable in drawer dumps / logs. The
/// sentinel is located by exact equality against a drawer's `content`, so
/// a drawer seeded with a different string is not recognised as the
/// sentinel.
/// Test: `health_probe_self_heals_after_migration_wipe` (issue #1142).
pub(crate) const PROBE_SENTINEL_CONTENT: &str =
    "__trusty_memory_health_sentinel__ issue-#1142 self-heal probe";

use crate::AppState;

use super::HEALTH_PROBE_PALACE;

/// Query parameters for `GET /health`.
///
/// Why (issue #1101): the default `/health` path must be cheap enough for
/// 1-second load-balancer polling. The expensive remember/recall/forget
/// round-trip (ONNX embedder calls) is now opt-in: callers that genuinely
/// want to probe the data plane pass `?probe=true` (or `?deep=true` for
/// symmetry with other endpoints).
/// What: both `probe` and `deep` default to `false` (via `#[serde(default)]`,
/// so a request with no query string still deserializes). When either is
/// `true` the handler runs the full `run_health_round_trip`; otherwise it
/// returns a lightweight liveness response without touching the memory
/// store. The struct is populated by the `Query<HealthQuery>` extractor
/// argument of the `health` handler; the fields are private and are read
/// only through `wants_deep_probe`.
/// Test: `health_endpoint_cheap_by_default` and
/// `health_endpoint_probe_param_triggers_round_trip`.
#[derive(serde::Deserialize)]
pub(super) struct HealthQuery {
    /// When `true`, run the full remember/recall/forget round-trip.
    /// Absent from the query string means `false`.
    #[serde(default)]
    probe: bool,
    /// Alias for `probe` (matches the `deep=` convention on other endpoints).
    /// Absent from the query string means `false`.
    #[serde(default)]
    deep: bool,
}

impl HealthQuery {
    /// Reports whether the caller opted in to the deep round-trip probe.
    ///
    /// What: `true` when at least one of the `probe` / `deep` flags is set,
    /// `false` only when both are unset.
    fn wants_deep_probe(&self) -> bool {
        let Self { probe, deep } = self;
        *probe || *deep
    }
}

/// Liveness/version payload for `GET /health`.
///
/// Why: `daemon_probe` requires an HTTP 200 from `/health` to confirm that the
/// port is owned by this daemon (and not a stale or foreign process). Issue
/// #35 enriches it with process resource metrics so operators (and the admin
/// UI) can see RSS, disk footprint, CPU, and uptime in one cheap call.
/// The fd-exhaustion fix adds `open_fds` and `fd_soft_limit` so operators can
/// see "244 / 256" before EMFILE hits.
/// What: Carries the `status` string (plus an optional `detail`), the
/// compile-time crate version, the issue-#35 resource block, the bound
/// listener `addr`, `open_fds` / `fd_soft_limit`, `update_available`, and
/// `daemon_state`. Every `Option` field is skipped during serialization when
/// it is `None`, so those keys are absent rather than `null`.
/// Test: Asserted by `health_endpoint_returns_ok`,
/// `health_endpoint_includes_resource_fields`, and
/// `health_endpoint_includes_fd_gauge` in this module's tests.
#[derive(serde::Serialize)]
pub(super) struct HealthResponse {
    /// `"ok"` when no deep probe was requested or when the requested
    /// round-trip smoke test succeeded; `"degraded"` when a requested
    /// round-trip failed (issue #71). Owned `String` so the handler can
    /// report different statuses without requiring static lifetimes.
    pub(super) status: String,
    /// Populated only when `status == "degraded"` (issue #71). Carries a
    /// short phrase identifying which round-trip stage failed so operators
    /// can triage quickly (e.g. `"store failed: ..."`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) detail: Option<String>,
    /// Crate version baked in at compile time (the handler fills this from
    /// `env!("CARGO_PKG_VERSION")`).
    pub(super) version: &'static str,
    /// Current process Resident Set Size in megabytes (issue #35). Sampled
    /// via the shared `SysMetrics` on each health request.
    pub(super) rss_mb: u64,
    /// On-disk footprint of the daemon's `data_root` in bytes (issue #35):
    /// the sum of every palace file. Refreshed by a background task every
    /// 10 s; `0` until the first walk completes.
    pub(super) disk_bytes: u64,
    /// Current process CPU usage as a percentage (issue #35), where `100.0`
    /// means one fully-saturated core. The first reading after daemon start
    /// may be `0.0` until a delta window exists.
    pub(super) cpu_pct: f32,
    /// Seconds elapsed since the daemon started (issue #35).
    pub(super) uptime_secs: u64,
    /// Bound `host:port` of the HTTP listener. Why: dynamic port selection
    /// (7070..=7079 + OS fallback) means clients cannot assume `7070`; this
    /// field advertises the real port without forcing them to read
    /// `~/.trusty-memory/http_addr`. `None` when the daemon was constructed
    /// without ever binding (tests that drive the router with `TestServer`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) addr: Option<String>,
    /// Number of file descriptors currently open by this process (fd-exhaustion
    /// gauge). `None` when the platform does not expose this cheaply (rare).
    /// Sampled on every `/health` call via [`crate::fd_metrics::count_open_fds`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) open_fds: Option<u64>,
    /// Soft `RLIMIT_NOFILE` ceiling for this process (fd-exhaustion gauge).
    /// `None` when `getrlimit` fails or returns `RLIM_INFINITY` (unlimited).
    /// Together with `open_fds`, lets operators see "244 / 256" before EMFILE.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) fd_soft_limit: Option<u64>,
    /// Newer crates.io version available, if any (issue #537).
    ///
    /// Why: surfaces update availability without polling crates.io on every
    /// health call — a single background check at startup stores the result
    /// here for the health handler to read cheaply.
    /// What: absent = up to date or check not completed; `"x.y.z"` =
    /// the available newer version.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) update_available: Option<String>,
    /// Daemon readiness state (issues #910 / #911).
    ///
    /// Why: operators and monitoring scripts need to distinguish "the daemon
    /// is alive but the embedder hasn't finished compiling yet" from "the
    /// daemon is fully operational". Before this field, a fresh daemon looked
    /// healthy to external monitors even while `memory_remember` /
    /// `memory_recall` calls were returning warming errors.
    /// What: `"warming"` until the embedder init succeeds; `"ready"` once
    /// `spawn_startup_tasks` flips `AppState::daemon_readiness`.
    pub(super) daemon_state: String,
}

/// `GET /health[?probe=true]` — unauthenticated liveness probe with an
/// opt-in store/recall smoke test.
///
/// Why: `daemon_probe` and external monitors need a cheap way to confirm
/// that this daemon owns the port. Issue #35 added process metrics (RSS,
/// CPU, `data_root` disk footprint, uptime); issue #71 added the memory
/// round-trip so data-plane regressions show up on the next poll; issue
/// #185 confined that round-trip to the dedicated, hidden probe palace; the
/// fd-exhaustion fix added the `open_fds` / `fd_soft_limit` gauge; issue
/// #1101 made the round-trip opt-in so the default path stays cheap enough
/// for 1-second load-balancer polling; issues #910/#911 added the readiness
/// state.
/// What: Samples the resource metrics, then — only when the query asks for
/// a deep probe (`?probe=true` or `?deep=true`) — awaits
/// `run_health_round_trip`. A failed round-trip is logged at `warn` level
/// and reported as `status: "degraded"` with the error text in `detail`;
/// every other case reports `status: "ok"` with no `detail`. The response
/// body is `{status, detail?, version, rss_mb, disk_bytes, cpu_pct,
/// uptime_secs, addr?, open_fds?, fd_soft_limit?, update_available?,
/// daemon_state}`. The handler's return type is always `Json<...>`, so a
/// degraded probe is still an HTTP 200 and monitors keyed on the status
/// code keep seeing the daemon as up.
/// Test: `health_endpoint_returns_ok`,
/// `health_endpoint_includes_resource_fields`,
/// `health_endpoint_includes_fd_gauge`,
/// `health_endpoint_cheap_by_default`,
/// `health_endpoint_round_trip_on_fresh_install_is_ok`,
/// `health_endpoint_round_trip_with_palace_is_ok`,
/// `health_probe_palace_is_invisible`,
/// `health_probe_cleans_up_on_success`,
/// `health_probe_cleans_up_on_recall_miss`.
pub(super) async fn health(
    State(state): State<AppState>,
    Query(query): Query<HealthQuery>,
) -> Json<HealthResponse> {
    use std::sync::atomic::Ordering;

    // The metrics guard is a temporary of this statement, so the lock is
    // released as soon as the sample has been taken.
    let (rss_mb, cpu_pct) = state.sys_metrics.lock().await.sample();
    let disk_bytes = state.disk_bytes.load(Ordering::Relaxed);
    let uptime_secs = state.started_at.elapsed().as_secs();
    let addr = state.bound_addr.get().map(|bound| bound.to_string());

    // fd-exhaustion gauge: best-effort sampling; both helpers yield `None`
    // rather than an error when the value is unavailable.
    let open_fds = crate::fd_metrics::count_open_fds();
    let fd_soft_limit = crate::fd_metrics::fd_soft_limit();

    // Issue #1101: the round-trip only runs when explicitly requested via
    // ?probe=true or ?deep=true. `None` here means "nothing went wrong",
    // either because the probe passed or because it was never requested.
    let probe_failure = if query.wants_deep_probe() {
        run_health_round_trip(&state).await.err()
    } else {
        None
    };
    let (status, detail) = match probe_failure {
        Some(err) => {
            tracing::warn!("/health round-trip degraded: {err}");
            ("degraded".to_string(), Some(err.to_string()))
        }
        None => ("ok".to_string(), None),
    };

    // A poisoned lock is treated the same as "no update known".
    let update_available = state
        .update_available
        .lock()
        .ok()
        .and_then(|latest| latest.clone());

    // Issues #910/#911: surface readiness so monitors can distinguish
    // "alive but warming" from "fully ready".
    let daemon_state = String::from(match state.readiness() {
        crate::DaemonReadiness::Ready => "ready",
        crate::DaemonReadiness::Warming => "warming",
    });

    let body = HealthResponse {
        status,
        detail,
        version: env!("CARGO_PKG_VERSION"),
        rss_mb,
        disk_bytes,
        cpu_pct,
        uptime_secs,
        addr,
        open_fds,
        fd_soft_limit,
        update_available,
        daemon_state,
    };
    Json(body)
}

/// Stages of the `/health` round-trip that can fail (issue #71).
///
/// Why: `thiserror`-derived enum gives every failure point a stable phrase the
/// handler can render into the `detail` field without printing implementation
/// detail or full backtraces. Issue #185 dropped the `NoPalaces` and
/// `ListPalaces` sentinels: the probe now provisions its dedicated
/// `__health_probe__` palace itself, so neither short-circuit can occur.
/// What: One variant per stage (open palace, ensure-probe-palace, store,
/// recall, missing-in-results, delete). The `String` payloads hold the
/// already-formatted upstream error; the `#[error]` text of each variant is
/// what ends up in the response's `detail` field.
/// Test: Exercised indirectly by the `health_endpoint_round_trip_*` and
/// `health_probe_*` tests.
#[derive(Debug, thiserror::Error)]
pub(crate) enum HealthProbeError {
    /// Opening the probe palace handle in `run_health_round_trip` failed.
    #[error("open palace failed: {0}")]
    OpenPalace(String),
    /// Creating the probe palace failed, or seeding its sentinel drawer
    /// failed (the latter is prefixed with `seed sentinel:` in the payload).
    #[error("provision health probe palace failed: {0}")]
    EnsureProbePalace(String),
    /// Storing the ephemeral probe drawer failed.
    #[error("store failed: {0}")]
    Store(String),
    /// The recall call itself returned an error.
    #[error("recall failed: {0}")]
    Recall(String),
    /// Recall succeeded but none of the returned hits was the probe drawer;
    /// carries the id of the drawer that was stored.
    #[error("recall did not return the probe drawer (id={0})")]
    ProbeMissing(Uuid),
    /// Recall found the probe drawer but forgetting it afterwards failed.
    #[error("delete probe drawer failed: {0}")]
    Delete(String),
}

/// Ensure the dedicated `__health_probe__` palace exists on disk.
///
/// Why: Issue #185 — picking whichever palace `list_palaces` returns first
/// leaked health-probe drawers into a real user palace whenever recall failed
/// or returned an empty result. Routing the probe to a dedicated palace whose
/// id starts with the reserved `__` prefix confines any leak (e.g. a daemon
/// crash mid-round-trip) to a palace the user can never see. This helper is
/// meant to be called on every deep-probe request, including when the palace
/// already exists.
/// What: Three steps, cheapest first:
/// 1. `registry.get` — returns immediately when the palace is already
///    registered in memory.
/// 2. `registry.open_palace` — returns when the palace can be opened from
///    `data_root`.
/// 3. `registry.create_palace` — builds fresh palace metadata under
///    `data_root/<HEALTH_PROBE_PALACE>` with a description that flags its
///    purpose.
/// Errors: `HealthProbeError::EnsureProbePalace` when step 3 fails. The
/// message also carries the error from step 2, so a palace that exists but
/// could not be opened (rather than one that is merely missing) is visible
/// to operators instead of being silently discarded.
/// Test: `health_probe_palace_is_invisible`, `health_probe_cleans_up_on_success`,
/// `health_probe_cleans_up_on_recall_miss`,
/// `health_probe_self_heals_after_migration_wipe` (issue #1142).
pub(crate) fn ensure_health_probe_palace(state: &AppState) -> Result<(), HealthProbeError> {
    let id = PalaceId::new(HEALTH_PROBE_PALACE);

    // Fast path: already registered in-memory, no disk hit needed.
    if state.registry.get(&id).is_some() {
        return Ok(());
    }

    // Try to open from disk first — succeeds once the palace has been
    // persisted. Keep the failure around: it is normally just "not created
    // yet", but if creation fails as well it is the best clue to the cause.
    let open_err = match state.registry.open_palace(&state.data_root, &id) {
        Ok(_) => return Ok(()),
        Err(e) => e,
    };

    // Cold path: first run on this `data_root`. Create the palace metadata
    // on disk so subsequent probes hit the open-path above.
    let palace = Palace {
        id,
        name: HEALTH_PROBE_PALACE.to_string(),
        description: Some(
            "Internal health-probe palace (issue #185). Hidden from listings; \
             holds short-lived round-trip drawers cleaned up on every probe."
                .to_string(),
        ),
        created_at: chrono::Utc::now(),
        data_dir: state.data_root.join(HEALTH_PROBE_PALACE),
    };
    state
        .registry
        .create_palace(&state.data_root, palace)
        .map_err(|e| {
            HealthProbeError::EnsureProbePalace(format!(
                "{e:#} (open attempt failed first: {open_err:#})"
            ))
        })?;
    Ok(())
}

/// Plant the persistent sentinel drawer in the probe palace when it is
/// missing.
///
/// Why (issue #1142): after a redb v2→v3 migration the probe palace
/// directory and `palace.json` survive but the internal vector/drawer stores
/// are wiped. A deep probe against such an empty palace stores an ephemeral
/// drawer and immediately recalls it, and the freshly reset vector index may
/// not return it — a spurious `ProbeMissing`. A sentinel that outlives the
/// ephemeral round-trip drawers avoids this: the request that notices the
/// sentinel is gone re-seeds it and reports that it did so, letting the
/// caller skip the round-trip once; the following probe finds the sentinel
/// and runs normally.
/// What: Scans `handle.drawers` for a drawer whose `content` equals
/// [`PROBE_SENTINEL_CONTENT`]. When one exists, returns `Ok(false)` without
/// writing anything. Otherwise stores the sentinel through
/// `remember_with_options` with `force: true` (described in issue #1142 as
/// bypassing the token-length gate — not verifiable from this file), logs
/// the self-heal at `info` level, and returns `Ok(true)`. A failed store is
/// reported as `HealthProbeError::EnsureProbePalace` with a `seed sentinel:`
/// prefix.
/// Test: `health_probe_self_heals_after_migration_wipe` (issue #1142).
pub(crate) async fn seed_probe_sentinel_if_absent(
    handle: &std::sync::Arc<trusty_common::memory_core::PalaceHandle>,
) -> Result<bool, HealthProbeError> {
    // The guard returned by `read()` is a temporary of this statement, so it
    // is gone before the `.await` further down.
    let already_seeded = handle
        .drawers
        .read()
        .iter()
        .any(|drawer| drawer.content == PROBE_SENTINEL_CONTENT);
    if already_seeded {
        return Ok(false);
    }

    let tags: Vec<String> = vec![String::from("healthcheck"), String::from("sentinel")];
    let options = trusty_common::memory_core::retrieval::RememberOptions {
        force: true,
        ..Default::default()
    };
    let seeded = handle
        .remember_with_options(
            PROBE_SENTINEL_CONTENT.to_string(),
            RoomType::General,
            tags,
            0.0,
            options,
        )
        .await;
    if let Err(e) = seeded {
        return Err(HealthProbeError::EnsureProbePalace(format!(
            "seed sentinel: {e:#}"
        )));
    }

    tracing::info!(
        "health probe: seeded sentinel drawer in {} (issue #1142 self-heal)",
        HEALTH_PROBE_PALACE
    );
    Ok(true)
}

/// Run the remember/recall/forget smoke test against the dedicated probe
/// palace.
///
/// Why: `/health` used to report `status: "ok"` whenever the process was
/// alive, even with the store or recall path broken. Issue #71 asks the
/// probe to exercise the store and recall service layer directly (no HTTP
/// loopback) so monitors catch data-plane regressions on the next poll.
/// Issue #185 adds two requirements: never touch user-facing palaces, and
/// never leave a probe drawer behind even when recall fails or comes back
/// empty. Issue #1142 adds the sentinel self-heal step.
/// What:
/// 1. Provisions the probe palace via [`ensure_health_probe_palace`].
/// 2. Opens its handle; a failure maps to `HealthProbeError::OpenPalace`.
/// 3. Calls [`seed_probe_sentinel_if_absent`]; when the sentinel had to be
///    seeded, returns `Ok(())` right away and leaves the full round-trip to
///    the next probe.
/// 4. Otherwise delegates to [`run_health_round_trip_inner`], supplying a
///    recall closure that calls `recall_with_default_embedder` with a result
///    limit of 5 and maps its error to `HealthProbeError::Recall`.
/// Test: Indirect — `health_endpoint_round_trip_with_palace_is_ok`,
/// `health_endpoint_round_trip_on_fresh_install_is_ok`, plus the three
/// `health_probe_*` cleanup tests added for issue #185.
pub(crate) async fn run_health_round_trip(state: &AppState) -> Result<(), HealthProbeError> {
    // Issue #185: the probe always targets its own palace, created on demand
    // so a fresh install with zero user palaces is still exercised.
    ensure_health_probe_palace(state)?;

    let palace_id = PalaceId::new(HEALTH_PROBE_PALACE);
    let handle = match state.registry.open_palace(&state.data_root, &palace_id) {
        Ok(opened) => opened,
        Err(e) => return Err(HealthProbeError::OpenPalace(format!("{e:#}"))),
    };

    // Issue #1142: when the sentinel was missing and has just been planted,
    // the store path evidently works, while the index may not yet return a
    // just-stored item. Report healthy now; the next probe does the full
    // round-trip.
    let freshly_seeded = seed_probe_sentinel_if_absent(&handle).await?;
    if freshly_seeded {
        return Ok(());
    }

    // The store → recall → forget ordering lives in the inner helper so tests
    // can swap in a stub recall; production always goes through the shared
    // default embedder.
    run_health_round_trip_inner(handle, |palace, phrase| async move {
        match recall_with_default_embedder(&palace, &phrase, 5).await {
            Ok(hits) => Ok(hits),
            Err(e) => Err(HealthProbeError::Recall(format!("{e:#}"))),
        }
    })
    .await
}

/// Store → recall → forget core of the health probe, with unconditional
/// cleanup.
///
/// Why: Issue #185 — "the probe drawer is deleted before any error from
/// recall is returned" is the central correctness property of the health
/// round-trip. Keeping it in its own function with an injectable `recall`
/// lets tests pass a stub that yields `Ok(empty)` or `Err(Recall(...))` and
/// check the property directly, without the ONNX embedder.
/// What:
/// 1. Builds a probe phrase made unique by a fresh v4 UUID and stores it via
///    `PalaceHandle::remember`; a failure returns `HealthProbeError::Store`
///    (nothing was stored, so nothing needs cleaning up).
/// 2. Awaits `recall(handle, phrase)` and keeps the outcome without
///    inspecting it yet.
/// 3. Awaits `PalaceHandle::forget` for the stored drawer — always, whatever
///    recall returned.
/// 4. Only then evaluates recall: an `Err` is returned unchanged; hits that
///    do not include the stored drawer id yield `ProbeMissing`.
/// 5. When recall was fine, a failed forget is reported as `Delete`.
/// In steps 4's failure cases a forget error is dropped on purpose, so the
/// recall problem stays the reported root cause.
/// Test: `health_probe_cleans_up_on_recall_miss` and
/// `health_probe_cleans_up_on_recall_error` exercise both failure modes with
/// a stubbed recall; `health_probe_cleans_up_on_success` covers the happy path.
pub(crate) async fn run_health_round_trip_inner<F, Fut>(
    handle: std::sync::Arc<trusty_common::memory_core::PalaceHandle>,
    recall: F,
) -> Result<(), HealthProbeError>
where
    F: FnOnce(std::sync::Arc<trusty_common::memory_core::PalaceHandle>, String) -> Fut,
    Fut: std::future::Future<
        Output = Result<Vec<trusty_common::memory_core::retrieval::RecallResult>, HealthProbeError>,
    >,
{
    // The fixed `__trusty_memory_healthcheck__` prefix makes a stray probe
    // drawer recognisable in logs / drawer dumps should the forget step ever
    // be skipped (e.g. a panic between store and delete); the UUID keeps
    // concurrent probes from colliding.
    let probe_token = Uuid::new_v4();
    let probe_phrase = format!("__trusty_memory_healthcheck__ probe {probe_token}");

    let stored = handle
        .remember(
            probe_phrase.clone(),
            RoomType::General,
            vec![String::from("healthcheck")],
            0.0,
        )
        .await;
    let drawer_id = match stored {
        Ok(id) => id,
        Err(e) => return Err(HealthProbeError::Store(format!("{e:#}"))),
    };

    let recall_outcome = recall(std::sync::Arc::clone(&handle), probe_phrase).await;

    // Issue #185: forget runs BEFORE the recall outcome is looked at, so
    // neither an errored nor an empty recall can bypass cleanup.
    let cleanup_outcome = handle.forget(drawer_id).await;

    // A recall error wins over any cleanup error.
    let hits = recall_outcome?;
    if hits.iter().all(|hit| hit.drawer.id != drawer_id) {
        return Err(HealthProbeError::ProbeMissing(drawer_id));
    }

    // Recall was healthy, so a cleanup failure is now the thing to report.
    match cleanup_outcome {
        Ok(_) => Ok(()),
        Err(e) => Err(HealthProbeError::Delete(format!("{e:#}"))),
    }
}