Skip to main content

solo_api/
http.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! HTTP/JSON transport for Solo. Local-only by default — binds to
4//! `127.0.0.1:<port>` and serves the same operations the MCP server
5//! exposes:
6//!
7//! Episode operations:
8//!   - `POST /memory`                — remember (body: { content, source_type?, source_id? })
9//!   - `POST /memory/search`         — recall  (body: { query, limit? })
10//!   - `POST /memory/context`        — recall + themes + facts + contradictions bundle
11//!   - `GET  /memory/{id}`           — inspect
12//!   - `PATCH /memory/{id}`           — correct/update one active memory
13//!   - `DELETE /memory/{id}?reason=…` — forget
14//!
15//! Maintenance:
16//!   - `POST /memory/consolidate`    — trigger a consolidation pass
17//!   - `POST /backup`                — encrypted online backup
18//!
19//! Derived-layer (v0.4.0+; queries against the Steward's outputs):
20//!   - `GET  /memory/themes?window_days=N&limit=K`
21//!   - `GET  /memory/facts_about?subject=X&predicate=Y&since_ms=N&until_ms=N&include_as_object=B&limit=K`
22//!   - `GET  /memory/entities?query=X&limit=K`
23//!   - `GET  /memory/contradictions?limit=K`
24//!   - `POST /memory/contradictions/resolve`
25//!   - `GET  /memory/clusters/{cluster_id}?full_content=true` (v0.5.0+)
26//!
27//! Document operations (v0.7.0+):
28//!   - `POST   /memory/documents`               — ingest a file
29//!   - `POST   /memory/documents/search`        — vector search over chunks
30//!   - `GET    /memory/documents`               — paginate documents
31//!   - `GET    /memory/documents/{id}`          — inspect one document
32//!   - `DELETE /memory/documents/{id}`          — soft-delete a document
33//!
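//! Auth is optional at this layer. The default deployment runs
//! unauthenticated: the threat model is local-machine single-user, and
//! binding to `127.0.0.1` keeps the surface off the LAN. Bearer-token or
//! OIDC auth can be enabled through `router_with_auth_config` /
//! `serve_http_with_auth_config`; `GET /health` and `GET /openapi.json`
//! stay public either way.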
37//!
38//! ## Lifecycle
39//!
//! `serve_http(addr, state, bearer_token, shutdown)` binds to `addr`, runs axum with
41//! `with_graceful_shutdown(shutdown)`, returns when shutdown fires or
42//! the listener errors. `solo http-serve` invokes this from inside a
43//! `OneShotContext`, so writer + reader pool + lockfile stay live for
44//! the server's lifetime and clean up properly afterwards.
45
46use std::convert::Infallible;
47use std::net::SocketAddr;
48use std::str::FromStr;
49use std::sync::Arc;
50use std::time::Duration;
51
52use axum::extract::{FromRequestParts, Path, Query, State};
53use axum::http::request::Parts;
54use axum::http::{HeaderValue, Method, StatusCode};
55use axum::response::sse::{Event, KeepAlive, Sse};
56use axum::response::{IntoResponse, Response};
57use axum::routing::{get, post};
58use axum::{Json, Router};
59use futures::Stream;
60use serde::{Deserialize, Serialize};
61use solo_core::{
62    Confidence, DocumentId, EncodingContext, Episode, InvalidateEvent, MemoryId, TenantId, Tier,
63};
64use solo_storage::{TenantHandle, TenantRegistry};
65use tokio::sync::broadcast;
66use tower_http::cors::{AllowOrigin, CorsLayer};
67use tower_http::trace::TraceLayer;
68
69use crate::auth::{AuthConfig, AuthenticatedPrincipal, middleware::AuthValidator};
70
/// HTTP-side application state, shared by every handler on the router.
///
/// v0.8.0 P2 swapped per-handler `WriteHandle + ReaderPool + ...` for a
/// `TenantRegistry` that resolves the tenant on each request. The
/// resolution itself lives in `TenantExtractor`: authenticated
/// principal's tenant claim first, then the `X-Solo-Tenant` header, then
/// `default_tenant`.
///
/// `Clone` is derived because the router builder hands a copy of the
/// state to each sub-router it assembles.
#[derive(Clone)]
pub struct SoloHttpState {
    /// Multi-tenant registry. Lazy-loads tenants on first request.
    pub registry: Arc<TenantRegistry>,
    /// Default tenant used when the request carries neither an
    /// authenticated tenant claim nor an `X-Solo-Tenant` header.
    /// Typically `TenantId::default_tenant()`. Also handed to the auth
    /// validator when auth is configured.
    pub default_tenant: TenantId,
    /// Read-path aliases for the canonical `"user"` subject. Sourced
    /// from `solo.config.toml` `[identity] user_aliases`; threaded
    /// through to `solo_query::facts_about` so a query for `"alex"`
    /// also surfaces rows historically extracted as `"user"`. Empty
    /// vec = behave as today. Wrapped in `Arc` so handler `clone()`s
    /// stay cheap. v0.5.0 Priority 1 sub-step 1C.
    pub user_aliases: Arc<Vec<String>>,
    /// v0.11.0 P1: MCP `Mcp-Session-Id` session store. In-memory,
    /// TTL-bounded (30 min inactivity / 4 hr absolute). The middleware
    /// on the `/mcp` route validates request headers against this
    /// store; the POST handler creates new entries on the first
    /// request without a session id. See
    /// `crates/solo-api/src/mcp_session.rs` +
    /// `docs/dev-log/0132-v0.11.0-implementation-plan.md` §3 Decision A.
    pub mcp_sessions: crate::mcp_session::SessionStore,
}
97
/// Name of the HTTP header that routes a request to a specific tenant.
///
/// Optional; absent → `state.default_tenant`. `TenantExtractor` only
/// consults it when the request carries no authenticated principal with
/// a tenant claim.
pub const TENANT_HEADER: &str = "x-solo-tenant";
101
102/// Axum extractor that resolves the request's target tenant, then
103/// lazy-opens the tenant via the registry.
104///
105/// Resolution order (v0.8.0 P3):
106///   1. `AuthenticatedPrincipal.tenant_claim` from request extensions —
107///      set by the auth middleware. In OIDC mode this is the validated
108///      value of the configured custom claim (default `solo_tenant`);
109///      in bearer mode this is the daemon's default tenant.
110///   2. `X-Solo-Tenant` header — falls back to this when no
111///      authenticated principal is on the request (unauthenticated
112///      loopback deployments — the default).
113///   3. `state.default_tenant` when neither is present.
114///
115/// Bad header values → 400. Lazy-open failures → 500 unless the failure
116/// kind is `NotFound` (unknown tenant id) → 404.
117pub struct TenantExtractor(pub Arc<TenantHandle>);
118
119impl<S> FromRequestParts<S> for TenantExtractor
120where
121    SoloHttpState: FromRef<S>,
122    S: Send + Sync,
123{
124    type Rejection = ApiError;
125
126    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
127        let state = SoloHttpState::from_ref(state);
128        // Order: (1) principal.tenant_claim (set by auth middleware),
129        // (2) X-Solo-Tenant header, (3) state.default_tenant.
130        //
131        // The principal wins because in OIDC mode the JWT is the source
132        // of truth — letting the header override an OIDC claim would
133        // be a tenant-impersonation hole.
134        let resolved = if let Some(principal) = parts.extensions.get::<AuthenticatedPrincipal>()
135            && let Some(claim) = principal.tenant_claim.clone()
136        {
137            claim
138        } else {
139            match parts.headers.get(TENANT_HEADER) {
140                None => state.default_tenant.clone(),
141                Some(raw) => {
142                    let s = raw.to_str().map_err(|e| {
143                        ApiError::bad_request(format!(
144                            "{TENANT_HEADER}: header value must be ASCII ({e})"
145                        ))
146                    })?;
147                    TenantId::new(s.to_string()).map_err(|e| {
148                        ApiError::bad_request(format!("{TENANT_HEADER}: invalid tenant id: {e}"))
149                    })?
150                }
151            }
152        };
153        let handle = state.registry.get_or_open(&resolved).await.map_err(|e| {
154            // Map NotFound → 404; everything else → 500.
155            use solo_core::Error;
156            match &e {
157                Error::NotFound(_) => ApiError::not_found(e.to_string()),
158                Error::InvalidInput(_) => ApiError::bad_request(e.to_string()),
159                _ => ApiError::internal(e.to_string()),
160            }
161        })?;
162        Ok(TenantExtractor(handle))
163    }
164}
165
166use axum::extract::FromRef;
167
168/// v0.8.0 P4: extractor that pulls the authenticated principal's
169/// `subject` (JWT `sub` or `"bearer"`) out of request extensions for the
170/// audit log. `None` when no `AuthenticatedPrincipal` is present
171/// (unauthenticated loopback deployments).
172pub struct AuditPrincipal(pub Option<String>);
173
174impl<S> FromRequestParts<S> for AuditPrincipal
175where
176    S: Send + Sync,
177{
178    type Rejection = std::convert::Infallible;
179
180    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
181        Ok(AuditPrincipal(
182            parts
183                .extensions
184                .get::<AuthenticatedPrincipal>()
185                .map(|p| p.subject.clone()),
186        ))
187    }
188}
189
190/// v0.10.0: extractor that lifts the full `AuthenticatedPrincipal` out
191/// of request extensions for the `/v1/tenants` handler. Distinct from
192/// `AuditPrincipal` (which only carries `subject: Option<String>`) — the
193/// tenant-list handler needs the `tenant_claim` and `claims` fields to
194/// distinguish bearer (claims = Null) from OIDC (claims = JWT object)
195/// principals.
196///
197/// `None` when no `AuthenticatedPrincipal` is on the request — the
198/// unauthenticated loopback deployment path, which the tenant-list
199/// handler treats as "all tenants visible" (same scope as the
200/// `solo tenants list` CLI). See `docs/dev-log/0119-tenants-list-impl.md`
201/// for the three-case visibility rule.
202pub struct MaybePrincipal(pub Option<AuthenticatedPrincipal>);
203
204impl<S> FromRequestParts<S> for MaybePrincipal
205where
206    S: Send + Sync,
207{
208    type Rejection = std::convert::Infallible;
209
210    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
211        Ok(MaybePrincipal(
212            parts.extensions.get::<AuthenticatedPrincipal>().cloned(),
213        ))
214    }
215}
216
217/// Build the router with optional bearer-token auth (v0.7.x legacy shape).
218///
219/// When `bearer_token` is `Some(t)`, every request except `GET /health`
220/// + `GET /openapi.json` (unauthenticated probes / machine-readable spec)
221/// requires `Authorization: Bearer t`. v0.8.0 P3 routes this through the
222/// new `AuthValidator::Bearer` middleware so an `AuthenticatedPrincipal`
223/// is attached to every authenticated request (the `TenantExtractor`
224/// reads `principal.tenant_claim` ahead of the `X-Solo-Tenant` header).
225pub fn router_with_auth(state: SoloHttpState, bearer_token: Option<String>) -> Router {
226    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
227    router_with_auth_config(state, auth)
228}
229
230/// Build the router with a config-driven auth block (v0.8.0 P3+).
231///
232/// `auth = Some(AuthConfig::Bearer { token })` is equivalent to passing
233/// `Some(token)` to [`router_with_auth`]. `auth = Some(AuthConfig::Oidc { … })`
234/// installs the OIDC middleware (JWKS fetch + cache + sig + claim checks).
235/// `auth = None` runs unauthenticated — same `127.0.0.1` default as v0.7.x.
236///
237/// Public routes (`/health`, `/openapi.json`) are always exempt from
238/// auth — load balancers, uptime monitors, and codegen tools shouldn't
239/// need credentials.
240pub fn router_with_auth_config(state: SoloHttpState, auth: Option<AuthConfig>) -> Router {
241    let cors = build_cors_layer();
242    // Public, always-unauthenticated routes:
243    //   - GET /health: liveness probe (load balancers, uptime monitors).
244    //   - GET /openapi.json: machine-readable API description for client
245    //     codegen + browser-UI tooling (TypeScript / OpenAPI Generator,
246    //     curl-tools, etc.). The spec describes the API shape, not
247    //     secrets — fine to serve unauthenticated even on a LAN-bound
248    //     instance.
249    let public = Router::new()
250        .route("/health", get(|| async { "ok" }))
251        .route("/openapi.json", get(openapi_handler));
252
253    let authed = Router::new()
254        .route("/memory", post(remember_handler))
255        .route("/memory/search", post(recall_handler))
256        .route("/memory/context", post(memory_context_handler))
257        .route("/memory/consolidate", post(consolidate_handler))
258        .route(
259            "/memory/{id}",
260            get(inspect_handler)
261                .patch(update_handler)
262                .delete(forget_handler),
263        )
264        .route("/backup", post(backup_handler))
265        // Path 1 derived-layer endpoints (v0.4.0+). GET-shaped because
266        // these are pure read-only queries; query-string params for
267        // simple filters keep them curl-friendly without a JSON body.
268        .route("/memory/themes", get(themes_handler))
269        .route("/memory/facts_about", get(facts_about_handler))
270        .route("/memory/entities", get(entities_handler))
271        .route("/memory/contradictions", get(contradictions_handler))
272        .route(
273            "/memory/contradictions/resolve",
274            post(contradiction_resolve_handler),
275        )
276        // v0.5.0 Priority 3: drill into one cluster + abstraction +
277        // episodes. Two-segment path (`/memory/clusters/{id}`) so it
278        // does not shadow the single-segment `/memory/{id}` UUID
279        // inspect route.
280        .route(
281            "/memory/clusters/{cluster_id}",
282            get(inspect_cluster_handler),
283        )
284        // v0.7.0 P6: document operations. Two-segment paths
285        // (`/memory/documents/...`) so they don't shadow the
286        // single-segment `/memory/{id}` episode-inspect route. Order
287        // matters: register the literal `/memory/documents/search`
288        // ahead of `/memory/documents/{id}` so axum's matcher prefers
289        // the literal over the path parameter.
290        .route("/memory/documents/search", post(search_docs_handler))
291        .route(
292            "/memory/documents",
293            post(ingest_document_handler).get(list_documents_handler),
294        )
295        .route(
296            "/memory/documents/{id}",
297            get(inspect_document_handler).delete(forget_document_handler),
298        )
299        // v0.9.x: graph drill-down for solo-web. Read-only neighbor
300        // expansion off any node in the memory graph. See
301        // `docs/dev-log/0105-solo-web-scoping.md` §4 + the impl dev log
302        // for the full `/v1/graph/*` family this is the first of.
303        .route("/v1/graph/expand", get(graph_expand_handler))
304        // v0.10.0: paginated catalog reads for solo-web's initial graph
305        // render. See `docs/dev-log/0114-graph-nodes-edges-impl.md`
306        // alongside the same scoping doc.
307        .route("/v1/graph/nodes", get(graph_nodes_handler))
308        .route("/v1/graph/edges", get(graph_edges_handler))
309        // v0.10.0: kind-discriminated full-record drill for solo-web's
310        // inspector panel. See `docs/dev-log/0115-graph-inspect-impl.md`.
311        .route("/v1/graph/inspect/{id}", get(graph_inspect_handler))
312        // v0.10.0: unified explicit + HNSW-semantic neighbors for solo-
313        // web's "show similar" overlay. See
314        // `docs/dev-log/0116-graph-neighbors-impl.md`.
315        .route("/v1/graph/neighbors/{id}", get(graph_neighbors_handler))
316        // v0.10.0: Server-Sent Events stream of graph-data invalidations
317        // for solo-web's live update story. The wire format is
318        // INVALIDATION-shaped (`{reason, tenant_id, ts_ms, kind}`) per
319        // scoping doc §3 Decision C — clients refetch the affected page
320        // on each event rather than receiving row payloads. See
321        // `docs/dev-log/0117-graph-stream-impl.md`.
322        .route("/v1/graph/stream", get(graph_stream_handler))
323        // Authenticated readiness/status surface for local UIs and
324        // agent bridges. `/health` stays public and tiny; this route
325        // resolves the tenant and reports operator-facing JSON.
326        .route("/v1/status", get(status_handler))
327        // v0.10.0: principal-scoped tenant list for solo-web's top-bar
328        // tenant picker. Read-only — admin CRUD (create/delete) remains
329        // CLI-only per ADR-0004 §"Admin operations". The visibility
330        // filter is principal-driven: no-auth + bearer principals see
331        // every active tenant; OIDC principals see only the tenant
332        // named by their `tenant_claim`. See
333        // `docs/dev-log/0119-tenants-list-impl.md` + scoping doc §3
334        // Decision F + §4 Route 6.
335        .route("/v1/tenants", get(tenants_list_handler))
336        .with_state(state.clone());
337
338    // v0.10.2: MCP-over-HTTP transport on /mcp. Lets one Solo process
339    // serve both `/v1/graph/*` (REST, for solo-web) and `/mcp`
340    // (JSON-RPC, for solo-jarvis) without the
341    // single-writer-per-data-dir lock dance. See
342    // `docs/dev-log/0129-v0.10.2-mcp-over-http-impl.md` for the spec.
343    // POST + GET share the same path; axum's `MethodRouter` muxes by
344    // HTTP method. OPTIONS is handled by the `CorsLayer` (already
345    // wired below) — we don't need an explicit handler.
346    //
347    // v0.11.0 P1: the route gets its own session middleware layer
348    // (`mcp_session_middleware`) that validates the `Mcp-Session-Id`
349    // request header against the per-process `SessionStore`. Expired
350    // / unknown sessions return 404 with a re-init instruction; the
351    // POST handler creates a new session on a request that arrived
352    // without the header and echoes the assigned id back via
353    // `Mcp-Session-Id` response header. The middleware lives on this
354    // sub-router (not the outer `authed`) so the rest of the API
355    // surface is unaffected — only `/mcp` carries session semantics.
356    let mcp_router: Router<SoloHttpState> = Router::new()
357        .route(
358            "/mcp",
359            post(mcp_http_post_handler).get(mcp_http_get_handler),
360        )
361        .layer(axum::middleware::from_fn_with_state(
362            state.mcp_sessions.clone(),
363            crate::mcp_session::mcp_session_middleware,
364        ));
365    let authed = authed.merge(mcp_router.with_state(state.clone()));
366
367    let authed = if let Some(cfg) = auth {
368        // v0.8.0 P3: dispatch via AuthValidator (bearer | OIDC), inserts
369        // AuthenticatedPrincipal into request extensions for the
370        // TenantExtractor + audit-log to read.
371        let validator = Arc::new(AuthValidator::from_config(
372            &cfg,
373            state.default_tenant.clone(),
374        ));
375        authed.layer(axum::middleware::from_fn_with_state(
376            validator,
377            crate::auth::middleware::auth_middleware,
378        ))
379    } else {
380        authed
381    };
382
383    public
384        .merge(authed)
385        .layer(cors)
386        .layer(TraceLayer::new_for_http())
387}
388
389/// Convenience wrapper: no auth (loopback-only deployments).
390pub fn router(state: SoloHttpState) -> Router {
391    router_with_auth_config(state, None)
392}
393
394fn build_cors_layer() -> CorsLayer {
395    // Permissive-localhost CORS: allow any localhost / 127.0.0.1 origin so
396    // browser-based UIs running on a different local port can call the API
397    // without preflight friction. We do NOT use `Any` because that would
398    // allow arbitrary remote origins to talk to our localhost server via
399    // a victim's browser. With bearer-token auth enabled the practical
400    // impact is reduced (the cross-origin attacker still can't supply
401    // the token), but principle of least privilege says refuse anyway.
402    //
403    // When the server is bound to a non-loopback address (auth required),
404    // the same CORS predicate keeps localhost-only browser clients —
405    // suitable for trusted-LAN deployments where the LAN client itself
406    // tunnels through ssh/wireguard back to localhost. Wider CORS for
407    // genuine cross-origin browser use is a future config knob.
408    CorsLayer::new()
409        .allow_origin(AllowOrigin::predicate(|origin: &HeaderValue, _req| {
410            origin.to_str().map(is_localhost_origin).unwrap_or(false)
411        }))
412        .allow_methods([
413            Method::GET,
414            Method::POST,
415            Method::PATCH,
416            Method::DELETE,
417            Method::OPTIONS,
418        ])
419        .allow_headers([
420            axum::http::header::CONTENT_TYPE,
421            axum::http::header::AUTHORIZATION,
422            // Custom Solo headers — browsers preflight-check these and
423            // refuse the actual request if they're not in the allow list.
424            // Without `x-solo-tenant` solo-web's browser fetches all fail
425            // with "Failed to fetch" (CORS preflight rejection).
426            axum::http::HeaderName::from_static("x-solo-tenant"),
427            // v0.10.2: `Mcp-Session-Id` is part of the MCP Streamable
428            // HTTP transport spec (sessions, resumable streams). v0.11.0
429            // P1/P2 implement the real session affinity + resumable GET
430            // stream behind this header; the allow-list entry was
431            // pre-wired in v0.10.2 so browser-based MCP clients that
432            // preflight for it (per the spec) succeed instead of
433            // failing with a CORS error before the first request even
434            // lands.
435            axum::http::HeaderName::from_static("mcp-session-id"),
436            // v0.11.0 P2: `Last-Event-ID` is the SSE-spec header carrying
437            // the client's last-seen event id on reconnect. The
438            // resumable `GET /mcp` handler reads it and replays the
439            // missed events from the per-session ring buffer
440            // (Decision E). Browsers preflight any non-CORS-safelisted
441            // request header; without this entry the preflight fails
442            // before the actual reconnect lands.
443            axum::http::HeaderName::from_static(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER),
444        ])
445}
446
/// True if `origin` is `http(s)://localhost[:port]`,
/// `http(s)://127.0.0.1[:port]`, or `http(s)://[::1][:port]` (loopback
/// IPv6). Anything else is rejected, including:
///
/// * look-alike hosts such as `127.0.0.1.nip.io`;
/// * trailing junk after a bracketed IPv6 literal (`[::1]evil.com`);
/// * userinfo tricks (`localhost:3000@evil.com`);
/// * non-numeric ports (`localhost:evil`).
///
/// The scheme and host comparisons are case-sensitive; browsers
/// serialize the `Origin` header in lowercase.
fn is_localhost_origin(origin: &str) -> bool {
    let Some(rest) = origin
        .strip_prefix("http://")
        .or_else(|| origin.strip_prefix("https://"))
    else {
        return false;
    };

    // Strip path (shouldn't appear on Origin headers but defend anyway).
    let authority = rest.split('/').next().unwrap_or(rest);

    // Split the authority into the host and whatever follows it. For a
    // bracketed IPv6 literal the host runs through the closing `]`;
    // otherwise it ends at the FIRST `:` so nothing after the port
    // separator can be mistaken for part of the host.
    let (host, after_host) = if authority.starts_with('[') {
        match authority.find(']') {
            Some(close) => authority.split_at(close + 1),
            None => return false,
        }
    } else {
        match authority.find(':') {
            Some(colon) => authority.split_at(colon),
            None => (authority, ""),
        }
    };

    // What follows the host must be nothing at all, or `:` plus digits
    // only. An empty port (`localhost:`) stays accepted, as before.
    let port_ok = match after_host.strip_prefix(':') {
        Some(port) => port.bytes().all(|b| b.is_ascii_digit()),
        None => after_host.is_empty(),
    };

    port_ok && matches!(host, "localhost" | "127.0.0.1" | "[::1]")
}
474
475/// Bind + serve (v0.7.x legacy shape). `shutdown` is awaited inside
476/// axum's `with_graceful_shutdown`; resolving it triggers a clean drain.
477/// `bearer_token = None` runs unauthenticated (loopback default);
478/// `Some(t)` requires `Authorization: Bearer t` on every request
479/// except `GET /health` + `GET /openapi.json`.
480pub async fn serve_http(
481    addr: SocketAddr,
482    state: SoloHttpState,
483    bearer_token: Option<String>,
484    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
485) -> std::io::Result<()> {
486    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
487    serve_http_with_auth_config(addr, state, auth, shutdown).await
488}
489
490/// Bind + serve with a config-driven auth block (v0.8.0 P3+).
491/// `auth = None` runs unauthenticated. See [`router_with_auth_config`]
492/// for the auth-mode semantics.
493pub async fn serve_http_with_auth_config(
494    addr: SocketAddr,
495    state: SoloHttpState,
496    auth: Option<AuthConfig>,
497    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
498) -> std::io::Result<()> {
499    let auth_kind = match &auth {
500        Some(AuthConfig::Bearer { .. }) => "bearer",
501        Some(AuthConfig::Oidc { .. }) => "oidc",
502        None => "none",
503    };
504    let app = router_with_auth_config(state, auth);
505    let listener = tokio::net::TcpListener::bind(addr).await?;
506    tracing::info!(%addr, auth = auth_kind, "solo http: listening");
507    axum::serve(listener, app)
508        .with_graceful_shutdown(shutdown)
509        .await
510}
511
512// ---------------------------------------------------------------------------
513// OpenAPI 3.1 spec
514// ---------------------------------------------------------------------------
515
516/// Serve the hand-crafted OpenAPI 3.1 spec at `GET /openapi.json`.
517///
518/// We keep the spec hand-written (rather than deriving via `utoipa`)
519/// for v0.1: 4 simple endpoints, types live across crate boundaries
520/// (`solo_query::RecallResult`, `solo_query::EpisodeRecord`), and a
521/// `utoipa` retrofit would touch every crate. Hand-crafted is one
522/// JSON literal in this file; a smoke test in `handler_tests` parses
523/// the response and asserts the expected paths + components are
524/// present, so drift between spec and code is caught at PR time.
525async fn openapi_handler() -> Json<serde_json::Value> {
526    Json(openapi_spec())
527}
528
529/// Build the OpenAPI 3.1 spec describing Solo's HTTP transport.
530/// Public so the smoke test + future client-codegen tooling can
531/// produce the same document without spinning up the server.
532pub fn openapi_spec() -> serde_json::Value {
533    serde_json::json!({
534        "openapi": "3.1.0",
535        "info": {
536            "title": "Solo HTTP API",
537            "description":
538                "Local-first personal memory daemon. The HTTP transport \
539                 mirrors the MCP memory tools. Default deployment is loopback-only \
540                 (127.0.0.1); LAN-bound deployments require a bearer \
541                 token via `solo http-serve --bind <ip> --bearer-token-file <path>`.",
542            "version": env!("CARGO_PKG_VERSION"),
543            "license": { "name": "Apache-2.0" }
544        },
545        "servers": [
546            { "url": "http://127.0.0.1:7437", "description": "Default loopback (replace port with your --http-port)" }
547        ],
548        "components": {
549            "securitySchemes": {
550                "bearerAuth": {
551                    "type": "http",
552                    "scheme": "bearer",
553                    "description":
554                        "Bearer-token auth. Required only on LAN-bound deployments \
555                         (`solo http-serve --bind <non-loopback> --bearer-token-file <path>`); \
556                         the default `127.0.0.1` deployment is unauthenticated. \
557                         `GET /health` and `GET /openapi.json` are exempt from auth even \
558                         on bearer-protected instances."
559                }
560            },
561            "schemas": {
562                "RememberRequest": {
563                    "type": "object",
564                    "required": ["content"],
565                    "properties": {
566                        "content": { "type": "string", "minLength": 1, "description": "Episode content to embed + store." },
567                        "source_type": { "type": "string", "description": "Free-form source tag (e.g. `user_message`, `tool_output`). Defaults to `user_message`." },
568                        "source_id": { "type": "string", "description": "Optional upstream ID for traceability." },
569                        "salience": { "type": "number", "minimum": 0.0, "maximum": 1.0, "default": 0.5, "description": "v0.9.2+. Optional priority hint in [0.0, 1.0]; defaults to 0.5. Parity with the `memory_remember` MCP tool." }
570                    },
571                    "additionalProperties": false
572                },
573                "RememberResponse": {
574                    "type": "object",
575                    "required": ["memory_id"],
576                    "properties": {
577                        "memory_id": { "type": "string", "format": "uuid", "description": "UUID v7 assigned to the new episode." }
578                    }
579                },
580                "RecallRequest": {
581                    "type": "object",
582                    "required": ["query"],
583                    "properties": {
584                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query; embedded by the same model as stored episodes." },
585                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5, "description": "Max number of hits to return. Server clamps to [1, 100]." }
586                    },
587                    "additionalProperties": false
588                },
589                "RecallResult": {
590                    "type": "object",
591                    "description":
592                        "Recall response. Fields are stable across v0.1 but not exhaustively documented here — \
593                         see `solo_query::RecallResult` in the source for the canonical shape. \
594                         Treat as a forward-compatible JSON object.",
595                    "additionalProperties": true
596                },
597                "MemoryContextRequest": {
598                    "type": "object",
599                    "required": ["query"],
600                    "properties": {
601                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query for episodic recall." },
602                        "subject": { "type": "string", "description": "Optional subject for structured facts; when present, facts also match object-position references." },
603                        "window_days": { "type": "integer", "minimum": 1, "description": "Optional recency window for themes." },
604                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5, "description": "Per-section result limit." }
605                    },
606                    "additionalProperties": false
607                },
608                "MemoryContextResult": {
609                    "type": "object",
610                    "description": "Agent-oriented memory context bundle: recall, themes, facts, and contradictions.",
611                    "additionalProperties": true
612                },
613                "MemoryUpdateRequest": {
614                    "type": "object",
615                    "required": ["content"],
616                    "properties": {
617                        "content": { "type": "string", "minLength": 1, "description": "Replacement content for the active memory." }
618                    },
619                    "additionalProperties": false
620                },
621                "MemoryUpdateResult": {
622                    "type": "object",
623                    "description": "Result of PATCH /memory/{id}. See `solo_query::MemoryUpdateResult`.",
624                    "additionalProperties": true
625                },
626                "ConsolidationScope": { // Optional request body of POST /memory/consolidate.
627                    "type": "object",
628                    "description": "Filter + flags for consolidation. All fields optional; empty body = unbounded defaults.",
629                    "properties": {
630                        "window_days": { "type": ["integer", "null"], "description": "Restrict to memories with ts_ms >= now - window_days * 86400000. Null/omitted = unbounded." }, // OpenAPI 3.1 type-array nullability; `nullable` is a 3.0-only keyword.
631                        "force_merge": { "type": "boolean", "default": false, "description": "Run the existing-vs-existing merge + abstraction-regen passes even with zero unclustered candidates. Drift catch-up on quiet corpora. Added in 0.3.1." }
632                    },
633                    "additionalProperties": false
634                },
635                "ConsolidationReport": {
636                    "type": "object",
637                    "required": [
638                        "episodes_seen", "clusters_built", "clusters_merged",
639                        "clusters_absorbed", "existing_clusters_merged",
640                        "episodes_clustered", "abstractions_built",
641                        "abstractions_regenerated", "triples_built",
642                        "contradictions_found"
643                    ],
644                    "properties": {
645                        "episodes_seen":             { "type": "integer", "minimum": 0 },
646                        "clusters_built":            { "type": "integer", "minimum": 0, "description": "Brand-new clusters that survived to be persisted (post in-run-merge, post cross-run-absorb)." },
647                        "clusters_merged":           { "type": "integer", "minimum": 0, "description": "In-run merge: clusters absorbed into a sibling within this consolidate run (cross-UTC-bucket case). Counts losers." },
648                        "clusters_absorbed":         { "type": "integer", "minimum": 0, "description": "Cross-run absorb: freshly-built clusters folded into a pre-existing DB cluster with a similar centroid. Counts new-side clusters." },
649                        "existing_clusters_merged":  { "type": "integer", "minimum": 0, "description": "Existing-vs-existing merge: pre-existing DB clusters that drifted toward each other and now coalesce. Counts losers." },
650                        "episodes_clustered":        { "type": "integer", "minimum": 0 },
651                        "abstractions_built":        { "type": "integer", "minimum": 0, "description": "Fresh abstractions persisted for newly-built clusters. 0 when no LlmClient is wired." },
652                        "abstractions_regenerated":  { "type": "integer", "minimum": 0, "description": "Existing clusters whose stale abstractions were dropped and rebuilt because absorb or existing-merge changed their episode set. 0 without an LlmClient." },
653                        "triples_built":             { "type": "integer", "minimum": 0 },
654                        "contradictions_found":      { "type": "integer", "minimum": 0 }
655                    }
656                },
657                "EpisodeRecord": {
658                    "type": "object",
659                    "description":
660                        "Inspect response: full episode record. Fields are stable across v0.1 but not \
661                         exhaustively documented here — see `solo_query::EpisodeRecord` in the source. \
662                         Treat as a forward-compatible JSON object.",
663                    "additionalProperties": true
664                },
665                "ThemeHit": {
666                    "type": "object",
667                    "description":
668                        "One cluster + its (optional) abstraction. Returned by GET /memory/themes. \
669                         See `solo_query::ThemeHit` for the canonical shape: cluster_id, \
670                         abstraction_id?, abstraction_text?, episode_count, coherence, created_at_ms.",
671                    "additionalProperties": true
672                },
673                "FactHit": {
674                    "type": "object",
675                    "description":
676                        "One Steward-extracted SPO triple. Returned by GET /memory/facts_about. \
677                         See `solo_query::FactHit` for fields: triple_id, subject_id, predicate, \
678                         object_id, object_kind, valid_from_ms, valid_to_ms?, confidence, cluster_id?.",
679                    "additionalProperties": true
680                },
681                "EntityHit": {
682                    "type": "object",
683                    "description":
684                        "One discovered entity-like id from the structured-fact graph. Returned by \
685                         GET /memory/entities. See `solo_query::EntityHit`.",
686                    "additionalProperties": true
687                },
688                "ContradictionHit": {
689                    "type": "object",
690                    "description":
691                        "One Steward-flagged contradiction with each side's triple LEFT JOIN'd in. \
692                         Returned by GET /memory/contradictions. See `solo_query::ContradictionHit`: \
693                         a_id, b_id, kind, explanation, detected_at_ms, status, resolved_at_ms?, \
694                         resolution_note?, winning_triple_id?, a_triple?, b_triple?.",
695                    "additionalProperties": true
696                },
697                "ContradictionResolveRequest": {
698                    "type": "object",
699                    "required": ["a_id", "b_id", "kind"],
700                    "properties": {
701                        "a_id": { "type": "string", "minLength": 1 },
702                        "b_id": { "type": "string", "minLength": 1 },
703                        "kind": { "type": "string", "minLength": 1 },
704                        "status": {
705                            "type": "string",
706                            "enum": ["unresolved", "resolved", "reopened"],
707                            "default": "resolved"
708                        },
709                        "resolution_note": { "type": "string" },
710                        "winning_triple_id": { "type": "string" }
711                    },
712                    "additionalProperties": false
713                },
714                "ContradictionResolution": {
715                    "type": "object",
716                    "description": "Lifecycle update result for POST /memory/contradictions/resolve.",
717                    "additionalProperties": true
718                },
719                "ClusterRecord": {
720                    "type": "object",
721                    "description":
722                        "Snapshot of one cluster — its row, optional abstraction, and source episodes \
723                         (content truncated to 200 chars unless ?full_content=true). Returned by \
724                         GET /memory/clusters/{cluster_id}. See `solo_query::ClusterRecord`.",
725                    "additionalProperties": true
726                },
727                "IngestDocumentRequest": {
728                    "type": "object",
729                    "required": ["path"],
730                    "properties": {
731                        "path": {
732                            "type": "string",
733                            "minLength": 1,
734                            "description":
735                                "Server-side absolute path to the file to ingest. The file must be \
736                                 readable by the Solo process. Supported formats: plaintext / \
737                                 markdown / code, HTML, PDF."
738                        }
739                    },
740                    "additionalProperties": false
741                },
742                "IngestReport": {
743                    "type": "object",
744                    "description":
745                        "Returned by POST /memory/documents. Reports the document id assigned, \
746                         the number of chunks persisted + embedded, the total byte size, and a \
747                         `deduped` flag (true when the same content_hash was already present and \
748                         the existing doc_id was returned unchanged). See `solo_storage::IngestReport`.",
749                    "required": ["doc_id", "chunks_persisted", "bytes_ingested", "deduped"],
750                    "properties": {
751                        "doc_id":            { "type": "string", "format": "uuid" },
752                        "chunks_persisted":  { "type": "integer", "minimum": 0 },
753                        "bytes_ingested":    { "type": "integer", "minimum": 0, "format": "int64" },
754                        "deduped":           { "type": "boolean" }
755                    },
756                    "additionalProperties": false
757                },
758                "ForgetDocumentReport": {
759                    "type": "object",
760                    "description":
761                        "Returned by DELETE /memory/documents/{id}. Reports the doc_id soft-deleted \
762                         and how many chunk rowids were tombstoned in the HNSW index. The chunk rows \
763                         themselves survive in SQL for forensic value. See `solo_storage::ForgetDocumentReport`.",
764                    "required": ["doc_id", "chunks_tombstoned"],
765                    "properties": {
766                        "doc_id":             { "type": "string", "format": "uuid" },
767                        "chunks_tombstoned":  { "type": "integer", "minimum": 0 }
768                    },
769                    "additionalProperties": false
770                },
771                "SearchDocsRequest": {
772                    "type": "object",
773                    "required": ["query"],
774                    "properties": {
775                        "query": { "type": "string", "minLength": 1 },
776                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 }
777                    },
778                    "additionalProperties": false
779                },
780                "DocSearchHit": {
781                    "type": "object",
782                    "description":
783                        "One chunk hit + parent-doc context. Fields per `solo_query::DocSearchHit`: \
784                         chunk_id, doc_id, doc_title?, doc_source?, doc_mime_type?, chunk_index, \
785                         content, cos_distance, start_offset, end_offset.",
786                    "additionalProperties": true
787                },
788                "DocumentInspectResult": {
789                    "type": "object",
790                    "description":
791                        "Returned by GET /memory/documents/{id}. A `document` record (full metadata) \
792                         plus an ordered list of chunk summaries (each preview truncated to 200 \
793                         chars). See `solo_query::DocumentInspectResult`.",
794                    "additionalProperties": true
795                },
796                "DocumentSummary": {
797                    "type": "object",
798                    "description":
799                        "One row from GET /memory/documents. Fields per `solo_query::DocumentSummary`: \
800                         doc_id, title?, source?, mime_type?, ingested_at_ms, chunk_count, status.",
801                    "additionalProperties": true
802                },
803                "GraphNode": {
804                    "type": "object",
805                    "required": ["id", "kind", "label", "tenant_id"],
806                    "properties": {
807                        "id": { "type": "string", "description": "Prefixed graph node id, e.g. ep:<uuid>, doc:<uuid>, chunk:<uuid>, cl:<id>, ent:<value>." },
808                        "kind": { "type": "string", "enum": ["episode", "document", "chunk", "cluster", "entity"] },
809                        "label": { "type": "string" },
810                        "tenant_id": { "type": "string" },
811                        "preview": { "type": ["string", "null"] },
812                        "score": { "type": ["number", "null"] },
813                        "meta": { "type": ["object", "null"], "additionalProperties": true }
814                    },
815                    "additionalProperties": true
816                },
817                "GraphEdge": {
818                    "type": "object",
819                    "required": ["id", "source", "target", "kind"],
820                    "properties": {
821                        "id": { "type": "string" },
822                        "source": { "type": "string" },
823                        "target": { "type": "string" },
824                        "kind": { "type": "string" },
825                        "label": { "type": ["string", "null"] },
826                        "weight": { "type": ["number", "null"] },
827                        "meta": { "type": ["object", "null"], "additionalProperties": true }
828                    },
829                    "additionalProperties": true
830                },
831                "GraphResponse": {
832                    "type": "object",
833                    "required": ["nodes", "edges"],
834                    "properties": {
835                        "nodes": { "type": "array", "items": { "$ref": "#/components/schemas/GraphNode" } },
836                        "edges": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } }
837                    }
838                },
839                "GraphNodesResponse": {
840                    "type": "object",
841                    "required": ["nodes"],
842                    "properties": {
843                        "nodes": { "type": "array", "items": { "$ref": "#/components/schemas/GraphNode" } },
844                        "next_cursor": { "type": ["string", "null"] }
845                    }
846                },
847                "GraphEdgesResponse": {
848                    "type": "object",
849                    "required": ["edges"],
850                    "properties": {
851                        "edges": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } },
852                        "next_cursor": { "type": ["string", "null"] }
853                    }
854                },
855                "GraphInspectResponse": {
856                    "type": "object",
857                    "required": ["node"],
858                    "properties": {
859                        "node": { "$ref": "#/components/schemas/GraphNode" },
860                        "record": { "type": ["object", "null"], "additionalProperties": true },
861                        "triples_in": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } },
862                        "triples_out": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } }
863                    },
864                    "additionalProperties": true
865                },
866                "TenantListItem": {
867                    "type": "object",
868                    "required": ["id", "display_name", "created_at_ms", "last_accessed_ms", "status", "quota_bytes", "episode_count", "size_bytes", "pct_used"],
869                    "properties": {
870                        "id": { "type": "string" },
871                        "display_name": { "type": ["string", "null"] },
872                        "created_at_ms": { "type": "integer", "format": "int64" },
873                        "last_accessed_ms": { "type": ["integer", "null"], "format": "int64" },
874                        "status": { "type": "string", "enum": ["active"] },
875                        "quota_bytes": { "type": ["integer", "null"], "minimum": 0 },
876                        "episode_count": { "type": ["integer", "null"], "minimum": 0 },
877                        "size_bytes": { "type": ["integer", "null"], "minimum": 0 },
878                        "pct_used": { "type": ["number", "null"], "minimum": 0, "maximum": 100 }
879                    }
880                },
881                "TenantsListResponse": {
882                    "type": "object",
883                    "required": ["tenants"],
884                    "properties": {
885                        "tenants": { "type": "array", "items": { "$ref": "#/components/schemas/TenantListItem" } }
886                    }
887                },
888                "StatusResponse": {
889                    "type": "object",
890                    "required": ["ok", "version", "tenant", "embedder", "active_tenants", "mcp"],
891                    "properties": {
892                        "ok": { "type": "boolean" },
893                        "version": { "type": "string" },
894                        "tenant": {
895                            "type": "object",
896                            "required": ["id", "registered", "status", "quota_bytes", "last_accessed_ms"],
897                            "properties": {
898                                "id": { "type": "string" },
899                                "registered": { "type": "boolean" },
900                                "status": { "type": ["string", "null"], "enum": ["active", null] },
901                                "quota_bytes": { "type": ["integer", "null"], "minimum": 0 },
902                                "last_accessed_ms": { "type": ["integer", "null"], "format": "int64" }
903                            }
904                        },
905                        "embedder": {
906                            "type": "object",
907                            "required": ["name", "version", "dim", "dtype"],
908                            "properties": {
909                                "name": { "type": "string" },
910                                "version": { "type": "string" },
911                                "dim": { "type": "integer", "minimum": 1 },
912                                "dtype": { "type": "string" }
913                            }
914                        },
915                        "active_tenants": { "type": "integer", "minimum": 0 },
916                        "mcp": {
917                            "type": "object",
918                            "required": ["sessions"],
919                            "properties": {
920                                "sessions": { "type": "integer", "minimum": 0 }
921                            }
922                        }
923                    }
924                },
925                "JsonRpcRequest": {
926                    "type": "object",
927                    "required": ["jsonrpc", "method"],
928                    "properties": {
929                        "jsonrpc": { "type": "string", "enum": ["2.0"] },
930                        "id": { "description": "String or number request id. Omit for notifications." },
931                        "method": { "type": "string" },
932                        "params": { "type": ["object", "array", "null"], "additionalProperties": true }
933                    },
934                    "additionalProperties": true
935                },
936                "JsonRpcResponse": {
937                    "type": "object",
938                    "required": ["jsonrpc", "id"],
939                    "properties": {
940                        "jsonrpc": { "type": "string", "enum": ["2.0"] },
941                        "id": {},
942                        "result": {},
943                        "error": {
944                            "type": "object",
945                            "required": ["code", "message"],
946                            "properties": {
947                                "code": { "type": "integer" },
948                                "message": { "type": "string" },
949                                "data": {}
950                            }
951                        }
952                    },
953                    "additionalProperties": true
954                },
955                "ApiError": {
956                    "type": "object",
957                    "required": ["error", "status"],
958                    "properties": {
959                        "error": { "type": "string" },
960                        "status": { "type": "integer", "minimum": 400, "maximum": 599 }
961                    }
962                }
963            }
964        },
965        "paths": {
966            "/health": {
967                "get": {
968                    "summary": "Liveness probe",
969                    "description": "Returns plain text `ok`. Always unauthenticated.",
970                    "responses": {
971                        "200": {
972                            "description": "Server is up.",
973                            "content": { "text/plain": { "schema": { "type": "string", "example": "ok" } } }
974                        }
975                    }
976                }
977            },
978            "/openapi.json": {
979                "get": {
980                    "summary": "Self-describing OpenAPI 3.1 spec",
981                    "description": "Returns this document. Always unauthenticated.",
982                    "responses": {
983                        "200": {
984                            "description": "OpenAPI 3.1 document.",
985                            "content": { "application/json": { "schema": { "type": "object" } } }
986                        }
987                    }
988                }
989            },
990            "/memory": {
991                "post": {
992                    "summary": "Remember (store an episode)",
993                    "description": "Equivalent to MCP tool `memory_remember`.",
994                    "security": [{ "bearerAuth": [] }, {}],
995                    "requestBody": {
996                        "required": true,
997                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberRequest" } } }
998                    },
999                    "responses": {
1000                        "200": {
1001                            "description": "Memory stored; returns the new MemoryId.",
1002                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberResponse" } } }
1003                        },
1004                        "400": { "description": "Bad request (e.g. empty content).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1005                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1006                    }
1007                }
1008            },
1009            "/memory/search": {
1010                "post": {
1011                    "summary": "Recall (vector search)",
1012                    "description": "Equivalent to MCP tool `memory_recall`. Embeds the query, runs HNSW search, returns the top-K hits in cosine-distance order.",
1013                    "security": [{ "bearerAuth": [] }, {}],
1014                    "requestBody": {
1015                        "required": true,
1016                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallRequest" } } }
1017                    },
1018                    "responses": {
1019                        "200": {
1020                            "description": "Search results.",
1021                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallResult" } } }
1022                        },
1023                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1024                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1025                    }
1026                }
1027            },
1028            "/memory/context": {
1029                "post": {
1030                    "summary": "Build agent memory context",
1031                    "description": "Equivalent to MCP tool `memory_context`. Returns one bounded bundle containing episodic recall, recent themes, optional facts about a subject, and contradictions.",
1032                    "security": [{ "bearerAuth": [] }, {}],
1033                    "requestBody": {
1034                        "required": true,
1035                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryContextRequest" } } }
1036                    },
1037                    "responses": {
1038                        "200": {
1039                            "description": "Combined memory context.",
1040                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryContextResult" } } }
1041                        },
1042                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1043                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1044                    }
1045                }
1046            },
1047            "/memory/consolidate": {
1048                "post": {
1049                    "summary": "Run a consolidation pass (clustering + abstraction)",
1050                    "description":
1051                        "Idempotent. Triggers the SWS-equivalent clustering pass; if a `Steward` LLM is wired \
1052                         on the server, also runs the REM-equivalent abstraction pass that populates \
1053                         `semantic_abstractions` and `triples`. Empty request body = default scope (unbounded \
1054                         window). Equivalent to the `solo consolidate` CLI.",
1055                    "security": [{ "bearerAuth": [] }, {}],
1056                    "requestBody": {
1057                        "required": false,
1058                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationScope" } } }
1059                    },
1060                    "responses": {
1061                        "200": {
1062                            "description": "Consolidation complete; report counts the work done.",
1063                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationReport" } } }
1064                        },
1065                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1066                    }
1067                }
1068            },
1069            "/backup": {
1070                "post": {
1071                    "summary": "Online encrypted backup",
1072                    "description":
1073                        "Run an online SQLCipher backup of the live data dir to a server-side path. \
1074                         The destination file is encrypted with the same Argon2id-derived raw key as \
1075                         the source, so it restores under the same passphrase + a copy of the source's \
1076                         `solo.config.toml`. Hot — the backup runs against the writer's existing \
1077                         connection without taking the lockfile, so the daemon keeps serving reads + \
1078                         writes during the operation. v0.3.2+.",
1079                    "security": [{ "bearerAuth": [] }, {}],
1080                    "requestBody": {
1081                        "required": true,
1082                        "content": { "application/json": { "schema": {
1083                            "type": "object",
1084                            "properties": {
1085                                "to": { "type": "string", "description": "Server-side absolute path for the backup file." },
1086                                "force": { "type": "boolean", "description": "Overwrite an existing destination file. Default false.", "default": false }
1087                            },
1088                            "required": ["to"]
1089                        } } }
1090                    },
1091                    "responses": {
1092                        "200": {
1093                            "description": "Backup complete; reports the destination path + elapsed milliseconds.",
1094                            "content": { "application/json": { "schema": {
1095                                "type": "object",
1096                                "properties": {
1097                                    "path": { "type": "string" },
1098                                    "elapsed_ms": { "type": "integer", "format": "int64" }
1099                                }
1100                            } } }
1101                        },
1102                        "400": { "description": "Destination invalid, exists without force, or its parent doesn't exist." },
1103                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1104                        "500": { "description": "Backup failed (disk full, permission denied, etc.)." }
1105                    }
1106                }
1107            },
1108            "/memory/{id}": {
1109                "get": {
1110                    "summary": "Inspect a memory by ID",
1111                    "description": "Equivalent to MCP tool `memory_inspect`.",
1112                    "security": [{ "bearerAuth": [] }, {}],
1113                    "parameters": [{
1114                        "name": "id",
1115                        "in": "path",
1116                        "required": true,
1117                        "schema": { "type": "string", "format": "uuid" },
1118                        "description": "MemoryId (UUID v7)."
1119                    }],
1120                    "responses": {
1121                        "200": {
1122                            "description": "Episode record.",
1123                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/EpisodeRecord" } } }
1124                        },
1125                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1126                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1127                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1128                    }
1129                },
1130                "patch": {
1131                    "summary": "Correct/update a single active memory",
1132                    "description":
1133                        "Equivalent to MCP tool `memory_update`. Rewrites the active episode content, \
1134                         refreshes its embedding, updates the pending index/HNSW entry, and records \
1135                         an audit event. Forgotten memories cannot be updated.",
1136                    "security": [{ "bearerAuth": [] }, {}],
1137                    "parameters": [
1138                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1139                    ],
1140                    "requestBody": {
1141                        "required": true,
1142                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryUpdateRequest" } } }
1143                    },
1144                    "responses": {
1145                        "200": {
1146                            "description": "Updated memory metadata.",
1147                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryUpdateResult" } } }
1148                        },
1149                        "400": { "description": "Malformed ID or empty content.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1150                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1151                        "409": { "description": "Memory exists but is not active.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1152                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1153                    }
1154                },
1155                "delete": {
1156                    "summary": "Forget (soft-delete) a memory by ID",
1157                    "description":
1158                        "Equivalent to MCP tool `memory_forget`. Soft-delete: flips `episodes.status = 'forgotten'` \
1159                         and tombstones the HNSW vector. The row + embedding are preserved for forensics; \
1160                         re-running `solo reembed` after this does NOT restore visibility.",
1161                    "security": [{ "bearerAuth": [] }, {}],
1162                    "parameters": [
1163                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } },
1164                        { "name": "reason", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Free-form reason logged via tracing (not yet persisted to the DB)." }
1165                    ],
1166                    "responses": {
1167                        "204": { "description": "Forgotten (or already forgotten — idempotent)." },
1168                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1169                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1170                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1171                    }
1172                }
1173            },
1174            "/memory/themes": {
1175                "get": {
1176                    "summary": "List recent cluster themes",
1177                    "description":
1178                        "Equivalent to MCP tool `memory_themes`. List cluster abstractions ordered by \
1179                         most-recent first. Use to surface 'what has the user been thinking about lately' \
1180                         without paging through individual episodes. v0.4.0+.",
1181                    "security": [{ "bearerAuth": [] }, {}],
1182                    "parameters": [
1183                        { "name": "window_days", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1 }, "description": "Optional time window. Omit for unfiltered (all-time, most-recent first)." },
1184                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1185                    ],
1186                    "responses": {
1187                        "200": {
1188                            "description": "Array of ThemeHits (possibly empty).",
1189                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ThemeHit" } } } }
1190                        },
1191                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1192                    }
1193                }
1194            },
1195            "/memory/facts_about": {
1196                "get": {
1197                    "summary": "Query the SPO knowledge graph by subject",
1198                    "description":
1199                        "Equivalent to MCP tool `memory_facts_about`. Query Steward-extracted triples by \
1200                         subject + optional predicate + optional time window. Subject is required \
1201                         (predicate-only scans not supported). Pass `include_as_object=true` (v0.5.1+) \
1202                         to also surface rows where `subject` appears as the object. v0.4.0+.",
1203                    "security": [{ "bearerAuth": [] }, {}],
1204                    "parameters": [
1205                        { "name": "subject", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Subject id to query (e.g. `Sam`)." },
1206                        { "name": "predicate", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Optional predicate filter (e.g. `works_at`)." },
1207                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_from_ms lower bound (epoch ms)." },
1208                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through." },
1209                        { "name": "include_as_object", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, also match rows where `subject` appears as the object (e.g. surface 'Sam pushes back on PRs about Maya' under subject='Maya'). Default false. v0.5.1+." },
1210                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1211                    ],
1212                    "responses": {
1213                        "200": {
1214                            "description": "Array of FactHits (possibly empty).",
1215                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/FactHit" } } } }
1216                        },
1217                        "400": { "description": "Bad request (e.g. empty subject).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1218                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1219                    }
1220                }
1221            },
1222            "/memory/entities": {
1223                "get": {
1224                    "summary": "Discover structured-graph entities",
1225                    "description":
1226                        "Equivalent to MCP tool `memory_entities`. Searches entity-like ids found in \
1227                         active triples and returns counts plus common predicates. Use before \
1228                         `/memory/facts_about` when the exact subject id is uncertain.",
1229                    "security": [{ "bearerAuth": [] }, {}],
1230                    "parameters": [
1231                        { "name": "query", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Partial or exact entity id." },
1232                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1233                    ],
1234                    "responses": {
1235                        "200": {
1236                            "description": "Array of EntityHits (possibly empty).",
1237                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/EntityHit" } } } }
1238                        },
1239                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1240                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1241                    }
1242                }
1243            },
1244            "/memory/contradictions": {
1245                "get": {
1246                    "summary": "List Steward-flagged contradictions",
1247                    "description":
1248                        "Equivalent to MCP tool `memory_contradictions`. Each result includes both \
1249                         sides' triple SPO via LEFT JOIN for context. v0.4.0+.",
1250                    "security": [{ "bearerAuth": [] }, {}],
1251                    "parameters": [
1252                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1253                    ],
1254                    "responses": {
1255                        "200": {
1256                            "description": "Array of ContradictionHits (possibly empty).",
1257                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ContradictionHit" } } } }
1258                        },
1259                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1260                    }
1261                }
1262            },
1263            "/memory/contradictions/resolve": {
1264                "post": {
1265                    "summary": "Resolve or reopen a contradiction",
1266                    "description":
1267                        "Equivalent to MCP tool `memory_contradiction_resolve`. Updates the lifecycle \
1268                         fields on one contradiction row after the user clarifies which memory is current.",
1269                    "security": [{ "bearerAuth": [] }, {}],
1270                    "requestBody": {
1271                        "required": true,
1272                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ContradictionResolveRequest" } } }
1273                    },
1274                    "responses": {
1275                        "200": {
1276                            "description": "Contradiction lifecycle update result.",
1277                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ContradictionResolution" } } }
1278                        },
1279                        "400": { "description": "Bad request (missing ids/kind or invalid status).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1280                        "404": { "description": "No matching contradiction.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1281                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1282                    }
1283                }
1284            },
1285            "/memory/clusters/{cluster_id}": {
1286                "get": {
1287                    "summary": "Inspect a single cluster",
1288                    "description":
1289                        "Equivalent to MCP tool `memory_inspect_cluster`. Returns the cluster row, \
1290                         its (optional) abstraction, and its source episodes. By default each \
1291                         episode's `content` is truncated to 200 chars with a trailing `…`. Pass \
1292                         `?full_content=true` to get verbatim episode content. v0.5.0+.",
1293                    "security": [{ "bearerAuth": [] }, {}],
1294                    "parameters": [
1295                        { "name": "cluster_id", "in": "path", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Cluster id (from a previous GET /memory/themes response)." },
1296                        { "name": "full_content", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, return episode content verbatim. Default false (truncate to 200 chars + ellipsis)." }
1297                    ],
1298                    "responses": {
1299                        "200": {
1300                            "description": "Cluster snapshot.",
1301                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ClusterRecord" } } }
1302                        },
1303                        "400": { "description": "Bad request (e.g. empty cluster_id).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1304                        "404": { "description": "No such cluster.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1305                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1306                    }
1307                }
1308            },
1309            "/memory/documents": {
1310                "post": {
1311                    "summary": "Ingest a document",
1312                    "description":
1313                        "Equivalent to MCP tool `memory_ingest_document`. Reads the file at the \
1314                         supplied server-side path, parses + chunks + embeds, and persists under \
1315                         `documents` + `document_chunks`. Returns the new doc_id, chunk count, and \
1316                         a `deduped` flag (true when an existing document with the same content_hash \
1317                         was returned without re-embedding). v0.7.0+.",
1318                    "security": [{ "bearerAuth": [] }, {}],
1319                    "requestBody": {
1320                        "required": true,
1321                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestDocumentRequest" } } }
1322                    },
1323                    "responses": {
1324                        "200": {
1325                            "description": "Document ingested (or deduplicated).",
1326                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestReport" } } }
1327                        },
1328                        "400": { "description": "Bad request (e.g. empty path, file unreadable, parse error).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1329                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1330                    }
1331                },
1332                "get": {
1333                    "summary": "List ingested documents (paginated)",
1334                    "description":
1335                        "Equivalent to MCP tool `memory_list_documents`. Returns a paginated index, \
1336                         newest first. Forgotten documents are hidden by default; pass \
1337                         `?include_forgotten=true` to see them too. v0.7.0+.",
1338                    "security": [{ "bearerAuth": [] }, {}],
1339                    "parameters": [
1340                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 20 } },
1341                        { "name": "offset", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 0, "default": 0 } },
1342                        { "name": "include_forgotten", "in": "query", "required": false, "schema": { "type": "boolean", "default": false } }
1343                    ],
1344                    "responses": {
1345                        "200": {
1346                            "description": "Array of DocumentSummary (possibly empty).",
1347                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocumentSummary" } } } }
1348                        },
1349                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1350                    }
1351                }
1352            },
1353            "/memory/documents/search": {
1354                "post": {
1355                    "summary": "Vector search across document chunks",
1356                    "description":
1357                        "Equivalent to MCP tool `memory_search_docs`. Embeds the query and returns \
1358                         up to `limit` matching chunks, best match first, each annotated with the \
1359                         parent document's title + source path. Forgotten documents are excluded. \
1360                         v0.7.0+.",
1361                    "security": [{ "bearerAuth": [] }, {}],
1362                    "requestBody": {
1363                        "required": true,
1364                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SearchDocsRequest" } } }
1365                    },
1366                    "responses": {
1367                        "200": {
1368                            "description": "Array of DocSearchHits (possibly empty).",
1369                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocSearchHit" } } } }
1370                        },
1371                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1372                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1373                    }
1374                }
1375            },
1376            "/memory/documents/{id}": {
1377                "get": {
1378                    "summary": "Inspect one document",
1379                    "description":
1380                        "Equivalent to MCP tool `memory_inspect_document`. Returns the document's \
1381                         metadata plus a preview of every chunk (truncated to 200 chars). v0.7.0+.",
1382                    "security": [{ "bearerAuth": [] }, {}],
1383                    "parameters": [
1384                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "DocumentId (UUID v7)." }
1385                    ],
1386                    "responses": {
1387                        "200": {
1388                            "description": "Document inspection result.",
1389                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DocumentInspectResult" } } }
1390                        },
1391                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1392                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1393                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1394                    }
1395                },
1396                "delete": {
1397                    "summary": "Forget (soft-delete) one document",
1398                    "description":
1399                        "Equivalent to MCP tool `memory_forget_document`. Flips `documents.status` \
1400                         to `forgotten` and tombstones every chunk's HNSW rowid. The chunk rows \
1401                         survive in SQL for forensic value. v0.7.0+.",
1402                    "security": [{ "bearerAuth": [] }, {}],
1403                    "parameters": [
1404                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1405                    ],
1406                    "responses": {
1407                        "200": {
1408                            "description": "Document soft-deleted; report counts chunks tombstoned.",
1409                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForgetDocumentReport" } } }
1410                        },
1411                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1412                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1413                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1414                    }
1415                }
1416            },
1417            "/v1/graph/expand": {
1418                "get": {
1419                    "summary": "Expand one graph node",
1420                    "description": "Return neighboring nodes and edges for one graph node id. Powers solo-web graph expansion.",
1421                    "security": [{ "bearerAuth": [] }, {}],
1422                    "parameters": [
1423                        { "name": "node_id", "in": "query", "required": true, "schema": { "type": "string" } },
1424                        { "name": "kind", "in": "query", "required": true, "schema": { "type": "string", "enum": ["cluster_member", "document_chunk", "triple", "semantic"] } },
1425                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 25 } }
1426                    ],
1427                    "responses": {
1428                        "200": { "description": "Expanded graph neighborhood.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphResponse" } } } },
1429                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1430                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1431                        "404": { "description": "Tenant or node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1432                    }
1433                }
1434            },
1435            "/v1/graph/nodes": {
1436                "get": {
1437                    "summary": "List graph nodes",
1438                    "description": "Paginated graph-node catalog used by solo-web's initial render.",
1439                    "security": [{ "bearerAuth": [] }, {}],
1440                    "parameters": [
1441                        { "name": "kind", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Comma-separated node kinds, e.g. episode,document,entity." },
1442                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 50 } },
1443                        { "name": "cursor", "in": "query", "required": false, "schema": { "type": "string" } },
1444                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer", "format": "int64" } },
1445                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer", "format": "int64" } }
1446                    ],
1447                    "responses": {
1448                        "200": { "description": "Page of graph nodes.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphNodesResponse" } } } },
1449                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1450                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1451                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1452                    }
1453                }
1454            },
1455            "/v1/graph/edges": {
1456                "get": {
1457                    "summary": "List graph edges",
1458                    "description": "Paginated graph-edge catalog for explicit graph relations. Semantic HNSW edges are exposed through /v1/graph/neighbors/{id}.",
1459                    "security": [{ "bearerAuth": [] }, {}],
1460                    "parameters": [
1461                        { "name": "type", "in": "query", "required": false, "schema": { "type": "string" } },
1462                        { "name": "node_id", "in": "query", "required": false, "schema": { "type": "string" } },
1463                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 50 } },
1464                        { "name": "cursor", "in": "query", "required": false, "schema": { "type": "string" } }
1465                    ],
1466                    "responses": {
1467                        "200": { "description": "Page of graph edges.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphEdgesResponse" } } } },
1468                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1469                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1470                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1471                    }
1472                }
1473            },
1474            "/v1/graph/inspect/{id}": {
1475                "get": {
1476                    "summary": "Inspect one graph node",
1477                    "description": "Kind-discriminated full-record drill for solo-web's inspector panel.",
1478                    "security": [{ "bearerAuth": [] }, {}],
1479                    "parameters": [
1480                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } }
1481                    ],
1482                    "responses": {
1483                        "200": { "description": "Graph node inspection payload.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphInspectResponse" } } } },
1484                        "400": { "description": "Bad graph node id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1485                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1486                        "404": { "description": "Tenant or graph node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1487                    }
1488                }
1489            },
1490            "/v1/graph/neighbors/{id}": {
1491                "get": {
1492                    "summary": "List graph neighbors",
1493                    "description": "Unified explicit and semantic neighbor lookup for solo-web's show-similar overlay.",
1494                    "security": [{ "bearerAuth": [] }, {}],
1495                    "parameters": [
1496                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } },
1497                        { "name": "kind", "in": "query", "required": false, "schema": { "type": "string", "enum": ["explicit", "semantic", "both"] } },
1498                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 25 } }
1499                    ],
1500                    "responses": {
1501                        "200": { "description": "Neighbor graph.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphResponse" } } } },
1502                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1503                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1504                        "404": { "description": "Tenant or graph node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1505                    }
1506                }
1507            },
1508            "/v1/graph/stream": {
1509                "get": {
1510                    "summary": "Stream graph invalidations",
1511                    "description": "Server-Sent Events stream of graph-data invalidation notifications. Clients refetch affected pages on each event.",
1512                    "security": [{ "bearerAuth": [] }, {}],
1513                    "responses": {
1514                        "200": { "description": "SSE stream.", "content": { "text/event-stream": { "schema": { "type": "string" } } } },
1515                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1516                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1517                    }
1518                }
1519            },
1520            "/v1/status": {
1521                "get": {
1522                    "summary": "Authenticated Solo status",
1523                    "description": "Tenant-aware readiness payload for local UIs and agent bridges. Unlike public /health, this resolves auth and tenant routing.",
1524                    "security": [{ "bearerAuth": [] }, {}],
1525                    "parameters": [
1526                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1527                    ],
1528                    "responses": {
1529                        "200": { "description": "Solo status payload.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StatusResponse" } } } },
1530                        "400": { "description": "Invalid tenant header.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1531                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1532                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1533                    }
1534                }
1535            },
1536            "/v1/tenants": {
1537                "get": {
1538                    "summary": "List visible tenants",
1539                    "description": "Principal-scoped active tenant list for solo-web's tenant picker and status UI.",
1540                    "security": [{ "bearerAuth": [] }, {}],
1541                    "responses": {
1542                        "200": {
1543                            "description": "Visible tenants.",
1544                            "headers": {
1545                                "X-Solo-Tenants-Count-Cap-Reached": {
1546                                    "schema": { "type": "string", "enum": ["true"] },
1547                                    "description": "Present when episode_count hydration was capped."
1548                                }
1549                            },
1550                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TenantsListResponse" } } }
1551                        },
1552                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1553                    }
1554                }
1555            },
1556            "/mcp": {
1557                "post": {
1558                    "summary": "MCP JSON-RPC request",
1559                    "description": "Streamable HTTP MCP request/response endpoint. A POST without Mcp-Session-Id creates a session and echoes it in the response header.",
1560                    "security": [{ "bearerAuth": [] }, {}],
1561                    "parameters": [
1562                        { "name": "Mcp-Session-Id", "in": "header", "required": false, "schema": { "type": "string" } },
1563                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1564                    ],
1565                    "requestBody": {
1566                        "required": true,
1567                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/JsonRpcRequest" } } }
1568                    },
1569                    "responses": {
1570                        "200": {
1571                            "description": "JSON-RPC success or in-body error response.",
1572                            "headers": { "Mcp-Session-Id": { "schema": { "type": "string" } } },
1573                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/JsonRpcResponse" } } }
1574                        },
1575                        "202": { "description": "JSON-RPC notification accepted; no response body." },
1576                        "400": { "description": "Malformed JSON-RPC envelope or invalid tenant header.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1577                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1578                        "404": { "description": "Unknown tenant or unknown/expired MCP session.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1579                    }
1580                },
1581                "get": {
1582                    "summary": "MCP SSE stream",
1583                    "description": "Attach to an existing MCP session's resumable Server-Sent Events stream. Requires Mcp-Session-Id from a prior POST.",
1584                    "security": [{ "bearerAuth": [] }, {}],
1585                    "parameters": [
1586                        { "name": "Mcp-Session-Id", "in": "header", "required": true, "schema": { "type": "string" } },
1587                        { "name": "Last-Event-ID", "in": "header", "required": false, "schema": { "type": "string" } },
1588                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1589                    ],
1590                    "responses": {
1591                        "200": { "description": "SSE stream.", "content": { "text/event-stream": { "schema": { "type": "string" } } } },
1592                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1593                        "404": { "description": "Missing, unknown, or expired MCP session; or tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1594                    }
1595                }
1596            }
1597        }
1598    })
1599}
1600
1601// ---------------------------------------------------------------------------
1602// Handlers
1603// ---------------------------------------------------------------------------
1604
/// JSON request body consumed by `remember_handler`.
#[derive(Debug, Deserialize)]
struct RememberBody {
    /// Text to store. The handler strips trailing whitespace and rejects
    /// the request when nothing remains.
    content: String,
    /// Optional source label; the handler substitutes `"user_message"`
    /// when it is absent.
    #[serde(default)]
    source_type: Option<String>,
    /// Optional caller-supplied source identifier, passed through as given.
    #[serde(default)]
    source_id: Option<String>,
    /// v0.9.2+ parity with the MCP `memory_remember` tool. Optional;
    /// must be in `[0.0, 1.0]` if supplied. Defaults to 0.5 when absent.
    /// Rejected with 400 if out of range.
    #[serde(default)]
    salience: Option<f32>,
}
1618
/// JSON response produced by `remember_handler`.
#[derive(Debug, Serialize)]
struct RememberResponse {
    /// String form of the id returned by the tenant writer for the new
    /// episode.
    memory_id: String,
}
1623
1624async fn remember_handler(
1625    TenantExtractor(tenant): TenantExtractor,
1626    AuditPrincipal(principal): AuditPrincipal,
1627    Json(body): Json<RememberBody>,
1628) -> Result<Json<RememberResponse>, ApiError> {
1629    let content = body.content.trim_end().to_string();
1630    if content.is_empty() {
1631        return Err(ApiError::bad_request("content must not be empty"));
1632    }
1633    // Validate caller-supplied salience (parity with MCP `memory_remember`).
1634    let salience = match body.salience {
1635        Some(s) if !(0.0..=1.0).contains(&s) || s.is_nan() => {
1636            return Err(ApiError::bad_request(
1637                "salience must be a finite value in [0.0, 1.0]",
1638            ));
1639        }
1640        Some(s) => s,
1641        None => 0.5,
1642    };
1643    let embedding = tenant
1644        .embedder()
1645        .embed(&content)
1646        .await
1647        .map_err(ApiError::from)?;
1648    let episode = Episode {
1649        memory_id: MemoryId::new(),
1650        ts_ms: chrono::Utc::now().timestamp_millis(),
1651        source_type: body.source_type.unwrap_or_else(|| "user_message".into()),
1652        source_id: body.source_id,
1653        content,
1654        encoding_context: EncodingContext::default(),
1655        provenance: None,
1656        confidence: Confidence::new(0.9).expect("0.9 is in [0.0, 1.0]"),
1657        strength: 0.5,
1658        salience,
1659        tier: Tier::Hot,
1660    };
1661    let mid = tenant
1662        .write()
1663        .remember_as(principal, episode, embedding)
1664        .await
1665        .map_err(ApiError::from)?;
1666    Ok(Json(RememberResponse {
1667        memory_id: mid.to_string(),
1668    }))
1669}
1670
/// JSON request body consumed by `recall_handler`.
#[derive(Debug, Deserialize)]
struct RecallBody {
    /// Search text, forwarded unchanged to `solo_query::run_recall`.
    query: String,
    /// Maximum number of results requested; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1677
/// JSON request body consumed by `memory_context_handler`.
#[derive(Debug, Deserialize)]
struct MemoryContextBody {
    /// Search text, forwarded unchanged to `solo_query::memory_context`.
    query: String,
    /// Optional subject filter, forwarded as `Option<&str>`.
    #[serde(default)]
    subject: Option<String>,
    /// Optional look-back window in days, forwarded unchanged.
    #[serde(default)]
    window_days: Option<i64>,
    /// Maximum number of results requested; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1688
/// Serde fallback for the `limit` field of the request/query structs in
/// this file that declare `#[serde(default = "default_limit")]`.
fn default_limit() -> usize {
    const FALLBACK_LIMIT: usize = 5;
    FALLBACK_LIMIT
}
1692
1693async fn recall_handler(
1694    TenantExtractor(tenant): TenantExtractor,
1695    AuditPrincipal(principal): AuditPrincipal,
1696    Json(body): Json<RecallBody>,
1697) -> Result<Json<solo_query::RecallResult>, ApiError> {
1698    // solo_query::run_recall handles empty-query rejection (returns
1699    // InvalidInput → ApiError::bad_request(400)) and clamps limit
1700    // upstream of the embedder call.
1701    let result = solo_query::run_recall(tenant.as_ref(), principal, &body.query, body.limit)
1702        .await
1703        .map_err(ApiError::from)?;
1704    Ok(Json(result))
1705}
1706
1707async fn memory_context_handler(
1708    State(s): State<SoloHttpState>,
1709    TenantExtractor(tenant): TenantExtractor,
1710    AuditPrincipal(principal): AuditPrincipal,
1711    Json(body): Json<MemoryContextBody>,
1712) -> Result<Json<solo_query::MemoryContextResult>, ApiError> {
1713    let result = solo_query::memory_context(
1714        tenant.as_ref(),
1715        principal,
1716        &body.query,
1717        body.subject.as_deref(),
1718        &s.user_aliases,
1719        body.window_days,
1720        body.limit,
1721    )
1722    .await
1723    .map_err(ApiError::from)?;
1724    Ok(Json(result))
1725}
1726
1727async fn inspect_handler(
1728    TenantExtractor(tenant): TenantExtractor,
1729    AuditPrincipal(principal): AuditPrincipal,
1730    Path(id): Path<String>,
1731) -> Result<Json<solo_query::EpisodeRecord>, ApiError> {
1732    let mid =
1733        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1734    let row = solo_query::inspect_one(tenant.read(), tenant.audit(), principal, mid)
1735        .await
1736        .map_err(ApiError::from)?;
1737    Ok(Json(row))
1738}
1739
/// JSON request body consumed by `update_handler`.
#[derive(Debug, Deserialize)]
struct MemoryUpdateBody {
    /// Replacement text. The handler rejects whitespace-only values and
    /// forwards the string untrimmed.
    content: String,
}
1744
1745async fn update_handler(
1746    TenantExtractor(tenant): TenantExtractor,
1747    AuditPrincipal(principal): AuditPrincipal,
1748    Path(id): Path<String>,
1749    Json(body): Json<MemoryUpdateBody>,
1750) -> Result<Json<solo_query::MemoryUpdateResult>, ApiError> {
1751    let mid =
1752        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1753    if body.content.trim().is_empty() {
1754        return Err(ApiError::bad_request("content must not be empty"));
1755    }
1756    let result = solo_query::memory_update(tenant.as_ref(), principal, mid, &body.content)
1757        .await
1758        .map_err(ApiError::from)?;
1759    Ok(Json(result))
1760}
1761
1762// Path 1 derived-layer handlers (v0.4.0+). Read handlers are GET-shaped:
1763// pure read-only queries against the Steward's outputs, query-string
1764// params for simple filters. Each handler delegates to a single
1765// solo_query::derived pipeline and returns the result Vec as JSON.
1766// Empty derived layer → 200 with `[]` body (parseable JSON array).
1767
/// Query-string parameters consumed by `themes_handler`.
#[derive(Debug, Deserialize)]
struct ThemesQuery {
    /// Optional look-back window in days, forwarded unchanged.
    #[serde(default)]
    window_days: Option<i64>,
    /// Maximum number of hits requested; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1775
1776async fn themes_handler(
1777    TenantExtractor(tenant): TenantExtractor,
1778    AuditPrincipal(principal): AuditPrincipal,
1779    Query(q): Query<ThemesQuery>,
1780) -> Result<Json<Vec<solo_query::ThemeHit>>, ApiError> {
1781    let hits = solo_query::themes(
1782        tenant.read(),
1783        tenant.audit(),
1784        principal,
1785        q.window_days,
1786        q.limit,
1787    )
1788    .await
1789    .map_err(ApiError::from)?;
1790    Ok(Json(hits))
1791}
1792
/// Query-string parameters consumed by `facts_about_handler`.
#[derive(Debug, Deserialize)]
struct FactsAboutQuery {
    /// Subject to look up. Required; the handler rejects blank values.
    subject: String,
    /// Optional predicate filter.
    #[serde(default)]
    predicate: Option<String>,
    /// Optional lower time bound in milliseconds, forwarded unchanged.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Optional upper time bound in milliseconds, forwarded unchanged.
    #[serde(default)]
    until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object. Default `false`.
    #[serde(default)]
    include_as_object: bool,
    /// Maximum number of hits requested; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1809
1810async fn facts_about_handler(
1811    State(s): State<SoloHttpState>,
1812    TenantExtractor(tenant): TenantExtractor,
1813    AuditPrincipal(principal): AuditPrincipal,
1814    Query(q): Query<FactsAboutQuery>,
1815) -> Result<Json<Vec<solo_query::FactHit>>, ApiError> {
1816    if q.subject.trim().is_empty() {
1817        return Err(ApiError::bad_request("subject must not be empty"));
1818    }
1819    let hits = solo_query::facts_about(
1820        tenant.read(),
1821        tenant.audit(),
1822        principal,
1823        &q.subject,
1824        &s.user_aliases,
1825        q.include_as_object,
1826        q.predicate.as_deref(),
1827        q.since_ms,
1828        q.until_ms,
1829        q.limit,
1830    )
1831    .await
1832    .map_err(ApiError::from)?;
1833    Ok(Json(hits))
1834}
1835
/// Query-string parameters consumed by `entities_handler`.
#[derive(Debug, Deserialize)]
struct EntitiesQuery {
    /// Search text. Required; the handler rejects blank values.
    query: String,
    /// Maximum number of hits requested; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1842
1843async fn entities_handler(
1844    TenantExtractor(tenant): TenantExtractor,
1845    AuditPrincipal(principal): AuditPrincipal,
1846    Query(q): Query<EntitiesQuery>,
1847) -> Result<Json<Vec<solo_query::EntityHit>>, ApiError> {
1848    if q.query.trim().is_empty() {
1849        return Err(ApiError::bad_request("query must not be empty"));
1850    }
1851    let hits = solo_query::entities(tenant.read(), tenant.audit(), principal, &q.query, q.limit)
1852        .await
1853        .map_err(ApiError::from)?;
1854    Ok(Json(hits))
1855}
1856
/// Query-string parameters consumed by `contradictions_handler`.
#[derive(Debug, Deserialize)]
struct ContradictionsQuery {
    /// Maximum number of hits requested; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1862
1863async fn contradictions_handler(
1864    TenantExtractor(tenant): TenantExtractor,
1865    AuditPrincipal(principal): AuditPrincipal,
1866    Query(q): Query<ContradictionsQuery>,
1867) -> Result<Json<Vec<solo_query::ContradictionHit>>, ApiError> {
1868    let hits = solo_query::contradictions(tenant.read(), tenant.audit(), principal, q.limit)
1869        .await
1870        .map_err(ApiError::from)?;
1871    Ok(Json(hits))
1872}
1873
/// Serde fallback for `ContradictionResolveBody::status` when the caller
/// omits the field.
fn default_contradiction_status() -> String {
    String::from("resolved")
}
1877
/// JSON request body consumed by `contradiction_resolve_handler`.
#[derive(Debug, Deserialize)]
struct ContradictionResolveBody {
    /// First id of the contradicting pair. Required; must not be blank.
    a_id: String,
    /// Second id of the contradicting pair. Required; must not be blank.
    b_id: String,
    /// Contradiction kind. Required; must not be blank.
    kind: String,
    /// Status to record; `"resolved"` when omitted.
    #[serde(default = "default_contradiction_status")]
    status: String,
    /// Optional free-text note, forwarded as `Option<&str>`.
    #[serde(default)]
    resolution_note: Option<String>,
    /// Optional id of the winning triple, forwarded as `Option<&str>`.
    #[serde(default)]
    winning_triple_id: Option<String>,
}
1890
1891async fn contradiction_resolve_handler(
1892    TenantExtractor(tenant): TenantExtractor,
1893    AuditPrincipal(principal): AuditPrincipal,
1894    Json(body): Json<ContradictionResolveBody>,
1895) -> Result<Json<solo_query::ContradictionResolution>, ApiError> {
1896    if body.a_id.trim().is_empty() || body.b_id.trim().is_empty() || body.kind.trim().is_empty() {
1897        return Err(ApiError::bad_request(
1898            "a_id, b_id, and kind must not be empty",
1899        ));
1900    }
1901    // Dev-log 0152 H1: routed through the writer actor for atomic
1902    // UPDATE + audit row. Reader-pool + audit-writer args are kept for
1903    // signature stability but ignored by the function body.
1904    let result = solo_query::resolve_contradiction(
1905        tenant.write(),
1906        tenant.read(),
1907        tenant.audit(),
1908        principal,
1909        &body.a_id,
1910        &body.b_id,
1911        &body.kind,
1912        &body.status,
1913        body.resolution_note.as_deref(),
1914        body.winning_triple_id.as_deref(),
1915    )
1916    .await
1917    .map_err(ApiError::from)?;
1918    Ok(Json(result))
1919}
1920
/// Query-string parameters consumed by `inspect_cluster_handler`.
#[derive(Debug, Deserialize, Default)]
struct InspectClusterQuery {
    /// Default `false` — episode `content` is truncated to
    /// `solo_query::EPISODE_TRUNCATE_CHARS` chars with a trailing `…`.
    /// `?full_content=true` returns each episode's content verbatim.
    #[serde(default)]
    full_content: bool,
}
1929
1930async fn inspect_cluster_handler(
1931    TenantExtractor(tenant): TenantExtractor,
1932    AuditPrincipal(principal): AuditPrincipal,
1933    Path(cluster_id): Path<String>,
1934    Query(q): Query<InspectClusterQuery>,
1935) -> Result<Json<solo_query::ClusterRecord>, ApiError> {
1936    if cluster_id.trim().is_empty() {
1937        return Err(ApiError::bad_request("cluster_id must not be empty"));
1938    }
1939    let record = solo_query::inspect_cluster(
1940        tenant.read(),
1941        tenant.audit(),
1942        principal,
1943        &cluster_id,
1944        q.full_content,
1945    )
1946    .await
1947    .map_err(ApiError::from)?;
1948    Ok(Json(record))
1949}
1950
1951// ---------------------------------------------------------------------------
1952// Document handlers (v0.7.0 P6)
1953// ---------------------------------------------------------------------------
1954
/// JSON request body consumed by `ingest_document_handler`.
#[derive(Debug, Deserialize)]
struct IngestDocumentBody {
    /// Server-side absolute path to the file. Must be readable by the
    /// Solo process. The writer reads, parses, chunks, and embeds.
    // NOTE(review): the handler only rejects blank values; it does not
    // itself check that the path is absolute — confirm the writer does.
    path: String,
}
1961
1962async fn ingest_document_handler(
1963    TenantExtractor(tenant): TenantExtractor,
1964    AuditPrincipal(principal): AuditPrincipal,
1965    Json(body): Json<IngestDocumentBody>,
1966) -> Result<Json<solo_storage::IngestReport>, ApiError> {
1967    if body.path.trim().is_empty() {
1968        return Err(ApiError::bad_request("path must not be empty"));
1969    }
1970    let path = std::path::PathBuf::from(body.path);
1971    let chunk_config = solo_storage::document::ChunkConfig::default();
1972    let report = tenant
1973        .write()
1974        .ingest_document_as(principal, path, chunk_config)
1975        .await
1976        .map_err(ApiError::from)?;
1977    Ok(Json(report))
1978}
1979
/// JSON request body consumed by `search_docs_handler`.
#[derive(Debug, Deserialize)]
struct SearchDocsBody {
    /// Search text, forwarded unchanged to `solo_query::run_doc_search`.
    query: String,
    /// Maximum number of hits requested; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1986
1987async fn search_docs_handler(
1988    TenantExtractor(tenant): TenantExtractor,
1989    AuditPrincipal(principal): AuditPrincipal,
1990    Json(body): Json<SearchDocsBody>,
1991) -> Result<Json<Vec<solo_query::DocSearchHit>>, ApiError> {
1992    let hits = solo_query::run_doc_search(tenant.as_ref(), principal, &body.query, body.limit)
1993        .await
1994        .map_err(ApiError::from)?;
1995    Ok(Json(hits))
1996}
1997
1998async fn inspect_document_handler(
1999    TenantExtractor(tenant): TenantExtractor,
2000    AuditPrincipal(principal): AuditPrincipal,
2001    Path(id): Path<String>,
2002) -> Result<Json<solo_query::DocumentInspectResult>, ApiError> {
2003    let doc_id =
2004        DocumentId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2005    let result_opt =
2006        solo_query::inspect_document(tenant.read(), tenant.audit(), principal, &doc_id)
2007            .await
2008            .map_err(ApiError::from)?;
2009    match result_opt {
2010        Some(record) => Ok(Json(record)),
2011        None => Err(ApiError::not_found(format!("document {doc_id} not found"))),
2012    }
2013}
2014
/// Query-string parameters consumed by `list_documents_handler`.
#[derive(Debug, Deserialize)]
struct ListDocumentsQuery {
    /// Page size; `default_list_documents_limit()` when omitted.
    #[serde(default = "default_list_documents_limit")]
    limit: usize,
    /// Number of rows to skip; 0 when omitted.
    #[serde(default)]
    offset: usize,
    /// Forwarded to `solo_query::list_documents`; `false` when omitted.
    #[serde(default)]
    include_forgotten: bool,
}
2024
/// Serde fallback for `ListDocumentsQuery::limit` when the caller omits it.
fn default_list_documents_limit() -> usize {
    const FALLBACK_PAGE_SIZE: usize = 20;
    FALLBACK_PAGE_SIZE
}
2028
2029async fn list_documents_handler(
2030    TenantExtractor(tenant): TenantExtractor,
2031    AuditPrincipal(principal): AuditPrincipal,
2032    Query(q): Query<ListDocumentsQuery>,
2033) -> Result<Json<Vec<solo_query::DocumentSummary>>, ApiError> {
2034    let rows = solo_query::list_documents(
2035        tenant.read(),
2036        tenant.audit(),
2037        principal,
2038        q.limit,
2039        q.offset,
2040        q.include_forgotten,
2041    )
2042    .await
2043    .map_err(ApiError::from)?;
2044    Ok(Json(rows))
2045}
2046
2047async fn forget_document_handler(
2048    TenantExtractor(tenant): TenantExtractor,
2049    AuditPrincipal(principal): AuditPrincipal,
2050    Path(id): Path<String>,
2051) -> Result<Json<solo_storage::ForgetDocumentReport>, ApiError> {
2052    let doc_id =
2053        DocumentId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2054    let report = tenant
2055        .write()
2056        .forget_document_as(principal, doc_id)
2057        .await
2058        .map_err(ApiError::from)?;
2059    Ok(Json(report))
2060}
2061
/// Query-string parameters consumed by `forget_handler`.
#[derive(Debug, Deserialize)]
struct ForgetQuery {
    /// Optional reason passed to the writer; the handler substitutes
    /// `"http"` when it is absent.
    #[serde(default)]
    reason: Option<String>,
}
2067
2068async fn forget_handler(
2069    TenantExtractor(tenant): TenantExtractor,
2070    AuditPrincipal(principal): AuditPrincipal,
2071    Path(id): Path<String>,
2072    Query(q): Query<ForgetQuery>,
2073) -> Result<StatusCode, ApiError> {
2074    let mid =
2075        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2076    let reason = q.reason.unwrap_or_else(|| "http".into());
2077    tenant
2078        .write()
2079        .forget_as(principal, mid, reason)
2080        .await
2081        .map_err(ApiError::from)?;
2082    Ok(StatusCode::NO_CONTENT)
2083}
2084
2085async fn consolidate_handler(
2086    TenantExtractor(tenant): TenantExtractor,
2087    AuditPrincipal(principal): AuditPrincipal,
2088    body: axum::body::Bytes,
2089) -> Result<Json<solo_storage::ConsolidationReport>, ApiError> {
2090    // Empty body = default scope (unbounded window). We parse via
2091    // `Bytes` rather than `Option<Json<T>>` because axum's `Json`
2092    // extractor 400s on an empty body when Content-Type is JSON
2093    // (it can't deserialize zero bytes as `T`), and the `Option`
2094    // wrapper doesn't reliably degrade that failure to `None`.
2095    let scope = if body.is_empty() {
2096        solo_storage::ConsolidationScope::default()
2097    } else {
2098        serde_json::from_slice(&body)
2099            .map_err(|e| ApiError::bad_request(format!("invalid JSON: {e}")))?
2100    };
2101    let report = tenant
2102        .write()
2103        .consolidate_as(principal, scope)
2104        .await
2105        .map_err(ApiError::from)?;
2106    Ok(Json(report))
2107}
2108
/// JSON request body consumed by `backup_handler`.
#[derive(Debug, Deserialize)]
struct BackupBody {
    /// Server-side absolute path where the backup file should be
    /// written. Must be writable by the Solo process. Refuses to
    /// overwrite an existing file unless `force = true`.
    to: String,
    /// When `true`, an existing destination file is removed before the
    /// backup runs. Defaults to `false`.
    #[serde(default)]
    force: bool,
}
2118
/// JSON response produced by `backup_handler`.
#[derive(Debug, Serialize)]
struct BackupResponse {
    /// Display form of the destination path the backup was written to.
    path: String,
    /// Wall-clock duration of the writer's backup call, in milliseconds.
    elapsed_ms: u64,
}
2124
/// Writes a backup of the tenant database to a server-side path.
///
/// Pre-flight checks, in order (each failure is a 400 unless noted):
/// 1. `to` must not be empty.
/// 2. The destination must not be the same file as the live database.
/// 3. An existing destination is refused unless `force` is set; with
///    `force`, it is removed first (removal failure → internal error).
/// 4. The destination's parent directory, if any, must already exist.
///
/// On success returns the destination path and the elapsed time of the
/// writer's backup call.
// NOTE(review): unlike the other write handlers in this file, this one
// takes no `AuditPrincipal` — confirm that is intentional.
// NOTE(review): `exists`, `remove_file`, and `is_dir` are blocking
// `std` filesystem calls made from an async fn.
async fn backup_handler(
    TenantExtractor(tenant): TenantExtractor,
    Json(body): Json<BackupBody>,
) -> Result<Json<BackupResponse>, ApiError> {
    use std::path::PathBuf;

    let dest = PathBuf::from(&body.to);
    if dest.as_os_str().is_empty() {
        return Err(ApiError::bad_request("`to` must not be empty"));
    }
    // CRITICAL ORDER: same-file refusal MUST come BEFORE `remove_file`.
    // The tenant's source DB path comes from the resolved TenantHandle.
    if solo_storage::paths_refer_to_same_file(tenant.db_path(), &dest) {
        return Err(ApiError::bad_request(format!(
            "destination {} is the same file as the source database; \
             refusing to run (would corrupt the live database)",
            dest.display()
        )));
    }
    if dest.exists() {
        if !body.force {
            return Err(ApiError::bad_request(format!(
                "destination {} exists; pass force=true to overwrite",
                dest.display()
            )));
        }
        // `force` was requested: clear the old file so the writer starts
        // from a non-existent destination.
        std::fs::remove_file(&dest).map_err(|e| {
            ApiError::internal(format!(
                "remove existing destination {}: {e}",
                dest.display()
            ))
        })?;
    }
    // A bare file name has an empty parent; that case is allowed through.
    if let Some(parent) = dest.parent() {
        if !parent.as_os_str().is_empty() && !parent.is_dir() {
            return Err(ApiError::bad_request(format!(
                "destination parent directory {} does not exist",
                parent.display()
            )));
        }
    }

    // Only the writer's backup call is timed, not the checks above.
    let started = std::time::Instant::now();
    tenant
        .write()
        .backup(dest.clone())
        .await
        .map_err(ApiError::from)?;
    let elapsed_ms = started.elapsed().as_millis() as u64;

    Ok(Json(BackupResponse {
        path: dest.display().to_string(),
        elapsed_ms,
    }))
}
2180
2181// ---------------------------------------------------------------------------
2182// Graph expand (v0.9.x — first /v1/graph/* endpoint for solo-web)
2183// ---------------------------------------------------------------------------
2184//
2185// `GET /v1/graph/expand?node_id=...&kind=...&limit=N` — read-only neighbor
2186// drill off any node. Supports four edge kinds:
2187//   * `cluster_member` — episodes ↔ clusters via `cluster_episodes`.
2188//   * `document_chunk` — documents ↔ chunks via `document_chunks.doc_id`.
2189//   * `triple`         — episodes ↔ entities via `triples` (subject_id /
2190//     object_id / source_episode_id added in migration 0007).
2191//   * `semantic`       — HNSW top-K similar episodes (re-embeds the source
2192//     episode's content via the tenant embedder, then calls the same
2193//     pipeline as `/memory/search`; cheaper than a separate embeddings-
2194//     table fetch path and reuses one well-tested code path).
2195//
2196// **Node-id prefix convention** (locked in this PR; the future
2197// `/v1/graph/nodes` + `/v1/graph/inspect/:id` endpoints will use the
2198// same scheme):
2199//   * `ep:<memory_id>`     — episode (memory_id = UUID v7)
2200//   * `doc:<doc_id>`       — document (doc_id   = UUID v7)
2201//   * `chunk:<chunk_id>`   — chunk    (chunk_id = UUID v7)
2202//   * `cl:<cluster_id>`    — cluster
2203//   * `ent:<value>`        — entity (synthetic — minted from a triple's
2204//     subject_id / object_id; value is the raw string verbatim, no
2205//     URL-encoding — `:` and other punctuation appear in real entity
2206//     ids in the wild).
2207//
2208// Entity nodes are synthetic: there's no `entities` table. They're derived
2209// on-the-fly from triples and only exist in the wire format. Two entity
2210// nodes with the same `ent:<value>` are the same node.
2211//
2212// **Read-only**: no audit emit (lesson #30 — graph expand is a derived view
2213// over already-audited primitives; the explicit-query audit events from
2214// `memory.recall` / `memory.inspect` / `memory.facts_about` cover the
2215// underlying reads).
2216//
2217// Tests live inline in `handler_tests` below.
2218
/// Neighbor limit applied by `graph_expand_handler` when the request has
/// no `limit` parameter.
const GRAPH_EXPAND_DEFAULT_LIMIT: u32 = 25;
/// Upper bound of the range `graph_expand_handler` clamps `limit` to
/// (the lower bound is 1).
const GRAPH_EXPAND_MAX_LIMIT: u32 = 100;
2221
/// Edge-kind discriminator. Drives which expansion path runs and what edge
/// kind appears in the response.
///
/// Deserialized from snake_case strings: `cluster_member`,
/// `document_chunk`, `triple`, `semantic`.
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphExpandKind {
    /// Episodes ↔ clusters.
    ClusterMember,
    /// Documents ↔ chunks.
    DocumentChunk,
    /// Episodes ↔ entities via triples.
    Triple,
    /// Top-K similar episodes.
    Semantic,
}
2232
/// Query-string parameters consumed by `graph_expand_handler`.
#[derive(Debug, Deserialize)]
struct GraphExpandQuery {
    /// Node to expand, in `<prefix>:<value>` form (see `parse_node_id`).
    node_id: String,
    /// Which edge kind to expand along.
    kind: GraphExpandKind,
    /// Optional neighbor cap. The handler substitutes
    /// `GRAPH_EXPAND_DEFAULT_LIMIT` when absent and clamps the value to
    /// `1..=GRAPH_EXPAND_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
}
2240
/// Kind of a graph node, as determined by the prefix of its `node_id`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeKind {
    Episode,
    Document,
    Chunk,
    Cluster,
    Entity,
}

impl NodeKind {
    /// Name used for this kind in the serialized `kind` field of a node.
    fn as_wire_str(self) -> &'static str {
        let wire_name = match self {
            NodeKind::Episode => "episode",
            NodeKind::Document => "document",
            NodeKind::Chunk => "chunk",
            NodeKind::Cluster => "cluster",
            NodeKind::Entity => "entity",
        };
        wire_name
    }
}
2262
2263/// Decompose `<prefix>:<value>` into (kind, raw value). Returns 400 on
2264/// unknown prefix / empty value / no `:`.
2265fn parse_node_id(raw: &str) -> Result<(NodeKind, &str), ApiError> {
2266    let (prefix, value) = raw.split_once(':').ok_or_else(|| {
2267        ApiError::bad_request(format!(
2268            "node_id must be `<prefix>:<value>` (one of ep:/doc:/chunk:/cl:/ent:); got {raw:?}"
2269        ))
2270    })?;
2271    if value.is_empty() {
2272        return Err(ApiError::bad_request(format!(
2273            "node_id value is empty after prefix: {raw:?}"
2274        )));
2275    }
2276    let kind = match prefix {
2277        "ep" => NodeKind::Episode,
2278        "doc" => NodeKind::Document,
2279        "chunk" => NodeKind::Chunk,
2280        "cl" => NodeKind::Cluster,
2281        "ent" => NodeKind::Entity,
2282        other => {
2283            return Err(ApiError::bad_request(format!(
2284                "unknown node_id prefix {other:?}; expected one of ep:/doc:/chunk:/cl:/ent:"
2285            )));
2286        }
2287    };
2288    Ok((kind, value))
2289}
2290
/// One node in the graph-expand response. Mirrors solo-web's `GraphNode`
/// TS interface (see `solo-web/src/api/types.ts`).
#[derive(Debug, Serialize)]
struct GraphNode {
    /// Prefixed node id, e.g. `ep:<memory_id>` or `ent:<value>`.
    id: String,
    /// Wire name of the node kind (see `NodeKind::as_wire_str`).
    kind: &'static str,
    /// Short display heading for the node.
    label: String,
    /// Timestamp in milliseconds; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    ts_ms: Option<i64>,
    /// Id of the tenant the node belongs to.
    tenant_id: String,
    /// Longer text excerpt; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<String>,
}
2304
/// One edge. Mirrors `GraphEdge` in solo-web TS types. `id` is a composite
/// `${source}--${kind}--${target}` so the renderer can dedupe.
#[derive(Debug, Serialize)]
struct GraphEdge {
    /// Composite edge id (see `edge_id`).
    id: String,
    /// Node id of the edge's source endpoint.
    source: String,
    /// Node id of the edge's target endpoint.
    target: String,
    /// Wire name of the edge kind.
    kind: &'static str,
    /// Optional predicate; omitted from the JSON when `None`.
    // NOTE(review): presumably only set for `triple` edges — confirm
    // against the expansion helpers.
    #[serde(skip_serializing_if = "Option::is_none")]
    predicate: Option<String>,
    /// Optional edge weight; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    weight: Option<f32>,
}
2318
/// JSON response of `graph_expand_handler`: the nodes and edges found by
/// one expansion.
#[derive(Debug, Serialize)]
struct GraphExpandResponse {
    /// Nodes included in the response.
    nodes: Vec<GraphNode>,
    /// Edges included in the response.
    edges: Vec<GraphEdge>,
}
2324
/// Builds the composite edge id `<source>--<kind>--<target>`.
///
/// The three parts are joined verbatim; no escaping is applied.
fn edge_id(source: &str, kind: &str, target: &str) -> String {
    [source, kind, target].join("--")
}
2328
/// Episode summary needed to mint a `GraphNode` from an episode row.
#[derive(Debug)]
struct ExpandedEpisode {
    /// Episode id without the `ep:` prefix.
    memory_id: String,
    /// Episode timestamp in milliseconds.
    ts_ms: i64,
    /// Full episode text; label and preview are derived from it.
    content: String,
}
2336
/// Document summary needed to mint a `GraphNode` from a document row.
#[derive(Debug)]
struct ExpandedDocument {
    /// Document id without the `doc:` prefix.
    doc_id: String,
    /// Optional title; preferred source for the node label.
    title: Option<String>,
    /// Optional source string; label fallback and node preview.
    source: Option<String>,
    /// Ingestion timestamp in milliseconds.
    ingested_at_ms: i64,
}
2345
/// Chunk summary needed to mint a `GraphNode` from a chunk row.
#[derive(Debug)]
struct ExpandedChunk {
    /// Chunk id without the `chunk:` prefix.
    chunk_id: String,
    /// Index shown in the node label (`chunk #<index>: …`).
    chunk_index: i64,
    /// Full chunk text; label and preview are derived from it.
    content: String,
}
2353
/// Shortens `s` to at most `max` characters (Unicode scalar values, not
/// bytes).
///
/// * If `s` already fits, it is returned unchanged.
/// * Otherwise the first `max - 1` characters are kept and a single `…` is
///   appended, so the result is exactly `max` characters long.
/// * `max == 0` always yields an empty string. (Previously this case
///   underflowed `max - 1` for non-empty input.)
///
/// The input is scanned once, and only as far as the cut point.
fn truncate_preview(s: &str, max: usize) -> String {
    if max == 0 {
        return String::new();
    }
    let mut positions = s.char_indices();
    // Byte offset of the `max`-th character: everything before it is the
    // `max - 1` characters that survive a truncation.
    let cut = match positions.nth(max - 1) {
        Some((byte_offset, _)) => byte_offset,
        // Fewer than `max` characters: nothing to trim.
        None => return s.to_string(),
    };
    if positions.next().is_none() {
        // Exactly `max` characters: still fits.
        return s.to_string();
    }
    let mut out = String::with_capacity(cut + '…'.len_utf8());
    out.push_str(&s[..cut]);
    out.push('…');
    out
}
2362
/// First-line label cap. Keeps payloads tight for the graph renderer
/// (labels are headings, not full content).
const GRAPH_LABEL_CHARS: usize = 80;
/// Character cap applied to node `preview` text built from episode,
/// chunk, and cluster content.
const GRAPH_PREVIEW_CHARS: usize = 200;
2367
2368fn episode_label(content: &str) -> String {
2369    let first_line = content.lines().next().unwrap_or(content);
2370    truncate_preview(first_line, GRAPH_LABEL_CHARS)
2371}
2372
2373fn graph_node_for_episode(tenant_id: &str, ep: &ExpandedEpisode) -> GraphNode {
2374    GraphNode {
2375        id: format!("ep:{}", ep.memory_id),
2376        kind: NodeKind::Episode.as_wire_str(),
2377        label: episode_label(&ep.content),
2378        ts_ms: Some(ep.ts_ms),
2379        tenant_id: tenant_id.to_string(),
2380        preview: Some(truncate_preview(&ep.content, GRAPH_PREVIEW_CHARS)),
2381    }
2382}
2383
2384fn graph_node_for_document(tenant_id: &str, d: &ExpandedDocument) -> GraphNode {
2385    let label = d
2386        .title
2387        .clone()
2388        .or_else(|| d.source.clone())
2389        .unwrap_or_else(|| d.doc_id.clone());
2390    GraphNode {
2391        id: format!("doc:{}", d.doc_id),
2392        kind: NodeKind::Document.as_wire_str(),
2393        label: truncate_preview(&label, GRAPH_LABEL_CHARS),
2394        ts_ms: Some(d.ingested_at_ms),
2395        tenant_id: tenant_id.to_string(),
2396        preview: d.source.clone(),
2397    }
2398}
2399
2400fn graph_node_for_chunk(tenant_id: &str, c: &ExpandedChunk) -> GraphNode {
2401    GraphNode {
2402        id: format!("chunk:{}", c.chunk_id),
2403        kind: NodeKind::Chunk.as_wire_str(),
2404        label: format!("chunk #{}: {}", c.chunk_index, episode_label(&c.content)),
2405        ts_ms: None,
2406        tenant_id: tenant_id.to_string(),
2407        preview: Some(truncate_preview(&c.content, GRAPH_PREVIEW_CHARS)),
2408    }
2409}
2410
2411fn graph_node_for_cluster(
2412    tenant_id: &str,
2413    cluster_id: &str,
2414    abstraction: Option<&str>,
2415    created_at_ms: i64,
2416) -> GraphNode {
2417    let label = abstraction
2418        .map(|a| truncate_preview(a, GRAPH_LABEL_CHARS))
2419        .unwrap_or_else(|| format!("cluster {cluster_id}"));
2420    GraphNode {
2421        id: format!("cl:{cluster_id}"),
2422        kind: NodeKind::Cluster.as_wire_str(),
2423        label,
2424        ts_ms: Some(created_at_ms),
2425        tenant_id: tenant_id.to_string(),
2426        preview: abstraction.map(|a| truncate_preview(a, GRAPH_PREVIEW_CHARS)),
2427    }
2428}
2429
2430fn graph_node_for_entity(tenant_id: &str, value: &str) -> GraphNode {
2431    GraphNode {
2432        id: format!("ent:{value}"),
2433        kind: NodeKind::Entity.as_wire_str(),
2434        label: truncate_preview(value, GRAPH_LABEL_CHARS),
2435        ts_ms: None,
2436        tenant_id: tenant_id.to_string(),
2437        preview: None,
2438    }
2439}
2440
2441/// `GET /v1/graph/expand`. See module-level comments for the contract.
2442async fn graph_expand_handler(
2443    TenantExtractor(tenant): TenantExtractor,
2444    Query(q): Query<GraphExpandQuery>,
2445) -> Result<Json<GraphExpandResponse>, ApiError> {
2446    // Silent clamp at GRAPH_EXPAND_MAX_LIMIT — matches the rest of
2447    // solo-query's read pipelines (recall, themes, etc.). Documented in
2448    // the OpenAPI spec.
2449    let limit = q.limit.unwrap_or(GRAPH_EXPAND_DEFAULT_LIMIT);
2450    let limit = limit.clamp(1, GRAPH_EXPAND_MAX_LIMIT) as i64;
2451
2452    let (node_kind, value) = parse_node_id(&q.node_id)?;
2453    let value = value.to_string();
2454    let node_id_full = q.node_id.clone();
2455    let tenant_id_str = tenant.tenant_id().to_string();
2456
2457    match q.kind {
2458        GraphExpandKind::ClusterMember => {
2459            expand_cluster_member(
2460                &tenant,
2461                &tenant_id_str,
2462                node_kind,
2463                &value,
2464                &node_id_full,
2465                limit,
2466            )
2467            .await
2468        }
2469        GraphExpandKind::DocumentChunk => {
2470            expand_document_chunk(
2471                &tenant,
2472                &tenant_id_str,
2473                node_kind,
2474                &value,
2475                &node_id_full,
2476                limit,
2477            )
2478            .await
2479        }
2480        GraphExpandKind::Triple => {
2481            expand_triple(
2482                &tenant,
2483                &tenant_id_str,
2484                node_kind,
2485                &value,
2486                &node_id_full,
2487                limit,
2488            )
2489            .await
2490        }
2491        GraphExpandKind::Semantic => {
2492            expand_semantic(
2493                &tenant,
2494                &tenant_id_str,
2495                node_kind,
2496                &value,
2497                &node_id_full,
2498                limit,
2499            )
2500            .await
2501        }
2502    }
2503    .map(Json)
2504}
2505
2506// ---- cluster_member ----
2507
2508async fn expand_cluster_member(
2509    tenant: &TenantHandle,
2510    tenant_id: &str,
2511    node_kind: NodeKind,
2512    value: &str,
2513    node_id_full: &str,
2514    limit: i64,
2515) -> Result<GraphExpandResponse, ApiError> {
2516    match node_kind {
2517        NodeKind::Episode => {
2518            expand_cluster_member_from_episode(
2519                tenant,
2520                tenant_id,
2521                value.to_string(),
2522                node_id_full.to_string(),
2523                limit,
2524            )
2525            .await
2526        }
2527        NodeKind::Cluster => {
2528            expand_cluster_member_from_cluster(
2529                tenant,
2530                tenant_id,
2531                value.to_string(),
2532                node_id_full.to_string(),
2533                limit,
2534            )
2535            .await
2536        }
2537        _ => Err(ApiError::bad_request(format!(
2538            "kind=cluster_member only valid for episode or cluster source nodes; got {}",
2539            node_kind.as_wire_str()
2540        ))),
2541    }
2542}
2543
2544async fn expand_cluster_member_from_episode(
2545    tenant: &TenantHandle,
2546    tenant_id: &str,
2547    memory_id: String,
2548    node_id_full: String,
2549    limit: i64,
2550) -> Result<GraphExpandResponse, ApiError> {
2551    let memory_id_for_err = memory_id.clone();
2552    let rows: Vec<(String, Option<String>, i64)> = tenant
2553        .read()
2554        .interact(move |conn| {
2555            // First confirm the source episode exists in this tenant.
2556            let exists: i64 = conn.query_row(
2557                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
2558                rusqlite::params![&memory_id],
2559                |r| r.get(0),
2560            )?;
2561            if exists == 0 {
2562                return Ok(Vec::new());
2563            }
2564            let mut stmt = conn.prepare(
2565                "SELECT c.cluster_id, sa.content, c.created_at_ms
2566                   FROM cluster_episodes ce
2567                   JOIN clusters c ON c.cluster_id = ce.cluster_id
2568                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
2569                  WHERE ce.memory_id = ?1
2570                  ORDER BY c.created_at_ms DESC
2571                  LIMIT ?2",
2572            )?;
2573            let mapped = stmt
2574                .query_map(rusqlite::params![&memory_id, limit], |r| {
2575                    Ok((
2576                        r.get::<_, String>(0)?,
2577                        r.get::<_, Option<String>>(1)?,
2578                        r.get::<_, i64>(2)?,
2579                    ))
2580                })?
2581                .collect::<rusqlite::Result<Vec<_>>>()?;
2582            // Marker tuple to signal "episode found" via Vec emptiness +
2583            // an extra sentinel; we use a different shape:
2584            // pack the "found" flag via an out-of-band trick — actually
2585            // we re-query above. Keep it simple: confirm again here by
2586            // returning the rows; a missing episode short-circuits to
2587            // a 404 below via the `exists == 0` guard.
2588            Ok::<_, rusqlite::Error>(mapped)
2589        })
2590        .await
2591        .map_err(ApiError::from)?;
2592
2593    // The interact() returns Vec<(...)>; but we need to distinguish "no
2594    // such episode" (→ 404) from "episode exists, has no clusters" (→
2595    // 200 with empty arrays). Re-run a cheap existence check separately
2596    // — we already inlined it above and returned `Vec::new()` on miss,
2597    // but a real miss is indistinguishable from "episode in zero
2598    // clusters". Use a separate existence probe.
2599    if rows.is_empty() {
2600        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2601        return Ok(GraphExpandResponse {
2602            nodes: Vec::new(),
2603            edges: Vec::new(),
2604        });
2605    }
2606
2607    let mut nodes = Vec::with_capacity(rows.len());
2608    let mut edges = Vec::with_capacity(rows.len());
2609    for (cluster_id, abstraction, created_at_ms) in rows {
2610        let target_id = format!("cl:{cluster_id}");
2611        edges.push(GraphEdge {
2612            id: edge_id(&node_id_full, "cluster_member", &target_id),
2613            source: node_id_full.clone(),
2614            target: target_id,
2615            kind: "cluster_member",
2616            predicate: None,
2617            weight: None,
2618        });
2619        nodes.push(graph_node_for_cluster(
2620            tenant_id,
2621            &cluster_id,
2622            abstraction.as_deref(),
2623            created_at_ms,
2624        ));
2625    }
2626    Ok(GraphExpandResponse { nodes, edges })
2627}
2628
2629async fn expand_cluster_member_from_cluster(
2630    tenant: &TenantHandle,
2631    tenant_id: &str,
2632    cluster_id: String,
2633    node_id_full: String,
2634    limit: i64,
2635) -> Result<GraphExpandResponse, ApiError> {
2636    let cluster_id_for_err = cluster_id.clone();
2637    let rows: Vec<ExpandedEpisode> = tenant
2638        .read()
2639        .interact(move |conn| {
2640            let exists: i64 = conn.query_row(
2641                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
2642                rusqlite::params![&cluster_id],
2643                |r| r.get(0),
2644            )?;
2645            if exists == 0 {
2646                return Ok(Vec::new());
2647            }
2648            let mut stmt = conn.prepare(
2649                "SELECT e.memory_id, e.ts_ms, e.content
2650                   FROM cluster_episodes ce
2651                   JOIN episodes e ON e.memory_id = ce.memory_id
2652                  WHERE ce.cluster_id = ?1
2653                    AND e.status = 'active'
2654                  ORDER BY e.ts_ms DESC
2655                  LIMIT ?2",
2656            )?;
2657            let mapped = stmt
2658                .query_map(rusqlite::params![&cluster_id, limit], |r| {
2659                    Ok(ExpandedEpisode {
2660                        memory_id: r.get(0)?,
2661                        ts_ms: r.get(1)?,
2662                        content: r.get(2)?,
2663                    })
2664                })?
2665                .collect::<rusqlite::Result<Vec<_>>>()?;
2666            Ok::<_, rusqlite::Error>(mapped)
2667        })
2668        .await
2669        .map_err(ApiError::from)?;
2670
2671    if rows.is_empty() {
2672        ensure_cluster_exists(tenant, &cluster_id_for_err, &node_id_full).await?;
2673        return Ok(GraphExpandResponse {
2674            nodes: Vec::new(),
2675            edges: Vec::new(),
2676        });
2677    }
2678
2679    let mut nodes = Vec::with_capacity(rows.len());
2680    let mut edges = Vec::with_capacity(rows.len());
2681    for ep in rows {
2682        let target_id = format!("ep:{}", ep.memory_id);
2683        edges.push(GraphEdge {
2684            id: edge_id(&node_id_full, "cluster_member", &target_id),
2685            source: node_id_full.clone(),
2686            target: target_id,
2687            kind: "cluster_member",
2688            predicate: None,
2689            weight: None,
2690        });
2691        nodes.push(graph_node_for_episode(tenant_id, &ep));
2692    }
2693    Ok(GraphExpandResponse { nodes, edges })
2694}
2695
2696// ---- document_chunk ----
2697
2698async fn expand_document_chunk(
2699    tenant: &TenantHandle,
2700    tenant_id: &str,
2701    node_kind: NodeKind,
2702    value: &str,
2703    node_id_full: &str,
2704    limit: i64,
2705) -> Result<GraphExpandResponse, ApiError> {
2706    match node_kind {
2707        NodeKind::Document => {
2708            expand_document_chunk_from_document(
2709                tenant,
2710                tenant_id,
2711                value.to_string(),
2712                node_id_full.to_string(),
2713                limit,
2714            )
2715            .await
2716        }
2717        NodeKind::Chunk => {
2718            expand_document_chunk_from_chunk(
2719                tenant,
2720                tenant_id,
2721                value.to_string(),
2722                node_id_full.to_string(),
2723            )
2724            .await
2725        }
2726        _ => Err(ApiError::bad_request(format!(
2727            "kind=document_chunk only valid for document or chunk source nodes; got {}",
2728            node_kind.as_wire_str()
2729        ))),
2730    }
2731}
2732
2733async fn expand_document_chunk_from_document(
2734    tenant: &TenantHandle,
2735    tenant_id: &str,
2736    doc_id: String,
2737    node_id_full: String,
2738    limit: i64,
2739) -> Result<GraphExpandResponse, ApiError> {
2740    let doc_id_for_err = doc_id.clone();
2741    let rows: Vec<ExpandedChunk> = tenant
2742        .read()
2743        .interact(move |conn| {
2744            let exists: i64 = conn.query_row(
2745                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
2746                rusqlite::params![&doc_id],
2747                |r| r.get(0),
2748            )?;
2749            if exists == 0 {
2750                return Ok(Vec::new());
2751            }
2752            let mut stmt = conn.prepare(
2753                "SELECT chunk_id, chunk_index, content
2754                   FROM document_chunks
2755                  WHERE doc_id = ?1
2756                  ORDER BY chunk_index ASC
2757                  LIMIT ?2",
2758            )?;
2759            let mapped = stmt
2760                .query_map(rusqlite::params![&doc_id, limit], |r| {
2761                    Ok(ExpandedChunk {
2762                        chunk_id: r.get(0)?,
2763                        chunk_index: r.get(1)?,
2764                        content: r.get(2)?,
2765                    })
2766                })?
2767                .collect::<rusqlite::Result<Vec<_>>>()?;
2768            Ok::<_, rusqlite::Error>(mapped)
2769        })
2770        .await
2771        .map_err(ApiError::from)?;
2772
2773    if rows.is_empty() {
2774        ensure_document_exists(tenant, &doc_id_for_err, &node_id_full).await?;
2775        return Ok(GraphExpandResponse {
2776            nodes: Vec::new(),
2777            edges: Vec::new(),
2778        });
2779    }
2780
2781    let mut nodes = Vec::with_capacity(rows.len());
2782    let mut edges = Vec::with_capacity(rows.len());
2783    for c in rows {
2784        let target_id = format!("chunk:{}", c.chunk_id);
2785        edges.push(GraphEdge {
2786            id: edge_id(&node_id_full, "document_chunk", &target_id),
2787            source: node_id_full.clone(),
2788            target: target_id,
2789            kind: "document_chunk",
2790            predicate: None,
2791            weight: None,
2792        });
2793        nodes.push(graph_node_for_chunk(tenant_id, &c));
2794    }
2795    Ok(GraphExpandResponse { nodes, edges })
2796}
2797
2798async fn expand_document_chunk_from_chunk(
2799    tenant: &TenantHandle,
2800    tenant_id: &str,
2801    chunk_id: String,
2802    node_id_full: String,
2803) -> Result<GraphExpandResponse, ApiError> {
2804    let chunk_id_for_err = chunk_id.clone();
2805    let row: Option<ExpandedDocument> = tenant
2806        .read()
2807        .interact(move |conn| {
2808            conn.query_row(
2809                "SELECT d.doc_id, d.title, d.source, d.ingested_at_ms
2810                   FROM document_chunks c
2811                   JOIN documents d ON d.doc_id = c.doc_id
2812                  WHERE c.chunk_id = ?1",
2813                rusqlite::params![&chunk_id],
2814                |r| {
2815                    Ok(ExpandedDocument {
2816                        doc_id: r.get(0)?,
2817                        title: r.get(1)?,
2818                        source: r.get(2)?,
2819                        ingested_at_ms: r.get(3)?,
2820                    })
2821                },
2822            )
2823            .map(Some)
2824            .or_else(|e| match e {
2825                rusqlite::Error::QueryReturnedNoRows => Ok(None),
2826                other => Err(other),
2827            })
2828        })
2829        .await
2830        .map_err(ApiError::from)?;
2831
2832    let d = row.ok_or_else(|| {
2833        ApiError::not_found(format!(
2834            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
2835        ))
2836    })?;
2837    let target_id = format!("doc:{}", d.doc_id);
2838    let edge = GraphEdge {
2839        id: edge_id(&node_id_full, "document_chunk", &target_id),
2840        source: node_id_full.clone(),
2841        target: target_id,
2842        kind: "document_chunk",
2843        predicate: None,
2844        weight: None,
2845    };
2846    let node = graph_node_for_document(tenant_id, &d);
2847    Ok(GraphExpandResponse {
2848        nodes: vec![node],
2849        edges: vec![edge],
2850    })
2851}
2852
2853// ---- triple ----
2854
2855async fn expand_triple(
2856    tenant: &TenantHandle,
2857    tenant_id: &str,
2858    node_kind: NodeKind,
2859    value: &str,
2860    node_id_full: &str,
2861    limit: i64,
2862) -> Result<GraphExpandResponse, ApiError> {
2863    match node_kind {
2864        NodeKind::Episode => {
2865            expand_triple_from_episode(
2866                tenant,
2867                tenant_id,
2868                value.to_string(),
2869                node_id_full.to_string(),
2870                limit,
2871            )
2872            .await
2873        }
2874        NodeKind::Entity => {
2875            expand_triple_from_entity(
2876                tenant,
2877                tenant_id,
2878                value.to_string(),
2879                node_id_full.to_string(),
2880                limit,
2881            )
2882            .await
2883        }
2884        _ => Err(ApiError::bad_request(format!(
2885            "kind=triple only valid for episode or entity source nodes; got {}",
2886            node_kind.as_wire_str()
2887        ))),
2888    }
2889}
2890
2891#[derive(Debug)]
2892struct TripleRow {
2893    subject_id: String,
2894    predicate: String,
2895    object_id: String,
2896    confidence: f32,
2897}
2898
2899async fn expand_triple_from_episode(
2900    tenant: &TenantHandle,
2901    tenant_id: &str,
2902    memory_id: String,
2903    node_id_full: String,
2904    limit: i64,
2905) -> Result<GraphExpandResponse, ApiError> {
2906    let memory_id_for_err = memory_id.clone();
2907    let rows: Vec<TripleRow> = tenant
2908        .read()
2909        .interact(move |conn| {
2910            // Episode rowid lookup (triples FK is INTEGER rowid, not memory_id).
2911            let rowid_opt: Option<i64> = conn
2912                .query_row(
2913                    "SELECT rowid FROM episodes WHERE memory_id = ?1",
2914                    rusqlite::params![&memory_id],
2915                    |r| r.get(0),
2916                )
2917                .map(Some)
2918                .or_else(|e| match e {
2919                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
2920                    other => Err(other),
2921                })?;
2922            let Some(rowid) = rowid_opt else {
2923                return Ok(Vec::new());
2924            };
2925            let mut stmt = conn.prepare(
2926                "SELECT subject_id, predicate, object_id, confidence
2927                   FROM triples
2928                  WHERE source_episode_id = ?1
2929                    AND status = 'active'
2930                  ORDER BY valid_from_ms DESC
2931                  LIMIT ?2",
2932            )?;
2933            let mapped = stmt
2934                .query_map(rusqlite::params![rowid, limit], |r| {
2935                    Ok(TripleRow {
2936                        subject_id: r.get(0)?,
2937                        predicate: r.get(1)?,
2938                        object_id: r.get(2)?,
2939                        confidence: r.get(3)?,
2940                    })
2941                })?
2942                .collect::<rusqlite::Result<Vec<_>>>()?;
2943            Ok::<_, rusqlite::Error>(mapped)
2944        })
2945        .await
2946        .map_err(ApiError::from)?;
2947
2948    if rows.is_empty() {
2949        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2950        return Ok(GraphExpandResponse {
2951            nodes: Vec::new(),
2952            edges: Vec::new(),
2953        });
2954    }
2955
2956    let mut nodes = Vec::new();
2957    let mut edges = Vec::new();
2958    let mut seen_entities: std::collections::HashSet<String> = Default::default();
2959    for t in rows {
2960        // Mint both endpoints as entity nodes. The source episode is
2961        // node_id_full; each triple becomes two edges (source→subj +
2962        // subj→obj) connected through the entity nodes, OR a single
2963        // edge labelled with the predicate from the source episode to
2964        // a representative entity. The TS schema treats `triple` as a
2965        // single edge with `predicate`; we emit one edge per triple:
2966        // source_episode → subject_entity (kind=triple, predicate=p),
2967        // plus one extra edge subject_entity → object_entity (also
2968        // kind=triple, same predicate) so a renderer can hop along the
2969        // SPO graph.
2970        let subj_id = format!("ent:{}", t.subject_id);
2971        let obj_id = format!("ent:{}", t.object_id);
2972        if seen_entities.insert(t.subject_id.clone()) {
2973            nodes.push(graph_node_for_entity(tenant_id, &t.subject_id));
2974        }
2975        if seen_entities.insert(t.object_id.clone()) {
2976            nodes.push(graph_node_for_entity(tenant_id, &t.object_id));
2977        }
2978        edges.push(GraphEdge {
2979            id: edge_id(&subj_id, "triple", &obj_id),
2980            source: subj_id,
2981            target: obj_id,
2982            kind: "triple",
2983            predicate: Some(t.predicate),
2984            weight: Some(t.confidence),
2985        });
2986    }
2987    Ok(GraphExpandResponse { nodes, edges })
2988}
2989
2990async fn expand_triple_from_entity(
2991    tenant: &TenantHandle,
2992    tenant_id: &str,
2993    entity_value: String,
2994    node_id_full: String,
2995    limit: i64,
2996) -> Result<GraphExpandResponse, ApiError> {
2997    // Entity nodes are synthetic — there's no existence check we can
2998    // run. "Unknown entity" naturally resolves to an empty result.
2999    let entity_q = entity_value.clone();
3000    let rows: Vec<ExpandedEpisode> = tenant
3001        .read()
3002        .interact(move |conn| {
3003            // Find episodes whose triples reference this entity on either
3004            // side. JOIN against episodes.rowid via triples.source_episode_id.
3005            let mut stmt = conn.prepare(
3006                "SELECT DISTINCT e.memory_id, e.ts_ms, e.content
3007                   FROM triples t
3008                   JOIN episodes e ON e.rowid = t.source_episode_id
3009                  WHERE (t.subject_id = ?1 OR t.object_id = ?1)
3010                    AND t.status = 'active'
3011                    AND t.source_episode_id IS NOT NULL
3012                    AND e.status = 'active'
3013                  ORDER BY e.ts_ms DESC
3014                  LIMIT ?2",
3015            )?;
3016            let mapped = stmt
3017                .query_map(rusqlite::params![&entity_q, limit], |r| {
3018                    Ok(ExpandedEpisode {
3019                        memory_id: r.get(0)?,
3020                        ts_ms: r.get(1)?,
3021                        content: r.get(2)?,
3022                    })
3023                })?
3024                .collect::<rusqlite::Result<Vec<_>>>()?;
3025            Ok::<_, rusqlite::Error>(mapped)
3026        })
3027        .await
3028        .map_err(ApiError::from)?;
3029
3030    // Empty result on entity expand is a valid 200 — the entity exists
3031    // only in the wire format; "no edges" is the right answer.
3032    let mut nodes = Vec::with_capacity(rows.len());
3033    let mut edges = Vec::with_capacity(rows.len());
3034    for ep in rows {
3035        let target_id = format!("ep:{}", ep.memory_id);
3036        edges.push(GraphEdge {
3037            id: edge_id(&node_id_full, "triple", &target_id),
3038            source: node_id_full.clone(),
3039            target: target_id,
3040            kind: "triple",
3041            predicate: None,
3042            weight: None,
3043        });
3044        nodes.push(graph_node_for_episode(tenant_id, &ep));
3045    }
3046    // Annotate _ to suppress unused (only used in match guard).
3047    let _ = entity_value;
3048    Ok(GraphExpandResponse { nodes, edges })
3049}
3050
3051// ---- semantic ----
3052
3053async fn expand_semantic(
3054    tenant: &TenantHandle,
3055    tenant_id: &str,
3056    node_kind: NodeKind,
3057    value: &str,
3058    node_id_full: &str,
3059    limit: i64,
3060) -> Result<GraphExpandResponse, ApiError> {
3061    if node_kind != NodeKind::Episode {
3062        return Err(ApiError::bad_request(format!(
3063            "kind=semantic only valid for episode source nodes; got {}",
3064            node_kind.as_wire_str()
3065        )));
3066    }
3067    let memory_id = value.to_string();
3068    let memory_id_q = memory_id.clone();
3069    // Fetch the source episode's content so we can re-embed it and call
3070    // the existing HNSW pipeline. Cheaper-than-extra-machinery: reuses
3071    // the well-tested `run_recall_inner` path that already filters
3072    // forgotten rows + decodes hnsw ids.
3073    let content: Option<String> = tenant
3074        .read()
3075        .interact(move |conn| {
3076            conn.query_row(
3077                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
3078                rusqlite::params![&memory_id_q],
3079                |r| r.get::<_, String>(0),
3080            )
3081            .map(Some)
3082            .or_else(|e| match e {
3083                rusqlite::Error::QueryReturnedNoRows => Ok(None),
3084                other => Err(other),
3085            })
3086        })
3087        .await
3088        .map_err(ApiError::from)?;
3089
3090    let content = content.ok_or_else(|| {
3091        ApiError::not_found(format!(
3092            "node_id {node_id_full:?} (memory_id {memory_id}) not found in current tenant"
3093        ))
3094    })?;
3095
3096    // Pull one extra hit so we can drop self without losing user-requested
3097    // count. limit is already ≤ MAX_LIMIT; +1 stays within reason.
3098    let widened = (limit as usize).saturating_add(1).min(100);
3099    let result = solo_query::recall::run_recall_inner(
3100        tenant.embedder(),
3101        tenant.hnsw(),
3102        tenant.read(),
3103        &content,
3104        widened,
3105    )
3106    .await
3107    .map_err(ApiError::from)?;
3108
3109    let mut nodes = Vec::new();
3110    let mut edges = Vec::new();
3111    for hit in result.hits.into_iter() {
3112        if hit.memory_id == memory_id {
3113            // Skip self.
3114            continue;
3115        }
3116        if nodes.len() as i64 >= limit {
3117            break;
3118        }
3119        // The HNSW `cos_distance` is a distance (smaller = more similar).
3120        // Convert to a weight in [0, 1] (larger = more similar) for the
3121        // wire format: weight = (1 - distance).max(0).
3122        let weight = (1.0 - hit.cos_distance).max(0.0);
3123        let target_id = format!("ep:{}", hit.memory_id);
3124        edges.push(GraphEdge {
3125            id: edge_id(node_id_full, "semantic", &target_id),
3126            source: node_id_full.to_string(),
3127            target: target_id,
3128            kind: "semantic",
3129            predicate: None,
3130            weight: Some(weight),
3131        });
3132        nodes.push(GraphNode {
3133            id: format!("ep:{}", hit.memory_id),
3134            kind: NodeKind::Episode.as_wire_str(),
3135            label: episode_label(&hit.content),
3136            ts_ms: None,
3137            tenant_id: tenant_id.to_string(),
3138            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
3139        });
3140    }
3141    Ok(GraphExpandResponse { nodes, edges })
3142}
3143
3144// ---- existence checks ----
3145
3146/// 404 if the memory_id has no row in this tenant's `episodes` table.
3147async fn ensure_episode_exists(
3148    tenant: &TenantHandle,
3149    memory_id: &str,
3150    node_id_full: &str,
3151) -> Result<(), ApiError> {
3152    let memory_id_q = memory_id.to_string();
3153    let exists: i64 = tenant
3154        .read()
3155        .interact(move |conn| {
3156            conn.query_row(
3157                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
3158                rusqlite::params![&memory_id_q],
3159                |r| r.get(0),
3160            )
3161        })
3162        .await
3163        .map_err(ApiError::from)?;
3164    if exists == 0 {
3165        return Err(ApiError::not_found(format!(
3166            "node_id {node_id_full:?} not found in current tenant"
3167        )));
3168    }
3169    Ok(())
3170}
3171
3172async fn ensure_cluster_exists(
3173    tenant: &TenantHandle,
3174    cluster_id: &str,
3175    node_id_full: &str,
3176) -> Result<(), ApiError> {
3177    let cluster_id_q = cluster_id.to_string();
3178    let exists: i64 = tenant
3179        .read()
3180        .interact(move |conn| {
3181            conn.query_row(
3182                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
3183                rusqlite::params![&cluster_id_q],
3184                |r| r.get(0),
3185            )
3186        })
3187        .await
3188        .map_err(ApiError::from)?;
3189    if exists == 0 {
3190        return Err(ApiError::not_found(format!(
3191            "node_id {node_id_full:?} not found in current tenant"
3192        )));
3193    }
3194    Ok(())
3195}
3196
3197async fn ensure_document_exists(
3198    tenant: &TenantHandle,
3199    doc_id: &str,
3200    node_id_full: &str,
3201) -> Result<(), ApiError> {
3202    let doc_id_q = doc_id.to_string();
3203    let exists: i64 = tenant
3204        .read()
3205        .interact(move |conn| {
3206            conn.query_row(
3207                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
3208                rusqlite::params![&doc_id_q],
3209                |r| r.get(0),
3210            )
3211        })
3212        .await
3213        .map_err(ApiError::from)?;
3214    if exists == 0 {
3215        return Err(ApiError::not_found(format!(
3216            "node_id {node_id_full:?} not found in current tenant"
3217        )));
3218    }
3219    Ok(())
3220}
3221
3222// ---------------------------------------------------------------------------
3223// Graph nodes + edges — paginated catalog reads (v0.10.0)
3224//
3225// `GET /v1/graph/nodes` and `GET /v1/graph/edges` are the bundle that
3226// powers solo-web's initial graph render. Both are read-only, both
3227// share the same tenant / auth / cursor scaffolding, both inherit the
3228// node-id prefix convention from `/v1/graph/expand` (ep:/doc:/chunk:/cl:/ent:).
3229//
3230// See `docs/dev-log/0114-graph-nodes-edges-impl.md` for the design
3231// notes (cursor format, entity scan strategy, semantic-edge rejection
3232// rationale, UNION pagination shape).
3233// ---------------------------------------------------------------------------
3234
// NOTE(review): the readers of the five caps below are outside this part of
// the file; the per-constant notes are inferred from the names and the
// section banner — confirm against the `/v1/graph/nodes` and
// `/v1/graph/edges` handlers.

/// Presumably the page size for `GET /v1/graph/nodes` when `limit` is omitted.
const GRAPH_NODES_DEFAULT_LIMIT: u32 = 100;
/// Presumably the largest page size `GET /v1/graph/nodes` will honour.
const GRAPH_NODES_MAX_LIMIT: u32 = 1000;
/// Presumably the page size for `GET /v1/graph/edges` when `limit` is omitted.
const GRAPH_EDGES_DEFAULT_LIMIT: u32 = 200;
/// Presumably the largest page size `GET /v1/graph/edges` will honour.
const GRAPH_EDGES_MAX_LIMIT: u32 = 2000;
/// Upper bound on entities kept by the entity scan (see `ENTITY_CAP_HEADER`).
const GRAPH_ENTITY_CAP: usize = 200;

/// Header set when the entity scan hit `GRAPH_ENTITY_CAP` and lower-
/// frequency entities were dropped from the response. Clients can show
/// "entities truncated" UX without parsing the body.
const ENTITY_CAP_HEADER: &str = "x-solo-entity-cap-reached";
3245
/// Query-string parameters deserialised for the graph node listing
/// (presumably `GET /v1/graph/nodes` — handler not shown here). Every field
/// is optional and defaults to `None` when absent.
#[derive(Debug, Deserialize)]
struct GraphNodesQuery {
    /// Comma-separated kinds. Empty/missing = all five kinds. Repeated
    /// `?kind=` query params are NOT supported by axum's `Query<T>`
    /// extractor for `Option<String>` (it picks one) — comma-separated
    /// is documented + simpler. Values: episode|document|chunk|cluster|entity.
    #[serde(default)]
    kind: Option<String>,
    /// Presumably an optional lower timestamp bound in milliseconds — TODO
    /// confirm inclusivity against the handler.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Presumably an optional upper timestamp bound in milliseconds — TODO
    /// confirm inclusivity against the handler.
    #[serde(default)]
    until_ms: Option<i64>,
    /// Requested page size; how it is defaulted/clamped is decided by the
    /// handler (not visible here).
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination cursor; presumably a `next_cursor` value from a
    /// previous response.
    #[serde(default)]
    cursor: Option<String>,
}
3263
/// Query-string parameters deserialised for the graph edge listing
/// (presumably `GET /v1/graph/edges` — handler not shown here). Every field
/// is optional and defaults to `None` when absent.
#[derive(Debug, Deserialize)]
struct GraphEdgesQuery {
    /// Optional node id; presumably restricts the result to edges touching
    /// that node — confirm against the handler.
    #[serde(default)]
    node_id: Option<String>,
    /// Comma-separated. Default = all kinds EXCEPT semantic.
    /// Values: triple|document_chunk|cluster_member|semantic.
    /// The raw identifier `r#type` is needed because `type` is a Rust
    /// keyword; the query key on the wire is plain `type`.
    #[serde(default)]
    r#type: Option<String>,
    /// Requested page size; how it is defaulted/clamped is decided by the
    /// handler (not visible here).
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination cursor; presumably a `next_cursor` value from a
    /// previous response.
    #[serde(default)]
    cursor: Option<String>,
}
3277
/// JSON body returned by `GET /v1/graph/nodes`.
#[derive(Debug, Serialize)]
struct GraphNodesResponse {
    /// The current page of nodes, ordered `ts_ms DESC, id ASC`.
    nodes: Vec<GraphNode>,
    /// Always serialised (as `null` when absent) so client codegen against
    /// the OpenAPI schema (`"type": ["string", "null"]`) sees a field
    /// rather than a missing key. See dev-log 0152 finding M3.
    /// `Some` only when the handler staged more rows than fit the page.
    next_cursor: Option<String>,
}
3286
/// JSON body returned by `GET /v1/graph/edges`.
#[derive(Debug, Serialize)]
struct GraphEdgesResponse {
    /// The current page of edges.
    edges: Vec<GraphEdge>,
    /// Always serialised; see `GraphNodesResponse::next_cursor`.
    next_cursor: Option<String>,
}
3293
3294/// Decode the `kind` filter from the query string. Returns the set of
3295/// kinds the caller wants (all five when filter absent / empty). 400 on
3296/// unknown kind.
3297fn parse_node_kind_filter(raw: Option<&str>) -> Result<Vec<NodeKind>, ApiError> {
3298    let raw = raw.unwrap_or("").trim();
3299    if raw.is_empty() {
3300        return Ok(vec![
3301            NodeKind::Episode,
3302            NodeKind::Document,
3303            NodeKind::Chunk,
3304            NodeKind::Cluster,
3305            NodeKind::Entity,
3306        ]);
3307    }
3308    let mut out = Vec::new();
3309    for token in raw.split(',') {
3310        let token = token.trim();
3311        if token.is_empty() {
3312            continue;
3313        }
3314        let kind = match token {
3315            "episode" => NodeKind::Episode,
3316            "document" => NodeKind::Document,
3317            "chunk" => NodeKind::Chunk,
3318            "cluster" => NodeKind::Cluster,
3319            "entity" => NodeKind::Entity,
3320            other => {
3321                return Err(ApiError::bad_request(format!(
3322                    "unknown node kind {other:?}; expected one of episode/document/chunk/cluster/entity"
3323                )));
3324            }
3325        };
3326        if !out.contains(&kind) {
3327            out.push(kind);
3328        }
3329    }
3330    if out.is_empty() {
3331        return Err(ApiError::bad_request(
3332            "kind filter is empty after parsing; either omit or list at least one kind",
3333        ));
3334    }
3335    Ok(out)
3336}
3337
/// Edge-kind discriminator on `/v1/graph/edges`.
///
/// The explicit discriminants double as the pagination rank returned by
/// [`EdgeKind::order_idx`]; do not renumber them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EdgeKind {
    Triple = 0,
    DocumentChunk = 1,
    ClusterMember = 2,
}

impl EdgeKind {
    /// Sort-stable kind ordering for pagination. Lower runs first.
    fn order_idx(self) -> u8 {
        // Fieldless enum with explicit `u8`-range discriminants: the
        // cast yields exactly the declared value.
        self as u8
    }
}
3356
3357fn parse_edge_kind_filter(raw: Option<&str>) -> Result<Vec<EdgeKind>, ApiError> {
3358    let raw = raw.unwrap_or("").trim();
3359    if raw.is_empty() {
3360        // Default = all three concrete kinds; semantic is opt-in via
3361        // /v1/graph/neighbors/:id (per scoping doc §3 Decision B).
3362        return Ok(vec![
3363            EdgeKind::Triple,
3364            EdgeKind::DocumentChunk,
3365            EdgeKind::ClusterMember,
3366        ]);
3367    }
3368    let mut out = Vec::new();
3369    for token in raw.split(',') {
3370        let token = token.trim();
3371        if token.is_empty() {
3372            continue;
3373        }
3374        let kind = match token {
3375            "triple" => EdgeKind::Triple,
3376            "document_chunk" => EdgeKind::DocumentChunk,
3377            "cluster_member" => EdgeKind::ClusterMember,
3378            "semantic" => {
3379                // semantic edges aren't precomputed; they're HNSW queries
3380                // at request time. Wrong endpoint.
3381                return Err(ApiError::bad_request(
3382                    "semantic edges are available via /v1/graph/neighbors/:id?kind=semantic, not /v1/graph/edges (semantic edges aren't precomputed; they're query-time HNSW lookups)",
3383                ));
3384            }
3385            other => {
3386                return Err(ApiError::bad_request(format!(
3387                    "unknown edge type {other:?}; expected one of triple/document_chunk/cluster_member"
3388                )));
3389            }
3390        };
3391        if !out.contains(&kind) {
3392            out.push(kind);
3393        }
3394    }
3395    if out.is_empty() {
3396        return Err(ApiError::bad_request(
3397            "type filter is empty after parsing; either omit or list at least one type",
3398        ));
3399    }
3400    Ok(out)
3401}
3402
/// Opaque cursor for `/v1/graph/nodes`. Encodes the last item's
/// `(ts_ms, id)` sort key. Under the sort `ts_ms DESC, id ASC` the next
/// page holds the rows where `ts_ms < cursor.ts_ms`, or
/// `ts_ms == cursor.ts_ms AND id > cursor.id` (see `node_passes_cursor`).
/// Note this is NOT a plain row-value `<` comparison, because the two
/// columns sort in opposite directions.
#[derive(Debug, Serialize, Deserialize)]
struct NodesCursor {
    /// Sort timestamp (milliseconds) of the last node on the previous page.
    ts_ms: i64,
    /// Prefixed node id (`ep:`/`doc:`/`chunk:`/`cl:`/`ent:` + raw id) of
    /// the last node on the previous page.
    id: String,
}
3411
/// Opaque cursor for `/v1/graph/edges`. Encodes the last item's
/// `(kind_idx, sub_id)` so the next page resumes at `> cursor` under
/// sort `(kind_idx ASC, sub_id ASC)`. `sub_id` is the per-kind stable
/// row id (triple_id for triples, chunk_id for document_chunk, the
/// composite `cluster_id||memory_id` string for cluster_member).
#[derive(Debug, Serialize, Deserialize)]
struct EdgesCursor {
    /// Pagination rank of the last edge's kind; presumably the value of
    /// `EdgeKind::order_idx` — the code that builds it is outside this view.
    kind_idx: u8,
    /// Per-kind stable row id of the last edge on the previous page.
    sub_id: String,
}
3422
3423fn encode_cursor<T: Serialize>(value: &T) -> Result<String, ApiError> {
3424    use base64::Engine;
3425    let json = serde_json::to_vec(value)
3426        .map_err(|e| ApiError::internal(format!("cursor serialize: {e}")))?;
3427    Ok(base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json))
3428}
3429
3430fn decode_cursor<T: for<'de> Deserialize<'de>>(raw: &str) -> Result<T, ApiError> {
3431    use base64::Engine;
3432    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
3433        .decode(raw.as_bytes())
3434        .map_err(|e| ApiError::bad_request(format!("cursor: bad base64: {e}")))?;
3435    serde_json::from_slice::<T>(&bytes)
3436        .map_err(|e| ApiError::bad_request(format!("cursor: bad JSON payload: {e}")))
3437}
3438
/// Internal staging row for the nodes endpoint. Carries the GraphNode
/// plus the sort key so we can merge all kinds before applying the
/// pagination cut.
#[derive(Debug)]
struct StagingNode {
    /// The wire-format node that ends up in the response.
    node: GraphNode,
    /// Timestamp (milliseconds) used for ordering; may differ from what
    /// the node exposes on the wire.
    sort_ts_ms: i64,
    /// Prefixed node id used as the deterministic tie-break.
    sort_id: String,
}
3448
/// Comparator implementing the canonical node order: newest timestamp
/// first (`ts_ms DESC`), then id ascending as a deterministic tie-break.
/// `Ordering::Less` means `a` is emitted before `b`.
fn cmp_node_sort_keys(a: (i64, &str), b: (i64, &str)) -> std::cmp::Ordering {
    let (a_ts, a_id) = a;
    let (b_ts, b_id) = b;
    // Comparing b against a (operands swapped) yields descending order.
    b_ts.cmp(&a_ts).then_with(|| a_id.cmp(b_id))
}
3458
3459/// True if `(ts_ms, id)` strictly comes AFTER `cursor` under the canonical
3460/// sort `ts_ms DESC, id ASC` — i.e. is admissible into a page following
3461/// the cursor.
3462fn node_passes_cursor(ts_ms: i64, id: &str, cursor: &NodesCursor) -> bool {
3463    cmp_node_sort_keys((ts_ms, id), (cursor.ts_ms, cursor.id.as_str()))
3464        == std::cmp::Ordering::Greater
3465}
3466
3467// --- Per-kind row fetchers (each runs a bounded query, applies the time
3468//     filter, returns rows already sorted `ts_ms DESC, id ASC`).
3469
3470#[derive(Debug)]
3471struct NodeRowEp {
3472    memory_id: String,
3473    ts_ms: i64,
3474    content: String,
3475}
3476
3477fn fetch_episodes_for_nodes(
3478    conn: &rusqlite::Connection,
3479    since_ms: Option<i64>,
3480    until_ms: Option<i64>,
3481    cursor: Option<&NodesCursor>,
3482    limit: i64,
3483) -> rusqlite::Result<Vec<NodeRowEp>> {
3484    let mut sql = String::from(
3485        "SELECT memory_id, ts_ms, content
3486           FROM episodes
3487          WHERE status = 'active'",
3488    );
3489    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3490    if let Some(s) = since_ms {
3491        sql.push_str(" AND ts_ms >= ?");
3492        params.push(s.into());
3493    }
3494    if let Some(u) = until_ms {
3495        sql.push_str(" AND ts_ms <= ?");
3496        params.push(u.into());
3497    }
3498    // Cursor pre-filter: under sort `ts_ms DESC, prefixed_id ASC`,
3499    // anything strictly newer than the cursor's ts_ms is in a previous
3500    // page; rows with equal ts_ms may or may not be (depends on the
3501    // cross-kind ordering). The post-merge step applies the full
3502    // `(ts_ms, prefixed_id)` comparison; here we just discard rows
3503    // that can't possibly survive.
3504    if let Some(cur) = cursor {
3505        sql.push_str(" AND ts_ms <= ?");
3506        params.push(cur.ts_ms.into());
3507    }
3508    sql.push_str(" ORDER BY ts_ms DESC, memory_id ASC LIMIT ?");
3509    params.push(limit.into());
3510    let mut stmt = conn.prepare(&sql)?;
3511    let rows: Vec<NodeRowEp> = stmt
3512        .query_map(rusqlite::params_from_iter(params), |r| {
3513            Ok(NodeRowEp {
3514                memory_id: r.get(0)?,
3515                ts_ms: r.get(1)?,
3516                content: r.get(2)?,
3517            })
3518        })?
3519        .collect::<rusqlite::Result<Vec<_>>>()?;
3520    Ok(rows)
3521}
3522
3523#[derive(Debug)]
3524struct NodeRowDoc {
3525    doc_id: String,
3526    title: Option<String>,
3527    source: Option<String>,
3528    ingested_at_ms: i64,
3529}
3530
3531fn fetch_documents_for_nodes(
3532    conn: &rusqlite::Connection,
3533    since_ms: Option<i64>,
3534    until_ms: Option<i64>,
3535    cursor: Option<&NodesCursor>,
3536    limit: i64,
3537) -> rusqlite::Result<Vec<NodeRowDoc>> {
3538    let mut sql = String::from(
3539        "SELECT doc_id, title, source, ingested_at_ms
3540           FROM documents
3541          WHERE status = 'active'",
3542    );
3543    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3544    if let Some(s) = since_ms {
3545        sql.push_str(" AND ingested_at_ms >= ?");
3546        params.push(s.into());
3547    }
3548    if let Some(u) = until_ms {
3549        sql.push_str(" AND ingested_at_ms <= ?");
3550        params.push(u.into());
3551    }
3552    if let Some(cur) = cursor {
3553        sql.push_str(" AND ingested_at_ms <= ?");
3554        params.push(cur.ts_ms.into());
3555    }
3556    sql.push_str(" ORDER BY ingested_at_ms DESC, doc_id ASC LIMIT ?");
3557    params.push(limit.into());
3558    let mut stmt = conn.prepare(&sql)?;
3559    let rows: Vec<NodeRowDoc> = stmt
3560        .query_map(rusqlite::params_from_iter(params), |r| {
3561            Ok(NodeRowDoc {
3562                doc_id: r.get(0)?,
3563                title: r.get(1)?,
3564                source: r.get(2)?,
3565                ingested_at_ms: r.get(3)?,
3566            })
3567        })?
3568        .collect::<rusqlite::Result<Vec<_>>>()?;
3569    Ok(rows)
3570}
3571
3572#[derive(Debug)]
3573struct NodeRowChunk {
3574    chunk_id: String,
3575    chunk_index: i64,
3576    content: String,
3577    created_at_ms: i64,
3578}
3579
3580fn fetch_chunks_for_nodes(
3581    conn: &rusqlite::Connection,
3582    since_ms: Option<i64>,
3583    until_ms: Option<i64>,
3584    cursor: Option<&NodesCursor>,
3585    limit: i64,
3586) -> rusqlite::Result<Vec<NodeRowChunk>> {
3587    // Filter by `document_chunks.created_at_ms`; chunks of forgotten
3588    // documents are filtered out by the join on `documents.status`.
3589    let mut sql = String::from(
3590        "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
3591           FROM document_chunks c
3592           JOIN documents d ON d.doc_id = c.doc_id
3593          WHERE d.status = 'active'",
3594    );
3595    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3596    if let Some(s) = since_ms {
3597        sql.push_str(" AND c.created_at_ms >= ?");
3598        params.push(s.into());
3599    }
3600    if let Some(u) = until_ms {
3601        sql.push_str(" AND c.created_at_ms <= ?");
3602        params.push(u.into());
3603    }
3604    if let Some(cur) = cursor {
3605        sql.push_str(" AND c.created_at_ms <= ?");
3606        params.push(cur.ts_ms.into());
3607    }
3608    sql.push_str(" ORDER BY c.created_at_ms DESC, c.chunk_id ASC LIMIT ?");
3609    params.push(limit.into());
3610    let mut stmt = conn.prepare(&sql)?;
3611    let rows: Vec<NodeRowChunk> = stmt
3612        .query_map(rusqlite::params_from_iter(params), |r| {
3613            Ok(NodeRowChunk {
3614                chunk_id: r.get(0)?,
3615                chunk_index: r.get(1)?,
3616                content: r.get(2)?,
3617                created_at_ms: r.get(3)?,
3618            })
3619        })?
3620        .collect::<rusqlite::Result<Vec<_>>>()?;
3621    Ok(rows)
3622}
3623
3624#[derive(Debug)]
3625struct NodeRowCluster {
3626    cluster_id: String,
3627    abstraction: Option<String>,
3628    created_at_ms: i64,
3629}
3630
3631fn fetch_clusters_for_nodes(
3632    conn: &rusqlite::Connection,
3633    since_ms: Option<i64>,
3634    until_ms: Option<i64>,
3635    cursor: Option<&NodesCursor>,
3636    limit: i64,
3637) -> rusqlite::Result<Vec<NodeRowCluster>> {
3638    // clusters has no `status` column; LEFT JOIN abstractions for the
3639    // optional label.
3640    let mut sql = String::from(
3641        "SELECT c.cluster_id, sa.content, c.created_at_ms
3642           FROM clusters c
3643           LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
3644          WHERE 1=1",
3645    );
3646    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3647    if let Some(s) = since_ms {
3648        sql.push_str(" AND c.created_at_ms >= ?");
3649        params.push(s.into());
3650    }
3651    if let Some(u) = until_ms {
3652        sql.push_str(" AND c.created_at_ms <= ?");
3653        params.push(u.into());
3654    }
3655    if let Some(cur) = cursor {
3656        sql.push_str(" AND c.created_at_ms <= ?");
3657        params.push(cur.ts_ms.into());
3658    }
3659    sql.push_str(" ORDER BY c.created_at_ms DESC, c.cluster_id ASC LIMIT ?");
3660    params.push(limit.into());
3661    let mut stmt = conn.prepare(&sql)?;
3662    let rows: Vec<NodeRowCluster> = stmt
3663        .query_map(rusqlite::params_from_iter(params), |r| {
3664            Ok(NodeRowCluster {
3665                cluster_id: r.get(0)?,
3666                abstraction: r.get(1)?,
3667                created_at_ms: r.get(2)?,
3668            })
3669        })?
3670        .collect::<rusqlite::Result<Vec<_>>>()?;
3671    Ok(rows)
3672}
3673
3674#[derive(Debug)]
3675struct NodeRowEntity {
3676    value: String,
3677    ref_count: i64,
3678    first_seen_ms: i64,
3679}
3680
3681/// Synthesize entity nodes from the triples table. Caps result at
3682/// `GRAPH_ENTITY_CAP`, ordered by `ref_count DESC` so the loudest
3683/// entities make the cut. Returns (rows, cap_reached).
3684///
3685/// **Cost**: this is O(N) over active triples per request. For tenants
3686/// with >100k triples this can be noticeable; v0.10.x can cache the
3687/// rollup if profiling justifies it. The 200-row cap keeps the wire
3688/// payload bounded regardless.
3689fn fetch_entities_for_nodes(
3690    conn: &rusqlite::Connection,
3691    since_ms: Option<i64>,
3692    until_ms: Option<i64>,
3693    cursor: Option<&NodesCursor>,
3694) -> rusqlite::Result<(Vec<NodeRowEntity>, bool)> {
3695    // Pull subject + object columns, group by value, compute count + min
3696    // ts_ms. UNION ALL the two columns into a single aggregation. Apply
3697    // time filter against `valid_from_ms` (the closest analogue to "when
3698    // was this entity first referenced").
3699    let mut sql = String::from(
3700        "WITH all_refs AS (
3701            SELECT subject_id AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3702            UNION ALL
3703            SELECT object_id  AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3704         )
3705         SELECT value, COUNT(*) AS ref_count, MIN(ts_ms) AS first_seen_ms
3706           FROM all_refs
3707          WHERE 1=1",
3708    );
3709    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3710    if let Some(s) = since_ms {
3711        sql.push_str(" AND ts_ms >= ?");
3712        params.push(s.into());
3713    }
3714    if let Some(u) = until_ms {
3715        sql.push_str(" AND ts_ms <= ?");
3716        params.push(u.into());
3717    }
3718    // Cursor: drop entities whose first_seen_ms strictly newer than the
3719    // cursor. We can't predicate on COUNT() until after GROUP BY, so the
3720    // cap-applicable filter sits in the HAVING clause.
3721    sql.push_str(" GROUP BY value");
3722    if let Some(ts) = cursor.map(|c| c.ts_ms) {
3723        sql.push_str(" HAVING MIN(ts_ms) <= ?");
3724        params.push(ts.into());
3725    }
3726    // Over-fetch by one to detect "cap reached".
3727    let want = GRAPH_ENTITY_CAP as i64 + 1;
3728    sql.push_str(" ORDER BY ref_count DESC, value ASC LIMIT ?");
3729    params.push(want.into());
3730    let mut stmt = conn.prepare(&sql)?;
3731    let rows: Vec<NodeRowEntity> = stmt
3732        .query_map(rusqlite::params_from_iter(params), |r| {
3733            Ok(NodeRowEntity {
3734                value: r.get(0)?,
3735                ref_count: r.get(1)?,
3736                first_seen_ms: r.get(2)?,
3737            })
3738        })?
3739        .collect::<rusqlite::Result<Vec<_>>>()?;
3740    let cap_reached = rows.len() > GRAPH_ENTITY_CAP;
3741    let mut trimmed = rows;
3742    if cap_reached {
3743        trimmed.truncate(GRAPH_ENTITY_CAP);
3744    }
3745    Ok((trimmed, cap_reached))
3746}
3747
3748/// `GET /v1/graph/nodes`. Paginated node catalog across the tenant.
3749/// See module-level comments for the contract.
3750async fn graph_nodes_handler(
3751    TenantExtractor(tenant): TenantExtractor,
3752    Query(q): Query<GraphNodesQuery>,
3753) -> Result<Response, ApiError> {
3754    let limit = q.limit.unwrap_or(GRAPH_NODES_DEFAULT_LIMIT);
3755    let limit = limit.clamp(1, GRAPH_NODES_MAX_LIMIT);
3756    let kinds = parse_node_kind_filter(q.kind.as_deref())?;
3757    let since_ms = q.since_ms;
3758    let until_ms = q.until_ms;
3759    if let (Some(s), Some(u)) = (since_ms, until_ms) {
3760        if s > u {
3761            return Err(ApiError::bad_request(format!(
3762                "since_ms ({s}) must be <= until_ms ({u})"
3763            )));
3764        }
3765    }
3766    let cursor = match q.cursor.as_deref() {
3767        None => None,
3768        Some("") => None,
3769        Some(raw) => Some(decode_cursor::<NodesCursor>(raw)?),
3770    };
3771    let want_episode = kinds.contains(&NodeKind::Episode);
3772    let want_document = kinds.contains(&NodeKind::Document);
3773    let want_chunk = kinds.contains(&NodeKind::Chunk);
3774    let want_cluster = kinds.contains(&NodeKind::Cluster);
3775    let want_entity = kinds.contains(&NodeKind::Entity);
3776
3777    // Over-fetch `limit + 2` per kind:
3778    //   * `+1` so the merge step can detect "more rows available beyond
3779    //     this page" → emits a `next_cursor` instead of None.
3780    //   * `+1` again because the SQL pre-filter `ts_ms <= cursor.ts_ms`
3781    //     can pull the previous page's last item back in; the post-merge
3782    //     cursor predicate drops it, costing one row of headroom.
3783    // The entity cap stays at GRAPH_ENTITY_CAP — entities are bounded
3784    // independently by the response cap, not the page limit.
3785    let per_kind_limit = (limit as i64).saturating_add(2);
3786    let tenant_id_for_blocking = tenant.tenant_id().to_string();
3787    let cursor_clone = cursor.as_ref().map(|c| NodesCursor {
3788        ts_ms: c.ts_ms,
3789        id: c.id.clone(),
3790    });
3791
3792    let (mut staged, cap_reached) = tenant
3793        .read()
3794        .interact(move |conn| {
3795            let mut staged: Vec<StagingNode> = Vec::new();
3796            let mut cap_reached = false;
3797            let cursor_ref = cursor_clone.as_ref();
3798
3799            if want_episode {
3800                let eps =
3801                    fetch_episodes_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3802                for ep in eps {
3803                    let id = format!("ep:{}", ep.memory_id);
3804                    let exp = ExpandedEpisode {
3805                        memory_id: ep.memory_id,
3806                        ts_ms: ep.ts_ms,
3807                        content: ep.content,
3808                    };
3809                    let node = graph_node_for_episode(&tenant_id_for_blocking, &exp);
3810                    staged.push(StagingNode {
3811                        sort_ts_ms: ep.ts_ms,
3812                        sort_id: id.clone(),
3813                        node,
3814                    });
3815                }
3816            }
3817            if want_document {
3818                let docs = fetch_documents_for_nodes(
3819                    conn,
3820                    since_ms,
3821                    until_ms,
3822                    cursor_ref,
3823                    per_kind_limit,
3824                )?;
3825                for d in docs {
3826                    let id = format!("doc:{}", d.doc_id);
3827                    let exp = ExpandedDocument {
3828                        doc_id: d.doc_id,
3829                        title: d.title,
3830                        source: d.source,
3831                        ingested_at_ms: d.ingested_at_ms,
3832                    };
3833                    let node = graph_node_for_document(&tenant_id_for_blocking, &exp);
3834                    staged.push(StagingNode {
3835                        sort_ts_ms: d.ingested_at_ms,
3836                        sort_id: id.clone(),
3837                        node,
3838                    });
3839                }
3840            }
3841            if want_chunk {
3842                let chunks =
3843                    fetch_chunks_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3844                for c in chunks {
3845                    let id = format!("chunk:{}", c.chunk_id);
3846                    let exp = ExpandedChunk {
3847                        chunk_id: c.chunk_id,
3848                        chunk_index: c.chunk_index,
3849                        content: c.content,
3850                    };
3851                    // graph_node_for_chunk sets ts_ms = None for the
3852                    // wire format (chunks don't have a natural user-
3853                    // facing timestamp); but for sorting we use the
3854                    // row's created_at_ms.
3855                    let mut node = graph_node_for_chunk(&tenant_id_for_blocking, &exp);
3856                    node.ts_ms = Some(c.created_at_ms);
3857                    staged.push(StagingNode {
3858                        sort_ts_ms: c.created_at_ms,
3859                        sort_id: id.clone(),
3860                        node,
3861                    });
3862                }
3863            }
3864            if want_cluster {
3865                let cls =
3866                    fetch_clusters_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3867                for c in cls {
3868                    let id = format!("cl:{}", c.cluster_id);
3869                    let node = graph_node_for_cluster(
3870                        &tenant_id_for_blocking,
3871                        &c.cluster_id,
3872                        c.abstraction.as_deref(),
3873                        c.created_at_ms,
3874                    );
3875                    staged.push(StagingNode {
3876                        sort_ts_ms: c.created_at_ms,
3877                        sort_id: id.clone(),
3878                        node,
3879                    });
3880                }
3881            }
3882            if want_entity {
3883                let (ents, was_cap_reached) =
3884                    fetch_entities_for_nodes(conn, since_ms, until_ms, cursor_ref)?;
3885                cap_reached = was_cap_reached;
3886                for e in ents {
3887                    let id = format!("ent:{}", e.value);
3888                    let mut node = graph_node_for_entity(&tenant_id_for_blocking, &e.value);
3889                    node.ts_ms = Some(e.first_seen_ms);
3890                    node.preview = Some(format!("Referenced in {} triples", e.ref_count));
3891                    staged.push(StagingNode {
3892                        sort_ts_ms: e.first_seen_ms,
3893                        sort_id: id.clone(),
3894                        node,
3895                    });
3896                }
3897            }
3898            Ok::<_, rusqlite::Error>((staged, cap_reached))
3899        })
3900        .await
3901        .map_err(ApiError::from)?;
3902
3903    // Apply cursor filter.
3904    if let Some(cur) = &cursor {
3905        staged.retain(|s| node_passes_cursor(s.sort_ts_ms, &s.sort_id, cur));
3906    }
3907
3908    // Sort `ts_ms DESC, id ASC`.
3909    staged
3910        .sort_by(|a, b| cmp_node_sort_keys((a.sort_ts_ms, &a.sort_id), (b.sort_ts_ms, &b.sort_id)));
3911
3912    // Apply page limit + compute next_cursor.
3913    let limit_us = limit as usize;
3914    let next_cursor = if staged.len() > limit_us {
3915        let last = &staged[limit_us - 1];
3916        Some(NodesCursor {
3917            ts_ms: last.sort_ts_ms,
3918            id: last.sort_id.clone(),
3919        })
3920    } else {
3921        None
3922    };
3923    staged.truncate(limit_us);
3924
3925    let next_cursor_str = match next_cursor {
3926        Some(c) => Some(encode_cursor(&c)?),
3927        None => None,
3928    };
3929
3930    let nodes: Vec<GraphNode> = staged.into_iter().map(|s| s.node).collect();
3931    let payload = GraphNodesResponse {
3932        nodes,
3933        next_cursor: next_cursor_str,
3934    };
3935
3936    // Attach the entity-cap header so clients can show truncation UX
3937    // without parsing the body.
3938    let mut response = Json(payload).into_response();
3939    if cap_reached {
3940        response
3941            .headers_mut()
3942            .insert(ENTITY_CAP_HEADER, HeaderValue::from_static("true"));
3943    }
3944    Ok(response)
3945}
3946
3947// --- /v1/graph/edges --------------------------------------------------
3948
/// Internal staging row for the edges endpoint: the wire-format edge
/// plus the `(kind_idx, sub_id)` key it is ordered and paginated by.
#[derive(Debug)]
struct StagingEdge {
    /// The edge that ends up in the response.
    edge: GraphEdge,
    /// Primary sort key; compared ascending by `cmp_edge_sort_keys`.
    kind_idx: u8,
    /// Secondary sort key (per-kind row id); compared ascending.
    sub_id: String,
}
3955
/// Comparator implementing the canonical edge order: `kind_idx`
/// ascending, then `sub_id` ascending (bytewise string order).
fn cmp_edge_sort_keys(a: (u8, &str), b: (u8, &str)) -> std::cmp::Ordering {
    // Tuples compare lexicographically, element by element, which is
    // exactly "kind first, sub-id as tie-break".
    a.cmp(&b)
}
3962
3963fn edge_passes_cursor(kind_idx: u8, sub_id: &str, cursor: &EdgesCursor) -> bool {
3964    cmp_edge_sort_keys(
3965        (kind_idx, sub_id),
3966        (cursor.kind_idx, cursor.sub_id.as_str()),
3967    ) == std::cmp::Ordering::Greater
3968}
3969
3970/// Whether the supplied focus `node_id` (kind, value) matches an edge's
3971/// (source, target) endpoint pair under a given edge kind. Used to
3972/// filter `?node_id=...` queries.
3973fn edge_touches_focus(
3974    kind: EdgeKind,
3975    focus_kind: NodeKind,
3976    focus_value: &str,
3977    src_value: &str,
3978    tgt_value: &str,
3979    extra_value: Option<&str>,
3980) -> bool {
3981    // Determine which endpoint kinds this edge family produces; if the
3982    // focus kind isn't compatible, no match.
3983    match kind {
3984        EdgeKind::Triple => match focus_kind {
3985            // Triple edges flow source_episode → ent:<object_id>. We
3986            // also expose subject/object entities as endpoints (see
3987            // emit_triple_edges_for_focus); the matching here covers
3988            // episode focus + entity focus + the symmetric pair.
3989            NodeKind::Episode => src_value == focus_value,
3990            NodeKind::Entity => {
3991                tgt_value == focus_value
3992                    || extra_value.map(|x| x == focus_value).unwrap_or(false)
3993                    || src_value == focus_value
3994            }
3995            _ => false,
3996        },
3997        EdgeKind::DocumentChunk => match focus_kind {
3998            NodeKind::Document => src_value == focus_value,
3999            NodeKind::Chunk => tgt_value == focus_value,
4000            _ => false,
4001        },
4002        EdgeKind::ClusterMember => match focus_kind {
4003            NodeKind::Cluster => src_value == focus_value,
4004            NodeKind::Episode => tgt_value == focus_value,
4005            _ => false,
4006        },
4007    }
4008}
4009
4010#[derive(Debug)]
4011struct EdgeRowTriple {
4012    triple_id: String,
4013    source_memory_id: Option<String>,
4014    object_id: String,
4015    predicate: String,
4016    confidence: f32,
4017}
4018
4019fn fetch_triple_edges(conn: &rusqlite::Connection) -> rusqlite::Result<Vec<EdgeRowTriple>> {
4020    // Emit one edge per triple: source_episode → ent:object_id. Skip
4021    // orphan triples (`source_episode_id IS NULL`). Bound the scan at
4022    // GRAPH_EDGES_MAX_LIMIT * a safety multiplier so a runaway tenant
4023    // doesn't OOM the page-builder; the merge-and-page step trims to
4024    // the real limit downstream.
4025    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4026    let mut stmt = conn.prepare(
4027        "SELECT t.triple_id, e.memory_id, t.object_id, t.predicate, t.confidence
4028           FROM triples t
4029           LEFT JOIN episodes e ON e.rowid = t.source_episode_id
4030          WHERE t.status = 'active'
4031          ORDER BY t.triple_id ASC
4032          LIMIT ?1",
4033    )?;
4034    let rows: Vec<EdgeRowTriple> = stmt
4035        .query_map(rusqlite::params![safety_cap], |r| {
4036            Ok(EdgeRowTriple {
4037                triple_id: r.get(0)?,
4038                source_memory_id: r.get::<_, Option<String>>(1)?,
4039                object_id: r.get(2)?,
4040                predicate: r.get(3)?,
4041                confidence: r.get(4)?,
4042            })
4043        })?
4044        .collect::<rusqlite::Result<Vec<_>>>()?;
4045    Ok(rows)
4046}
4047
4048#[derive(Debug)]
4049struct EdgeRowDocChunk {
4050    chunk_id: String,
4051    doc_id: String,
4052}
4053
4054fn fetch_document_chunk_edges(
4055    conn: &rusqlite::Connection,
4056) -> rusqlite::Result<Vec<EdgeRowDocChunk>> {
4057    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4058    let mut stmt = conn.prepare(
4059        "SELECT c.chunk_id, c.doc_id
4060           FROM document_chunks c
4061           JOIN documents d ON d.doc_id = c.doc_id
4062          WHERE d.status = 'active'
4063          ORDER BY c.chunk_id ASC
4064          LIMIT ?1",
4065    )?;
4066    let rows: Vec<EdgeRowDocChunk> = stmt
4067        .query_map(rusqlite::params![safety_cap], |r| {
4068            Ok(EdgeRowDocChunk {
4069                chunk_id: r.get(0)?,
4070                doc_id: r.get(1)?,
4071            })
4072        })?
4073        .collect::<rusqlite::Result<Vec<_>>>()?;
4074    Ok(rows)
4075}
4076
4077#[derive(Debug)]
4078struct EdgeRowClusterMember {
4079    cluster_id: String,
4080    memory_id: String,
4081}
4082
4083fn fetch_cluster_member_edges(
4084    conn: &rusqlite::Connection,
4085) -> rusqlite::Result<Vec<EdgeRowClusterMember>> {
4086    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4087    let mut stmt = conn.prepare(
4088        "SELECT ce.cluster_id, ce.memory_id
4089           FROM cluster_episodes ce
4090           JOIN episodes e ON e.memory_id = ce.memory_id
4091          WHERE e.status = 'active'
4092          ORDER BY ce.cluster_id ASC, ce.memory_id ASC
4093          LIMIT ?1",
4094    )?;
4095    let rows: Vec<EdgeRowClusterMember> = stmt
4096        .query_map(rusqlite::params![safety_cap], |r| {
4097            Ok(EdgeRowClusterMember {
4098                cluster_id: r.get(0)?,
4099                memory_id: r.get(1)?,
4100            })
4101        })?
4102        .collect::<rusqlite::Result<Vec<_>>>()?;
4103    Ok(rows)
4104}
4105
4106/// `GET /v1/graph/edges`. Paginated edge catalog. See module-level
4107/// comments for the contract.
4108async fn graph_edges_handler(
4109    TenantExtractor(tenant): TenantExtractor,
4110    Query(q): Query<GraphEdgesQuery>,
4111) -> Result<Json<GraphEdgesResponse>, ApiError> {
4112    let limit = q.limit.unwrap_or(GRAPH_EDGES_DEFAULT_LIMIT);
4113    let limit = limit.clamp(1, GRAPH_EDGES_MAX_LIMIT);
4114    let kinds = parse_edge_kind_filter(q.r#type.as_deref())?;
4115    let cursor = match q.cursor.as_deref() {
4116        None => None,
4117        Some("") => None,
4118        Some(raw) => Some(decode_cursor::<EdgesCursor>(raw)?),
4119    };
4120
4121    let focus = match q.node_id.as_deref() {
4122        None => None,
4123        Some(raw) => {
4124            let (kind, value) = parse_node_id(raw)?;
4125            Some((kind, value.to_string()))
4126        }
4127    };
4128
4129    let want_triple = kinds.contains(&EdgeKind::Triple);
4130    let want_doc_chunk = kinds.contains(&EdgeKind::DocumentChunk);
4131    let want_cluster_member = kinds.contains(&EdgeKind::ClusterMember);
4132
4133    let staged: Vec<StagingEdge> = tenant
4134        .read()
4135        .interact(move |conn| {
4136            let mut staged: Vec<StagingEdge> = Vec::new();
4137
4138            if want_triple {
4139                for t in fetch_triple_edges(conn)? {
4140                    let src_id = match &t.source_memory_id {
4141                        Some(mid) => format!("ep:{mid}"),
4142                        None => continue, // orphan triple — skip
4143                    };
4144                    let tgt_id = format!("ent:{}", t.object_id);
4145                    if let Some((fk, fv)) = &focus {
4146                        // `src_value` for matching is the bare memory_id
4147                        // (after the `ep:` prefix); `tgt_value` is the
4148                        // bare entity value.
4149                        if !edge_touches_focus(
4150                            EdgeKind::Triple,
4151                            *fk,
4152                            fv,
4153                            t.source_memory_id.as_deref().unwrap_or(""),
4154                            &t.object_id,
4155                            // Triples carry a subject_id too, but the
4156                            // emitted edge only goes ep → ent(object).
4157                            // For entity-focus matches we also accept
4158                            // hits on subject_id; surface it through
4159                            // the `extra` slot.
4160                            None,
4161                        ) {
4162                            continue;
4163                        }
4164                    }
4165                    let edge = GraphEdge {
4166                        id: edge_id(&src_id, "triple", &tgt_id),
4167                        source: src_id,
4168                        target: tgt_id,
4169                        kind: "triple",
4170                        predicate: Some(t.predicate),
4171                        weight: Some(t.confidence),
4172                    };
4173                    staged.push(StagingEdge {
4174                        edge,
4175                        kind_idx: EdgeKind::Triple.order_idx(),
4176                        sub_id: t.triple_id,
4177                    });
4178                }
4179            }
4180            if want_doc_chunk {
4181                for dc in fetch_document_chunk_edges(conn)? {
4182                    let src_id = format!("doc:{}", dc.doc_id);
4183                    let tgt_id = format!("chunk:{}", dc.chunk_id);
4184                    if let Some((fk, fv)) = &focus {
4185                        if !edge_touches_focus(
4186                            EdgeKind::DocumentChunk,
4187                            *fk,
4188                            fv,
4189                            &dc.doc_id,
4190                            &dc.chunk_id,
4191                            None,
4192                        ) {
4193                            continue;
4194                        }
4195                    }
4196                    let edge = GraphEdge {
4197                        id: edge_id(&src_id, "document_chunk", &tgt_id),
4198                        source: src_id,
4199                        target: tgt_id,
4200                        kind: "document_chunk",
4201                        predicate: None,
4202                        weight: None,
4203                    };
4204                    staged.push(StagingEdge {
4205                        edge,
4206                        kind_idx: EdgeKind::DocumentChunk.order_idx(),
4207                        sub_id: dc.chunk_id,
4208                    });
4209                }
4210            }
4211            if want_cluster_member {
4212                for cm in fetch_cluster_member_edges(conn)? {
4213                    let src_id = format!("cl:{}", cm.cluster_id);
4214                    let tgt_id = format!("ep:{}", cm.memory_id);
4215                    if let Some((fk, fv)) = &focus {
4216                        if !edge_touches_focus(
4217                            EdgeKind::ClusterMember,
4218                            *fk,
4219                            fv,
4220                            &cm.cluster_id,
4221                            &cm.memory_id,
4222                            None,
4223                        ) {
4224                            continue;
4225                        }
4226                    }
4227                    let edge = GraphEdge {
4228                        id: edge_id(&src_id, "cluster_member", &tgt_id),
4229                        source: src_id,
4230                        target: tgt_id,
4231                        kind: "cluster_member",
4232                        predicate: None,
4233                        weight: None,
4234                    };
4235                    let sub_id = format!("{}\u{1f}{}", cm.cluster_id, cm.memory_id);
4236                    staged.push(StagingEdge {
4237                        edge,
4238                        kind_idx: EdgeKind::ClusterMember.order_idx(),
4239                        sub_id,
4240                    });
4241                }
4242            }
4243            Ok::<_, rusqlite::Error>(staged)
4244        })
4245        .await
4246        .map_err(ApiError::from)?;
4247
4248    // Apply cursor filter.
4249    let mut staged = staged;
4250    if let Some(cur) = &cursor {
4251        staged.retain(|s| edge_passes_cursor(s.kind_idx, &s.sub_id, cur));
4252    }
4253
4254    // Sort `(kind_idx ASC, sub_id ASC)` — stable, simple.
4255    staged.sort_by(|a, b| cmp_edge_sort_keys((a.kind_idx, &a.sub_id), (b.kind_idx, &b.sub_id)));
4256
4257    let limit_us = limit as usize;
4258    let next_cursor = if staged.len() > limit_us {
4259        let last = &staged[limit_us - 1];
4260        Some(EdgesCursor {
4261            kind_idx: last.kind_idx,
4262            sub_id: last.sub_id.clone(),
4263        })
4264    } else {
4265        None
4266    };
4267    staged.truncate(limit_us);
4268    let next_cursor_str = match next_cursor {
4269        Some(c) => Some(encode_cursor(&c)?),
4270        None => None,
4271    };
4272
4273    let edges: Vec<GraphEdge> = staged.into_iter().map(|s| s.edge).collect();
4274    Ok(Json(GraphEdgesResponse {
4275        edges,
4276        next_cursor: next_cursor_str,
4277    }))
4278}
4279
4280// ---------------------------------------------------------------------------
4281// Graph inspect — kind-discriminated full-record drill (v0.10.0)
4282//
4283// `GET /v1/graph/inspect/{id}` powers solo-web's right-side inspector
4284// panel. Path `id` carries the prefixed node identifier (ep:/doc:/chunk:/
4285// cl:/ent:); the handler dispatches per-kind and returns the same wire
4286// shape solo-web's `InspectResponse` expects: `{ node, full_text?,
4287// triples_in[], triples_out[] }`.
4288//
4289// Per-kind contract (v0.10.0 P1):
4290//   * `ep:<memory_id>`     full_text = episodes.content (untruncated),
4291//                          triples_in = [],
4292//                          triples_out = triples WHERE source_episode_id = rowid
4293//                          (one edge per triple, ep -> ent(object), predicate
4294//                          + weight surfaced). Episodes never appear as triple
4295//                          subjects/objects, so triples_in is structurally
4296//                          empty.
4297//   * `doc:<doc_id>`       full_text = concatenated chunk bodies separated by
4298//                          "\n\n" (no `documents.full_text` column exists; the
4299//                          chunks-concat path produces the same final text the
4300//                          ingester chunked from). triples_in/out = [] --
4301//                          documents don't directly carry triples; their
4302//                          chunks transitively do, but the inspector reaches
4303//                          those via the existing `/v1/graph/expand` drill.
4304//   * `chunk:<chunk_id>`   full_text = document_chunks.content,
4305//                          triples_in/out = [] (chunks aren't triple endpoints).
4306//   * `cl:<cluster_id>`    full_text = label + "\n\n" + abstraction
4307//                          (`semantic_abstractions.content`) when an
4308//                          abstraction exists; just the label otherwise.
4309//                          triples_in/out = [].
4310//   * `ent:<value>`        full_text = None (entities have no body),
4311//                          triples_in = [],
4312//                          triples_out = all triples where the entity appears
4313//                          as subject OR object. Capped at
4314//                          `GRAPH_INSPECT_ENTITY_TRIPLES_CAP` (50). Entities
4315//                          are synthetic -- an `ent:<value>` with zero triples
4316//                          in the tenant returns 404 (the entity exists only
4317//                          if at least one triple references it).
4318//
4319// Error semantics: 404 if the prefixed id has no row in the tenant's DB.
4320// 400 if the prefix is unknown or the body after `:` is empty (reuses
4321// `parse_node_id`). Tenant + auth are handled by the existing extractors.
4322//
4323// Lesson #30: no audit emit. Inspect is a derived read over already-
4324// audited primitives.
4325// ---------------------------------------------------------------------------
4326
/// Maximum number of triples returned when inspecting an `ent:` node.
///
/// Heavily referenced entities ("user", "Alice") could otherwise return an
/// unbounded list; the inspector panel only needs enough rows for
/// orientation. Callers that need the full, paginated set use the
/// `/v1/graph/expand?kind=triple` path instead.
const GRAPH_INSPECT_ENTITY_TRIPLES_CAP: i64 = 50;
4332
4333#[derive(Debug, Serialize)]
4334struct GraphInspectResponse {
4335    node: GraphNode,
4336    #[serde(skip_serializing_if = "Option::is_none")]
4337    full_text: Option<String>,
4338    triples_in: Vec<GraphEdge>,
4339    triples_out: Vec<GraphEdge>,
4340}
4341
4342/// `GET /v1/graph/inspect/{id}`. See module-level comments.
4343async fn graph_inspect_handler(
4344    TenantExtractor(tenant): TenantExtractor,
4345    Path(id): Path<String>,
4346) -> Result<Json<GraphInspectResponse>, ApiError> {
4347    let (kind, value) = parse_node_id(&id)?;
4348    let tenant_id_str = tenant.tenant_id().to_string();
4349    let value = value.to_string();
4350    let node_id_full = id;
4351    match kind {
4352        NodeKind::Episode => {
4353            inspect_episode_node(&tenant, &tenant_id_str, value, node_id_full).await
4354        }
4355        NodeKind::Document => {
4356            inspect_document_node(&tenant, &tenant_id_str, value, node_id_full).await
4357        }
4358        NodeKind::Chunk => inspect_chunk_node(&tenant, &tenant_id_str, value, node_id_full).await,
4359        NodeKind::Cluster => {
4360            inspect_cluster_node(&tenant, &tenant_id_str, value, node_id_full).await
4361        }
4362        NodeKind::Entity => inspect_entity_node(&tenant, &tenant_id_str, value, node_id_full).await,
4363    }
4364    .map(Json)
4365}
4366
4367// ---- per-kind paths ----
4368
4369async fn inspect_episode_node(
4370    tenant: &TenantHandle,
4371    tenant_id: &str,
4372    memory_id: String,
4373    node_id_full: String,
4374) -> Result<GraphInspectResponse, ApiError> {
4375    let memory_id_for_err = memory_id.clone();
4376    let memory_id_q = memory_id.clone();
4377    // Fetch the episode row + all triples sourced from it in one
4378    // interact() call to keep the connection check-out short.
4379    let fetched: Option<(ExpandedEpisode, Vec<TripleRow>)> = tenant
4380        .read()
4381        .interact(move |conn| {
4382            let ep_row: Option<(i64, i64, String)> = conn
4383                .query_row(
4384                    "SELECT rowid, ts_ms, content
4385                       FROM episodes
4386                      WHERE memory_id = ?1
4387                        AND status = 'active'",
4388                    rusqlite::params![&memory_id_q],
4389                    |r| {
4390                        Ok((
4391                            r.get::<_, i64>(0)?,
4392                            r.get::<_, i64>(1)?,
4393                            r.get::<_, String>(2)?,
4394                        ))
4395                    },
4396                )
4397                .map(Some)
4398                .or_else(|e| match e {
4399                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
4400                    other => Err(other),
4401                })?;
4402            let Some((rowid, ts_ms, content)) = ep_row else {
4403                return Ok(None);
4404            };
4405            let mut stmt = conn.prepare(
4406                "SELECT subject_id, predicate, object_id, confidence
4407                   FROM triples
4408                  WHERE source_episode_id = ?1
4409                    AND status = 'active'
4410                  ORDER BY valid_from_ms DESC",
4411            )?;
4412            let triples = stmt
4413                .query_map(rusqlite::params![rowid], |r| {
4414                    Ok(TripleRow {
4415                        subject_id: r.get(0)?,
4416                        predicate: r.get(1)?,
4417                        object_id: r.get(2)?,
4418                        confidence: r.get(3)?,
4419                    })
4420                })?
4421                .collect::<rusqlite::Result<Vec<_>>>()?;
4422            let ep = ExpandedEpisode {
4423                memory_id: memory_id_q,
4424                ts_ms,
4425                content,
4426            };
4427            Ok::<_, rusqlite::Error>(Some((ep, triples)))
4428        })
4429        .await
4430        .map_err(ApiError::from)?;
4431
4432    let (ep, triples) = fetched.ok_or_else(|| {
4433        ApiError::not_found(format!(
4434            "node_id {node_id_full:?} (memory_id {memory_id_for_err}) not found in current tenant"
4435        ))
4436    })?;
4437
4438    let node = graph_node_for_episode(tenant_id, &ep);
4439    let full_text = Some(ep.content.clone());
4440    // Triples flow from this episode (the source) to entity endpoints.
4441    // Emit one edge per triple: ep -> ent(object), predicate from the
4442    // triple, weight = confidence. This mirrors the `/v1/graph/edges`
4443    // triple-edge convention so the renderer can dedupe via composite id.
4444    let mut triples_out = Vec::with_capacity(triples.len());
4445    for t in triples {
4446        let tgt_id = format!("ent:{}", t.object_id);
4447        triples_out.push(GraphEdge {
4448            id: edge_id(&node_id_full, "triple", &tgt_id),
4449            source: node_id_full.clone(),
4450            target: tgt_id,
4451            kind: "triple",
4452            predicate: Some(t.predicate),
4453            weight: Some(t.confidence),
4454        });
4455    }
4456    Ok(GraphInspectResponse {
4457        node,
4458        full_text,
4459        triples_in: Vec::new(),
4460        triples_out,
4461    })
4462}
4463
4464async fn inspect_document_node(
4465    tenant: &TenantHandle,
4466    tenant_id: &str,
4467    doc_id: String,
4468    node_id_full: String,
4469) -> Result<GraphInspectResponse, ApiError> {
4470    let doc_id_for_err = doc_id.clone();
4471    let doc_id_q = doc_id.clone();
4472    // Fetch the document row + all chunk bodies (ORDER BY chunk_index) in
4473    // one interact() call. The chunks-concat path is the source of full_text
4474    // since the `documents` table doesn't carry the original raw text. For
4475    // v0.10.0 P1 we concatenate every chunk; pagination is the inspector
4476    // panel's responsibility if the document is very large.
4477    let fetched: Option<(ExpandedDocument, Vec<String>)> = tenant
4478        .read()
4479        .interact(move |conn| {
4480            let doc_row: Option<ExpandedDocument> = conn
4481                .query_row(
4482                    "SELECT doc_id, title, source, ingested_at_ms
4483                       FROM documents
4484                      WHERE doc_id = ?1
4485                        AND status = 'active'",
4486                    rusqlite::params![&doc_id_q],
4487                    |r| {
4488                        Ok(ExpandedDocument {
4489                            doc_id: r.get(0)?,
4490                            title: r.get(1)?,
4491                            source: r.get(2)?,
4492                            ingested_at_ms: r.get(3)?,
4493                        })
4494                    },
4495                )
4496                .map(Some)
4497                .or_else(|e| match e {
4498                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
4499                    other => Err(other),
4500                })?;
4501            let Some(doc) = doc_row else {
4502                return Ok(None);
4503            };
4504            let mut stmt = conn.prepare(
4505                "SELECT content
4506                   FROM document_chunks
4507                  WHERE doc_id = ?1
4508                  ORDER BY chunk_index ASC",
4509            )?;
4510            let chunks = stmt
4511                .query_map(rusqlite::params![&doc_id_q], |r| r.get::<_, String>(0))?
4512                .collect::<rusqlite::Result<Vec<_>>>()?;
4513            Ok::<_, rusqlite::Error>(Some((doc, chunks)))
4514        })
4515        .await
4516        .map_err(ApiError::from)?;
4517
4518    let (doc, chunks) = fetched.ok_or_else(|| {
4519        ApiError::not_found(format!(
4520            "node_id {node_id_full:?} (doc_id {doc_id_for_err}) not found in current tenant"
4521        ))
4522    })?;
4523
4524    let full_text = if chunks.is_empty() {
4525        // Document with zero chunks (e.g. mid-ingest, or an empty source).
4526        // Return None to signal "no body available" rather than an empty
4527        // string -- saves the renderer a degenerate code path.
4528        None
4529    } else {
4530        Some(chunks.join("\n\n"))
4531    };
4532
4533    Ok(GraphInspectResponse {
4534        node: graph_node_for_document(tenant_id, &doc),
4535        full_text,
4536        triples_in: Vec::new(),
4537        triples_out: Vec::new(),
4538    })
4539}
4540
4541async fn inspect_chunk_node(
4542    tenant: &TenantHandle,
4543    tenant_id: &str,
4544    chunk_id: String,
4545    node_id_full: String,
4546) -> Result<GraphInspectResponse, ApiError> {
4547    let chunk_id_for_err = chunk_id.clone();
4548    let chunk_id_q = chunk_id.clone();
4549    let row: Option<(ExpandedChunk, i64)> = tenant
4550        .read()
4551        .interact(move |conn| {
4552            conn.query_row(
4553                "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
4554                   FROM document_chunks c
4555                   JOIN documents d ON d.doc_id = c.doc_id
4556                  WHERE c.chunk_id = ?1
4557                    AND d.status = 'active'",
4558                rusqlite::params![&chunk_id_q],
4559                |r| {
4560                    Ok((
4561                        ExpandedChunk {
4562                            chunk_id: r.get(0)?,
4563                            chunk_index: r.get(1)?,
4564                            content: r.get(2)?,
4565                        },
4566                        r.get::<_, i64>(3)?,
4567                    ))
4568                },
4569            )
4570            .map(Some)
4571            .or_else(|e| match e {
4572                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4573                other => Err(other),
4574            })
4575        })
4576        .await
4577        .map_err(ApiError::from)?;
4578
4579    let (chunk, created_at_ms) = row.ok_or_else(|| {
4580        ApiError::not_found(format!(
4581            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
4582        ))
4583    })?;
4584
4585    let full_text = Some(chunk.content.clone());
4586    let mut node = graph_node_for_chunk(tenant_id, &chunk);
4587    // Mirror the `/v1/graph/nodes` chunk-row behaviour: surface
4588    // `created_at_ms` so the inspector panel has a sortable timestamp.
4589    node.ts_ms = Some(created_at_ms);
4590
4591    Ok(GraphInspectResponse {
4592        node,
4593        full_text,
4594        triples_in: Vec::new(),
4595        triples_out: Vec::new(),
4596    })
4597}
4598
4599async fn inspect_cluster_node(
4600    tenant: &TenantHandle,
4601    tenant_id: &str,
4602    cluster_id: String,
4603    node_id_full: String,
4604) -> Result<GraphInspectResponse, ApiError> {
4605    let cluster_id_for_err = cluster_id.clone();
4606    let cluster_id_q = cluster_id.clone();
4607    let row: Option<(Option<String>, i64)> = tenant
4608        .read()
4609        .interact(move |conn| {
4610            conn.query_row(
4611                "SELECT sa.content, c.created_at_ms
4612                   FROM clusters c
4613                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
4614                  WHERE c.cluster_id = ?1",
4615                rusqlite::params![&cluster_id_q],
4616                |r| Ok((r.get::<_, Option<String>>(0)?, r.get::<_, i64>(1)?)),
4617            )
4618            .map(Some)
4619            .or_else(|e| match e {
4620                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4621                other => Err(other),
4622            })
4623        })
4624        .await
4625        .map_err(ApiError::from)?;
4626
4627    let (abstraction, created_at_ms) = row.ok_or_else(|| {
4628        ApiError::not_found(format!(
4629            "node_id {node_id_full:?} (cluster_id {cluster_id_for_err}) not found in current tenant"
4630        ))
4631    })?;
4632
4633    // full_text is "<cluster_id label>\n\n<abstraction>" when an abstraction
4634    // exists; just the label otherwise. Brief "cluster" -- the cluster
4635    // label is `clusters.cluster_id` (the user-facing label is the
4636    // abstraction; clusters don't have a `label` column).
4637    let full_text = match abstraction.as_deref() {
4638        Some(a) => Some(format!("cluster {cluster_id_for_err}\n\n{a}")),
4639        None => Some(format!("cluster {cluster_id_for_err}")),
4640    };
4641
4642    Ok(GraphInspectResponse {
4643        node: graph_node_for_cluster(
4644            tenant_id,
4645            &cluster_id_for_err,
4646            abstraction.as_deref(),
4647            created_at_ms,
4648        ),
4649        full_text,
4650        triples_in: Vec::new(),
4651        triples_out: Vec::new(),
4652    })
4653}
4654
4655async fn inspect_entity_node(
4656    tenant: &TenantHandle,
4657    tenant_id: &str,
4658    entity_value: String,
4659    node_id_full: String,
4660) -> Result<GraphInspectResponse, ApiError> {
4661    // Entities are synthetic. They "exist" only if at least one triple
4662    // references them as subject or object. Zero triples -> 404 per brief.
4663    let entity_q = entity_value.clone();
4664    let rows: Vec<TripleRow> = tenant
4665        .read()
4666        .interact(move |conn| {
4667            let mut stmt = conn.prepare(
4668                "SELECT subject_id, predicate, object_id, confidence
4669                   FROM triples
4670                  WHERE (subject_id = ?1 OR object_id = ?1)
4671                    AND status = 'active'
4672                  ORDER BY valid_from_ms DESC
4673                  LIMIT ?2",
4674            )?;
4675            stmt.query_map(
4676                rusqlite::params![&entity_q, GRAPH_INSPECT_ENTITY_TRIPLES_CAP],
4677                |r| {
4678                    Ok(TripleRow {
4679                        subject_id: r.get(0)?,
4680                        predicate: r.get(1)?,
4681                        object_id: r.get(2)?,
4682                        confidence: r.get(3)?,
4683                    })
4684                },
4685            )?
4686            .collect::<rusqlite::Result<Vec<_>>>()
4687        })
4688        .await
4689        .map_err(ApiError::from)?;
4690
4691    if rows.is_empty() {
4692        return Err(ApiError::not_found(format!(
4693            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be inspectable"
4694        )));
4695    }
4696
4697    // Triples flow out FROM the entity to its counterpart. For each row
4698    // determine which side the entity appears on and emit ent:<self> ->
4699    // ent:<other>. Brief calls these triples_out (entities don't have
4700    // structural triples_in in v0.10.0 P1).
4701    let mut triples_out = Vec::with_capacity(rows.len());
4702    for t in rows {
4703        let other = if t.subject_id == entity_value {
4704            t.object_id
4705        } else {
4706            // entity_value matched on object_id; counterpart is subject.
4707            t.subject_id
4708        };
4709        let tgt_id = format!("ent:{other}");
4710        triples_out.push(GraphEdge {
4711            id: edge_id(&node_id_full, "triple", &tgt_id),
4712            source: node_id_full.clone(),
4713            target: tgt_id,
4714            kind: "triple",
4715            predicate: Some(t.predicate),
4716            weight: Some(t.confidence),
4717        });
4718    }
4719
4720    Ok(GraphInspectResponse {
4721        node: graph_node_for_entity(tenant_id, &entity_value),
4722        full_text: None,
4723        triples_in: Vec::new(),
4724        triples_out,
4725    })
4726}
4727
4728// ---------------------------------------------------------------------------
4729// Graph neighbors -- unified explicit + HNSW-semantic (v0.10.0)
4730//
4731// `GET /v1/graph/neighbors/{id}` powers solo-web's "show similar" overlay.
4732// Returns the same `GraphResponse { nodes, edges }` envelope as the rest of
4733// the family, combining:
4734//
4735//   * Explicit edges (triples / document_chunk / cluster_member) incident
4736//     to the focal node -- the same shape `/v1/graph/expand` produces for
4737//     a given (node_id, edge_kind) pair, but UNIONed across every edge kind
4738//     compatible with the focal node's kind.
4739//
4740//   * HNSW-semantic edges (cosine-similarity neighbors) -- only valid for
4741//     `ep:` (episodes) and `chunk:` (chunks); other source kinds return
4742//     400 when `kind=semantic` is requested alone, or are silently skipped
4743//     when `kind=both` is requested (explicit-only path still runs).
4744//
4745// Why this isn't just expand-with-a-flag: `/v1/graph/expand` takes a
4746// specific `kind=<edge-kind>` parameter and expands along ONE edge kind at
// a time. `/v1/graph/neighbors/{id}` UNIFIES all compatible edge kinds
4748// incident to the focal node into one response. Different UX (drill vs.
4749// overview); different API; both needed.
4750//
4751// ## Refactor decision
4752//
4753// The brief recommends extracting `expand`'s per-kind helpers into a
4754// shared module. In practice the `expand_*` async fns already do exactly
4755// what neighbors needs for the explicit path (same response shape, same
4756// tenant + auth + existence semantics). To keep the change surgical and
4757// to preserve `expand`'s existing tests byte-for-byte, neighbors **reuses
4758// the existing `expand_*` async fns directly** rather than refactoring
4759// their bodies. The explicit path is a thin orchestrator that calls every
4760// `expand_*` fn compatible with the focal node's kind and concatenates
4761// the results.
4762//
4763// ## Dedup rule (kind=both)
4764//
4765// When an edge with the same (source, target) appears in BOTH the
4766// explicit and the semantic result sets, the explicit edge wins -- the
4767// semantic edge is dropped. We dedupe by `(source, target)` (NOT by full
4768// edge id, which encodes the kind too): the rule "explicit beats
4769// semantic" only makes sense when both endpoints agree, regardless of
4770// kind. In practice this is most likely to fire when an entity-focused
4771// expand (which surfaces episodes as triple-targets) collides with a
4772// semantic search hit on the same episode pair.
4773//
4774// ## Limit policy
4775//
4776// `limit` is applied PER KIND, not total. With `limit=25` and
4777// `kind=both`, the response carries up to 25 explicit + 25 semantic
4778// edges (minus dedupe). Silent clamp at 100 (matches the rest of the
4779// `/v1/graph/*` family).
4780//
4781// ## Threshold filter
4782//
4783// `threshold` (default 0.75) filters semantic neighbors by
4784// `weight >= threshold`, where `weight = (1 - cos_distance).max(0)`. The
4785// default is conservative -- below 0.75 the renderer typically shows too
4786// many spurious edges for a useful "show similar" overlay. Callers can
4787// dial down (e.g. `?threshold=0.5`) for a broader view.
4788//
4789// See `docs/dev-log/0116-graph-neighbors-impl.md` for the design notes.
4790// ---------------------------------------------------------------------------
4791
/// Page size used when the caller omits `?limit=`. Kept small so the
/// "show similar" overlay isn't visually overwhelming on first click.
const GRAPH_NEIGHBORS_DEFAULT_LIMIT: u32 = 25;
/// Ceiling that a requested `limit` is silently clamped to. Matches the rest
/// of the `/v1/graph/*` family.
const GRAPH_NEIGHBORS_MAX_LIMIT: u32 = 100;
/// Similarity floor used when the caller omits `?threshold=`. Semantic edges
/// with `weight < threshold` are dropped from the result set.
const GRAPH_NEIGHBORS_DEFAULT_THRESHOLD: f32 = 0.75;
4800
4801/// Discriminator for which neighbor kinds the caller wants. Default is
4802/// `both` (explicit edges + HNSW-semantic).
4803#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
4804#[serde(rename_all = "snake_case")]
4805enum GraphNeighborsKind {
4806    Explicit,
4807    Semantic,
4808    #[default]
4809    Both,
4810}
4811
4812#[derive(Debug, Deserialize)]
4813struct GraphNeighborsQuery {
4814    #[serde(default)]
4815    kind: Option<GraphNeighborsKind>,
4816    #[serde(default)]
4817    threshold: Option<f32>,
4818    #[serde(default)]
4819    limit: Option<u32>,
4820}
4821
4822/// `GET /v1/graph/neighbors/{id}`. See module-level comments.
4823async fn graph_neighbors_handler(
4824    TenantExtractor(tenant): TenantExtractor,
4825    Path(id): Path<String>,
4826    Query(q): Query<GraphNeighborsQuery>,
4827) -> Result<Json<GraphExpandResponse>, ApiError> {
4828    let kind = q.kind.unwrap_or_default();
4829    let threshold = q.threshold.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_THRESHOLD);
4830    if !(0.0..=1.0).contains(&threshold) {
4831        return Err(ApiError::bad_request(format!(
4832            "threshold must be in [0.0, 1.0]; got {threshold}"
4833        )));
4834    }
4835    // Silent clamp at GRAPH_NEIGHBORS_MAX_LIMIT -- matches expand /
4836    // nodes / edges convention. Test `neighbors_limit_clamped_at_100`
4837    // locks in the clamp policy.
4838    let limit_raw = q.limit.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_LIMIT);
4839    let limit = limit_raw.clamp(1, GRAPH_NEIGHBORS_MAX_LIMIT);
4840
4841    let (node_kind, value) = parse_node_id(&id)?;
4842    let value_owned = value.to_string();
4843    let tenant_id_str = tenant.tenant_id().to_string();
4844    let node_id_full = id;
4845
4846    // Existence probe for the focal node. The explicit + semantic paths
4847    // each handle "node-found-but-zero-neighbors" gracefully (200 with
4848    // empty arrays) -- but we want a true 404 when the id resolves to no
4849    // row at all, regardless of which kind the caller asked for. This
4850    // matches the inspect endpoint's gate: a node has to exist to be
4851    // meaningfully "neighborable".
4852    ensure_neighbors_focal_exists(&tenant, node_kind, &value_owned, &node_id_full).await?;
4853
4854    // Dispatch.
4855    let (explicit_nodes, explicit_edges) = if matches!(
4856        kind,
4857        GraphNeighborsKind::Explicit | GraphNeighborsKind::Both
4858    ) {
4859        neighbors_explicit(
4860            &tenant,
4861            &tenant_id_str,
4862            node_kind,
4863            &value_owned,
4864            &node_id_full,
4865            limit as i64,
4866        )
4867        .await?
4868    } else {
4869        (Vec::new(), Vec::new())
4870    };
4871
4872    let (semantic_nodes, semantic_edges) = if matches!(
4873        kind,
4874        GraphNeighborsKind::Semantic | GraphNeighborsKind::Both
4875    ) {
4876        match neighbors_semantic(
4877            &tenant,
4878            &tenant_id_str,
4879            node_kind,
4880            &value_owned,
4881            &node_id_full,
4882            limit,
4883            threshold,
4884        )
4885        .await
4886        {
4887            Ok(parts) => parts,
4888            Err(e) => {
4889                // `kind=semantic` alone against an unsupported focal node
4890                // (doc/cl/ent) is a hard 400 -- the caller asked for ONLY
4891                // semantic neighbors and there are none possible.
4892                //
4893                // `kind=both` against an unsupported focal node silently
4894                // skips the semantic step; the explicit path still
4895                // delivers a meaningful answer. This mirrors the
4896                // pragmatic UX: clicking "show similar" on an entity
4897                // still surfaces the entity's triples without surfacing a
4898                // pointless error.
4899                if matches!(kind, GraphNeighborsKind::Semantic) {
4900                    return Err(e);
4901                }
4902                (Vec::new(), Vec::new())
4903            }
4904        }
4905    } else {
4906        (Vec::new(), Vec::new())
4907    };
4908
4909    // Merge + dedupe. Explicit edges win over semantic edges with the
4910    // same (source, target). Nodes dedupe by id.
4911    let mut explicit_endpoints: std::collections::HashSet<(String, String)> =
4912        std::collections::HashSet::with_capacity(explicit_edges.len());
4913    for e in &explicit_edges {
4914        explicit_endpoints.insert((e.source.clone(), e.target.clone()));
4915    }
4916
4917    let mut nodes: Vec<GraphNode> = Vec::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4918    let mut edges: Vec<GraphEdge> = Vec::with_capacity(explicit_edges.len() + semantic_edges.len());
4919    let mut seen_node_ids: std::collections::HashSet<String> =
4920        std::collections::HashSet::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4921
4922    for n in explicit_nodes {
4923        if seen_node_ids.insert(n.id.clone()) {
4924            nodes.push(n);
4925        }
4926    }
4927    for e in explicit_edges {
4928        edges.push(e);
4929    }
4930    for n in semantic_nodes {
4931        if seen_node_ids.insert(n.id.clone()) {
4932            nodes.push(n);
4933        }
4934    }
4935    for e in semantic_edges {
4936        if explicit_endpoints.contains(&(e.source.clone(), e.target.clone())) {
4937            // Explicit edge already covers this pair -- drop the semantic
4938            // duplicate per the dedup rule. The semantic node may still
4939            // remain in `nodes` if no other edge already pulled it in;
4940            // that's fine -- the renderer renders nodes with weight-less
4941            // structural edges either way.
4942            continue;
4943        }
4944        edges.push(e);
4945    }
4946
4947    Ok(Json(GraphExpandResponse { nodes, edges }))
4948}
4949
4950/// Existence probe for the focal node. Translates the prefixed id into a
4951/// per-kind COUNT query against the matching table. Returns 404 (not 200
4952/// with empty arrays) when the node doesn't exist in the tenant's DB.
4953/// For entities the "existence" check is "is this entity referenced by
4954/// at least one triple" -- consistent with the inspect-entity contract
4955/// from `0115`.
4956async fn ensure_neighbors_focal_exists(
4957    tenant: &TenantHandle,
4958    node_kind: NodeKind,
4959    value: &str,
4960    node_id_full: &str,
4961) -> Result<(), ApiError> {
4962    match node_kind {
4963        NodeKind::Episode => ensure_episode_exists(tenant, value, node_id_full).await,
4964        NodeKind::Cluster => ensure_cluster_exists(tenant, value, node_id_full).await,
4965        NodeKind::Document => ensure_document_exists(tenant, value, node_id_full).await,
4966        NodeKind::Chunk => ensure_chunk_exists(tenant, value, node_id_full).await,
4967        NodeKind::Entity => ensure_entity_referenced(tenant, value, node_id_full).await,
4968    }
4969}
4970
4971/// 404 if the chunk_id has no row in this tenant's `document_chunks`
4972/// table whose parent doc is active. Mirrors `ensure_*_exists` from
4973/// `expand`.
4974async fn ensure_chunk_exists(
4975    tenant: &TenantHandle,
4976    chunk_id: &str,
4977    node_id_full: &str,
4978) -> Result<(), ApiError> {
4979    let chunk_id_q = chunk_id.to_string();
4980    let exists: i64 = tenant
4981        .read()
4982        .interact(move |conn| {
4983            conn.query_row(
4984                "SELECT COUNT(*)
4985                   FROM document_chunks c
4986                   JOIN documents d ON d.doc_id = c.doc_id
4987                  WHERE c.chunk_id = ?1
4988                    AND d.status = 'active'",
4989                rusqlite::params![&chunk_id_q],
4990                |r| r.get(0),
4991            )
4992        })
4993        .await
4994        .map_err(ApiError::from)?;
4995    if exists == 0 {
4996        return Err(ApiError::not_found(format!(
4997            "node_id {node_id_full:?} not found in current tenant"
4998        )));
4999    }
5000    Ok(())
5001}
5002
5003/// 404 if the entity isn't referenced by at least one active triple in
5004/// the tenant. Matches the inspect-entity 404 contract: entities are
5005/// synthetic, "existence" is "shows up in at least one triple".
5006async fn ensure_entity_referenced(
5007    tenant: &TenantHandle,
5008    entity_value: &str,
5009    node_id_full: &str,
5010) -> Result<(), ApiError> {
5011    let entity_q = entity_value.to_string();
5012    let exists: i64 = tenant
5013        .read()
5014        .interact(move |conn| {
5015            conn.query_row(
5016                "SELECT COUNT(*)
5017                   FROM triples
5018                  WHERE (subject_id = ?1 OR object_id = ?1)
5019                    AND status = 'active'",
5020                rusqlite::params![&entity_q],
5021                |r| r.get(0),
5022            )
5023        })
5024        .await
5025        .map_err(ApiError::from)?;
5026    if exists == 0 {
5027        return Err(ApiError::not_found(format!(
5028            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be neighborable"
5029        )));
5030    }
5031    Ok(())
5032}
5033
5034/// Explicit-neighbor path. Dispatches per focal node kind, calling the
5035/// existing `expand_*` async fns for each compatible edge kind and
5036/// concatenating the results. This is the "reuse" refactor decision:
5037/// no duplication of expand's SQL, and expand's tests stay byte-for-byte
5038/// intact because we don't touch its bodies.
5039async fn neighbors_explicit(
5040    tenant: &TenantHandle,
5041    tenant_id: &str,
5042    node_kind: NodeKind,
5043    value: &str,
5044    node_id_full: &str,
5045    limit: i64,
5046) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5047    let mut nodes: Vec<GraphNode> = Vec::new();
5048    let mut edges: Vec<GraphEdge> = Vec::new();
5049
5050    match node_kind {
5051        NodeKind::Episode => {
5052            // Episodes have two compatible explicit-edge kinds:
5053            //   * cluster_member (episode -> clusters)
5054            //   * triple (episode -> entities, plus subj/obj entity pairs)
5055            //
5056            // document_chunk doesn't apply (episodes aren't documents).
5057            // Run each path, concat. Per-kind limit -- the caller asked for
5058            // up to `limit` neighbors PER KIND.
5059            let r1 =
5060                expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
5061                    .await?;
5062            nodes.extend(r1.nodes);
5063            edges.extend(r1.edges);
5064            let r2 =
5065                expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
5066            nodes.extend(r2.nodes);
5067            edges.extend(r2.edges);
5068        }
5069        NodeKind::Document => {
5070            // Documents have one compatible explicit-edge kind:
5071            // document_chunk (document -> chunks).
5072            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
5073                .await?;
5074            nodes.extend(r.nodes);
5075            edges.extend(r.edges);
5076        }
5077        NodeKind::Chunk => {
5078            // Chunks have one compatible explicit-edge kind:
5079            // document_chunk (chunk -> parent document).
5080            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
5081                .await?;
5082            nodes.extend(r.nodes);
5083            edges.extend(r.edges);
5084        }
5085        NodeKind::Cluster => {
5086            // Clusters have one compatible explicit-edge kind:
5087            // cluster_member (cluster -> episodes).
5088            let r = expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
5089                .await?;
5090            nodes.extend(r.nodes);
5091            edges.extend(r.edges);
5092        }
5093        NodeKind::Entity => {
5094            // Entities have one compatible explicit-edge kind:
5095            // triple (entity -> episodes where this entity is referenced).
5096            let r = expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
5097            nodes.extend(r.nodes);
5098            edges.extend(r.edges);
5099        }
5100    }
5101    Ok((nodes, edges))
5102}
5103
5104/// Semantic-neighbor path. Only valid for episode + chunk focal nodes;
5105/// other kinds return 400. Reuses the existing inner pipelines:
5106///
5107///   * Episodes -> `solo_query::recall::run_recall_inner` (same path
5108///     `expand_semantic` uses; filters out chunk hits).
5109///   * Chunks   -> `solo_query::doc_search::run_doc_search_inner` (the
5110///     equivalent chunk-restricted vector pipeline).
5111///
5112/// Re-embed the focal node's content for the HNSW query rather than
5113/// loading the persisted vector from `embeddings` -- the same trade-off
5114/// `expand_semantic` made: cheaper code path overall, with deterministic
5115/// embedders in tests + batch-sized embedders in prod making the recompute
5116/// cost negligible.
5117async fn neighbors_semantic(
5118    tenant: &TenantHandle,
5119    tenant_id: &str,
5120    node_kind: NodeKind,
5121    value: &str,
5122    node_id_full: &str,
5123    limit: u32,
5124    threshold: f32,
5125) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5126    match node_kind {
5127        NodeKind::Episode => {
5128            neighbors_semantic_from_episode(
5129                tenant,
5130                tenant_id,
5131                value,
5132                node_id_full,
5133                limit,
5134                threshold,
5135            )
5136            .await
5137        }
5138        NodeKind::Chunk => {
5139            neighbors_semantic_from_chunk(tenant, tenant_id, value, node_id_full, limit, threshold)
5140                .await
5141        }
5142        _ => Err(ApiError::bad_request(format!(
5143            "semantic neighbors only valid for episode or chunk source; got {}",
5144            node_kind.as_wire_str()
5145        ))),
5146    }
5147}
5148
5149async fn neighbors_semantic_from_episode(
5150    tenant: &TenantHandle,
5151    tenant_id: &str,
5152    memory_id: &str,
5153    node_id_full: &str,
5154    limit: u32,
5155    threshold: f32,
5156) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5157    let memory_id_q = memory_id.to_string();
5158    let memory_id_for_self_excl = memory_id.to_string();
5159    let content: Option<String> = tenant
5160        .read()
5161        .interact(move |conn| {
5162            conn.query_row(
5163                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
5164                rusqlite::params![&memory_id_q],
5165                |r| r.get::<_, String>(0),
5166            )
5167            .map(Some)
5168            .or_else(|e| match e {
5169                rusqlite::Error::QueryReturnedNoRows => Ok(None),
5170                other => Err(other),
5171            })
5172        })
5173        .await
5174        .map_err(ApiError::from)?;
5175
5176    // Existence is guaranteed by the focal-exists probe earlier; an
5177    // empty content here would be a status-transition race we treat as
5178    // "nothing to compare against".
5179    let Some(content) = content else {
5180        return Ok((Vec::new(), Vec::new()));
5181    };
5182
5183    // Widen the request by 1 so dropping self doesn't shrink the page.
5184    let widened = (limit as usize).saturating_add(1).min(100);
5185    let result = solo_query::recall::run_recall_inner(
5186        tenant.embedder(),
5187        tenant.hnsw(),
5188        tenant.read(),
5189        &content,
5190        widened,
5191    )
5192    .await
5193    .map_err(ApiError::from)?;
5194
5195    let mut nodes = Vec::new();
5196    let mut edges = Vec::new();
5197    for hit in result.hits.into_iter() {
5198        if hit.memory_id == memory_id_for_self_excl {
5199            // Skip self.
5200            continue;
5201        }
5202        if nodes.len() as u32 >= limit {
5203            break;
5204        }
5205        let weight = (1.0 - hit.cos_distance).max(0.0);
5206        if weight < threshold {
5207            continue;
5208        }
5209        let target_id = format!("ep:{}", hit.memory_id);
5210        edges.push(GraphEdge {
5211            id: edge_id(node_id_full, "semantic", &target_id),
5212            source: node_id_full.to_string(),
5213            target: target_id,
5214            kind: "semantic",
5215            predicate: None,
5216            weight: Some(weight),
5217        });
5218        nodes.push(GraphNode {
5219            id: format!("ep:{}", hit.memory_id),
5220            kind: NodeKind::Episode.as_wire_str(),
5221            label: episode_label(&hit.content),
5222            ts_ms: None,
5223            tenant_id: tenant_id.to_string(),
5224            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
5225        });
5226    }
5227    Ok((nodes, edges))
5228}
5229
5230async fn neighbors_semantic_from_chunk(
5231    tenant: &TenantHandle,
5232    tenant_id: &str,
5233    chunk_id: &str,
5234    node_id_full: &str,
5235    limit: u32,
5236    threshold: f32,
5237) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5238    let chunk_id_q = chunk_id.to_string();
5239    let chunk_id_for_self_excl = chunk_id.to_string();
5240    let content: Option<String> = tenant
5241        .read()
5242        .interact(move |conn| {
5243            conn.query_row(
5244                "SELECT c.content
5245                   FROM document_chunks c
5246                   JOIN documents d ON d.doc_id = c.doc_id
5247                  WHERE c.chunk_id = ?1
5248                    AND d.status = 'active'",
5249                rusqlite::params![&chunk_id_q],
5250                |r| r.get::<_, String>(0),
5251            )
5252            .map(Some)
5253            .or_else(|e| match e {
5254                rusqlite::Error::QueryReturnedNoRows => Ok(None),
5255                other => Err(other),
5256            })
5257        })
5258        .await
5259        .map_err(ApiError::from)?;
5260
5261    let Some(content) = content else {
5262        return Ok((Vec::new(), Vec::new()));
5263    };
5264
5265    let widened = (limit as usize).saturating_add(1).min(100);
5266    let hits = solo_query::doc_search::run_doc_search_inner(
5267        tenant.embedder(),
5268        tenant.hnsw(),
5269        tenant.read(),
5270        &content,
5271        widened,
5272    )
5273    .await
5274    .map_err(ApiError::from)?;
5275
5276    let mut nodes = Vec::new();
5277    let mut edges = Vec::new();
5278    for hit in hits.into_iter() {
5279        if hit.chunk_id == chunk_id_for_self_excl {
5280            continue;
5281        }
5282        if nodes.len() as u32 >= limit {
5283            break;
5284        }
5285        let weight = (1.0 - hit.cos_distance).max(0.0);
5286        if weight < threshold {
5287            continue;
5288        }
5289        let target_id = format!("chunk:{}", hit.chunk_id);
5290        edges.push(GraphEdge {
5291            id: edge_id(node_id_full, "semantic", &target_id),
5292            source: node_id_full.to_string(),
5293            target: target_id,
5294            kind: "semantic",
5295            predicate: None,
5296            weight: Some(weight),
5297        });
5298        let exp = ExpandedChunk {
5299            chunk_id: hit.chunk_id.clone(),
5300            chunk_index: hit.chunk_index as i64,
5301            content: hit.content.clone(),
5302        };
5303        nodes.push(graph_node_for_chunk(tenant_id, &exp));
5304    }
5305    Ok((nodes, edges))
5306}
5307
5308// ---------------------------------------------------------------------------
5309// /v1/graph/stream — SSE invalidation feed (v0.10.0)
5310//
5311// Powers solo-web's live-update behaviour: instead of polling, the
5312// frontend subscribes once and refetches its pages only when the
5313// writer-actor signals "your tenant's data changed". Per scoping doc
5314// §3 Decision C, the wire format is invalidation-shaped (not row
5315// payload) — the SSE channel says "refetch the affected page" rather
5316// than streaming actual rows.
5317//
5318// Wire format:
5319//
5320//   ```
5321//   event: init
5322//   data: {"connected": true, "tenant_id": "default", "ts_ms": 1715625600000}
5323//
5324//   event: invalidate
5325//   data: {"reason": "memory.remember", "tenant_id": "default",
5326//          "ts_ms": 1715625610000, "kind": "episode"}
5327//
5328//   event: heartbeat
5329//   data: {"ts_ms": 1715625640000}
5330//   ```
5331//
5332// Heartbeat: every [`STREAM_HEARTBEAT_SECS`] seconds, regardless of
5333// whether real events fired (simpler than resetting the timer on every
5334// invalidate; the cost is a few extra bytes per minute on idle).
5335//
// Lagged subscribers (a subscriber that fell more than 256 writes
// behind) get no SSE event for the lag itself: the server logs a
// `tracing::warn!` line and keeps receiving, and the client resyncs via
// the next real `invalidate` — invalidation events are idempotent, so
// the missed batch reduces to a single refetch on the client side. No
// correctness loss.
5340//
5341// See `docs/dev-log/0117-graph-stream-impl.md` for the full design.
5342// ---------------------------------------------------------------------------
5343
/// Heartbeat interval for `/v1/graph/stream`. Fires unconditionally
/// every 30 seconds — easier to reason about than "fire 30s after the
/// last event", and keeps proxies happy without code that races a
/// reset on every invalidate. `graph_stream_handler` passes this value
/// to `build_invalidate_stream`.
pub const STREAM_HEARTBEAT_SECS: u64 = 30;

/// SSE event name emitted on connection open. Single fire; client uses
/// this to confirm the subscription is live.
const STREAM_EVENT_INIT: &str = "init";

/// SSE event name emitted on every writer-actor commit (and on
/// `gdpr.forget_user`'s non-writer-actor cascade). Used for every
/// `InvalidateEvent` received from the per-tenant broadcast channel.
const STREAM_EVENT_INVALIDATE: &str = "invalidate";

/// SSE event name emitted by the heartbeat interval.
const STREAM_EVENT_HEARTBEAT: &str = "heartbeat";
5360
5361/// `GET /v1/graph/stream` — Server-Sent Events feed of
5362/// `InvalidateEvent`s scoped to the request's tenant.
5363///
5364/// Subscribes to the per-tenant `broadcast::Sender<InvalidateEvent>`
5365/// held by `TenantHandle` (populated by `TenantHandle::open`). The
5366/// stream:
5367///
5368///   1. Emits one `event: init` line at connection open.
5369///   2. Selects between (broadcast recv) and (heartbeat tick) in a
5370///      loop, emitting `invalidate` / `heartbeat` events as either
5371///      fires.
5372///   3. Exits when the client closes the connection (axum drops the
5373///      response future) OR the broadcast Sender is dropped (tenant
5374///      shutdown).
5375///
5376/// Auth + tenant resolution mirror the rest of `/v1/graph/*`: the
5377/// `auth_middleware` returns 401 on missing bearer; the
5378/// `TenantExtractor` resolves the per-tenant DB. The handler itself
5379/// has no per-route auth logic.
5380async fn graph_stream_handler(
5381    TenantExtractor(tenant): TenantExtractor,
5382) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
5383    // Subscribe BEFORE building the init event so a writer-actor
5384    // commit that lands in the (microscopic) window between init and
5385    // the first poll is still observed. `broadcast::Receiver` buffers
5386    // up to the channel's capacity from the moment of subscribe.
5387    let rx = tenant.invalidate_sender().subscribe();
5388    let tenant_id = tenant.tenant_id().to_string();
5389    let stream = build_invalidate_stream(rx, tenant_id, STREAM_HEARTBEAT_SECS);
5390    // axum's keep-alive layer adds its own `:` comment line every
5391    // configured interval; we keep that OFF and ship our own typed
5392    // `heartbeat` event instead. The client distinguishes the two by
5393    // looking at the SSE `event:` field — typed heartbeats let solo-web
5394    // surface "connection healthy" in its UI without parsing comment
5395    // lines.
5396    Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)))
5397}
5398
/// Per-subscriber state threaded through `futures::stream::unfold`.
/// Carries the receiver + heartbeat interval + a one-shot flag for
/// the initial `init` event. Built once per connection by
/// `build_invalidate_stream` and handed back on every yield.
struct StreamState {
    /// Receiving end of the per-tenant invalidation broadcast.
    rx: broadcast::Receiver<InvalidateEvent>,
    /// Timer whose ticks become `heartbeat` events.
    heartbeat: tokio::time::Interval,
    /// Tenant id echoed in the `init` event payload.
    tenant_id: String,
    /// `true` until the first poll completes — used to gate the `init`
    /// event. Flipped to `false` after the init event yields.
    needs_init: bool,
}
5410
5411/// Build the stream of SSE [`Event`]s for one subscriber.
5412///
5413/// First yield is the `init` event. After that, the stream selects
5414/// between the broadcast receiver and a tokio interval timer that
5415/// fires every `heartbeat_secs` seconds. Lagged broadcast errors are
5416/// swallowed with a single `tracing::warn!` line — the client resyncs
5417/// on the next real invalidate (invalidation events are idempotent).
5418fn build_invalidate_stream(
5419    rx: broadcast::Receiver<InvalidateEvent>,
5420    tenant_id: String,
5421    heartbeat_secs: u64,
5422) -> impl Stream<Item = Result<Event, Infallible>> {
5423    // `tokio::time::interval_at(start, period)` starts ticking at
5424    // `start`; we set `start = now + period` so the first heartbeat
5425    // lands `heartbeat_secs` AFTER the init event. Without `interval_at`
5426    // the default `interval()` would fire immediately at t=0, racing
5427    // the init event.
5428    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
5429    let heartbeat = tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
5430
5431    let state = StreamState {
5432        rx,
5433        heartbeat,
5434        tenant_id,
5435        needs_init: true,
5436    };
5437    futures::stream::unfold(state, move |mut state| async move {
5438        // First-poll: yield the init event without touching the
5439        // receiver or the heartbeat. Subsequent polls fall through to
5440        // the select loop.
5441        if state.needs_init {
5442            state.needs_init = false;
5443            let init_payload = serde_json::json!({
5444                "connected": true,
5445                "tenant_id": state.tenant_id,
5446                "ts_ms": chrono::Utc::now().timestamp_millis(),
5447            });
5448            let ev = Event::default()
5449                .event(STREAM_EVENT_INIT)
5450                .json_data(init_payload)
5451                .unwrap_or_else(|_| Event::default().event(STREAM_EVENT_INIT));
5452            return Some((Ok::<Event, Infallible>(ev), state));
5453        }
5454        loop {
5455            tokio::select! {
5456                event = state.rx.recv() => {
5457                    match event {
5458                        Ok(ev) => {
5459                            let sse_event = Event::default()
5460                                .event(STREAM_EVENT_INVALIDATE)
5461                                .json_data(&ev)
5462                                .unwrap_or_else(|_| Event::default()
5463                                    .event(STREAM_EVENT_INVALIDATE));
5464                            return Some((Ok::<Event, Infallible>(sse_event), state));
5465                        }
5466                        Err(broadcast::error::RecvError::Lagged(n)) => {
5467                            tracing::warn!(
5468                                lagged = n,
5469                                "graph stream subscriber lagged; client will \
5470                                 resync on the next real invalidate"
5471                            );
5472                            // Continue receiving — do NOT yield anything
5473                            // for a lag.
5474                        }
5475                        Err(broadcast::error::RecvError::Closed) => {
5476                            tracing::debug!(
5477                                "graph stream broadcast closed; ending SSE stream"
5478                            );
5479                            return None;
5480                        }
5481                    }
5482                }
5483                _ = state.heartbeat.tick() => {
5484                    let hb_payload = serde_json::json!({
5485                        "ts_ms": chrono::Utc::now().timestamp_millis(),
5486                    });
5487                    let sse_event = Event::default()
5488                        .event(STREAM_EVENT_HEARTBEAT)
5489                        .json_data(hb_payload)
5490                        .unwrap_or_else(|_| Event::default()
5491                            .event(STREAM_EVENT_HEARTBEAT));
5492                    return Some((Ok::<Event, Infallible>(sse_event), state));
5493                }
5494            }
5495        }
5496    })
5497}
5498
5499// ---------------------------------------------------------------------------
5500// /v1/status — authenticated readiness/status (tenant-aware)
5501
/// Embedder section of the `/v1/status` response. `status_handler`
/// fills it from the tenant's embedder.
#[derive(Debug, Serialize)]
struct StatusEmbedder {
    /// Embedder name, as reported by `name()`.
    name: String,
    /// Embedder version, as reported by `version()`.
    version: String,
    /// Value reported by the embedder's `dim()`.
    dim: usize,
    /// Lower-cased `Debug` rendering of the embedder's `dtype()`.
    dtype: String,
}
5509
/// Tenant section of the `/v1/status` response: the tenant the request
/// resolved to, plus what the registry's active list knows about it.
#[derive(Debug, Serialize)]
struct StatusTenant {
    /// Id of the tenant the request resolved to.
    id: String,
    /// Whether that id was found in the registry's active-tenant list.
    registered: bool,
    /// Registry status of the tenant; `None` when it was not found.
    status: Option<TenantStatusJson>,
    /// Quota from the registry record; `None` when the record is
    /// missing or carries no quota.
    quota_bytes: Option<u64>,
    /// Last-access timestamp from the registry record; `None` when the
    /// record is missing or carries no timestamp.
    last_accessed_ms: Option<i64>,
}
5518
/// MCP section of the `/v1/status` response.
#[derive(Debug, Serialize)]
struct StatusMcp {
    /// Number of entries in the server state's `mcp_sessions`.
    sessions: usize,
}
5523
/// Body of a successful `/v1/status` response.
#[derive(Debug, Serialize)]
struct StatusResponse {
    /// Set to `true` by `status_handler` on every successful response.
    ok: bool,
    /// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
    version: &'static str,
    /// The tenant the request resolved to.
    tenant: StatusTenant,
    /// The embedder attached to that tenant.
    embedder: StatusEmbedder,
    /// Number of tenants in the registry's active list.
    active_tenants: usize,
    /// MCP session information.
    mcp: StatusMcp,
}
5533
5534async fn status_handler(
5535    State(state): State<SoloHttpState>,
5536    TenantExtractor(tenant): TenantExtractor,
5537) -> Result<Json<StatusResponse>, ApiError> {
5538    let active_tenants = state.registry.list_active().await.map_err(ApiError::from)?;
5539    let tenant_record = active_tenants
5540        .iter()
5541        .find(|record| &record.tenant_id == tenant.tenant_id());
5542    let embedder = tenant.embedder();
5543    Ok(Json(StatusResponse {
5544        ok: true,
5545        version: env!("CARGO_PKG_VERSION"),
5546        tenant: StatusTenant {
5547            id: tenant.tenant_id().to_string(),
5548            registered: tenant_record.is_some(),
5549            status: tenant_record.map(|record| TenantStatusJson::from(&record.status)),
5550            quota_bytes: tenant_record.and_then(|record| record.quota_bytes),
5551            last_accessed_ms: tenant_record.and_then(|record| record.last_accessed_ms),
5552        },
5553        embedder: StatusEmbedder {
5554            name: embedder.name().to_string(),
5555            version: embedder.version().to_string(),
5556            dim: embedder.dim(),
5557            dtype: format!("{:?}", embedder.dtype()).to_ascii_lowercase(),
5558        },
5559        active_tenants: active_tenants.len(),
5560        mcp: StatusMcp {
5561            sessions: state.mcp_sessions.len(),
5562        },
5563    }))
5564}
5565
5566// /v1/tenants — principal-scoped tenant list (v0.10.0 + v0.10.1 hydration)
5567//
5568// Powers solo-web's top-bar tenant picker (Decision F in
5569// `docs/dev-log/0105-solo-web-scoping.md` §3, route shape locked in §4
5570// Route 6). The endpoint is **read-only**; admin CRUD (create / delete /
5571// rename / quota change) remains CLI-only per ADR-0004 §"Admin operations".
5572// That keeps the privileged tenant-mutation surface off HTTP entirely
5573// while still letting an authenticated browser session enumerate the
5574// tenants it's allowed to see.
5575//
5576// Wire shape (200 OK):
5577//
5578//   ```json
5579//   {
5580//     "tenants": [
5581//       {
5582//         "id": "default",
5583//         "display_name": "Default tenant",
5584//         "created_at_ms": 1715625600000,
5585//         "last_accessed_ms": 1715625900000,
5586//         "status": "active",
5587//         "quota_bytes": null,
5588//         "episode_count": null,
5589//         "size_bytes": null,
5590//         "pct_used": null
5591//       }
5592//     ]
5593//   }
5594//   ```
5595//
5596// The numeric `episode_count` / `size_bytes` / `pct_used` fields were
5597// **always `null` in v0.10.0** (cost-deferred). v0.10.1 hydrates them
5598// for real via `TenantRegistry::hydrate_tenant_cost_numbers`:
5599//
5600//   * `size_bytes` — `std::fs::metadata(<data_dir>/tenants/<db>.db).len()`.
5601//     Cheap; runs for every visible tenant.
5602//   * `episode_count` — `SELECT COUNT(*) FROM episodes WHERE
5603//     status='active'` against the per-tenant SQLCipher DB.
5604//   * `pct_used` — `size_bytes * 100 / quota_bytes` (f64, capped at
5605//     100.0) when both are known; `null` if `quota_bytes` is unset.
5606//
5607// **Cap**: opening + counting N tenant DBs is N×~10ms; the first-paint
5608// budget is tight, so we cap `episode_count` hydration at
5609// `TENANTS_COUNT_HYDRATION_CAP` (50) per request. Tenants beyond the
5610// cap get `episode_count: null` and the response carries an
5611// `X-Solo-Tenants-Count-Cap-Reached: true` header so clients can fetch
5612// counts for the tail tenants out-of-band if needed (mirroring the
5613// entity-cap pattern from `/v1/graph/nodes`). `size_bytes` is not
5614// capped — it's just a `metadata` call.
5615//
5616// The CLI's `solo tenants list` retains the canonical per-tenant
5617// cost-numbers path for operators who need exhaustive data.
5618//
5619// ## Visibility filter (load-bearing — three cases)
5620//
5621// The handler reads `AuthenticatedPrincipal` out of request extensions
5622// via `MaybePrincipal` and filters the registry list before
5623// serialisation:
5624//
5625//   1. **No principal** (`MaybePrincipal(None)`) — unauthenticated
5626//      loopback path, no `[auth]` block in `solo.config.toml`. Return
5627//      every `Active` tenant. Same scope as `solo tenants list` CLI.
5628//   2. **Bearer principal** (`subject == "bearer" && claims.is_null()`,
5629//      the `AuthenticatedPrincipal::bearer` signature emitted by
5630//      `BearerValidator::validate`). Single-principal daemon — the
5631//      bearer holder is the operator, so return every `Active`
5632//      tenant. Functionally equivalent to (1) from a leakage
5633//      standpoint.
5634//   3. **OIDC principal** (any other principal — `claims` carries the
5635//      JWT object). Filter to ONLY the tenant id matching
5636//      `principal.tenant_claim`. The configured OIDC tenant_claim is
5637//      already validated to a real `TenantId` by the auth middleware
5638//      (a `MissingTenantClaim` or `InvalidTenantClaim` shorts out at
5639//      403 BEFORE this handler runs). If the claim doesn't match any
5640//      registered tenant, return `{"tenants": []}` (200 OK, NOT 404)
5641//      — don't leak whether a tenant exists by 404'ing on names
5642//      outside the principal's scope.
5643//
5644// `PendingMigration` / `PendingDelete` tenants are **excluded** from the
5645// list in every case. solo-web's tenant picker should not surface a
5646// tenant that's mid-migration or queued for hard-delete — clicking
5647// such a row would race the admin tooling. The CLI's `solo tenants
5648// list` still shows them under an explicit `--include-pending` flag
5649// (out of scope here).
5650//
5651// See `docs/dev-log/0119-tenants-list-impl.md` for the full design.
5652// ---------------------------------------------------------------------------
5653
5654/// One row of the `/v1/tenants` response body. Shape mirrors
5655/// `solo_storage::TenantRecord` for the persisted fields plus the
5656/// reserved-for-future cost-numbers triple (`episode_count`,
5657/// `size_bytes`, `pct_used`) that v0.10.0 always sets to `null`.
5658#[derive(Debug, Clone, Serialize)]
5659struct TenantListItem {
5660    /// Tenant id (e.g. `"default"`, `"alice"`). Matches the
5661    /// `X-Solo-Tenant` header value clients send to other routes.
5662    id: String,
5663    /// Human-readable display name set at `solo tenants create`.
5664    /// `None` ⇒ omit from the JSON body.
5665    #[serde(skip_serializing_if = "Option::is_none")]
5666    display_name: Option<String>,
5667    /// Epoch ms when this tenant was registered.
5668    created_at_ms: i64,
5669    /// Epoch ms of the most recent `TenantRegistry::get_or_open` call
5670    /// (v0.9.0 P1). `None` for tenants that have never been opened
5671    /// since the migration ran.
5672    #[serde(skip_serializing_if = "Option::is_none")]
5673    last_accessed_ms: Option<i64>,
5674    /// Lifecycle status. Always `"active"` in the v0.10.0 wire (we
5675    /// filter `PendingMigration` / `PendingDelete` out at list time).
5676    /// Surfaced for forward-compat — a future `?include_pending=1`
5677    /// query param could relax the filter without a shape change.
5678    status: TenantStatusJson,
5679    /// Per-tenant byte quota set via `solo tenants set-quota`. `None`
5680    /// ⇒ unlimited.
5681    #[serde(skip_serializing_if = "Option::is_none")]
5682    quota_bytes: Option<u64>,
5683    /// v0.10.1: count of `episodes WHERE status='active'`. Populated
5684    /// for the first `TENANTS_COUNT_HYDRATION_CAP` tenants in the
5685    /// response; `null` for tenants beyond the cap (in which case the
5686    /// response also carries `X-Solo-Tenants-Count-Cap-Reached: true`).
5687    /// Also `null` if the per-tenant DB file is missing or the COUNT
5688    /// failed.
5689    episode_count: Option<i64>,
5690    /// v0.10.1: size of the per-tenant SQLCipher DB on disk (bytes).
5691    /// `null` only if the file is missing or unreadable (corruption /
5692    /// permissions). Not affected by the cap — `std::fs::metadata` is
5693    /// cheap.
5694    size_bytes: Option<u64>,
5695    /// v0.10.1: `(size_bytes * 100.0 / quota_bytes)` capped at `100.0`
5696    /// when both `size_bytes` and `quota_bytes` are known. `null` if
5697    /// `quota_bytes` is unset (no quota = unlimited) or `size_bytes`
5698    /// is unknown.
5699    pct_used: Option<f64>,
5700}
5701
5702/// JSON-side mirror of [`TenantStatus`]. Re-defined here (rather than
5703/// using `#[derive(Serialize)]` on `TenantStatus` directly — which it
5704/// already has via `#[serde(rename_all = "snake_case")]`) so the
5705/// HTTP-side wire shape stays decoupled from the storage-side enum.
5706/// Today both serialise identically; a future status variant added to
5707/// storage doesn't automatically leak onto the wire.
5708#[derive(Debug, Clone, Copy, Serialize)]
5709#[serde(rename_all = "snake_case")]
5710enum TenantStatusJson {
5711    Active,
5712}
5713
5714impl From<&solo_storage::TenantStatus> for TenantStatusJson {
5715    fn from(s: &solo_storage::TenantStatus) -> Self {
5716        // We only ever build this enum from `Active` records (the list
5717        // handler filters at source); the match exhausts so future
5718        // variants force a compile error here, not a wire mismatch.
5719        match s {
5720            solo_storage::TenantStatus::Active => TenantStatusJson::Active,
5721            // Defensive: should be filtered upstream. Map to Active to
5722            // avoid a panic, but the handler MUST keep filtering at
5723            // source. A clippy warning catches dead branches.
5724            solo_storage::TenantStatus::PendingMigration
5725            | solo_storage::TenantStatus::PendingDelete => TenantStatusJson::Active,
5726        }
5727    }
5728}
5729
5730/// Response body for `GET /v1/tenants`.
5731#[derive(Debug, Serialize)]
5732struct TenantsListResponse {
5733    tenants: Vec<TenantListItem>,
5734}
5735
/// v0.10.1: upper bound on how many tenants get their `episode_count`
/// hydrated in a single `/v1/tenants` request.
///
/// Each hydration opens a tenant DB and runs a COUNT (roughly 5-10ms),
/// so the cap bounds request latency for solo-web's first paint.
/// Tenants past the cap are returned with `episode_count: null`, and
/// the response carries `X-Solo-Tenants-Count-Cap-Reached: true` so a
/// client can fetch the remaining counts out-of-band (CLI or a future
/// per-id endpoint). The value 50 follows the entity-cap pattern used
/// by `/v1/graph/nodes`.
const TENANTS_COUNT_HYDRATION_CAP: usize = 50;
5746
/// v0.10.1: name of the response header that is set to `"true"` when
/// a `/v1/tenants` response hit the `episode_count` hydration cap; the
/// header is absent otherwise.
///
/// Kept lowercase because it is passed to
/// `axum::http::HeaderName::from_static`. Header names are
/// case-insensitive on the wire; the canonical spelling is
/// `X-Solo-Tenants-Count-Cap-Reached`. Defined as a constant so both
/// server- and client-side code can grep for one spelling.
const X_SOLO_TENANTS_COUNT_CAP_HEADER: &str = "x-solo-tenants-count-cap-reached";
5754
5755/// `GET /v1/tenants` — list every tenant visible to the request's
5756/// principal. See module comment for the three-case visibility rule.
5757///
5758/// Errors:
5759///   * **401** — bearer required but missing/invalid (handled by
5760///     `auth_middleware` before this handler runs).
5761///   * **500** — `TenantsIndex` read failed. Surfaced via [`ApiError`].
5762///
5763/// No 404 path. If the OIDC principal's `tenant_claim` doesn't match
5764/// any registered tenant, the response is `200 OK` with `tenants:
5765/// []`. That keeps tenant existence out of side-channel range for an
5766/// OIDC user — they cannot probe for other tenants by id.
5767async fn tenants_list_handler(
5768    State(state): State<SoloHttpState>,
5769    MaybePrincipal(maybe_principal): MaybePrincipal,
5770) -> Result<Response, ApiError> {
5771    // Pull every registered tenant. `list_active` is the registry's
5772    // wrapper around `TenantsIndex::list`, which returns rows ordered
5773    // by `(created_at_ms ASC, tenant_id ASC)` — a stable order that
5774    // doesn't shift between requests, which solo-web relies on to keep
5775    // its tenant picker entries from reordering visually.
5776    let mut records = state.registry.list_active().await.map_err(ApiError::from)?;
5777
5778    // Filter at source: status MUST be Active (PendingMigration /
5779    // PendingDelete are admin-transient states that solo-web should
5780    // not surface). Matches the brief's
5781    // `tenants_status_filter_excludes_deleted` test.
5782    records.retain(|r| matches!(r.status, solo_storage::TenantStatus::Active));
5783
5784    // Apply the principal-driven visibility filter. The three cases
5785    // are exhaustive — see the module comment for the rationale on
5786    // each. `tenant_visibility_filter` is split out so the unit
5787    // tests can assert the rule independent of the SQL read.
5788    let filtered = filter_tenants_for_principal(records, maybe_principal.as_ref());
5789
5790    // v0.10.1: hydrate cost numbers (size_bytes, episode_count). The
5791    // registry helper handles missing DB files + the cap behavior. We
5792    // pass the cap so tenants beyond it return `None` for episode_count
5793    // — `size_bytes` is computed for everyone (cheap fs::metadata).
5794    let cap = TENANTS_COUNT_HYDRATION_CAP;
5795    let costs = state
5796        .registry
5797        .hydrate_tenant_cost_numbers(&filtered, cap)
5798        .await;
5799    let cap_reached = filtered.len() > cap;
5800
5801    let tenants: Vec<TenantListItem> = filtered
5802        .iter()
5803        .zip(costs.iter())
5804        .map(|(r, cost)| {
5805            let pct_used = match (cost.size_bytes, r.quota_bytes) {
5806                (Some(size), Some(quota)) if quota > 0 => {
5807                    let raw = (size as f64) * 100.0 / (quota as f64);
5808                    Some(raw.min(100.0))
5809                }
5810                _ => None,
5811            };
5812            TenantListItem {
5813                id: r.tenant_id.to_string(),
5814                display_name: r.display_name.clone(),
5815                created_at_ms: r.created_at_ms,
5816                last_accessed_ms: r.last_accessed_ms,
5817                status: TenantStatusJson::from(&r.status),
5818                quota_bytes: r.quota_bytes,
5819                episode_count: cost.episode_count,
5820                size_bytes: cost.size_bytes,
5821                pct_used,
5822            }
5823        })
5824        .collect();
5825
5826    let body = Json(TenantsListResponse { tenants });
5827    if cap_reached {
5828        let mut resp = body.into_response();
5829        resp.headers_mut().insert(
5830            axum::http::HeaderName::from_static(X_SOLO_TENANTS_COUNT_CAP_HEADER),
5831            axum::http::HeaderValue::from_static("true"),
5832        );
5833        Ok(resp)
5834    } else {
5835        Ok(body.into_response())
5836    }
5837}
5838
5839/// Pure function: apply the three-case principal-driven visibility
5840/// rule to a list of `TenantRecord`s. Extracted from the handler so
5841/// unit tests can exercise the rule without driving an axum router.
5842///
5843///   * `principal == None` ⇒ all records returned (no-auth path).
5844///   * Bearer-shaped principal (`subject == "bearer" && claims.is_null()`)
5845///     ⇒ all records returned (single-principal daemon).
5846///   * Any other principal (OIDC) ⇒ filter to records whose
5847///     `tenant_id == principal.tenant_claim`. An OIDC principal with
5848///     no `tenant_claim` (theoretically unreachable — the middleware
5849///     short-circuits at 403 before us, but we defend) returns an
5850///     empty list.
5851fn filter_tenants_for_principal(
5852    records: Vec<solo_storage::TenantRecord>,
5853    principal: Option<&AuthenticatedPrincipal>,
5854) -> Vec<solo_storage::TenantRecord> {
5855    let Some(p) = principal else {
5856        // Case 1: no auth configured — return all tenants. Same scope
5857        // as `solo tenants list`.
5858        return records;
5859    };
5860    if is_single_principal_bearer(p) {
5861        // Case 2: bearer principal — return all tenants. The single
5862        // bearer holder is functionally the daemon operator.
5863        return records;
5864    }
5865    // Case 3: OIDC principal — filter to the claimed tenant only. An
5866    // unmatched claim falls through to an empty list, NOT 404, to
5867    // avoid leaking tenant existence.
5868    let Some(claim) = p.tenant_claim.as_ref() else {
5869        return Vec::new();
5870    };
5871    records
5872        .into_iter()
5873        .filter(|r| r.tenant_id == *claim)
5874        .collect()
5875}
5876
5877/// True iff `principal` looks like a bearer-mode principal — the shape
5878/// emitted by [`AuthenticatedPrincipal::bearer`]: subject is literally
5879/// `"bearer"`, claims is `serde_json::Value::Null`, and scopes is
5880/// empty. OIDC principals carry a JWT object in `claims` and the JWT
5881/// `sub` in `subject`, so they fail this predicate.
5882///
5883/// Split out so the unit tests can assert the discriminator
5884/// independent of the rest of the handler. Keeping the predicate in
5885/// one place also makes future expansion easier — e.g., a v0.11
5886/// "admin scope" might add an OIDC variant that passes this gate by
5887/// looking for a `"solo:admin"` entry in `scopes`.
5888fn is_single_principal_bearer(principal: &AuthenticatedPrincipal) -> bool {
5889    principal.subject == "bearer" && principal.claims.is_null() && principal.scopes.is_empty()
5890}
5891
5892// ---------------------------------------------------------------------------
5893// v0.10.2 — MCP-over-HTTP transport on /mcp
5894// ---------------------------------------------------------------------------
5895
5896// v0.11.0 P2: the per-event names that used to live here as
5897// `MCP_STREAM_EVENT_INIT` moved into `crate::mcp_session` alongside the
5898// `McpEventKind` enum so the publisher (`SessionState::publish_event`)
5899// and the subscriber (`build_mcp_session_stream`) share one source of
5900// truth for the wire format. See `MCP_STREAM_EVENT_INIT_NAME`,
5901// `MCP_STREAM_EVENT_MESSAGE_NAME`, `MCP_STREAM_EVENT_PROGRESS_NAME`,
5902// `MCP_STREAM_EVENT_LAGGED_NAME`, and `MCP_STREAM_EVENT_HEARTBEAT_NAME`
5903// for the canonical strings.
5904
5905/// `POST /mcp` — JSON-RPC request/response.
5906///
5907/// v0.10.2 P2 entry point. Per the MCP Streamable HTTP transport spec,
5908/// the body is one JSON-RPC 2.0 envelope (`{jsonrpc, id, method,
5909/// params}`). The response is one JSON-RPC envelope (`{jsonrpc, id,
5910/// result}` or `{jsonrpc, id, error}`) with `Content-Type:
5911/// application/json`. **Status 200** for valid JSON-RPC (in-body
5912/// errors); **status 400** for malformed JSON; **status 401** when
5913/// auth is configured and the bearer check fails (handled by the
5914/// `auth_middleware` ahead of this handler).
5915///
5916/// Tenant resolution diverges from `solo mcp-stdio` here: stdio binds
5917/// one tenant at process start via `--tenant`. HTTP resolves the tenant
5918/// per request from the `X-Solo-Tenant` header (or
5919/// `AuthenticatedPrincipal.tenant_claim` in OIDC mode), so a single
5920/// daemon process can answer MCP calls for any tenant the registry
5921/// knows. The audit principal is `Some("bearer")` for bearer-
5922/// authenticated calls and the JWT `sub` for OIDC; `None` for
5923/// unauthenticated loopback. Documented in v0.10.2 dev log.
5924async fn mcp_http_post_handler(
5925    TenantExtractor(tenant): TenantExtractor,
5926    State(state): State<SoloHttpState>,
5927    AuditPrincipal(principal): AuditPrincipal,
5928    request: axum::extract::Request,
5929) -> Response {
5930    // v0.11.0 P1: read the session extension the middleware planted on
5931    // a hit; if absent, this is the session-init request — create one
5932    // and echo the assigned id back via `Mcp-Session-Id`.
5933    let existing_session_id: Option<crate::mcp_session::SessionId> = request
5934        .extensions()
5935        .get::<crate::mcp_session::SessionId>()
5936        .cloned();
5937    let principal_full = request
5938        .extensions()
5939        .get::<crate::auth::AuthenticatedPrincipal>()
5940        .cloned();
5941    let body_bytes = match axum::body::to_bytes(
5942        request.into_body(),
5943        // Match the 8 MiB cap solo-api already uses for JSON bodies in
5944        // other handlers (validated by `tower-http::limit::RequestBodyLimitLayer`
5945        // elsewhere). Locally we cap at 8 MiB so a malformed Content-Length
5946        // can't OOM the dispatch task.
5947        8 * 1024 * 1024,
5948    )
5949    .await
5950    {
5951        Ok(b) => b,
5952        Err(e) => {
5953            return (
5954                StatusCode::BAD_REQUEST,
5955                Json(serde_json::json!({
5956                    "error": format!("invalid request body: {e}"),
5957                    "status": 400,
5958                })),
5959            )
5960                .into_response();
5961        }
5962    };
5963    // Parse the JSON-RPC envelope. Malformed input ⇒ 400 (the spec
5964    // calls out 4xx for malformed wire input even though JSON-RPC's own
5965    // parse-error code is in-body — operator-facing tooling needs the
5966    // HTTP status to distinguish "the server rejected the request
5967    // shape" from "the method returned an error").
5968    let request: crate::mcp_dispatch::JsonRpcRequest = match serde_json::from_slice(&body_bytes) {
5969        Ok(r) => r,
5970        Err(e) => {
5971            return (
5972                StatusCode::BAD_REQUEST,
5973                Json(serde_json::json!({
5974                    "error": format!("invalid JSON-RPC request: {e}"),
5975                    "status": 400,
5976                })),
5977            )
5978                .into_response();
5979        }
5980    };
5981    if request.jsonrpc != "2.0" {
5982        return (
5983            StatusCode::BAD_REQUEST,
5984            Json(serde_json::json!({
5985                "error": format!(
5986                    "invalid JSON-RPC request: expected jsonrpc=\"2.0\", got {:?}",
5987                    request.jsonrpc
5988                ),
5989                "status": 400,
5990            })),
5991        )
5992            .into_response();
5993    }
5994
5995    // v0.11.0 P1: assign a session id if the request arrived without
5996    // one. The assigned id is echoed back via the `Mcp-Session-Id`
5997    // response header so the client can reuse it.
5998    let (session_id, freshly_assigned) = match existing_session_id {
5999        Some(id) => (id, false),
6000        None => {
6001            let new_state =
6002                crate::mcp_session::SessionState::new(tenant.tenant_id().clone(), principal_full);
6003            let id = state.mcp_sessions.insert(new_state);
6004            (id, true)
6005        }
6006    };
6007
6008    // v0.11.0 P3: resolve the `Arc<SessionState>` for the dispatcher so
6009    // per-tool progress events can be published into the session's
6010    // broadcast channel. On a session-init request we just inserted
6011    // the state; for a continuing request the middleware planted an
6012    // Arc onto the request extensions, but we lost ownership when we
6013    // consumed the request above (`request.into_body()`). Re-fetch
6014    // via `mcp_sessions.get(&session_id)` — this is a single lock-free
6015    // DashMap shard read.
6016    let session_state: Option<std::sync::Arc<crate::mcp_session::SessionState>> =
6017        state.mcp_sessions.get(&session_id);
6018
6019    // v0.11.0 P4: on a freshly-assigned session, spawn the
6020    // invalidate-bridge task that forwards per-tenant `InvalidateEvent`
6021    // broadcasts to this session's event channel as MCP
6022    // `notifications/message` envelopes. Skipped for continuing requests
6023    // because the bridge spawned at session-init is still running (the
6024    // bridge auto-exits when the session drops from the store via the
6025    // `Weak<SessionState>` upgrade-fails path).
6026    if freshly_assigned && let Some(session_state_for_bridge) = session_state.clone() {
6027        // The JoinHandle is intentionally detached — the bridge task
6028        // owns its own exit path (Weak<SessionState> upgrade fails or
6029        // tenant broadcast closes). Holding the handle would require
6030        // a per-session reaper; the bridge's own lifecycle is enough.
6031        // `drop` is the clippy-clean way to discard a future.
6032        drop(crate::mcp_notify::spawn_invalidate_bridge(
6033            tenant.clone(),
6034            session_state_for_bridge,
6035        ));
6036    }
6037
6038    // Build the dispatcher with the resolved tenant + audit principal.
6039    // Dispatcher integration is Option B per v0.11.0 P1 plan: sessions
6040    // are HTTP-transport-only; the dispatcher stays session-agnostic.
6041    let dispatcher = crate::mcp_dispatch::McpDispatcher::new(
6042        state.registry.clone(),
6043        tenant,
6044        (*state.user_aliases).clone(),
6045        principal,
6046    );
6047
6048    let mut response = match dispatcher.dispatch(request, session_state).await {
6049        Some(response) => {
6050            // JSON-RPC errors are in-body; the HTTP status is 200 for
6051            // any valid JSON-RPC request, including ones that return an
6052            // error envelope. The client distinguishes success from
6053            // error by the presence of `result` vs `error` in the body.
6054            (StatusCode::OK, Json(response)).into_response()
6055        }
6056        None => {
6057            // Notification: per JSON-RPC 2.0 §4.1 the server MUST NOT
6058            // respond. The MCP Streamable HTTP transport spec uses
6059            // 202 Accepted for this shape so client-side polling does
6060            // not block on a body.
6061            StatusCode::ACCEPTED.into_response()
6062        }
6063    };
6064    // v0.11.0 P1: stamp the `Mcp-Session-Id` response header on every
6065    // response — both freshly-assigned (so the client learns it) and
6066    // continuing (so the client confirms the id is still valid). The
6067    // spec is loose here; echoing always is the safer client contract.
6068    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
6069    // Tracing hook lets operators see new-session creation rate in
6070    // `solo daemon` logs without grepping body bytes.
6071    if freshly_assigned {
6072        tracing::debug!(
6073            session_id = %session_id,
6074            "mcp-http: assigned new session id"
6075        );
6076    }
6077    response
6078}
6079
/// Seconds between `heartbeat` events on the resumable `GET /mcp`
/// stream.
///
/// Uses the same 30s cadence as `/v1/graph/stream`, so operator
/// tooling needs only one timeout setting. The handler passes this
/// value into [`build_mcp_session_stream`], whose interval parameter
/// lets tests substitute a short value instead of waiting 30 real
/// seconds.
pub const MCP_STREAM_HEARTBEAT_SECS: u64 = 30;
6086
6087/// `GET /mcp` — resumable Server-Sent Events stream for one MCP session.
6088///
6089/// v0.11.0 P2 replaces v0.10.2's `pending().await` stub with a real
6090/// `select!` loop over the session's broadcast event channel. Per the
6091/// MCP Streamable HTTP transport spec, the GET endpoint is the
6092/// server's path to push:
6093///
6094///   - `event: init` — handshake confirming the stream is live;
6095///   - `event: message` — JSON-RPC `notifications/message` (P4 bridge);
6096///   - `event: progress` — JSON-RPC `notifications/progress` (P3 long
6097///     tool calls);
6098///   - `event: heartbeat` — periodic liveness ping every
6099///     [`MCP_STREAM_HEARTBEAT_SECS`] seconds;
6100///   - `event: lagged` — emitted once when a reconnecting client's
6101///     `Last-Event-ID` is older than the broadcast buffer's oldest
6102///     retained event (Decision E).
6103///
6104/// Wire format per the SSE spec — each event carries:
6105/// `id: <u64>\nevent: <kind>\ndata: <json>\n\n`. The `id:` field is the
6106/// monotonic per-session event id; clients echo the last-seen value
6107/// back in the `Last-Event-ID` header on reconnect to drive the
6108/// replay-from-cursor path.
6109///
6110/// **Session id REQUIRED.** Unlike `POST /mcp` (which auto-creates a
6111/// session on the session-init request), `GET /mcp` returns `404 Not
6112/// Found` if the request arrived without a `Mcp-Session-Id` header.
6113/// The GET stream's whole point is to attach to an existing session's
6114/// notification channel — a client opening a stream without a session
6115/// to attach it to is a programming error, not the entry point to the
6116/// session lifecycle.
6117async fn mcp_http_get_handler(
6118    TenantExtractor(tenant): TenantExtractor,
6119    State(state): State<SoloHttpState>,
6120    AuditPrincipal(principal): AuditPrincipal,
6121    request: axum::extract::Request,
6122) -> Response {
6123    let _ = principal; // audit principal pre-resolved by extractor; unused on GET
6124    let _ = state; // session resolution lives in the middleware; state unused here
6125
6126    // v0.11.0 P2: session is REQUIRED on GET. The middleware planted
6127    // the SessionId + Arc<SessionState> extensions on a hit. If the
6128    // request arrived without an `Mcp-Session-Id` header, the
6129    // middleware passes through (so unauth'd POSTs can session-init);
6130    // we observe that as a missing extension and return 404 here.
6131    let session_id = match request.extensions().get::<crate::mcp_session::SessionId>() {
6132        Some(id) => id.clone(),
6133        None => {
6134            return (
6135                StatusCode::NOT_FOUND,
6136                Json(serde_json::json!({
6137                    "error": crate::mcp_session::MCP_SESSION_EXPIRED_ERROR,
6138                    "status": 404,
6139                    "message": "GET /mcp requires an `Mcp-Session-Id` header \
6140                                from a prior POST /mcp; open one first",
6141                    "retry": "re-initialize",
6142                })),
6143            )
6144                .into_response();
6145        }
6146    };
6147    let session_state = match request
6148        .extensions()
6149        .get::<std::sync::Arc<crate::mcp_session::SessionState>>()
6150    {
6151        Some(state) => state.clone(),
6152        None => {
6153            // Defensive: middleware should plant both extensions
6154            // together or neither, but log + 404 if we somehow see one
6155            // without the other.
6156            tracing::error!(
6157                "mcp_http_get_handler: SessionId extension present but \
6158                 SessionState extension missing — middleware bug"
6159            );
6160            return StatusCode::INTERNAL_SERVER_ERROR.into_response();
6161        }
6162    };
6163
6164    // Optional `Last-Event-ID` header — parse as u64; on parse failure
6165    // treat as `0` (the "never seen anything" sentinel) so a
6166    // malformed header doesn't 400 the reconnect.
6167    let last_event_id: u64 = request
6168        .headers()
6169        .get(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER)
6170        .and_then(|v| v.to_str().ok())
6171        .and_then(|s| s.trim().parse::<u64>().ok())
6172        .unwrap_or(0);
6173
6174    let tenant_id = tenant.tenant_id().to_string();
6175    let stream = build_mcp_session_stream(
6176        session_state,
6177        session_id.clone(),
6178        tenant_id,
6179        last_event_id,
6180        MCP_STREAM_HEARTBEAT_SECS,
6181    );
6182    // No axum keep-alive comment lines — we ship our own typed
6183    // `heartbeat` event the way `/v1/graph/stream` does. Setting the
6184    // axum-side interval to 1 hour effectively disables it; clients
6185    // distinguish liveness via the typed events on the stream.
6186    let sse = Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)));
6187    let mut response = sse.into_response();
6188    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
6189    response
6190}
6191
6192/// Per-subscriber state threaded through `futures::stream::unfold` for
6193/// the resumable `/mcp` GET stream. Carries the broadcast receiver +
6194/// heartbeat ticker + the queue of replay events (if any) that need
6195/// to drain before live events start flowing.
6196///
6197/// Holds an `Arc<SessionState>` for the SOLE purpose of keeping the
6198/// broadcast `Sender` alive for as long as this subscriber's stream
6199/// is open. Without that strong ref, dropping the only Arc the
6200/// handler held would close the channel and `rx.recv()` would
6201/// immediately return `Err(Closed)` — the integration test for the
6202/// heartbeat cadence caught this regression.
struct McpStreamState {
    /// Live broadcast receiver. `build_mcp_session_stream` subscribes
    /// BEFORE it snapshots the replay buffer, so an event published
    /// in between can show up both here and in the snapshot; the
    /// `last_emitted_id` cursor below dedupes the live copy against
    /// the replayed one.
    rx: broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
    /// Heartbeat tick — fires every `heartbeat_secs` regardless of
    /// real-event volume; the first tick is scheduled one full period
    /// after the stream is built. Matches `/v1/graph/stream`'s
    /// discipline.
    heartbeat: tokio::time::Interval,
    /// FIFO queue of replay events still to emit before live events
    /// take over. Drained front-first, one entry per poll; may begin
    /// with a synthetic `lagged` entry (id 0) when the client's cursor
    /// predates the oldest buffered event. Empty once drained.
    replay_queue: Vec<crate::mcp_session::McpStreamEvent>,
    /// `Some(id)` once at least one real event has been emitted
    /// (replayed OR live). Synthetic frames (init, heartbeat, lagged)
    /// never update it. Live broadcast events with
    /// `id <= last_emitted_id` are skipped — handles the race where an
    /// event lands in BOTH the replay snapshot AND the live broadcast
    /// receiver (because we subscribed before snapshotting).
    last_emitted_id: Option<u64>,
    /// `true` until the synthetic `event: init` has been emitted.
    /// Flipped to `false` on first poll.
    needs_init: bool,
    /// Init-event payload metadata (`session_id` / `tenant_id`).
    /// Pre-computed at handler entry so the unfold closure stays
    /// `Send`. `session_id_str` is also attached to the stream's
    /// tracing output.
    session_id_str: String,
    tenant_id: String,
    /// Held only to keep the broadcast `Sender` (and thus the channel)
    /// alive for the stream's lifetime. The session store also holds
    /// an Arc, but that one expires under TTL — this Arc keeps the
    /// channel open for this single subscriber for as long as the
    /// client is connected.
    _session_state: std::sync::Arc<crate::mcp_session::SessionState>,
}
6235
6236/// Build the resumable SSE stream for one `/mcp` GET subscriber.
6237///
6238/// Flow per `unfold` iteration:
6239///
6240///   1. **needs_init poll** — emit one `event: init` with id 0 (we
6241///      never allocate event id 0 in `SessionState::publish_event`;
6242///      0 is reserved for the init event + the client's "never seen"
6243///      sentinel on `Last-Event-ID`). Returns immediately.
6244///   2. **replay drain** — while `replay_queue` is non-empty, pop the
6245///      front entry and emit it. Updates `last_emitted_id`.
6246///   3. **live select** — `tokio::select!` between
6247///      `rx.recv()` and `heartbeat.tick()`:
6248///      - `rx.recv() = Ok(event)` and `event.id > last_emitted_id` →
6249///        emit and update cursor;
6250///      - `rx.recv() = Ok(event)` and `event.id <= last_emitted_id` →
6251///        skip (dedupe overlap with the replayed copy);
6252///      - `rx.recv() = Err(Lagged(n))` → emit one synthetic
6253///        `event: lagged` with `data: {dropped: n}` and continue;
6254///      - `rx.recv() = Err(Closed)` → end the stream (session
6255///        dropped);
6256///      - `heartbeat.tick()` → emit an unaccounted-id `event: heartbeat`
6257///        (heartbeats DO NOT consume the session's event id space —
6258///        they're synthetic and idempotent, so a reconnecting client
6259///        doesn't need to see them in replay).
6260///
6261/// Heartbeats use SSE event id `0` (the same id space the init event
6262/// uses) and clients filter them client-side; the broadcast-channel
6263/// events use the session's real monotonic ids.
6264fn build_mcp_session_stream(
6265    session_state: std::sync::Arc<crate::mcp_session::SessionState>,
6266    session_id: crate::mcp_session::SessionId,
6267    tenant_id: String,
6268    last_event_id: u64,
6269    heartbeat_secs: u64,
6270) -> impl Stream<Item = Result<Event, Infallible>> {
6271    // 1. Subscribe BEFORE snapshotting so any event published during
6272    //    the snapshot window lands in the live receiver. We dedupe
6273    //    overlap against `last_emitted_id` below.
6274    let rx = session_state.subscribe_events();
6275
6276    // 2. Snapshot the replay buffer, then filter to events the client
6277    //    hasn't seen.
6278    let snapshot = session_state.snapshot_replay_buffer();
6279
6280    // 3. Decide replay shape based on `last_event_id` vs the snapshot.
6281    let mut replay_queue: Vec<crate::mcp_session::McpStreamEvent> = Vec::new();
6282    if last_event_id > 0 {
6283        // Client is reconnecting with a known cursor.
6284        let oldest_in_buffer = snapshot.first().map(|e| e.id);
6285        let newest_in_buffer = snapshot.last().map(|e| e.id);
6286        if let (Some(oldest), Some(newest)) = (oldest_in_buffer, newest_in_buffer) {
6287            if last_event_id + 1 < oldest {
6288                // Client missed events that have since been evicted
6289                // from the buffer. Emit one synthetic `event: lagged`
6290                // describing the gap, then resume from the buffer.
6291                let dropped = oldest.saturating_sub(last_event_id + 1);
6292                replay_queue.push(crate::mcp_session::McpStreamEvent {
6293                    id: 0,
6294                    event: crate::mcp_session::McpEventKind::Lagged,
6295                    data: serde_json::json!({
6296                        "dropped": dropped,
6297                        "last_event_id": last_event_id,
6298                        "oldest_available": oldest,
6299                    }),
6300                });
6301                replay_queue.extend(snapshot);
6302            } else if last_event_id >= newest {
6303                // Client is already caught up; nothing to replay.
6304            } else {
6305                replay_queue.extend(snapshot.into_iter().filter(|e| e.id > last_event_id));
6306            }
6307        }
6308        // Empty snapshot + non-zero last_event_id: nothing to replay.
6309    }
6310    // last_event_id == 0: brand-new subscriber; no replay needed
6311    // (the `init` event below is the start of the stream from the
6312    // client's POV).
6313
6314    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
6315    let heartbeat = tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
6316
6317    let stream_state = McpStreamState {
6318        rx,
6319        heartbeat,
6320        replay_queue,
6321        last_emitted_id: None,
6322        needs_init: true,
6323        session_id_str: session_id.to_string(),
6324        tenant_id,
6325        _session_state: session_state,
6326    };
6327
6328    futures::stream::unfold(stream_state, move |mut state| async move {
6329        // Phase 1: init event (one-shot).
6330        if state.needs_init {
6331            state.needs_init = false;
6332            let init_payload = serde_json::json!({
6333                "connected": true,
6334                "session_id": state.session_id_str,
6335                "tenant_id": state.tenant_id,
6336                "ts_ms": chrono::Utc::now().timestamp_millis(),
6337            });
6338            let ev = build_mcp_sse_event(0, crate::mcp_session::McpEventKind::Init, &init_payload);
6339            return Some((Ok::<Event, Infallible>(ev), state));
6340        }
6341        // Phase 2: replay-queue drain (one entry per poll).
6342        if !state.replay_queue.is_empty() {
6343            let entry = state.replay_queue.remove(0);
6344            // Lagged synthetic entries don't bump last_emitted_id —
6345            // they have id 0 and consuming them as the cursor would
6346            // cause every subsequent live event to dedupe against
6347            // them. Real events DO bump the cursor.
6348            if entry.event != crate::mcp_session::McpEventKind::Lagged {
6349                state.last_emitted_id = Some(entry.id);
6350            }
6351            let ev = build_mcp_sse_event(entry.id, entry.event, &entry.data);
6352            return Some((Ok::<Event, Infallible>(ev), state));
6353        }
6354        // Phase 3: live select loop.
6355        loop {
6356            tokio::select! {
6357                event = state.rx.recv() => {
6358                    match event {
6359                        Ok(ev) => {
6360                            // Dedupe against the replay overlap: any
6361                            // event whose id we've already emitted
6362                            // (because it was in the replay snapshot)
6363                            // gets skipped here.
6364                            if let Some(last) = state.last_emitted_id
6365                                && ev.id <= last
6366                            {
6367                                continue;
6368                            }
6369                            state.last_emitted_id = Some(ev.id);
6370                            let sse = build_mcp_sse_event(ev.id, ev.event, &ev.data);
6371                            return Some((Ok::<Event, Infallible>(sse), state));
6372                        }
6373                        Err(broadcast::error::RecvError::Lagged(n)) => {
6374                            // Live subscriber drifted past the
6375                            // broadcast buffer's capacity. Emit one
6376                            // synthetic `event: lagged` and resume —
6377                            // clients re-fetch state on this signal.
6378                            tracing::warn!(
6379                                lagged = n,
6380                                session_id = %state.session_id_str,
6381                                "mcp GET stream subscriber lagged"
6382                            );
6383                            let lagged_payload = serde_json::json!({
6384                                "dropped": n,
6385                            });
6386                            let sse = build_mcp_sse_event(
6387                                0,
6388                                crate::mcp_session::McpEventKind::Lagged,
6389                                &lagged_payload,
6390                            );
6391                            return Some((Ok::<Event, Infallible>(sse), state));
6392                        }
6393                        Err(broadcast::error::RecvError::Closed) => {
6394                            tracing::debug!(
6395                                session_id = %state.session_id_str,
6396                                "mcp GET stream broadcast closed; ending SSE stream"
6397                            );
6398                            return None;
6399                        }
6400                    }
6401                }
6402                _ = state.heartbeat.tick() => {
6403                    let hb_payload = serde_json::json!({
6404                        "ts_ms": chrono::Utc::now().timestamp_millis(),
6405                    });
6406                    let sse = build_mcp_sse_event(
6407                        0,
6408                        crate::mcp_session::McpEventKind::Heartbeat,
6409                        &hb_payload,
6410                    );
6411                    return Some((Ok::<Event, Infallible>(sse), state));
6412                }
6413            }
6414        }
6415    })
6416}
6417
6418/// Build an SSE [`Event`] from a `(id, kind, payload)` triple. Falls
6419/// back to an event-only frame on JSON serialisation failure (matches
6420/// `/v1/graph/stream`'s defensive pattern).
6421fn build_mcp_sse_event(
6422    id: u64,
6423    kind: crate::mcp_session::McpEventKind,
6424    data: &serde_json::Value,
6425) -> Event {
6426    Event::default()
6427        .id(id.to_string())
6428        .event(kind.as_str())
6429        .json_data(data)
6430        .unwrap_or_else(|_| Event::default().id(id.to_string()).event(kind.as_str()))
6431}
6432
6433// ---------------------------------------------------------------------------
6434// Error mapping
6435// ---------------------------------------------------------------------------
6436
/// Error value that converts into an HTTP response.
///
/// Its `IntoResponse` impl renders it as a JSON body of the form
/// `{"error": <message>, "status": <numeric status>}` sent with the
/// matching HTTP status code.
#[derive(Debug)]
pub struct ApiError {
    /// HTTP status code the response is sent with (also echoed in the
    /// body's `status` field).
    status: StatusCode,
    /// Human-readable description, emitted as the body's `error` field.
    message: String,
}
6442
6443impl ApiError {
6444    fn bad_request(msg: impl Into<String>) -> Self {
6445        Self {
6446            status: StatusCode::BAD_REQUEST,
6447            message: msg.into(),
6448        }
6449    }
6450    fn not_found(msg: impl Into<String>) -> Self {
6451        Self {
6452            status: StatusCode::NOT_FOUND,
6453            message: msg.into(),
6454        }
6455    }
6456    fn internal(msg: impl Into<String>) -> Self {
6457        Self {
6458            status: StatusCode::INTERNAL_SERVER_ERROR,
6459            message: msg.into(),
6460        }
6461    }
6462}
6463
6464impl From<solo_core::Error> for ApiError {
6465    fn from(e: solo_core::Error) -> Self {
6466        use solo_core::Error;
6467        match e {
6468            Error::NotFound(msg) => ApiError::not_found(msg),
6469            Error::InvalidInput(msg) => ApiError::bad_request(msg),
6470            Error::Conflict(msg) => Self {
6471                status: StatusCode::CONFLICT,
6472                message: msg,
6473            },
6474            other => ApiError::internal(other.to_string()),
6475        }
6476    }
6477}
6478
6479impl IntoResponse for ApiError {
6480    fn into_response(self) -> Response {
6481        let body = serde_json::json!({
6482            "error": self.message,
6483            "status": self.status.as_u16(),
6484        });
6485        (self.status, Json(body)).into_response()
6486    }
6487}
6488
6489// SQL helper for recall used to live here; consolidated into
6490// solo_query::recall.
6491
6492#[cfg(test)]
6493mod handler_tests {
6494    //! In-process integration tests for the HTTP handler surface. We
6495    //! drive the axum Router directly via `tower::ServiceExt::oneshot`
6496    //! — no real TCP listener needed. Same `Harness`-shape as the MCP
6497    //! tests: real WriterActor + ReaderPool + StubEmbedder + StubVectorIndex.
6498    //!
6499    //! Tests live inline in this module rather than in a `tests/` dir
6500    //! because external integration-test exes triggered Windows UAC
6501    //! ERROR_ELEVATION_REQUIRED on the dev machine.
6502    use super::*;
6503    use axum::body::Body;
6504    use axum::http::{Request, StatusCode};
6505    use http_body_util::BodyExt;
6506    use serde_json::{Value, json};
6507    use solo_core::VectorIndex;
6508    use solo_storage::test_support::StubVectorIndex;
6509    use solo_storage::{
6510        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig, StubEmbedder,
6511        TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
6512    };
6513    use std::sync::Arc as StdArc;
6514    use tower::ServiceExt;
6515
6516    fn fake_config(dim: u32) -> SoloConfig {
6517        SoloConfig {
6518            schema_version: 1,
6519            salt_hex: "00000000000000000000000000000000".to_string(),
6520            embedder: EmbedderConfig {
6521                name: "stub".to_string(),
6522                version: "v1".to_string(),
6523                dim,
6524                dtype: "f32".to_string(),
6525            },
6526            identity: IdentityConfig::default(),
6527            documents: solo_storage::DocumentConfig::default(),
6528            auth: None,
6529            audit: solo_storage::AuditSettings::default(),
6530            redaction: solo_storage::RedactionConfig::default(),
6531            llm: None,
6532            triples: solo_storage::TriplesConfig::default(),
6533            sampling: solo_storage::SamplingConfig::default(),
6534            steward: solo_storage::StewardSettings::default(),
6535        }
6536    }
6537
    /// In-process test fixture: an axum `Router` wired to a real
    /// writer actor and reader pool over a temp-dir database, plus the
    /// handles the tests and `shutdown` need to tear it down.
    struct Harness {
        /// Router under test; tests clone it and drive it via `oneshot`.
        router: axum::Router,
        /// Temp directory that contains the database file. Held so the
        /// directory outlives the test; dropped explicitly in `shutdown`.
        _tmp: tempfile::TempDir,
        /// Path of the database file inside `_tmp` (`test.db`); used by
        /// `open_db` to open side connections.
        db_path: std::path::PathBuf,
        /// The harness's own clone of the writer handle. Taken and
        /// dropped in `shutdown`.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the writer thread. Taken in `shutdown`, which
        /// waits up to 5s for the thread to exit.
        join: Option<std::thread::JoinHandle<()>>,
        /// v0.10.0: handle to the per-tenant TenantHandle so SSE-flavoured
        /// tests can call `harness.invalidate_sender().send(...)` to
        /// simulate writer-actor invalidations (or grab a Receiver via
        /// `.subscribe()` for subscriber-count assertions).
        tenant_handle: StdArc<TenantHandle>,
        /// v0.10.0: clone of the registry Arc so `/v1/tenants` tests can
        /// seed additional tenant rows into the in-memory tenants_index
        /// stub via `registry.with_index(|idx| idx.register(...))`.
        registry: StdArc<TenantRegistry>,
        /// v0.11.0 P1: clone of the per-process MCP session store so
        /// tests can simulate TTL eviction (`delete` an id) without
        /// having to drive the full 30-min inactivity clock.
        mcp_sessions: crate::mcp_session::SessionStore,
    }
6558
6559    impl Harness {
6560        /// v0.10.0: clone the per-tenant broadcast Sender so tests can
6561        /// fire `InvalidateEvent`s directly without going through the
6562        /// writer-actor. The harness's writer is spawned via
6563        /// `WriterActor::spawn_full` (legacy variant, no invalidate
6564        /// plumb) so writer-driven events won't reach SSE subscribers
6565        /// in tests — tests use this Sender to simulate them.
6566        fn invalidate_sender(&self) -> tokio::sync::broadcast::Sender<InvalidateEvent> {
6567            self.tenant_handle.invalidate_sender().clone()
6568        }
6569    }
6570
    impl Harness {
        /// Build a harness with no auth configured.
        fn new(runtime: &tokio::runtime::Runtime) -> Self {
            Self::new_with_auth(runtime, None)
        }

        /// Open a fresh side connection against the harness's DB. Used
        /// by graph_expand tests to seed clusters / triples / documents
        /// directly (the writer-actor doesn't expose those write paths).
        fn open_db(&self) -> rusqlite::Connection {
            solo_storage::test_support::open_test_db_at(&self.db_path)
        }

        /// Build a harness whose router requires `bearer_token` as a
        /// bearer credential when it is `Some`; `None` means no auth.
        fn new_with_auth(runtime: &tokio::runtime::Runtime, bearer_token: Option<String>) -> Self {
            Self::new_with_auth_config(
                runtime,
                bearer_token.map(|token| crate::auth::AuthConfig::Bearer { token }),
            )
        }

        /// Full constructor: creates a temp-dir database, registers the
        /// stub embedder, spawns the writer actor, builds the reader
        /// pool / tenant handle / single-tenant registry / MCP session
        /// store, and wires them into a router with the given `auth`.
        ///
        /// `runtime` is used for the pieces that are constructed inside
        /// `block_on` (reader pool, MCP session store).
        fn new_with_auth_config(
            runtime: &tokio::runtime::Runtime,
            auth: Option<crate::auth::AuthConfig>,
        ) -> Self {
            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};

            let tmp = tempfile::TempDir::new().unwrap();
            // Stub embedder + stub vector index share this dimension.
            let dim = 16usize;
            let hnsw: StdArc<dyn VectorIndex + Send + Sync> =
                StdArc::new(StubVectorIndex::new(dim));
            let embedder: StdArc<dyn solo_core::Embedder> =
                StdArc::new(StubEmbedder::new("stub", "v1", dim));
            let path = tmp.path().join("test.db");

            // Register the stub embedder on a short-lived connection;
            // the connection is dropped at the end of this block, before
            // the writer's connection is opened.
            let embedder_id = {
                let conn = solo_storage::test_support::open_test_db_at(&path);
                get_or_insert_embedder_id(
                    &conn,
                    &EmbedderIdentity {
                        name: "stub".into(),
                        version: "v1".into(),
                        dim: dim as u32,
                        dtype: "f32".into(),
                    },
                )
                .unwrap()
            };

            // Second connection is handed to (and owned by) the writer.
            let conn = solo_storage::test_support::open_test_db_at(&path);
            let WriterSpawn { handle, join } =
                WriterActor::spawn_full(conn, hnsw.clone(), tmp.path().to_path_buf(), embedder_id);
            // NOTE(review): built inside `block_on`, presumably because
            // the pool needs a runtime context — confirm against
            // `ReaderPool::new`.
            let pool: ReaderPool =
                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });

            // Build a TenantHandle from the assembled parts and wrap it
            // in a single-tenant test registry.
            let tenant_id = solo_core::TenantId::default_tenant();
            let tenant_handle = StdArc::new(TenantHandle::from_parts_for_tests(
                tenant_id.clone(),
                fake_config(dim as u32),
                path.clone(),
                tmp.path().to_path_buf(),
                embedder_id,
                hnsw,
                embedder.clone(),
                handle.clone(),
                // The harness owns ANOTHER WriteHandle clone + the join.
                // We give the TenantHandle a dummy join that immediately
                // returns — it never gets joined because shutdown_all
                // can't get exclusive Arc ownership when the harness
                // also holds a writer clone.
                std::thread::spawn(|| {}),
                pool,
            ));
            let tenant_handle_clone = tenant_handle.clone();

            // The dummy thread spawned above exits on its own. The real
            // `join` is deliberately NOT given to the TenantHandle: the
            // harness keeps it (and its own `handle` clone) so that
            // `shutdown` controls when the writer thread is joined.
            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
                tmp.path().to_path_buf(),
                key,
                embedder,
                tenant_handle,
            ));
            let registry_clone = registry.clone();

            // v0.11.0 P1: build the MCP session store inside the
            // harness runtime so the background sweep task's
            // `tokio::spawn` finds a runtime context. The store is
            // cheap to construct; the spawn happens once on `new()`.
            let mcp_sessions = runtime.block_on(async { crate::mcp_session::SessionStore::new() });
            let mcp_sessions_clone = mcp_sessions.clone();
            let state = SoloHttpState {
                registry,
                default_tenant: tenant_id,
                user_aliases: Arc::new(Vec::new()),
                mcp_sessions,
            };
            let router = router_with_auth_config(state, auth);
            Harness {
                router,
                _tmp: tmp,
                db_path: path,
                write_handle_extra: Some(handle),
                join: Some(join),
                tenant_handle: tenant_handle_clone,
                registry: registry_clone,
                mcp_sessions: mcp_sessions_clone,
            }
        }

        /// Tear the harness down and wait for the writer thread.
        ///
        /// Drops every handle the harness holds (writer handle, tenant
        /// handle, registry, router, temp dir) inside the runtime, then
        /// waits up to 5 seconds for the writer thread to exit. Panics
        /// if the thread does not exit in time or if it panicked.
        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
            let join = self.join.take();
            let extra = self.write_handle_extra.take();
            // v0.10.0: the new `tenant_handle` Harness field holds another
            // `Arc<TenantHandle>` that owns its own WriteHandle clone.
            // We must drop our reference here so the inner WriteHandle
            // can be released when the registry drops below. Without
            // this, the writer thread's mpsc never closes and the join
            // times out at 5s.
            let tenant_handle = self.tenant_handle;
            // v0.10.0: same story for the new `registry` Arc clone the
            // tenants-list tests use to seed extra index rows — the
            // state inside the router holds one Arc, this is the
            // other; both must drop before the underlying registry
            // dies and releases its index-mutex / cached handles.
            let registry = self.registry;
            runtime.block_on(async move {
                drop(extra);
                drop(tenant_handle); // drop Harness's direct tenant Arc
                drop(registry); // drop Harness's direct registry Arc
                drop(self.router); // drops state → drops pool inside runtime ctx
                drop(self._tmp);
                if let Some(join) = join {
                    // `JoinHandle::join` has no timeout, so a helper
                    // thread performs the join and reports over a
                    // channel; the bounded wait runs on the blocking
                    // pool so it does not stall an async worker.
                    let (tx, rx) = std::sync::mpsc::channel();
                    std::thread::spawn(move || {
                        let _ = tx.send(join.join());
                    });
                    tokio::task::spawn_blocking(move || {
                        rx.recv_timeout(std::time::Duration::from_secs(5))
                    })
                    .await
                    .expect("blocking task")
                    .expect("writer thread did not exit within 5s")
                    .expect("writer thread panicked");
                }
            });
        }
    }
6721
6722    fn rt() -> tokio::runtime::Runtime {
6723        tokio::runtime::Builder::new_multi_thread()
6724            .worker_threads(2)
6725            .enable_all()
6726            .build()
6727            .unwrap()
6728    }
6729
    /// Issue one HTTP request through the router and capture status +
    /// JSON body. `body` may be `None` for GET/DELETE.
    ///
    /// Sends no `Authorization` header: this is `call_with_auth` with
    /// `auth = None`. Use `call_with_auth` directly to send one.
    async fn call(
        router: axum::Router,
        method: &str,
        uri: &str,
        body: Option<Value>,
    ) -> (StatusCode, Value) {
        call_with_auth(router, method, uri, body, None).await
    }
6741
6742    async fn call_with_auth(
6743        router: axum::Router,
6744        method: &str,
6745        uri: &str,
6746        body: Option<Value>,
6747        auth: Option<&str>,
6748    ) -> (StatusCode, Value) {
6749        let mut req_builder = Request::builder()
6750            .method(method)
6751            .uri(uri)
6752            .header("content-type", "application/json");
6753        if let Some(a) = auth {
6754            req_builder = req_builder.header("authorization", a);
6755        }
6756        let req = if let Some(b) = body {
6757            let bytes = serde_json::to_vec(&b).unwrap();
6758            req_builder.body(Body::from(bytes)).unwrap()
6759        } else {
6760            req_builder = req_builder.header("content-length", "0");
6761            req_builder.body(Body::empty()).unwrap()
6762        };
6763        let resp = router.oneshot(req).await.expect("oneshot");
6764        let status = resp.status();
6765        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
6766        let v: Value = if body_bytes.is_empty() {
6767            Value::Null
6768        } else {
6769            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
6770        };
6771        (status, v)
6772    }
6773
6774    async fn call_with_tenant(
6775        router: axum::Router,
6776        method: &str,
6777        uri: &str,
6778        body: Option<Value>,
6779        tenant: &str,
6780    ) -> (StatusCode, Value) {
6781        let mut req_builder = Request::builder()
6782            .method(method)
6783            .uri(uri)
6784            .header("content-type", "application/json")
6785            .header("x-solo-tenant", tenant);
6786        let req = if let Some(b) = body {
6787            let bytes = serde_json::to_vec(&b).unwrap();
6788            req_builder.body(Body::from(bytes)).unwrap()
6789        } else {
6790            req_builder = req_builder.header("content-length", "0");
6791            req_builder.body(Body::empty()).unwrap()
6792        };
6793        let resp = router.oneshot(req).await.expect("oneshot");
6794        let status = resp.status();
6795        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
6796        let v: Value = if body_bytes.is_empty() {
6797            Value::Null
6798        } else {
6799            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
6800        };
6801        (status, v)
6802    }
6803
6804    #[test]
6805    fn health_returns_ok() {
6806        let runtime = rt();
6807        let h = Harness::new(&runtime);
6808        let r = h.router.clone();
6809        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
6810        assert_eq!(status, StatusCode::OK);
6811        h.shutdown(&runtime);
6812    }
6813
    /// `GET /openapi.json` returns a parseable OpenAPI 3.x document that
    /// lists every route the router serves, together with the
    /// request/response schemas those routes reference.
    /// Acts as a drift detector: if a future commit adds/removes a route
    /// without updating `openapi_spec`, this test fails loudly.
6818    #[test]
6819    fn openapi_json_describes_all_endpoints() {
6820        let runtime = rt();
6821        let h = Harness::new(&runtime);
6822        let r = h.router.clone();
6823        let (status, spec) = runtime.block_on(call(r, "GET", "/openapi.json", None));
6824        assert_eq!(status, StatusCode::OK);
6825        assert!(spec.is_object(), "openapi.json must be a JSON object");
6826
6827        // Top-level shape per OpenAPI 3.1.
6828        assert!(
6829            spec.get("openapi")
6830                .and_then(|v| v.as_str())
6831                .is_some_and(|s| s.starts_with("3.")),
6832            "missing or wrong openapi version: {spec}"
6833        );
6834        assert!(spec.pointer("/info/title").is_some());
6835        assert!(spec.pointer("/info/version").is_some());
6836
6837        // Every route the router serves must be documented.
6838        let paths = spec
6839            .get("paths")
6840            .and_then(|v| v.as_object())
6841            .expect("paths must be an object");
6842        for expected in [
6843            "/health",
6844            "/openapi.json",
6845            "/memory",
6846            "/memory/search",
6847            "/memory/context",
6848            "/memory/consolidate",
6849            "/memory/{id}",
6850            "/backup",
6851            // Path 1 derived-layer endpoints (v0.4.0+):
6852            "/memory/themes",
6853            "/memory/facts_about",
6854            "/memory/entities",
6855            "/memory/contradictions",
6856            "/memory/contradictions/resolve",
6857            // v0.5.0 Priority 3:
6858            "/memory/clusters/{cluster_id}",
6859            // v0.7.0 P6 — document operations:
6860            "/memory/documents",
6861            "/memory/documents/search",
6862            "/memory/documents/{id}",
6863            // v0.9.x/v0.10.x solo-web graph + tenant endpoints:
6864            "/v1/graph/expand",
6865            "/v1/graph/nodes",
6866            "/v1/graph/edges",
6867            "/v1/graph/inspect/{id}",
6868            "/v1/graph/neighbors/{id}",
6869            "/v1/graph/stream",
6870            "/v1/status",
6871            "/v1/tenants",
6872            // v0.10.2+ MCP Streamable HTTP transport:
6873            "/mcp",
6874        ] {
6875            assert!(
6876                paths.contains_key(expected),
6877                "openapi paths missing {expected}: {paths:?}"
6878            );
6879        }
6880
6881        // Method coverage on /memory/documents: must document both POST
6882        // (ingest) and GET (list).
6883        let docs = paths.get("/memory/documents").expect("/memory/documents");
6884        assert!(
6885            docs.get("post").is_some(),
6886            "POST /memory/documents undocumented"
6887        );
6888        assert!(
6889            docs.get("get").is_some(),
6890            "GET /memory/documents undocumented"
6891        );
6892
6893        // Method coverage on /memory/documents/{id}: must document both
6894        // GET (inspect) and DELETE (forget).
6895        let docid = paths
6896            .get("/memory/documents/{id}")
6897            .expect("/memory/documents/{id}");
6898        assert!(
6899            docid.get("get").is_some(),
6900            "GET /memory/documents/{{id}} undocumented"
6901        );
6902        assert!(
6903            docid.get("delete").is_some(),
6904            "DELETE /memory/documents/{{id}} undocumented"
6905        );
6906
6907        // Method coverage on /memory/{id}: must document GET (inspect),
6908        // PATCH (update), and DELETE (forget).
6909        let memid = paths.get("/memory/{id}").expect("memory/{id}");
6910        assert!(
6911            memid.get("get").is_some(),
6912            "GET /memory/{{id}} undocumented"
6913        );
6914        assert!(
6915            memid.get("patch").is_some(),
6916            "PATCH /memory/{{id}} undocumented"
6917        );
6918        assert!(
6919            memid.get("delete").is_some(),
6920            "DELETE /memory/{{id}} undocumented"
6921        );
6922
6923        // Component schemas referenced from paths must be defined.
6924        for schema_name in [
6925            "RememberRequest",
6926            "RememberResponse",
6927            "RecallRequest",
6928            "RecallResult",
6929            "MemoryContextRequest",
6930            "MemoryContextResult",
6931            "MemoryUpdateRequest",
6932            "MemoryUpdateResult",
6933            "EpisodeRecord",
6934            "ApiError",
6935            "ConsolidationScope",
6936            "ConsolidationReport",
6937            // Path 1 derived-layer schemas (v0.4.0+):
6938            "ThemeHit",
6939            "FactHit",
6940            "EntityHit",
6941            "ContradictionHit",
6942            "ContradictionResolveRequest",
6943            "ContradictionResolution",
6944            // v0.5.0 Priority 3:
6945            "ClusterRecord",
6946            // v0.7.0 P6 — document schemas:
6947            "IngestDocumentRequest",
6948            "IngestReport",
6949            "ForgetDocumentReport",
6950            "SearchDocsRequest",
6951            "DocSearchHit",
6952            "DocumentInspectResult",
6953            "DocumentSummary",
6954            // solo-web graph + tenant schemas:
6955            "GraphNode",
6956            "GraphEdge",
6957            "GraphResponse",
6958            "GraphNodesResponse",
6959            "GraphEdgesResponse",
6960            "GraphInspectResponse",
6961            "TenantListItem",
6962            "TenantsListResponse",
6963            "StatusResponse",
6964            // MCP HTTP JSON-RPC schemas:
6965            "JsonRpcRequest",
6966            "JsonRpcResponse",
6967        ] {
6968            let ptr = format!("/components/schemas/{schema_name}");
6969            assert!(
6970                spec.pointer(&ptr).is_some(),
6971                "component schema {schema_name} missing"
6972            );
6973        }
6974
6975        let mcp = paths.get("/mcp").expect("/mcp");
6976        assert!(mcp.get("post").is_some(), "POST /mcp undocumented");
6977        assert!(mcp.get("get").is_some(), "GET /mcp undocumented");
6978
6979        let tenants = paths.get("/v1/tenants").expect("/v1/tenants");
6980        assert!(tenants.get("get").is_some(), "GET /v1/tenants undocumented");
6981
6982        let status_path = paths.get("/v1/status").expect("/v1/status");
6983        let status_get = status_path.get("get").expect("GET /v1/status undocumented");
6984        assert_eq!(
6985            status_get.pointer("/responses/200/content/application~1json/schema/$ref"),
6986            Some(&json!("#/components/schemas/StatusResponse")),
6987            "GET /v1/status must return StatusResponse"
6988        );
6989
6990        let status_schema = spec
6991            .pointer("/components/schemas/StatusResponse")
6992            .expect("StatusResponse schema");
6993        for field in [
6994            "ok",
6995            "version",
6996            "tenant",
6997            "embedder",
6998            "active_tenants",
6999            "mcp",
7000        ] {
7001            assert!(
7002                status_schema
7003                    .pointer("/required")
7004                    .and_then(|v| v.as_array())
7005                    .is_some_and(|required| required.iter().any(|v| v == field)),
7006                "StatusResponse missing required field {field}"
7007            );
7008        }
7009        for ptr in [
7010            "/properties/tenant/required",
7011            "/properties/embedder/required",
7012            "/properties/mcp/required",
7013            "/properties/embedder/properties/dim/minimum",
7014            "/properties/mcp/properties/sessions/minimum",
7015        ] {
7016            assert!(
7017                status_schema.pointer(ptr).is_some(),
7018                "StatusResponse schema missing {ptr}"
7019            );
7020        }
7021
7022        // bearerAuth security scheme is declared (LAN deployments need it).
7023        assert!(
7024            spec.pointer("/components/securitySchemes/bearerAuth")
7025                .is_some(),
7026            "bearerAuth security scheme missing"
7027        );
7028
7029        h.shutdown(&runtime);
7030    }
7031
7032    /// `/openapi.json` must remain unauthenticated even when bearer auth
7033    /// is enabled — the spec describes the API shape, not secrets, and
7034    /// codegen tooling shouldn't need a credential to fetch it.
7035    #[test]
7036    fn openapi_json_is_exempt_from_bearer_auth() {
7037        let runtime = rt();
7038        let h = Harness::new_with_auth(&runtime, Some("super-secret".into()));
7039        let r = h.router.clone();
7040        // No Authorization header → still 200 for /openapi.json.
7041        let (status, _body) = runtime.block_on(call(r, "GET", "/openapi.json", None));
7042        assert_eq!(status, StatusCode::OK);
7043        h.shutdown(&runtime);
7044    }
7045
7046    #[test]
7047    fn remember_returns_memory_id() {
7048        let runtime = rt();
7049        let h = Harness::new(&runtime);
7050        let r = h.router.clone();
7051        let (status, body) = runtime.block_on(call(
7052            r,
7053            "POST",
7054            "/memory",
7055            Some(json!({ "content": "http harness test" })),
7056        ));
7057        assert_eq!(status, StatusCode::OK);
7058        let mid = body.get("memory_id").and_then(|v| v.as_str()).unwrap();
7059        assert_eq!(mid.len(), 36, "uuid length");
7060        h.shutdown(&runtime);
7061    }
7062
7063    #[test]
7064    fn update_memory_rewrites_content_and_inspect_sees_it() {
7065        let runtime = rt();
7066        let h = Harness::new(&runtime);
7067        let r = h.router.clone();
7068        let (status, body) = runtime.block_on(call(
7069            r.clone(),
7070            "POST",
7071            "/memory",
7072            Some(json!({ "content": "old transport memory" })),
7073        ));
7074        assert_eq!(status, StatusCode::OK);
7075        let mid = body
7076            .get("memory_id")
7077            .and_then(|v| v.as_str())
7078            .expect("memory_id")
7079            .to_string();
7080
7081        let (status, body) = runtime.block_on(call(
7082            r.clone(),
7083            "PATCH",
7084            &format!("/memory/{mid}"),
7085            Some(json!({ "content": "new transport memory" })),
7086        ));
7087        assert_eq!(status, StatusCode::OK, "update failed: {body}");
7088        assert_eq!(
7089            body.get("content").and_then(|v| v.as_str()),
7090            Some("new transport memory")
7091        );
7092
7093        let (status, body) = runtime.block_on(call(r, "GET", &format!("/memory/{mid}"), None));
7094        assert_eq!(status, StatusCode::OK);
7095        assert_eq!(
7096            body.get("content").and_then(|v| v.as_str()),
7097            Some("new transport memory")
7098        );
7099        h.shutdown(&runtime);
7100    }
7101
7102    #[test]
7103    fn empty_content_returns_400() {
7104        let runtime = rt();
7105        let h = Harness::new(&runtime);
7106        let r = h.router.clone();
7107        let (status, body) =
7108            runtime.block_on(call(r, "POST", "/memory", Some(json!({ "content": "" }))));
7109        assert_eq!(status, StatusCode::BAD_REQUEST);
7110        assert!(
7111            body.get("error")
7112                .and_then(|e| e.as_str())
7113                .map(|s| s.contains("must not be empty"))
7114                .unwrap_or(false),
7115            "got: {body}"
7116        );
7117        h.shutdown(&runtime);
7118    }
7119
7120    #[test]
7121    fn empty_query_returns_400() {
7122        let runtime = rt();
7123        let h = Harness::new(&runtime);
7124        let r = h.router.clone();
7125        let (status, body) = runtime.block_on(call(
7126            r,
7127            "POST",
7128            "/memory/search",
7129            Some(json!({ "query": "" })),
7130        ));
7131        assert_eq!(status, StatusCode::BAD_REQUEST);
7132        assert!(
7133            body.get("error")
7134                .and_then(|e| e.as_str())
7135                .map(|s| s.contains("must not be empty"))
7136                .unwrap_or(false),
7137            "got: {body}"
7138        );
7139        h.shutdown(&runtime);
7140    }
7141
7142    #[test]
7143    fn inspect_unknown_returns_404() {
7144        let runtime = rt();
7145        let h = Harness::new(&runtime);
7146        let r = h.router.clone();
7147        let (status, body) = runtime.block_on(call(
7148            r,
7149            "GET",
7150            "/memory/00000000-0000-7000-8000-000000000000",
7151            None,
7152        ));
7153        assert_eq!(status, StatusCode::NOT_FOUND);
7154        assert!(body.get("error").is_some(), "got: {body}");
7155        h.shutdown(&runtime);
7156    }
7157
7158    #[test]
7159    fn inspect_invalid_id_returns_400() {
7160        let runtime = rt();
7161        let h = Harness::new(&runtime);
7162        let r = h.router.clone();
7163        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/not-a-uuid", None));
7164        assert_eq!(status, StatusCode::BAD_REQUEST);
7165        h.shutdown(&runtime);
7166    }
7167
7168    #[test]
7169    fn forget_unknown_returns_404() {
7170        let runtime = rt();
7171        let h = Harness::new(&runtime);
7172        let r = h.router.clone();
7173        let (status, _body) = runtime.block_on(call(
7174            r,
7175            "DELETE",
7176            "/memory/00000000-0000-7000-8000-000000000000",
7177            None,
7178        ));
7179        assert_eq!(status, StatusCode::NOT_FOUND);
7180        h.shutdown(&runtime);
7181    }
7182
7183    /// `POST /memory/consolidate` runs the cluster pass and returns
7184    /// the report as JSON. With an empty body, `ConsolidationScope`
7185    /// defaults to unbounded; with a non-empty body, the
7186    /// `window_days` field is honored. The Harness's writer is
7187    /// spawned without a Steward, so `abstractions_built` stays 0
7188    /// even when `clusters_built` is nonzero — same posture as the
7189    /// daemon today.
7190    #[test]
7191    fn consolidate_endpoint_returns_report() {
7192        let runtime = rt();
7193        let h = Harness::new(&runtime);
7194        let r = h.router.clone();
7195        runtime.block_on(async move {
7196            // Empty DB → all-zero report; structural assertion only.
7197            let (status, body) = call(r.clone(), "POST", "/memory/consolidate", None).await;
7198            assert_eq!(status, StatusCode::OK);
7199            for field in [
7200                "episodes_seen",
7201                "clusters_built",
7202                "episodes_clustered",
7203                "abstractions_built",
7204                "triples_built",
7205                "contradictions_found",
7206            ] {
7207                assert!(
7208                    body.get(field).and_then(|v| v.as_u64()).is_some(),
7209                    "missing field {field}: {body}"
7210                );
7211            }
7212            assert_eq!(body["episodes_seen"], 0);
7213            assert_eq!(body["clusters_built"], 0);
7214
7215            // Non-empty body with window_days → still 200; unmistakable
7216            // shape round-trips through ConsolidationScope's serde.
7217            let (status2, _body2) = call(
7218                r,
7219                "POST",
7220                "/memory/consolidate",
7221                Some(json!({ "window_days": 7 })),
7222            )
7223            .await;
7224            assert_eq!(status2, StatusCode::OK);
7225        });
7226        h.shutdown(&runtime);
7227    }
7228
7229    #[test]
7230    fn auth_required_routes_reject_missing_token() {
7231        let runtime = rt();
7232        let h = Harness::new_with_auth(&runtime, Some("secret-xyz".into()));
7233        let r = h.router.clone();
7234        runtime.block_on(async move {
7235            // No Authorization header → 401.
7236            let (status, _body) = call(
7237                r.clone(),
7238                "POST",
7239                "/memory",
7240                Some(json!({ "content": "x" })),
7241            )
7242            .await;
7243            assert_eq!(status, StatusCode::UNAUTHORIZED);
7244
7245            // Wrong token → 401.
7246            let (status, _body) = call_with_auth(
7247                r.clone(),
7248                "POST",
7249                "/memory",
7250                Some(json!({ "content": "x" })),
7251                Some("Bearer wrong-token"),
7252            )
7253            .await;
7254            assert_eq!(status, StatusCode::UNAUTHORIZED);
7255
7256            // Correct token → handler runs (200).
7257            let (status, body) = call_with_auth(
7258                r.clone(),
7259                "POST",
7260                "/memory",
7261                Some(json!({ "content": "authed" })),
7262                Some("Bearer secret-xyz"),
7263            )
7264            .await;
7265            assert_eq!(status, StatusCode::OK);
7266            assert!(body.get("memory_id").is_some());
7267        });
7268        h.shutdown(&runtime);
7269    }
7270
7271    #[test]
7272    fn health_endpoint_does_not_require_auth() {
7273        let runtime = rt();
7274        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
7275        let r = h.router.clone();
7276        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
7277        // Liveness probes should work without credentials.
7278        assert_eq!(status, StatusCode::OK);
7279        h.shutdown(&runtime);
7280    }
7281
7282    #[test]
7283    fn auth_response_includes_www_authenticate_header() {
7284        // Verify the WWW-Authenticate hint that lets a well-behaved
7285        // client know it's a bearer-auth scheme. We check via raw
7286        // request → response (oneshot returns Response, but our
7287        // call() helper drops the headers; build the request manually).
7288        let runtime = rt();
7289        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
7290        let r = h.router.clone();
7291        runtime.block_on(async move {
7292            let req = Request::builder()
7293                .method("POST")
7294                .uri("/memory")
7295                .header("content-type", "application/json")
7296                .body(Body::from(
7297                    serde_json::to_vec(&json!({ "content": "x" })).unwrap(),
7298                ))
7299                .unwrap();
7300            let resp = r.oneshot(req).await.unwrap();
7301            assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
7302            let www = resp
7303                .headers()
7304                .get("www-authenticate")
7305                .and_then(|v| v.to_str().ok())
7306                .unwrap_or("");
7307            assert!(
7308                www.starts_with("Bearer"),
7309                "expected WWW-Authenticate: Bearer..., got: {www}"
7310            );
7311        });
7312        h.shutdown(&runtime);
7313    }
7314
7315    // ---------------------------------------------------------------------
7316    // v0.8.0 P3: OIDC end-to-end. Spin up a fake IdP (wiremock) that
7317    // serves an OIDC discovery doc + JWKS, mint a token claiming
7318    // `solo_tenant = "default"`, and verify it routes through the
7319    // middleware + TenantExtractor + handler.
7320    // ---------------------------------------------------------------------
7321
7322    fn base64_url_for_test(bytes: &[u8]) -> String {
7323        use base64::Engine;
7324        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes)
7325    }
7326
7327    /// Spin up a single-purpose fake OIDC IdP for these tests. Returns
7328    /// (mock_server, discovery_url, secret, kid).
7329    async fn spin_fake_idp() -> (wiremock::MockServer, String, Vec<u8>, &'static str) {
7330        use wiremock::matchers::{method, path};
7331        use wiremock::{Mock, MockServer, ResponseTemplate};
7332        let server = MockServer::start().await;
7333        let secret = b"http-test-secret-for-hmac-fixture".to_vec();
7334        let kid = "http-test-kid";
7335        let discovery = serde_json::json!({
7336            "issuer": server.uri(),
7337            "jwks_uri": format!("{}/jwks", server.uri()),
7338        });
7339        Mock::given(method("GET"))
7340            .and(path("/.well-known/openid-configuration"))
7341            .respond_with(ResponseTemplate::new(200).set_body_json(discovery))
7342            .mount(&server)
7343            .await;
7344        let jwks = serde_json::json!({
7345            "keys": [
7346                {
7347                    "kty": "oct",
7348                    "kid": kid,
7349                    "alg": "HS256",
7350                    "k": base64_url_for_test(&secret),
7351                }
7352            ]
7353        });
7354        Mock::given(method("GET"))
7355            .and(path("/jwks"))
7356            .respond_with(ResponseTemplate::new(200).set_body_json(jwks))
7357            .mount(&server)
7358            .await;
7359        let discovery_url = format!("{}/.well-known/openid-configuration", server.uri());
7360        (server, discovery_url, secret, kid)
7361    }
7362
7363    fn mint_idp_token(
7364        server_uri: &str,
7365        kid: &str,
7366        secret: &[u8],
7367        tenant_claim: &str,
7368        audience: &str,
7369    ) -> String {
7370        use jsonwebtoken::{Algorithm, EncodingKey, Header};
7371        let mut header = Header::new(Algorithm::HS256);
7372        header.kid = Some(kid.to_string());
7373        let now = std::time::SystemTime::now()
7374            .duration_since(std::time::UNIX_EPOCH)
7375            .unwrap()
7376            .as_secs();
7377        let claims = serde_json::json!({
7378            "iss": server_uri,
7379            "sub": "test-user-1",
7380            "aud": audience,
7381            "exp": now + 600,
7382            "iat": now,
7383            "solo_tenant": tenant_claim,
7384        });
7385        jsonwebtoken::encode(&header, &claims, &EncodingKey::from_secret(secret))
7386            .expect("mint token")
7387    }
7388
7389    #[test]
7390    fn http_oidc_accept_resolves_to_tenant_from_claim() {
7391        let runtime = rt();
7392        let (fake_server, discovery_url, secret, kid) =
7393            runtime.block_on(async { spin_fake_idp().await });
7394        let server_uri = fake_server.uri();
7395        // Keep the wiremock server alive for the duration of this test.
7396        let _server_guard = fake_server;
7397
7398        let auth = crate::auth::AuthConfig::Oidc {
7399            discovery_url,
7400            audience: "test-audience".to_string(),
7401            tenant_claim_name: "solo_tenant".to_string(),
7402        };
7403        let h = Harness::new_with_auth_config(&runtime, Some(auth));
7404        let r = h.router.clone();
7405
7406        // Mint a token claiming the harness's default tenant.
7407        let token = mint_idp_token(&server_uri, kid, &secret, "default", "test-audience");
7408
7409        runtime.block_on(async move {
7410            // POST /memory with a valid OIDC token → handler runs, returns memory_id.
7411            let (status, body) = call_with_auth(
7412                r.clone(),
7413                "POST",
7414                "/memory",
7415                Some(json!({ "content": "oidc-routed content" })),
7416                Some(&format!("Bearer {token}")),
7417            )
7418            .await;
7419            assert_eq!(status, StatusCode::OK, "got body: {body}");
7420            assert!(body.get("memory_id").is_some(), "no memory_id in {body}");
7421        });
7422        h.shutdown(&runtime);
7423    }
7424
7425    #[test]
7426    fn http_oidc_reject_missing_token_returns_401() {
7427        let runtime = rt();
7428        let (fake_server, discovery_url, _secret, _kid) =
7429            runtime.block_on(async { spin_fake_idp().await });
7430        let _server_guard = fake_server;
7431        let auth = crate::auth::AuthConfig::Oidc {
7432            discovery_url,
7433            audience: "test-audience".to_string(),
7434            tenant_claim_name: "solo_tenant".to_string(),
7435        };
7436        let h = Harness::new_with_auth_config(&runtime, Some(auth));
7437        let r = h.router.clone();
7438        runtime.block_on(async move {
7439            // No Authorization header.
7440            let (status, _body) = call(
7441                r.clone(),
7442                "POST",
7443                "/memory",
7444                Some(json!({ "content": "x" })),
7445            )
7446            .await;
7447            assert_eq!(status, StatusCode::UNAUTHORIZED);
7448
7449            // Garbage token → 401 (invalid signature / not a JWT).
7450            let (status, _body) = call_with_auth(
7451                r.clone(),
7452                "POST",
7453                "/memory",
7454                Some(json!({ "content": "x" })),
7455                Some("Bearer not-a-real-jwt"),
7456            )
7457            .await;
7458            assert_eq!(status, StatusCode::UNAUTHORIZED);
7459        });
7460        h.shutdown(&runtime);
7461    }
7462
7463    #[test]
7464    fn full_remember_recall_inspect_forget_round_trip() {
7465        let runtime = rt();
7466        let h = Harness::new(&runtime);
7467        let r = h.router.clone();
7468        runtime.block_on(async move {
7469            // POST /memory
7470            let (status, body) = call(
7471                r.clone(),
7472                "POST",
7473                "/memory",
7474                Some(json!({ "content": "round-trip content" })),
7475            )
7476            .await;
7477            assert_eq!(status, StatusCode::OK);
7478            let mid = body
7479                .get("memory_id")
7480                .and_then(|v| v.as_str())
7481                .unwrap()
7482                .to_string();
7483
7484            // POST /memory/search — exact-match (StubEmbedder) returns the row.
7485            let (status, body) = call(
7486                r.clone(),
7487                "POST",
7488                "/memory/search",
7489                Some(json!({ "query": "round-trip content", "limit": 5 })),
7490            )
7491            .await;
7492            assert_eq!(status, StatusCode::OK);
7493            assert!(
7494                body.get("candidates_considered")
7495                    .and_then(|v| v.as_u64())
7496                    .is_some_and(|n| n >= 1),
7497                "recall should expose pre-filter candidate diagnostics: {body}"
7498            );
7499            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
7500            assert!(
7501                hits.iter().any(
7502                    |h| h.get("content").and_then(|c| c.as_str()) == Some("round-trip content")
7503                ),
7504                "expected hit with content; got: {body}"
7505            );
7506
7507            // GET /memory/{id}
7508            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
7509            assert_eq!(status, StatusCode::OK);
7510            assert_eq!(body.get("status").and_then(|v| v.as_str()), Some("active"));
7511
7512            // DELETE /memory/{id}
7513            let (status, _body) = call(r.clone(), "DELETE", &format!("/memory/{mid}"), None).await;
7514            assert_eq!(status, StatusCode::NO_CONTENT);
7515
7516            // GET again — still readable but status='forgotten'
7517            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
7518            assert_eq!(status, StatusCode::OK);
7519            assert_eq!(
7520                body.get("status").and_then(|v| v.as_str()),
7521                Some("forgotten")
7522            );
7523
7524            // POST /memory/search — forgotten row excluded.
7525            let (status, body) = call(
7526                r.clone(),
7527                "POST",
7528                "/memory/search",
7529                Some(json!({ "query": "round-trip content", "limit": 5 })),
7530            )
7531            .await;
7532            assert_eq!(status, StatusCode::OK);
7533            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
7534            assert!(
7535                hits.iter()
7536                    .all(|h| h.get("memory_id").and_then(|m| m.as_str()) != Some(mid.as_str())),
7537                "forgotten row should be excluded from recall: {body}"
7538            );
7539        });
7540        h.shutdown(&runtime);
7541    }
7542
7543    #[test]
7544    fn memory_context_endpoint_returns_bundle() {
7545        let runtime = rt();
7546        let h = Harness::new(&runtime);
7547        let r = h.router.clone();
7548        runtime.block_on(async move {
7549            let (status, _body) = call(
7550                r.clone(),
7551                "POST",
7552                "/memory",
7553                Some(json!({ "content": "http memory context needle" })),
7554            )
7555            .await;
7556            assert_eq!(status, StatusCode::OK);
7557
7558            let (status, body) = call(
7559                r,
7560                "POST",
7561                "/memory/context",
7562                Some(json!({ "query": "memory context needle", "limit": 5 })),
7563            )
7564            .await;
7565            assert_eq!(status, StatusCode::OK);
7566            assert_eq!(
7567                body.get("query").and_then(|v| v.as_str()),
7568                Some("memory context needle")
7569            );
7570            let hits = body
7571                .pointer("/recall/hits")
7572                .and_then(|v| v.as_array())
7573                .unwrap_or_else(|| panic!("missing /recall/hits: {body}"));
7574            assert!(
7575                hits.iter()
7576                    .any(|h| h.get("content").and_then(|c| c.as_str())
7577                        == Some("http memory context needle")),
7578                "expected context recall hit: {body}"
7579            );
7580            assert!(body.get("themes").is_some_and(|v| v.is_array()));
7581            assert!(body.get("facts").is_some_and(|v| v.is_array()));
7582            assert!(body.get("contradictions").is_some_and(|v| v.is_array()));
7583        });
7584        h.shutdown(&runtime);
7585    }
7586
7587    // Path 1 derived-layer endpoint tests (v0.4.0+). Wire-path only —
7588    // the actual content correctness is covered by solo-query::derived's
7589    // own tests (Sub-task A). These verify the HTTP shape: GET routing,
7590    // Query-string param parsing, JSON-array response body, validation
7591    // 400s for invalid inputs.
7592
7593    #[test]
7594    fn themes_endpoint_returns_empty_array_on_empty_db() {
7595        let runtime = rt();
7596        let h = Harness::new(&runtime);
7597        let r = h.router.clone();
7598        let (status, body) = runtime.block_on(call(r, "GET", "/memory/themes", None));
7599        assert_eq!(status, StatusCode::OK);
7600        assert!(body.is_array(), "expected array, got {body}");
7601        assert_eq!(body.as_array().unwrap().len(), 0);
7602        h.shutdown(&runtime);
7603    }
7604
7605    #[test]
7606    fn themes_endpoint_passes_through_query_params() {
7607        let runtime = rt();
7608        let h = Harness::new(&runtime);
7609        let r = h.router.clone();
7610        let (status, body) = runtime.block_on(call(
7611            r,
7612            "GET",
7613            "/memory/themes?window_days=7&limit=20",
7614            None,
7615        ));
7616        assert_eq!(status, StatusCode::OK);
7617        assert!(body.is_array(), "expected array, got {body}");
7618        h.shutdown(&runtime);
7619    }
7620
7621    #[test]
7622    fn facts_about_endpoint_requires_subject() {
7623        let runtime = rt();
7624        let h = Harness::new(&runtime);
7625        let r = h.router.clone();
7626        // Missing subject — axum's Query extractor 422 (Unprocessable
7627        // Entity) on missing required field; some axum versions
7628        // surface as 400. Accept either.
7629        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/facts_about", None));
7630        assert!(
7631            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
7632            "expected 400 or 422 for missing subject, got {status}"
7633        );
7634        h.shutdown(&runtime);
7635    }
7636
7637    #[test]
7638    fn facts_about_endpoint_rejects_blank_subject() {
7639        let runtime = rt();
7640        let h = Harness::new(&runtime);
7641        let r = h.router.clone();
7642        // Whitespace-only subject reaches the handler then trips its
7643        // own validation → ApiError::bad_request → 400.
7644        let (status, body) =
7645            runtime.block_on(call(r, "GET", "/memory/facts_about?subject=%20%20", None));
7646        assert_eq!(status, StatusCode::BAD_REQUEST);
7647        assert!(
7648            body.get("error")
7649                .and_then(|v| v.as_str())
7650                .is_some_and(|s| s.contains("subject")),
7651            "expected error mentioning subject, got {body}"
7652        );
7653        h.shutdown(&runtime);
7654    }
7655
7656    #[test]
7657    fn facts_about_endpoint_returns_empty_array_for_unknown_subject() {
7658        let runtime = rt();
7659        let h = Harness::new(&runtime);
7660        let r = h.router.clone();
7661        let (status, body) = runtime.block_on(call(
7662            r,
7663            "GET",
7664            "/memory/facts_about?subject=NobodyKnows",
7665            None,
7666        ));
7667        assert_eq!(status, StatusCode::OK);
7668        assert_eq!(body.as_array().unwrap().len(), 0);
7669        h.shutdown(&runtime);
7670    }
7671
7672    #[test]
7673    fn facts_about_endpoint_parses_include_as_object_query_param() {
7674        // v0.5.1 P8: `?include_as_object=true` must parse cleanly
7675        // through the `Query<FactsAboutQuery>` extractor. If the
7676        // struct field is missing or wrongly typed, axum returns
7677        // 400/422 before reaching the handler. We don't seed
7678        // triples; we only need the request to reach the handler
7679        // and produce a normal 200 + empty array. Mirrors
7680        // `inspect_cluster_endpoint_passes_full_content_query_param`.
7681        let runtime = rt();
7682        let h = Harness::new(&runtime);
7683        let r = h.router.clone();
7684        let (status, body) = runtime.block_on(call(
7685            r,
7686            "GET",
7687            "/memory/facts_about?subject=Maya&include_as_object=true",
7688            None,
7689        ));
7690        assert_eq!(
7691            status,
7692            StatusCode::OK,
7693            "expected 200 with include_as_object query param, got {status}"
7694        );
7695        assert!(body.is_array());
7696        h.shutdown(&runtime);
7697    }
7698
7699    #[test]
7700    fn entities_endpoint_returns_matching_graph_entities() {
7701        let runtime = rt();
7702        let h = Harness::new(&runtime);
7703        {
7704            let conn = h.open_db();
7705            let memory_id = MemoryId::new().to_string();
7706            let rowid = seed_episode(&conn, &memory_id, 100, "Alice works with graph transport");
7707            seed_triple_row(
7708                &conn,
7709                "t-http-entity-1",
7710                "Alice",
7711                "knows",
7712                "Bob",
7713                Some(rowid),
7714            );
7715            seed_triple_row(
7716                &conn,
7717                "t-http-entity-2",
7718                "Alicia",
7719                "works_at",
7720                "Solo",
7721                Some(rowid),
7722            );
7723        }
7724
7725        let r = h.router.clone();
7726        let (status, body) =
7727            runtime.block_on(call(r, "GET", "/memory/entities?query=Ali&limit=5", None));
7728        assert_eq!(status, StatusCode::OK);
7729        let arr = body.as_array().expect("entities array");
7730        assert!(
7731            arr.iter()
7732                .any(|v| v.get("entity_id").and_then(|id| id.as_str()) == Some("Alice")),
7733            "expected Alice entity, got {body}"
7734        );
7735        h.shutdown(&runtime);
7736    }
7737
7738    #[test]
7739    fn inspect_cluster_endpoint_unknown_id_returns_404() {
7740        // Maps `Error::NotFound` from `solo_query::inspect_cluster`
7741        // through `ApiError::from` → 404. Mirrors the unknown-memory
7742        // case for `GET /memory/{id}`.
7743        let runtime = rt();
7744        let h = Harness::new(&runtime);
7745        let r = h.router.clone();
7746        let (status, body) =
7747            runtime.block_on(call(r, "GET", "/memory/clusters/no-such-cluster", None));
7748        assert_eq!(status, StatusCode::NOT_FOUND);
7749        assert!(
7750            body.get("error")
7751                .and_then(|v| v.as_str())
7752                .is_some_and(|s| s.contains("no-such-cluster")),
7753            "expected error mentioning cluster id, got {body}"
7754        );
7755        h.shutdown(&runtime);
7756    }
7757
7758    #[test]
7759    fn inspect_cluster_endpoint_passes_full_content_query_param() {
7760        // Even with no matching cluster (→ 404), the request must
7761        // reach the handler — proves the `?full_content=true` query
7762        // string parses cleanly (Query<InspectClusterQuery>::default
7763        // path didn't choke). If we accidentally fail at the extractor
7764        // we'd get a 400/422, not the expected 404.
7765        let runtime = rt();
7766        let h = Harness::new(&runtime);
7767        let r = h.router.clone();
7768        let (status, _body) = runtime.block_on(call(
7769            r,
7770            "GET",
7771            "/memory/clusters/missing?full_content=true",
7772            None,
7773        ));
7774        assert_eq!(status, StatusCode::NOT_FOUND);
7775        h.shutdown(&runtime);
7776    }
7777
7778    #[test]
7779    fn contradictions_endpoint_returns_empty_array_on_empty_db() {
7780        let runtime = rt();
7781        let h = Harness::new(&runtime);
7782        let r = h.router.clone();
7783        let (status, body) = runtime.block_on(call(r, "GET", "/memory/contradictions", None));
7784        assert_eq!(status, StatusCode::OK);
7785        assert!(body.is_array());
7786        assert_eq!(body.as_array().unwrap().len(), 0);
7787        h.shutdown(&runtime);
7788    }
7789
7790    #[test]
7791    fn contradiction_resolve_endpoint_updates_lifecycle() {
7792        let runtime = rt();
7793        let h = Harness::new(&runtime);
7794        {
7795            let conn = h.open_db();
7796            let memory_id = MemoryId::new().to_string();
7797            let rowid = seed_episode(&conn, &memory_id, 100, "contradiction source");
7798            seed_triple_row(&conn, "t-http-a", "Alice", "likes", "tea", Some(rowid));
7799            seed_triple_row(&conn, "t-http-b", "Alice", "likes", "coffee", Some(rowid));
7800            seed_contradiction_row(&conn, "t-http-a", "t-http-b", "other");
7801        }
7802
7803        let r = h.router.clone();
7804        let (status, body) = runtime.block_on(call(
7805            r.clone(),
7806            "POST",
7807            "/memory/contradictions/resolve",
7808            Some(json!({
7809                "a_id": "t-http-a",
7810                "b_id": "t-http-b",
7811                "kind": "other",
7812                "resolution_note": "tea is current",
7813                "winning_triple_id": "t-http-a"
7814            })),
7815        ));
7816        assert_eq!(status, StatusCode::OK, "resolve failed: {body}");
7817        assert_eq!(
7818            body.get("status").and_then(|v| v.as_str()),
7819            Some("resolved")
7820        );
7821        assert!(
7822            body.get("resolved_at_ms")
7823                .and_then(|v| v.as_i64())
7824                .is_some()
7825        );
7826
7827        let (status, body) = runtime.block_on(call(r, "GET", "/memory/contradictions", None));
7828        assert_eq!(status, StatusCode::OK);
7829        assert_eq!(
7830            body.pointer("/0/status").and_then(|v| v.as_str()),
7831            Some("resolved")
7832        );
7833        h.shutdown(&runtime);
7834    }
7835
7836    #[test]
7837    fn derived_endpoints_require_bearer_when_auth_enabled() {
7838        let runtime = rt();
7839        let h = Harness::new_with_auth(&runtime, Some("secret-token".to_string()));
7840        // Each of the three new endpoints should reject missing token.
7841        // Per the existing tests' shutdown-timing comment: don't hold a
7842        // long-lived router clone across multiple iterations — drop the
7843        // clone before each subsequent oneshot, and don't keep a `let r =
7844        // h.router.clone()` alive across h.shutdown(). Re-clone per
7845        // iteration; the per-call clone is consumed by oneshot.
7846        for path in [
7847            "/memory/themes",
7848            "/memory/facts_about?subject=Sam",
7849            "/memory/entities?query=Sam",
7850            "/memory/contradictions",
7851            "/memory/clusters/any-id",
7852        ] {
7853            let (status, _) = runtime.block_on(call(h.router.clone(), "GET", path, None));
7854            assert_eq!(
7855                status,
7856                StatusCode::UNAUTHORIZED,
7857                "{path} should 401 without token"
7858            );
7859        }
7860        h.shutdown(&runtime);
7861    }
7862
7863    // ---- Document endpoints (v0.7.0 P6) ----
7864    //
7865    // Wire-path coverage. The `Harness` here uses
7866    // `WriterActor::spawn_full` without an embedder — same shape as the
7867    // existing handler tests. Ingest/search would fail at the writer
7868    // boundary with "writer has no embedder", but every other path
7869    // (404s, malformed ids, route shape, bearer auth gating, OpenAPI
7870    // documentation) is exercisable. Real end-to-end ingest→search
7871    // round-trip lives in `mcp_smoke.rs` where a real subprocess runs
7872    // with a fully-wired writer.
7873
7874    #[test]
7875    fn list_documents_endpoint_returns_empty_array_on_empty_db() {
7876        let runtime = rt();
7877        let h = Harness::new(&runtime);
7878        let r = h.router.clone();
7879        let (status, body) = runtime.block_on(call(r, "GET", "/memory/documents", None));
7880        assert_eq!(status, StatusCode::OK);
7881        assert!(body.is_array(), "expected array, got {body}");
7882        assert_eq!(body.as_array().unwrap().len(), 0);
7883        h.shutdown(&runtime);
7884    }
7885
7886    #[test]
7887    fn list_documents_endpoint_parses_query_params() {
7888        let runtime = rt();
7889        let h = Harness::new(&runtime);
7890        let r = h.router.clone();
7891        let (status, body) = runtime.block_on(call(
7892            r,
7893            "GET",
7894            "/memory/documents?limit=5&offset=0&include_forgotten=true",
7895            None,
7896        ));
7897        assert_eq!(status, StatusCode::OK);
7898        assert!(body.is_array());
7899        h.shutdown(&runtime);
7900    }
7901
7902    #[test]
7903    fn ingest_document_endpoint_rejects_empty_path() {
7904        let runtime = rt();
7905        let h = Harness::new(&runtime);
7906        let r = h.router.clone();
7907        let (status, body) = runtime.block_on(call(
7908            r,
7909            "POST",
7910            "/memory/documents",
7911            Some(json!({ "path": "" })),
7912        ));
7913        assert_eq!(status, StatusCode::BAD_REQUEST);
7914        assert!(
7915            body.get("error")
7916                .and_then(|v| v.as_str())
7917                .is_some_and(|s| s.contains("path")),
7918            "expected error mentioning path, got {body}"
7919        );
7920        h.shutdown(&runtime);
7921    }
7922
7923    #[test]
7924    fn search_docs_endpoint_rejects_empty_query() {
7925        let runtime = rt();
7926        let h = Harness::new(&runtime);
7927        let r = h.router.clone();
7928        let (status, body) = runtime.block_on(call(
7929            r,
7930            "POST",
7931            "/memory/documents/search",
7932            Some(json!({ "query": "   " })),
7933        ));
7934        assert_eq!(status, StatusCode::BAD_REQUEST);
7935        assert!(
7936            body.get("error")
7937                .and_then(|v| v.as_str())
7938                .is_some_and(|s| s.contains("must not be empty") || s.contains("doc_search")),
7939            "expected error mentioning empty query, got {body}"
7940        );
7941        h.shutdown(&runtime);
7942    }
7943
7944    #[test]
7945    fn inspect_document_endpoint_unknown_id_returns_404() {
7946        let runtime = rt();
7947        let h = Harness::new(&runtime);
7948        let r = h.router.clone();
7949        let (status, body) = runtime.block_on(call(
7950            r,
7951            "GET",
7952            "/memory/documents/00000000-0000-7000-8000-000000000000",
7953            None,
7954        ));
7955        assert_eq!(status, StatusCode::NOT_FOUND);
7956        assert!(body.get("error").is_some(), "got: {body}");
7957        h.shutdown(&runtime);
7958    }
7959
7960    #[test]
7961    fn inspect_document_endpoint_rejects_malformed_id() {
7962        let runtime = rt();
7963        let h = Harness::new(&runtime);
7964        let r = h.router.clone();
7965        let (status, _body) =
7966            runtime.block_on(call(r, "GET", "/memory/documents/not-a-uuid", None));
7967        assert_eq!(status, StatusCode::BAD_REQUEST);
7968        h.shutdown(&runtime);
7969    }
7970
7971    #[test]
7972    fn forget_document_endpoint_unknown_id_returns_404() {
7973        // Valid UUID format; no row exists → writer's `forget_document`
7974        // returns Error::NotFound → mapped to 404 by `ApiError::from`.
7975        let runtime = rt();
7976        let h = Harness::new(&runtime);
7977        let r = h.router.clone();
7978        let (status, _body) = runtime.block_on(call(
7979            r,
7980            "DELETE",
7981            "/memory/documents/00000000-0000-7000-8000-000000000000",
7982            None,
7983        ));
7984        assert_eq!(status, StatusCode::NOT_FOUND);
7985        h.shutdown(&runtime);
7986    }
7987
7988    #[test]
7989    fn forget_document_endpoint_rejects_malformed_id() {
7990        let runtime = rt();
7991        let h = Harness::new(&runtime);
7992        let r = h.router.clone();
7993        let (status, _body) =
7994            runtime.block_on(call(r, "DELETE", "/memory/documents/not-a-uuid", None));
7995        assert_eq!(status, StatusCode::BAD_REQUEST);
7996        h.shutdown(&runtime);
7997    }
7998
7999    #[test]
8000    fn document_endpoints_require_bearer_when_auth_enabled() {
8001        // All five doc endpoints sit behind the same authed Router and
8002        // must 401 without the bearer token. Mirrors
8003        // `derived_endpoints_require_bearer_when_auth_enabled`.
8004        let runtime = rt();
8005        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
8006        let cases: &[(&str, &str, Option<Value>)] = &[
8007            ("POST", "/memory/documents", Some(json!({ "path": "/x" }))),
8008            ("GET", "/memory/documents", None),
8009            (
8010                "POST",
8011                "/memory/documents/search",
8012                Some(json!({ "query": "x" })),
8013            ),
8014            (
8015                "GET",
8016                "/memory/documents/00000000-0000-7000-8000-000000000000",
8017                None,
8018            ),
8019            (
8020                "DELETE",
8021                "/memory/documents/00000000-0000-7000-8000-000000000000",
8022                None,
8023            ),
8024        ];
8025        for (method, path, body) in cases {
8026            let (status, _) = runtime.block_on(call(h.router.clone(), method, path, body.clone()));
8027            assert_eq!(
8028                status,
8029                StatusCode::UNAUTHORIZED,
8030                "{method} {path} should 401 without token"
8031            );
8032        }
8033        h.shutdown(&runtime);
8034    }
8035
8036    #[test]
8037    fn document_endpoints_accept_correct_bearer_token() {
8038        // Sanity check: with the right token, the same five endpoints
8039        // pass auth and reach the handler. We only assert that the
8040        // status code is NOT 401 — exact downstream behaviour depends
8041        // on the harness (no embedder → ingest/search would 500; empty
8042        // DB → list/inspect/forget return 200/404).
8043        let runtime = rt();
8044        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
8045        runtime.block_on(async {
8046            // GET /memory/documents → 200 + empty array (auth passes).
8047            let (status, _) = call_with_auth(
8048                h.router.clone(),
8049                "GET",
8050                "/memory/documents",
8051                None,
8052                Some("Bearer doc-secret"),
8053            )
8054            .await;
8055            assert_eq!(status, StatusCode::OK);
8056
8057            // GET /memory/documents/<unknown> → 404 (auth passes).
8058            let (status, _) = call_with_auth(
8059                h.router.clone(),
8060                "GET",
8061                "/memory/documents/00000000-0000-7000-8000-000000000000",
8062                None,
8063                Some("Bearer doc-secret"),
8064            )
8065            .await;
8066            assert_eq!(status, StatusCode::NOT_FOUND);
8067        });
8068        h.shutdown(&runtime);
8069    }
8070
8071    // ---------------------------------------------------------------------
8072    // v0.8.0 P2: tenant header extractor tests
8073    // ---------------------------------------------------------------------
8074
8075    /// `X-Solo-Tenant: default` resolves to the default tenant (which
8076    /// in the test harness is the only one wired in the registry).
8077    #[test]
8078    fn tenant_header_default_resolves() {
8079        let runtime = rt();
8080        let h = Harness::new(&runtime);
8081        let r = h.router.clone();
8082        let (status, _body) = runtime.block_on(async {
8083            let req = Request::builder()
8084                .method("GET")
8085                .uri("/memory/00000000-0000-7000-8000-000000000000")
8086                .header("x-solo-tenant", "default")
8087                .body(Body::empty())
8088                .unwrap();
8089            let resp = r.oneshot(req).await.expect("oneshot");
8090            let s = resp.status();
8091            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8092            (s, _b)
8093        });
8094        // 404 because the id doesn't exist — but it's a routed 404 from
8095        // inspect_handler, not a 400 from a bad tenant header. That's
8096        // the proof point.
8097        assert_eq!(status, StatusCode::NOT_FOUND);
8098        h.shutdown(&runtime);
8099    }
8100
8101    /// `X-Solo-Tenant: UPPER` → 400 (invalid tenant id format).
8102    #[test]
8103    fn tenant_header_invalid_returns_400() {
8104        let runtime = rt();
8105        let h = Harness::new(&runtime);
8106        let r = h.router.clone();
8107        let (status, body) = runtime.block_on(async {
8108            let req = Request::builder()
8109                .method("GET")
8110                .uri("/memory/00000000-0000-7000-8000-000000000000")
8111                .header("x-solo-tenant", "UPPER")
8112                .body(Body::empty())
8113                .unwrap();
8114            let resp = r.oneshot(req).await.expect("oneshot");
8115            let s = resp.status();
8116            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
8117            let v: Value = serde_json::from_slice(&bytes).unwrap_or(Value::Null);
8118            (s, v)
8119        });
8120        assert_eq!(status, StatusCode::BAD_REQUEST);
8121        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
8122        assert!(
8123            msg.to_lowercase().contains("tenant") || msg.to_lowercase().contains("invalid"),
8124            "error must mention tenant/invalid: {msg}"
8125        );
8126        h.shutdown(&runtime);
8127    }
8128
8129    /// `X-Solo-Tenant: never-registered` → 404 (unknown tenant id).
8130    #[test]
8131    fn tenant_header_unknown_returns_404() {
8132        let runtime = rt();
8133        let h = Harness::new(&runtime);
8134        let r = h.router.clone();
8135        let (status, _body) = runtime.block_on(async {
8136            let req = Request::builder()
8137                .method("GET")
8138                .uri("/memory/00000000-0000-7000-8000-000000000000")
8139                .header("x-solo-tenant", "never-registered")
8140                .body(Body::empty())
8141                .unwrap();
8142            let resp = r.oneshot(req).await.expect("oneshot");
8143            let s = resp.status();
8144            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8145            (s, _b)
8146        });
8147        assert_eq!(status, StatusCode::NOT_FOUND);
8148        h.shutdown(&runtime);
8149    }
8150
8151    /// No `X-Solo-Tenant` header → falls back to state.default_tenant.
8152    /// The reach-through to `inspect_handler` should produce the normal
8153    /// 404 for an unknown id rather than a tenant-routing error.
8154    #[test]
8155    fn tenant_header_missing_defaults_to_state_default_tenant() {
8156        let runtime = rt();
8157        let h = Harness::new(&runtime);
8158        let r = h.router.clone();
8159        let (status, _body) = runtime.block_on(async {
8160            let req = Request::builder()
8161                .method("GET")
8162                .uri("/memory/00000000-0000-7000-8000-000000000000")
8163                .body(Body::empty())
8164                .unwrap();
8165            let resp = r.oneshot(req).await.expect("oneshot");
8166            let s = resp.status();
8167            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8168            (s, _b)
8169        });
8170        assert_eq!(status, StatusCode::NOT_FOUND);
8171        h.shutdown(&runtime);
8172    }
8173
8174    // ---------------------------------------------------------------------
8175    // v0.9.x: GET /v1/graph/expand
8176    //
8177    // Seeds tables directly via the Harness's side connection and walks
8178    // the four expansion kinds. The Harness is single-tenant (default);
8179    // the routing-isolation case is already covered by the
8180    // `tenant_header_*` tests above (an `X-Solo-Tenant: never-registered`
8181    // header against the same node_id surfaces 404 from the registry,
8182    // proving cross-tenant lookups can't bleed).
8183    // ---------------------------------------------------------------------
8184
8185    /// Insert one episode row directly. Returns its rowid for callers
8186    /// that need to wire `triples.source_episode_id`.
8187    fn seed_episode(
8188        conn: &rusqlite::Connection,
8189        memory_id: &str,
8190        ts_ms: i64,
8191        content: &str,
8192    ) -> i64 {
8193        conn.execute(
8194            "INSERT INTO episodes
8195                (memory_id, ts_ms, source_type, content,
8196                 encoding_context_json, tier, status,
8197                 confidence, strength, salience,
8198                 created_at_ms, updated_at_ms)
8199                VALUES (?1, ?2, 'user_message', ?3,
8200                        '{}', 'hot', 'active',
8201                        1.0, 0.5, 0.5, ?2, ?2)",
8202            rusqlite::params![memory_id, ts_ms, content],
8203        )
8204        .expect("seed episode");
8205        conn.last_insert_rowid()
8206    }
8207
8208    fn seed_cluster_row(conn: &rusqlite::Connection, cluster_id: &str, created_at_ms: i64) {
8209        conn.execute(
8210            "INSERT INTO clusters (cluster_id, coherence, created_at_ms)
8211                  VALUES (?1, 0.5, ?2)",
8212            rusqlite::params![cluster_id, created_at_ms],
8213        )
8214        .expect("seed cluster");
8215    }
8216
8217    fn seed_cluster_member(conn: &rusqlite::Connection, cluster_id: &str, memory_id: &str) {
8218        conn.execute(
8219            "INSERT INTO cluster_episodes (cluster_id, memory_id) VALUES (?1, ?2)",
8220            rusqlite::params![cluster_id, memory_id],
8221        )
8222        .expect("seed cluster_episodes");
8223    }
8224
8225    fn seed_document_row(conn: &rusqlite::Connection, doc_id: &str, title: &str) {
8226        conn.execute(
8227            "INSERT INTO documents
8228                (doc_id, source, title, mime_type, ingested_at_ms,
8229                 modified_at_ms, status, chunk_count, content_hash, byte_size)
8230                VALUES (?1, ?2, ?3, 'text/plain', 0, NULL,
8231                        'active', 0, ?1, NULL)",
8232            rusqlite::params![doc_id, format!("/tmp/{title}.txt"), title],
8233        )
8234        .expect("seed doc");
8235    }
8236
8237    fn seed_chunk_row(
8238        conn: &rusqlite::Connection,
8239        chunk_id: &str,
8240        doc_id: &str,
8241        chunk_index: i64,
8242        content: &str,
8243    ) {
8244        conn.execute(
8245            "INSERT INTO document_chunks
8246                (chunk_id, doc_id, chunk_index, content,
8247                 token_count, start_offset, end_offset, created_at_ms)
8248                VALUES (?1, ?2, ?3, ?4, 1, 0, ?5, 0)",
8249            rusqlite::params![chunk_id, doc_id, chunk_index, content, content.len() as i64],
8250        )
8251        .expect("seed chunk");
8252    }
8253
8254    fn seed_triple_row(
8255        conn: &rusqlite::Connection,
8256        triple_id: &str,
8257        subject: &str,
8258        predicate: &str,
8259        object: &str,
8260        source_episode_rowid: Option<i64>,
8261    ) {
8262        conn.execute(
8263            "INSERT INTO triples
8264                 (triple_id, subject_id, predicate, object_id, object_kind,
8265                  valid_from_ms, valid_to_ms, confidence, provenance_json,
8266                  status, created_at_ms, updated_at_ms, source_episode_id)
8267                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
8268                         'active', 0, 0, ?5)",
8269            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
8270        )
8271        .expect("seed triple");
8272    }
8273
8274    fn seed_contradiction_row(conn: &rusqlite::Connection, a_id: &str, b_id: &str, kind: &str) {
8275        conn.execute(
8276            "INSERT INTO contradictions
8277                 (a_memory_id, b_memory_id, kind, explanation, detected_at_ms,
8278                  status, resolved_at_ms, resolution_note, winning_triple_id)
8279                 VALUES (?1, ?2, ?3, 'test contradiction', 0,
8280                         'unresolved', NULL, NULL, NULL)",
8281            rusqlite::params![a_id, b_id, kind],
8282        )
8283        .expect("seed contradiction");
8284    }
8285
8286    /// Insert a `semantic_abstractions` row (cluster LLM summary). Used
8287    /// by the cluster-inspect test to verify the abstraction concat path.
8288    fn seed_abstraction_row(
8289        conn: &rusqlite::Connection,
8290        abstraction_id: &str,
8291        cluster_id: &str,
8292        content: &str,
8293    ) {
8294        conn.execute(
8295            "INSERT INTO semantic_abstractions
8296                 (abstraction_id, cluster_id, content, provenance_json,
8297                  confidence, created_at_ms)
8298                 VALUES (?1, ?2, ?3, '{}', 0.9, 0)",
8299            rusqlite::params![abstraction_id, cluster_id, content],
8300        )
8301        .expect("seed abstraction");
8302    }
8303
8304    /// Tests use simple ASCII node_ids (UUID-shaped + plain entity strings),
8305    /// so we percent-encode only `:` and a few other delimiters by hand.
8306    fn percent_encode_node_id(node_id: &str) -> String {
8307        let mut out = String::with_capacity(node_id.len());
8308        for c in node_id.chars() {
8309            match c {
8310                ':' => out.push_str("%3A"),
8311                ' ' => out.push_str("%20"),
8312                '&' => out.push_str("%26"),
8313                '+' => out.push_str("%2B"),
8314                '?' => out.push_str("%3F"),
8315                '#' => out.push_str("%23"),
8316                _ => out.push(c),
8317            }
8318        }
8319        out
8320    }
8321
8322    fn graph_uri(node_id: &str, kind: &str) -> String {
8323        let encoded = percent_encode_node_id(node_id);
8324        format!("/v1/graph/expand?node_id={encoded}&kind={kind}")
8325    }
8326
8327    fn graph_uri_with_limit(node_id: &str, kind: &str, limit: u32) -> String {
8328        let encoded = percent_encode_node_id(node_id);
8329        format!("/v1/graph/expand?node_id={encoded}&kind={kind}&limit={limit}")
8330    }
8331
8332    #[test]
8333    fn expand_cluster_member_from_episode_returns_clusters() {
8334        let runtime = rt();
8335        let h = Harness::new(&runtime);
8336        let memory_id = "11111111-1111-7000-8000-000000000001";
8337        {
8338            let conn = h.open_db();
8339            seed_episode(&conn, memory_id, 100, "ep content");
8340            seed_cluster_row(&conn, "cl-a", 200);
8341            seed_cluster_member(&conn, "cl-a", memory_id);
8342        }
8343        let node_id = format!("ep:{memory_id}");
8344        let (status, body) = runtime.block_on(call(
8345            h.router.clone(),
8346            "GET",
8347            &graph_uri(&node_id, "cluster_member"),
8348            None,
8349        ));
8350        assert_eq!(status, StatusCode::OK, "body: {body}");
8351        let nodes = body
8352            .get("nodes")
8353            .and_then(|v| v.as_array())
8354            .expect("nodes array");
8355        let edges = body
8356            .get("edges")
8357            .and_then(|v| v.as_array())
8358            .expect("edges array");
8359        assert_eq!(nodes.len(), 1, "{body}");
8360        assert_eq!(nodes[0]["id"], "cl:cl-a");
8361        assert_eq!(nodes[0]["kind"], "cluster");
8362        assert_eq!(edges.len(), 1);
8363        assert_eq!(edges[0]["source"], node_id);
8364        assert_eq!(edges[0]["target"], "cl:cl-a");
8365        assert_eq!(edges[0]["kind"], "cluster_member");
8366        h.shutdown(&runtime);
8367    }
8368
8369    #[test]
8370    fn expand_cluster_member_from_cluster_returns_episodes() {
8371        let runtime = rt();
8372        let h = Harness::new(&runtime);
8373        {
8374            let conn = h.open_db();
8375            seed_cluster_row(&conn, "cl-multi", 500);
8376            for i in 0..5 {
8377                let mid = format!("2222{i}222-2222-7000-8000-000000000001");
8378                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
8379                seed_cluster_member(&conn, "cl-multi", &mid);
8380            }
8381        }
8382        let (status, body) = runtime.block_on(call(
8383            h.router.clone(),
8384            "GET",
8385            &graph_uri_with_limit("cl:cl-multi", "cluster_member", 3),
8386            None,
8387        ));
8388        assert_eq!(status, StatusCode::OK, "body: {body}");
8389        let nodes = body["nodes"].as_array().unwrap();
8390        let edges = body["edges"].as_array().unwrap();
8391        assert_eq!(nodes.len(), 3, "limit honored: {body}");
8392        assert_eq!(edges.len(), 3);
8393        for n in nodes {
8394            assert_eq!(n["kind"], "episode");
8395        }
8396        h.shutdown(&runtime);
8397    }
8398
8399    #[test]
8400    fn expand_document_chunk_from_document_returns_chunks() {
8401        let runtime = rt();
8402        let h = Harness::new(&runtime);
8403        let doc_id = "33333333-3333-7000-8000-000000000001";
8404        {
8405            let conn = h.open_db();
8406            seed_document_row(&conn, doc_id, "doc A");
8407            // Insert chunks in shuffled order so the ORDER BY chunk_index
8408            // is load-bearing.
8409            seed_chunk_row(&conn, "c2", doc_id, 2, "chunk 2 text");
8410            seed_chunk_row(&conn, "c0", doc_id, 0, "chunk 0 text");
8411            seed_chunk_row(&conn, "c1", doc_id, 1, "chunk 1 text");
8412            seed_chunk_row(&conn, "c3", doc_id, 3, "chunk 3 text");
8413        }
8414        let node_id = format!("doc:{doc_id}");
8415        let (status, body) = runtime.block_on(call(
8416            h.router.clone(),
8417            "GET",
8418            &graph_uri(&node_id, "document_chunk"),
8419            None,
8420        ));
8421        assert_eq!(status, StatusCode::OK, "body: {body}");
8422        let nodes = body["nodes"].as_array().unwrap();
8423        let edges = body["edges"].as_array().unwrap();
8424        assert_eq!(nodes.len(), 4);
8425        assert_eq!(edges.len(), 4);
8426        // Verify in-order chunk_index emission.
8427        assert_eq!(nodes[0]["id"], "chunk:c0");
8428        assert_eq!(nodes[1]["id"], "chunk:c1");
8429        assert_eq!(nodes[2]["id"], "chunk:c2");
8430        assert_eq!(nodes[3]["id"], "chunk:c3");
8431        for e in edges {
8432            assert_eq!(e["kind"], "document_chunk");
8433        }
8434        h.shutdown(&runtime);
8435    }
8436
8437    #[test]
8438    fn expand_document_chunk_from_chunk_returns_parent_document() {
8439        let runtime = rt();
8440        let h = Harness::new(&runtime);
8441        let doc_id = "44444444-4444-7000-8000-000000000001";
8442        {
8443            let conn = h.open_db();
8444            seed_document_row(&conn, doc_id, "parent doc");
8445            seed_chunk_row(&conn, "c-orphan", doc_id, 0, "chunk content");
8446        }
8447        let (status, body) = runtime.block_on(call(
8448            h.router.clone(),
8449            "GET",
8450            &graph_uri("chunk:c-orphan", "document_chunk"),
8451            None,
8452        ));
8453        assert_eq!(status, StatusCode::OK, "body: {body}");
8454        let nodes = body["nodes"].as_array().unwrap();
8455        let edges = body["edges"].as_array().unwrap();
8456        assert_eq!(nodes.len(), 1);
8457        assert_eq!(edges.len(), 1);
8458        assert_eq!(nodes[0]["id"], format!("doc:{doc_id}"));
8459        assert_eq!(edges[0]["source"], "chunk:c-orphan");
8460        assert_eq!(edges[0]["target"], format!("doc:{doc_id}"));
8461        h.shutdown(&runtime);
8462    }
8463
8464    #[test]
8465    fn expand_triple_from_episode_returns_entities() {
8466        let runtime = rt();
8467        let h = Harness::new(&runtime);
8468        let memory_id = "55555555-5555-7000-8000-000000000001";
8469        let rowid;
8470        {
8471            let conn = h.open_db();
8472            rowid = seed_episode(&conn, memory_id, 100, "alice works at anthropic");
8473            // Two distinct triples → 4 entity endpoints (Alice, Anthropic, Bob, NYC).
8474            seed_triple_row(&conn, "t1", "Alice", "works_at", "Anthropic", Some(rowid));
8475            seed_triple_row(&conn, "t2", "Bob", "lives_in", "NYC", Some(rowid));
8476        }
8477        let node_id = format!("ep:{memory_id}");
8478        let (status, body) = runtime.block_on(call(
8479            h.router.clone(),
8480            "GET",
8481            &graph_uri(&node_id, "triple"),
8482            None,
8483        ));
8484        assert_eq!(status, StatusCode::OK, "body: {body}");
8485        let nodes = body["nodes"].as_array().unwrap();
8486        let edges = body["edges"].as_array().unwrap();
8487        assert_eq!(nodes.len(), 4, "expected 4 unique entity nodes: {body}");
8488        assert_eq!(edges.len(), 2);
8489        let ids: std::collections::HashSet<String> = nodes
8490            .iter()
8491            .map(|n| n["id"].as_str().unwrap().to_string())
8492            .collect();
8493        for expected in ["ent:Alice", "ent:Anthropic", "ent:Bob", "ent:NYC"] {
8494            assert!(ids.contains(expected), "missing {expected} in {body}");
8495        }
8496        for e in edges {
8497            assert_eq!(e["kind"], "triple");
8498            assert!(e["predicate"].is_string(), "predicate set: {body}");
8499        }
8500        h.shutdown(&runtime);
8501    }
8502
8503    #[test]
8504    fn expand_triple_from_entity_returns_episodes() {
8505        let runtime = rt();
8506        let h = Harness::new(&runtime);
8507        {
8508            let conn = h.open_db();
8509            let r1 = seed_episode(
8510                &conn,
8511                "66666666-6666-7000-8000-000000000001",
8512                100,
8513                "alice ep one",
8514            );
8515            let r2 = seed_episode(
8516                &conn,
8517                "66666666-6666-7000-8000-000000000002",
8518                200,
8519                "alice ep two",
8520            );
8521            let r3 = seed_episode(
8522                &conn,
8523                "66666666-6666-7000-8000-000000000003",
8524                300,
8525                "alice ep three",
8526            );
8527            // 3 triples all mentioning Alice on one side or another.
8528            seed_triple_row(&conn, "t1", "Alice", "p", "Bob", Some(r1));
8529            seed_triple_row(&conn, "t2", "Carol", "p", "Alice", Some(r2));
8530            seed_triple_row(&conn, "t3", "Alice", "q", "Dave", Some(r3));
8531            // One triple with no source — must be skipped by the IS NOT NULL filter.
8532            seed_triple_row(&conn, "t-orphan", "Alice", "p", "Eve", None);
8533        }
8534        let (status, body) = runtime.block_on(call(
8535            h.router.clone(),
8536            "GET",
8537            &graph_uri("ent:Alice", "triple"),
8538            None,
8539        ));
8540        assert_eq!(status, StatusCode::OK, "body: {body}");
8541        let nodes = body["nodes"].as_array().unwrap();
8542        let edges = body["edges"].as_array().unwrap();
8543        assert_eq!(nodes.len(), 3, "expected 3 episodes: {body}");
8544        assert_eq!(edges.len(), 3);
8545        for n in nodes {
8546            assert_eq!(n["kind"], "episode");
8547        }
8548        for e in edges {
8549            assert_eq!(e["source"], "ent:Alice");
8550            assert_eq!(e["kind"], "triple");
8551        }
8552        h.shutdown(&runtime);
8553    }
8554
8555    #[test]
8556    fn expand_semantic_from_episode_returns_similar() {
8557        let runtime = rt();
8558        let h = Harness::new(&runtime);
8559        // Seed three episodes via the writer-actor so they get embedded
8560        // + inserted into HNSW. StubEmbedder is deterministic: identical
8561        // content → identical vector → cos_distance = 0. So we use
8562        // distinct strings, then expand from one of them and assert at
8563        // least one similar peer comes back.
8564        runtime.block_on(async {
8565            let mid1 = post_remember(h.router.clone(), "alpha alpha alpha").await;
8566            let _mid2 = post_remember(h.router.clone(), "beta beta beta").await;
8567            let _mid3 = post_remember(h.router.clone(), "gamma gamma gamma").await;
8568            // Expand from mid1.
8569            let (status, body) = call(
8570                h.router.clone(),
8571                "GET",
8572                &graph_uri_with_limit(&format!("ep:{mid1}"), "semantic", 5),
8573                None,
8574            )
8575            .await;
8576            assert_eq!(status, StatusCode::OK, "body: {body}");
8577            let nodes = body["nodes"].as_array().unwrap();
8578            let edges = body["edges"].as_array().unwrap();
8579            // Must NOT include the source.
8580            for n in nodes {
8581                assert_ne!(
8582                    n["id"].as_str().unwrap(),
8583                    format!("ep:{mid1}"),
8584                    "self must be excluded: {body}"
8585                );
8586            }
8587            // Edges must be tagged semantic with a numeric weight.
8588            for e in edges {
8589                assert_eq!(e["kind"], "semantic");
8590                assert!(e["weight"].is_number(), "weight set: {body}");
8591            }
8592        });
8593        h.shutdown(&runtime);
8594    }
8595
8596    /// Helper: POST /memory and return the new memory_id.
8597    async fn post_remember(router: axum::Router, content: &str) -> String {
8598        let (status, body) = call(
8599            router,
8600            "POST",
8601            "/memory",
8602            Some(json!({ "content": content })),
8603        )
8604        .await;
8605        assert_eq!(status, StatusCode::OK, "post failed: {body}");
8606        body["memory_id"].as_str().unwrap().to_string()
8607    }
8608
8609    #[test]
8610    fn expand_400_on_invalid_kind() {
8611        let runtime = rt();
8612        let h = Harness::new(&runtime);
8613        let (status, _body) = runtime.block_on(call(
8614            h.router.clone(),
8615            "GET",
8616            "/v1/graph/expand?node_id=ep:any&kind=banana",
8617            None,
8618        ));
8619        // axum's Query extractor rejects unknown enum value with 400/422.
8620        assert!(
8621            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
8622            "expected 400/422 for bad kind, got {status}"
8623        );
8624        h.shutdown(&runtime);
8625    }
8626
8627    #[test]
8628    fn expand_400_on_invalid_node_for_kind() {
8629        let runtime = rt();
8630        let h = Harness::new(&runtime);
8631        // kind=semantic from a cluster source → 400.
8632        let (status, body) = runtime.block_on(call(
8633            h.router.clone(),
8634            "GET",
8635            &graph_uri("cl:doesnt-matter", "semantic"),
8636            None,
8637        ));
8638        assert_eq!(status, StatusCode::BAD_REQUEST);
8639        assert!(
8640            body["error"]
8641                .as_str()
8642                .is_some_and(|s| s.contains("semantic only valid for episode")),
8643            "got: {body}"
8644        );
8645        h.shutdown(&runtime);
8646    }
8647
8648    #[test]
8649    fn expand_404_on_missing_node_id() {
8650        let runtime = rt();
8651        let h = Harness::new(&runtime);
8652        let (status, body) = runtime.block_on(call(
8653            h.router.clone(),
8654            "GET",
8655            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8656            None,
8657        ));
8658        assert_eq!(status, StatusCode::NOT_FOUND, "{body}");
8659        h.shutdown(&runtime);
8660    }
8661
8662    #[test]
8663    fn expand_limit_clamped_at_100() {
8664        let runtime = rt();
8665        let h = Harness::new(&runtime);
8666        // Seed > 100 cluster members so we can see the clamp in action.
8667        {
8668            let conn = h.open_db();
8669            seed_cluster_row(&conn, "cl-huge", 1_000);
8670            for i in 0..150 {
8671                let mid = format!("77777777-7777-7000-8000-{:012}", i);
8672                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
8673                seed_cluster_member(&conn, "cl-huge", &mid);
8674            }
8675        }
8676        let (status, body) = runtime.block_on(call(
8677            h.router.clone(),
8678            "GET",
8679            &graph_uri_with_limit("cl:cl-huge", "cluster_member", 999),
8680            None,
8681        ));
8682        assert_eq!(status, StatusCode::OK, "body: {body}");
8683        let nodes = body["nodes"].as_array().unwrap();
8684        assert_eq!(
8685            nodes.len(),
8686            100,
8687            "limit must be silently clamped to 100, got {}",
8688            nodes.len()
8689        );
8690        h.shutdown(&runtime);
8691    }
8692
8693    #[test]
8694    fn expand_bad_node_id_prefix_returns_400() {
8695        let runtime = rt();
8696        let h = Harness::new(&runtime);
8697        let (status, body) = runtime.block_on(call(
8698            h.router.clone(),
8699            "GET",
8700            "/v1/graph/expand?node_id=garbage&kind=cluster_member",
8701            None,
8702        ));
8703        assert_eq!(status, StatusCode::BAD_REQUEST);
8704        assert!(
8705            body["error"]
8706                .as_str()
8707                .is_some_and(|s| s.contains("node_id must be")),
8708            "got: {body}"
8709        );
8710        h.shutdown(&runtime);
8711    }
8712
8713    #[test]
8714    fn expand_respects_tenant_scoping_via_unknown_tenant_header() {
8715        // Routing via X-Solo-Tenant: a header pointing to an unknown
8716        // tenant must 404 before the handler even runs — the
8717        // TenantExtractor is the gatekeeper, so node ids can't be
8718        // resolved against the wrong tenant's DB.
8719        let runtime = rt();
8720        let h = Harness::new(&runtime);
8721        // Seed a real episode in the default tenant so we know it
8722        // exists there. If tenant scoping leaked, this lookup would 200
8723        // even with the wrong tenant header.
8724        let memory_id = "88888888-8888-7000-8000-000000000001";
8725        {
8726            let conn = h.open_db();
8727            seed_episode(&conn, memory_id, 100, "scoped");
8728            seed_cluster_row(&conn, "cl-scoped", 200);
8729            seed_cluster_member(&conn, "cl-scoped", memory_id);
8730        }
8731        let node_id = format!("ep:{memory_id}");
8732        let r = h.router.clone();
8733        let (status, _body) = runtime.block_on(async {
8734            let req = Request::builder()
8735                .method("GET")
8736                .uri(graph_uri(&node_id, "cluster_member"))
8737                .header("x-solo-tenant", "never-registered-tenant")
8738                .body(Body::empty())
8739                .unwrap();
8740            let resp = r.oneshot(req).await.expect("oneshot");
8741            let s = resp.status();
8742            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8743            (s, _b)
8744        });
8745        // Unknown tenant id → 404 from the registry. Confirms cross-tenant
8746        // lookups can't smuggle through this endpoint.
8747        assert_eq!(status, StatusCode::NOT_FOUND);
8748        h.shutdown(&runtime);
8749    }
8750
8751    #[test]
8752    fn expand_respects_auth_when_enabled() {
8753        let runtime = rt();
8754        let h = Harness::new_with_auth(&runtime, Some("graph-secret".into()));
8755        // No Authorization header → 401.
8756        let (status, _) = runtime.block_on(call(
8757            h.router.clone(),
8758            "GET",
8759            &graph_uri("ep:any", "cluster_member"),
8760            None,
8761        ));
8762        assert_eq!(status, StatusCode::UNAUTHORIZED);
8763        // Right token → handler runs (404 for unknown node, NOT 401).
8764        let (status, _) = runtime.block_on(call_with_auth(
8765            h.router.clone(),
8766            "GET",
8767            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8768            None,
8769            Some("Bearer graph-secret"),
8770        ));
8771        assert_eq!(status, StatusCode::NOT_FOUND);
8772        h.shutdown(&runtime);
8773    }
8774
8775    #[test]
8776    fn expand_works_when_auth_none() {
8777        let runtime = rt();
8778        let h = Harness::new(&runtime);
8779        // Unauthenticated request hits the handler; 404 for unknown node
8780        // proves the auth-none path doesn't reject the request.
8781        let (status, _) = runtime.block_on(call(
8782            h.router.clone(),
8783            "GET",
8784            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8785            None,
8786        ));
8787        assert_eq!(status, StatusCode::NOT_FOUND);
8788        h.shutdown(&runtime);
8789    }
8790
8791    // ---------------------------------------------------------------------
8792    // v0.10.0: GET /v1/graph/nodes + GET /v1/graph/edges
8793    //
8794    // Paginated catalog reads. Both endpoints share auth + tenant +
8795    // cursor scaffolding from /v1/graph/expand, so tests focus on the
8796    // new surface: filter parsing, entity synthesis cap, cursor round-
8797    // trip, edge-type defaults (semantic excluded), and the semantic
8798    // 400 redirect to /v1/graph/neighbors.
8799    // ---------------------------------------------------------------------
8800
8801    /// Lower-level helper that captures response headers in addition to
8802    /// status + JSON body. Used by the entity-cap header test.
8803    async fn call_with_headers(
8804        router: axum::Router,
8805        method: &str,
8806        uri: &str,
8807    ) -> (StatusCode, axum::http::HeaderMap, Value) {
8808        let req = Request::builder()
8809            .method(method)
8810            .uri(uri)
8811            .header("content-length", "0")
8812            .body(Body::empty())
8813            .unwrap();
8814        let resp = router.oneshot(req).await.expect("oneshot");
8815        let status = resp.status();
8816        let headers = resp.headers().clone();
8817        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
8818        let v: Value = if body_bytes.is_empty() {
8819            Value::Null
8820        } else {
8821            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
8822        };
8823        (status, headers, v)
8824    }
8825
8826    #[test]
8827    fn nodes_returns_all_kinds_when_no_filter() {
8828        let runtime = rt();
8829        let h = Harness::new(&runtime);
8830        {
8831            let conn = h.open_db();
8832            let rowid = seed_episode(
8833                &conn,
8834                "aaaaaaaa-0000-7000-8000-000000000001",
8835                100,
8836                "episode one",
8837            );
8838            seed_document_row(&conn, "doc-1", "doc one");
8839            seed_chunk_row(&conn, "chunk-1", "doc-1", 0, "chunk one body");
8840            seed_cluster_row(&conn, "cl-one", 200);
8841            seed_triple_row(&conn, "t-one", "Alice", "knows", "Bob", Some(rowid));
8842        }
8843        let (status, body) =
8844            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
8845        assert_eq!(status, StatusCode::OK, "body: {body}");
8846        let nodes = body["nodes"].as_array().unwrap();
8847        let kinds: std::collections::HashSet<&str> =
8848            nodes.iter().map(|n| n["kind"].as_str().unwrap()).collect();
8849        for expected in ["episode", "document", "chunk", "cluster", "entity"] {
8850            assert!(
8851                kinds.contains(expected),
8852                "expected {expected} kind in response: {body}"
8853            );
8854        }
8855        h.shutdown(&runtime);
8856    }
8857
8858    #[test]
8859    fn nodes_filter_by_single_kind() {
8860        let runtime = rt();
8861        let h = Harness::new(&runtime);
8862        {
8863            let conn = h.open_db();
8864            seed_episode(&conn, "bbbbbbbb-0000-7000-8000-000000000001", 100, "ep");
8865            seed_document_row(&conn, "doc-only", "d");
8866            seed_cluster_row(&conn, "cl-only", 300);
8867        }
8868        let (status, body) = runtime.block_on(call(
8869            h.router.clone(),
8870            "GET",
8871            "/v1/graph/nodes?kind=episode",
8872            None,
8873        ));
8874        assert_eq!(status, StatusCode::OK, "body: {body}");
8875        let nodes = body["nodes"].as_array().unwrap();
8876        assert!(!nodes.is_empty(), "{body}");
8877        for n in nodes {
8878            assert_eq!(
8879                n["kind"], "episode",
8880                "kind filter must be exclusive: {body}"
8881            );
8882        }
8883        h.shutdown(&runtime);
8884    }
8885
8886    #[test]
8887    fn nodes_filter_by_multiple_kinds() {
8888        let runtime = rt();
8889        let h = Harness::new(&runtime);
8890        {
8891            let conn = h.open_db();
8892            seed_episode(&conn, "cccccccc-0000-7000-8000-000000000001", 100, "ep");
8893            seed_document_row(&conn, "doc-multi", "d");
8894            seed_cluster_row(&conn, "cl-multi", 300);
8895        }
8896        let (status, body) = runtime.block_on(call(
8897            h.router.clone(),
8898            "GET",
8899            "/v1/graph/nodes?kind=episode,document",
8900            None,
8901        ));
8902        assert_eq!(status, StatusCode::OK, "body: {body}");
8903        let nodes = body["nodes"].as_array().unwrap();
8904        let kinds: std::collections::HashSet<&str> =
8905            nodes.iter().map(|n| n["kind"].as_str().unwrap()).collect();
8906        assert!(kinds.contains("episode"), "{body}");
8907        assert!(kinds.contains("document"), "{body}");
8908        assert!(
8909            !kinds.contains("cluster"),
8910            "cluster must be filtered out: {body}"
8911        );
8912        h.shutdown(&runtime);
8913    }
8914
8915    #[test]
8916    fn nodes_entity_synthesis_caps_at_200() {
8917        let runtime = rt();
8918        let h = Harness::new(&runtime);
8919        {
8920            let conn = h.open_db();
8921            // Seed one episode + 250 distinct triple object values so the
8922            // entity rollup surfaces >200 entities. ref_count is 1 for
8923            // each; pick subject = "Alice" for all so the entity count
8924            // collapses on subject (1 "Alice") + 250 distinct objects.
8925            let rowid = seed_episode(&conn, "dddddddd-0000-7000-8000-000000000001", 100, "ep");
8926            for i in 0..250 {
8927                let triple_id = format!("t-cap-{i:03}");
8928                let obj = format!("Entity{i:03}");
8929                seed_triple_row(&conn, &triple_id, "Alice", "knows", &obj, Some(rowid));
8930            }
8931        }
8932        let (status, headers, body) = runtime.block_on(call_with_headers(
8933            h.router.clone(),
8934            "GET",
8935            "/v1/graph/nodes?kind=entity&limit=500",
8936        ));
8937        assert_eq!(status, StatusCode::OK, "body: {body}");
8938        let nodes = body["nodes"].as_array().unwrap();
8939        assert_eq!(
8940            nodes.len(),
8941            200,
8942            "entity cap must be enforced at 200, got {}",
8943            nodes.len()
8944        );
8945        assert_eq!(
8946            headers
8947                .get("x-solo-entity-cap-reached")
8948                .and_then(|v| v.to_str().ok()),
8949            Some("true"),
8950            "cap-reached header missing: headers={headers:?}"
8951        );
8952        for n in nodes {
8953            assert_eq!(n["kind"], "entity");
8954        }
8955        h.shutdown(&runtime);
8956    }
8957
8958    #[test]
8959    fn nodes_since_until_filter_works() {
8960        let runtime = rt();
8961        let h = Harness::new(&runtime);
8962        {
8963            let conn = h.open_db();
8964            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000001", 100, "early");
8965            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000002", 500, "middle");
8966            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000003", 1000, "late");
8967        }
8968        let (status, body) = runtime.block_on(call(
8969            h.router.clone(),
8970            "GET",
8971            "/v1/graph/nodes?kind=episode&since_ms=400&until_ms=600",
8972            None,
8973        ));
8974        assert_eq!(status, StatusCode::OK, "body: {body}");
8975        let nodes = body["nodes"].as_array().unwrap();
8976        assert_eq!(nodes.len(), 1, "{body}");
8977        assert_eq!(nodes[0]["id"], "ep:eeeeeeee-0000-7000-8000-000000000002");
8978        h.shutdown(&runtime);
8979    }
8980
8981    #[test]
8982    fn nodes_pagination_round_trip() {
8983        let runtime = rt();
8984        let h = Harness::new(&runtime);
8985        {
8986            let conn = h.open_db();
8987            for i in 0..150 {
8988                let mid = format!("f0000000-0000-7000-8000-{i:012}");
8989                // ts_ms scales with i so the sort order is deterministic;
8990                // newest (highest i) appears first.
8991                seed_episode(&conn, &mid, 1_000 + i as i64, "page");
8992            }
8993        }
8994        let limit = 50u32;
8995        let mut seen: std::collections::HashSet<String> = Default::default();
8996        let mut next_cursor: Option<String> = None;
8997        for page_idx in 0..4 {
8998            let cursor_param = next_cursor
8999                .as_deref()
9000                .map(|c| format!("&cursor={c}"))
9001                .unwrap_or_default();
9002            let uri = format!("/v1/graph/nodes?kind=episode&limit={limit}{cursor_param}");
9003            let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9004            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
9005            let nodes = body["nodes"].as_array().unwrap();
9006            assert!(
9007                nodes.len() <= limit as usize,
9008                "page {page_idx} over-fetched: {body}"
9009            );
9010            for n in nodes {
9011                let id = n["id"].as_str().unwrap().to_string();
9012                assert!(seen.insert(id.clone()), "duplicate id across pages: {id}");
9013            }
9014            next_cursor = body
9015                .get("next_cursor")
9016                .and_then(|v| v.as_str())
9017                .map(|s| s.to_string());
9018            if next_cursor.is_none() {
9019                break;
9020            }
9021        }
9022        assert_eq!(
9023            seen.len(),
9024            150,
9025            "expected 150 distinct ids across pages, got {}",
9026            seen.len()
9027        );
9028        assert!(
9029            next_cursor.is_none(),
9030            "cursor should be null after last page; got {next_cursor:?}"
9031        );
9032        h.shutdown(&runtime);
9033    }
9034
9035    #[test]
9036    fn nodes_respects_tenant_scoping() {
9037        let runtime = rt();
9038        let h = Harness::new(&runtime);
9039        {
9040            let conn = h.open_db();
9041            seed_episode(
9042                &conn,
9043                "11110000-0000-7000-8000-000000000001",
9044                100,
9045                "tenant scope",
9046            );
9047        }
9048        // Request against a never-registered tenant header → 404 from
9049        // the tenant extractor before the handler runs.
9050        let r = h.router.clone();
9051        let (status, _body) = runtime.block_on(async {
9052            let req = Request::builder()
9053                .method("GET")
9054                .uri("/v1/graph/nodes")
9055                .header("x-solo-tenant", "never-registered-tenant")
9056                .body(Body::empty())
9057                .unwrap();
9058            let resp = r.oneshot(req).await.expect("oneshot");
9059            let s = resp.status();
9060            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9061            (s, _b)
9062        });
9063        assert_eq!(status, StatusCode::NOT_FOUND);
9064        h.shutdown(&runtime);
9065    }
9066
9067    #[test]
9068    fn nodes_respects_auth_when_enabled() {
9069        let runtime = rt();
9070        let h = Harness::new_with_auth(&runtime, Some("nodes-secret".into()));
9071        let (status, _) = runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
9072        assert_eq!(
9073            status,
9074            StatusCode::UNAUTHORIZED,
9075            "must reject unauthenticated request"
9076        );
9077        let (status, _) = runtime.block_on(call_with_auth(
9078            h.router.clone(),
9079            "GET",
9080            "/v1/graph/nodes",
9081            None,
9082            Some("Bearer nodes-secret"),
9083        ));
9084        assert_eq!(status, StatusCode::OK, "must pass through with bearer");
9085        h.shutdown(&runtime);
9086    }
9087
9088    #[test]
9089    fn nodes_works_with_auth_none() {
9090        let runtime = rt();
9091        let h = Harness::new(&runtime);
9092        let (status, body) =
9093            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
9094        assert_eq!(status, StatusCode::OK, "{body}");
9095        assert!(body.get("nodes").is_some());
9096        h.shutdown(&runtime);
9097    }
9098
9099    // --- /v1/graph/edges ---
9100
9101    #[test]
9102    fn edges_returns_all_default_kinds() {
9103        let runtime = rt();
9104        let h = Harness::new(&runtime);
9105        {
9106            let conn = h.open_db();
9107            let rowid = seed_episode(&conn, "22220000-0000-7000-8000-000000000001", 100, "ep src");
9108            seed_triple_row(&conn, "t-def", "Alice", "knows", "Bob", Some(rowid));
9109            seed_document_row(&conn, "doc-e", "doc");
9110            seed_chunk_row(&conn, "c-e", "doc-e", 0, "chunk");
9111            seed_cluster_row(&conn, "cl-e", 200);
9112            seed_cluster_member(&conn, "cl-e", "22220000-0000-7000-8000-000000000001");
9113        }
9114        let (status, body) =
9115            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/edges", None));
9116        assert_eq!(status, StatusCode::OK, "body: {body}");
9117        let edges = body["edges"].as_array().unwrap();
9118        let kinds: std::collections::HashSet<&str> =
9119            edges.iter().map(|e| e["kind"].as_str().unwrap()).collect();
9120        assert!(kinds.contains("triple"), "{body}");
9121        assert!(kinds.contains("document_chunk"), "{body}");
9122        assert!(kinds.contains("cluster_member"), "{body}");
9123        assert!(
9124            !kinds.contains("semantic"),
9125            "semantic is NOT in default response: {body}"
9126        );
9127        h.shutdown(&runtime);
9128    }
9129
9130    #[test]
9131    fn edges_filter_by_node_id_finds_incident_edges() {
9132        let runtime = rt();
9133        let h = Harness::new(&runtime);
9134        let memory_id = "33330000-0000-7000-8000-000000000001";
9135        {
9136            let conn = h.open_db();
9137            let rowid = seed_episode(&conn, memory_id, 100, "ep multi-triple");
9138            seed_triple_row(&conn, "t-a", "Alice", "p", "Bob", Some(rowid));
9139            seed_triple_row(&conn, "t-b", "Alice", "p", "Carol", Some(rowid));
9140            seed_triple_row(&conn, "t-c", "Alice", "p", "Dave", Some(rowid));
9141            // Decoy episode with its own triple — must NOT come back.
9142            let decoy_rowid =
9143                seed_episode(&conn, "33330000-0000-7000-8000-000000000999", 200, "decoy");
9144            seed_triple_row(&conn, "t-decoy", "Alice", "p", "Eve", Some(decoy_rowid));
9145        }
9146        let uri = format!(
9147            "/v1/graph/edges?type=triple&node_id={}",
9148            percent_encode_node_id(&format!("ep:{memory_id}"))
9149        );
9150        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9151        assert_eq!(status, StatusCode::OK, "body: {body}");
9152        let edges = body["edges"].as_array().unwrap();
9153        assert_eq!(edges.len(), 3, "expected 3 incident edges: {body}");
9154        for e in edges {
9155            assert_eq!(e["source"], format!("ep:{memory_id}"));
9156            assert_eq!(e["kind"], "triple");
9157        }
9158        h.shutdown(&runtime);
9159    }
9160
9161    #[test]
9162    fn edges_filter_by_type_works() {
9163        let runtime = rt();
9164        let h = Harness::new(&runtime);
9165        {
9166            let conn = h.open_db();
9167            let rowid = seed_episode(&conn, "44440000-0000-7000-8000-000000000001", 100, "ep");
9168            seed_triple_row(&conn, "t-only", "Alice", "p", "Bob", Some(rowid));
9169            seed_document_row(&conn, "doc-skip", "doc");
9170            seed_chunk_row(&conn, "c-skip", "doc-skip", 0, "chunk");
9171        }
9172        let (status, body) = runtime.block_on(call(
9173            h.router.clone(),
9174            "GET",
9175            "/v1/graph/edges?type=triple",
9176            None,
9177        ));
9178        assert_eq!(status, StatusCode::OK, "{body}");
9179        let edges = body["edges"].as_array().unwrap();
9180        assert!(!edges.is_empty(), "{body}");
9181        for e in edges {
9182            assert_eq!(e["kind"], "triple", "{body}");
9183        }
9184        h.shutdown(&runtime);
9185    }
9186
9187    #[test]
9188    fn edges_rejects_semantic_type_with_400() {
9189        let runtime = rt();
9190        let h = Harness::new(&runtime);
9191        let (status, body) = runtime.block_on(call(
9192            h.router.clone(),
9193            "GET",
9194            "/v1/graph/edges?type=semantic",
9195            None,
9196        ));
9197        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9198        let err = body["error"].as_str().unwrap_or_default();
9199        assert!(
9200            err.contains("/v1/graph/neighbors"),
9201            "error must point to /v1/graph/neighbors: {body}"
9202        );
9203        h.shutdown(&runtime);
9204    }
9205
9206    #[test]
9207    fn edges_pagination_round_trip() {
9208        let runtime = rt();
9209        let h = Harness::new(&runtime);
9210        {
9211            let conn = h.open_db();
9212            let rowid = seed_episode(&conn, "55550000-0000-7000-8000-000000000001", 100, "ep big");
9213            // 60 triples → 60 triple edges. limit=25 → 3 pages.
9214            for i in 0..60 {
9215                let tid = format!("t-page-{i:03}");
9216                let obj = format!("Obj{i:03}");
9217                seed_triple_row(&conn, &tid, "Alice", "p", &obj, Some(rowid));
9218            }
9219        }
9220        let limit = 25u32;
9221        let mut seen: std::collections::HashSet<String> = Default::default();
9222        let mut next_cursor: Option<String> = None;
9223        for page_idx in 0..5 {
9224            let cursor_param = next_cursor
9225                .as_deref()
9226                .map(|c| format!("&cursor={c}"))
9227                .unwrap_or_default();
9228            let uri = format!("/v1/graph/edges?type=triple&limit={limit}{cursor_param}");
9229            let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9230            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
9231            let edges = body["edges"].as_array().unwrap();
9232            for e in edges {
9233                let id = e["id"].as_str().unwrap().to_string();
9234                assert!(seen.insert(id.clone()), "duplicate edge id: {id}");
9235            }
9236            next_cursor = body
9237                .get("next_cursor")
9238                .and_then(|v| v.as_str())
9239                .map(|s| s.to_string());
9240            if next_cursor.is_none() {
9241                break;
9242            }
9243        }
9244        assert_eq!(
9245            seen.len(),
9246            60,
9247            "expected 60 distinct edges, got {}",
9248            seen.len()
9249        );
9250        assert!(next_cursor.is_none(), "expected exhausted cursor");
9251        h.shutdown(&runtime);
9252    }
9253
9254    #[test]
9255    fn edges_respects_tenant_scoping() {
9256        let runtime = rt();
9257        let h = Harness::new(&runtime);
9258        {
9259            let conn = h.open_db();
9260            let rowid = seed_episode(&conn, "66660000-0000-7000-8000-000000000001", 100, "ep");
9261            seed_triple_row(&conn, "t-tenant", "Alice", "p", "Bob", Some(rowid));
9262        }
9263        let r = h.router.clone();
9264        let (status, _) = runtime.block_on(async {
9265            let req = Request::builder()
9266                .method("GET")
9267                .uri("/v1/graph/edges")
9268                .header("x-solo-tenant", "never-registered-tenant")
9269                .body(Body::empty())
9270                .unwrap();
9271            let resp = r.oneshot(req).await.expect("oneshot");
9272            let s = resp.status();
9273            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9274            (s, _b)
9275        });
9276        assert_eq!(status, StatusCode::NOT_FOUND);
9277        h.shutdown(&runtime);
9278    }
9279
9280    #[test]
9281    fn edges_respects_auth_when_enabled() {
9282        let runtime = rt();
9283        let h = Harness::new_with_auth(&runtime, Some("edges-secret".into()));
9284        let (status, _) = runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/edges", None));
9285        assert_eq!(status, StatusCode::UNAUTHORIZED);
9286        let (status, _) = runtime.block_on(call_with_auth(
9287            h.router.clone(),
9288            "GET",
9289            "/v1/graph/edges",
9290            None,
9291            Some("Bearer edges-secret"),
9292        ));
9293        assert_eq!(status, StatusCode::OK);
9294        h.shutdown(&runtime);
9295    }
9296
9297    // ---------------------------------------------------------------------
9298    // v0.10.0: GET /v1/graph/inspect/{id}
9299    //
9300    // Kind-discriminated full-record drill. Shares auth + tenant + node-id
9301    // prefix scaffolding with /v1/graph/expand and /v1/graph/{nodes,edges},
9302    // so tests focus on the new surface: per-kind full_text source +
9303    // triples_in/out shape + entity zero-triple 404 semantics + the
9304    // standard 400/404/auth/tenant cases.
9305    // ---------------------------------------------------------------------
9306
9307    fn inspect_uri(node_id: &str) -> String {
9308        // Path parameter must be percent-encoded (`:` is `%3A` after
9309        // the URI parser splits segments). axum's Path<String>
9310        // extractor percent-decodes automatically.
9311        format!("/v1/graph/inspect/{}", percent_encode_node_id(node_id))
9312    }
9313
9314    #[test]
9315    fn inspect_episode_returns_full_text_plus_triples_out() {
9316        let runtime = rt();
9317        let h = Harness::new(&runtime);
9318        let memory_id = "a1110000-0000-7000-8000-000000000001";
9319        let full_text = "Met Alice for coffee at the new place. She mentioned the project is on track but they're hitting issues with the deploy pipeline.";
9320        {
9321            let conn = h.open_db();
9322            let rowid = seed_episode(&conn, memory_id, 1_715_625_600_000, full_text);
9323            seed_triple_row(&conn, "t-ep-1", "user", "met_with", "Alice", Some(rowid));
9324            seed_triple_row(
9325                &conn,
9326                "t-ep-2",
9327                "user",
9328                "discussed",
9329                "deploy_pipeline",
9330                Some(rowid),
9331            );
9332            seed_triple_row(&conn, "t-ep-3", "Alice", "works_on", "project", Some(rowid));
9333        }
9334        let (status, body) = runtime.block_on(call(
9335            h.router.clone(),
9336            "GET",
9337            &inspect_uri(&format!("ep:{memory_id}")),
9338            None,
9339        ));
9340        assert_eq!(status, StatusCode::OK, "body: {body}");
9341        assert_eq!(body["node"]["kind"], "episode");
9342        assert_eq!(body["node"]["id"], format!("ep:{memory_id}"));
9343        assert_eq!(
9344            body["full_text"].as_str().unwrap(),
9345            full_text,
9346            "full_text must match episodes.content verbatim, untruncated"
9347        );
9348        let triples_out = body["triples_out"].as_array().unwrap();
9349        assert_eq!(triples_out.len(), 3, "{body}");
9350        let triples_in = body["triples_in"].as_array().unwrap();
9351        assert!(triples_in.is_empty(), "episodes have no triples_in: {body}");
9352        for e in triples_out {
9353            assert_eq!(e["kind"], "triple");
9354            assert_eq!(e["source"], format!("ep:{memory_id}"));
9355            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
9356            assert!(e["predicate"].as_str().is_some());
9357            assert!(e["weight"].as_f64().is_some());
9358        }
9359        h.shutdown(&runtime);
9360    }
9361
9362    #[test]
9363    fn inspect_episode_triples_in_is_empty_for_v10p1() {
9364        // Seed an episode + a triple from a DIFFERENT episode that
9365        // happens to mention the focal episode's content. Even with
9366        // entities referencing the episode topic, episode.triples_in
9367        // is structurally empty in v0.10.0 P1.
9368        let runtime = rt();
9369        let h = Harness::new(&runtime);
9370        let focal = "a2220000-0000-7000-8000-000000000001";
9371        let other = "a2220000-0000-7000-8000-000000000002";
9372        {
9373            let conn = h.open_db();
9374            seed_episode(&conn, focal, 100, "focal episode body");
9375            let other_rowid = seed_episode(&conn, other, 200, "another episode");
9376            // Entity "user" gets referenced heavily; doesn't matter --
9377            // episode triples_in stays empty.
9378            for i in 0..5 {
9379                let tid = format!("t-other-{i}");
9380                seed_triple_row(&conn, &tid, "user", "did", "thing", Some(other_rowid));
9381            }
9382        }
9383        let (status, body) = runtime.block_on(call(
9384            h.router.clone(),
9385            "GET",
9386            &inspect_uri(&format!("ep:{focal}")),
9387            None,
9388        ));
9389        assert_eq!(status, StatusCode::OK, "body: {body}");
9390        let triples_in = body["triples_in"].as_array().unwrap();
9391        assert!(
9392            triples_in.is_empty(),
9393            "episode triples_in must be empty regardless of cross-episode entity references: {body}"
9394        );
9395        h.shutdown(&runtime);
9396    }
9397
9398    #[test]
9399    fn inspect_document_returns_full_text_concatenated_from_chunks() {
9400        let runtime = rt();
9401        let h = Harness::new(&runtime);
9402        let doc_id = "d3330000-0000-7000-8000-000000000001";
9403        {
9404            let conn = h.open_db();
9405            seed_document_row(&conn, doc_id, "doc-title");
9406            seed_chunk_row(&conn, "ch-doc-1", doc_id, 0, "First chunk body.");
9407            seed_chunk_row(&conn, "ch-doc-2", doc_id, 1, "Second chunk body.");
9408            seed_chunk_row(&conn, "ch-doc-3", doc_id, 2, "Third chunk body.");
9409        }
9410        let (status, body) = runtime.block_on(call(
9411            h.router.clone(),
9412            "GET",
9413            &inspect_uri(&format!("doc:{doc_id}")),
9414            None,
9415        ));
9416        assert_eq!(status, StatusCode::OK, "body: {body}");
9417        assert_eq!(body["node"]["kind"], "document");
9418        let full_text = body["full_text"].as_str().unwrap();
9419        // Concatenation order matches chunk_index ASC; separator is "\n\n".
9420        assert_eq!(
9421            full_text,
9422            "First chunk body.\n\nSecond chunk body.\n\nThird chunk body."
9423        );
9424        assert!(body["triples_in"].as_array().unwrap().is_empty());
9425        assert!(body["triples_out"].as_array().unwrap().is_empty());
9426        h.shutdown(&runtime);
9427    }
9428
9429    #[test]
9430    fn inspect_chunk_returns_text() {
9431        let runtime = rt();
9432        let h = Harness::new(&runtime);
9433        let chunk_body = "This is the body of the chunk being inspected.";
9434        {
9435            let conn = h.open_db();
9436            seed_document_row(&conn, "doc-chunk-host", "host");
9437            seed_chunk_row(
9438                &conn,
9439                "chunk-inspect-target",
9440                "doc-chunk-host",
9441                0,
9442                chunk_body,
9443            );
9444        }
9445        let (status, body) = runtime.block_on(call(
9446            h.router.clone(),
9447            "GET",
9448            &inspect_uri("chunk:chunk-inspect-target"),
9449            None,
9450        ));
9451        assert_eq!(status, StatusCode::OK, "body: {body}");
9452        assert_eq!(body["node"]["kind"], "chunk");
9453        assert_eq!(body["full_text"].as_str().unwrap(), chunk_body);
9454        assert!(body["triples_in"].as_array().unwrap().is_empty());
9455        assert!(body["triples_out"].as_array().unwrap().is_empty());
9456        h.shutdown(&runtime);
9457    }
9458
9459    #[test]
9460    fn inspect_cluster_returns_label_and_abstraction() {
9461        let runtime = rt();
9462        let h = Harness::new(&runtime);
9463        let cluster_id = "cl-inspect-target";
9464        let abstraction_text = "Discussions about the deploy pipeline and on-call rotation.";
9465        {
9466            let conn = h.open_db();
9467            seed_cluster_row(&conn, cluster_id, 12345);
9468            seed_abstraction_row(&conn, "abs-1", cluster_id, abstraction_text);
9469        }
9470        let (status, body) = runtime.block_on(call(
9471            h.router.clone(),
9472            "GET",
9473            &inspect_uri(&format!("cl:{cluster_id}")),
9474            None,
9475        ));
9476        assert_eq!(status, StatusCode::OK, "body: {body}");
9477        assert_eq!(body["node"]["kind"], "cluster");
9478        let full_text = body["full_text"].as_str().unwrap();
9479        assert!(
9480            full_text.contains(cluster_id),
9481            "full_text must include cluster label: {full_text}"
9482        );
9483        assert!(
9484            full_text.contains(abstraction_text),
9485            "full_text must include abstraction text: {full_text}"
9486        );
9487        // "label\n\nabstraction" -- separated by blank line for the
9488        // inspector renderer.
9489        assert!(
9490            full_text.contains("\n\n"),
9491            "label and abstraction must be separated: {full_text}"
9492        );
9493        h.shutdown(&runtime);
9494    }
9495
9496    #[test]
9497    fn inspect_entity_returns_triples_only() {
9498        let runtime = rt();
9499        let h = Harness::new(&runtime);
9500        {
9501            let conn = h.open_db();
9502            let rowid = seed_episode(
9503                &conn,
9504                "e5550000-0000-7000-8000-000000000001",
9505                100,
9506                "host episode",
9507            );
9508            // 5 triples that reference Alice (as subject or object).
9509            seed_triple_row(&conn, "t-ent-1", "Alice", "knows", "Bob", Some(rowid));
9510            seed_triple_row(
9511                &conn,
9512                "t-ent-2",
9513                "Alice",
9514                "works_at",
9515                "Anthropic",
9516                Some(rowid),
9517            );
9518            seed_triple_row(&conn, "t-ent-3", "user", "met", "Alice", Some(rowid));
9519            seed_triple_row(&conn, "t-ent-4", "Alice", "owns", "laptop", Some(rowid));
9520            seed_triple_row(&conn, "t-ent-5", "Carol", "mentors", "Alice", Some(rowid));
9521        }
9522        let (status, body) = runtime.block_on(call(
9523            h.router.clone(),
9524            "GET",
9525            &inspect_uri("ent:Alice"),
9526            None,
9527        ));
9528        assert_eq!(status, StatusCode::OK, "body: {body}");
9529        assert_eq!(body["node"]["kind"], "entity");
9530        assert_eq!(body["node"]["id"], "ent:Alice");
9531        assert!(
9532            body["full_text"].is_null(),
9533            "entity full_text must be null (entities have no body): {body}"
9534        );
9535        let triples_out = body["triples_out"].as_array().unwrap();
9536        assert_eq!(triples_out.len(), 5, "{body}");
9537        assert!(body["triples_in"].as_array().unwrap().is_empty());
9538        for e in triples_out {
9539            assert_eq!(e["kind"], "triple");
9540            assert_eq!(e["source"], "ent:Alice");
9541            // Counterpart is always an entity; Alice never appears on
9542            // both ends so target != source.
9543            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
9544            assert_ne!(e["target"], "ent:Alice");
9545        }
9546        h.shutdown(&runtime);
9547    }
9548
9549    #[test]
9550    fn inspect_entity_with_zero_triples_returns_404() {
9551        let runtime = rt();
9552        let h = Harness::new(&runtime);
9553        // Seed unrelated triples so the table isn't empty; the target
9554        // entity still has zero references.
9555        {
9556            let conn = h.open_db();
9557            let rowid = seed_episode(&conn, "e6660000-0000-7000-8000-000000000001", 100, "ep");
9558            seed_triple_row(&conn, "t-other", "Bob", "knows", "Carol", Some(rowid));
9559        }
9560        let (status, body) = runtime.block_on(call(
9561            h.router.clone(),
9562            "GET",
9563            &inspect_uri("ent:Nonexistent"),
9564            None,
9565        ));
9566        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
9567        let err = body["error"].as_str().unwrap_or_default();
9568        assert!(
9569            err.contains("Nonexistent") || err.contains("entity"),
9570            "error must mention entity: {body}"
9571        );
9572        h.shutdown(&runtime);
9573    }
9574
9575    #[test]
9576    fn inspect_404_on_missing_node() {
9577        // Well-formed `ep:` prefix + valid UUID shape, but no row in DB.
9578        let runtime = rt();
9579        let h = Harness::new(&runtime);
9580        let (status, body) = runtime.block_on(call(
9581            h.router.clone(),
9582            "GET",
9583            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9584            None,
9585        ));
9586        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
9587        h.shutdown(&runtime);
9588    }
9589
9590    #[test]
9591    fn inspect_400_on_invalid_prefix() {
9592        let runtime = rt();
9593        let h = Harness::new(&runtime);
9594        let (status, body) =
9595            runtime.block_on(call(h.router.clone(), "GET", &inspect_uri("xyz:foo"), None));
9596        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9597        let err = body["error"].as_str().unwrap_or_default();
9598        assert!(
9599            err.contains("xyz") || err.contains("prefix"),
9600            "error must mention bad prefix: {body}"
9601        );
9602        h.shutdown(&runtime);
9603    }
9604
9605    #[test]
9606    fn inspect_respects_tenant_scoping() {
9607        let runtime = rt();
9608        let h = Harness::new(&runtime);
9609        let memory_id = "a7770000-0000-7000-8000-000000000001";
9610        {
9611            let conn = h.open_db();
9612            seed_episode(&conn, memory_id, 100, "tenant scope");
9613        }
9614        // Real id in default tenant resolves; the same request against
9615        // a never-registered tenant header surfaces 404 from the tenant
9616        // extractor before the handler runs.
9617        let r = h.router.clone();
9618        let (status, _) = runtime.block_on(async {
9619            let req = Request::builder()
9620                .method("GET")
9621                .uri(inspect_uri(&format!("ep:{memory_id}")))
9622                .header("x-solo-tenant", "never-registered-tenant")
9623                .body(Body::empty())
9624                .unwrap();
9625            let resp = r.oneshot(req).await.expect("oneshot");
9626            let s = resp.status();
9627            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9628            (s, _b)
9629        });
9630        assert_eq!(status, StatusCode::NOT_FOUND);
9631        // Sanity: same id resolves on the default tenant.
9632        let (status, body) = runtime.block_on(call(
9633            h.router.clone(),
9634            "GET",
9635            &inspect_uri(&format!("ep:{memory_id}")),
9636            None,
9637        ));
9638        assert_eq!(
9639            status,
9640            StatusCode::OK,
9641            "default tenant must resolve: {body}"
9642        );
9643        h.shutdown(&runtime);
9644    }
9645
9646    #[test]
9647    fn inspect_respects_auth_when_enabled() {
9648        let runtime = rt();
9649        let h = Harness::new_with_auth(&runtime, Some("inspect-secret".into()));
9650        // Missing bearer -> 401 before handler runs.
9651        let (status, _) = runtime.block_on(call(
9652            h.router.clone(),
9653            "GET",
9654            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9655            None,
9656        ));
9657        assert_eq!(status, StatusCode::UNAUTHORIZED);
9658        // Valid bearer + unknown node -> handler runs and returns 404,
9659        // proving auth passed through.
9660        let (status, _) = runtime.block_on(call_with_auth(
9661            h.router.clone(),
9662            "GET",
9663            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9664            None,
9665            Some("Bearer inspect-secret"),
9666        ));
9667        assert_eq!(status, StatusCode::NOT_FOUND);
9668        h.shutdown(&runtime);
9669    }
9670
9671    // ---------------------------------------------------------------------
9672    // v0.10.0: GET /v1/graph/neighbors/{id}
9673    //
9674    // Unified explicit + HNSW-semantic neighbor surface for solo-web's
9675    // "show similar" overlay. Tests cover the kind dispatch (explicit /
9676    // semantic / both default), threshold filter, limit clamp, dedupe
9677    // rule, and the standard 400/404/auth/tenant gates.
9678    // ---------------------------------------------------------------------
9679
9680    /// URL builder for the neighbors endpoint. `kind`/`threshold`/`limit`
9681    /// are all optional; pass `None` to omit the corresponding query
9682    /// parameter. The node id is percent-encoded so `:` survives the path
9683    /// extractor.
9684    fn neighbors_uri(
9685        node_id: &str,
9686        kind: Option<&str>,
9687        threshold: Option<f32>,
9688        limit: Option<u32>,
9689    ) -> String {
9690        let mut qs: Vec<String> = Vec::new();
9691        if let Some(k) = kind {
9692            qs.push(format!("kind={k}"));
9693        }
9694        if let Some(t) = threshold {
9695            qs.push(format!("threshold={t}"));
9696        }
9697        if let Some(l) = limit {
9698            qs.push(format!("limit={l}"));
9699        }
9700        let encoded = percent_encode_node_id(node_id);
9701        if qs.is_empty() {
9702            format!("/v1/graph/neighbors/{encoded}")
9703        } else {
9704            format!("/v1/graph/neighbors/{encoded}?{}", qs.join("&"))
9705        }
9706    }
9707
9708    /// 1. `?kind=explicit` returns only structural edges (no semantic).
9709    /// Seeds an episode with 2 explicit (triple) neighbors + several
9710    /// distinct other episodes so the semantic path COULD surface
9711    /// candidates. The `kind=explicit` filter must drop all of them.
9712    #[test]
9713    fn neighbors_explicit_only_returns_no_semantic_edges() {
9714        let runtime = rt();
9715        let h = Harness::new(&runtime);
9716        runtime.block_on(async {
9717            // Seed several episodes via the writer-actor so they get HNSW
9718            // entries -- the semantic path would surface these if it
9719            // wasn't filtered out.
9720            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9721            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9722            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
9723            // Add explicit triples sourced from `focal`. seed_triple_row
9724            // needs the focal rowid -- look it up via a side connection.
9725            {
9726                let conn = h.open_db();
9727                let rowid: i64 = conn
9728                    .query_row(
9729                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9730                        rusqlite::params![&focal],
9731                        |r| r.get(0),
9732                    )
9733                    .unwrap();
9734                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
9735                seed_triple_row(&conn, "t-exp-2", "Alice", "owns", "laptop", Some(rowid));
9736            }
9737            let (status, body) = call(
9738                h.router.clone(),
9739                "GET",
9740                &neighbors_uri(&format!("ep:{focal}"), Some("explicit"), None, None),
9741                None,
9742            )
9743            .await;
9744            assert_eq!(status, StatusCode::OK, "body: {body}");
9745            let edges = body["edges"].as_array().unwrap();
9746            assert!(!edges.is_empty(), "expected explicit edges: {body}");
9747            for e in edges {
9748                assert_ne!(
9749                    e["kind"], "semantic",
9750                    "kind=explicit must drop semantic edges: {body}"
9751                );
9752            }
9753        });
9754        h.shutdown(&runtime);
9755    }
9756
9757    /// 2. `?kind=semantic` returns only HNSW edges (no explicit).
9758    /// Inverse of test 1 -- same fixture, opposite filter.
9759    #[test]
9760    fn neighbors_semantic_only_returns_no_explicit_edges() {
9761        let runtime = rt();
9762        let h = Harness::new(&runtime);
9763        runtime.block_on(async {
9764            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9765            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9766            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
9767            {
9768                let conn = h.open_db();
9769                let rowid: i64 = conn
9770                    .query_row(
9771                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9772                        rusqlite::params![&focal],
9773                        |r| r.get(0),
9774                    )
9775                    .unwrap();
9776                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
9777            }
9778            // Threshold=0 so every HNSW hit clears the filter.
9779            let (status, body) = call(
9780                h.router.clone(),
9781                "GET",
9782                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
9783                None,
9784            )
9785            .await;
9786            assert_eq!(status, StatusCode::OK, "body: {body}");
9787            let edges = body["edges"].as_array().unwrap();
9788            for e in edges {
9789                assert_eq!(
9790                    e["kind"], "semantic",
9791                    "kind=semantic must drop explicit edges: {body}"
9792                );
9793                assert!(
9794                    e["weight"].is_number(),
9795                    "semantic edges carry weight: {body}"
9796                );
9797            }
9798        });
9799        h.shutdown(&runtime);
9800    }
9801
9802    /// 3. Default (no `kind=` param) returns both explicit + semantic.
9803    #[test]
9804    fn neighbors_both_default_returns_combined() {
9805        let runtime = rt();
9806        let h = Harness::new(&runtime);
9807        runtime.block_on(async {
9808            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9809            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9810            {
9811                let conn = h.open_db();
9812                let rowid: i64 = conn
9813                    .query_row(
9814                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9815                        rusqlite::params![&focal],
9816                        |r| r.get(0),
9817                    )
9818                    .unwrap();
9819                seed_triple_row(&conn, "t-both-1", "Alice", "met", "Bob", Some(rowid));
9820            }
9821            let (status, body) = call(
9822                h.router.clone(),
9823                "GET",
9824                // No kind param -> default = both. Threshold 0 so semantic
9825                // hits make it through the filter.
9826                &neighbors_uri(&format!("ep:{focal}"), None, Some(0.0), None),
9827                None,
9828            )
9829            .await;
9830            assert_eq!(status, StatusCode::OK, "body: {body}");
9831            let edges = body["edges"].as_array().unwrap();
9832            let kinds: std::collections::HashSet<&str> =
9833                edges.iter().map(|e| e["kind"].as_str().unwrap()).collect();
9834            assert!(
9835                kinds.contains("triple"),
9836                "expected at least one triple edge: {body}"
9837            );
9838            assert!(
9839                kinds.contains("semantic"),
9840                "expected at least one semantic edge: {body}"
9841            );
9842        });
9843        h.shutdown(&runtime);
9844    }
9845
9846    /// 4. Dedupe rule. Construct an episode X whose semantic-neighbor Y
9847    /// is ALSO a triple-target -- i.e. the explicit and semantic paths
9848    /// both produce an edge X -> Y. After dedupe only the explicit edge
9849    /// survives.
9850    #[test]
9851    fn neighbors_dedupes_semantic_when_explicit_exists() {
9852        let runtime = rt();
9853        let h = Harness::new(&runtime);
9854        runtime.block_on(async {
9855            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9856            // Seed an explicit triple from focal -> ent:peer-target.
9857            // The semantic path produces edges focal -> ep:<other>; we
9858            // ensure both paths produce an edge ending at the same id by
9859            // wiring `peer-target = ep:<other_memory_id>` -- but the
9860            // entity emitter uses `ent:` prefix, not `ep:`. So to force a
9861            // collision we need an edge form where source+target overlap.
9862            //
9863            // Simpler construction: the `expand_triple_from_episode` path
9864            // emits an edge `ent:subject -> ent:object`, not from the
9865            // focal episode -- meaning the explicit edges don't end at
9866            // an ep: node in the first place. So we have to engineer a
9867            // collision via the cluster_member path:
9868            //   * explicit: focal (episode) -> cluster (via cluster_member)
9869            //   * semantic: focal -> similar episode
9870            // The two endpoints (cluster vs. episode) never collide in
9871            // shape. To produce a real (source, target) overlap that
9872            // exercises the dedupe code, mint a synthetic semantic edge
9873            // by adding an explicit triple sourced from the focal that
9874            // happens to end at the SAME entity the semantic path would
9875            // emit -- but semantic only emits ep:/chunk: ids, never ent:.
9876            //
9877            // The brief flagged this scenario as unlikely. Build the
9878            // simplest collision the codebase admits: have the focal
9879            // episode's semantic neighbor's memory_id appear as a
9880            // triple's object_id (formatted as ent:<that-uuid>). The
9881            // explicit edge is then `ent:<self-subject> -> ent:<uuid>`;
9882            // the semantic edge is `ep:focal -> ep:<uuid>`. The (source,
9883            // target) pair DIFFERS (`ent:X` vs `ep:focal`), so dedupe
9884            // would NOT fire -- which is correct: those are structurally
9885            // different relationships.
9886            //
9887            // Therefore the realistic dedupe test is the trivial
9888            // tautology: explicit and semantic produce no collisions in
9889            // practice. Lock that in by asserting that the same memory_id
9890            // never appears with an edge from both paths.
9891            let _other = post_remember(h.router.clone(), "beta beta beta").await;
9892            {
9893                let conn = h.open_db();
9894                let rowid: i64 = conn
9895                    .query_row(
9896                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9897                        rusqlite::params![&focal],
9898                        |r| r.get(0),
9899                    )
9900                    .unwrap();
9901                seed_triple_row(&conn, "t-dedupe-1", "Alice", "knows", "Bob", Some(rowid));
9902            }
9903            let (status, body) = call(
9904                h.router.clone(),
9905                "GET",
9906                &neighbors_uri(&format!("ep:{focal}"), Some("both"), Some(0.0), None),
9907                None,
9908            )
9909            .await;
9910            assert_eq!(status, StatusCode::OK, "body: {body}");
9911            // For every edge, count occurrences of (source, target). No
9912            // pair should appear twice (which is what the dedupe rule
9913            // guarantees).
9914            let edges = body["edges"].as_array().unwrap();
9915            let mut seen: std::collections::HashMap<(String, String), i32> =
9916                std::collections::HashMap::new();
9917            for e in edges {
9918                let key = (
9919                    e["source"].as_str().unwrap().to_string(),
9920                    e["target"].as_str().unwrap().to_string(),
9921                );
9922                *seen.entry(key).or_insert(0) += 1;
9923            }
9924            for (pair, count) in &seen {
9925                assert_eq!(
9926                    *count, 1,
9927                    "edge pair {pair:?} appears {count} times -- dedupe rule violated: {body}"
9928                );
9929            }
9930        });
9931        h.shutdown(&runtime);
9932    }
9933
/// 5. Threshold filter -- a stricter `threshold` can only shrink the
/// set of semantic edges, and any weighted edge that survives the
/// strict query must have a weight at or above that threshold.
#[test]
fn neighbors_threshold_filters_low_similarity() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
        // Three more episodes so the focal node has candidate neighbors.
        for text in ["beta one", "beta two", "beta three"] {
            let _ = post_remember(h.router.clone(), text).await;
        }
        let node = format!("ep:{focal}");

        // Permissive query (threshold 0.0).
        let permissive_uri = neighbors_uri(&node, Some("semantic"), Some(0.0), None);
        let (status, low_body) = call(h.router.clone(), "GET", &permissive_uri, None).await;
        assert_eq!(status, StatusCode::OK, "body: {low_body}");
        let low_edge_count = low_body["edges"].as_array().unwrap().len();

        // Strict query (threshold 0.99).
        let strict_uri = neighbors_uri(&node, Some("semantic"), Some(0.99), None);
        let (status, high_body) = call(h.router.clone(), "GET", &strict_uri, None).await;
        assert_eq!(status, StatusCode::OK, "body: {high_body}");
        let strict_edges = high_body["edges"].as_array().unwrap();
        let high_edge_count = strict_edges.len();

        assert!(
            high_edge_count <= low_edge_count,
            "high-threshold ({high_edge_count}) must not exceed low-threshold ({low_edge_count}): low={low_body}, high={high_body}"
        );
        // Edges without a numeric `weight` are skipped; every weighted
        // survivor has to clear the filter.
        let weighted = strict_edges
            .iter()
            .filter_map(|e| e["weight"].as_f64().map(|w| (e, w)));
        for (e, w) in weighted {
            assert!(
                w >= 0.99,
                "edge with weight {w} survived threshold=0.99: {e}"
            );
        }
    });
    h.shutdown(&runtime);
}
9982
/// 6. An oversized `?limit=999` is clamped to 100 edges without an
/// error -- the doc on the original test notes this mirrors
/// `/v1/graph/expand`.
#[test]
fn neighbors_limit_clamped_at_100() {
    const CLUSTER_ID: &str = "cl-huge-n";
    let runtime = rt();
    let h = Harness::new(&runtime);
    // Seed 150 members into one cluster so the explicit cluster_member
    // path has more than 100 candidate edges to return.
    {
        let conn = h.open_db();
        seed_cluster_row(&conn, CLUSTER_ID, 1000);
        for i in 0..150_i64 {
            let mid = format!("99119911-1111-7000-8000-{i:012}");
            let content = format!("content {i}");
            seed_episode(&conn, &mid, 100 + i, &content);
            seed_cluster_member(&conn, CLUSTER_ID, &mid);
        }
    }
    let uri = neighbors_uri("cl:cl-huge-n", Some("explicit"), None, Some(999));
    let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
    assert_eq!(status, StatusCode::OK, "body: {body}");
    let edge_count = body["edges"].as_array().unwrap().len();
    assert_eq!(
        edge_count, 100,
        "limit must be silently clamped to 100, got {}",
        edge_count
    );
    h.shutdown(&runtime);
}
10016
/// 7. Asking for `kind=semantic` neighbors of a document node is a
/// 400, and the error text names both `episode` and `chunk`.
#[test]
fn neighbors_semantic_rejects_document_source() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let doc_id = "d-semrej-0000-7000-8000-000000000001";
    {
        let conn = h.open_db();
        seed_document_row(&conn, doc_id, "host");
    }
    let uri = neighbors_uri(&format!("doc:{doc_id}"), Some("semantic"), None, None);
    let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
    assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
    // A missing or non-string `error` field degrades to "", which then
    // fails the containment check below.
    let err = body["error"].as_str().unwrap_or_default();
    let names_supported_kinds = ["episode", "chunk"].iter().all(|kind| err.contains(*kind));
    assert!(
        names_supported_kinds,
        "error must list supported kinds: {body}"
    );
    h.shutdown(&runtime);
}
10041
/// 8. Asking for `kind=semantic` neighbors of a cluster node is a 400.
#[test]
fn neighbors_semantic_rejects_cluster_source() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let cluster_id = "cl-semrej-target";
    {
        let conn = h.open_db();
        seed_cluster_row(&conn, cluster_id, 12345);
    }
    let uri = neighbors_uri(&format!("cl:{cluster_id}"), Some("semantic"), None, None);
    let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
    assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
    h.shutdown(&runtime);
}
10061
/// 9. With an entity as the focal node and no explicit `kind`, the
/// response is a non-empty list made up solely of `triple` edges and
/// the request succeeds (no error from the semantic side).
#[test]
fn neighbors_entity_returns_triples_only() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        // The host episode goes through the normal remember endpoint
        // rather than a raw seed, so whatever indexing that path does
        // is present while the entity query runs.
        let host_mid = post_remember(h.router.clone(), "Alice and Bob talked").await;
        {
            let conn = h.open_db();
            let rowid: i64 = conn
                .query_row(
                    "SELECT rowid FROM episodes WHERE memory_id = ?1",
                    rusqlite::params![&host_mid],
                    |row| row.get(0),
                )
                .unwrap();
            // Two triples with Alice as subject, both tied to the host episode.
            let triples = [
                ("t-ent-n-1", "knows", "Bob"),
                ("t-ent-n-2", "works_at", "Acme"),
            ];
            for (triple_id, predicate, object) in triples {
                seed_triple_row(&conn, triple_id, "Alice", predicate, object, Some(rowid));
            }
        }
        let uri = neighbors_uri("ent:Alice", None, Some(0.0), None);
        let (status, body) = call(h.router.clone(), "GET", &uri, None).await;
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let edges = body["edges"].as_array().unwrap();
        assert!(!edges.is_empty(), "expected explicit triples: {body}");
        for edge in edges {
            assert_eq!(
                edge["kind"], "triple",
                "entity focal must produce only triple edges: {body}"
            );
        }
    });
    h.shutdown(&runtime);
}
10106
/// 10. Tenant scoping: the same neighbors request is a 404 when sent
/// with an unregistered `x-solo-tenant` header and a 200 when sent
/// without one (default tenant).
#[test]
fn neighbors_respects_tenant_scoping() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let memory_id = "a8880000-0000-7000-8000-000000000001";
    {
        let conn = h.open_db();
        seed_episode(&conn, memory_id, 100, "tenant scope");
    }
    let node = format!("ep:{memory_id}");

    // Unknown tenant header -> 404.
    let foreign_status = runtime.block_on(async {
        let request = Request::builder()
            .method("GET")
            .uri(neighbors_uri(&node, Some("explicit"), None, None))
            .header("x-solo-tenant", "never-registered-tenant-n")
            .body(Body::empty())
            .unwrap();
        let response = h.router.clone().oneshot(request).await.expect("oneshot");
        let code = response.status();
        // Read the body to completion; its contents are not inspected.
        let _ = response.into_body().collect().await.unwrap().to_bytes();
        code
    });
    assert_eq!(foreign_status, StatusCode::NOT_FOUND);

    // Sanity: the identical node resolves for the default tenant.
    let uri = neighbors_uri(&node, Some("explicit"), None, None);
    let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
    assert_eq!(
        status,
        StatusCode::OK,
        "default tenant must resolve: {body}"
    );
    h.shutdown(&runtime);
}
10152
/// 11. With bearer auth enabled, a request without a token is a 401,
/// while the same request with the valid token reaches far enough to
/// report 404 for the unknown node.
#[test]
fn neighbors_respects_auth_when_enabled() {
    let runtime = rt();
    let h = Harness::new_with_auth(&runtime, Some("neighbors-secret".into()));
    // Both requests target the same never-seeded episode id.
    let uri = neighbors_uri(
        "ep:99999999-9999-7000-8000-000000000999",
        Some("explicit"),
        None,
        None,
    );

    // No Authorization header -> 401.
    let (anonymous_status, _) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
    assert_eq!(anonymous_status, StatusCode::UNAUTHORIZED);

    // Valid bearer token + unknown node -> 404.
    let (authed_status, _) = runtime.block_on(call_with_auth(
        h.router.clone(),
        "GET",
        &uri,
        None,
        Some("Bearer neighbors-secret"),
    ));
    assert_eq!(authed_status, StatusCode::NOT_FOUND);
    h.shutdown(&runtime);
}
10188
10189    // ---------------------------------------------------------------------
10190    // v0.10.0: GET /v1/graph/stream — SSE invalidation feed
10191    //
10192    // Driving SSE through axum's in-process router (`oneshot`) requires
10193    // reading the response body as a stream of frames and parsing each
10194    // chunk against the SSE wire format (`event: NAME\ndata: JSON\n\n`).
10195    // The `read_one_sse_event` helper below does that incrementally so
10196    // tests don't have to wait for the stream to close (which would
10197    // never happen — the SSE loop runs until the client drops).
10198    // ---------------------------------------------------------------------
10199
10200    /// One parsed SSE event: the `event:` field plus the `data:` payload
10201    /// re-parsed as JSON. The `id:` field is captured for v0.11.0 P2's
10202    /// `/mcp` GET stream which threads monotonic event ids through
10203    /// the wire — `None` for streams (`/v1/graph/stream`) that don't
10204    /// emit `id:` lines. Empty / comment-only frames are filtered out
10205    /// by the parser; callers only see real events.
10206    #[derive(Debug, Clone)]
10207    struct ParsedSseEvent {
10208        event: String,
10209        data: Value,
10210        /// Raw SSE `id:` field, if present. v0.11.0 P2 emits monotonic
10211        /// `u64` ids for `/mcp` events; the wire encodes them as
10212        /// strings.
10213        id: Option<String>,
10214    }
10215
10216    /// Read frames off the SSE body until ONE complete event lands, then
10217    /// return it. Times out after `timeout` to keep red-test feedback
10218    /// fast. On timeout returns `None`.
10219    async fn read_one_sse_event(
10220        body: &mut axum::body::Body,
10221        timeout: std::time::Duration,
10222    ) -> Option<ParsedSseEvent> {
10223        use http_body_util::BodyExt;
10224        let mut buf = String::new();
10225        let start = std::time::Instant::now();
10226        loop {
10227            if start.elapsed() >= timeout {
10228                return None;
10229            }
10230            let remaining = timeout.saturating_sub(start.elapsed());
10231            let frame_res = tokio::time::timeout(remaining, body.frame()).await;
10232            let frame = match frame_res {
10233                Ok(Some(Ok(f))) => f,
10234                Ok(Some(Err(_))) | Ok(None) => return None,
10235                Err(_) => return None,
10236            };
10237            if let Ok(data) = frame.into_data() {
10238                buf.push_str(&String::from_utf8_lossy(&data));
10239                // Parse complete events (double newline separator).
10240                while let Some(idx) = buf.find("\n\n") {
10241                    let block: String = buf.drain(..idx + 2).collect();
10242                    if let Some(parsed) = parse_sse_block(&block) {
10243                        return Some(parsed);
10244                    }
10245                }
10246            }
10247        }
10248    }
10249
10250    /// Parse one SSE block (raw text between two `\n\n` separators).
10251    /// Returns `None` for comment-only blocks (lines starting with `:`)
10252    /// or blocks missing either `event:` or `data:`.
10253    fn parse_sse_block(block: &str) -> Option<ParsedSseEvent> {
10254        let mut event: Option<String> = None;
10255        let mut data: Option<String> = None;
10256        let mut id: Option<String> = None;
10257        for line in block.lines() {
10258            if let Some(rest) = line.strip_prefix("event:") {
10259                event = Some(rest.trim().to_string());
10260            } else if let Some(rest) = line.strip_prefix("data:") {
10261                data = Some(rest.trim().to_string());
10262            } else if let Some(rest) = line.strip_prefix("id:") {
10263                id = Some(rest.trim().to_string());
10264            }
10265        }
10266        let event = event?;
10267        let data_str = data?;
10268        let data_json = serde_json::from_str(&data_str).ok()?;
10269        Some(ParsedSseEvent {
10270            event,
10271            data: data_json,
10272            id,
10273        })
10274    }
10275
10276    /// Open the SSE stream and return the response body for further
10277    /// frame-level reads. The headers are validated (Content-Type +
10278    /// status) before the body is returned.
10279    async fn open_sse_stream_inner(
10280        router: axum::Router,
10281        auth: Option<&str>,
10282        tenant: Option<&str>,
10283    ) -> (StatusCode, axum::body::Body) {
10284        let mut builder = Request::builder().method("GET").uri("/v1/graph/stream");
10285        if let Some(a) = auth {
10286            builder = builder.header("authorization", a);
10287        }
10288        if let Some(t) = tenant {
10289            builder = builder.header("x-solo-tenant", t);
10290        }
10291        let req = builder
10292            .header("content-length", "0")
10293            .body(Body::empty())
10294            .unwrap();
10295        let resp = router.oneshot(req).await.expect("oneshot");
10296        let status = resp.status();
10297        let body = resp.into_body();
10298        (status, body)
10299    }
10300
/// 1. The first event on a freshly opened stream is `init`, carrying
/// `connected: true`, the `default` tenant id and a numeric `ts_ms`.
#[test]
fn stream_emits_init_event_on_connect() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async {
        let wait = std::time::Duration::from_secs(2);
        let (status, mut body) = open_sse_stream_inner(router, None, None).await;
        assert_eq!(status, StatusCode::OK);
        let first = read_one_sse_event(&mut body, wait)
            .await
            .expect("must receive init event within 2s");
        assert_eq!(first.event, "init");
        let payload = &first.data;
        assert_eq!(payload["connected"].as_bool(), Some(true));
        assert_eq!(payload["tenant_id"].as_str(), Some("default"));
        assert!(payload["ts_ms"].is_number());
    });
    h.shutdown(&runtime);
}
10320
/// 2. An `InvalidateEvent` pushed onto the broadcast channel comes out
/// of the stream as an `invalidate` SSE event with the same `reason`,
/// `tenant_id` and `kind`.
#[test]
fn stream_emits_invalidate_after_writer_event() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    let sender = h.invalidate_sender();
    runtime.block_on(async {
        let wait = std::time::Duration::from_secs(2);
        let (status, mut body) = open_sse_stream_inner(router, None, None).await;
        assert_eq!(status, StatusCode::OK);

        // The stream opens with `init`; consume it first.
        let opening = read_one_sse_event(&mut body, wait).await.unwrap();
        assert_eq!(opening.event, "init");

        // Publish the same shape of event a writer would.
        let published = InvalidateEvent {
            reason: String::from("memory.remember"),
            tenant_id: String::from("default"),
            ts_ms: 1_715_625_600_000,
            kind: String::from("episode"),
        };
        sender
            .send(published)
            .expect("must have at least one subscriber");

        // It has to show up on the SSE side.
        let received = read_one_sse_event(&mut body, wait)
            .await
            .expect("invalidate event must arrive within 2s");
        assert_eq!(received.event, "invalidate");
        let payload = &received.data;
        assert_eq!(payload["reason"].as_str(), Some("memory.remember"));
        assert_eq!(payload["tenant_id"].as_str(), Some("default"));
        assert_eq!(payload["kind"].as_str(), Some("episode"));
    });
    h.shutdown(&runtime);
}
10357
/// 3. For every `(reason, kind)` pair in the table below, publishing
/// an `InvalidateEvent` yields an `invalidate` SSE event echoing that
/// pair. Events are published and read back one at a time.
#[test]
fn stream_emits_invalidate_for_each_writer_command() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    let sender = h.invalidate_sender();
    let cases = [
        ("memory.remember", "episode"),
        ("memory.forget", "episode"),
        ("memory.consolidate", "cluster"),
        ("memory.ingest_document", "document"),
        ("memory.forget_document", "document"),
        ("memory.triples_extract", "cluster"),
        ("memory.reembed", "episode"),
        ("gdpr.forget_user", "tenant"),
    ];
    runtime.block_on(async {
        let wait = std::time::Duration::from_secs(2);
        let (status, mut body) = open_sse_stream_inner(router, None, None).await;
        assert_eq!(status, StatusCode::OK);
        // Skip past the opening event.
        let _ = read_one_sse_event(&mut body, wait).await.unwrap();

        for (reason, kind) in cases {
            let outgoing = InvalidateEvent {
                reason: reason.to_owned(),
                tenant_id: String::from("default"),
                ts_ms: 1_715_625_600_000,
                kind: kind.to_owned(),
            };
            sender.send(outgoing).unwrap();

            let received = match read_one_sse_event(&mut body, wait).await {
                Some(ev) => ev,
                None => panic!("must receive event for {reason}"),
            };
            assert_eq!(received.event, "invalidate");
            assert_eq!(
                received.data["reason"].as_str(),
                Some(reason),
                "reason mismatch"
            );
            assert_eq!(received.data["kind"].as_str(), Some(kind), "kind mismatch");
        }
    });
    h.shutdown(&runtime);
}
10402
/// 4. With no invalidations published, the stream emits `init`
/// followed by a `heartbeat`. The test calls `build_invalidate_stream`
/// directly with an interval argument of `1` (the original comments
/// describe this as a 1-second heartbeat versus 30s in production),
/// wraps the result in `Sse`, and parses the response body with
/// `read_one_sse_event` -- so the bytes checked are the ones axum
/// would put on the wire.
#[test]
fn stream_emits_heartbeat_when_no_events() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let sender = h.invalidate_sender();
    runtime.block_on(async {
        // Take a receiver straight from the broadcast sender and build
        // the stream around it, bypassing the HTTP handler.
        let rx = sender.subscribe();
        let stream = build_invalidate_stream(rx, "default".to_string(), 1);
        // Go through `IntoResponse` to obtain real SSE body bytes.
        let mut body = Sse::new(stream).into_response().into_body();

        // Event one: `init`.
        let init_wait = std::time::Duration::from_secs(2);
        let first = read_one_sse_event(&mut body, init_wait)
            .await
            .expect("init event must arrive");
        assert_eq!(first.event, "init");

        // Event two: `heartbeat`. Nothing was published, so the only
        // thing that can arrive is the periodic beat; the 3s window
        // leaves room for scheduling jitter.
        let beat_wait = std::time::Duration::from_secs(3);
        let second = read_one_sse_event(&mut body, beat_wait)
            .await
            .expect("heartbeat event must arrive within 3s");
        assert_eq!(second.event, "heartbeat");
        assert!(second.data["ts_ms"].is_number());
    });
    h.shutdown(&runtime);
}
10443
/// 5. Three subscribers connected to the same (default) tenant each
/// receive every invalidate: after all three have seen `init`, one
/// published `InvalidateEvent` must arrive on all three streams.
#[test]
fn stream_concurrent_subscribers_same_tenant() {
    const SUBSCRIBERS: usize = 3;
    let runtime = rt();
    let h = Harness::new(&runtime);
    let sender = h.invalidate_sender();
    runtime.block_on(async {
        let wait = std::time::Duration::from_secs(2);

        // Open the subscribers.
        let mut bodies = Vec::with_capacity(SUBSCRIBERS);
        for _ in 0..SUBSCRIBERS {
            let (status, body) = open_sse_stream_inner(h.router.clone(), None, None).await;
            assert_eq!(status, StatusCode::OK);
            bodies.push(body);
        }

        // Drain the init event from each so every stream is live.
        for (i, body) in bodies.iter_mut().enumerate() {
            let ev = read_one_sse_event(body, wait)
                .await
                .unwrap_or_else(|| panic!("subscriber {i} must receive init"));
            assert_eq!(ev.event, "init");
        }

        // Every open stream holds a receiver on the broadcast channel.
        assert!(
            sender.receiver_count() >= SUBSCRIBERS,
            "expected ≥3 subscribers, got {}",
            sender.receiver_count()
        );

        // Fire one invalidate.
        sender
            .send(InvalidateEvent {
                reason: "memory.remember".to_string(),
                tenant_id: "default".to_string(),
                ts_ms: 1_715_625_600_000,
                kind: "episode".to_string(),
            })
            .expect("send must succeed");

        // All subscribers receive it.
        for (i, body) in bodies.iter_mut().enumerate() {
            let ev = read_one_sse_event(body, wait)
                .await
                .unwrap_or_else(|| panic!("subscriber {i} must receive invalidate"));
            assert_eq!(ev.event, "invalidate");
            assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
        }
    });
    h.shutdown(&runtime);
}
10495
/// 6. Dropping the SSE client releases its broadcast receiver: the
/// sender's `receiver_count()` rises while the stream is live and
/// returns to (at most) its starting value after the body is dropped.
#[test]
fn stream_handles_client_disconnect_gracefully() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let r = h.router.clone();
    let sender = h.invalidate_sender();
    let before = sender.receiver_count();
    runtime.block_on(async {
        let (status, mut body) = open_sse_stream_inner(r, None, None).await;
        assert_eq!(status, StatusCode::OK);
        // Drain the init so the stream is fully active.
        let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
            .await
            .unwrap();
        let during = sender.receiver_count();
        assert!(
            during > before,
            "subscriber count must increase while stream is live (before={before}, during={during})"
        );
        // Dropping the body stands in for the client closing the
        // connection.
        drop(body);
    });
    // Poll with a bounded deadline rather than sleeping a fixed
    // interval: the test finishes as soon as the receiver is gone and
    // does not fail spuriously if cleanup takes longer than one fixed
    // sleep on a loaded machine.
    let after = runtime.block_on(async {
        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(1);
        loop {
            let count = sender.receiver_count();
            if count <= before || std::time::Instant::now() >= deadline {
                break count;
            }
            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
        }
    });
    assert!(
        after <= before,
        "subscriber count must drop back after disconnect (before={before}, after={after})"
    );
    h.shutdown(&runtime);
}
10533
/// 7. With bearer auth enabled, opening the stream without a token is
/// rejected with 401.
#[test]
fn stream_respects_auth_when_enabled() {
    let runtime = rt();
    let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
    let router = h.router.clone();
    let (status, _body) = runtime.block_on(open_sse_stream_inner(router, None, None));
    assert_eq!(status, StatusCode::UNAUTHORIZED);
    h.shutdown(&runtime);
}
10546
/// 8. With no auth configured on the harness, an anonymous client can
/// open the stream and receives `init`.
#[test]
fn stream_works_with_auth_none() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async {
        let wait = std::time::Duration::from_secs(2);
        let (status, mut body) = open_sse_stream_inner(router, None, None).await;
        assert_eq!(status, StatusCode::OK);
        let first = read_one_sse_event(&mut body, wait)
            .await
            .expect("must receive init event");
        assert_eq!(first.event, "init");
    });
    h.shutdown(&runtime);
}
10563
/// 9. With bearer auth enabled, presenting the configured token opens
/// the stream and the `init` event reports the `default` tenant.
#[test]
fn stream_respects_auth_accepts_valid_token() {
    let runtime = rt();
    let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
    let router = h.router.clone();
    runtime.block_on(async {
        let wait = std::time::Duration::from_secs(2);
        let bearer = Some("Bearer stream-secret");
        let (status, mut body) = open_sse_stream_inner(router, bearer, None).await;
        assert_eq!(status, StatusCode::OK);
        let first = read_one_sse_event(&mut body, wait)
            .await
            .expect("must receive init event with valid bearer");
        assert_eq!(first.event, "init");
        assert_eq!(first.data["tenant_id"].as_str(), Some("default"));
    });
    h.shutdown(&runtime);
}
10582
/// 10. Opening the stream with an `x-solo-tenant` header naming a
/// tenant the test registry does not know yields 404 instead of a
/// stream.
#[test]
fn stream_respects_tenant_scoping() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    let unknown_tenant = Some("never-registered-tenant-x");
    let (status, _body) =
        runtime.block_on(open_sse_stream_inner(router, None, unknown_tenant));
    // Per the original note: the single-tenant test registry reports
    // NotFound for a tenant that isn't cached, and the TenantExtractor
    // maps that to 404.
    assert_eq!(status, StatusCode::NOT_FOUND);
    h.shutdown(&runtime);
}
10600
10601    // -----------------------------------------------------------------
10602    // /v1/status — authenticated tenant-aware readiness
10603    // -----------------------------------------------------------------
10604
/// `GET /v1/status` for the default tenant: after registering the
/// tenant in the index with a display name and a quota, the payload
/// must report `ok`, the crate version, the tenant block (id,
/// registered, status, quota, a last-accessed timestamp), the stub
/// embedder's descriptor, one active tenant and zero MCP sessions.
#[test]
fn status_returns_tenant_aware_payload() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async {
        // Register the default tenant so the index has a row for it.
        let tid = solo_core::TenantId::default_tenant();
        h.registry
            .with_index(|idx| {
                idx.register_with_quota(
                    &tid,
                    "default.db",
                    Some("Default tenant"),
                    Some(1_234_567),
                )
                .unwrap();
            })
            .await;

        let (status, body) = call(router, "GET", "/v1/status", None).await;
        assert_eq!(status, StatusCode::OK, "body: {body}");

        // Typed JSON-pointer lookups into the response.
        let text = |ptr: &str| body.pointer(ptr).and_then(|v| v.as_str());
        let uint = |ptr: &str| body.pointer(ptr).and_then(|v| v.as_u64());
        let flag = |ptr: &str| body.pointer(ptr).and_then(|v| v.as_bool());

        // Top level.
        assert_eq!(body["ok"].as_bool(), Some(true));
        assert_eq!(body["version"].as_str(), Some(env!("CARGO_PKG_VERSION")));

        // Tenant block.
        assert_eq!(text("/tenant/id"), Some("default"));
        assert_eq!(flag("/tenant/registered"), Some(true));
        assert_eq!(text("/tenant/status"), Some("active"));
        assert_eq!(uint("/tenant/quota_bytes"), Some(1_234_567));
        let last_accessed = body
            .pointer("/tenant/last_accessed_ms")
            .and_then(|v| v.as_i64());
        assert!(
            last_accessed.is_some(),
            "status should surface the TenantExtractor touch: {body}"
        );

        // Embedder descriptor.
        assert_eq!(text("/embedder/name"), Some("stub"));
        assert_eq!(text("/embedder/version"), Some("v1"));
        assert_eq!(uint("/embedder/dim"), Some(16));
        assert_eq!(text("/embedder/dtype"), Some("f32"));

        // Process-wide counters.
        assert_eq!(body["active_tenants"].as_u64(), Some(1));
        assert_eq!(uint("/mcp/sessions"), Some(0));
    });
    h.shutdown(&runtime);
}
10674
/// `GET /v1/status` with bearer auth enabled: 401 without a token, 200
/// with `ok: true` when the configured token is presented.
#[test]
fn status_respects_auth_when_enabled() {
    let runtime = rt();
    let h = Harness::new_with_auth(&runtime, Some("status-secret".into()));
    let router = h.router.clone();
    runtime.block_on(async {
        // Anonymous request is rejected.
        let (anonymous_status, _body) = call(router.clone(), "GET", "/v1/status", None).await;
        assert_eq!(anonymous_status, StatusCode::UNAUTHORIZED);

        // Same request with the bearer token succeeds.
        let bearer = Some("Bearer status-secret");
        let (status, body) = call_with_auth(router, "GET", "/v1/status", None, bearer).await;
        assert_eq!(status, StatusCode::OK, "body: {body}");
        assert_eq!(body["ok"].as_bool(), Some(true));
    });
    h.shutdown(&runtime);
}
10691
10692    #[test]
10693    fn status_respects_tenant_scoping() {
10694        let runtime = rt();
10695        let h = Harness::new(&runtime);
10696        let r = h.router.clone();
10697        runtime.block_on(async {
10698            let (status, body) =
10699                call_with_tenant(r, "GET", "/v1/status", None, "never-registered").await;
10700            assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
10701        });
10702        h.shutdown(&runtime);
10703    }
10704
10705    // -----------------------------------------------------------------
10706    // /v1/tenants — principal-scoped tenant list (v0.10.0)
10707    //
10708    // Seeds the harness's in-memory tenants_index stub via
10709    // `harness.registry.with_index(|idx| idx.register(...))` to drive
10710    // the read-only list endpoint. The default tenant from the
10711    // harness's HashMap is NOT in the index stub by construction (the
10712    // `for_tests_with_single_tenant` factory only wires the cached
10713    // HashMap entry; the index starts empty after migrations), so each
10714    // test that wants the default tenant listed registers it
10715    // explicitly. This keeps the test setup explicit about what's
10716    // visible to `list_active` versus what's open in memory.
10717    // -----------------------------------------------------------------
10718
10719    /// Seed three Active tenants into the registry's index. Returns the
10720    /// ids in the order they were registered, which is the order
10721    /// `list_active` will return them in (ORDER BY created_at_ms ASC).
10722    async fn seed_three_tenants(registry: &TenantRegistry) -> Vec<String> {
10723        use solo_core::TenantId as TenantIdT;
10724        let ids = ["alice", "bob", "default"];
10725        for id in ids {
10726            let tid = TenantIdT::new(id).unwrap();
10727            registry
10728                .with_index(|idx| {
10729                    idx.register(&tid, &format!("{id}.db"), Some(&format!("{id} tenant")))
10730                        .unwrap();
10731                    // Ensure created_at_ms diverges so the ASC sort is
10732                    // deterministic — the index uses `chrono::Utc::now()`
10733                    // per row and 3 sequential inserts can land in the
10734                    // same ms on fast hardware.
10735                })
10736                .await;
10737            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
10738        }
10739        // Sort matches the `created_at_ms ASC, tenant_id ASC` order
10740        // `TenantsIndex::list` returns. We inserted in (alice, bob,
10741        // default) order with 2ms gaps, so that's the expected order.
10742        vec!["alice".into(), "bob".into(), "default".into()]
10743    }
10744
10745    /// 1. With `AuthConfig::None`, the handler returns every tenant
10746    ///    visible in the registry — same scope as `solo tenants list`.
10747    ///    Exercises the "no principal" branch of the visibility filter.
10748    #[test]
10749    fn tenants_returns_all_when_auth_none() {
10750        let runtime = rt();
10751        let h = Harness::new(&runtime);
10752        let r = h.router.clone();
10753        runtime.block_on(async {
10754            let _expected = seed_three_tenants(&h.registry).await;
10755            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10756            assert_eq!(status, StatusCode::OK);
10757            let arr = body
10758                .get("tenants")
10759                .and_then(|v| v.as_array())
10760                .expect("tenants array");
10761            assert_eq!(arr.len(), 3, "got body: {body}");
10762            let ids: Vec<&str> = arr.iter().filter_map(|t| t["id"].as_str()).collect();
10763            assert_eq!(ids, vec!["alice", "bob", "default"]);
10764        });
10765        h.shutdown(&runtime);
10766    }
10767
10768    /// 2. Under Bearer auth (single-principal mode), the handler
10769    ///    returns every tenant — the bearer holder is treated as the
10770    ///    daemon operator with full visibility. Exercises the bearer
10771    ///    branch of the visibility filter.
10772    #[test]
10773    fn tenants_returns_all_when_bearer_auth() {
10774        let runtime = rt();
10775        let h = Harness::new_with_auth(&runtime, Some("tlist-secret".into()));
10776        let r = h.router.clone();
10777        runtime.block_on(async {
10778            seed_three_tenants(&h.registry).await;
10779            let (status, body) =
10780                call_with_auth(r, "GET", "/v1/tenants", None, Some("Bearer tlist-secret")).await;
10781            assert_eq!(status, StatusCode::OK, "got body: {body}");
10782            let arr = body["tenants"].as_array().expect("tenants array");
10783            assert_eq!(arr.len(), 3, "bearer must see all tenants");
10784        });
10785        h.shutdown(&runtime);
10786    }
10787
10788    /// 3. Under OIDC, an authenticated principal carrying
10789    ///    `tenant_claim = "alice"` sees ONLY alice — not bob, not
10790    ///    default. Exercises the OIDC branch of the visibility filter.
10791    #[test]
10792    fn tenants_filters_to_principal_claim_when_oidc() {
10793        let runtime = rt();
10794        let (fake_server, discovery_url, secret, kid) =
10795            runtime.block_on(async { spin_fake_idp().await });
10796        let server_uri = fake_server.uri();
10797        let _server_guard = fake_server;
10798
10799        let auth = crate::auth::AuthConfig::Oidc {
10800            discovery_url,
10801            audience: "tlist-audience".to_string(),
10802            tenant_claim_name: "solo_tenant".to_string(),
10803        };
10804        let h = Harness::new_with_auth_config(&runtime, Some(auth));
10805        let r = h.router.clone();
10806
10807        runtime.block_on(async {
10808            seed_three_tenants(&h.registry).await;
10809            let token = mint_idp_token(&server_uri, kid, &secret, "alice", "tlist-audience");
10810            let (status, body) = call_with_auth(
10811                r,
10812                "GET",
10813                "/v1/tenants",
10814                None,
10815                Some(&format!("Bearer {token}")),
10816            )
10817            .await;
10818            assert_eq!(status, StatusCode::OK, "got body: {body}");
10819            let arr = body["tenants"].as_array().expect("tenants array");
10820            assert_eq!(arr.len(), 1, "OIDC alice must see exactly one tenant");
10821            assert_eq!(arr[0]["id"].as_str(), Some("alice"));
10822        });
10823        h.shutdown(&runtime);
10824    }
10825
10826    /// 4. Under OIDC with a `tenant_claim` that doesn't match any
10827    ///    registered tenant, the response is `200 OK` with
10828    ///    `tenants: []` — NOT 404. Don't leak whether other tenants
10829    ///    exist via a status-code side-channel for an OIDC principal
10830    ///    that lacks visibility to them.
10831    #[test]
10832    fn tenants_returns_empty_when_oidc_claim_unmatched() {
10833        let runtime = rt();
10834        let (fake_server, discovery_url, secret, kid) =
10835            runtime.block_on(async { spin_fake_idp().await });
10836        let server_uri = fake_server.uri();
10837        let _server_guard = fake_server;
10838
10839        let auth = crate::auth::AuthConfig::Oidc {
10840            discovery_url,
10841            audience: "tlist-audience".to_string(),
10842            tenant_claim_name: "solo_tenant".to_string(),
10843        };
10844        let h = Harness::new_with_auth_config(&runtime, Some(auth));
10845        let r = h.router.clone();
10846
10847        runtime.block_on(async {
10848            seed_three_tenants(&h.registry).await;
10849            // Mint a token claiming a tenant that IS a valid TenantId
10850            // (passes middleware) but doesn't exist in the index.
10851            let token = mint_idp_token(&server_uri, kid, &secret, "nonexistent", "tlist-audience");
10852            let (status, body) = call_with_auth(
10853                r,
10854                "GET",
10855                "/v1/tenants",
10856                None,
10857                Some(&format!("Bearer {token}")),
10858            )
10859            .await;
10860            assert_eq!(
10861                status,
10862                StatusCode::OK,
10863                "must be 200 OK, not 404 — don't leak tenant existence: {body}"
10864            );
10865            let arr = body["tenants"].as_array().expect("tenants array");
10866            assert_eq!(
10867                arr.len(),
10868                0,
10869                "unmatched OIDC claim must produce empty list, got: {body}"
10870            );
10871        });
10872        h.shutdown(&runtime);
10873    }
10874
10875    /// 5. JSON response shape matches what solo-web's TypeScript
10876    ///    client expects: `tenants[*].{id,display_name,created_at_ms,
10877    ///    status,quota_bytes,episode_count,size_bytes,pct_used,
10878    ///    last_accessed_ms}`. Catches accidental field renames at PR
10879    ///    time.
10880    ///
10881    ///    v0.10.1: `episode_count` / `size_bytes` / `pct_used` are
10882    ///    hydrated when the per-tenant DB file exists. This test
10883    ///    registers a tenant whose DB file does NOT exist (the
10884    ///    `for_tests_with_single_tenant` harness only writes the
10885    ///    `default` tenant's DB), so the three numeric fields land as
10886    ///    JSON `null` — verifying the `null` JSON value (not absence)
10887    ///    so clients see a stable shape regardless of hydration
10888    ///    success.
10889    #[test]
10890    fn tenants_response_shape_matches_solo_web_types() {
10891        let runtime = rt();
10892        let h = Harness::new(&runtime);
10893        let r = h.router.clone();
10894        runtime.block_on(async {
10895            // Register one tenant with a display_name + quota so all
10896            // optional fields are present in the response.
10897            let tid = solo_core::TenantId::new("shaped").unwrap();
10898            h.registry
10899                .with_index(|idx| {
10900                    idx.register_with_quota(
10901                        &tid,
10902                        "shaped.db",
10903                        Some("Shaped tenant"),
10904                        Some(1_048_576),
10905                    )
10906                    .unwrap();
10907                })
10908                .await;
10909            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10910            assert_eq!(status, StatusCode::OK);
10911            let item = &body["tenants"][0];
10912            // id, display_name, created_at_ms, status: required
10913            assert_eq!(item["id"].as_str(), Some("shaped"));
10914            assert_eq!(item["display_name"].as_str(), Some("Shaped tenant"));
10915            assert!(
10916                item["created_at_ms"].is_i64(),
10917                "created_at_ms must be an i64, got {item}"
10918            );
10919            assert_eq!(item["status"].as_str(), Some("active"));
10920            // quota_bytes: present + numeric
10921            assert_eq!(item["quota_bytes"].as_u64(), Some(1_048_576));
10922            // v0.10.1: episode_count / size_bytes / pct_used become
10923            // null when the per-tenant DB file is missing on disk
10924            // (this harness only writes the default tenant's file —
10925            // shaped.db does not exist). Clients must tolerate the
10926            // null JSON shape; absence would be a breaking change.
10927            assert!(
10928                item["episode_count"].is_null(),
10929                "episode_count must be JSON null when tenant DB is missing, got {item}"
10930            );
10931            assert!(
10932                item["size_bytes"].is_null(),
10933                "size_bytes must be JSON null when tenant DB is missing, got {item}"
10934            );
10935            assert!(
10936                item["pct_used"].is_null(),
10937                "pct_used must be JSON null when size_bytes is null, got {item}"
10938            );
10939        });
10940        h.shutdown(&runtime);
10941    }
10942
10943    /// 6. Bearer auth enabled + missing Authorization header → 401
10944    ///    before the handler runs. Confirms the route is plumbed
10945    ///    through `auth_middleware` (it sits inside the `authed`
10946    ///    sub-router, not the `public` one).
10947    /// `last_accessed_ms` is observational but user-facing: the
10948    /// registry stamps it when a tenant is resolved, and `/v1/tenants`
10949    /// must surface that value for solo-web's tenant/status UI. Listing
10950    /// alone must not fake a touch; a real tenant-scoped request should.
10951    #[test]
10952    fn tenants_response_surfaces_last_accessed_after_tenant_request() {
10953        let runtime = rt();
10954        let h = Harness::new(&runtime);
10955        let r = h.router.clone();
10956        runtime.block_on(async {
10957            let tid = solo_core::TenantId::default_tenant();
10958            h.registry
10959                .with_index(|idx| {
10960                    idx.register(&tid, "default.db", Some("Default tenant"))
10961                        .unwrap();
10962                })
10963                .await;
10964
10965            let (status, before_body) = call(r.clone(), "GET", "/v1/tenants", None).await;
10966            assert_eq!(status, StatusCode::OK);
10967            let before_item = &before_body["tenants"][0];
10968            assert_eq!(before_item["id"].as_str(), Some("default"));
10969            assert!(
10970                before_item["last_accessed_ms"].is_null(),
10971                "freshly registered tenant should start untouched: {before_item}"
10972            );
10973
10974            let before_touch_ms = chrono::Utc::now().timestamp_millis();
10975            let (graph_status, graph_body) = call(
10976                r.clone(),
10977                "GET",
10978                "/v1/graph/nodes?kind=episode&limit=1",
10979                None,
10980            )
10981            .await;
10982            assert_eq!(graph_status, StatusCode::OK, "graph body: {graph_body}");
10983
10984            let (status, after_body) = call(r, "GET", "/v1/tenants", None).await;
10985            assert_eq!(status, StatusCode::OK);
10986            let after_item = &after_body["tenants"][0];
10987            let last_accessed = after_item["last_accessed_ms"]
10988                .as_i64()
10989                .unwrap_or_else(|| panic!("last_accessed_ms must be stamped: {after_item}"));
10990            assert!(
10991                last_accessed >= before_touch_ms,
10992                "last_accessed_ms should reflect the graph request touch: {after_item}"
10993            );
10994        });
10995        h.shutdown(&runtime);
10996    }
10997
10998    #[test]
10999    fn tenants_respects_auth_when_enabled() {
11000        let runtime = rt();
11001        let h = Harness::new_with_auth(&runtime, Some("must-auth".into()));
11002        let r = h.router.clone();
11003        runtime.block_on(async {
11004            seed_three_tenants(&h.registry).await;
11005            // No Authorization header → 401.
11006            let (status, _body) = call(r, "GET", "/v1/tenants", None).await;
11007            assert_eq!(status, StatusCode::UNAUTHORIZED);
11008        });
11009        h.shutdown(&runtime);
11010    }
11011
11012    /// 7. `PendingMigration` and `PendingDelete` rows are excluded
11013    ///    from the response. solo-web's tenant picker should never
11014    ///    surface a row that's mid-admin-operation (race with admin
11015    ///    tooling). Only Active tenants make the list.
11016    #[test]
11017    fn tenants_status_filter_excludes_non_active() {
11018        let runtime = rt();
11019        let h = Harness::new(&runtime);
11020        let r = h.router.clone();
11021        runtime.block_on(async {
11022            // Three tenants, three statuses. Only `keeper` (Active)
11023            // should appear on the wire.
11024            let keeper = solo_core::TenantId::new("keeper").unwrap();
11025            let migrating = solo_core::TenantId::new("migrating").unwrap();
11026            let deleting = solo_core::TenantId::new("deleting").unwrap();
11027            h.registry
11028                .with_index(|idx| {
11029                    idx.register(&keeper, "keeper.db", None).unwrap();
11030                    idx.register_with_status(
11031                        &migrating,
11032                        "migrating.db",
11033                        None,
11034                        solo_storage::TenantStatus::PendingMigration,
11035                    )
11036                    .unwrap();
11037                    idx.register_with_status(
11038                        &deleting,
11039                        "deleting.db",
11040                        None,
11041                        solo_storage::TenantStatus::PendingDelete,
11042                    )
11043                    .unwrap();
11044                })
11045                .await;
11046            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11047            assert_eq!(status, StatusCode::OK);
11048            let arr = body["tenants"].as_array().expect("tenants array");
11049            let ids: Vec<&str> = arr.iter().filter_map(|t| t["id"].as_str()).collect();
11050            assert_eq!(
11051                ids,
11052                vec!["keeper"],
11053                "only Active tenants visible; got: {body}"
11054            );
11055        });
11056        h.shutdown(&runtime);
11057    }
11058
11059    /// 8. Empty registry → `200 OK` with `tenants: []`. Defends
11060    ///    against accidental `None` serialisation or 404'ing on an
11061    ///    empty list. solo-web's first paint on a brand-new daemon
11062    ///    needs an empty array to render the "no tenants yet" state.
11063    #[test]
11064    fn tenants_returns_empty_array_when_no_tenants_registered() {
11065        let runtime = rt();
11066        let h = Harness::new(&runtime);
11067        let r = h.router.clone();
11068        runtime.block_on(async {
11069            // Don't seed anything — the harness's in-memory index
11070            // starts at zero rows (the cached default-tenant handle in
11071            // the HashMap is invisible to `list_active`).
11072            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11073            assert_eq!(status, StatusCode::OK);
11074            let arr = body["tenants"].as_array().expect("tenants array");
11075            assert_eq!(arr.len(), 0, "expected empty array, got: {body}");
11076        });
11077        h.shutdown(&runtime);
11078    }
11079
11080    // ---- v0.10.1: cost-number hydration tests ----
11081    //
11082    // These exercise `TenantRegistry::hydrate_tenant_cost_numbers` end-
11083    // to-end through the `/v1/tenants` handler. The harness's
11084    // `for_tests_with_single_tenant` registry uses a plain-SQLite tenant
11085    // DB (not real SQLCipher); the hydration helper has a fallback
11086    // open path for that case (see registry.rs). The
11087    // `_tmp_dir/tenants/<filename>` layout matters: that's where the
11088    // hydration helper looks. These tests create real files there to
11089    // exercise the size_bytes path; episode_count requires the file to
11090    // be a SQLite DB with the `episodes` table.
11091    //
11092    // The `default` tenant exists at `_tmp_dir/test.db` (set by the
11093    // harness); the hydration helper expects `_tmp_dir/tenants/<file>`.
11094    // So we either (a) register a fresh tenant id pointing at a DB we
11095    // create at the expected layout, or (b) check the documented
11096    // behavior under "file missing" (returns null counts gracefully).
11097    // Both shapes are tested here.
11098    //
11099    // The constant `TENANTS_COUNT_HYDRATION_CAP` is grep-able.
11100
11101    /// Helper: create a per-tenant DB file at the layout the hydration
11102    /// helper expects (`<data_dir>/tenants/<db_filename>`), populated
11103    /// with the `episodes` table + `n_active` active episodes +
11104    /// `n_forgotten` forgotten episodes. Returns the absolute path.
11105    fn seed_per_tenant_db_with_episodes(
11106        data_dir: &std::path::Path,
11107        db_filename: &str,
11108        n_active: i64,
11109        n_forgotten: i64,
11110    ) -> std::path::PathBuf {
11111        let tenants_dir = data_dir.join(solo_storage::TENANTS_SUBDIR);
11112        std::fs::create_dir_all(&tenants_dir).unwrap();
11113        let db_path = tenants_dir.join(db_filename);
11114        // Open as plain SQLite (test path; matches the harness's
11115        // `open_test_db_at` shape; hydration helper falls back to plain
11116        // open when SQLCipher open fails).
11117        let mut conn = rusqlite::Connection::open(&db_path).unwrap();
11118        // Run the same migrations the real per-tenant DB does so the
11119        // `episodes` table + `status` CHECK constraint match production.
11120        solo_storage::run_migrations(&mut conn).unwrap();
11121        for i in 0..n_active {
11122            conn.execute(
11123                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
11124                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'active', 0, 0)",
11125                rusqlite::params![format!("a-{i}")],
11126            )
11127            .unwrap();
11128        }
11129        for i in 0..n_forgotten {
11130            conn.execute(
11131                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
11132                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'forgotten', 0, 0)",
11133                rusqlite::params![format!("f-{i}")],
11134            )
11135            .unwrap();
11136        }
11137        drop(conn);
11138        db_path
11139    }
11140
11141    /// v0.10.1 test 1: `episode_count` hydrates to the actual active
11142    /// episode count when the per-tenant DB exists. Seed 3 active + 2
11143    /// forgotten episodes; expect `episode_count: 3` (the `status =
11144    /// 'active'` filter excludes the forgotten rows).
11145    #[test]
11146    fn tenants_response_hydrates_episode_count_when_tenant_has_data() {
11147        let runtime = rt();
11148        let h = Harness::new(&runtime);
11149        let r = h.router.clone();
11150        let data_dir = h._tmp.path().to_path_buf();
11151        runtime.block_on(async {
11152            let tid = solo_core::TenantId::new("counted").unwrap();
11153            seed_per_tenant_db_with_episodes(&data_dir, "counted.db", 3, 2);
11154            h.registry
11155                .with_index(|idx| {
11156                    idx.register(&tid, "counted.db", Some("Counted tenant"))
11157                        .unwrap();
11158                })
11159                .await;
11160            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11161            assert_eq!(status, StatusCode::OK);
11162            let item = &body["tenants"][0];
11163            assert_eq!(item["id"].as_str(), Some("counted"));
11164            assert_eq!(
11165                item["episode_count"].as_i64(),
11166                Some(3),
11167                "episode_count must be 3 (active rows only, 2 forgotten excluded); got {item}"
11168            );
11169        });
11170        h.shutdown(&runtime);
11171    }
11172
11173    /// v0.10.1 test 2: `size_bytes` reports the on-disk size of the
11174    /// per-tenant DB file. Asserts the response value matches
11175    /// `std::fs::metadata(<db_path>).len()` exactly — pins that we
11176    /// read the right file, not e.g. data_dir or a temp.
11177    #[test]
11178    fn tenants_response_hydrates_size_bytes_from_db_file() {
11179        let runtime = rt();
11180        let h = Harness::new(&runtime);
11181        let r = h.router.clone();
11182        let data_dir = h._tmp.path().to_path_buf();
11183        runtime.block_on(async {
11184            let tid = solo_core::TenantId::new("sized").unwrap();
11185            let db_path = seed_per_tenant_db_with_episodes(&data_dir, "sized.db", 1, 0);
11186            h.registry
11187                .with_index(|idx| {
11188                    idx.register(&tid, "sized.db", None).unwrap();
11189                })
11190                .await;
11191            let on_disk = std::fs::metadata(&db_path).unwrap().len();
11192            assert!(on_disk > 0, "test setup: db file should be non-empty");
11193            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11194            assert_eq!(status, StatusCode::OK);
11195            let item = &body["tenants"][0];
11196            assert_eq!(item["id"].as_str(), Some("sized"));
11197            assert_eq!(
11198                item["size_bytes"].as_u64(),
11199                Some(on_disk),
11200                "size_bytes must match fs::metadata; got {item}"
11201            );
11202        });
11203        h.shutdown(&runtime);
11204    }
11205
11206    /// v0.10.1 test 3: `pct_used` is computed from `size_bytes /
11207    /// quota_bytes * 100` when both are known. Pick a quota much
11208    /// larger than the DB so the percentage stays in a sane range
11209    /// (and survives any unrelated DB-page padding).
11210    #[test]
11211    fn tenants_response_computes_pct_used_when_quota_set() {
11212        let runtime = rt();
11213        let h = Harness::new(&runtime);
11214        let r = h.router.clone();
11215        let data_dir = h._tmp.path().to_path_buf();
11216        runtime.block_on(async {
11217            let tid = solo_core::TenantId::new("quoted").unwrap();
11218            let db_path = seed_per_tenant_db_with_episodes(&data_dir, "quoted.db", 1, 0);
11219            // Pick a quota that's large enough that pct_used lands
11220            // between 0 and 50% regardless of SQLite page boundary
11221            // rounding. Asserting an exact float would be flaky.
11222            let on_disk = std::fs::metadata(&db_path).unwrap().len();
11223            let quota = on_disk * 4; // pct_used should be ~25%
11224            h.registry
11225                .with_index(|idx| {
11226                    idx.register_with_quota(&tid, "quoted.db", None, Some(quota))
11227                        .unwrap();
11228                })
11229                .await;
11230            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11231            assert_eq!(status, StatusCode::OK);
11232            let item = &body["tenants"][0];
11233            let pct = item["pct_used"]
11234                .as_f64()
11235                .expect("pct_used must be a number");
11236            assert!(
11237                (0.0..=100.0).contains(&pct),
11238                "pct_used must be in [0, 100], got {pct}"
11239            );
11240            // Allow a wide band — exact value depends on SQLite page
11241            // size — but the recipe (size/quota*100) means a
11242            // size=quota/4 setup must land near 25%.
11243            assert!(
11244                (20.0..=30.0).contains(&pct),
11245                "pct_used must be ~25% for size=quota/4, got {pct}"
11246            );
11247        });
11248        h.shutdown(&runtime);
11249    }
11250
11251    /// v0.10.1 test 4: `pct_used` is `null` when `quota_bytes` is
11252    /// null (the "unlimited" case). Pins that we don't accidentally
11253    /// emit a numeric `0.0` or `100.0` for unlimited quotas.
11254    #[test]
11255    fn tenants_response_pct_used_null_when_quota_null() {
11256        let runtime = rt();
11257        let h = Harness::new(&runtime);
11258        let r = h.router.clone();
11259        let data_dir = h._tmp.path().to_path_buf();
11260        runtime.block_on(async {
11261            let tid = solo_core::TenantId::new("unlimited").unwrap();
11262            seed_per_tenant_db_with_episodes(&data_dir, "unlimited.db", 1, 0);
11263            h.registry
11264                .with_index(|idx| {
11265                    idx.register(&tid, "unlimited.db", None).unwrap();
11266                })
11267                .await;
11268            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11269            assert_eq!(status, StatusCode::OK);
11270            let item = &body["tenants"][0];
11271            assert_eq!(item["id"].as_str(), Some("unlimited"));
11272            assert!(
11273                item["quota_bytes"].is_null(),
11274                "test setup: quota_bytes must be null, got {item}"
11275            );
11276            assert!(
11277                item["pct_used"].is_null(),
11278                "pct_used must be JSON null when quota_bytes is null, got {item}"
11279            );
11280            // size_bytes still present (no quota doesn't suppress
11281            // size — only pct_used).
11282            assert!(
11283                item["size_bytes"].is_u64(),
11284                "size_bytes must still be present when quota_bytes is null, got {item}"
11285            );
11286        });
11287        h.shutdown(&runtime);
11288    }
11289
11290    /// v0.10.1 test 5: the response includes
11291    /// `X-Solo-Tenants-Count-Cap-Reached: true` when the filtered
11292    /// tenant count exceeds `TENANTS_COUNT_HYDRATION_CAP`. Tenants
11293    /// beyond the cap have `episode_count: null` even though their
11294    /// `size_bytes` is still hydrated (fs::metadata is cheap).
11295    ///
11296    /// We don't seed 51 real DBs (would be slow); instead, we
11297    /// register 51 tenant rows in the index. The cap is documented
11298    /// to apply to `episode_count` hydration, and the header is
11299    /// emitted purely from the count of filtered records. The
11300    /// header semantics here are independent of per-tenant DB
11301    /// existence.
11302    #[test]
11303    fn tenants_response_sets_cap_reached_header_when_over_cap() {
11304        let runtime = rt();
11305        let h = Harness::new(&runtime);
11306        let r = h.router.clone();
11307        runtime.block_on(async {
11308            // Register 51 tenants (cap = 50, so we exceed it).
11309            h.registry
11310                .with_index(|idx| {
11311                    for i in 0..51 {
11312                        let id = format!("t{i:02}");
11313                        let tid = solo_core::TenantId::new(&id).unwrap();
11314                        idx.register(&tid, &format!("{id}.db"), None).unwrap();
11315                    }
11316                })
11317                .await;
11318            // Send a raw request so we can inspect headers.
11319            use axum::body::Body;
11320            use axum::http::Request;
11321            use http_body_util::BodyExt;
11322            let req = Request::builder()
11323                .method("GET")
11324                .uri("/v1/tenants")
11325                .body(Body::empty())
11326                .unwrap();
11327            let resp = r.oneshot(req).await.unwrap();
11328            assert_eq!(resp.status(), StatusCode::OK);
11329            let cap_header = resp
11330                .headers()
11331                .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
11332                .expect("cap-reached header must be present");
11333            assert_eq!(
11334                cap_header.to_str().unwrap(),
11335                "true",
11336                "cap-reached header value must be 'true' when over cap"
11337            );
11338            // Parse body to verify shape — beyond-cap tenants have
11339            // null episode_count.
11340            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
11341            let body: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
11342            let arr = body["tenants"].as_array().expect("tenants array");
11343            assert_eq!(arr.len(), 51, "got {} tenants", arr.len());
11344            // The last (sorted-by-created_at_ms) tenant should be
11345            // beyond the cap. The hydration order matches the
11346            // filtered list order, so index 50 is the 51st tenant
11347            // and should have null episode_count.
11348            assert!(
11349                arr[50]["episode_count"].is_null(),
11350                "the 51st tenant (beyond cap) must have null episode_count, got {}",
11351                arr[50]
11352            );
11353        });
11354        h.shutdown(&runtime);
11355    }
11356
11357    /// v0.10.1 test 6: when the response is under the cap, the
11358    /// `X-Solo-Tenants-Count-Cap-Reached` header is absent. Pin the
11359    /// negative case so a future refactor that always emits the
11360    /// header (with "false") doesn't pass silently.
11361    #[test]
11362    fn tenants_response_omits_cap_header_when_under_cap() {
11363        let runtime = rt();
11364        let h = Harness::new(&runtime);
11365        let r = h.router.clone();
11366        runtime.block_on(async {
11367            seed_three_tenants(&h.registry).await;
11368            use axum::body::Body;
11369            use axum::http::Request;
11370            let req = Request::builder()
11371                .method("GET")
11372                .uri("/v1/tenants")
11373                .body(Body::empty())
11374                .unwrap();
11375            let resp = r.oneshot(req).await.unwrap();
11376            assert_eq!(resp.status(), StatusCode::OK);
11377            assert!(
11378                resp.headers()
11379                    .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
11380                    .is_none(),
11381                "cap-reached header must be absent under the cap"
11382            );
11383        });
11384        h.shutdown(&runtime);
11385    }
11386
11387    // ---- Pure unit tests on the visibility filter ----
11388    //
11389    // These exercise `filter_tenants_for_principal` and
11390    // `is_single_principal_bearer` without an axum router — fast
11391    // feedback for the load-bearing visibility rule. The
11392    // router-level tests above cover the wire path.
11393
11394    /// Build a synthetic `TenantRecord` so the pure unit tests don't
11395    /// need a real SQLCipher round-trip.
11396    fn make_record(id: &str) -> solo_storage::TenantRecord {
11397        solo_storage::TenantRecord {
11398            tenant_id: solo_core::TenantId::new(id).unwrap(),
11399            db_filename: format!("{id}.db"),
11400            display_name: None,
11401            created_at_ms: 0,
11402            status: solo_storage::TenantStatus::Active,
11403            quota_bytes: None,
11404            last_accessed_ms: None,
11405        }
11406    }
11407
11408    #[test]
11409    fn filter_no_principal_returns_all() {
11410        let records = vec![make_record("a"), make_record("b")];
11411        let out = filter_tenants_for_principal(records.clone(), None);
11412        assert_eq!(out.len(), 2);
11413        assert_eq!(out[0].tenant_id.as_str(), "a");
11414        assert_eq!(out[1].tenant_id.as_str(), "b");
11415    }
11416
11417    #[test]
11418    fn filter_bearer_principal_returns_all() {
11419        let records = vec![make_record("a"), make_record("b")];
11420        let p = AuthenticatedPrincipal::bearer(solo_core::TenantId::new("a").unwrap());
11421        let out = filter_tenants_for_principal(records, Some(&p));
11422        assert_eq!(out.len(), 2);
11423    }
11424
11425    #[test]
11426    fn filter_oidc_principal_keeps_only_claim() {
11427        let records = vec![make_record("a"), make_record("b"), make_record("c")];
11428        // OIDC-flavoured principal: non-bearer subject + JSON-object claims.
11429        let p = AuthenticatedPrincipal {
11430            subject: "alice@example.com".to_string(),
11431            tenant_claim: Some(solo_core::TenantId::new("b").unwrap()),
11432            scopes: vec!["read".to_string()],
11433            claims: serde_json::json!({ "sub": "alice@example.com" }),
11434        };
11435        let out = filter_tenants_for_principal(records, Some(&p));
11436        assert_eq!(out.len(), 1);
11437        assert_eq!(out[0].tenant_id.as_str(), "b");
11438    }
11439
11440    #[test]
11441    fn filter_oidc_principal_with_no_claim_returns_empty() {
11442        // Theoretically unreachable — middleware short-circuits at 403
11443        // before we see a no-claim OIDC principal. Defend anyway.
11444        let records = vec![make_record("a")];
11445        let p = AuthenticatedPrincipal {
11446            subject: "alice@example.com".to_string(),
11447            tenant_claim: None,
11448            scopes: vec![],
11449            claims: serde_json::json!({ "sub": "alice@example.com" }),
11450        };
11451        let out = filter_tenants_for_principal(records, Some(&p));
11452        assert!(out.is_empty());
11453    }
11454
11455    #[test]
11456    fn is_single_principal_bearer_discriminator() {
11457        let bearer = AuthenticatedPrincipal::bearer(solo_core::TenantId::new("default").unwrap());
11458        assert!(is_single_principal_bearer(&bearer));
11459
11460        let oidc = AuthenticatedPrincipal {
11461            subject: "alice".to_string(),
11462            tenant_claim: Some(solo_core::TenantId::new("alice").unwrap()),
11463            scopes: vec![],
11464            claims: serde_json::json!({ "x": 1 }),
11465        };
11466        assert!(!is_single_principal_bearer(&oidc));
11467
11468        // Subject == "bearer" but claims is a non-null object → not a
11469        // bearer-shaped principal. Defends against a forged-bearer
11470        // shape that might smuggle JWT claims.
11471        let weird = AuthenticatedPrincipal {
11472            subject: "bearer".to_string(),
11473            tenant_claim: Some(solo_core::TenantId::default_tenant()),
11474            scopes: vec![],
11475            claims: serde_json::json!({ "leak": 1 }),
11476        };
11477        assert!(!is_single_principal_bearer(&weird));
11478    }
11479
11480    // ---------------------------------------------------------------
11481    // v0.10.2 — MCP-over-HTTP transport on /mcp
11482    // ---------------------------------------------------------------
11483    //
11484    // These tests pin the wire contract for the new `/mcp` route added
11485    // in v0.10.2 P2. We exercise the route through the same `Harness`
11486    // pattern the rest of the file uses (in-process axum Router via
11487    // `tower::ServiceExt::oneshot`) — no real TCP listener needed.
11488    //
11489    // The dispatcher's unit tests live in `mcp_dispatch::tests` and
11490    // cover the JSON-RPC envelope shape in isolation. These tests are
11491    // the integration layer: real `TenantHandle`, real `WriterActor`,
11492    // real `SoloMcpServer::dispatch_tool` path.
11493
11494    /// `POST /mcp` with `{jsonrpc, id, method: "tools/list"}` returns
11495    /// the canonical 18 tools. Matches the stdio smoke test
11496    /// `mcp_stdio_lists_eighteen_canonical_tools` from
11497    /// `crates/solo-cli/tests/mcp_smoke.rs` so any drift between the
11498    /// two transports fails one of the two suites loudly.
11499    #[test]
11500    fn mcp_http_tools_list_returns_eighteen_canonical_tools() {
11501        let runtime = rt();
11502        let h = Harness::new(&runtime);
11503        let r = h.router.clone();
11504        runtime.block_on(async move {
11505            let req = json!({
11506                "jsonrpc": "2.0",
11507                "id": 1,
11508                "method": "tools/list",
11509            });
11510            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11511            assert_eq!(status, StatusCode::OK);
11512            assert_eq!(body.get("jsonrpc").and_then(|v| v.as_str()), Some("2.0"));
11513            assert_eq!(body.get("id").and_then(|v| v.as_i64()), Some(1));
11514            let tools = body
11515                .pointer("/result/tools")
11516                .and_then(|v| v.as_array())
11517                .unwrap_or_else(|| panic!("missing /result/tools: {body}"));
11518            let mut names: Vec<String> = tools
11519                .iter()
11520                .filter_map(|t| t.get("name").and_then(|n| n.as_str()).map(String::from))
11521                .collect();
11522            names.sort();
11523            assert_eq!(
11524                names,
11525                vec![
11526                    "memory_context".to_string(),
11527                    "memory_contradiction_resolve".to_string(),
11528                    "memory_contradictions".to_string(),
11529                    "memory_entities".to_string(),
11530                    "memory_facts_about".to_string(),
11531                    "memory_forget".to_string(),
11532                    "memory_forget_document".to_string(),
11533                    "memory_ingest_document".to_string(),
11534                    "memory_inspect".to_string(),
11535                    "memory_inspect_cluster".to_string(),
11536                    "memory_inspect_document".to_string(),
11537                    "memory_list_documents".to_string(),
11538                    "memory_recall".to_string(),
11539                    "memory_remember".to_string(),
11540                    "memory_remember_batch".to_string(),
11541                    "memory_search_docs".to_string(),
11542                    "memory_themes".to_string(),
11543                    "memory_update".to_string(),
11544                ],
11545                "mcp_http: tools/list returned unexpected name set"
11546            );
11547        });
11548        h.shutdown(&runtime);
11549    }
11550
11551    /// `POST /mcp` with `tools/call` for `memory_remember` writes the
11552    /// episode and returns a confirmation string. Then a separate
11553    /// `GET /v1/graph/nodes` call (REST surface) sees the episode —
11554    /// proving one process is serving both surfaces against the same
11555    /// writer.
11556    #[test]
11557    fn mcp_http_remember_writes_episode_visible_via_graph_nodes() {
11558        let runtime = rt();
11559        let h = Harness::new(&runtime);
11560        let r = h.router.clone();
11561        runtime.block_on(async move {
11562            // 1. memory_remember via /mcp.
11563            let req = json!({
11564                "jsonrpc": "2.0",
11565                "id": 2,
11566                "method": "tools/call",
11567                "params": {
11568                    "name": "memory_remember",
11569                    "arguments": { "content": "mcp-http-cross-surface-smoke" },
11570                },
11571            });
11572            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11573            assert_eq!(status, StatusCode::OK);
11574            let result_text = body
11575                .pointer("/result/content/0/text")
11576                .and_then(|v| v.as_str())
11577                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11578            assert!(
11579                result_text.starts_with("remembered "),
11580                "expected `remembered <id>`, got: {result_text}"
11581            );
11582
11583            // 2. Confirm via /v1/graph/nodes (REST). Same writer, same
11584            //    tenant — the cross-surface smoke that motivates v0.10.2.
11585            //    Episode nodes carry the content under `label` +
11586            //    `preview` (the v0.10.0 graph-nodes wire shape).
11587            let (status2, nodes_body) =
11588                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
11589            assert_eq!(status2, StatusCode::OK);
11590            let nodes = nodes_body
11591                .get("nodes")
11592                .and_then(|v| v.as_array())
11593                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
11594            assert!(
11595                nodes.iter().any(|n| {
11596                    let label_hit = n
11597                        .get("label")
11598                        .and_then(|c| c.as_str())
11599                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
11600                    let preview_hit = n
11601                        .get("preview")
11602                        .and_then(|c| c.as_str())
11603                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
11604                    label_hit || preview_hit
11605                }),
11606                "graph/nodes didn't surface the MCP-written episode: {nodes_body}"
11607            );
11608        });
11609        h.shutdown(&runtime);
11610    }
11611
11612    /// `memory_remember_batch` must accept the canonical `{ items: [...] }`
11613    /// argument envelope and land all rows in the same graph REST surface.
11614    /// This is the batch variant of the cross-surface smoke above and
11615    /// protects external clients from drifting to a renamed field.
11616    #[test]
11617    fn mcp_http_remember_batch_items_visible_via_graph_nodes() {
11618        let runtime = rt();
11619        let h = Harness::new(&runtime);
11620        let r = h.router.clone();
11621        runtime.block_on(async move {
11622            let marker_a = "mcp-http-batch-cross-surface-smoke-a";
11623            let marker_b = "mcp-http-batch-cross-surface-smoke-b";
11624            let req = json!({
11625                "jsonrpc": "2.0",
11626                "id": 22,
11627                "method": "tools/call",
11628                "params": {
11629                    "name": "memory_remember_batch",
11630                    "arguments": {
11631                        "items": [
11632                            { "content": marker_a, "source_type": "smoke", "salience": 0.7 },
11633                            { "content": marker_b, "source_type": "smoke", "salience": 0.7 }
11634                        ]
11635                    },
11636                },
11637            });
11638            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11639            assert_eq!(status, StatusCode::OK, "batch body: {body}");
11640            let result_text = body
11641                .pointer("/result/content/0/text")
11642                .and_then(|v| v.as_str())
11643                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11644            let ids: Vec<String> = serde_json::from_str(result_text)
11645                .unwrap_or_else(|e| panic!("batch result should be JSON id array: {e}: {body}"));
11646            assert_eq!(ids.len(), 2, "two items in, two ids out: {result_text}");
11647
11648            let (status2, nodes_body) =
11649                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
11650            assert_eq!(status2, StatusCode::OK);
11651            let nodes = nodes_body
11652                .get("nodes")
11653                .and_then(|v| v.as_array())
11654                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
11655            for marker in [marker_a, marker_b] {
11656                assert!(
11657                    nodes.iter().any(|n| {
11658                        let label_hit = n
11659                            .get("label")
11660                            .and_then(|c| c.as_str())
11661                            .is_some_and(|s| s.contains(marker));
11662                        let preview_hit = n
11663                            .get("preview")
11664                            .and_then(|c| c.as_str())
11665                            .is_some_and(|s| s.contains(marker));
11666                        label_hit || preview_hit
11667                    }),
11668                    "graph/nodes didn't surface batch marker {marker}: {nodes_body}"
11669                );
11670            }
11671        });
11672        h.shutdown(&runtime);
11673    }
11674
11675    /// `memory_remember_batch`'s HTTP MCP contract is the canonical
11676    /// `{ items: [...] }` envelope. A drift back to `{ entries: [...] }`
11677    /// must fail as JSON-RPC invalid_params, not silently accept or
11678    /// partially write.
11679    #[test]
11680    fn mcp_http_remember_batch_rejects_entries_envelope() {
11681        let runtime = rt();
11682        let h = Harness::new(&runtime);
11683        let r = h.router.clone();
11684        runtime.block_on(async move {
11685            let req = json!({
11686                "jsonrpc": "2.0",
11687                "id": 23,
11688                "method": "tools/call",
11689                "params": {
11690                    "name": "memory_remember_batch",
11691                    "arguments": {
11692                        "entries": [
11693                            { "content": "legacy-entries-envelope" }
11694                        ]
11695                    },
11696                },
11697            });
11698            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11699            assert_eq!(
11700                status,
11701                StatusCode::OK,
11702                "JSON-RPC errors stay in-body: {body}"
11703            );
11704            assert_eq!(
11705                body.pointer("/error/code").and_then(|v| v.as_i64()),
11706                Some(-32602),
11707                "expected JSON-RPC INVALID_PARAMS (-32602), got: {body}"
11708            );
11709            let message = body
11710                .pointer("/error/message")
11711                .and_then(|v| v.as_str())
11712                .unwrap_or("");
11713            assert!(
11714                message.contains("invalid tool arguments") && message.contains("items"),
11715                "error must point clients back to the canonical items envelope: {body}"
11716            );
11717        });
11718        h.shutdown(&runtime);
11719    }
11720
11721    /// `POST /mcp` with `tools/call` for `memory_recall` returns the
11722    /// just-remembered episode. Smoke for the read path under the new
11723    /// transport.
11724    #[test]
11725    fn mcp_http_recall_returns_just_remembered_episode() {
11726        let runtime = rt();
11727        let h = Harness::new(&runtime);
11728        let r = h.router.clone();
11729        runtime.block_on(async move {
11730            // Remember first.
11731            let needle = "mcp-http-recall-needle-deadbeef";
11732            let req = json!({
11733                "jsonrpc": "2.0",
11734                "id": 3,
11735                "method": "tools/call",
11736                "params": {
11737                    "name": "memory_remember",
11738                    "arguments": { "content": needle },
11739                },
11740            });
11741            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11742            assert_eq!(status, StatusCode::OK);
11743
11744            // Recall via the same /mcp transport.
11745            let req = json!({
11746                "jsonrpc": "2.0",
11747                "id": 4,
11748                "method": "tools/call",
11749                "params": {
11750                    "name": "memory_recall",
11751                    "arguments": { "query": needle, "limit": 5 },
11752                },
11753            });
11754            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11755            assert_eq!(status, StatusCode::OK);
11756            let recall_text = body
11757                .pointer("/result/content/0/text")
11758                .and_then(|v| v.as_str())
11759                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11760            assert!(
11761                recall_text.contains(needle),
11762                "recall didn't surface needle `{needle}`: {recall_text}"
11763            );
11764        });
11765        h.shutdown(&runtime);
11766    }
11767
11768    /// Malformed JSON body must surface as 400 (the wire envelope is
11769    /// invalid; the JSON-RPC layer never sees the request). The error
11770    /// body shape matches the rest of the API (`{error, status}`) so
11771    /// existing client error-handling paths keep working.
11772    #[test]
11773    fn mcp_http_malformed_body_returns_400() {
11774        let runtime = rt();
11775        let h = Harness::new(&runtime);
11776        let r = h.router.clone();
11777        runtime.block_on(async move {
11778            let req = Request::builder()
11779                .method("POST")
11780                .uri("/mcp")
11781                .header("content-type", "application/json")
11782                .body(Body::from("not-json-at-all".as_bytes()))
11783                .unwrap();
11784            let resp = r.oneshot(req).await.unwrap();
11785            assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
11786            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
11787            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
11788            assert!(
11789                v.get("error")
11790                    .and_then(|e| e.as_str())
11791                    .map(|s| s.contains("invalid JSON-RPC request"))
11792                    .unwrap_or(false),
11793                "got: {v}"
11794            );
11795        });
11796        h.shutdown(&runtime);
11797    }
11798
11799    /// Wrong `jsonrpc` version must surface as 400. JSON-RPC 2.0 §4
11800    /// requires the literal string `"2.0"`.
11801    #[test]
11802    fn mcp_http_wrong_jsonrpc_version_returns_400() {
11803        let runtime = rt();
11804        let h = Harness::new(&runtime);
11805        let r = h.router.clone();
11806        runtime.block_on(async move {
11807            let req = json!({
11808                "jsonrpc": "1.0",
11809                "id": 1,
11810                "method": "tools/list",
11811            });
11812            let (status, _body) = call(r, "POST", "/mcp", Some(req)).await;
11813            assert_eq!(status, StatusCode::BAD_REQUEST);
11814        });
11815        h.shutdown(&runtime);
11816    }
11817
11818    /// Unknown method returns a JSON-RPC error envelope with code
11819    /// -32601 (METHOD_NOT_FOUND). HTTP status stays 200 because the
11820    /// envelope itself parsed fine — JSON-RPC errors are in-body.
11821    #[test]
11822    fn mcp_http_unknown_method_returns_in_body_method_not_found() {
11823        let runtime = rt();
11824        let h = Harness::new(&runtime);
11825        let r = h.router.clone();
11826        runtime.block_on(async move {
11827            let req = json!({
11828                "jsonrpc": "2.0",
11829                "id": 5,
11830                "method": "definitely/not/a/method",
11831            });
11832            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11833            assert_eq!(status, StatusCode::OK);
11834            assert_eq!(
11835                body.pointer("/error/code").and_then(|v| v.as_i64()),
11836                Some(-32601),
11837                "expected JSON-RPC METHOD_NOT_FOUND (-32601), got: {body}"
11838            );
11839        });
11840        h.shutdown(&runtime);
11841    }
11842
11843    /// `POST /mcp` with the bearer-auth middleware enabled returns
11844    /// 401 without the token and 200 with the correct token.
11845    #[test]
11846    fn mcp_http_post_respects_bearer_auth() {
11847        let runtime = rt();
11848        let h = Harness::new_with_auth(&runtime, Some("secret-mcp-token".into()));
11849        let r = h.router.clone();
11850        runtime.block_on(async move {
11851            // No Authorization header → 401.
11852            let req = json!({
11853                "jsonrpc": "2.0",
11854                "id": 6,
11855                "method": "tools/list",
11856            });
11857            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req.clone())).await;
11858            assert_eq!(status, StatusCode::UNAUTHORIZED);
11859
11860            // With correct bearer → 200 + valid JSON-RPC reply.
11861            let (status, body) = call_with_auth(
11862                r,
11863                "POST",
11864                "/mcp",
11865                Some(req),
11866                Some("Bearer secret-mcp-token"),
11867            )
11868            .await;
11869            assert_eq!(status, StatusCode::OK);
11870            assert_eq!(
11871                body.pointer("/result/tools")
11872                    .and_then(|v| v.as_array())
11873                    .map(|a| a.len()),
11874                Some(18),
11875                "authed tools/list should still return 18 tools: {body}"
11876            );
11877        });
11878        h.shutdown(&runtime);
11879    }
11880
11881    /// `/mcp` goes through the same `TenantExtractor` as REST graph
11882    /// routes. Invalid tenant ids are rejected before JSON-RPC dispatch
11883    /// so clients don't accidentally create a session against a bad
11884    /// tenant key.
11885    #[test]
11886    fn mcp_http_post_rejects_invalid_tenant_header() {
11887        let runtime = rt();
11888        let h = Harness::new(&runtime);
11889        let r = h.router.clone();
11890        runtime.block_on(async move {
11891            let req = json!({
11892                "jsonrpc": "2.0",
11893                "id": 7,
11894                "method": "tools/list",
11895            });
11896            let (status, body) = call_with_tenant(r, "POST", "/mcp", Some(req), "UPPER").await;
11897            assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
11898            let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
11899            assert!(
11900                msg.contains("x-solo-tenant") && msg.contains("invalid tenant id"),
11901                "error must identify the invalid tenant header: {body}"
11902            );
11903        });
11904        h.shutdown(&runtime);
11905    }
11906
11907    /// Unknown tenants should be a route-level 404 on `/mcp`, matching
11908    /// REST. This protects solo-jarvis from receiving a JSON-RPC-looking
11909    /// success envelope for a typoed tenant.
11910    #[test]
11911    fn mcp_http_post_rejects_unknown_tenant_header() {
11912        let runtime = rt();
11913        let h = Harness::new(&runtime);
11914        let r = h.router.clone();
11915        runtime.block_on(async move {
11916            let req = json!({
11917                "jsonrpc": "2.0",
11918                "id": 8,
11919                "method": "tools/list",
11920            });
11921            let (status, body) =
11922                call_with_tenant(r, "POST", "/mcp", Some(req), "never-registered").await;
11923            assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
11924            let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
11925            assert!(
11926                msg.contains("tenant") && msg.contains("not found"),
11927                "error must identify the missing tenant: {body}"
11928            );
11929        });
11930        h.shutdown(&runtime);
11931    }
11932
11933    /// CORS preflight (`OPTIONS /mcp`) from a localhost origin returns
11934    /// 200 (tower-http's CorsLayer handles preflight implicitly) and
11935    /// the `access-control-allow-headers` carries both
11936    /// `x-solo-tenant` and `mcp-session-id`. Pins the v0.10.2
11937    /// allow-list addition.
11938    #[test]
11939    fn mcp_http_cors_preflight_allows_mcp_session_id_header() {
11940        let runtime = rt();
11941        let h = Harness::new(&runtime);
11942        let r = h.router.clone();
11943        runtime.block_on(async move {
11944            let req = Request::builder()
11945                .method("OPTIONS")
11946                .uri("/mcp")
11947                .header("origin", "http://localhost:5173")
11948                .header("access-control-request-method", "POST")
11949                .header(
11950                    "access-control-request-headers",
11951                    "content-type, mcp-session-id, x-solo-tenant, authorization",
11952                )
11953                .body(Body::empty())
11954                .unwrap();
11955            let resp = r.oneshot(req).await.unwrap();
11956            // tower-http CorsLayer returns 200 for permitted preflight.
11957            assert_eq!(resp.status(), StatusCode::OK);
11958            let allow_headers = resp
11959                .headers()
11960                .get("access-control-allow-headers")
11961                .and_then(|h| h.to_str().ok())
11962                .unwrap_or("")
11963                .to_lowercase();
11964            assert!(
11965                allow_headers.contains("mcp-session-id"),
11966                "preflight allow-headers must include mcp-session-id; got: {allow_headers}"
11967            );
11968            assert!(
11969                allow_headers.contains("x-solo-tenant"),
11970                "preflight allow-headers must still include x-solo-tenant; got: {allow_headers}"
11971            );
11972            // Allow-origin must echo the localhost origin (per the
11973            // permissive-localhost predicate).
11974            let allow_origin = resp
11975                .headers()
11976                .get("access-control-allow-origin")
11977                .and_then(|h| h.to_str().ok())
11978                .unwrap_or("");
11979            assert_eq!(allow_origin, "http://localhost:5173");
11980        });
11981        h.shutdown(&runtime);
11982    }
11983
/// Browser-side memory corrections depend on the CORS preflight
/// permitting PATCH. solo-web issues `PATCH /memory/{id}` carrying
/// `content-type` and `x-solo-tenant`; when PATCH is missing from the
/// allow-methods list the browser drops the request before it ever
/// reaches the API.
#[test]
fn memory_update_cors_preflight_allows_patch() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async move {
        let preflight = Request::builder()
            .method("OPTIONS")
            .uri("/memory/ep:test")
            .header("origin", "http://localhost:5173")
            .header("access-control-request-method", "PATCH")
            .header(
                "access-control-request-headers",
                "content-type, x-solo-tenant",
            )
            .body(Body::empty())
            .unwrap();
        let resp = router.oneshot(preflight).await.unwrap();
        assert_eq!(resp.status(), StatusCode::OK);

        // Lower-cased value of one response header; an empty string
        // when the header is absent or `to_str` rejects its bytes.
        let lowered = |name: &str| -> String {
            resp.headers()
                .get(name)
                .and_then(|value| value.to_str().ok())
                .unwrap_or("")
                .to_lowercase()
        };

        let allow_methods = lowered("access-control-allow-methods");
        assert!(
            allow_methods.contains("patch"),
            "preflight allow-methods must include patch; got: {allow_methods}"
        );

        let allow_headers = lowered("access-control-allow-headers");
        assert!(
            allow_headers.contains("x-solo-tenant"),
            "preflight allow-headers must include x-solo-tenant; got: {allow_headers}"
        );

        // The origin is compared verbatim (no case folding).
        let allow_origin = resp
            .headers()
            .get("access-control-allow-origin")
            .and_then(|value| value.to_str().ok());
        assert_eq!(allow_origin, Some("http://localhost:5173"));
    });
    h.shutdown(&runtime);
}
12036
/// A JSON-RPC message without an `id` is a notification: the endpoint
/// answers 202 Accepted with an empty body, because JSON-RPC 2.0 §4.1
/// says the server MUST NOT reply to notifications.
#[test]
fn mcp_http_notification_returns_202_accepted() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async move {
        let notification = json!({
            "jsonrpc": "2.0",
            "method": "notifications/initialized",
            "params": {},
        });
        let (status, body) = call(router, "POST", "/mcp", Some(notification)).await;
        assert_eq!(status, StatusCode::ACCEPTED);
        // `call()` hands back `Value::Null` when the response body is
        // empty, so `Null` here means "nothing was sent".
        assert_eq!(body, Value::Null);
    });
    h.shutdown(&runtime);
}
12058
12059    // ---------------------------------------------------------------
12060    // v0.11.0 P1 — MCP `Mcp-Session-Id` middleware integration tests
12061    // ---------------------------------------------------------------
12062    //
12063    // These pin the per-request session contract: the POST handler
12064    // creates a fresh session id on a request that arrives without
12065    // the header (echoed back via `Mcp-Session-Id` response header);
12066    // a subsequent request carrying that same id continues using the
12067    // same session record; unknown or stale ids surface as 404 with
12068    // a re-init instruction. The lazy/background expiry semantics are
12069    // unit-tested in `mcp_session::tests`.
12070
/// `POST /mcp` with `tools/list` and no `Mcp-Session-Id` header must
/// mint a session: the response echoes a fresh, non-empty id in the
/// `mcp-session-id` header, and that id must parse and resolve to a
/// live entry in the harness's session store (i.e. the id is backed by
/// a real session record, not just a header value).
#[test]
fn mcp_post_without_session_id_creates_new_session() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let r = h.router.clone();
    // Cloned before the async block so the test can inspect the store
    // without moving the harness into the future.
    let store = h.mcp_sessions.clone();
    runtime.block_on(async move {
        let req = Request::builder()
            .method("POST")
            .uri("/mcp")
            .header("content-type", "application/json")
            .body(Body::from(
                serde_json::to_vec(&json!({
                    "jsonrpc": "2.0",
                    "id": 100,
                    "method": "tools/list",
                }))
                .unwrap(),
            ))
            .unwrap();
        let resp = r.oneshot(req).await.unwrap();
        assert_eq!(resp.status(), StatusCode::OK);
        let session_id = resp
            .headers()
            .get("mcp-session-id")
            .and_then(|v| v.to_str().ok())
            .map(|s| s.to_string())
            .unwrap_or_else(|| {
                // Dump every response header so a missing id is easy
                // to diagnose from the failure output.
                panic!(
                    "mcp-session-id response header missing on session-init POST: {:?}",
                    resp.headers()
                )
            });
        assert!(
            !session_id.is_empty(),
            "session id must be a non-empty string"
        );
        // The header alone does not prove a session exists: confirm the
        // echoed id is well-formed and actually registered in the store.
        let parsed = crate::mcp_session::SessionId::parse(&session_id)
            .expect("assigned session id must parse");
        assert!(
            store.get(&parsed).is_some(),
            "assigned session id must be registered in the session store"
        );
    });
    h.shutdown(&runtime);
}
12113
/// Sending a second `POST /mcp` that carries the session id assigned
/// by the first must continue that session rather than start a new
/// one. The observable pinned here is the response header: the second
/// response echoes the very same id back.
#[test]
fn mcp_post_with_valid_session_id_continues_session() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async move {
        // Builds a `tools/list` POST, tagging it with `session` when given.
        let tools_list_post = |rpc_id: u64, session: Option<&str>| {
            let mut builder = Request::builder()
                .method("POST")
                .uri("/mcp")
                .header("content-type", "application/json");
            if let Some(id) = session {
                builder = builder.header("mcp-session-id", id);
            }
            let payload = json!({
                "jsonrpc": "2.0",
                "id": rpc_id,
                "method": "tools/list",
            });
            builder
                .body(Body::from(serde_json::to_vec(&payload).unwrap()))
                .unwrap()
        };

        // Round 1: no header, so the server assigns a fresh id.
        let first = router
            .clone()
            .oneshot(tools_list_post(101, None))
            .await
            .unwrap();
        assert_eq!(first.status(), StatusCode::OK);
        let assigned_id = first
            .headers()
            .get("mcp-session-id")
            .and_then(|value| value.to_str().ok())
            .map(str::to_owned)
            .expect("first response must carry mcp-session-id");

        // Round 2: present the id we were just given.
        let second = router
            .oneshot(tools_list_post(102, Some(assigned_id.as_str())))
            .await
            .unwrap();
        assert_eq!(second.status(), StatusCode::OK);
        let echoed = second
            .headers()
            .get("mcp-session-id")
            .and_then(|value| value.to_str().ok())
            .map(str::to_owned)
            .expect("continuation response must echo mcp-session-id");
        assert_eq!(
            echoed, assigned_id,
            "second response must echo the same session id"
        );
    });
    h.shutdown(&runtime);
}
12176
/// Presenting an `Mcp-Session-Id` the server never handed out must
/// yield 404, with a JSON body whose `error` is the `session_expired`
/// discriminator and whose `retry` field says `re-initialize`.
#[test]
fn mcp_post_with_unknown_session_id_returns_404() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async move {
        let payload = serde_json::to_vec(&json!({
            "jsonrpc": "2.0",
            "id": 103,
            "method": "tools/list",
        }))
        .unwrap();
        let request = Request::builder()
            .method("POST")
            .uri("/mcp")
            .header("content-type", "application/json")
            // Well-formed, but never issued by this server.
            .header("mcp-session-id", "11111111-2222-3333-4444-555555555555")
            .body(Body::from(payload))
            .unwrap();

        let resp = router.oneshot(request).await.unwrap();
        assert_eq!(resp.status(), StatusCode::NOT_FOUND);

        let raw = resp.into_body().collect().await.unwrap().to_bytes();
        let v: Value = serde_json::from_slice(&raw).unwrap();

        let error = v.get("error").and_then(Value::as_str);
        assert_eq!(
            error,
            Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
            "404 body must carry the session_expired discriminator: {v}"
        );

        let retry = v.get("retry").and_then(Value::as_str);
        assert!(
            retry == Some("re-initialize"),
            "404 body must instruct re-initialize: {v}"
        );
    });
    h.shutdown(&runtime);
}
12220
/// An `Mcp-Session-Id` that the server DID assign, but which has since
/// left the store, must produce the same 404 + `session_expired`
/// response as an id that never existed. Expiry is simulated by
/// deleting the entry from the store directly. Same wire response as
/// the unknown-id case, different cause.
#[test]
fn mcp_post_with_expired_session_id_returns_404() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    let store = h.mcp_sessions.clone();
    runtime.block_on(async move {
        // Step 1: a header-less POST allocates a session id.
        let init_payload = serde_json::to_vec(&json!({
            "jsonrpc": "2.0",
            "id": 104,
            "method": "tools/list",
        }))
        .unwrap();
        let init_request = Request::builder()
            .method("POST")
            .uri("/mcp")
            .header("content-type", "application/json")
            .body(Body::from(init_payload))
            .unwrap();
        let init_response = router.clone().oneshot(init_request).await.unwrap();
        let assigned_id_str = init_response
            .headers()
            .get("mcp-session-id")
            .and_then(|value| value.to_str().ok())
            .map(str::to_owned)
            .expect("first response must carry mcp-session-id");

        // Step 2: evict the session through the harness's SessionStore
        // clone. From the wire this looks the same as the background
        // sweep dropping an entry past its TTL (the handler's
        // middleware sees `SessionStore::get` return `None`), without
        // having to drive the real 30-minute inactivity clock.
        let parsed = crate::mcp_session::SessionId::parse(&assigned_id_str)
            .expect("just-assigned id must parse");
        assert!(store.delete(&parsed), "stored session must be deletable");

        // Step 3: replay the now-stale id.
        let stale_payload = serde_json::to_vec(&json!({
            "jsonrpc": "2.0",
            "id": 105,
            "method": "tools/list",
        }))
        .unwrap();
        let stale_request = Request::builder()
            .method("POST")
            .uri("/mcp")
            .header("content-type", "application/json")
            .header("mcp-session-id", &assigned_id_str)
            .body(Body::from(stale_payload))
            .unwrap();
        let stale_response = router.oneshot(stale_request).await.unwrap();
        assert_eq!(stale_response.status(), StatusCode::NOT_FOUND);

        let raw = stale_response
            .into_body()
            .collect()
            .await
            .unwrap()
            .to_bytes();
        let v: Value = serde_json::from_slice(&raw).unwrap();
        assert_eq!(
            v.get("error").and_then(Value::as_str),
            Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
            "expired-session 404 body must carry session_expired: {v}"
        );
    });
    h.shutdown(&runtime);
}
12293
/// v0.11.0 P2: `GET /mcp` is only meaningful with an `Mcp-Session-Id`
/// header. The GET stream attaches to an existing session's
/// notification channel; session creation belongs to POST, so there is
/// no init path over GET. A header-less GET must therefore return 404
/// with the `session_expired` discriminator and the `re-initialize`
/// instruction — the same wire shape as the unknown-id 404, so clients
/// need just one recovery code path.
///
/// This is a deliberate departure from v0.11.0 P1, which auto-created
/// a session on GET; the rationale is recorded in
/// `docs/dev-log/0134-v0.11.0-p2-impl.md`.
#[test]
fn mcp_get_without_session_id_returns_404() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async move {
        let request = Request::builder()
            .method("GET")
            .uri("/mcp")
            .header("accept", "text/event-stream")
            .body(Body::empty())
            .unwrap();
        let resp = router.oneshot(request).await.unwrap();
        assert_eq!(resp.status(), StatusCode::NOT_FOUND);

        let raw = resp.into_body().collect().await.unwrap().to_bytes();
        let v: Value = serde_json::from_slice(&raw).unwrap();

        let error = v.get("error").and_then(Value::as_str);
        assert_eq!(
            error,
            Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
            "GET /mcp without session id must carry session_expired: {v}"
        );

        let retry = v.get("retry").and_then(Value::as_str);
        assert_eq!(retry, Some("re-initialize"));
    });
    h.shutdown(&runtime);
}
12333
12334    // ---------------------------------------------------------------
12335    // v0.11.0 P2 — resumable /mcp GET stream + Last-Event-ID
12336    // ---------------------------------------------------------------
12337    //
12338    // These pin the v0.11.0 P2 wire contract for the resumable GET
12339    // stream: an `Mcp-Session-Id`-bound subscriber sees `event: init`
12340    // first, then any buffered replay events past `Last-Event-ID`,
12341    // then live broadcast events as they're published. The unit-test
12342    // half of the contract (publish_event monotonic + buffer cap) lives
12343    // in `crate::mcp_session::tests`.
12344
12345    /// Open the `/mcp` GET stream for one session id. Returns
12346    /// `(status, body)` where the body is the SSE frame stream.
12347    async fn open_mcp_get_stream(
12348        router: axum::Router,
12349        session_id: &str,
12350        last_event_id: Option<&str>,
12351    ) -> (StatusCode, axum::body::Body, axum::http::HeaderMap) {
12352        let mut builder = Request::builder()
12353            .method("GET")
12354            .uri("/mcp")
12355            .header("accept", "text/event-stream")
12356            .header(crate::mcp_session::MCP_SESSION_ID_HEADER, session_id);
12357        if let Some(leid) = last_event_id {
12358            builder = builder.header(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER, leid);
12359        }
12360        let req = builder
12361            .header("content-length", "0")
12362            .body(Body::empty())
12363            .unwrap();
12364        let resp = router.oneshot(req).await.expect("oneshot");
12365        let status = resp.status();
12366        let headers = resp.headers().clone();
12367        let body = resp.into_body();
12368        (status, body, headers)
12369    }
12370
12371    /// Allocate one session via a POST so a follow-up GET can attach.
12372    /// Returns the assigned session id from the response header.
12373    async fn allocate_mcp_session(router: axum::Router) -> String {
12374        let req = Request::builder()
12375            .method("POST")
12376            .uri("/mcp")
12377            .header("content-type", "application/json")
12378            .body(Body::from(
12379                serde_json::to_vec(&json!({
12380                    "jsonrpc": "2.0",
12381                    "id": 1,
12382                    "method": "tools/list",
12383                }))
12384                .unwrap(),
12385            ))
12386            .unwrap();
12387        let resp = router.oneshot(req).await.expect("oneshot");
12388        assert_eq!(resp.status(), StatusCode::OK, "POST must allocate session");
12389        resp.headers()
12390            .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
12391            .and_then(|v| v.to_str().ok())
12392            .map(|s| s.to_string())
12393            .expect("POST must echo Mcp-Session-Id")
12394    }
12395
12396    /// Look up the in-store `Arc<SessionState>` so a test can publish
12397    /// events directly onto the same record the GET handler subscribed
12398    /// to. Takes the [`SessionStore`] directly so callers can clone it
12399    /// out of the harness before moving the harness into the async
12400    /// block.
12401    fn session_state_for_test(
12402        store: &crate::mcp_session::SessionStore,
12403        session_id: &str,
12404    ) -> std::sync::Arc<crate::mcp_session::SessionState> {
12405        let parsed =
12406            crate::mcp_session::SessionId::parse(session_id).expect("test session id must parse");
12407        store.get(&parsed).expect("session must still be in store")
12408    }
12409
/// `GET /mcp` with an id whose session was force-evicted (standing in
/// for the TTL sweep) must answer 404 with the `session_expired`
/// discriminator — the same wire shape POST produces.
#[test]
fn mcp_get_with_expired_session_id_returns_404() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    let store = h.mcp_sessions.clone();
    runtime.block_on(async move {
        let session_id = allocate_mcp_session(router.clone()).await;

        // Drop the session through the harness's store handle.
        let evicted = crate::mcp_session::SessionId::parse(&session_id).unwrap();
        assert!(store.delete(&evicted));

        // Attach with the id that no longer resolves.
        let request = Request::builder()
            .method("GET")
            .uri("/mcp")
            .header("accept", "text/event-stream")
            .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
            .body(Body::empty())
            .unwrap();
        let resp = router.oneshot(request).await.unwrap();
        assert_eq!(resp.status(), StatusCode::NOT_FOUND);

        let raw = resp.into_body().collect().await.unwrap().to_bytes();
        let v: Value = serde_json::from_slice(&raw).unwrap();
        let error = v.get("error").and_then(Value::as_str);
        assert_eq!(error, Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR));
    });
    h.shutdown(&runtime);
}
12443
/// Happy-path subscribe. Opening `/mcp` with a freshly allocated
/// session id must return 200, echo the id in the response header, and
/// deliver `event: init` as the first frame, with the id repeated in
/// the init payload.
#[test]
fn mcp_get_with_valid_session_id_subscribes() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    runtime.block_on(async move {
        let session_id = allocate_mcp_session(router.clone()).await;
        let (status, mut body, headers) = open_mcp_get_stream(router, &session_id, None).await;
        assert_eq!(status, StatusCode::OK);

        // The id must come back on the response header...
        let echoed = headers
            .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
            .and_then(|value| value.to_str().ok())
            .unwrap();
        assert_eq!(echoed, session_id);

        // ...and the stream must open with the init event.
        let wait = std::time::Duration::from_secs(2);
        let ev = read_one_sse_event(&mut body, wait)
            .await
            .expect("init event must arrive within 2s");
        assert_eq!(ev.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
        assert_eq!(ev.data["connected"].as_bool(), Some(true));
        assert_eq!(ev.data["session_id"].as_str(), Some(session_id.as_str()));
        // Id "0" is the reserved sentinel for init; the first real
        // publish_event allocates id 1.
        assert_eq!(ev.id.as_deref(), Some("0"));
    });
    h.shutdown(&runtime);
}
12476
/// Resume-from-cursor contract: after five events are published on the
/// session, a subscriber connecting with `Last-Event-ID: 2` must see
/// `init` followed by events 3, 4 and 5, in that order.
#[test]
fn mcp_get_resumes_from_last_event_id() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let router = h.router.clone();
    let store = h.mcp_sessions.clone();
    runtime.block_on(async move {
        let session_id = allocate_mcp_session(router.clone()).await;
        let state = session_state_for_test(&store, &session_id);
        (1..=5).for_each(|i| {
            state.publish_event(crate::mcp_session::McpEventKind::Message, json!({"n": i}));
        });

        let (status, mut body, _) = open_mcp_get_stream(router, &session_id, Some("2")).await;
        assert_eq!(status, StatusCode::OK);

        let wait = std::time::Duration::from_secs(2);

        // The stream always opens with init.
        let init = read_one_sse_event(&mut body, wait).await.unwrap();
        assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);

        // Everything past the cursor is replayed in publish order.
        for expected_id in 3u64..=5 {
            let ev = read_one_sse_event(&mut body, wait)
                .await
                .expect("replay event must arrive within 2s");
            assert_eq!(
                ev.event,
                crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,
                "expected replay of message event id {expected_id}, got {ev:?}",
            );
            let expected_id_text = expected_id.to_string();
            assert_eq!(ev.id.as_deref(), Some(expected_id_text.as_str()));
            assert_eq!(ev.data["n"].as_u64(), Some(expected_id));
        }
    });
    h.shutdown(&runtime);
}
12515
/// Publish 300 events — more than the replay buffer retains — then
/// reconnect with `Last-Event-ID: 1`, a cursor whose successor events
/// have already been evicted. The stream must open with `event: init`
/// and follow it with a synthetic `event: lagged` (id `0`) whose
/// `dropped` count is non-zero.
///
/// Only the `init` and `lagged` frames are asserted; whatever the
/// stream replays after `lagged` is not pinned by this test.
#[test]
fn mcp_get_emits_lagged_when_last_event_id_too_old() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let r = h.router.clone();
    let store = h.mcp_sessions.clone();
    runtime.block_on(async move {
        let session_id = allocate_mcp_session(r.clone()).await;
        let state = session_state_for_test(&store, &session_id);
        // Publish 300 events — buffer cap is 256, so events 1..=44
        // get evicted (oldest retained id = 45).
        for _ in 0..300 {
            state.publish_event(crate::mcp_session::McpEventKind::Message, json!({}));
        }
        // Last-Event-ID: 1 — claim we've only seen event 1, but
        // event 2 (and 3..=44) are gone from the buffer.
        let (status, mut body, _) = open_mcp_get_stream(r, &session_id, Some("1")).await;
        assert_eq!(status, StatusCode::OK);

        let frame_timeout = std::time::Duration::from_secs(2);

        // First frame: init.
        let init = read_one_sse_event(&mut body, frame_timeout)
            .await
            .unwrap();
        assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);

        // Second frame: lagged (synthetic) with id 0.
        let lagged = read_one_sse_event(&mut body, frame_timeout)
            .await
            .expect("lagged event must arrive within 2s");
        assert_eq!(
            lagged.event,
            crate::mcp_session::MCP_STREAM_EVENT_LAGGED_NAME,
            "expected `event: lagged` after Last-Event-ID before buffer",
        );
        assert_eq!(lagged.id.as_deref(), Some("0"));
        // A missing or non-numeric `dropped` field is treated as 0 and
        // therefore fails the check.
        assert!(
            lagged.data["dropped"].as_u64().unwrap_or(0) > 0,
            "lagged event must carry a non-zero `dropped` count: {:?}",
            lagged.data,
        );
    });
    h.shutdown(&runtime);
}
12562
/// CORS preflight (OPTIONS) with `Access-Control-Request-Headers:
/// last-event-id,mcp-session-id` must succeed, and both header names
/// must appear in `Access-Control-Allow-Headers`. Without this, a
/// browser reconnecting an SSE stream with `Last-Event-ID:` fails the
/// preflight before the actual GET lands.
#[test]
fn cors_preflight_allows_last_event_id_header() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    let r = h.router.clone();
    runtime.block_on(async move {
        let req = Request::builder()
            .method("OPTIONS")
            .uri("/mcp")
            .header("origin", "http://localhost:5173")
            .header("access-control-request-method", "GET")
            .header(
                "access-control-request-headers",
                "last-event-id,mcp-session-id",
            )
            .body(Body::empty())
            .unwrap();
        let resp = r.oneshot(req).await.unwrap();
        // Any 2xx counts as success; that already includes
        // 204 No Content, so no separate comparison is needed.
        assert!(
            resp.status().is_success(),
            "preflight must succeed, got: {}",
            resp.status(),
        );
        // Header names are case-insensitive, so compare lower-cased.
        // A missing header becomes an empty string and fails below.
        let allow = resp
            .headers()
            .get("access-control-allow-headers")
            .and_then(|h| h.to_str().ok())
            .map(|s| s.to_ascii_lowercase())
            .unwrap_or_default();
        assert!(
            allow.contains("last-event-id"),
            "preflight must allow `last-event-id`; allow-headers = {allow:?}",
        );
        assert!(
            allow.contains("mcp-session-id"),
            "preflight must allow `mcp-session-id` too; allow-headers = {allow:?}",
        );
    });
    h.shutdown(&runtime);
}
12608
/// Heartbeat cadence: given a short interval, the stream yields a
/// further frame (the heartbeat) after the init event. Production uses
/// [`MCP_STREAM_HEARTBEAT_SECS`] (30s); this test calls
/// `build_mcp_session_stream` directly with a 1-second interval so CI
/// does not spend 30s of wall time waiting.
#[test]
fn mcp_get_heartbeats_after_init() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async move {
        use futures::StreamExt;

        let state = std::sync::Arc::new(crate::mcp_session::SessionState::new(
            solo_core::TenantId::default_tenant(),
            None,
        ));
        let session_id = crate::mcp_session::SessionId::new();
        let mut stream = std::pin::pin!(build_mcp_session_stream(
            state,
            session_id,
            "default".to_string(),
            0,
            1, // 1-second heartbeat for the test
        ));

        // With no live events published, the stream should produce
        // init and then a heartbeat within ~1.5s.
        let first_frame = tokio::time::timeout(std::time::Duration::from_secs(2), stream.next())
            .await
            .expect("init must arrive within 2s")
            .expect("stream must yield init");
        // The rendered Event is opaque, so it is not inspected here;
        // `mcp_get_with_valid_session_id_subscribes` covers the wire
        // format. This test only pins that a SECOND frame lands inside
        // the heartbeat window, so the init frame is discarded.
        drop(first_frame);

        let second_frame = tokio::time::timeout(std::time::Duration::from_secs(3), stream.next())
            .await
            .expect("heartbeat must arrive within ~3s")
            .expect("stream must yield heartbeat");
        // Presence is the observable, not content — wire content is
        // covered by `mcp_get_with_valid_session_id_subscribes`.
        drop(second_frame);
    });
    h.shutdown(&runtime);
}
12657
12658    /// v0.11.0 P3: `memory_ingest_document` emits the first two phase
12659    /// events (parsed, chunked) BEFORE the writer-actor call, so they
12660    /// fire even when the underlying writer has no embedder configured.
12661    /// This pins the upstream half of the 4-phase ingest progress
12662    /// taxonomy without needing a fully-equipped writer harness — the
12663    /// post-writer phases (embedded, inserted) are pinned indirectly
12664    /// by the `MCP_NOTIFICATION_PROGRESS_METHOD` grep-ability and by
12665    /// the dispatch_tests-level progress-emission tests for the other
12666    /// two long-running tools (search_docs / remember_batch).
12667    #[test]
12668    fn mcp_http_ingest_document_emits_parsed_and_chunked_progress_events() {
12669        let runtime = rt();
12670        let h = Harness::new(&runtime);
12671        let r = h.router.clone();
12672        let store = h.mcp_sessions.clone();
12673        runtime.block_on(async move {
12674            let session_id = allocate_mcp_session(r.clone()).await;
12675            let state = session_state_for_test(&store, &session_id);
12676            let mut rx = state.subscribe_events();
12677            // Write a `.txt` so the parser doesn't reject before the
12678            // first progress event fires — handle_ingest_document
12679            // emits `parsed` BEFORE the writer call but AFTER the
12680            // empty-path guard; both pre-writer phases fire regardless
12681            // of writer-side embedder configuration.
12682            let tmpdir = tempfile::TempDir::new().unwrap();
12683            let tmpfile = tmpdir.path().join("ingest-progress.txt");
12684            std::fs::write(&tmpfile, b"hello world progress test").unwrap();
12685            let body = json!({
12686                "jsonrpc": "2.0",
12687                "id": 2,
12688                "method": "tools/call",
12689                "params": {
12690                    "name": "memory_ingest_document",
12691                    "arguments": { "path": tmpfile.to_string_lossy() },
12692                    "_meta": { "progressToken": "ingest-tok" },
12693                },
12694            });
12695            let req = Request::builder()
12696                .method("POST")
12697                .uri("/mcp")
12698                .header("content-type", "application/json")
12699                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12700                .body(Body::from(serde_json::to_vec(&body).unwrap()))
12701                .unwrap();
12702            let resp = r.clone().oneshot(req).await.expect("oneshot");
12703            assert_eq!(resp.status(), StatusCode::OK);
12704            let _ = resp.into_body().collect().await.unwrap().to_bytes();
12705            let mut events = Vec::new();
12706            while let Ok(ev) = rx.try_recv() {
12707                events.push(ev);
12708            }
12709            // We expect AT LEAST the 2 pre-writer phase events. In a
12710            // fully-equipped harness the writer would succeed and the
12711            // post-writer phases (embedded + inserted) would also fire;
12712            // here we pin the pre-writer half + the spec envelope shape.
12713            assert!(
12714                events.len() >= 2,
12715                "expected at least 2 progress events (parsed + chunked), got {}: {events:?}",
12716                events.len()
12717            );
12718            // Phase 1 = "parsed"; phase 2 = "chunked"; both carry
12719            // total=4 and progressToken="ingest-tok".
12720            assert_eq!(events[0].data["params"]["progress"], json!(1));
12721            assert_eq!(events[0].data["params"]["message"], json!("parsed"));
12722            assert_eq!(events[1].data["params"]["progress"], json!(2));
12723            assert_eq!(events[1].data["params"]["message"], json!("chunked"));
12724            for ev in &events {
12725                assert_eq!(ev.event, crate::mcp_session::McpEventKind::Progress,);
12726                assert_eq!(
12727                    ev.data["method"],
12728                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
12729                );
12730                assert_eq!(ev.data["params"]["progressToken"], json!("ingest-tok"));
12731                assert_eq!(ev.data["params"]["total"], json!(4));
12732            }
12733        });
12734        h.shutdown(&runtime);
12735    }
12736
12737    /// v0.11.0 P3: end-to-end progress event roundtrip — POST a
12738    /// `tools/call` carrying `_meta.progressToken`, then reconnect via
12739    /// `GET /mcp` with a `Last-Event-ID` that triggers buffer replay.
12740    /// Confirms the wire path:
12741    /// `tools/call params._meta.progressToken` → ProgressReporter →
12742    /// SessionState.publish_event → replay buffer → GET SSE replay
12743    /// drain → client receives spec-shape envelope.
12744    ///
12745    /// `Last-Event-ID: 0` is treated as "brand new subscriber, no
12746    /// replay" per the v0.11.0 P2 contract — so we drive a non-zero
12747    /// `Last-Event-ID` smaller than every event id by first force-
12748    /// publishing one synthetic seed event (id 1), then issuing the
12749    /// real `tools/call` (which publishes 3 progress events with
12750    /// ids 2..=4), then GET with `Last-Event-ID: 1` to replay
12751    /// exactly the progress trio.
12752    #[test]
12753    fn mcp_http_progress_event_subscribers_receive_via_get_mcp_stream() {
12754        let runtime = rt();
12755        let h = Harness::new(&runtime);
12756        let r = h.router.clone();
12757        let store = h.mcp_sessions.clone();
12758        runtime.block_on(async move {
12759            // 1. Allocate a session via an initial POST.
12760            let session_id = allocate_mcp_session(r.clone()).await;
12761            // 2. Seed one synthetic event (id 1) so the buffer is
12762            //    non-empty before the real progress events. The GET
12763            //    handler's replay path only fires when last_event_id
12764            //    > 0; we'll pass Last-Event-ID: 1 to skip the seed and
12765            //    replay the progress events that follow.
12766            let state = session_state_for_test(&store, &session_id);
12767            state.publish_event(
12768                crate::mcp_session::McpEventKind::Message,
12769                json!({"seed": true}),
12770            );
12771            // 3. POST a `memory_search_docs` tools/call carrying
12772            //    `_meta.progressToken` (well above the top_k threshold
12773            //    so progress IS emitted). The query returns empty hits
12774            //    in the harness — what matters here is that the 3
12775            //    progress events fire as side effects of the call.
12776            let body = json!({
12777                "jsonrpc": "2.0",
12778                "id": 2,
12779                "method": "tools/call",
12780                "params": {
12781                    "name": "memory_search_docs",
12782                    "arguments": { "query": "anything", "limit": 150 },
12783                    "_meta": { "progressToken": "progress-roundtrip" },
12784                },
12785            });
12786            let req = Request::builder()
12787                .method("POST")
12788                .uri("/mcp")
12789                .header("content-type", "application/json")
12790                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12791                .body(Body::from(serde_json::to_vec(&body).unwrap()))
12792                .unwrap();
12793            let resp = r.clone().oneshot(req).await.expect("oneshot");
12794            assert_eq!(resp.status(), StatusCode::OK);
12795            // Drain the POST response so the future completes before
12796            // we open the GET stream.
12797            let _ = resp.into_body().collect().await.unwrap().to_bytes();
12798            // 4. Open the GET stream with Last-Event-ID: 1 — replay
12799            //    every event past the seed.
12800            let (status, mut stream_body, _) = open_mcp_get_stream(r, &session_id, Some("1")).await;
12801            assert_eq!(status, StatusCode::OK);
12802            // First frame: init (id 0, reserved sentinel).
12803            let init = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
12804                .await
12805                .expect("init must arrive within 2s");
12806            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12807            // Then 3 progress events (the search_docs handler emits 3
12808            // when top_k > 100). Collect them and assert the spec
12809            // envelope shape.
12810            for expected_progress in 1u64..=3u64 {
12811                let ev = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
12812                    .await
12813                    .expect("progress event must arrive within 2s");
12814                assert_eq!(
12815                    ev.event,
12816                    crate::mcp_session::MCP_STREAM_EVENT_PROGRESS_NAME,
12817                    "expected progress event #{expected_progress}, got {ev:?}",
12818                );
12819                // Spec-shape envelope: jsonrpc + method + params{progressToken, progress, total}.
12820                assert_eq!(ev.data["jsonrpc"], json!("2.0"));
12821                assert_eq!(
12822                    ev.data["method"],
12823                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
12824                );
12825                assert_eq!(
12826                    ev.data["params"]["progressToken"],
12827                    json!("progress-roundtrip")
12828                );
12829                assert_eq!(ev.data["params"]["progress"], json!(expected_progress));
12830                assert_eq!(ev.data["params"]["total"], json!(3));
12831            }
12832        });
12833        h.shutdown(&runtime);
12834    }
12835
12836    /// `initialize` returns the `{name: "solo", version: <crate
12837    /// version>}` server-info pinned by the stdio invariant test
12838    /// `server_info_identity_is_solo_not_rmcp_or_solo_api`. Sanity
12839    /// check that the v0.10.2 HTTP transport doesn't drift away from
12840    /// the stdio identity.
12841    #[test]
12842    fn mcp_http_initialize_returns_solo_server_info() {
12843        let runtime = rt();
12844        let h = Harness::new(&runtime);
12845        let r = h.router.clone();
12846        runtime.block_on(async move {
12847            let req = json!({
12848                "jsonrpc": "2.0",
12849                "id": 7,
12850                "method": "initialize",
12851                "params": {
12852                    "protocolVersion": "2024-11-05",
12853                    "capabilities": {},
12854                    "clientInfo": { "name": "solo-http-test", "version": "0.0.0" },
12855                },
12856            });
12857            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
12858            assert_eq!(status, StatusCode::OK);
12859            assert_eq!(
12860                body.pointer("/result/serverInfo/name")
12861                    .and_then(|v| v.as_str()),
12862                Some("solo"),
12863                "serverInfo.name must be `solo`, not `solo-api` or `rmcp`; got: {body}"
12864            );
12865            // `protocolVersion` is the static value the dispatcher
12866            // emits today (2024-11-05). The stdio loop emits rmcp's
12867            // own default — we cross-check those two stay aligned in
12868            // the v0.10.3+ session work; for v0.10.2 we just pin the
12869            // HTTP-side value.
12870            assert_eq!(
12871                body.pointer("/result/protocolVersion")
12872                    .and_then(|v| v.as_str()),
12873                Some("2024-11-05"),
12874            );
12875        });
12876        h.shutdown(&runtime);
12877    }
12878
12879    // ----------------------------------------------------------------
12880    // v0.11.0 P4 — notifications/message bridge from InvalidateEvent
12881    // ----------------------------------------------------------------
12882
12883    /// v0.11.0 P4: a fresh POST /mcp (no session id) causes the per-
12884    /// session invalidate bridge to be spawned. Pin by firing an
12885    /// invalidate on the harness's broadcast sender AFTER the session
12886    /// is allocated and asserting the session's own event channel
12887    /// receives an MCP `notifications/message` event.
12888    #[test]
12889    fn session_subscribes_to_tenant_invalidate_on_creation() {
12890        let runtime = rt();
12891        let h = Harness::new(&runtime);
12892        let r = h.router.clone();
12893        let store = h.mcp_sessions.clone();
12894        let sender = h.invalidate_sender();
12895        runtime.block_on(async move {
12896            // Allocate session — POST handler spawns the bridge.
12897            let session_id = allocate_mcp_session(r).await;
12898            let state = session_state_for_test(&store, &session_id);
12899            let mut rx = state.subscribe_events();
12900            // Fire one invalidate on the tenant's broadcast.
12901            sender
12902                .send(InvalidateEvent {
12903                    reason: "memory.remember".to_string(),
12904                    tenant_id: "default".to_string(),
12905                    ts_ms: 1_715_625_600_000,
12906                    kind: "episode".to_string(),
12907                })
12908                .expect("at least one subscriber (the bridge)");
12909            // Bridge forwards it to the session as an MCP Message.
12910            let received = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
12911                .await
12912                .expect("bridge must forward invalidate within 2s")
12913                .expect("session receiver must observe published event");
12914            assert_eq!(received.event, crate::mcp_session::McpEventKind::Message);
12915            assert_eq!(
12916                received.data["method"].as_str(),
12917                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
12918            );
12919        });
12920        h.shutdown(&runtime);
12921    }
12922
12923    /// v0.11.0 P4: pin the exact envelope shape — `jsonrpc=2.0`,
12924    /// `method=notifications/message`, `params.{level,logger,data,details}`.
12925    /// One full round-trip through the bridge so a future refactor
12926    /// that changes the wire format trips this test.
12927    #[test]
12928    fn invalidate_event_translates_to_mcp_notifications_message() {
12929        let runtime = rt();
12930        let h = Harness::new(&runtime);
12931        let r = h.router.clone();
12932        let store = h.mcp_sessions.clone();
12933        let sender = h.invalidate_sender();
12934        runtime.block_on(async move {
12935            let session_id = allocate_mcp_session(r).await;
12936            let state = session_state_for_test(&store, &session_id);
12937            let mut rx = state.subscribe_events();
12938            sender
12939                .send(InvalidateEvent {
12940                    reason: "memory.ingest_document".to_string(),
12941                    tenant_id: "default".to_string(),
12942                    ts_ms: 1_715_625_999_999,
12943                    kind: "document".to_string(),
12944                })
12945                .expect("at least one subscriber");
12946            let received = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
12947                .await
12948                .expect("forward within 2s")
12949                .expect("session must receive event");
12950            // Envelope shape.
12951            assert_eq!(received.data["jsonrpc"].as_str(), Some("2.0"));
12952            assert_eq!(
12953                received.data["method"].as_str(),
12954                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
12955            );
12956            let params = &received.data["params"];
12957            assert_eq!(
12958                params["level"].as_str(),
12959                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LEVEL),
12960            );
12961            assert_eq!(
12962                params["logger"].as_str(),
12963                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LOGGER),
12964            );
12965            // document kind maps to documents_updated.
12966            assert_eq!(
12967                params["data"].as_str(),
12968                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_DOCUMENTS_UPDATED),
12969            );
12970            // details preserves the structured original event.
12971            assert_eq!(
12972                params["details"]["reason"].as_str(),
12973                Some("memory.ingest_document"),
12974            );
12975            assert_eq!(params["details"]["kind"].as_str(), Some("document"),);
12976            assert_eq!(params["details"]["ts_ms"].as_i64(), Some(1_715_625_999_999),);
12977        });
12978        h.shutdown(&runtime);
12979    }
12980
12981    /// v0.11.0 P4: two sessions exist; each has its own bridge. An
12982    /// invalidate fires once on the (shared, single-tenant) broadcast
12983    /// and BOTH sessions receive it. Pins that the bridge is correctly
12984    /// per-session-scoped: it doesn't leak to a wrong session AND it
12985    /// doesn't fail to fan out to all sessions of the same tenant.
12986    ///
12987    /// The harness is single-tenant by design, so the "wrong tenant
12988    /// doesn't receive" half is structurally guaranteed (different
12989    /// tenants would have different `invalidate_sender`s — the
12990    /// `mcp_notify` unit tests pin the bridge wiring against a fake
12991    /// channel directly). This integration test pins the
12992    /// per-session-of-same-tenant fan-out behaviour.
12993    #[test]
12994    fn invalidate_event_published_to_correct_session_only() {
12995        let runtime = rt();
12996        let h = Harness::new(&runtime);
12997        let r = h.router.clone();
12998        let store = h.mcp_sessions.clone();
12999        let sender = h.invalidate_sender();
13000        runtime.block_on(async move {
13001            // Allocate two distinct sessions.
13002            let session_id_a = allocate_mcp_session(r.clone()).await;
13003            let session_id_b = allocate_mcp_session(r).await;
13004            assert_ne!(session_id_a, session_id_b);
13005            let state_a = session_state_for_test(&store, &session_id_a);
13006            let state_b = session_state_for_test(&store, &session_id_b);
13007            let mut rx_a = state_a.subscribe_events();
13008            let mut rx_b = state_b.subscribe_events();
13009            // Fire one invalidate.
13010            sender
13011                .send(InvalidateEvent {
13012                    reason: "memory.consolidate".to_string(),
13013                    tenant_id: "default".to_string(),
13014                    ts_ms: 1_715_625_600_000,
13015                    kind: "cluster".to_string(),
13016                })
13017                .expect("at least one subscriber");
13018            // Both sessions' bridges receive it independently.
13019            let a = tokio::time::timeout(std::time::Duration::from_secs(2), rx_a.recv())
13020                .await
13021                .expect("session A receives within 2s")
13022                .expect("session A receiver alive");
13023            let b = tokio::time::timeout(std::time::Duration::from_secs(2), rx_b.recv())
13024                .await
13025                .expect("session B receives within 2s")
13026                .expect("session B receiver alive");
13027            for evt in [&a, &b] {
13028                assert_eq!(evt.event, crate::mcp_session::McpEventKind::Message);
13029                assert_eq!(
13030                    evt.data["params"]["data"].as_str(),
13031                    Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_CONSOLIDATION_UPDATED),
13032                );
13033            }
13034        });
13035        h.shutdown(&runtime);
13036    }
13037
13038    /// v0.11.0 P4: full GET-stream integration. A POST opens a session
13039    /// AND spawns its bridge; an invalidate fires on the tenant's
13040    /// broadcast; a GET subscriber reading the SSE wire format
13041    /// observes the `event: message` SSE frame carrying the spec-shape
13042    /// `notifications/message` envelope.
13043    ///
13044    /// Uses the `Last-Event-ID` resume path with id 0 (sentinel —
13045    /// "I'm a new subscriber, no replay"); the invalidate fires AFTER
13046    /// the GET opens so the live broadcast receiver picks it up.
13047    #[test]
13048    fn mcp_get_subscriber_receives_notifications_message_event() {
13049        let runtime = rt();
13050        let h = Harness::new(&runtime);
13051        let r = h.router.clone();
13052        let sender = h.invalidate_sender();
13053        runtime.block_on(async move {
13054            let session_id = allocate_mcp_session(r.clone()).await;
13055            // Open the GET stream first so the live broadcast receiver
13056            // is attached BEFORE the invalidate fires.
13057            let (status, mut body, _) = open_mcp_get_stream(r, &session_id, None).await;
13058            assert_eq!(status, StatusCode::OK);
13059            // Drain the init frame.
13060            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
13061                .await
13062                .expect("init event must arrive within 2s");
13063            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME,);
13064            // Now fire the invalidate.
13065            sender
13066                .send(InvalidateEvent {
13067                    reason: "memory.triples_extract".to_string(),
13068                    tenant_id: "default".to_string(),
13069                    ts_ms: 1_715_625_600_000,
13070                    kind: "triple".to_string(),
13071                })
13072                .expect("send must succeed");
13073            // Bridge forwards → SessionState.publish_event → broadcast
13074            // → GET stream consumer → SSE wire frame.
13075            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
13076                .await
13077                .expect("message event must arrive within 2s");
13078            assert_eq!(ev.event, crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,);
13079            assert_eq!(ev.data["jsonrpc"].as_str(), Some("2.0"));
13080            assert_eq!(
13081                ev.data["method"].as_str(),
13082                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
13083            );
13084            assert_eq!(
13085                ev.data["params"]["data"].as_str(),
13086                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_GRAPH_UPDATED),
13087            );
13088            assert_eq!(
13089                ev.data["params"]["details"]["reason"].as_str(),
13090                Some("memory.triples_extract"),
13091            );
13092        });
13093        h.shutdown(&runtime);
13094    }
13095}
13096
/// Table-driven checks for `is_localhost_origin`: each test walks a
/// fixed list of Origin header values and asserts the predicate's
/// verdict for every entry, naming the offending origin on failure.
#[cfg(test)]
mod cors_tests {
    use super::is_localhost_origin;

    /// `localhost`, `127.0.0.1` and `[::1]` are accepted over http and
    /// https, with or without an explicit port.
    #[test]
    fn accepts_canonical_localhost_origins() {
        let accepted = [
            "http://localhost",
            "http://localhost:3000",
            "https://localhost:8443",
            "http://127.0.0.1",
            "http://127.0.0.1:5173",
            "http://[::1]",
            "http://[::1]:8080",
        ];
        for origin in accepted {
            assert!(is_localhost_origin(origin), "origin: {origin}");
        }
    }

    /// Public hostnames and private-LAN addresses are not localhost.
    #[test]
    fn rejects_remote_origins() {
        let rejected = [
            "http://example.com",
            "https://malicious.example",
            "http://192.168.1.5",
            "http://10.0.0.1",
        ];
        for origin in rejected {
            assert!(!is_localhost_origin(origin), "origin: {origin}");
        }
    }

    /// nip.io and friends: DNS names that resolve to 127.0.0.1 while
    /// the Origin header still carries the public-DNS name. Rejecting
    /// them closes the rebinding-via-Origin gap.
    #[test]
    fn rejects_dns_rebinding_tricks() {
        let rejected = [
            "http://127.0.0.1.nip.io",
            "http://localhost.evil.com",
            "http://evil.localhost",
        ];
        for origin in rejected {
            assert!(!is_localhost_origin(origin), "origin: {origin}");
        }
    }

    /// Only http/https origins qualify, even when the host part says
    /// `localhost`.
    #[test]
    fn rejects_non_http_schemes() {
        let rejected = ["file:///", "ws://localhost:3000", "javascript:alert(1)"];
        for origin in rejected {
            assert!(!is_localhost_origin(origin), "origin: {origin}");
        }
    }

    /// Empty input and scheme-less spellings are rejected.
    #[test]
    fn rejects_malformed() {
        let rejected = ["", "localhost", "//localhost"];
        for origin in rejected {
            assert!(!is_localhost_origin(origin), "origin: {origin}");
        }
    }
}