Skip to main content

solo_api/
http.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! HTTP/JSON transport for Solo. Local-only by default — binds to
4//! `127.0.0.1:<port>` and serves the same operations the MCP server
5//! exposes:
6//!
7//! Episode operations:
8//!   - `POST /memory`                — remember (body: { content, source_type?, source_id? })
9//!   - `POST /memory/search`         — recall  (body: { query, limit? })
10//!   - `POST /memory/context`        — recall + themes + facts + contradictions bundle
11//!   - `GET  /memory/{id}`           — inspect
12//!   - `PATCH /memory/{id}`           — correct/update one active memory
13//!   - `DELETE /memory/{id}?reason=…` — forget
14//!
15//! Maintenance:
16//!   - `POST /memory/consolidate`    — trigger a consolidation pass
17//!   - `POST /backup`                — encrypted online backup
18//!
19//! Derived-layer (v0.4.0+; queries against the Steward's outputs):
20//!   - `GET  /memory/themes?window_days=N&limit=K`
21//!   - `GET  /memory/facts_about?subject=X&predicate=Y&since_ms=N&until_ms=N&include_as_object=B&limit=K`
22//!   - `GET  /memory/entities?query=X&limit=K`
23//!   - `GET  /memory/contradictions?limit=K`
24//!   - `POST /memory/contradictions/resolve`
25//!   - `GET  /memory/clusters/{cluster_id}?full_content=true` (v0.5.0+)
26//!
27//! Document operations (v0.7.0+):
28//!   - `POST   /memory/documents`               — ingest a file
29//!   - `POST   /memory/documents/search`        — vector search over chunks
30//!   - `GET    /memory/documents`               — paginate documents
31//!   - `GET    /memory/documents/{id}`          — inspect one document
32//!   - `DELETE /memory/documents/{id}`          — soft-delete a document
33//!
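//! Auth is optional at this layer. With no auth configured the threat
//! model is local-machine single-user; binding to `127.0.0.1` keeps the
//! surface off the LAN. Bearer-token or OIDC auth can be switched on via
//! `router_with_auth_config` / `serve_http_with_auth_config`;
//! `GET /health` and `GET /openapi.json` stay public in every mode.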
37//!
38//! ## Lifecycle
39//!
//! `serve_http(addr, state, bearer_token, shutdown)` binds to `addr`,
//! runs axum with `with_graceful_shutdown(shutdown)`, and returns when
//! shutdown fires or the listener errors. `solo http-serve` invokes this
//! from inside a `OneShotContext`, so writer + reader pool + lockfile
//! stay live for the server's lifetime and clean up properly afterwards.
45
46use std::convert::Infallible;
47use std::net::SocketAddr;
48use std::str::FromStr;
49use std::sync::Arc;
50use std::time::Duration;
51
52use axum::extract::{FromRequestParts, Path, Query, State};
53use axum::http::request::Parts;
54use axum::http::{HeaderValue, Method, StatusCode};
55use axum::response::sse::{Event, KeepAlive, Sse};
56use axum::response::{IntoResponse, Response};
57use axum::routing::{get, post};
58use axum::{Json, Router};
59use futures::Stream;
60use serde::{Deserialize, Serialize};
61use solo_core::{
62    Confidence, DocumentId, EncodingContext, Episode, InvalidateEvent, MemoryId, TenantId, Tier,
63};
64use solo_storage::{TenantHandle, TenantRegistry};
65use tokio::sync::broadcast;
66use tower_http::cors::{AllowOrigin, CorsLayer};
67use tower_http::trace::TraceLayer;
68
69use crate::auth::{AuthConfig, AuthenticatedPrincipal, middleware::AuthValidator};
70
71/// HTTP-side application state. v0.8.0 P2 swapped per-handler `WriteHandle
72/// + ReaderPool + ...` for a `TenantRegistry` that resolves tenant on each
73/// request via the `X-Solo-Tenant` header (default tenant if absent).
74#[derive(Clone)]
75pub struct SoloHttpState {
76    /// Multi-tenant registry. Lazy-loads tenants on first request.
77    pub registry: Arc<TenantRegistry>,
78    /// Default tenant used when the `X-Solo-Tenant` header is absent.
79    /// Typically `TenantId::default_tenant()`.
80    pub default_tenant: TenantId,
81    /// Read-path aliases for the canonical `"user"` subject. Sourced
82    /// from `solo.config.toml` `[identity] user_aliases`; threaded
83    /// through to `solo_query::facts_about` so a query for `"alex"`
84    /// also surfaces rows historically extracted as `"user"`. Empty
85    /// vec = behave as today. Wrapped in `Arc` so handler `clone()`s
86    /// stay cheap. v0.5.0 Priority 1 sub-step 1C.
87    pub user_aliases: Arc<Vec<String>>,
88    /// v0.11.0 P1: MCP `Mcp-Session-Id` session store. In-memory,
89    /// TTL-bounded (30 min inactivity / 4 hr absolute). The middleware
90    /// on the `/mcp` route validates request headers against this
91    /// store; the POST handler creates new entries on the first
92    /// request without a session id. See
93    /// `crates/solo-api/src/mcp_session.rs` +
94    /// `docs/dev-log/0132-v0.11.0-implementation-plan.md` §3 Decision A.
95    pub mcp_sessions: crate::mcp_session::SessionStore,
96}
97
98/// HTTP header that routes a request to a specific tenant. Optional;
99/// absent → state.default_tenant.
100pub const TENANT_HEADER: &str = "x-solo-tenant";
101
102/// Axum extractor that resolves the request's target tenant, then
103/// lazy-opens the tenant via the registry.
104///
105/// Resolution order (v0.8.0 P3):
106///   1. `AuthenticatedPrincipal.tenant_claim` from request extensions —
107///      set by the auth middleware. In OIDC mode this is the validated
108///      value of the configured custom claim (default `solo_tenant`);
109///      in bearer mode this is the daemon's default tenant.
110///   2. `X-Solo-Tenant` header — falls back to this when no
111///      authenticated principal is on the request (unauthenticated
112///      loopback deployments — the default).
113///   3. `state.default_tenant` when neither is present.
114///
115/// Bad header values → 400. Lazy-open failures → 500 unless the failure
116/// kind is `NotFound` (unknown tenant id) → 404.
117pub struct TenantExtractor(pub Arc<TenantHandle>);
118
119impl<S> FromRequestParts<S> for TenantExtractor
120where
121    SoloHttpState: FromRef<S>,
122    S: Send + Sync,
123{
124    type Rejection = ApiError;
125
126    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
127        let state = SoloHttpState::from_ref(state);
128        // Order: (1) principal.tenant_claim (set by auth middleware),
129        // (2) X-Solo-Tenant header, (3) state.default_tenant.
130        //
131        // The principal wins because in OIDC mode the JWT is the source
132        // of truth — letting the header override an OIDC claim would
133        // be a tenant-impersonation hole.
134        let resolved = if let Some(principal) = parts.extensions.get::<AuthenticatedPrincipal>()
135            && let Some(claim) = principal.tenant_claim.clone()
136        {
137            claim
138        } else {
139            match parts.headers.get(TENANT_HEADER) {
140                None => state.default_tenant.clone(),
141                Some(raw) => {
142                    let s = raw.to_str().map_err(|e| {
143                        ApiError::bad_request(format!(
144                            "{TENANT_HEADER}: header value must be ASCII ({e})"
145                        ))
146                    })?;
147                    TenantId::new(s.to_string()).map_err(|e| {
148                        ApiError::bad_request(format!("{TENANT_HEADER}: invalid tenant id: {e}"))
149                    })?
150                }
151            }
152        };
153        let handle = state.registry.get_or_open(&resolved).await.map_err(|e| {
154            // Map NotFound → 404; everything else → 500.
155            use solo_core::Error;
156            match &e {
157                Error::NotFound(_) => ApiError::not_found(e.to_string()),
158                Error::InvalidInput(_) => ApiError::bad_request(e.to_string()),
159                _ => ApiError::internal(e.to_string()),
160            }
161        })?;
162        Ok(TenantExtractor(handle))
163    }
164}
165
166use axum::extract::FromRef;
167
168/// v0.8.0 P4: extractor that pulls the authenticated principal's
169/// `subject` (JWT `sub` or `"bearer"`) out of request extensions for the
170/// audit log. `None` when no `AuthenticatedPrincipal` is present
171/// (unauthenticated loopback deployments).
172pub struct AuditPrincipal(pub Option<String>);
173
174impl<S> FromRequestParts<S> for AuditPrincipal
175where
176    S: Send + Sync,
177{
178    type Rejection = std::convert::Infallible;
179
180    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
181        Ok(AuditPrincipal(
182            parts
183                .extensions
184                .get::<AuthenticatedPrincipal>()
185                .map(|p| p.subject.clone()),
186        ))
187    }
188}
189
190/// v0.10.0: extractor that lifts the full `AuthenticatedPrincipal` out
191/// of request extensions for the `/v1/tenants` handler. Distinct from
192/// `AuditPrincipal` (which only carries `subject: Option<String>`) — the
193/// tenant-list handler needs the `tenant_claim` and `claims` fields to
194/// distinguish bearer (claims = Null) from OIDC (claims = JWT object)
195/// principals.
196///
197/// `None` when no `AuthenticatedPrincipal` is on the request — the
198/// unauthenticated loopback deployment path, which the tenant-list
199/// handler treats as "all tenants visible" (same scope as the
200/// `solo tenants list` CLI). See `docs/dev-log/0119-tenants-list-impl.md`
201/// for the three-case visibility rule.
202pub struct MaybePrincipal(pub Option<AuthenticatedPrincipal>);
203
204impl<S> FromRequestParts<S> for MaybePrincipal
205where
206    S: Send + Sync,
207{
208    type Rejection = std::convert::Infallible;
209
210    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
211        Ok(MaybePrincipal(
212            parts.extensions.get::<AuthenticatedPrincipal>().cloned(),
213        ))
214    }
215}
216
217/// Build the router with optional bearer-token auth (v0.7.x legacy shape).
218///
219/// When `bearer_token` is `Some(t)`, every request except `GET /health`
220/// + `GET /openapi.json` (unauthenticated probes / machine-readable spec)
221/// requires `Authorization: Bearer t`. v0.8.0 P3 routes this through the
222/// new `AuthValidator::Bearer` middleware so an `AuthenticatedPrincipal`
223/// is attached to every authenticated request (the `TenantExtractor`
224/// reads `principal.tenant_claim` ahead of the `X-Solo-Tenant` header).
225pub fn router_with_auth(state: SoloHttpState, bearer_token: Option<String>) -> Router {
226    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
227    router_with_auth_config(state, auth)
228}
229
230/// Build the router with a config-driven auth block (v0.8.0 P3+).
231///
232/// `auth = Some(AuthConfig::Bearer { token })` is equivalent to passing
233/// `Some(token)` to [`router_with_auth`]. `auth = Some(AuthConfig::Oidc { … })`
234/// installs the OIDC middleware (JWKS fetch + cache + sig + claim checks).
235/// `auth = None` runs unauthenticated — same `127.0.0.1` default as v0.7.x.
236///
237/// Public routes (`/health`, `/openapi.json`) are always exempt from
238/// auth — load balancers, uptime monitors, and codegen tools shouldn't
239/// need credentials.
240pub fn router_with_auth_config(state: SoloHttpState, auth: Option<AuthConfig>) -> Router {
241    let cors = build_cors_layer();
242    // Public, always-unauthenticated routes:
243    //   - GET /health: liveness probe (load balancers, uptime monitors).
244    //   - GET /openapi.json: machine-readable API description for client
245    //     codegen + browser-UI tooling (TypeScript / OpenAPI Generator,
246    //     curl-tools, etc.). The spec describes the API shape, not
247    //     secrets — fine to serve unauthenticated even on a LAN-bound
248    //     instance.
249    let public = Router::new()
250        .route("/health", get(|| async { "ok" }))
251        .route("/openapi.json", get(openapi_handler));
252
253    let authed = Router::new()
254        .route("/memory", post(remember_handler))
255        .route("/memory/search", post(recall_handler))
256        .route("/memory/context", post(memory_context_handler))
257        .route("/memory/consolidate", post(consolidate_handler))
258        .route(
259            "/memory/{id}",
260            get(inspect_handler)
261                .patch(update_handler)
262                .delete(forget_handler),
263        )
264        .route("/backup", post(backup_handler))
265        // Path 1 derived-layer endpoints (v0.4.0+). GET-shaped because
266        // these are pure read-only queries; query-string params for
267        // simple filters keep them curl-friendly without a JSON body.
268        .route("/memory/themes", get(themes_handler))
269        .route("/memory/facts_about", get(facts_about_handler))
270        .route("/memory/entities", get(entities_handler))
271        .route("/memory/contradictions", get(contradictions_handler))
272        .route(
273            "/memory/contradictions/resolve",
274            post(contradiction_resolve_handler),
275        )
276        // v0.5.0 Priority 3: drill into one cluster + abstraction +
277        // episodes. Two-segment path (`/memory/clusters/{id}`) so it
278        // does not shadow the single-segment `/memory/{id}` UUID
279        // inspect route.
280        .route(
281            "/memory/clusters/{cluster_id}",
282            get(inspect_cluster_handler),
283        )
284        // v0.7.0 P6: document operations. Two-segment paths
285        // (`/memory/documents/...`) so they don't shadow the
286        // single-segment `/memory/{id}` episode-inspect route. Order
287        // matters: register the literal `/memory/documents/search`
288        // ahead of `/memory/documents/{id}` so axum's matcher prefers
289        // the literal over the path parameter.
290        .route("/memory/documents/search", post(search_docs_handler))
291        .route(
292            "/memory/documents",
293            post(ingest_document_handler).get(list_documents_handler),
294        )
295        .route(
296            "/memory/documents/{id}",
297            get(inspect_document_handler).delete(forget_document_handler),
298        )
299        // v0.9.x: graph drill-down for solo-web. Read-only neighbor
300        // expansion off any node in the memory graph. See
301        // `docs/dev-log/0105-solo-web-scoping.md` §4 + the impl dev log
302        // for the full `/v1/graph/*` family this is the first of.
303        .route("/v1/graph/expand", get(graph_expand_handler))
304        // v0.10.0: paginated catalog reads for solo-web's initial graph
305        // render. See `docs/dev-log/0114-graph-nodes-edges-impl.md`
306        // alongside the same scoping doc.
307        .route("/v1/graph/nodes", get(graph_nodes_handler))
308        .route("/v1/graph/edges", get(graph_edges_handler))
309        // v0.10.0: kind-discriminated full-record drill for solo-web's
310        // inspector panel. See `docs/dev-log/0115-graph-inspect-impl.md`.
311        .route("/v1/graph/inspect/{id}", get(graph_inspect_handler))
312        // v0.10.0: unified explicit + HNSW-semantic neighbors for solo-
313        // web's "show similar" overlay. See
314        // `docs/dev-log/0116-graph-neighbors-impl.md`.
315        .route("/v1/graph/neighbors/{id}", get(graph_neighbors_handler))
316        // v0.10.0: Server-Sent Events stream of graph-data invalidations
317        // for solo-web's live update story. The wire format is
318        // INVALIDATION-shaped (`{reason, tenant_id, ts_ms, kind}`) per
319        // scoping doc §3 Decision C — clients refetch the affected page
320        // on each event rather than receiving row payloads. See
321        // `docs/dev-log/0117-graph-stream-impl.md`.
322        .route("/v1/graph/stream", get(graph_stream_handler))
323        // Authenticated readiness/status surface for local UIs and
324        // agent bridges. `/health` stays public and tiny; this route
325        // resolves the tenant and reports operator-facing JSON.
326        .route("/v1/status", get(status_handler))
327        // v0.10.0: principal-scoped tenant list for solo-web's top-bar
328        // tenant picker. Read-only — admin CRUD (create/delete) remains
329        // CLI-only per ADR-0004 §"Admin operations". The visibility
330        // filter is principal-driven: no-auth + bearer principals see
331        // every active tenant; OIDC principals see only the tenant
332        // named by their `tenant_claim`. See
333        // `docs/dev-log/0119-tenants-list-impl.md` + scoping doc §3
334        // Decision F + §4 Route 6.
335        .route("/v1/tenants", get(tenants_list_handler))
336        .with_state(state.clone());
337
338    // v0.10.2: MCP-over-HTTP transport on /mcp. Lets one Solo process
339    // serve both `/v1/graph/*` (REST, for solo-web) and `/mcp`
340    // (JSON-RPC, for solo-jarvis) without the
341    // single-writer-per-data-dir lock dance. See
342    // `docs/dev-log/0129-v0.10.2-mcp-over-http-impl.md` for the spec.
343    // POST + GET share the same path; axum's `MethodRouter` muxes by
344    // HTTP method. OPTIONS is handled by the `CorsLayer` (already
345    // wired below) — we don't need an explicit handler.
346    //
347    // v0.11.0 P1: the route gets its own session middleware layer
348    // (`mcp_session_middleware`) that validates the `Mcp-Session-Id`
349    // request header against the per-process `SessionStore`. Expired
350    // / unknown sessions return 404 with a re-init instruction; the
351    // POST handler creates a new session on a request that arrived
352    // without the header and echoes the assigned id back via
353    // `Mcp-Session-Id` response header. The middleware lives on this
354    // sub-router (not the outer `authed`) so the rest of the API
355    // surface is unaffected — only `/mcp` carries session semantics.
356    let mcp_router: Router<SoloHttpState> = Router::new()
357        .route(
358            "/mcp",
359            post(mcp_http_post_handler)
360                .get(mcp_http_get_handler)
361                .delete(mcp_http_delete_handler),
362        )
363        .layer(axum::middleware::from_fn_with_state(
364            state.mcp_sessions.clone(),
365            crate::mcp_session::mcp_session_middleware,
366        ));
367    let authed = authed.merge(mcp_router.with_state(state.clone()));
368
369    let authed = if let Some(cfg) = auth {
370        // v0.8.0 P3: dispatch via AuthValidator (bearer | OIDC), inserts
371        // AuthenticatedPrincipal into request extensions for the
372        // TenantExtractor + audit-log to read.
373        let validator = Arc::new(AuthValidator::from_config(
374            &cfg,
375            state.default_tenant.clone(),
376        ));
377        authed.layer(axum::middleware::from_fn_with_state(
378            validator,
379            crate::auth::middleware::auth_middleware,
380        ))
381    } else {
382        authed
383    };
384
385    public
386        .merge(authed)
387        .layer(cors)
388        .layer(TraceLayer::new_for_http())
389}
390
391/// Convenience wrapper: no auth (loopback-only deployments).
392pub fn router(state: SoloHttpState) -> Router {
393    router_with_auth_config(state, None)
394}
395
396fn build_cors_layer() -> CorsLayer {
397    // Permissive-localhost CORS: allow any localhost / 127.0.0.1 origin so
398    // browser-based UIs running on a different local port can call the API
399    // without preflight friction. We do NOT use `Any` because that would
400    // allow arbitrary remote origins to talk to our localhost server via
401    // a victim's browser. With bearer-token auth enabled the practical
402    // impact is reduced (the cross-origin attacker still can't supply
403    // the token), but principle of least privilege says refuse anyway.
404    //
405    // When the server is bound to a non-loopback address (auth required),
406    // the same CORS predicate keeps localhost-only browser clients —
407    // suitable for trusted-LAN deployments where the LAN client itself
408    // tunnels through ssh/wireguard back to localhost. Wider CORS for
409    // genuine cross-origin browser use is a future config knob.
410    CorsLayer::new()
411        .allow_origin(AllowOrigin::predicate(|origin: &HeaderValue, _req| {
412            origin.to_str().map(is_localhost_origin).unwrap_or(false)
413        }))
414        .allow_methods([
415            Method::GET,
416            Method::POST,
417            Method::PATCH,
418            Method::DELETE,
419            Method::OPTIONS,
420        ])
421        .allow_headers([
422            axum::http::header::CONTENT_TYPE,
423            axum::http::header::AUTHORIZATION,
424            // Custom Solo headers — browsers preflight-check these and
425            // refuse the actual request if they're not in the allow list.
426            // Without `x-solo-tenant` solo-web's browser fetches all fail
427            // with "Failed to fetch" (CORS preflight rejection).
428            axum::http::HeaderName::from_static("x-solo-tenant"),
429            // v0.10.2: `Mcp-Session-Id` is part of the MCP Streamable
430            // HTTP transport spec (sessions, resumable streams). v0.11.0
431            // P1/P2 implement the real session affinity + resumable GET
432            // stream behind this header; the allow-list entry was
433            // pre-wired in v0.10.2 so browser-based MCP clients that
434            // preflight for it (per the spec) succeed instead of
435            // failing with a CORS error before the first request even
436            // lands.
437            axum::http::HeaderName::from_static("mcp-session-id"),
438            // v0.11.0 P2: `Last-Event-ID` is the SSE-spec header carrying
439            // the client's last-seen event id on reconnect. The
440            // resumable `GET /mcp` handler reads it and replays the
441            // missed events from the per-session ring buffer
442            // (Decision E). Browsers preflight any non-CORS-safelisted
443            // request header; without this entry the preflight fails
444            // before the actual reconnect lands.
445            axum::http::HeaderName::from_static(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER),
446        ])
447}
448
/// True if `origin` is `http(s)://localhost[:port]`,
/// `http(s)://127.0.0.1[:port]`, or `http(s)://[::1][:port]` (loopback
/// IPv6). Everything else is rejected, including:
///
/// - look-alike hosts such as `127.0.0.1.nip.io`;
/// - a "port" that is not 1+ ASCII digits fitting in `u16`. This covers
///   userinfo tricks like `http://localhost:x@evil.com`, where the real
///   host is `evil.com`, and also an empty port (`http://localhost:`);
/// - any text after the closing `]` of a bracketed host other than
///   `:port` (e.g. `http://[::1]evil.com`).
///
/// A path component should never appear in an `Origin` header, but it is
/// tolerated and ignored.
fn is_localhost_origin(origin: &str) -> bool {
    let authority = match origin
        .strip_prefix("http://")
        .or_else(|| origin.strip_prefix("https://"))
    {
        Some(rest) => rest,
        None => return false,
    };
    // Drop any path (shouldn't appear on Origin headers but defend anyway).
    let authority = authority.split('/').next().unwrap_or(authority);

    // Split into host and optional port without losing anything: every
    // byte of the authority must be accounted for by one of the two.
    let (host, port) = if authority.starts_with('[') {
        // Bracketed IPv6 literal: the host runs up to and including `]`.
        match authority.find(']') {
            Some(close) => {
                let (host, tail) = authority.split_at(close + 1);
                match tail.strip_prefix(':') {
                    Some(port) => (host, Some(port)),
                    None if tail.is_empty() => (host, None),
                    // Trailing garbage after `]` — not a loopback origin.
                    None => return false,
                }
            }
            None => return false,
        }
    } else {
        match authority.rsplit_once(':') {
            Some((host, port)) => (host, Some(port)),
            None => (authority, None),
        }
    };

    if let Some(port) = port {
        // `parse::<u16>` alone would accept a leading `+`, so require
        // plain digits first.
        let numeric = !port.is_empty() && port.bytes().all(|b| b.is_ascii_digit());
        if !numeric || port.parse::<u16>().is_err() {
            return false;
        }
    }

    matches!(host, "localhost" | "127.0.0.1" | "[::1]")
}
476
477/// Bind + serve (v0.7.x legacy shape). `shutdown` is awaited inside
478/// axum's `with_graceful_shutdown`; resolving it triggers a clean drain.
479/// `bearer_token = None` runs unauthenticated (loopback default);
480/// `Some(t)` requires `Authorization: Bearer t` on every request
481/// except `GET /health` + `GET /openapi.json`.
482pub async fn serve_http(
483    addr: SocketAddr,
484    state: SoloHttpState,
485    bearer_token: Option<String>,
486    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
487) -> std::io::Result<()> {
488    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
489    serve_http_with_auth_config(addr, state, auth, shutdown).await
490}
491
492/// Bind + serve with a config-driven auth block (v0.8.0 P3+).
493/// `auth = None` runs unauthenticated. See [`router_with_auth_config`]
494/// for the auth-mode semantics.
495pub async fn serve_http_with_auth_config(
496    addr: SocketAddr,
497    state: SoloHttpState,
498    auth: Option<AuthConfig>,
499    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
500) -> std::io::Result<()> {
501    let auth_kind = match &auth {
502        Some(AuthConfig::Bearer { .. }) => "bearer",
503        Some(AuthConfig::Oidc { .. }) => "oidc",
504        None => "none",
505    };
506    let app = router_with_auth_config(state, auth);
507    let listener = tokio::net::TcpListener::bind(addr).await?;
508    tracing::info!(%addr, auth = auth_kind, "solo http: listening");
509    axum::serve(listener, app)
510        .with_graceful_shutdown(shutdown)
511        .await
512}
513
514// ---------------------------------------------------------------------------
515// OpenAPI 3.1 spec
516// ---------------------------------------------------------------------------
517
518/// Serve the hand-crafted OpenAPI 3.1 spec at `GET /openapi.json`.
519///
520/// We keep the spec hand-written (rather than deriving via `utoipa`)
521/// for v0.1: 4 simple endpoints, types live across crate boundaries
522/// (`solo_query::RecallResult`, `solo_query::EpisodeRecord`), and a
523/// `utoipa` retrofit would touch every crate. Hand-crafted is one
524/// JSON literal in this file; a smoke test in `handler_tests` parses
525/// the response and asserts the expected paths + components are
526/// present, so drift between spec and code is caught at PR time.
527async fn openapi_handler() -> Json<serde_json::Value> {
528    Json(openapi_spec())
529}
530
531/// Build the OpenAPI 3.1 spec describing Solo's HTTP transport.
532/// Public so the smoke test + future client-codegen tooling can
533/// produce the same document without spinning up the server.
534pub fn openapi_spec() -> serde_json::Value {
535    serde_json::json!({
536        "openapi": "3.1.0",
537        "info": {
538            "title": "Solo HTTP API",
539            "description":
540                "Local-first personal memory daemon. The HTTP transport \
541                 mirrors the MCP memory tools. Default deployment is loopback-only \
542                 (127.0.0.1); LAN-bound deployments require a bearer \
543                 token via `solo http-serve --bind <ip> --bearer-token-file <path>`.",
544            "version": env!("CARGO_PKG_VERSION"),
545            "license": { "name": "Apache-2.0" }
546        },
547        "servers": [
548            { "url": "http://127.0.0.1:7437", "description": "Default loopback (replace port with your --http-port)" }
549        ],
550        "components": {
551            "securitySchemes": {
552                "bearerAuth": {
553                    "type": "http",
554                    "scheme": "bearer",
555                    "description":
556                        "Bearer-token auth. Required only on LAN-bound deployments \
557                         (`solo http-serve --bind <non-loopback> --bearer-token-file <path>`); \
558                         the default `127.0.0.1` deployment is unauthenticated. \
559                         `GET /health` and `GET /openapi.json` are exempt from auth even \
560                         on bearer-protected instances."
561                }
562            },
563            "schemas": {
564                "RememberRequest": {
565                    "type": "object",
566                    "required": ["content"],
567                    "properties": {
568                        "content": { "type": "string", "minLength": 1, "description": "Episode content to embed + store." },
569                        "source_type": { "type": "string", "description": "Free-form source tag (e.g. `user_message`, `tool_output`). Defaults to `user_message`." },
570                        "source_id": { "type": "string", "description": "Optional upstream ID for traceability." },
571                        "salience": { "type": "number", "minimum": 0.0, "maximum": 1.0, "default": 0.5, "description": "v0.9.2+. Optional priority hint in [0.0, 1.0]; defaults to 0.5. Parity with the `memory_remember` MCP tool." }
572                    },
573                    "additionalProperties": false
574                },
575                "RememberResponse": {
576                    "type": "object",
577                    "required": ["memory_id"],
578                    "properties": {
579                        "memory_id": { "type": "string", "format": "uuid", "description": "UUID v7 assigned to the new episode." }
580                    }
581                },
582                "RecallRequest": {
583                    "type": "object",
584                    "required": ["query"],
585                    "properties": {
586                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query; embedded by the same model as stored episodes." },
587                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5, "description": "Max number of hits to return. Server clamps to [1, 100]." }
588                    },
589                    "additionalProperties": false
590                },
591                "RecallResult": {
592                    "type": "object",
593                    "description":
594                        "Recall response. Fields are stable across v0.1 but not exhaustively documented here — \
595                         see `solo_query::RecallResult` in the source for the canonical shape. \
596                         Treat as a forward-compatible JSON object.",
597                    "additionalProperties": true
598                },
599                "MemoryContextRequest": {
600                    "type": "object",
601                    "required": ["query"],
602                    "properties": {
603                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query for episodic recall." },
604                        "subject": { "type": "string", "description": "Optional subject for structured facts; when present, facts also match object-position references." },
605                        "window_days": { "type": "integer", "minimum": 1, "description": "Optional recency window for themes." },
606                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5, "description": "Per-section result limit." }
607                    },
608                    "additionalProperties": false
609                },
610                "MemoryContextResult": {
611                    "type": "object",
612                    "description": "Agent-oriented memory context bundle: recall, themes, facts, and contradictions.",
613                    "additionalProperties": true
614                },
615                "MemoryUpdateRequest": {
616                    "type": "object",
617                    "required": ["content"],
618                    "properties": {
619                        "content": { "type": "string", "minLength": 1, "description": "Replacement content for the active memory." }
620                    },
621                    "additionalProperties": false
622                },
623                "MemoryUpdateResult": {
624                    "type": "object",
625                    "description": "Result of PATCH /memory/{id}. See `solo_query::MemoryUpdateResult`.",
626                    "additionalProperties": true
627                },
628                "ConsolidationScope": {
629                    "type": "object",
630                    "description": "Filter + flags for consolidation. All fields optional; empty body = unbounded defaults.",
631                    "properties": {
632                        "window_days": { "type": "integer", "nullable": true, "description": "Restrict to memories with ts_ms >= now - window_days * 86400000. Null/omitted = unbounded." },
633                        "force_merge": { "type": "boolean", "default": false, "description": "Run the existing-vs-existing merge + abstraction-regen passes even with zero unclustered candidates. Drift catch-up on quiet corpora. Added in 0.3.1." }
634                    },
635                    "additionalProperties": false
636                },
637                "ConsolidationReport": {
638                    "type": "object",
639                    "required": [
640                        "episodes_seen", "clusters_built", "clusters_merged",
641                        "clusters_absorbed", "existing_clusters_merged",
642                        "episodes_clustered", "abstractions_built",
643                        "abstractions_regenerated", "triples_built",
644                        "contradictions_found"
645                    ],
646                    "properties": {
647                        "episodes_seen":             { "type": "integer", "minimum": 0 },
648                        "clusters_built":            { "type": "integer", "minimum": 0, "description": "Brand-new clusters that survived to be persisted (post in-run-merge, post cross-run-absorb)." },
649                        "clusters_merged":           { "type": "integer", "minimum": 0, "description": "In-run merge: clusters absorbed into a sibling within this consolidate run (cross-UTC-bucket case). Counts losers." },
650                        "clusters_absorbed":         { "type": "integer", "minimum": 0, "description": "Cross-run absorb: freshly-built clusters folded into a pre-existing DB cluster with a similar centroid. Counts new-side clusters." },
651                        "existing_clusters_merged":  { "type": "integer", "minimum": 0, "description": "Existing-vs-existing merge: pre-existing DB clusters that drifted toward each other and now coalesce. Counts losers." },
652                        "episodes_clustered":        { "type": "integer", "minimum": 0 },
653                        "abstractions_built":        { "type": "integer", "minimum": 0, "description": "Fresh abstractions persisted for newly-built clusters. 0 when no LlmClient is wired." },
654                        "abstractions_regenerated":  { "type": "integer", "minimum": 0, "description": "Existing clusters whose stale abstractions were dropped and rebuilt because absorb or existing-merge changed their episode set. 0 without an LlmClient." },
655                        "triples_built":             { "type": "integer", "minimum": 0 },
656                        "contradictions_found":      { "type": "integer", "minimum": 0 }
657                    }
658                },
659                "EpisodeRecord": {
660                    "type": "object",
661                    "description":
662                        "Inspect response: full episode record. Fields are stable across v0.1 but not \
663                         exhaustively documented here — see `solo_query::EpisodeRecord` in the source. \
664                         Treat as a forward-compatible JSON object.",
665                    "additionalProperties": true
666                },
667                "ThemeHit": {
668                    "type": "object",
669                    "description":
670                        "One cluster + its (optional) abstraction. Returned by GET /memory/themes. \
671                         See `solo_query::ThemeHit` for the canonical shape: cluster_id, \
672                         abstraction_id?, abstraction_text?, episode_count, coherence, created_at_ms.",
673                    "additionalProperties": true
674                },
675                "FactHit": {
676                    "type": "object",
677                    "description":
678                        "One Steward-extracted SPO triple. Returned by GET /memory/facts_about. \
679                         See `solo_query::FactHit` for fields: triple_id, subject_id, predicate, \
680                         object_id, object_kind, valid_from_ms, valid_to_ms?, confidence, cluster_id?.",
681                    "additionalProperties": true
682                },
683                "EntityHit": {
684                    "type": "object",
685                    "description":
686                        "One discovered entity-like id from the structured-fact graph. Returned by \
687                         GET /memory/entities. See `solo_query::EntityHit`.",
688                    "additionalProperties": true
689                },
690                "ContradictionHit": {
691                    "type": "object",
692                    "description":
693                        "One Steward-flagged contradiction with each side's triple LEFT JOIN'd in. \
694                         Returned by GET /memory/contradictions. See `solo_query::ContradictionHit`: \
695                         a_id, b_id, kind, explanation, detected_at_ms, status, resolved_at_ms?, \
696                         resolution_note?, winning_triple_id?, a_triple?, b_triple?.",
697                    "additionalProperties": true
698                },
699                "ContradictionResolveRequest": {
700                    "type": "object",
701                    "required": ["a_id", "b_id", "kind"],
702                    "properties": {
703                        "a_id": { "type": "string", "minLength": 1 },
704                        "b_id": { "type": "string", "minLength": 1 },
705                        "kind": { "type": "string", "minLength": 1 },
706                        "status": {
707                            "type": "string",
708                            "enum": ["unresolved", "resolved", "reopened"],
709                            "default": "resolved"
710                        },
711                        "resolution_note": { "type": "string" },
712                        "winning_triple_id": { "type": "string" }
713                    },
714                    "additionalProperties": false
715                },
716                "ContradictionResolution": {
717                    "type": "object",
718                    "description": "Lifecycle update result for POST /memory/contradictions/resolve.",
719                    "additionalProperties": true
720                },
721                "ClusterRecord": {
722                    "type": "object",
723                    "description":
724                        "Snapshot of one cluster — its row, optional abstraction, and source episodes \
725                         (content truncated to 200 chars unless ?full_content=true). Returned by \
726                         GET /memory/clusters/{cluster_id}. See `solo_query::ClusterRecord`.",
727                    "additionalProperties": true
728                },
729                "IngestDocumentRequest": {
730                    "type": "object",
731                    "required": ["path"],
732                    "properties": {
733                        "path": {
734                            "type": "string",
735                            "minLength": 1,
736                            "description":
737                                "Server-side absolute path to the file to ingest. The file must be \
738                                 readable by the Solo process. Supported formats: plaintext / \
739                                 markdown / code, HTML, PDF."
740                        }
741                    },
742                    "additionalProperties": false
743                },
744                "IngestReport": {
745                    "type": "object",
746                    "description":
747                        "Returned by POST /memory/documents. Reports the document id assigned, \
748                         the number of chunks persisted + embedded, the total byte size, and a \
749                         `deduped` flag (true when the same content_hash was already present and \
750                         the existing doc_id was returned unchanged). See `solo_storage::IngestReport`.",
751                    "required": ["doc_id", "chunks_persisted", "bytes_ingested", "deduped"],
752                    "properties": {
753                        "doc_id":            { "type": "string", "format": "uuid" },
754                        "chunks_persisted":  { "type": "integer", "minimum": 0 },
755                        "bytes_ingested":    { "type": "integer", "minimum": 0, "format": "int64" },
756                        "deduped":           { "type": "boolean" }
757                    },
758                    "additionalProperties": false
759                },
760                "ForgetDocumentReport": {
761                    "type": "object",
762                    "description":
763                        "Returned by DELETE /memory/documents/{id}. Reports the doc_id soft-deleted \
764                         and how many chunk rowids were tombstoned in the HNSW index. The chunk rows \
765                         themselves survive in SQL for forensic value. See `solo_storage::ForgetDocumentReport`.",
766                    "required": ["doc_id", "chunks_tombstoned"],
767                    "properties": {
768                        "doc_id":             { "type": "string", "format": "uuid" },
769                        "chunks_tombstoned":  { "type": "integer", "minimum": 0 }
770                    },
771                    "additionalProperties": false
772                },
773                "SearchDocsRequest": {
774                    "type": "object",
775                    "required": ["query"],
776                    "properties": {
777                        "query": { "type": "string", "minLength": 1 },
778                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 }
779                    },
780                    "additionalProperties": false
781                },
782                "DocSearchHit": {
783                    "type": "object",
784                    "description":
785                        "One chunk hit + parent-doc context. Fields per `solo_query::DocSearchHit`: \
786                         chunk_id, doc_id, doc_title?, doc_source?, doc_mime_type?, chunk_index, \
787                         content, cos_distance, start_offset, end_offset.",
788                    "additionalProperties": true
789                },
790                "DocumentInspectResult": {
791                    "type": "object",
792                    "description":
793                        "Returned by GET /memory/documents/{id}. A `document` record (full metadata) \
794                         plus an ordered list of chunk summaries (each preview truncated to 200 \
795                         chars). See `solo_query::DocumentInspectResult`.",
796                    "additionalProperties": true
797                },
798                "DocumentSummary": {
799                    "type": "object",
800                    "description":
801                        "One row from GET /memory/documents. Fields per `solo_query::DocumentSummary`: \
802                         doc_id, title?, source?, mime_type?, ingested_at_ms, chunk_count, status.",
803                    "additionalProperties": true
804                },
805                "GraphNode": {
806                    "type": "object",
807                    "required": ["id", "kind", "label", "tenant_id"],
808                    "properties": {
809                        "id": { "type": "string", "description": "Prefixed graph node id, e.g. ep:<uuid>, doc:<uuid>, chunk:<uuid>, cl:<id>, ent:<value>." },
810                        "kind": { "type": "string", "enum": ["episode", "document", "chunk", "cluster", "entity"] },
811                        "label": { "type": "string" },
812                        "tenant_id": { "type": "string" },
813                        "preview": { "type": ["string", "null"] },
814                        "score": { "type": ["number", "null"] },
815                        "meta": { "type": ["object", "null"], "additionalProperties": true }
816                    },
817                    "additionalProperties": true
818                },
819                "GraphEdge": {
820                    "type": "object",
821                    "required": ["id", "source", "target", "kind"],
822                    "properties": {
823                        "id": { "type": "string" },
824                        "source": { "type": "string" },
825                        "target": { "type": "string" },
826                        "kind": { "type": "string" },
827                        "label": { "type": ["string", "null"] },
828                        "weight": { "type": ["number", "null"] },
829                        "meta": { "type": ["object", "null"], "additionalProperties": true }
830                    },
831                    "additionalProperties": true
832                },
833                "GraphResponse": {
834                    "type": "object",
835                    "required": ["nodes", "edges"],
836                    "properties": {
837                        "nodes": { "type": "array", "items": { "$ref": "#/components/schemas/GraphNode" } },
838                        "edges": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } }
839                    }
840                },
841                "GraphNodesResponse": {
842                    "type": "object",
843                    "required": ["nodes"],
844                    "properties": {
845                        "nodes": { "type": "array", "items": { "$ref": "#/components/schemas/GraphNode" } },
846                        "next_cursor": { "type": ["string", "null"] }
847                    }
848                },
849                "GraphEdgesResponse": {
850                    "type": "object",
851                    "required": ["edges"],
852                    "properties": {
853                        "edges": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } },
854                        "next_cursor": { "type": ["string", "null"] }
855                    }
856                },
857                "GraphInspectResponse": {
858                    "type": "object",
859                    "required": ["node"],
860                    "properties": {
861                        "node": { "$ref": "#/components/schemas/GraphNode" },
862                        "record": { "type": ["object", "null"], "additionalProperties": true },
863                        "triples_in": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } },
864                        "triples_out": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } }
865                    },
866                    "additionalProperties": true
867                },
868                "TenantListItem": {
869                    "type": "object",
870                    "required": ["id", "display_name", "created_at_ms", "last_accessed_ms", "status", "quota_bytes", "episode_count", "size_bytes", "pct_used"],
871                    "properties": {
872                        "id": { "type": "string" },
873                        "display_name": { "type": ["string", "null"] },
874                        "created_at_ms": { "type": "integer", "format": "int64" },
875                        "last_accessed_ms": { "type": ["integer", "null"], "format": "int64" },
876                        "status": { "type": "string", "enum": ["active"] },
877                        "quota_bytes": { "type": ["integer", "null"], "minimum": 0 },
878                        "episode_count": { "type": ["integer", "null"], "minimum": 0 },
879                        "size_bytes": { "type": ["integer", "null"], "minimum": 0 },
880                        "pct_used": { "type": ["number", "null"], "minimum": 0, "maximum": 100 }
881                    }
882                },
883                "TenantsListResponse": {
884                    "type": "object",
885                    "required": ["tenants"],
886                    "properties": {
887                        "tenants": { "type": "array", "items": { "$ref": "#/components/schemas/TenantListItem" } }
888                    }
889                },
890                "StatusResponse": {
891                    "type": "object",
892                    "required": ["ok", "version", "tenant", "embedder", "active_tenants", "mcp"],
893                    "properties": {
894                        "ok": { "type": "boolean" },
895                        "version": { "type": "string" },
896                        "tenant": {
897                            "type": "object",
898                            "required": ["id", "registered", "status", "quota_bytes", "last_accessed_ms"],
899                            "properties": {
900                                "id": { "type": "string" },
901                                "registered": { "type": "boolean" },
902                                "status": { "type": ["string", "null"], "enum": ["active", null] },
903                                "quota_bytes": { "type": ["integer", "null"], "minimum": 0 },
904                                "last_accessed_ms": { "type": ["integer", "null"], "format": "int64" }
905                            }
906                        },
907                        "embedder": {
908                            "type": "object",
909                            "required": ["name", "version", "dim", "dtype"],
910                            "properties": {
911                                "name": { "type": "string" },
912                                "version": { "type": "string" },
913                                "dim": { "type": "integer", "minimum": 1 },
914                                "dtype": { "type": "string" }
915                            }
916                        },
917                        "active_tenants": { "type": "integer", "minimum": 0 },
918                        "mcp": {
919                            "type": "object",
920                            "required": ["sessions"],
921                            "properties": {
922                                "sessions": { "type": "integer", "minimum": 0 }
923                            }
924                        }
925                    }
926                },
927                "JsonRpcRequest": {
928                    "type": "object",
929                    "required": ["jsonrpc", "method"],
930                    "properties": {
931                        "jsonrpc": { "type": "string", "enum": ["2.0"] },
932                        "id": { "description": "String or number request id. Omit for notifications." },
933                        "method": { "type": "string" },
934                        "params": { "type": ["object", "array", "null"], "additionalProperties": true }
935                    },
936                    "additionalProperties": true
937                },
938                "JsonRpcResponse": {
939                    "type": "object",
940                    "required": ["jsonrpc", "id"],
941                    "properties": {
942                        "jsonrpc": { "type": "string", "enum": ["2.0"] },
943                        "id": {},
944                        "result": {},
945                        "error": {
946                            "type": "object",
947                            "required": ["code", "message"],
948                            "properties": {
949                                "code": { "type": "integer" },
950                                "message": { "type": "string" },
951                                "data": {}
952                            }
953                        }
954                    },
955                    "additionalProperties": true
956                },
957                "ApiError": {
958                    "type": "object",
959                    "required": ["error", "status"],
960                    "properties": {
961                        "error": { "type": "string" },
962                        "status": { "type": "integer", "minimum": 400, "maximum": 599 }
963                    }
964                }
965            }
966        },
967        "paths": {
968            "/health": {
969                "get": {
970                    "summary": "Liveness probe",
971                    "description": "Returns plain text `ok`. Always unauthenticated.",
972                    "responses": {
973                        "200": {
974                            "description": "Server is up.",
975                            "content": { "text/plain": { "schema": { "type": "string", "example": "ok" } } }
976                        }
977                    }
978                }
979            },
980            "/openapi.json": {
981                "get": {
982                    "summary": "Self-describing OpenAPI 3.1 spec",
983                    "description": "Returns this document. Always unauthenticated.",
984                    "responses": {
985                        "200": {
986                            "description": "OpenAPI 3.1 document.",
987                            "content": { "application/json": { "schema": { "type": "object" } } }
988                        }
989                    }
990                }
991            },
992            "/memory": {
993                "post": {
994                    "summary": "Remember (store an episode)",
995                    "description": "Equivalent to MCP tool `memory_remember`.",
996                    "security": [{ "bearerAuth": [] }, {}],
997                    "requestBody": {
998                        "required": true,
999                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberRequest" } } }
1000                    },
1001                    "responses": {
1002                        "200": {
1003                            "description": "Memory stored; returns the new MemoryId.",
1004                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberResponse" } } }
1005                        },
1006                        "400": { "description": "Bad request (e.g. empty content).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1007                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1008                    }
1009                }
1010            },
1011            "/memory/search": {
1012                "post": {
1013                    "summary": "Recall (vector search)",
1014                    "description": "Equivalent to MCP tool `memory_recall`. Embeds the query, runs HNSW search, returns the top-K hits in cosine-distance order.",
1015                    "security": [{ "bearerAuth": [] }, {}],
1016                    "requestBody": {
1017                        "required": true,
1018                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallRequest" } } }
1019                    },
1020                    "responses": {
1021                        "200": {
1022                            "description": "Search results.",
1023                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallResult" } } }
1024                        },
1025                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1026                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1027                    }
1028                }
1029            },
1030            "/memory/context": {
1031                "post": {
1032                    "summary": "Build agent memory context",
1033                    "description": "Equivalent to MCP tool `memory_context`. Returns one bounded bundle containing episodic recall, recent themes, optional facts about a subject, and contradictions.",
1034                    "security": [{ "bearerAuth": [] }, {}],
1035                    "requestBody": {
1036                        "required": true,
1037                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryContextRequest" } } }
1038                    },
1039                    "responses": {
1040                        "200": {
1041                            "description": "Combined memory context.",
1042                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryContextResult" } } }
1043                        },
1044                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1045                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1046                    }
1047                }
1048            },
1049            "/memory/consolidate": {
1050                "post": {
1051                    "summary": "Run a consolidation pass (clustering + abstraction)",
1052                    "description":
1053                        "Idempotent. Triggers the SWS-equivalent clustering pass; if a `Steward` LLM is wired \
1054                         on the server, also runs the REM-equivalent abstraction pass that populates \
1055                         `semantic_abstractions` and `triples`. Empty request body = default scope (unbounded \
1056                         window). Equivalent to the `solo consolidate` CLI.",
1057                    "security": [{ "bearerAuth": [] }, {}],
1058                    "requestBody": {
1059                        "required": false,
1060                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationScope" } } }
1061                    },
1062                    "responses": {
1063                        "200": {
1064                            "description": "Consolidation complete; report counts the work done.",
1065                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationReport" } } }
1066                        },
1067                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1068                    }
1069                }
1070            },
1071            "/backup": {
1072                "post": {
1073                    "summary": "Online encrypted backup",
1074                    "description":
1075                        "Run an online SQLCipher backup of the live data dir to a server-side path. \
1076                         The destination file is encrypted with the same Argon2id-derived raw key as \
1077                         the source, so it restores under the same passphrase + a copy of the source's \
1078                         `solo.config.toml`. Hot — the backup runs against the writer's existing \
1079                         connection without taking the lockfile, so the daemon keeps serving reads + \
1080                         writes during the operation. v0.3.2+.",
1081                    "security": [{ "bearerAuth": [] }, {}],
1082                    "requestBody": {
1083                        "required": true,
1084                        "content": { "application/json": { "schema": {
1085                            "type": "object",
1086                            "properties": {
1087                                "to": { "type": "string", "description": "Server-side absolute path for the backup file." },
1088                                "force": { "type": "boolean", "description": "Overwrite an existing destination file. Default false.", "default": false }
1089                            },
1090                            "required": ["to"]
1091                        } } }
1092                    },
1093                    "responses": {
1094                        "200": {
1095                            "description": "Backup complete; reports the destination path + elapsed milliseconds.",
1096                            "content": { "application/json": { "schema": {
1097                                "type": "object",
1098                                "properties": {
1099                                    "path": { "type": "string" },
1100                                    "elapsed_ms": { "type": "integer", "format": "int64" }
1101                                }
1102                            } } }
1103                        },
1104                        "400": { "description": "Destination invalid, exists without force, or its parent doesn't exist." },
1105                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1106                        "500": { "description": "Backup failed (disk full, permission denied, etc.)." }
1107                    }
1108                }
1109            },
1110            "/memory/{id}": {
1111                "get": {
1112                    "summary": "Inspect a memory by ID",
1113                    "description": "Equivalent to MCP tool `memory_inspect`.",
1114                    "security": [{ "bearerAuth": [] }, {}],
1115                    "parameters": [{
1116                        "name": "id",
1117                        "in": "path",
1118                        "required": true,
1119                        "schema": { "type": "string", "format": "uuid" },
1120                        "description": "MemoryId (UUID v7)."
1121                    }],
1122                    "responses": {
1123                        "200": {
1124                            "description": "Episode record.",
1125                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/EpisodeRecord" } } }
1126                        },
1127                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1128                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1129                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1130                    }
1131                },
1132                "patch": {
1133                    "summary": "Correct/update a single active memory",
1134                    "description":
1135                        "Equivalent to MCP tool `memory_update`. Rewrites the active episode content, \
1136                         refreshes its embedding, updates the pending index/HNSW entry, and records \
1137                         an audit event. Forgotten memories cannot be updated.",
1138                    "security": [{ "bearerAuth": [] }, {}],
1139                    "parameters": [
1140                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1141                    ],
1142                    "requestBody": {
1143                        "required": true,
1144                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryUpdateRequest" } } }
1145                    },
1146                    "responses": {
1147                        "200": {
1148                            "description": "Updated memory metadata.",
1149                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryUpdateResult" } } }
1150                        },
1151                        "400": { "description": "Malformed ID or empty content.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1152                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1153                        "409": { "description": "Memory exists but is not active.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1154                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1155                    }
1156                },
1157                "delete": {
1158                    "summary": "Forget (soft-delete) a memory by ID",
1159                    "description":
1160                        "Equivalent to MCP tool `memory_forget`. Soft-delete: flips `episodes.status = 'forgotten'` \
1161                         and tombstones the HNSW vector. The row + embedding are preserved for forensics; \
1162                         re-running `solo reembed` after this does NOT restore visibility.",
1163                    "security": [{ "bearerAuth": [] }, {}],
1164                    "parameters": [
1165                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } },
1166                        { "name": "reason", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Free-form reason logged via tracing (not yet persisted to the DB)." }
1167                    ],
1168                    "responses": {
1169                        "204": { "description": "Forgotten (or already forgotten — idempotent)." },
1170                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1171                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1172                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1173                    }
1174                }
1175            },
1176            "/memory/themes": {
1177                "get": {
1178                    "summary": "List recent cluster themes",
1179                    "description":
1180                        "Equivalent to MCP tool `memory_themes`. List cluster abstractions ordered by \
1181                         most-recent first. Use to surface 'what has the user been thinking about lately' \
1182                         without paging through individual episodes. v0.4.0+.",
1183                    "security": [{ "bearerAuth": [] }, {}],
1184                    "parameters": [
1185                        { "name": "window_days", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1 }, "description": "Optional time window. Omit for unfiltered (all-time, most-recent first)." },
1186                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1187                    ],
1188                    "responses": {
1189                        "200": {
1190                            "description": "Array of ThemeHits (possibly empty).",
1191                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ThemeHit" } } } }
1192                        },
1193                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1194                    }
1195                }
1196            },
1197            "/memory/facts_about": {
1198                "get": {
1199                    "summary": "Query the SPO knowledge graph by subject",
1200                    "description":
1201                        "Equivalent to MCP tool `memory_facts_about`. Query Steward-extracted triples by \
1202                         subject + optional predicate + optional time window. Subject is required \
1203                         (predicate-only scans not supported). Pass `include_as_object=true` (v0.5.1+) \
1204                         to also surface rows where `subject` appears as the object. v0.4.0+.",
1205                    "security": [{ "bearerAuth": [] }, {}],
1206                    "parameters": [
1207                        { "name": "subject", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Subject id to query (e.g. `Sam`)." },
1208                        { "name": "predicate", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Optional predicate filter (e.g. `works_at`)." },
1209                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_from_ms lower bound (epoch ms)." },
1210                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through." },
1211                        { "name": "include_as_object", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, also match rows where `subject` appears as the object (e.g. surface 'Sam pushes back on PRs about Maya' under subject='Maya'). Default false. v0.5.1+." },
1212                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1213                    ],
1214                    "responses": {
1215                        "200": {
1216                            "description": "Array of FactHits (possibly empty).",
1217                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/FactHit" } } } }
1218                        },
1219                        "400": { "description": "Bad request (e.g. empty subject).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1220                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1221                    }
1222                }
1223            },
1224            "/memory/entities": {
1225                "get": {
1226                    "summary": "Discover structured-graph entities",
1227                    "description":
1228                        "Equivalent to MCP tool `memory_entities`. Searches entity-like ids found in \
1229                         active triples and returns counts plus common predicates. Use before \
1230                         `/memory/facts_about` when the exact subject id is uncertain.",
1231                    "security": [{ "bearerAuth": [] }, {}],
1232                    "parameters": [
1233                        { "name": "query", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Partial or exact entity id." },
1234                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1235                    ],
1236                    "responses": {
1237                        "200": {
1238                            "description": "Array of EntityHits (possibly empty).",
1239                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/EntityHit" } } } }
1240                        },
1241                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1242                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1243                    }
1244                }
1245            },
1246            "/memory/contradictions": {
1247                "get": {
1248                    "summary": "List Steward-flagged contradictions",
1249                    "description":
1250                        "Equivalent to MCP tool `memory_contradictions`. Each result includes both \
1251                         sides' triple SPO via LEFT JOIN for context. v0.4.0+.",
1252                    "security": [{ "bearerAuth": [] }, {}],
1253                    "parameters": [
1254                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1255                    ],
1256                    "responses": {
1257                        "200": {
1258                            "description": "Array of ContradictionHits (possibly empty).",
1259                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ContradictionHit" } } } }
1260                        },
1261                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1262                    }
1263                }
1264            },
1265            "/memory/contradictions/resolve": {
1266                "post": {
1267                    "summary": "Resolve or reopen a contradiction",
1268                    "description":
1269                        "Equivalent to MCP tool `memory_contradiction_resolve`. Updates the lifecycle \
1270                         fields on one contradiction row after the user clarifies which memory is current.",
1271                    "security": [{ "bearerAuth": [] }, {}],
1272                    "requestBody": {
1273                        "required": true,
1274                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ContradictionResolveRequest" } } }
1275                    },
1276                    "responses": {
1277                        "200": {
1278                            "description": "Contradiction lifecycle update result.",
1279                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ContradictionResolution" } } }
1280                        },
1281                        "400": { "description": "Bad request (missing ids/kind or invalid status).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1282                        "404": { "description": "No matching contradiction.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1283                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1284                    }
1285                }
1286            },
1287            "/memory/clusters/{cluster_id}": {
1288                "get": {
1289                    "summary": "Inspect a single cluster",
1290                    "description":
1291                        "Equivalent to MCP tool `memory_inspect_cluster`. Returns the cluster row, \
1292                         its (optional) abstraction, and its source episodes. By default each \
1293                         episode's `content` is truncated to 200 chars with a trailing `…`. Pass \
1294                         `?full_content=true` to get verbatim episode content. v0.5.0+.",
1295                    "security": [{ "bearerAuth": [] }, {}],
1296                    "parameters": [
1297                        { "name": "cluster_id", "in": "path", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Cluster id (from a previous GET /memory/themes response)." },
1298                        { "name": "full_content", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, return episode content verbatim. Default false (truncate to 200 chars + ellipsis)." }
1299                    ],
1300                    "responses": {
1301                        "200": {
1302                            "description": "Cluster snapshot.",
1303                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ClusterRecord" } } }
1304                        },
1305                        "400": { "description": "Bad request (e.g. empty cluster_id).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1306                        "404": { "description": "No such cluster.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1307                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1308                    }
1309                }
1310            },
1311            "/memory/documents": {
1312                "post": {
1313                    "summary": "Ingest a document",
1314                    "description":
1315                        "Equivalent to MCP tool `memory_ingest_document`. Reads the file at the \
1316                         supplied server-side path, parses + chunks + embeds, and persists under \
1317                         `documents` + `document_chunks`. Returns the new doc_id, chunk count, and \
1318                         a `deduped` flag (true when an existing document with the same content_hash \
1319                         was returned without re-embedding). v0.7.0+.",
1320                    "security": [{ "bearerAuth": [] }, {}],
1321                    "requestBody": {
1322                        "required": true,
1323                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestDocumentRequest" } } }
1324                    },
1325                    "responses": {
1326                        "200": {
1327                            "description": "Document ingested (or deduplicated).",
1328                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestReport" } } }
1329                        },
1330                        "400": { "description": "Bad request (e.g. empty path, file unreadable, parse error).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1331                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1332                    }
1333                },
1334                "get": {
1335                    "summary": "List ingested documents (paginated)",
1336                    "description":
1337                        "Equivalent to MCP tool `memory_list_documents`. Returns a paginated index, \
1338                         newest first. Forgotten documents are hidden by default; pass \
1339                         `?include_forgotten=true` to see them too. v0.7.0+.",
1340                    "security": [{ "bearerAuth": [] }, {}],
1341                    "parameters": [
1342                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 20 } },
1343                        { "name": "offset", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 0, "default": 0 } },
1344                        { "name": "include_forgotten", "in": "query", "required": false, "schema": { "type": "boolean", "default": false } }
1345                    ],
1346                    "responses": {
1347                        "200": {
1348                            "description": "Array of DocumentSummary (possibly empty).",
1349                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocumentSummary" } } } }
1350                        },
1351                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1352                    }
1353                }
1354            },
1355            "/memory/documents/search": {
1356                "post": {
1357                    "summary": "Vector search across document chunks",
1358                    "description":
1359                        "Equivalent to MCP tool `memory_search_docs`. Embeds the query and returns \
1360                         up to `limit` matching chunks, best match first, each annotated with the \
1361                         parent document's title + source path. Forgotten documents are excluded. \
1362                         v0.7.0+.",
1363                    "security": [{ "bearerAuth": [] }, {}],
1364                    "requestBody": {
1365                        "required": true,
1366                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SearchDocsRequest" } } }
1367                    },
1368                    "responses": {
1369                        "200": {
1370                            "description": "Array of DocSearchHits (possibly empty).",
1371                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocSearchHit" } } } }
1372                        },
1373                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1374                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1375                    }
1376                }
1377            },
1378            "/memory/documents/{id}": {
1379                "get": {
1380                    "summary": "Inspect one document",
1381                    "description":
1382                        "Equivalent to MCP tool `memory_inspect_document`. Returns the document's \
1383                         metadata plus a preview of every chunk (truncated to 200 chars). v0.7.0+.",
1384                    "security": [{ "bearerAuth": [] }, {}],
1385                    "parameters": [
1386                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "DocumentId (UUID v7)." }
1387                    ],
1388                    "responses": {
1389                        "200": {
1390                            "description": "Document inspection result.",
1391                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DocumentInspectResult" } } }
1392                        },
1393                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1394                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1395                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1396                    }
1397                },
1398                "delete": {
1399                    "summary": "Forget (soft-delete) one document",
1400                    "description":
1401                        "Equivalent to MCP tool `memory_forget_document`. Flips `documents.status` \
1402                         to `forgotten` and tombstones every chunk's HNSW rowid. The chunk rows \
1403                         survive in SQL for forensic value. v0.7.0+.",
1404                    "security": [{ "bearerAuth": [] }, {}],
1405                    "parameters": [
1406                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1407                    ],
1408                    "responses": {
1409                        "200": {
1410                            "description": "Document soft-deleted; report counts chunks tombstoned.",
1411                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForgetDocumentReport" } } }
1412                        },
1413                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1414                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1415                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1416                    }
1417                }
1418            },
1419            "/v1/graph/expand": {
1420                "get": {
1421                    "summary": "Expand one graph node",
1422                    "description": "Return neighboring nodes and edges for one graph node id. Powers solo-web graph expansion.",
1423                    "security": [{ "bearerAuth": [] }, {}],
1424                    "parameters": [
1425                        { "name": "node_id", "in": "query", "required": true, "schema": { "type": "string" } },
1426                        { "name": "kind", "in": "query", "required": true, "schema": { "type": "string", "enum": ["cluster_member", "document_chunk", "triple", "semantic"] } },
1427                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 25 } }
1428                    ],
1429                    "responses": {
1430                        "200": { "description": "Expanded graph neighborhood.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphResponse" } } } },
1431                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1432                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1433                        "404": { "description": "Tenant or node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1434                    }
1435                }
1436            },
1437            "/v1/graph/nodes": {
1438                "get": {
1439                    "summary": "List graph nodes",
1440                    "description": "Paginated graph-node catalog used by solo-web's initial render.",
1441                    "security": [{ "bearerAuth": [] }, {}],
1442                    "parameters": [
1443                        { "name": "kind", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Comma-separated node kinds, e.g. episode,document,entity." },
1444                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 50 } },
1445                        { "name": "cursor", "in": "query", "required": false, "schema": { "type": "string" } },
1446                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer", "format": "int64" } },
1447                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer", "format": "int64" } }
1448                    ],
1449                    "responses": {
1450                        "200": { "description": "Page of graph nodes.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphNodesResponse" } } } },
1451                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1452                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1453                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1454                    }
1455                }
1456            },
1457            "/v1/graph/edges": {
1458                "get": {
1459                    "summary": "List graph edges",
1460                    "description": "Paginated graph-edge catalog for explicit graph relations. Semantic HNSW edges are exposed through /v1/graph/neighbors/{id}.",
1461                    "security": [{ "bearerAuth": [] }, {}],
1462                    "parameters": [
1463                        { "name": "type", "in": "query", "required": false, "schema": { "type": "string" } },
1464                        { "name": "node_id", "in": "query", "required": false, "schema": { "type": "string" } },
1465                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 50 } },
1466                        { "name": "cursor", "in": "query", "required": false, "schema": { "type": "string" } }
1467                    ],
1468                    "responses": {
1469                        "200": { "description": "Page of graph edges.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphEdgesResponse" } } } },
1470                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1471                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1472                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1473                    }
1474                }
1475            },
1476            "/v1/graph/inspect/{id}": {
1477                "get": {
1478                    "summary": "Inspect one graph node",
1479                    "description": "Kind-discriminated full-record drill for solo-web's inspector panel.",
1480                    "security": [{ "bearerAuth": [] }, {}],
1481                    "parameters": [
1482                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } }
1483                    ],
1484                    "responses": {
1485                        "200": { "description": "Graph node inspection payload.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphInspectResponse" } } } },
1486                        "400": { "description": "Bad graph node id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1487                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1488                        "404": { "description": "Tenant or graph node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1489                    }
1490                }
1491            },
1492            "/v1/graph/neighbors/{id}": {
1493                "get": {
1494                    "summary": "List graph neighbors",
1495                    "description": "Unified explicit and semantic neighbor lookup for solo-web's show-similar overlay.",
1496                    "security": [{ "bearerAuth": [] }, {}],
1497                    "parameters": [
1498                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } },
1499                        { "name": "kind", "in": "query", "required": false, "schema": { "type": "string", "enum": ["explicit", "semantic", "both"] } },
1500                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 25 } }
1501                    ],
1502                    "responses": {
1503                        "200": { "description": "Neighbor graph.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphResponse" } } } },
1504                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1505                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1506                        "404": { "description": "Tenant or graph node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1507                    }
1508                }
1509            },
1510            "/v1/graph/stream": {
1511                "get": {
1512                    "summary": "Stream graph invalidations",
1513                    "description": "Server-Sent Events stream of graph-data invalidation notifications. Clients refetch affected pages on each event.",
1514                    "security": [{ "bearerAuth": [] }, {}],
1515                    "responses": {
1516                        "200": { "description": "SSE stream.", "content": { "text/event-stream": { "schema": { "type": "string" } } } },
1517                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1518                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1519                    }
1520                }
1521            },
1522            "/v1/status": {
1523                "get": {
1524                    "summary": "Authenticated Solo status",
1525                    "description": "Tenant-aware readiness payload for local UIs and agent bridges. Unlike public /health, this resolves auth and tenant routing.",
1526                    "security": [{ "bearerAuth": [] }, {}],
1527                    "parameters": [
1528                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1529                    ],
1530                    "responses": {
1531                        "200": { "description": "Solo status payload.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StatusResponse" } } } },
1532                        "400": { "description": "Invalid tenant header.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1533                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1534                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1535                    }
1536                }
1537            },
1538            "/v1/tenants": {
1539                "get": {
1540                    "summary": "List visible tenants",
1541                    "description": "Principal-scoped active tenant list for solo-web's tenant picker and status UI.",
1542                    "security": [{ "bearerAuth": [] }, {}],
1543                    "responses": {
1544                        "200": {
1545                            "description": "Visible tenants.",
1546                            "headers": {
1547                                "X-Solo-Tenants-Count-Cap-Reached": {
1548                                    "schema": { "type": "string", "enum": ["true"] },
1549                                    "description": "Present when episode_count hydration was capped."
1550                                }
1551                            },
1552                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TenantsListResponse" } } }
1553                        },
1554                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1555                    }
1556                }
1557            },
1558            "/mcp": {
1559                "post": {
1560                    "summary": "MCP JSON-RPC request",
1561                    "description": "Streamable HTTP MCP request/response endpoint. A POST without Mcp-Session-Id creates a session and echoes it in the response header.",
1562                    "security": [{ "bearerAuth": [] }, {}],
1563                    "parameters": [
1564                        { "name": "Mcp-Session-Id", "in": "header", "required": false, "schema": { "type": "string" } },
1565                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1566                    ],
1567                    "requestBody": {
1568                        "required": true,
1569                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/JsonRpcRequest" } } }
1570                    },
1571                    "responses": {
1572                        "200": {
1573                            "description": "JSON-RPC success or in-body error response.",
1574                            "headers": { "Mcp-Session-Id": { "schema": { "type": "string" } } },
1575                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/JsonRpcResponse" } } }
1576                        },
1577                        "202": { "description": "JSON-RPC notification accepted; no response body." },
1578                        "400": { "description": "Malformed JSON-RPC envelope or invalid tenant header.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1579                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1580                        "404": { "description": "Unknown tenant or unknown/expired MCP session.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1581                    }
1582                },
1583                "get": {
1584                    "summary": "MCP SSE stream",
1585                    "description": "Attach to an existing MCP session's resumable Server-Sent Events stream. Requires Mcp-Session-Id from a prior POST.",
1586                    "security": [{ "bearerAuth": [] }, {}],
1587                    "parameters": [
1588                        { "name": "Mcp-Session-Id", "in": "header", "required": true, "schema": { "type": "string" } },
1589                        { "name": "Last-Event-ID", "in": "header", "required": false, "schema": { "type": "string" } },
1590                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1591                    ],
1592                    "responses": {
1593                        "200": { "description": "SSE stream.", "content": { "text/event-stream": { "schema": { "type": "string" } } } },
1594                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1595                        "404": { "description": "Missing, unknown, or expired MCP session; or tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1596                    }
1597                }
1598            }
1599        }
1600    })
1601}
1602
1603// ---------------------------------------------------------------------------
1604// Handlers
1605// ---------------------------------------------------------------------------
1606
/// JSON request body consumed by `remember_handler`.
#[derive(Debug, Deserialize)]
struct RememberBody {
    /// Text to store. The handler strips trailing whitespace and rejects
    /// the request with 400 when nothing is left.
    content: String,
    /// Optional origin label; the handler substitutes `"user_message"`
    /// when it is absent.
    #[serde(default)]
    source_type: Option<String>,
    /// Optional caller-supplied source identifier, copied onto the
    /// episode unchanged.
    #[serde(default)]
    source_id: Option<String>,
    /// v0.9.2+ parity with the MCP `memory_remember` tool. Optional;
    /// must be in `[0.0, 1.0]` if supplied. Defaults to 0.5 when absent.
    /// Rejected with 400 if out of range.
    #[serde(default)]
    salience: Option<f32>,
}
1620
/// JSON response returned by `remember_handler` on success.
#[derive(Debug, Serialize)]
struct RememberResponse {
    /// String form of the id returned by the tenant writer for the new
    /// episode.
    memory_id: String,
}
1625
1626async fn remember_handler(
1627    TenantExtractor(tenant): TenantExtractor,
1628    AuditPrincipal(principal): AuditPrincipal,
1629    Json(body): Json<RememberBody>,
1630) -> Result<Json<RememberResponse>, ApiError> {
1631    let content = body.content.trim_end().to_string();
1632    if content.is_empty() {
1633        return Err(ApiError::bad_request("content must not be empty"));
1634    }
1635    // Validate caller-supplied salience (parity with MCP `memory_remember`).
1636    let salience = match body.salience {
1637        Some(s) if !(0.0..=1.0).contains(&s) || s.is_nan() => {
1638            return Err(ApiError::bad_request(
1639                "salience must be a finite value in [0.0, 1.0]",
1640            ));
1641        }
1642        Some(s) => s,
1643        None => 0.5,
1644    };
1645    let embedding = tenant
1646        .embedder()
1647        .embed(&content)
1648        .await
1649        .map_err(ApiError::from)?;
1650    let episode = Episode {
1651        memory_id: MemoryId::new(),
1652        ts_ms: chrono::Utc::now().timestamp_millis(),
1653        source_type: body.source_type.unwrap_or_else(|| "user_message".into()),
1654        source_id: body.source_id,
1655        content,
1656        encoding_context: EncodingContext::default(),
1657        provenance: None,
1658        confidence: Confidence::new(0.9).expect("0.9 is in [0.0, 1.0]"),
1659        strength: 0.5,
1660        salience,
1661        tier: Tier::Hot,
1662    };
1663    let mid = tenant
1664        .write()
1665        .remember_as(principal, episode, embedding)
1666        .await
1667        .map_err(ApiError::from)?;
1668    Ok(Json(RememberResponse {
1669        memory_id: mid.to_string(),
1670    }))
1671}
1672
/// JSON request body consumed by `recall_handler`.
#[derive(Debug, Deserialize)]
struct RecallBody {
    /// Search text, forwarded to `solo_query::run_recall` unchanged.
    query: String,
    /// Maximum number of hits; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1679
/// JSON request body consumed by `memory_context_handler`. Every field is
/// forwarded to `solo_query::memory_context`.
#[derive(Debug, Deserialize)]
struct MemoryContextBody {
    /// Search text.
    query: String,
    /// Optional subject, passed through as `Option<&str>`.
    #[serde(default)]
    subject: Option<String>,
    /// Optional window in days; interpretation is left to `solo_query`.
    #[serde(default)]
    window_days: Option<i64>,
    /// Result cap; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1690
/// Serde default for the `limit` field of the request and query structs
/// in this file that opt into it via `#[serde(default = "default_limit")]`.
fn default_limit() -> usize {
    const DEFAULT_RESULT_LIMIT: usize = 5;
    DEFAULT_RESULT_LIMIT
}
1694
1695async fn recall_handler(
1696    TenantExtractor(tenant): TenantExtractor,
1697    AuditPrincipal(principal): AuditPrincipal,
1698    Json(body): Json<RecallBody>,
1699) -> Result<Json<solo_query::RecallResult>, ApiError> {
1700    // solo_query::run_recall handles empty-query rejection (returns
1701    // InvalidInput → ApiError::bad_request(400)) and clamps limit
1702    // upstream of the embedder call.
1703    let result = solo_query::run_recall(tenant.as_ref(), principal, &body.query, body.limit)
1704        .await
1705        .map_err(ApiError::from)?;
1706    Ok(Json(result))
1707}
1708
1709async fn memory_context_handler(
1710    State(s): State<SoloHttpState>,
1711    TenantExtractor(tenant): TenantExtractor,
1712    AuditPrincipal(principal): AuditPrincipal,
1713    Json(body): Json<MemoryContextBody>,
1714) -> Result<Json<solo_query::MemoryContextResult>, ApiError> {
1715    let result = solo_query::memory_context(
1716        tenant.as_ref(),
1717        principal,
1718        &body.query,
1719        body.subject.as_deref(),
1720        &s.user_aliases,
1721        body.window_days,
1722        body.limit,
1723    )
1724    .await
1725    .map_err(ApiError::from)?;
1726    Ok(Json(result))
1727}
1728
1729async fn inspect_handler(
1730    TenantExtractor(tenant): TenantExtractor,
1731    AuditPrincipal(principal): AuditPrincipal,
1732    Path(id): Path<String>,
1733) -> Result<Json<solo_query::EpisodeRecord>, ApiError> {
1734    let mid =
1735        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1736    let row = solo_query::inspect_one(tenant.read(), tenant.audit(), principal, mid)
1737        .await
1738        .map_err(ApiError::from)?;
1739    Ok(Json(row))
1740}
1741
/// JSON request body consumed by `update_handler`.
#[derive(Debug, Deserialize)]
struct MemoryUpdateBody {
    /// Replacement text. The handler rejects whitespace-only values with
    /// 400 and otherwise forwards the string untrimmed.
    content: String,
}
1746
1747async fn update_handler(
1748    TenantExtractor(tenant): TenantExtractor,
1749    AuditPrincipal(principal): AuditPrincipal,
1750    Path(id): Path<String>,
1751    Json(body): Json<MemoryUpdateBody>,
1752) -> Result<Json<solo_query::MemoryUpdateResult>, ApiError> {
1753    let mid =
1754        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1755    if body.content.trim().is_empty() {
1756        return Err(ApiError::bad_request("content must not be empty"));
1757    }
1758    let result = solo_query::memory_update(tenant.as_ref(), principal, mid, &body.content)
1759        .await
1760        .map_err(ApiError::from)?;
1761    Ok(Json(result))
1762}
1763
1764// Path 1 derived-layer handlers (v0.4.0+). Read handlers are GET-shaped:
1765// pure read-only queries against the Steward's outputs, query-string
1766// params for simple filters. Each handler delegates to a single
1767// solo_query::derived pipeline and returns the result Vec as JSON.
1768// Empty derived layer → 200 with `[]` body (parseable JSON array).
1769
/// Query-string parameters consumed by `themes_handler`.
#[derive(Debug, Deserialize)]
struct ThemesQuery {
    /// Optional window in days, forwarded to `solo_query::themes` as-is.
    #[serde(default)]
    window_days: Option<i64>,
    /// Result cap; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1777
1778async fn themes_handler(
1779    TenantExtractor(tenant): TenantExtractor,
1780    AuditPrincipal(principal): AuditPrincipal,
1781    Query(q): Query<ThemesQuery>,
1782) -> Result<Json<Vec<solo_query::ThemeHit>>, ApiError> {
1783    let hits = solo_query::themes(
1784        tenant.read(),
1785        tenant.audit(),
1786        principal,
1787        q.window_days,
1788        q.limit,
1789    )
1790    .await
1791    .map_err(ApiError::from)?;
1792    Ok(Json(hits))
1793}
1794
/// Query-string parameters consumed by `facts_about_handler`.
#[derive(Debug, Deserialize)]
struct FactsAboutQuery {
    /// Required subject; the handler rejects whitespace-only values with 400.
    subject: String,
    /// Optional predicate, forwarded to `solo_query::facts_about`.
    #[serde(default)]
    predicate: Option<String>,
    /// Optional millisecond bound, forwarded as-is (presumably the lower
    /// edge of a time window — confirm against `solo_query::facts_about`).
    #[serde(default)]
    since_ms: Option<i64>,
    /// Optional millisecond bound, forwarded as-is (presumably the upper
    /// edge of a time window — confirm against `solo_query::facts_about`).
    #[serde(default)]
    until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object. Default `false`.
    #[serde(default)]
    include_as_object: bool,
    /// Result cap; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1811
1812async fn facts_about_handler(
1813    State(s): State<SoloHttpState>,
1814    TenantExtractor(tenant): TenantExtractor,
1815    AuditPrincipal(principal): AuditPrincipal,
1816    Query(q): Query<FactsAboutQuery>,
1817) -> Result<Json<Vec<solo_query::FactHit>>, ApiError> {
1818    if q.subject.trim().is_empty() {
1819        return Err(ApiError::bad_request("subject must not be empty"));
1820    }
1821    let hits = solo_query::facts_about(
1822        tenant.read(),
1823        tenant.audit(),
1824        principal,
1825        &q.subject,
1826        &s.user_aliases,
1827        q.include_as_object,
1828        q.predicate.as_deref(),
1829        q.since_ms,
1830        q.until_ms,
1831        q.limit,
1832    )
1833    .await
1834    .map_err(ApiError::from)?;
1835    Ok(Json(hits))
1836}
1837
/// Query-string parameters consumed by `entities_handler`.
#[derive(Debug, Deserialize)]
struct EntitiesQuery {
    /// Required search text; the handler rejects whitespace-only values
    /// with 400.
    query: String,
    /// Result cap; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1844
1845async fn entities_handler(
1846    TenantExtractor(tenant): TenantExtractor,
1847    AuditPrincipal(principal): AuditPrincipal,
1848    Query(q): Query<EntitiesQuery>,
1849) -> Result<Json<Vec<solo_query::EntityHit>>, ApiError> {
1850    if q.query.trim().is_empty() {
1851        return Err(ApiError::bad_request("query must not be empty"));
1852    }
1853    let hits = solo_query::entities(tenant.read(), tenant.audit(), principal, &q.query, q.limit)
1854        .await
1855        .map_err(ApiError::from)?;
1856    Ok(Json(hits))
1857}
1858
/// Query-string parameters consumed by `contradictions_handler`.
#[derive(Debug, Deserialize)]
struct ContradictionsQuery {
    /// Result cap; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1864
1865async fn contradictions_handler(
1866    TenantExtractor(tenant): TenantExtractor,
1867    AuditPrincipal(principal): AuditPrincipal,
1868    Query(q): Query<ContradictionsQuery>,
1869) -> Result<Json<Vec<solo_query::ContradictionHit>>, ApiError> {
1870    let hits = solo_query::contradictions(tenant.read(), tenant.audit(), principal, q.limit)
1871        .await
1872        .map_err(ApiError::from)?;
1873    Ok(Json(hits))
1874}
1875
/// Serde default for `ContradictionResolveBody::status`: a resolve
/// request that omits `status` is treated as `"resolved"`.
fn default_contradiction_status() -> String {
    String::from("resolved")
}
1879
/// JSON request body consumed by `contradiction_resolve_handler`. All
/// fields are forwarded to `solo_query::resolve_contradiction`.
#[derive(Debug, Deserialize)]
struct ContradictionResolveBody {
    /// Identifier of one side of the contradiction; must not be blank.
    a_id: String,
    /// Identifier of the other side of the contradiction; must not be blank.
    b_id: String,
    /// Contradiction kind; must not be blank.
    kind: String,
    /// Target status; defaults to `"resolved"` when omitted.
    #[serde(default = "default_contradiction_status")]
    status: String,
    /// Optional free-text note, passed through as `Option<&str>`.
    #[serde(default)]
    resolution_note: Option<String>,
    /// Optional id of the winning triple, passed through as `Option<&str>`.
    #[serde(default)]
    winning_triple_id: Option<String>,
}
1892
1893async fn contradiction_resolve_handler(
1894    TenantExtractor(tenant): TenantExtractor,
1895    AuditPrincipal(principal): AuditPrincipal,
1896    Json(body): Json<ContradictionResolveBody>,
1897) -> Result<Json<solo_query::ContradictionResolution>, ApiError> {
1898    if body.a_id.trim().is_empty() || body.b_id.trim().is_empty() || body.kind.trim().is_empty() {
1899        return Err(ApiError::bad_request(
1900            "a_id, b_id, and kind must not be empty",
1901        ));
1902    }
1903    // Dev-log 0152 H1: routed through the writer actor for atomic
1904    // UPDATE + audit row. Reader-pool + audit-writer args are kept for
1905    // signature stability but ignored by the function body.
1906    let result = solo_query::resolve_contradiction(
1907        tenant.write(),
1908        tenant.read(),
1909        tenant.audit(),
1910        principal,
1911        &body.a_id,
1912        &body.b_id,
1913        &body.kind,
1914        &body.status,
1915        body.resolution_note.as_deref(),
1916        body.winning_triple_id.as_deref(),
1917    )
1918    .await
1919    .map_err(ApiError::from)?;
1920    Ok(Json(result))
1921}
1922
/// Query-string parameters consumed by `inspect_cluster_handler`.
#[derive(Debug, Deserialize, Default)]
struct InspectClusterQuery {
    /// Default `false` — episode `content` is truncated to
    /// `solo_query::EPISODE_TRUNCATE_CHARS` chars with a trailing `…`.
    /// `?full_content=true` returns each episode's content verbatim.
    #[serde(default)]
    full_content: bool,
}
1931
1932async fn inspect_cluster_handler(
1933    TenantExtractor(tenant): TenantExtractor,
1934    AuditPrincipal(principal): AuditPrincipal,
1935    Path(cluster_id): Path<String>,
1936    Query(q): Query<InspectClusterQuery>,
1937) -> Result<Json<solo_query::ClusterRecord>, ApiError> {
1938    if cluster_id.trim().is_empty() {
1939        return Err(ApiError::bad_request("cluster_id must not be empty"));
1940    }
1941    let record = solo_query::inspect_cluster(
1942        tenant.read(),
1943        tenant.audit(),
1944        principal,
1945        &cluster_id,
1946        q.full_content,
1947    )
1948    .await
1949    .map_err(ApiError::from)?;
1950    Ok(Json(record))
1951}
1952
1953// ---------------------------------------------------------------------------
1954// Document handlers (v0.7.0 P6)
1955// ---------------------------------------------------------------------------
1956
/// JSON request body consumed by `ingest_document_handler`.
#[derive(Debug, Deserialize)]
struct IngestDocumentBody {
    /// Server-side absolute path to the file. Must be readable by the
    /// Solo process. The writer reads, parses, chunks, and embeds.
    /// The handler itself only rejects whitespace-only values (400).
    path: String,
}
1963
1964async fn ingest_document_handler(
1965    TenantExtractor(tenant): TenantExtractor,
1966    AuditPrincipal(principal): AuditPrincipal,
1967    Json(body): Json<IngestDocumentBody>,
1968) -> Result<Json<solo_storage::IngestReport>, ApiError> {
1969    if body.path.trim().is_empty() {
1970        return Err(ApiError::bad_request("path must not be empty"));
1971    }
1972    let path = std::path::PathBuf::from(body.path);
1973    let chunk_config = solo_storage::document::ChunkConfig::default();
1974    let report = tenant
1975        .write()
1976        .ingest_document_as(principal, path, chunk_config)
1977        .await
1978        .map_err(ApiError::from)?;
1979    Ok(Json(report))
1980}
1981
/// JSON request body consumed by `search_docs_handler`.
#[derive(Debug, Deserialize)]
struct SearchDocsBody {
    /// Search text, forwarded to `solo_query::run_doc_search` unchanged.
    query: String,
    /// Result cap; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1988
1989async fn search_docs_handler(
1990    TenantExtractor(tenant): TenantExtractor,
1991    AuditPrincipal(principal): AuditPrincipal,
1992    Json(body): Json<SearchDocsBody>,
1993) -> Result<Json<Vec<solo_query::DocSearchHit>>, ApiError> {
1994    let hits = solo_query::run_doc_search(tenant.as_ref(), principal, &body.query, body.limit)
1995        .await
1996        .map_err(ApiError::from)?;
1997    Ok(Json(hits))
1998}
1999
2000async fn inspect_document_handler(
2001    TenantExtractor(tenant): TenantExtractor,
2002    AuditPrincipal(principal): AuditPrincipal,
2003    Path(id): Path<String>,
2004) -> Result<Json<solo_query::DocumentInspectResult>, ApiError> {
2005    let doc_id =
2006        DocumentId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2007    let result_opt =
2008        solo_query::inspect_document(tenant.read(), tenant.audit(), principal, &doc_id)
2009            .await
2010            .map_err(ApiError::from)?;
2011    match result_opt {
2012        Some(record) => Ok(Json(record)),
2013        None => Err(ApiError::not_found(format!("document {doc_id} not found"))),
2014    }
2015}
2016
/// Query-string parameters consumed by `list_documents_handler`.
#[derive(Debug, Deserialize)]
struct ListDocumentsQuery {
    /// Page size; falls back to `default_list_documents_limit()` when omitted.
    #[serde(default = "default_list_documents_limit")]
    limit: usize,
    /// Pagination offset forwarded to `solo_query::list_documents`;
    /// `0` when omitted.
    #[serde(default)]
    offset: usize,
    /// Forwarded to `solo_query::list_documents`; `false` when omitted.
    #[serde(default)]
    include_forgotten: bool,
}
2026
/// Serde default for `ListDocumentsQuery::limit`.
fn default_list_documents_limit() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 20;
    DEFAULT_PAGE_SIZE
}
2030
2031async fn list_documents_handler(
2032    TenantExtractor(tenant): TenantExtractor,
2033    AuditPrincipal(principal): AuditPrincipal,
2034    Query(q): Query<ListDocumentsQuery>,
2035) -> Result<Json<Vec<solo_query::DocumentSummary>>, ApiError> {
2036    let rows = solo_query::list_documents(
2037        tenant.read(),
2038        tenant.audit(),
2039        principal,
2040        q.limit,
2041        q.offset,
2042        q.include_forgotten,
2043    )
2044    .await
2045    .map_err(ApiError::from)?;
2046    Ok(Json(rows))
2047}
2048
2049async fn forget_document_handler(
2050    TenantExtractor(tenant): TenantExtractor,
2051    AuditPrincipal(principal): AuditPrincipal,
2052    Path(id): Path<String>,
2053) -> Result<Json<solo_storage::ForgetDocumentReport>, ApiError> {
2054    let doc_id =
2055        DocumentId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2056    let report = tenant
2057        .write()
2058        .forget_document_as(principal, doc_id)
2059        .await
2060        .map_err(ApiError::from)?;
2061    Ok(Json(report))
2062}
2063
/// Query-string parameters consumed by `forget_handler`.
#[derive(Debug, Deserialize)]
struct ForgetQuery {
    /// Optional reason passed to the writer; the handler substitutes
    /// `"http"` when it is absent.
    #[serde(default)]
    reason: Option<String>,
}
2069
2070async fn forget_handler(
2071    TenantExtractor(tenant): TenantExtractor,
2072    AuditPrincipal(principal): AuditPrincipal,
2073    Path(id): Path<String>,
2074    Query(q): Query<ForgetQuery>,
2075) -> Result<StatusCode, ApiError> {
2076    let mid =
2077        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2078    let reason = q.reason.unwrap_or_else(|| "http".into());
2079    tenant
2080        .write()
2081        .forget_as(principal, mid, reason)
2082        .await
2083        .map_err(ApiError::from)?;
2084    Ok(StatusCode::NO_CONTENT)
2085}
2086
2087async fn consolidate_handler(
2088    TenantExtractor(tenant): TenantExtractor,
2089    AuditPrincipal(principal): AuditPrincipal,
2090    body: axum::body::Bytes,
2091) -> Result<Json<solo_storage::ConsolidationReport>, ApiError> {
2092    // Empty body = default scope (unbounded window). We parse via
2093    // `Bytes` rather than `Option<Json<T>>` because axum's `Json`
2094    // extractor 400s on an empty body when Content-Type is JSON
2095    // (it can't deserialize zero bytes as `T`), and the `Option`
2096    // wrapper doesn't reliably degrade that failure to `None`.
2097    let scope = if body.is_empty() {
2098        solo_storage::ConsolidationScope::default()
2099    } else {
2100        serde_json::from_slice(&body)
2101            .map_err(|e| ApiError::bad_request(format!("invalid JSON: {e}")))?
2102    };
2103    let report = tenant
2104        .write()
2105        .consolidate_as(principal, scope)
2106        .await
2107        .map_err(ApiError::from)?;
2108    Ok(Json(report))
2109}
2110
/// JSON request body consumed by `backup_handler`.
#[derive(Debug, Deserialize)]
struct BackupBody {
    /// Server-side absolute path where the backup file should be
    /// written. Must be writable by the Solo process. Refuses to
    /// overwrite an existing file unless `force = true`.
    to: String,
    /// When `true`, an existing file at `to` is removed before the backup
    /// runs. Defaults to `false`.
    #[serde(default)]
    force: bool,
}
2120
/// JSON response returned by `backup_handler` on success.
#[derive(Debug, Serialize)]
struct BackupResponse {
    /// Display form of the destination path the backup was written to.
    path: String,
    /// Wall-clock time spent in the writer's backup call, in milliseconds.
    elapsed_ms: u64,
}
2126
2127async fn backup_handler(
2128    TenantExtractor(tenant): TenantExtractor,
2129    Json(body): Json<BackupBody>,
2130) -> Result<Json<BackupResponse>, ApiError> {
2131    use std::path::PathBuf;
2132
2133    let dest = PathBuf::from(&body.to);
2134    if dest.as_os_str().is_empty() {
2135        return Err(ApiError::bad_request("`to` must not be empty"));
2136    }
2137    // CRITICAL ORDER: same-file refusal MUST come BEFORE `remove_file`.
2138    // The tenant's source DB path comes from the resolved TenantHandle.
2139    if solo_storage::paths_refer_to_same_file(tenant.db_path(), &dest) {
2140        return Err(ApiError::bad_request(format!(
2141            "destination {} is the same file as the source database; \
2142             refusing to run (would corrupt the live database)",
2143            dest.display()
2144        )));
2145    }
2146    if dest.exists() {
2147        if !body.force {
2148            return Err(ApiError::bad_request(format!(
2149                "destination {} exists; pass force=true to overwrite",
2150                dest.display()
2151            )));
2152        }
2153        std::fs::remove_file(&dest).map_err(|e| {
2154            ApiError::internal(format!(
2155                "remove existing destination {}: {e}",
2156                dest.display()
2157            ))
2158        })?;
2159    }
2160    if let Some(parent) = dest.parent() {
2161        if !parent.as_os_str().is_empty() && !parent.is_dir() {
2162            return Err(ApiError::bad_request(format!(
2163                "destination parent directory {} does not exist",
2164                parent.display()
2165            )));
2166        }
2167    }
2168
2169    let started = std::time::Instant::now();
2170    tenant
2171        .write()
2172        .backup(dest.clone())
2173        .await
2174        .map_err(ApiError::from)?;
2175    let elapsed_ms = started.elapsed().as_millis() as u64;
2176
2177    Ok(Json(BackupResponse {
2178        path: dest.display().to_string(),
2179        elapsed_ms,
2180    }))
2181}
2182
2183// ---------------------------------------------------------------------------
2184// Graph expand (v0.9.x — first /v1/graph/* endpoint for solo-web)
2185// ---------------------------------------------------------------------------
2186//
2187// `GET /v1/graph/expand?node_id=...&kind=...&limit=N` — read-only neighbor
2188// drill off any node. Supports four edge kinds:
2189//   * `cluster_member` — episodes ↔ clusters via `cluster_episodes`.
2190//   * `document_chunk` — documents ↔ chunks via `document_chunks.doc_id`.
2191//   * `triple`         — episodes ↔ entities via `triples` (subject_id /
2192//     object_id / source_episode_id added in migration 0007).
2193//   * `semantic`       — HNSW top-K similar episodes (re-embeds the source
2194//     episode's content via the tenant embedder, then calls the same
2195//     pipeline as `/memory/search`; cheaper than a separate embeddings-
2196//     table fetch path and reuses one well-tested code path).
2197//
2198// **Node-id prefix convention** (locked in this PR; the future
2199// `/v1/graph/nodes` + `/v1/graph/inspect/:id` endpoints will use the
2200// same scheme):
2201//   * `ep:<memory_id>`     — episode (memory_id = UUID v7)
2202//   * `doc:<doc_id>`       — document (doc_id   = UUID v7)
2203//   * `chunk:<chunk_id>`   — chunk    (chunk_id = UUID v7)
2204//   * `cl:<cluster_id>`    — cluster
2205//   * `ent:<value>`        — entity (synthetic — minted from a triple's
2206//     subject_id / object_id; value is the raw string verbatim, no
2207//     URL-encoding — `:` and other punctuation appear in real entity
2208//     ids in the wild).
2209//
2210// Entity nodes are synthetic: there's no `entities` table. They're derived
2211// on-the-fly from triples and only exist in the wire format. Two entity
2212// nodes with the same `ent:<value>` are the same node.
2213//
2214// **Read-only**: no audit emit (lesson #30 — graph expand is a derived view
2215// over already-audited primitives; the explicit-query audit events from
2216// `memory.recall` / `memory.inspect` / `memory.facts_about` cover the
2217// underlying reads).
2218//
2219// Tests live inline in `handler_tests` below.
2220
/// Neighbor limit applied by `graph_expand_handler` when the request
/// omits `limit`.
const GRAPH_EXPAND_DEFAULT_LIMIT: u32 = 25;
/// Upper bound that `graph_expand_handler` clamps a requested `limit` to.
const GRAPH_EXPAND_MAX_LIMIT: u32 = 100;
2223
/// Edge-kind discriminator. Drives which expansion path runs and what edge
/// kind appears in the response.
///
/// Deserialized from the snake_case wire names (`cluster_member`,
/// `document_chunk`, `triple`, `semantic`).
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphExpandKind {
    /// Episodes ↔ clusters via `cluster_episodes`.
    ClusterMember,
    /// Documents ↔ chunks via `document_chunks.doc_id`.
    DocumentChunk,
    /// Episodes ↔ entities via `triples`.
    Triple,
    /// HNSW top-K similar episodes.
    Semantic,
}
2234
/// Query-string parameters consumed by `graph_expand_handler`.
#[derive(Debug, Deserialize)]
struct GraphExpandQuery {
    /// Prefixed node id (`<prefix>:<value>`), decoded by `parse_node_id`.
    node_id: String,
    /// Which edge kind to expand along.
    kind: GraphExpandKind,
    /// Optional neighbor cap; the handler falls back to
    /// `GRAPH_EXPAND_DEFAULT_LIMIT` and clamps to `GRAPH_EXPAND_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
}
2242
/// Source-node kind, derived from the `node_id` prefix.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeKind {
    /// `ep:` prefix.
    Episode,
    /// `doc:` prefix.
    Document,
    /// `chunk:` prefix.
    Chunk,
    /// `cl:` prefix.
    Cluster,
    /// `ent:` prefix.
    Entity,
}
2252
2253impl NodeKind {
2254    fn as_wire_str(self) -> &'static str {
2255        match self {
2256            Self::Episode => "episode",
2257            Self::Document => "document",
2258            Self::Chunk => "chunk",
2259            Self::Cluster => "cluster",
2260            Self::Entity => "entity",
2261        }
2262    }
2263}
2264
2265/// Decompose `<prefix>:<value>` into (kind, raw value). Returns 400 on
2266/// unknown prefix / empty value / no `:`.
2267fn parse_node_id(raw: &str) -> Result<(NodeKind, &str), ApiError> {
2268    let (prefix, value) = raw.split_once(':').ok_or_else(|| {
2269        ApiError::bad_request(format!(
2270            "node_id must be `<prefix>:<value>` (one of ep:/doc:/chunk:/cl:/ent:); got {raw:?}"
2271        ))
2272    })?;
2273    if value.is_empty() {
2274        return Err(ApiError::bad_request(format!(
2275            "node_id value is empty after prefix: {raw:?}"
2276        )));
2277    }
2278    let kind = match prefix {
2279        "ep" => NodeKind::Episode,
2280        "doc" => NodeKind::Document,
2281        "chunk" => NodeKind::Chunk,
2282        "cl" => NodeKind::Cluster,
2283        "ent" => NodeKind::Entity,
2284        other => {
2285            return Err(ApiError::bad_request(format!(
2286                "unknown node_id prefix {other:?}; expected one of ep:/doc:/chunk:/cl:/ent:"
2287            )));
2288        }
2289    };
2290    Ok((kind, value))
2291}
2292
/// One node in the graph-expand response. Mirrors solo-web's `GraphNode`
/// TS interface (see `solo-web/src/api/types.ts`).
#[derive(Debug, Serialize)]
struct GraphNode {
    /// Prefixed node id, e.g. `ep:<memory_id>`.
    id: String,
    /// Wire name of the node kind (see `NodeKind::as_wire_str`).
    kind: &'static str,
    /// Short display label.
    label: String,
    /// Timestamp in milliseconds; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    ts_ms: Option<i64>,
    /// Tenant the node belongs to.
    tenant_id: String,
    /// Longer preview text; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<String>,
}
2306
/// One edge. Mirrors `GraphEdge` in solo-web TS types. `id` is a composite
/// `${source}--${kind}--${target}` so the renderer can dedupe.
#[derive(Debug, Serialize)]
struct GraphEdge {
    /// Composite edge id (see `edge_id`).
    id: String,
    /// Node id of the edge's source endpoint.
    source: String,
    /// Node id of the edge's target endpoint.
    target: String,
    /// Wire name of the edge kind.
    kind: &'static str,
    /// Optional predicate; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    predicate: Option<String>,
    /// Optional edge weight; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    weight: Option<f32>,
}
2320
/// JSON response body of `graph_expand_handler`.
#[derive(Debug, Serialize)]
struct GraphExpandResponse {
    /// Nodes included in the expansion result.
    nodes: Vec<GraphNode>,
    /// Edges included in the expansion result.
    edges: Vec<GraphEdge>,
}
2326
/// Builds the composite edge id `<source>--<kind>--<target>`.
fn edge_id(source: &str, kind: &str, target: &str) -> String {
    [source, kind, target].join("--")
}
2330
/// Episode summary needed to mint a `GraphNode` from an episode row.
#[derive(Debug)]
struct ExpandedEpisode {
    /// Episode id without the `ep:` prefix.
    memory_id: String,
    /// Episode timestamp in milliseconds.
    ts_ms: i64,
    /// Episode text; the node label and preview are derived from it.
    content: String,
}
2338
/// Document summary needed to mint a `GraphNode` from a document row.
#[derive(Debug)]
struct ExpandedDocument {
    /// Document id without the `doc:` prefix.
    doc_id: String,
    /// Optional title; preferred source for the node label.
    title: Option<String>,
    /// Optional source string; label fallback and node preview.
    source: Option<String>,
    /// Ingestion timestamp in milliseconds.
    ingested_at_ms: i64,
}
2347
/// Chunk summary needed to mint a `GraphNode` from a chunk row.
#[derive(Debug)]
struct ExpandedChunk {
    /// Chunk id without the `chunk:` prefix.
    chunk_id: String,
    /// Chunk index, shown in the node label as `chunk #<index>`.
    chunk_index: i64,
    /// Chunk text; the node label and preview are derived from it.
    content: String,
}
2355
/// Shortens `s` to at most `max` characters (Unicode scalar values, not
/// bytes).
///
/// Returns `s` unchanged when it already fits. Otherwise keeps the first
/// `max - 1` characters and appends `…`, so the result is exactly `max`
/// characters long. A `max` of `0` yields an empty string instead of
/// underflowing on `max - 1`.
fn truncate_preview(s: &str, max: usize) -> String {
    if max == 0 {
        return String::new();
    }

    let mut positions = s.char_indices();
    // Byte offset of the `max`-th character; `None` means `s` has fewer
    // than `max` characters and fits as-is.
    let cut = match positions.nth(max - 1) {
        Some((offset, _)) => offset,
        None => return s.to_string(),
    };
    // Nothing after the `max`-th character: `s` is exactly `max` long.
    if positions.next().is_none() {
        return s.to_string();
    }

    // Keep the first `max - 1` characters and spend the last slot on `…`.
    let mut out = String::with_capacity(cut + '…'.len_utf8());
    out.push_str(&s[..cut]);
    out.push('…');
    out
}
2364
/// First-line label cap. Keeps payloads tight for the graph renderer
/// (labels are headings, not full content).
const GRAPH_LABEL_CHARS: usize = 80;
/// Character cap applied to node `preview` text built from episode, chunk
/// and cluster content.
const GRAPH_PREVIEW_CHARS: usize = 200;
2369
2370fn episode_label(content: &str) -> String {
2371    let first_line = content.lines().next().unwrap_or(content);
2372    truncate_preview(first_line, GRAPH_LABEL_CHARS)
2373}
2374
2375fn graph_node_for_episode(tenant_id: &str, ep: &ExpandedEpisode) -> GraphNode {
2376    GraphNode {
2377        id: format!("ep:{}", ep.memory_id),
2378        kind: NodeKind::Episode.as_wire_str(),
2379        label: episode_label(&ep.content),
2380        ts_ms: Some(ep.ts_ms),
2381        tenant_id: tenant_id.to_string(),
2382        preview: Some(truncate_preview(&ep.content, GRAPH_PREVIEW_CHARS)),
2383    }
2384}
2385
2386fn graph_node_for_document(tenant_id: &str, d: &ExpandedDocument) -> GraphNode {
2387    let label = d
2388        .title
2389        .clone()
2390        .or_else(|| d.source.clone())
2391        .unwrap_or_else(|| d.doc_id.clone());
2392    GraphNode {
2393        id: format!("doc:{}", d.doc_id),
2394        kind: NodeKind::Document.as_wire_str(),
2395        label: truncate_preview(&label, GRAPH_LABEL_CHARS),
2396        ts_ms: Some(d.ingested_at_ms),
2397        tenant_id: tenant_id.to_string(),
2398        preview: d.source.clone(),
2399    }
2400}
2401
2402fn graph_node_for_chunk(tenant_id: &str, c: &ExpandedChunk) -> GraphNode {
2403    GraphNode {
2404        id: format!("chunk:{}", c.chunk_id),
2405        kind: NodeKind::Chunk.as_wire_str(),
2406        label: format!("chunk #{}: {}", c.chunk_index, episode_label(&c.content)),
2407        ts_ms: None,
2408        tenant_id: tenant_id.to_string(),
2409        preview: Some(truncate_preview(&c.content, GRAPH_PREVIEW_CHARS)),
2410    }
2411}
2412
2413fn graph_node_for_cluster(
2414    tenant_id: &str,
2415    cluster_id: &str,
2416    abstraction: Option<&str>,
2417    created_at_ms: i64,
2418) -> GraphNode {
2419    let label = abstraction
2420        .map(|a| truncate_preview(a, GRAPH_LABEL_CHARS))
2421        .unwrap_or_else(|| format!("cluster {cluster_id}"));
2422    GraphNode {
2423        id: format!("cl:{cluster_id}"),
2424        kind: NodeKind::Cluster.as_wire_str(),
2425        label,
2426        ts_ms: Some(created_at_ms),
2427        tenant_id: tenant_id.to_string(),
2428        preview: abstraction.map(|a| truncate_preview(a, GRAPH_PREVIEW_CHARS)),
2429    }
2430}
2431
2432fn graph_node_for_entity(tenant_id: &str, value: &str) -> GraphNode {
2433    GraphNode {
2434        id: format!("ent:{value}"),
2435        kind: NodeKind::Entity.as_wire_str(),
2436        label: truncate_preview(value, GRAPH_LABEL_CHARS),
2437        ts_ms: None,
2438        tenant_id: tenant_id.to_string(),
2439        preview: None,
2440    }
2441}
2442
2443/// `GET /v1/graph/expand`. See module-level comments for the contract.
2444async fn graph_expand_handler(
2445    TenantExtractor(tenant): TenantExtractor,
2446    Query(q): Query<GraphExpandQuery>,
2447) -> Result<Json<GraphExpandResponse>, ApiError> {
2448    // Silent clamp at GRAPH_EXPAND_MAX_LIMIT — matches the rest of
2449    // solo-query's read pipelines (recall, themes, etc.). Documented in
2450    // the OpenAPI spec.
2451    let limit = q.limit.unwrap_or(GRAPH_EXPAND_DEFAULT_LIMIT);
2452    let limit = limit.clamp(1, GRAPH_EXPAND_MAX_LIMIT) as i64;
2453
2454    let (node_kind, value) = parse_node_id(&q.node_id)?;
2455    let value = value.to_string();
2456    let node_id_full = q.node_id.clone();
2457    let tenant_id_str = tenant.tenant_id().to_string();
2458
2459    match q.kind {
2460        GraphExpandKind::ClusterMember => {
2461            expand_cluster_member(
2462                &tenant,
2463                &tenant_id_str,
2464                node_kind,
2465                &value,
2466                &node_id_full,
2467                limit,
2468            )
2469            .await
2470        }
2471        GraphExpandKind::DocumentChunk => {
2472            expand_document_chunk(
2473                &tenant,
2474                &tenant_id_str,
2475                node_kind,
2476                &value,
2477                &node_id_full,
2478                limit,
2479            )
2480            .await
2481        }
2482        GraphExpandKind::Triple => {
2483            expand_triple(
2484                &tenant,
2485                &tenant_id_str,
2486                node_kind,
2487                &value,
2488                &node_id_full,
2489                limit,
2490            )
2491            .await
2492        }
2493        GraphExpandKind::Semantic => {
2494            expand_semantic(
2495                &tenant,
2496                &tenant_id_str,
2497                node_kind,
2498                &value,
2499                &node_id_full,
2500                limit,
2501            )
2502            .await
2503        }
2504    }
2505    .map(Json)
2506}
2507
2508// ---- cluster_member ----
2509
2510async fn expand_cluster_member(
2511    tenant: &TenantHandle,
2512    tenant_id: &str,
2513    node_kind: NodeKind,
2514    value: &str,
2515    node_id_full: &str,
2516    limit: i64,
2517) -> Result<GraphExpandResponse, ApiError> {
2518    match node_kind {
2519        NodeKind::Episode => {
2520            expand_cluster_member_from_episode(
2521                tenant,
2522                tenant_id,
2523                value.to_string(),
2524                node_id_full.to_string(),
2525                limit,
2526            )
2527            .await
2528        }
2529        NodeKind::Cluster => {
2530            expand_cluster_member_from_cluster(
2531                tenant,
2532                tenant_id,
2533                value.to_string(),
2534                node_id_full.to_string(),
2535                limit,
2536            )
2537            .await
2538        }
2539        _ => Err(ApiError::bad_request(format!(
2540            "kind=cluster_member only valid for episode or cluster source nodes; got {}",
2541            node_kind.as_wire_str()
2542        ))),
2543    }
2544}
2545
2546async fn expand_cluster_member_from_episode(
2547    tenant: &TenantHandle,
2548    tenant_id: &str,
2549    memory_id: String,
2550    node_id_full: String,
2551    limit: i64,
2552) -> Result<GraphExpandResponse, ApiError> {
2553    let memory_id_for_err = memory_id.clone();
2554    let rows: Vec<(String, Option<String>, i64)> = tenant
2555        .read()
2556        .interact(move |conn| {
2557            // First confirm the source episode exists in this tenant.
2558            let exists: i64 = conn.query_row(
2559                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
2560                rusqlite::params![&memory_id],
2561                |r| r.get(0),
2562            )?;
2563            if exists == 0 {
2564                return Ok(Vec::new());
2565            }
2566            let mut stmt = conn.prepare(
2567                "SELECT c.cluster_id, sa.content, c.created_at_ms
2568                   FROM cluster_episodes ce
2569                   JOIN clusters c ON c.cluster_id = ce.cluster_id
2570                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
2571                  WHERE ce.memory_id = ?1
2572                  ORDER BY c.created_at_ms DESC
2573                  LIMIT ?2",
2574            )?;
2575            let mapped = stmt
2576                .query_map(rusqlite::params![&memory_id, limit], |r| {
2577                    Ok((
2578                        r.get::<_, String>(0)?,
2579                        r.get::<_, Option<String>>(1)?,
2580                        r.get::<_, i64>(2)?,
2581                    ))
2582                })?
2583                .collect::<rusqlite::Result<Vec<_>>>()?;
2584            // Marker tuple to signal "episode found" via Vec emptiness +
2585            // an extra sentinel; we use a different shape:
2586            // pack the "found" flag via an out-of-band trick — actually
2587            // we re-query above. Keep it simple: confirm again here by
2588            // returning the rows; a missing episode short-circuits to
2589            // a 404 below via the `exists == 0` guard.
2590            Ok::<_, rusqlite::Error>(mapped)
2591        })
2592        .await
2593        .map_err(ApiError::from)?;
2594
2595    // The interact() returns Vec<(...)>; but we need to distinguish "no
2596    // such episode" (→ 404) from "episode exists, has no clusters" (→
2597    // 200 with empty arrays). Re-run a cheap existence check separately
2598    // — we already inlined it above and returned `Vec::new()` on miss,
2599    // but a real miss is indistinguishable from "episode in zero
2600    // clusters". Use a separate existence probe.
2601    if rows.is_empty() {
2602        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2603        return Ok(GraphExpandResponse {
2604            nodes: Vec::new(),
2605            edges: Vec::new(),
2606        });
2607    }
2608
2609    let mut nodes = Vec::with_capacity(rows.len());
2610    let mut edges = Vec::with_capacity(rows.len());
2611    for (cluster_id, abstraction, created_at_ms) in rows {
2612        let target_id = format!("cl:{cluster_id}");
2613        edges.push(GraphEdge {
2614            id: edge_id(&node_id_full, "cluster_member", &target_id),
2615            source: node_id_full.clone(),
2616            target: target_id,
2617            kind: "cluster_member",
2618            predicate: None,
2619            weight: None,
2620        });
2621        nodes.push(graph_node_for_cluster(
2622            tenant_id,
2623            &cluster_id,
2624            abstraction.as_deref(),
2625            created_at_ms,
2626        ));
2627    }
2628    Ok(GraphExpandResponse { nodes, edges })
2629}
2630
2631async fn expand_cluster_member_from_cluster(
2632    tenant: &TenantHandle,
2633    tenant_id: &str,
2634    cluster_id: String,
2635    node_id_full: String,
2636    limit: i64,
2637) -> Result<GraphExpandResponse, ApiError> {
2638    let cluster_id_for_err = cluster_id.clone();
2639    let rows: Vec<ExpandedEpisode> = tenant
2640        .read()
2641        .interact(move |conn| {
2642            let exists: i64 = conn.query_row(
2643                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
2644                rusqlite::params![&cluster_id],
2645                |r| r.get(0),
2646            )?;
2647            if exists == 0 {
2648                return Ok(Vec::new());
2649            }
2650            let mut stmt = conn.prepare(
2651                "SELECT e.memory_id, e.ts_ms, e.content
2652                   FROM cluster_episodes ce
2653                   JOIN episodes e ON e.memory_id = ce.memory_id
2654                  WHERE ce.cluster_id = ?1
2655                    AND e.status = 'active'
2656                  ORDER BY e.ts_ms DESC
2657                  LIMIT ?2",
2658            )?;
2659            let mapped = stmt
2660                .query_map(rusqlite::params![&cluster_id, limit], |r| {
2661                    Ok(ExpandedEpisode {
2662                        memory_id: r.get(0)?,
2663                        ts_ms: r.get(1)?,
2664                        content: r.get(2)?,
2665                    })
2666                })?
2667                .collect::<rusqlite::Result<Vec<_>>>()?;
2668            Ok::<_, rusqlite::Error>(mapped)
2669        })
2670        .await
2671        .map_err(ApiError::from)?;
2672
2673    if rows.is_empty() {
2674        ensure_cluster_exists(tenant, &cluster_id_for_err, &node_id_full).await?;
2675        return Ok(GraphExpandResponse {
2676            nodes: Vec::new(),
2677            edges: Vec::new(),
2678        });
2679    }
2680
2681    let mut nodes = Vec::with_capacity(rows.len());
2682    let mut edges = Vec::with_capacity(rows.len());
2683    for ep in rows {
2684        let target_id = format!("ep:{}", ep.memory_id);
2685        edges.push(GraphEdge {
2686            id: edge_id(&node_id_full, "cluster_member", &target_id),
2687            source: node_id_full.clone(),
2688            target: target_id,
2689            kind: "cluster_member",
2690            predicate: None,
2691            weight: None,
2692        });
2693        nodes.push(graph_node_for_episode(tenant_id, &ep));
2694    }
2695    Ok(GraphExpandResponse { nodes, edges })
2696}
2697
2698// ---- document_chunk ----
2699
2700async fn expand_document_chunk(
2701    tenant: &TenantHandle,
2702    tenant_id: &str,
2703    node_kind: NodeKind,
2704    value: &str,
2705    node_id_full: &str,
2706    limit: i64,
2707) -> Result<GraphExpandResponse, ApiError> {
2708    match node_kind {
2709        NodeKind::Document => {
2710            expand_document_chunk_from_document(
2711                tenant,
2712                tenant_id,
2713                value.to_string(),
2714                node_id_full.to_string(),
2715                limit,
2716            )
2717            .await
2718        }
2719        NodeKind::Chunk => {
2720            expand_document_chunk_from_chunk(
2721                tenant,
2722                tenant_id,
2723                value.to_string(),
2724                node_id_full.to_string(),
2725            )
2726            .await
2727        }
2728        _ => Err(ApiError::bad_request(format!(
2729            "kind=document_chunk only valid for document or chunk source nodes; got {}",
2730            node_kind.as_wire_str()
2731        ))),
2732    }
2733}
2734
2735async fn expand_document_chunk_from_document(
2736    tenant: &TenantHandle,
2737    tenant_id: &str,
2738    doc_id: String,
2739    node_id_full: String,
2740    limit: i64,
2741) -> Result<GraphExpandResponse, ApiError> {
2742    let doc_id_for_err = doc_id.clone();
2743    let rows: Vec<ExpandedChunk> = tenant
2744        .read()
2745        .interact(move |conn| {
2746            let exists: i64 = conn.query_row(
2747                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
2748                rusqlite::params![&doc_id],
2749                |r| r.get(0),
2750            )?;
2751            if exists == 0 {
2752                return Ok(Vec::new());
2753            }
2754            let mut stmt = conn.prepare(
2755                "SELECT chunk_id, chunk_index, content
2756                   FROM document_chunks
2757                  WHERE doc_id = ?1
2758                  ORDER BY chunk_index ASC
2759                  LIMIT ?2",
2760            )?;
2761            let mapped = stmt
2762                .query_map(rusqlite::params![&doc_id, limit], |r| {
2763                    Ok(ExpandedChunk {
2764                        chunk_id: r.get(0)?,
2765                        chunk_index: r.get(1)?,
2766                        content: r.get(2)?,
2767                    })
2768                })?
2769                .collect::<rusqlite::Result<Vec<_>>>()?;
2770            Ok::<_, rusqlite::Error>(mapped)
2771        })
2772        .await
2773        .map_err(ApiError::from)?;
2774
2775    if rows.is_empty() {
2776        ensure_document_exists(tenant, &doc_id_for_err, &node_id_full).await?;
2777        return Ok(GraphExpandResponse {
2778            nodes: Vec::new(),
2779            edges: Vec::new(),
2780        });
2781    }
2782
2783    let mut nodes = Vec::with_capacity(rows.len());
2784    let mut edges = Vec::with_capacity(rows.len());
2785    for c in rows {
2786        let target_id = format!("chunk:{}", c.chunk_id);
2787        edges.push(GraphEdge {
2788            id: edge_id(&node_id_full, "document_chunk", &target_id),
2789            source: node_id_full.clone(),
2790            target: target_id,
2791            kind: "document_chunk",
2792            predicate: None,
2793            weight: None,
2794        });
2795        nodes.push(graph_node_for_chunk(tenant_id, &c));
2796    }
2797    Ok(GraphExpandResponse { nodes, edges })
2798}
2799
2800async fn expand_document_chunk_from_chunk(
2801    tenant: &TenantHandle,
2802    tenant_id: &str,
2803    chunk_id: String,
2804    node_id_full: String,
2805) -> Result<GraphExpandResponse, ApiError> {
2806    let chunk_id_for_err = chunk_id.clone();
2807    let row: Option<ExpandedDocument> = tenant
2808        .read()
2809        .interact(move |conn| {
2810            conn.query_row(
2811                "SELECT d.doc_id, d.title, d.source, d.ingested_at_ms
2812                   FROM document_chunks c
2813                   JOIN documents d ON d.doc_id = c.doc_id
2814                  WHERE c.chunk_id = ?1",
2815                rusqlite::params![&chunk_id],
2816                |r| {
2817                    Ok(ExpandedDocument {
2818                        doc_id: r.get(0)?,
2819                        title: r.get(1)?,
2820                        source: r.get(2)?,
2821                        ingested_at_ms: r.get(3)?,
2822                    })
2823                },
2824            )
2825            .map(Some)
2826            .or_else(|e| match e {
2827                rusqlite::Error::QueryReturnedNoRows => Ok(None),
2828                other => Err(other),
2829            })
2830        })
2831        .await
2832        .map_err(ApiError::from)?;
2833
2834    let d = row.ok_or_else(|| {
2835        ApiError::not_found(format!(
2836            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
2837        ))
2838    })?;
2839    let target_id = format!("doc:{}", d.doc_id);
2840    let edge = GraphEdge {
2841        id: edge_id(&node_id_full, "document_chunk", &target_id),
2842        source: node_id_full.clone(),
2843        target: target_id,
2844        kind: "document_chunk",
2845        predicate: None,
2846        weight: None,
2847    };
2848    let node = graph_node_for_document(tenant_id, &d);
2849    Ok(GraphExpandResponse {
2850        nodes: vec![node],
2851        edges: vec![edge],
2852    })
2853}
2854
2855// ---- triple ----
2856
2857async fn expand_triple(
2858    tenant: &TenantHandle,
2859    tenant_id: &str,
2860    node_kind: NodeKind,
2861    value: &str,
2862    node_id_full: &str,
2863    limit: i64,
2864) -> Result<GraphExpandResponse, ApiError> {
2865    match node_kind {
2866        NodeKind::Episode => {
2867            expand_triple_from_episode(
2868                tenant,
2869                tenant_id,
2870                value.to_string(),
2871                node_id_full.to_string(),
2872                limit,
2873            )
2874            .await
2875        }
2876        NodeKind::Entity => {
2877            expand_triple_from_entity(
2878                tenant,
2879                tenant_id,
2880                value.to_string(),
2881                node_id_full.to_string(),
2882                limit,
2883            )
2884            .await
2885        }
2886        _ => Err(ApiError::bad_request(format!(
2887            "kind=triple only valid for episode or entity source nodes; got {}",
2888            node_kind.as_wire_str()
2889        ))),
2890    }
2891}
2892
2893#[derive(Debug)]
2894struct TripleRow {
2895    subject_id: String,
2896    predicate: String,
2897    object_id: String,
2898    confidence: f32,
2899}
2900
2901async fn expand_triple_from_episode(
2902    tenant: &TenantHandle,
2903    tenant_id: &str,
2904    memory_id: String,
2905    node_id_full: String,
2906    limit: i64,
2907) -> Result<GraphExpandResponse, ApiError> {
2908    let memory_id_for_err = memory_id.clone();
2909    let rows: Vec<TripleRow> = tenant
2910        .read()
2911        .interact(move |conn| {
2912            // Episode rowid lookup (triples FK is INTEGER rowid, not memory_id).
2913            let rowid_opt: Option<i64> = conn
2914                .query_row(
2915                    "SELECT rowid FROM episodes WHERE memory_id = ?1",
2916                    rusqlite::params![&memory_id],
2917                    |r| r.get(0),
2918                )
2919                .map(Some)
2920                .or_else(|e| match e {
2921                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
2922                    other => Err(other),
2923                })?;
2924            let Some(rowid) = rowid_opt else {
2925                return Ok(Vec::new());
2926            };
2927            let mut stmt = conn.prepare(
2928                "SELECT subject_id, predicate, object_id, confidence
2929                   FROM triples
2930                  WHERE source_episode_id = ?1
2931                    AND status = 'active'
2932                  ORDER BY valid_from_ms DESC
2933                  LIMIT ?2",
2934            )?;
2935            let mapped = stmt
2936                .query_map(rusqlite::params![rowid, limit], |r| {
2937                    Ok(TripleRow {
2938                        subject_id: r.get(0)?,
2939                        predicate: r.get(1)?,
2940                        object_id: r.get(2)?,
2941                        confidence: r.get(3)?,
2942                    })
2943                })?
2944                .collect::<rusqlite::Result<Vec<_>>>()?;
2945            Ok::<_, rusqlite::Error>(mapped)
2946        })
2947        .await
2948        .map_err(ApiError::from)?;
2949
2950    if rows.is_empty() {
2951        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2952        return Ok(GraphExpandResponse {
2953            nodes: Vec::new(),
2954            edges: Vec::new(),
2955        });
2956    }
2957
2958    let mut nodes = Vec::new();
2959    let mut edges = Vec::new();
2960    let mut seen_entities: std::collections::HashSet<String> = Default::default();
2961    for t in rows {
2962        // Mint both endpoints as entity nodes. The source episode is
2963        // node_id_full; each triple becomes two edges (source→subj +
2964        // subj→obj) connected through the entity nodes, OR a single
2965        // edge labelled with the predicate from the source episode to
2966        // a representative entity. The TS schema treats `triple` as a
2967        // single edge with `predicate`; we emit one edge per triple:
2968        // source_episode → subject_entity (kind=triple, predicate=p),
2969        // plus one extra edge subject_entity → object_entity (also
2970        // kind=triple, same predicate) so a renderer can hop along the
2971        // SPO graph.
2972        let subj_id = format!("ent:{}", t.subject_id);
2973        let obj_id = format!("ent:{}", t.object_id);
2974        if seen_entities.insert(t.subject_id.clone()) {
2975            nodes.push(graph_node_for_entity(tenant_id, &t.subject_id));
2976        }
2977        if seen_entities.insert(t.object_id.clone()) {
2978            nodes.push(graph_node_for_entity(tenant_id, &t.object_id));
2979        }
2980        edges.push(GraphEdge {
2981            id: edge_id(&subj_id, "triple", &obj_id),
2982            source: subj_id,
2983            target: obj_id,
2984            kind: "triple",
2985            predicate: Some(t.predicate),
2986            weight: Some(t.confidence),
2987        });
2988    }
2989    Ok(GraphExpandResponse { nodes, edges })
2990}
2991
2992async fn expand_triple_from_entity(
2993    tenant: &TenantHandle,
2994    tenant_id: &str,
2995    entity_value: String,
2996    node_id_full: String,
2997    limit: i64,
2998) -> Result<GraphExpandResponse, ApiError> {
2999    // Entity nodes are synthetic — there's no existence check we can
3000    // run. "Unknown entity" naturally resolves to an empty result.
3001    let entity_q = entity_value.clone();
3002    let rows: Vec<ExpandedEpisode> = tenant
3003        .read()
3004        .interact(move |conn| {
3005            // Find episodes whose triples reference this entity on either
3006            // side. JOIN against episodes.rowid via triples.source_episode_id.
3007            let mut stmt = conn.prepare(
3008                "SELECT DISTINCT e.memory_id, e.ts_ms, e.content
3009                   FROM triples t
3010                   JOIN episodes e ON e.rowid = t.source_episode_id
3011                  WHERE (t.subject_id = ?1 OR t.object_id = ?1)
3012                    AND t.status = 'active'
3013                    AND t.source_episode_id IS NOT NULL
3014                    AND e.status = 'active'
3015                  ORDER BY e.ts_ms DESC
3016                  LIMIT ?2",
3017            )?;
3018            let mapped = stmt
3019                .query_map(rusqlite::params![&entity_q, limit], |r| {
3020                    Ok(ExpandedEpisode {
3021                        memory_id: r.get(0)?,
3022                        ts_ms: r.get(1)?,
3023                        content: r.get(2)?,
3024                    })
3025                })?
3026                .collect::<rusqlite::Result<Vec<_>>>()?;
3027            Ok::<_, rusqlite::Error>(mapped)
3028        })
3029        .await
3030        .map_err(ApiError::from)?;
3031
3032    // Empty result on entity expand is a valid 200 — the entity exists
3033    // only in the wire format; "no edges" is the right answer.
3034    let mut nodes = Vec::with_capacity(rows.len());
3035    let mut edges = Vec::with_capacity(rows.len());
3036    for ep in rows {
3037        let target_id = format!("ep:{}", ep.memory_id);
3038        edges.push(GraphEdge {
3039            id: edge_id(&node_id_full, "triple", &target_id),
3040            source: node_id_full.clone(),
3041            target: target_id,
3042            kind: "triple",
3043            predicate: None,
3044            weight: None,
3045        });
3046        nodes.push(graph_node_for_episode(tenant_id, &ep));
3047    }
3048    // Annotate _ to suppress unused (only used in match guard).
3049    let _ = entity_value;
3050    Ok(GraphExpandResponse { nodes, edges })
3051}
3052
3053// ---- semantic ----
3054
3055async fn expand_semantic(
3056    tenant: &TenantHandle,
3057    tenant_id: &str,
3058    node_kind: NodeKind,
3059    value: &str,
3060    node_id_full: &str,
3061    limit: i64,
3062) -> Result<GraphExpandResponse, ApiError> {
3063    if node_kind != NodeKind::Episode {
3064        return Err(ApiError::bad_request(format!(
3065            "kind=semantic only valid for episode source nodes; got {}",
3066            node_kind.as_wire_str()
3067        )));
3068    }
3069    let memory_id = value.to_string();
3070    let memory_id_q = memory_id.clone();
3071    // Fetch the source episode's content so we can re-embed it and call
3072    // the existing HNSW pipeline. Cheaper-than-extra-machinery: reuses
3073    // the well-tested `run_recall_inner` path that already filters
3074    // forgotten rows + decodes hnsw ids.
3075    let content: Option<String> = tenant
3076        .read()
3077        .interact(move |conn| {
3078            conn.query_row(
3079                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
3080                rusqlite::params![&memory_id_q],
3081                |r| r.get::<_, String>(0),
3082            )
3083            .map(Some)
3084            .or_else(|e| match e {
3085                rusqlite::Error::QueryReturnedNoRows => Ok(None),
3086                other => Err(other),
3087            })
3088        })
3089        .await
3090        .map_err(ApiError::from)?;
3091
3092    let content = content.ok_or_else(|| {
3093        ApiError::not_found(format!(
3094            "node_id {node_id_full:?} (memory_id {memory_id}) not found in current tenant"
3095        ))
3096    })?;
3097
3098    // Pull one extra hit so we can drop self without losing user-requested
3099    // count. limit is already ≤ MAX_LIMIT; +1 stays within reason.
3100    let widened = (limit as usize).saturating_add(1).min(100);
3101    let result = solo_query::recall::run_recall_inner(
3102        tenant.embedder(),
3103        tenant.hnsw(),
3104        tenant.read(),
3105        &content,
3106        widened,
3107    )
3108    .await
3109    .map_err(ApiError::from)?;
3110
3111    let mut nodes = Vec::new();
3112    let mut edges = Vec::new();
3113    for hit in result.hits.into_iter() {
3114        if hit.memory_id == memory_id {
3115            // Skip self.
3116            continue;
3117        }
3118        if nodes.len() as i64 >= limit {
3119            break;
3120        }
3121        // The HNSW `cos_distance` is a distance (smaller = more similar).
3122        // Convert to a weight in [0, 1] (larger = more similar) for the
3123        // wire format: weight = (1 - distance).max(0).
3124        let weight = (1.0 - hit.cos_distance).max(0.0);
3125        let target_id = format!("ep:{}", hit.memory_id);
3126        edges.push(GraphEdge {
3127            id: edge_id(node_id_full, "semantic", &target_id),
3128            source: node_id_full.to_string(),
3129            target: target_id,
3130            kind: "semantic",
3131            predicate: None,
3132            weight: Some(weight),
3133        });
3134        nodes.push(GraphNode {
3135            id: format!("ep:{}", hit.memory_id),
3136            kind: NodeKind::Episode.as_wire_str(),
3137            label: episode_label(&hit.content),
3138            ts_ms: None,
3139            tenant_id: tenant_id.to_string(),
3140            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
3141        });
3142    }
3143    Ok(GraphExpandResponse { nodes, edges })
3144}
3145
3146// ---- existence checks ----
3147
3148/// 404 if the memory_id has no row in this tenant's `episodes` table.
3149async fn ensure_episode_exists(
3150    tenant: &TenantHandle,
3151    memory_id: &str,
3152    node_id_full: &str,
3153) -> Result<(), ApiError> {
3154    let memory_id_q = memory_id.to_string();
3155    let exists: i64 = tenant
3156        .read()
3157        .interact(move |conn| {
3158            conn.query_row(
3159                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
3160                rusqlite::params![&memory_id_q],
3161                |r| r.get(0),
3162            )
3163        })
3164        .await
3165        .map_err(ApiError::from)?;
3166    if exists == 0 {
3167        return Err(ApiError::not_found(format!(
3168            "node_id {node_id_full:?} not found in current tenant"
3169        )));
3170    }
3171    Ok(())
3172}
3173
3174async fn ensure_cluster_exists(
3175    tenant: &TenantHandle,
3176    cluster_id: &str,
3177    node_id_full: &str,
3178) -> Result<(), ApiError> {
3179    let cluster_id_q = cluster_id.to_string();
3180    let exists: i64 = tenant
3181        .read()
3182        .interact(move |conn| {
3183            conn.query_row(
3184                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
3185                rusqlite::params![&cluster_id_q],
3186                |r| r.get(0),
3187            )
3188        })
3189        .await
3190        .map_err(ApiError::from)?;
3191    if exists == 0 {
3192        return Err(ApiError::not_found(format!(
3193            "node_id {node_id_full:?} not found in current tenant"
3194        )));
3195    }
3196    Ok(())
3197}
3198
3199async fn ensure_document_exists(
3200    tenant: &TenantHandle,
3201    doc_id: &str,
3202    node_id_full: &str,
3203) -> Result<(), ApiError> {
3204    let doc_id_q = doc_id.to_string();
3205    let exists: i64 = tenant
3206        .read()
3207        .interact(move |conn| {
3208            conn.query_row(
3209                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
3210                rusqlite::params![&doc_id_q],
3211                |r| r.get(0),
3212            )
3213        })
3214        .await
3215        .map_err(ApiError::from)?;
3216    if exists == 0 {
3217        return Err(ApiError::not_found(format!(
3218            "node_id {node_id_full:?} not found in current tenant"
3219        )));
3220    }
3221    Ok(())
3222}
3223
3224// ---------------------------------------------------------------------------
3225// Graph nodes + edges — paginated catalog reads (v0.10.0)
3226//
3227// `GET /v1/graph/nodes` and `GET /v1/graph/edges` are the bundle that
3228// powers solo-web's initial graph render. Both are read-only, both
3229// share the same tenant / auth / cursor scaffolding, both inherit the
3230// node-id prefix convention from `/v1/graph/expand` (ep:/doc:/chunk:/cl:/ent:).
3231//
3232// See `docs/dev-log/0114-graph-nodes-edges-impl.md` for the design
3233// notes (cursor format, entity scan strategy, semantic-edge rejection
3234// rationale, UNION pagination shape).
3235// ---------------------------------------------------------------------------
3236
// Page-size settings for the graph catalog reads. The handlers that use
// them are outside this excerpt; presumably a missing `limit` falls back
// to the DEFAULT value and larger requests are capped at MAX — TODO
// confirm against the handlers.
const GRAPH_NODES_DEFAULT_LIMIT: u32 = 100;
const GRAPH_NODES_MAX_LIMIT: u32 = 1000;
const GRAPH_EDGES_DEFAULT_LIMIT: u32 = 200;
const GRAPH_EDGES_MAX_LIMIT: u32 = 2000;
/// Upper bound on the entities kept by the entity scan; see
/// `ENTITY_CAP_HEADER` for how hitting it is reported.
const GRAPH_ENTITY_CAP: usize = 200;

/// Header set when the entity scan hit `GRAPH_ENTITY_CAP` and lower-
/// frequency entities were dropped from the response. Clients can show
/// "entities truncated" UX without parsing the body.
const ENTITY_CAP_HEADER: &str = "x-solo-entity-cap-reached";
3247
/// Query-string parameters for the graph-nodes catalog read (presumably
/// `GET /v1/graph/nodes`; the handler is outside this excerpt). Every
/// field is optional.
#[derive(Debug, Deserialize)]
struct GraphNodesQuery {
    /// Comma-separated kinds. Empty/missing = all five kinds. Repeated
    /// `?kind=` query params are NOT supported by axum's `Query<T>`
    /// extractor for `Option<String>` (it picks one) — comma-separated
    /// is documented + simpler. Values: episode|document|chunk|cluster|entity.
    #[serde(default)]
    kind: Option<String>,
    /// Lower time bound. NOTE(review): presumably epoch milliseconds
    /// compared against node timestamps; inclusivity is not visible here.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Upper time bound. NOTE(review): same caveats as `since_ms`.
    #[serde(default)]
    until_ms: Option<i64>,
    /// Requested page size. NOTE(review): presumably defaulted and capped
    /// with the `GRAPH_NODES_*_LIMIT` constants — confirm in the handler.
    #[serde(default)]
    limit: Option<u32>,
    /// Pagination cursor. NOTE(review): presumably the `next_cursor`
    /// value from a previous page — confirm in the handler.
    #[serde(default)]
    cursor: Option<String>,
}
3265
/// Query-string parameters for the graph-edges catalog read (presumably
/// `GET /v1/graph/edges`; the handler is outside this excerpt). Every
/// field is optional.
#[derive(Debug, Deserialize)]
struct GraphEdgesQuery {
    /// Node id to filter edges by. NOTE(review): presumably uses the
    /// `ep:`/`doc:`/`chunk:`/`cl:`/`ent:` prefix convention; whether it
    /// matches source, target, or both is not visible here.
    #[serde(default)]
    node_id: Option<String>,
    /// Comma-separated. Default = all kinds EXCEPT semantic.
    /// Values: triple|document_chunk|cluster_member|semantic.
    #[serde(default)]
    r#type: Option<String>,
    /// Requested page size. NOTE(review): presumably defaulted and capped
    /// with the `GRAPH_EDGES_*_LIMIT` constants — confirm in the handler.
    #[serde(default)]
    limit: Option<u32>,
    /// Pagination cursor. NOTE(review): presumably the `next_cursor`
    /// value from a previous page — confirm in the handler.
    #[serde(default)]
    cursor: Option<String>,
}
3279
3280#[derive(Debug, Serialize)]
3281struct GraphNodesResponse {
3282    nodes: Vec<GraphNode>,
3283    /// Always serialised (as `null` when absent) so client codegen against
3284    /// the OpenAPI schema (`"type": ["string", "null"]`) sees a field
3285    /// rather than a missing key. See dev-log 0152 finding M3.
3286    next_cursor: Option<String>,
3287}
3288
3289#[derive(Debug, Serialize)]
3290struct GraphEdgesResponse {
3291    edges: Vec<GraphEdge>,
3292    /// Always serialised; see `GraphNodesResponse::next_cursor`.
3293    next_cursor: Option<String>,
3294}
3295
3296/// Decode the `kind` filter from the query string. Returns the set of
3297/// kinds the caller wants (all five when filter absent / empty). 400 on
3298/// unknown kind.
3299fn parse_node_kind_filter(raw: Option<&str>) -> Result<Vec<NodeKind>, ApiError> {
3300    let raw = raw.unwrap_or("").trim();
3301    if raw.is_empty() {
3302        return Ok(vec![
3303            NodeKind::Episode,
3304            NodeKind::Document,
3305            NodeKind::Chunk,
3306            NodeKind::Cluster,
3307            NodeKind::Entity,
3308        ]);
3309    }
3310    let mut out = Vec::new();
3311    for token in raw.split(',') {
3312        let token = token.trim();
3313        if token.is_empty() {
3314            continue;
3315        }
3316        let kind = match token {
3317            "episode" => NodeKind::Episode,
3318            "document" => NodeKind::Document,
3319            "chunk" => NodeKind::Chunk,
3320            "cluster" => NodeKind::Cluster,
3321            "entity" => NodeKind::Entity,
3322            other => {
3323                return Err(ApiError::bad_request(format!(
3324                    "unknown node kind {other:?}; expected one of episode/document/chunk/cluster/entity"
3325                )));
3326            }
3327        };
3328        if !out.contains(&kind) {
3329            out.push(kind);
3330        }
3331    }
3332    if out.is_empty() {
3333        return Err(ApiError::bad_request(
3334            "kind filter is empty after parsing; either omit or list at least one kind",
3335        ));
3336    }
3337    Ok(out)
3338}
3339
/// The precomputed edge families `/v1/graph/edges` can return. Semantic
/// edges are intentionally absent: the type-filter parser rejects them for
/// this endpoint.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EdgeKind {
    /// Filter token `triple`.
    Triple,
    /// Filter token `document_chunk`.
    DocumentChunk,
    /// Filter token `cluster_member`.
    ClusterMember,
}
3347
3348impl EdgeKind {
3349    /// Sort-stable kind ordering for pagination. Lower runs first.
3350    fn order_idx(self) -> u8 {
3351        match self {
3352            Self::Triple => 0,
3353            Self::DocumentChunk => 1,
3354            Self::ClusterMember => 2,
3355        }
3356    }
3357}
3358
3359fn parse_edge_kind_filter(raw: Option<&str>) -> Result<Vec<EdgeKind>, ApiError> {
3360    let raw = raw.unwrap_or("").trim();
3361    if raw.is_empty() {
3362        // Default = all three concrete kinds; semantic is opt-in via
3363        // /v1/graph/neighbors/:id (per scoping doc §3 Decision B).
3364        return Ok(vec![
3365            EdgeKind::Triple,
3366            EdgeKind::DocumentChunk,
3367            EdgeKind::ClusterMember,
3368        ]);
3369    }
3370    let mut out = Vec::new();
3371    for token in raw.split(',') {
3372        let token = token.trim();
3373        if token.is_empty() {
3374            continue;
3375        }
3376        let kind = match token {
3377            "triple" => EdgeKind::Triple,
3378            "document_chunk" => EdgeKind::DocumentChunk,
3379            "cluster_member" => EdgeKind::ClusterMember,
3380            "semantic" => {
3381                // semantic edges aren't precomputed; they're HNSW queries
3382                // at request time. Wrong endpoint.
3383                return Err(ApiError::bad_request(
3384                    "semantic edges are available via /v1/graph/neighbors/:id?kind=semantic, not /v1/graph/edges (semantic edges aren't precomputed; they're query-time HNSW lookups)",
3385                ));
3386            }
3387            other => {
3388                return Err(ApiError::bad_request(format!(
3389                    "unknown edge type {other:?}; expected one of triple/document_chunk/cluster_member"
3390                )));
3391            }
3392        };
3393        if !out.contains(&kind) {
3394            out.push(kind);
3395        }
3396    }
3397    if out.is_empty() {
3398        return Err(ApiError::bad_request(
3399            "type filter is empty after parsing; either omit or list at least one type",
3400        ));
3401    }
3402    Ok(out)
3403}
3404
3405/// Opaque cursor for `/v1/graph/nodes`. Encodes the last item's
3406/// `(ts_ms, id)` so the next page is `WHERE (ts_ms, id) < (cursor.ts_ms,
3407/// cursor.id)` under sort `ts_ms DESC, id ASC`.
3408#[derive(Debug, Serialize, Deserialize)]
3409struct NodesCursor {
3410    ts_ms: i64,
3411    id: String,
3412}
3413
3414/// Opaque cursor for `/v1/graph/edges`. Encodes the last item's
3415/// `(kind_idx, sub_id)` so the next page resumes at `> cursor` under
3416/// sort `(kind_idx ASC, sub_id ASC)`. `sub_id` is the per-kind stable
3417/// row id (triple_id for triples, chunk_id for document_chunk, the
3418/// composite `cluster_id||memory_id` string for cluster_member).
3419#[derive(Debug, Serialize, Deserialize)]
3420struct EdgesCursor {
3421    kind_idx: u8,
3422    sub_id: String,
3423}
3424
3425fn encode_cursor<T: Serialize>(value: &T) -> Result<String, ApiError> {
3426    use base64::Engine;
3427    let json = serde_json::to_vec(value)
3428        .map_err(|e| ApiError::internal(format!("cursor serialize: {e}")))?;
3429    Ok(base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json))
3430}
3431
3432fn decode_cursor<T: for<'de> Deserialize<'de>>(raw: &str) -> Result<T, ApiError> {
3433    use base64::Engine;
3434    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
3435        .decode(raw.as_bytes())
3436        .map_err(|e| ApiError::bad_request(format!("cursor: bad base64: {e}")))?;
3437    serde_json::from_slice::<T>(&bytes)
3438        .map_err(|e| ApiError::bad_request(format!("cursor: bad JSON payload: {e}")))
3439}
3440
3441/// Internal staging row for the nodes endpoint. Carries the GraphNode
3442/// plus the sort key so we can merge all kinds before applying the
3443/// pagination cut.
3444#[derive(Debug)]
3445struct StagingNode {
3446    node: GraphNode,
3447    sort_ts_ms: i64,
3448    sort_id: String,
3449}
3450
/// Compare two `(ts_ms, id)` node sort keys under the canonical page
/// order: newest timestamp first, then id ascending as a deterministic
/// tie-break. `Less` means `a` is listed before `b`.
fn cmp_node_sort_keys(a: (i64, &str), b: (i64, &str)) -> std::cmp::Ordering {
    let (a_ts, a_id) = a;
    let (b_ts, b_id) = b;
    // Operands are swapped for the timestamp so larger values sort first.
    b_ts.cmp(&a_ts).then_with(|| a_id.cmp(b_id))
}
3460
3461/// True if `(ts_ms, id)` strictly comes AFTER `cursor` under the canonical
3462/// sort `ts_ms DESC, id ASC` — i.e. is admissible into a page following
3463/// the cursor.
3464fn node_passes_cursor(ts_ms: i64, id: &str, cursor: &NodesCursor) -> bool {
3465    cmp_node_sort_keys((ts_ms, id), (cursor.ts_ms, cursor.id.as_str()))
3466        == std::cmp::Ordering::Greater
3467}
3468
3469// --- Per-kind row fetchers (each runs a bounded query, applies the time
3470//     filter, returns rows already sorted `ts_ms DESC, id ASC`).
3471
/// One row of the episode query backing `/v1/graph/nodes`.
#[derive(Debug)]
struct NodeRowEp {
    /// `episodes.memory_id` (unprefixed).
    memory_id: String,
    /// `episodes.ts_ms`; doubles as the node's sort timestamp.
    ts_ms: i64,
    /// `episodes.content`.
    content: String,
}
3478
3479fn fetch_episodes_for_nodes(
3480    conn: &rusqlite::Connection,
3481    since_ms: Option<i64>,
3482    until_ms: Option<i64>,
3483    cursor: Option<&NodesCursor>,
3484    limit: i64,
3485) -> rusqlite::Result<Vec<NodeRowEp>> {
3486    let mut sql = String::from(
3487        "SELECT memory_id, ts_ms, content
3488           FROM episodes
3489          WHERE status = 'active'",
3490    );
3491    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3492    if let Some(s) = since_ms {
3493        sql.push_str(" AND ts_ms >= ?");
3494        params.push(s.into());
3495    }
3496    if let Some(u) = until_ms {
3497        sql.push_str(" AND ts_ms <= ?");
3498        params.push(u.into());
3499    }
3500    // Cursor pre-filter: under sort `ts_ms DESC, prefixed_id ASC`,
3501    // anything strictly newer than the cursor's ts_ms is in a previous
3502    // page; rows with equal ts_ms may or may not be (depends on the
3503    // cross-kind ordering). The post-merge step applies the full
3504    // `(ts_ms, prefixed_id)` comparison; here we just discard rows
3505    // that can't possibly survive.
3506    if let Some(cur) = cursor {
3507        sql.push_str(" AND ts_ms <= ?");
3508        params.push(cur.ts_ms.into());
3509    }
3510    sql.push_str(" ORDER BY ts_ms DESC, memory_id ASC LIMIT ?");
3511    params.push(limit.into());
3512    let mut stmt = conn.prepare(&sql)?;
3513    let rows: Vec<NodeRowEp> = stmt
3514        .query_map(rusqlite::params_from_iter(params), |r| {
3515            Ok(NodeRowEp {
3516                memory_id: r.get(0)?,
3517                ts_ms: r.get(1)?,
3518                content: r.get(2)?,
3519            })
3520        })?
3521        .collect::<rusqlite::Result<Vec<_>>>()?;
3522    Ok(rows)
3523}
3524
/// One row of the document query backing `/v1/graph/nodes`.
#[derive(Debug)]
struct NodeRowDoc {
    /// `documents.doc_id` (unprefixed).
    doc_id: String,
    /// `documents.title`; nullable.
    title: Option<String>,
    /// `documents.source`; nullable.
    source: Option<String>,
    /// `documents.ingested_at_ms`; doubles as the node's sort timestamp.
    ingested_at_ms: i64,
}
3532
3533fn fetch_documents_for_nodes(
3534    conn: &rusqlite::Connection,
3535    since_ms: Option<i64>,
3536    until_ms: Option<i64>,
3537    cursor: Option<&NodesCursor>,
3538    limit: i64,
3539) -> rusqlite::Result<Vec<NodeRowDoc>> {
3540    let mut sql = String::from(
3541        "SELECT doc_id, title, source, ingested_at_ms
3542           FROM documents
3543          WHERE status = 'active'",
3544    );
3545    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3546    if let Some(s) = since_ms {
3547        sql.push_str(" AND ingested_at_ms >= ?");
3548        params.push(s.into());
3549    }
3550    if let Some(u) = until_ms {
3551        sql.push_str(" AND ingested_at_ms <= ?");
3552        params.push(u.into());
3553    }
3554    if let Some(cur) = cursor {
3555        sql.push_str(" AND ingested_at_ms <= ?");
3556        params.push(cur.ts_ms.into());
3557    }
3558    sql.push_str(" ORDER BY ingested_at_ms DESC, doc_id ASC LIMIT ?");
3559    params.push(limit.into());
3560    let mut stmt = conn.prepare(&sql)?;
3561    let rows: Vec<NodeRowDoc> = stmt
3562        .query_map(rusqlite::params_from_iter(params), |r| {
3563            Ok(NodeRowDoc {
3564                doc_id: r.get(0)?,
3565                title: r.get(1)?,
3566                source: r.get(2)?,
3567                ingested_at_ms: r.get(3)?,
3568            })
3569        })?
3570        .collect::<rusqlite::Result<Vec<_>>>()?;
3571    Ok(rows)
3572}
3573
/// One row of the chunk query backing `/v1/graph/nodes`.
#[derive(Debug)]
struct NodeRowChunk {
    /// `document_chunks.chunk_id` (unprefixed).
    chunk_id: String,
    /// `document_chunks.chunk_index`.
    chunk_index: i64,
    /// `document_chunks.content`.
    content: String,
    /// `document_chunks.created_at_ms`; doubles as the node's sort
    /// timestamp.
    created_at_ms: i64,
}
3581
3582fn fetch_chunks_for_nodes(
3583    conn: &rusqlite::Connection,
3584    since_ms: Option<i64>,
3585    until_ms: Option<i64>,
3586    cursor: Option<&NodesCursor>,
3587    limit: i64,
3588) -> rusqlite::Result<Vec<NodeRowChunk>> {
3589    // Filter by `document_chunks.created_at_ms`; chunks of forgotten
3590    // documents are filtered out by the join on `documents.status`.
3591    let mut sql = String::from(
3592        "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
3593           FROM document_chunks c
3594           JOIN documents d ON d.doc_id = c.doc_id
3595          WHERE d.status = 'active'",
3596    );
3597    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3598    if let Some(s) = since_ms {
3599        sql.push_str(" AND c.created_at_ms >= ?");
3600        params.push(s.into());
3601    }
3602    if let Some(u) = until_ms {
3603        sql.push_str(" AND c.created_at_ms <= ?");
3604        params.push(u.into());
3605    }
3606    if let Some(cur) = cursor {
3607        sql.push_str(" AND c.created_at_ms <= ?");
3608        params.push(cur.ts_ms.into());
3609    }
3610    sql.push_str(" ORDER BY c.created_at_ms DESC, c.chunk_id ASC LIMIT ?");
3611    params.push(limit.into());
3612    let mut stmt = conn.prepare(&sql)?;
3613    let rows: Vec<NodeRowChunk> = stmt
3614        .query_map(rusqlite::params_from_iter(params), |r| {
3615            Ok(NodeRowChunk {
3616                chunk_id: r.get(0)?,
3617                chunk_index: r.get(1)?,
3618                content: r.get(2)?,
3619                created_at_ms: r.get(3)?,
3620            })
3621        })?
3622        .collect::<rusqlite::Result<Vec<_>>>()?;
3623    Ok(rows)
3624}
3625
/// One row of the cluster query backing `/v1/graph/nodes`.
#[derive(Debug)]
struct NodeRowCluster {
    /// `clusters.cluster_id` (unprefixed).
    cluster_id: String,
    /// `semantic_abstractions.content` for the cluster, if the LEFT JOIN
    /// found one; used as the optional label.
    abstraction: Option<String>,
    /// `clusters.created_at_ms`; doubles as the node's sort timestamp.
    created_at_ms: i64,
}
3632
3633fn fetch_clusters_for_nodes(
3634    conn: &rusqlite::Connection,
3635    since_ms: Option<i64>,
3636    until_ms: Option<i64>,
3637    cursor: Option<&NodesCursor>,
3638    limit: i64,
3639) -> rusqlite::Result<Vec<NodeRowCluster>> {
3640    // clusters has no `status` column; LEFT JOIN abstractions for the
3641    // optional label.
3642    let mut sql = String::from(
3643        "SELECT c.cluster_id, sa.content, c.created_at_ms
3644           FROM clusters c
3645           LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
3646          WHERE 1=1",
3647    );
3648    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3649    if let Some(s) = since_ms {
3650        sql.push_str(" AND c.created_at_ms >= ?");
3651        params.push(s.into());
3652    }
3653    if let Some(u) = until_ms {
3654        sql.push_str(" AND c.created_at_ms <= ?");
3655        params.push(u.into());
3656    }
3657    if let Some(cur) = cursor {
3658        sql.push_str(" AND c.created_at_ms <= ?");
3659        params.push(cur.ts_ms.into());
3660    }
3661    sql.push_str(" ORDER BY c.created_at_ms DESC, c.cluster_id ASC LIMIT ?");
3662    params.push(limit.into());
3663    let mut stmt = conn.prepare(&sql)?;
3664    let rows: Vec<NodeRowCluster> = stmt
3665        .query_map(rusqlite::params_from_iter(params), |r| {
3666            Ok(NodeRowCluster {
3667                cluster_id: r.get(0)?,
3668                abstraction: r.get(1)?,
3669                created_at_ms: r.get(2)?,
3670            })
3671        })?
3672        .collect::<rusqlite::Result<Vec<_>>>()?;
3673    Ok(rows)
3674}
3675
/// One aggregated entity row synthesized from the triples table for
/// `/v1/graph/nodes`.
#[derive(Debug)]
struct NodeRowEntity {
    /// The entity value, taken from a triple's `subject_id` or `object_id`.
    value: String,
    /// Number of subject/object references to this value among the
    /// triples considered.
    ref_count: i64,
    /// Smallest `valid_from_ms` among those references.
    first_seen_ms: i64,
}
3682
3683/// Synthesize entity nodes from the triples table. Caps result at
3684/// `GRAPH_ENTITY_CAP`, ordered by `ref_count DESC` so the loudest
3685/// entities make the cut. Returns (rows, cap_reached).
3686///
3687/// **Cost**: this is O(N) over active triples per request. For tenants
3688/// with >100k triples this can be noticeable; v0.10.x can cache the
3689/// rollup if profiling justifies it. The 200-row cap keeps the wire
3690/// payload bounded regardless.
3691fn fetch_entities_for_nodes(
3692    conn: &rusqlite::Connection,
3693    since_ms: Option<i64>,
3694    until_ms: Option<i64>,
3695    cursor: Option<&NodesCursor>,
3696) -> rusqlite::Result<(Vec<NodeRowEntity>, bool)> {
3697    // Pull subject + object columns, group by value, compute count + min
3698    // ts_ms. UNION ALL the two columns into a single aggregation. Apply
3699    // time filter against `valid_from_ms` (the closest analogue to "when
3700    // was this entity first referenced").
3701    let mut sql = String::from(
3702        "WITH all_refs AS (
3703            SELECT subject_id AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3704            UNION ALL
3705            SELECT object_id  AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3706         )
3707         SELECT value, COUNT(*) AS ref_count, MIN(ts_ms) AS first_seen_ms
3708           FROM all_refs
3709          WHERE 1=1",
3710    );
3711    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3712    if let Some(s) = since_ms {
3713        sql.push_str(" AND ts_ms >= ?");
3714        params.push(s.into());
3715    }
3716    if let Some(u) = until_ms {
3717        sql.push_str(" AND ts_ms <= ?");
3718        params.push(u.into());
3719    }
3720    // Cursor: drop entities whose first_seen_ms strictly newer than the
3721    // cursor. We can't predicate on COUNT() until after GROUP BY, so the
3722    // cap-applicable filter sits in the HAVING clause.
3723    sql.push_str(" GROUP BY value");
3724    if let Some(ts) = cursor.map(|c| c.ts_ms) {
3725        sql.push_str(" HAVING MIN(ts_ms) <= ?");
3726        params.push(ts.into());
3727    }
3728    // Over-fetch by one to detect "cap reached".
3729    let want = GRAPH_ENTITY_CAP as i64 + 1;
3730    sql.push_str(" ORDER BY ref_count DESC, value ASC LIMIT ?");
3731    params.push(want.into());
3732    let mut stmt = conn.prepare(&sql)?;
3733    let rows: Vec<NodeRowEntity> = stmt
3734        .query_map(rusqlite::params_from_iter(params), |r| {
3735            Ok(NodeRowEntity {
3736                value: r.get(0)?,
3737                ref_count: r.get(1)?,
3738                first_seen_ms: r.get(2)?,
3739            })
3740        })?
3741        .collect::<rusqlite::Result<Vec<_>>>()?;
3742    let cap_reached = rows.len() > GRAPH_ENTITY_CAP;
3743    let mut trimmed = rows;
3744    if cap_reached {
3745        trimmed.truncate(GRAPH_ENTITY_CAP);
3746    }
3747    Ok((trimmed, cap_reached))
3748}
3749
3750/// `GET /v1/graph/nodes`. Paginated node catalog across the tenant.
3751/// See module-level comments for the contract.
3752async fn graph_nodes_handler(
3753    TenantExtractor(tenant): TenantExtractor,
3754    Query(q): Query<GraphNodesQuery>,
3755) -> Result<Response, ApiError> {
3756    let limit = q.limit.unwrap_or(GRAPH_NODES_DEFAULT_LIMIT);
3757    let limit = limit.clamp(1, GRAPH_NODES_MAX_LIMIT);
3758    let kinds = parse_node_kind_filter(q.kind.as_deref())?;
3759    let since_ms = q.since_ms;
3760    let until_ms = q.until_ms;
3761    if let (Some(s), Some(u)) = (since_ms, until_ms) {
3762        if s > u {
3763            return Err(ApiError::bad_request(format!(
3764                "since_ms ({s}) must be <= until_ms ({u})"
3765            )));
3766        }
3767    }
3768    let cursor = match q.cursor.as_deref() {
3769        None => None,
3770        Some("") => None,
3771        Some(raw) => Some(decode_cursor::<NodesCursor>(raw)?),
3772    };
3773    let want_episode = kinds.contains(&NodeKind::Episode);
3774    let want_document = kinds.contains(&NodeKind::Document);
3775    let want_chunk = kinds.contains(&NodeKind::Chunk);
3776    let want_cluster = kinds.contains(&NodeKind::Cluster);
3777    let want_entity = kinds.contains(&NodeKind::Entity);
3778
3779    // Over-fetch `limit + 2` per kind:
3780    //   * `+1` so the merge step can detect "more rows available beyond
3781    //     this page" → emits a `next_cursor` instead of None.
3782    //   * `+1` again because the SQL pre-filter `ts_ms <= cursor.ts_ms`
3783    //     can pull the previous page's last item back in; the post-merge
3784    //     cursor predicate drops it, costing one row of headroom.
3785    // The entity cap stays at GRAPH_ENTITY_CAP — entities are bounded
3786    // independently by the response cap, not the page limit.
3787    let per_kind_limit = (limit as i64).saturating_add(2);
3788    let tenant_id_for_blocking = tenant.tenant_id().to_string();
3789    let cursor_clone = cursor.as_ref().map(|c| NodesCursor {
3790        ts_ms: c.ts_ms,
3791        id: c.id.clone(),
3792    });
3793
3794    let (mut staged, cap_reached) = tenant
3795        .read()
3796        .interact(move |conn| {
3797            let mut staged: Vec<StagingNode> = Vec::new();
3798            let mut cap_reached = false;
3799            let cursor_ref = cursor_clone.as_ref();
3800
3801            if want_episode {
3802                let eps =
3803                    fetch_episodes_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3804                for ep in eps {
3805                    let id = format!("ep:{}", ep.memory_id);
3806                    let exp = ExpandedEpisode {
3807                        memory_id: ep.memory_id,
3808                        ts_ms: ep.ts_ms,
3809                        content: ep.content,
3810                    };
3811                    let node = graph_node_for_episode(&tenant_id_for_blocking, &exp);
3812                    staged.push(StagingNode {
3813                        sort_ts_ms: ep.ts_ms,
3814                        sort_id: id.clone(),
3815                        node,
3816                    });
3817                }
3818            }
3819            if want_document {
3820                let docs = fetch_documents_for_nodes(
3821                    conn,
3822                    since_ms,
3823                    until_ms,
3824                    cursor_ref,
3825                    per_kind_limit,
3826                )?;
3827                for d in docs {
3828                    let id = format!("doc:{}", d.doc_id);
3829                    let exp = ExpandedDocument {
3830                        doc_id: d.doc_id,
3831                        title: d.title,
3832                        source: d.source,
3833                        ingested_at_ms: d.ingested_at_ms,
3834                    };
3835                    let node = graph_node_for_document(&tenant_id_for_blocking, &exp);
3836                    staged.push(StagingNode {
3837                        sort_ts_ms: d.ingested_at_ms,
3838                        sort_id: id.clone(),
3839                        node,
3840                    });
3841                }
3842            }
3843            if want_chunk {
3844                let chunks =
3845                    fetch_chunks_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3846                for c in chunks {
3847                    let id = format!("chunk:{}", c.chunk_id);
3848                    let exp = ExpandedChunk {
3849                        chunk_id: c.chunk_id,
3850                        chunk_index: c.chunk_index,
3851                        content: c.content,
3852                    };
3853                    // graph_node_for_chunk sets ts_ms = None for the
3854                    // wire format (chunks don't have a natural user-
3855                    // facing timestamp); but for sorting we use the
3856                    // row's created_at_ms.
3857                    let mut node = graph_node_for_chunk(&tenant_id_for_blocking, &exp);
3858                    node.ts_ms = Some(c.created_at_ms);
3859                    staged.push(StagingNode {
3860                        sort_ts_ms: c.created_at_ms,
3861                        sort_id: id.clone(),
3862                        node,
3863                    });
3864                }
3865            }
3866            if want_cluster {
3867                let cls =
3868                    fetch_clusters_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3869                for c in cls {
3870                    let id = format!("cl:{}", c.cluster_id);
3871                    let node = graph_node_for_cluster(
3872                        &tenant_id_for_blocking,
3873                        &c.cluster_id,
3874                        c.abstraction.as_deref(),
3875                        c.created_at_ms,
3876                    );
3877                    staged.push(StagingNode {
3878                        sort_ts_ms: c.created_at_ms,
3879                        sort_id: id.clone(),
3880                        node,
3881                    });
3882                }
3883            }
3884            if want_entity {
3885                let (ents, was_cap_reached) =
3886                    fetch_entities_for_nodes(conn, since_ms, until_ms, cursor_ref)?;
3887                cap_reached = was_cap_reached;
3888                for e in ents {
3889                    let id = format!("ent:{}", e.value);
3890                    let mut node = graph_node_for_entity(&tenant_id_for_blocking, &e.value);
3891                    node.ts_ms = Some(e.first_seen_ms);
3892                    node.preview = Some(format!("Referenced in {} triples", e.ref_count));
3893                    staged.push(StagingNode {
3894                        sort_ts_ms: e.first_seen_ms,
3895                        sort_id: id.clone(),
3896                        node,
3897                    });
3898                }
3899            }
3900            Ok::<_, rusqlite::Error>((staged, cap_reached))
3901        })
3902        .await
3903        .map_err(ApiError::from)?;
3904
3905    // Apply cursor filter.
3906    if let Some(cur) = &cursor {
3907        staged.retain(|s| node_passes_cursor(s.sort_ts_ms, &s.sort_id, cur));
3908    }
3909
3910    // Sort `ts_ms DESC, id ASC`.
3911    staged
3912        .sort_by(|a, b| cmp_node_sort_keys((a.sort_ts_ms, &a.sort_id), (b.sort_ts_ms, &b.sort_id)));
3913
3914    // Apply page limit + compute next_cursor.
3915    let limit_us = limit as usize;
3916    let next_cursor = if staged.len() > limit_us {
3917        let last = &staged[limit_us - 1];
3918        Some(NodesCursor {
3919            ts_ms: last.sort_ts_ms,
3920            id: last.sort_id.clone(),
3921        })
3922    } else {
3923        None
3924    };
3925    staged.truncate(limit_us);
3926
3927    let next_cursor_str = match next_cursor {
3928        Some(c) => Some(encode_cursor(&c)?),
3929        None => None,
3930    };
3931
3932    let nodes: Vec<GraphNode> = staged.into_iter().map(|s| s.node).collect();
3933    let payload = GraphNodesResponse {
3934        nodes,
3935        next_cursor: next_cursor_str,
3936    };
3937
3938    // Attach the entity-cap header so clients can show truncation UX
3939    // without parsing the body.
3940    let mut response = Json(payload).into_response();
3941    if cap_reached {
3942        response
3943            .headers_mut()
3944            .insert(ENTITY_CAP_HEADER, HeaderValue::from_static("true"));
3945    }
3946    Ok(response)
3947}
3948
3949// --- /v1/graph/edges --------------------------------------------------
3950
3951#[derive(Debug)]
3952struct StagingEdge {
3953    edge: GraphEdge,
3954    kind_idx: u8,
3955    sub_id: String,
3956}
3957
/// Compare two `(kind_idx, sub_id)` edge sort keys: kind index ascending,
/// then sub id ascending. `Less` means `a` is listed before `b`.
fn cmp_edge_sort_keys(a: (u8, &str), b: (u8, &str)) -> std::cmp::Ordering {
    // Tuple ordering is lexicographic, which is exactly "kind first, then
    // sub id".
    a.cmp(&b)
}
3964
3965fn edge_passes_cursor(kind_idx: u8, sub_id: &str, cursor: &EdgesCursor) -> bool {
3966    cmp_edge_sort_keys(
3967        (kind_idx, sub_id),
3968        (cursor.kind_idx, cursor.sub_id.as_str()),
3969    ) == std::cmp::Ordering::Greater
3970}
3971
3972/// Whether the supplied focus `node_id` (kind, value) matches an edge's
3973/// (source, target) endpoint pair under a given edge kind. Used to
3974/// filter `?node_id=...` queries.
3975fn edge_touches_focus(
3976    kind: EdgeKind,
3977    focus_kind: NodeKind,
3978    focus_value: &str,
3979    src_value: &str,
3980    tgt_value: &str,
3981    extra_value: Option<&str>,
3982) -> bool {
3983    // Determine which endpoint kinds this edge family produces; if the
3984    // focus kind isn't compatible, no match.
3985    match kind {
3986        EdgeKind::Triple => match focus_kind {
3987            // Triple edges flow source_episode → ent:<object_id>. We
3988            // also expose subject/object entities as endpoints (see
3989            // emit_triple_edges_for_focus); the matching here covers
3990            // episode focus + entity focus + the symmetric pair.
3991            NodeKind::Episode => src_value == focus_value,
3992            NodeKind::Entity => {
3993                tgt_value == focus_value
3994                    || extra_value.map(|x| x == focus_value).unwrap_or(false)
3995                    || src_value == focus_value
3996            }
3997            _ => false,
3998        },
3999        EdgeKind::DocumentChunk => match focus_kind {
4000            NodeKind::Document => src_value == focus_value,
4001            NodeKind::Chunk => tgt_value == focus_value,
4002            _ => false,
4003        },
4004        EdgeKind::ClusterMember => match focus_kind {
4005            NodeKind::Cluster => src_value == focus_value,
4006            NodeKind::Episode => tgt_value == focus_value,
4007            _ => false,
4008        },
4009    }
4010}
4011
/// One row of the triple query backing `/v1/graph/edges`.
#[derive(Debug)]
struct EdgeRowTriple {
    /// `triples.triple_id`.
    triple_id: String,
    /// `memory_id` of the episode joined via `source_episode_id`; `None`
    /// when the LEFT JOIN found no episode (orphan triple).
    source_memory_id: Option<String>,
    /// `triples.object_id`.
    object_id: String,
    /// `triples.predicate`.
    predicate: String,
    /// `triples.confidence`.
    confidence: f32,
}
4020
4021fn fetch_triple_edges(conn: &rusqlite::Connection) -> rusqlite::Result<Vec<EdgeRowTriple>> {
4022    // Emit one edge per triple: source_episode → ent:object_id. Skip
4023    // orphan triples (`source_episode_id IS NULL`). Bound the scan at
4024    // GRAPH_EDGES_MAX_LIMIT * a safety multiplier so a runaway tenant
4025    // doesn't OOM the page-builder; the merge-and-page step trims to
4026    // the real limit downstream.
4027    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4028    let mut stmt = conn.prepare(
4029        "SELECT t.triple_id, e.memory_id, t.object_id, t.predicate, t.confidence
4030           FROM triples t
4031           LEFT JOIN episodes e ON e.rowid = t.source_episode_id
4032          WHERE t.status = 'active'
4033          ORDER BY t.triple_id ASC
4034          LIMIT ?1",
4035    )?;
4036    let rows: Vec<EdgeRowTriple> = stmt
4037        .query_map(rusqlite::params![safety_cap], |r| {
4038            Ok(EdgeRowTriple {
4039                triple_id: r.get(0)?,
4040                source_memory_id: r.get::<_, Option<String>>(1)?,
4041                object_id: r.get(2)?,
4042                predicate: r.get(3)?,
4043                confidence: r.get(4)?,
4044            })
4045        })?
4046        .collect::<rusqlite::Result<Vec<_>>>()?;
4047    Ok(rows)
4048}
4049
/// One row of the document→chunk query backing `/v1/graph/edges`.
#[derive(Debug)]
struct EdgeRowDocChunk {
    /// `document_chunks.chunk_id`.
    chunk_id: String,
    /// `document_chunks.doc_id` of the owning document.
    doc_id: String,
}
4055
4056fn fetch_document_chunk_edges(
4057    conn: &rusqlite::Connection,
4058) -> rusqlite::Result<Vec<EdgeRowDocChunk>> {
4059    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4060    let mut stmt = conn.prepare(
4061        "SELECT c.chunk_id, c.doc_id
4062           FROM document_chunks c
4063           JOIN documents d ON d.doc_id = c.doc_id
4064          WHERE d.status = 'active'
4065          ORDER BY c.chunk_id ASC
4066          LIMIT ?1",
4067    )?;
4068    let rows: Vec<EdgeRowDocChunk> = stmt
4069        .query_map(rusqlite::params![safety_cap], |r| {
4070            Ok(EdgeRowDocChunk {
4071                chunk_id: r.get(0)?,
4072                doc_id: r.get(1)?,
4073            })
4074        })?
4075        .collect::<rusqlite::Result<Vec<_>>>()?;
4076    Ok(rows)
4077}
4078
/// One row of the cluster→episode membership query backing
/// `/v1/graph/edges`.
#[derive(Debug)]
struct EdgeRowClusterMember {
    /// `cluster_episodes.cluster_id`.
    cluster_id: String,
    /// `cluster_episodes.memory_id` of the member episode.
    memory_id: String,
}
4084
4085fn fetch_cluster_member_edges(
4086    conn: &rusqlite::Connection,
4087) -> rusqlite::Result<Vec<EdgeRowClusterMember>> {
4088    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4089    let mut stmt = conn.prepare(
4090        "SELECT ce.cluster_id, ce.memory_id
4091           FROM cluster_episodes ce
4092           JOIN episodes e ON e.memory_id = ce.memory_id
4093          WHERE e.status = 'active'
4094          ORDER BY ce.cluster_id ASC, ce.memory_id ASC
4095          LIMIT ?1",
4096    )?;
4097    let rows: Vec<EdgeRowClusterMember> = stmt
4098        .query_map(rusqlite::params![safety_cap], |r| {
4099            Ok(EdgeRowClusterMember {
4100                cluster_id: r.get(0)?,
4101                memory_id: r.get(1)?,
4102            })
4103        })?
4104        .collect::<rusqlite::Result<Vec<_>>>()?;
4105    Ok(rows)
4106}
4107
4108/// `GET /v1/graph/edges`. Paginated edge catalog. See module-level
4109/// comments for the contract.
4110async fn graph_edges_handler(
4111    TenantExtractor(tenant): TenantExtractor,
4112    Query(q): Query<GraphEdgesQuery>,
4113) -> Result<Json<GraphEdgesResponse>, ApiError> {
4114    let limit = q.limit.unwrap_or(GRAPH_EDGES_DEFAULT_LIMIT);
4115    let limit = limit.clamp(1, GRAPH_EDGES_MAX_LIMIT);
4116    let kinds = parse_edge_kind_filter(q.r#type.as_deref())?;
4117    let cursor = match q.cursor.as_deref() {
4118        None => None,
4119        Some("") => None,
4120        Some(raw) => Some(decode_cursor::<EdgesCursor>(raw)?),
4121    };
4122
4123    let focus = match q.node_id.as_deref() {
4124        None => None,
4125        Some(raw) => {
4126            let (kind, value) = parse_node_id(raw)?;
4127            Some((kind, value.to_string()))
4128        }
4129    };
4130
4131    let want_triple = kinds.contains(&EdgeKind::Triple);
4132    let want_doc_chunk = kinds.contains(&EdgeKind::DocumentChunk);
4133    let want_cluster_member = kinds.contains(&EdgeKind::ClusterMember);
4134
4135    let staged: Vec<StagingEdge> = tenant
4136        .read()
4137        .interact(move |conn| {
4138            let mut staged: Vec<StagingEdge> = Vec::new();
4139
4140            if want_triple {
4141                for t in fetch_triple_edges(conn)? {
4142                    let src_id = match &t.source_memory_id {
4143                        Some(mid) => format!("ep:{mid}"),
4144                        None => continue, // orphan triple — skip
4145                    };
4146                    let tgt_id = format!("ent:{}", t.object_id);
4147                    if let Some((fk, fv)) = &focus {
4148                        // `src_value` for matching is the bare memory_id
4149                        // (after the `ep:` prefix); `tgt_value` is the
4150                        // bare entity value.
4151                        if !edge_touches_focus(
4152                            EdgeKind::Triple,
4153                            *fk,
4154                            fv,
4155                            t.source_memory_id.as_deref().unwrap_or(""),
4156                            &t.object_id,
4157                            // Triples carry a subject_id too, but the
4158                            // emitted edge only goes ep → ent(object).
4159                            // For entity-focus matches we also accept
4160                            // hits on subject_id; surface it through
4161                            // the `extra` slot.
4162                            None,
4163                        ) {
4164                            continue;
4165                        }
4166                    }
4167                    let edge = GraphEdge {
4168                        id: edge_id(&src_id, "triple", &tgt_id),
4169                        source: src_id,
4170                        target: tgt_id,
4171                        kind: "triple",
4172                        predicate: Some(t.predicate),
4173                        weight: Some(t.confidence),
4174                    };
4175                    staged.push(StagingEdge {
4176                        edge,
4177                        kind_idx: EdgeKind::Triple.order_idx(),
4178                        sub_id: t.triple_id,
4179                    });
4180                }
4181            }
4182            if want_doc_chunk {
4183                for dc in fetch_document_chunk_edges(conn)? {
4184                    let src_id = format!("doc:{}", dc.doc_id);
4185                    let tgt_id = format!("chunk:{}", dc.chunk_id);
4186                    if let Some((fk, fv)) = &focus {
4187                        if !edge_touches_focus(
4188                            EdgeKind::DocumentChunk,
4189                            *fk,
4190                            fv,
4191                            &dc.doc_id,
4192                            &dc.chunk_id,
4193                            None,
4194                        ) {
4195                            continue;
4196                        }
4197                    }
4198                    let edge = GraphEdge {
4199                        id: edge_id(&src_id, "document_chunk", &tgt_id),
4200                        source: src_id,
4201                        target: tgt_id,
4202                        kind: "document_chunk",
4203                        predicate: None,
4204                        weight: None,
4205                    };
4206                    staged.push(StagingEdge {
4207                        edge,
4208                        kind_idx: EdgeKind::DocumentChunk.order_idx(),
4209                        sub_id: dc.chunk_id,
4210                    });
4211                }
4212            }
4213            if want_cluster_member {
4214                for cm in fetch_cluster_member_edges(conn)? {
4215                    let src_id = format!("cl:{}", cm.cluster_id);
4216                    let tgt_id = format!("ep:{}", cm.memory_id);
4217                    if let Some((fk, fv)) = &focus {
4218                        if !edge_touches_focus(
4219                            EdgeKind::ClusterMember,
4220                            *fk,
4221                            fv,
4222                            &cm.cluster_id,
4223                            &cm.memory_id,
4224                            None,
4225                        ) {
4226                            continue;
4227                        }
4228                    }
4229                    let edge = GraphEdge {
4230                        id: edge_id(&src_id, "cluster_member", &tgt_id),
4231                        source: src_id,
4232                        target: tgt_id,
4233                        kind: "cluster_member",
4234                        predicate: None,
4235                        weight: None,
4236                    };
4237                    let sub_id = format!("{}\u{1f}{}", cm.cluster_id, cm.memory_id);
4238                    staged.push(StagingEdge {
4239                        edge,
4240                        kind_idx: EdgeKind::ClusterMember.order_idx(),
4241                        sub_id,
4242                    });
4243                }
4244            }
4245            Ok::<_, rusqlite::Error>(staged)
4246        })
4247        .await
4248        .map_err(ApiError::from)?;
4249
4250    // Apply cursor filter.
4251    let mut staged = staged;
4252    if let Some(cur) = &cursor {
4253        staged.retain(|s| edge_passes_cursor(s.kind_idx, &s.sub_id, cur));
4254    }
4255
4256    // Sort `(kind_idx ASC, sub_id ASC)` — stable, simple.
4257    staged.sort_by(|a, b| cmp_edge_sort_keys((a.kind_idx, &a.sub_id), (b.kind_idx, &b.sub_id)));
4258
4259    let limit_us = limit as usize;
4260    let next_cursor = if staged.len() > limit_us {
4261        let last = &staged[limit_us - 1];
4262        Some(EdgesCursor {
4263            kind_idx: last.kind_idx,
4264            sub_id: last.sub_id.clone(),
4265        })
4266    } else {
4267        None
4268    };
4269    staged.truncate(limit_us);
4270    let next_cursor_str = match next_cursor {
4271        Some(c) => Some(encode_cursor(&c)?),
4272        None => None,
4273    };
4274
4275    let edges: Vec<GraphEdge> = staged.into_iter().map(|s| s.edge).collect();
4276    Ok(Json(GraphEdgesResponse {
4277        edges,
4278        next_cursor: next_cursor_str,
4279    }))
4280}
4281
4282// ---------------------------------------------------------------------------
4283// Graph inspect — kind-discriminated full-record drill (v0.10.0)
4284//
4285// `GET /v1/graph/inspect/{id}` powers solo-web's right-side inspector
4286// panel. Path `id` carries the prefixed node identifier (ep:/doc:/chunk:/
4287// cl:/ent:); the handler dispatches per-kind and returns the same wire
4288// shape solo-web's `InspectResponse` expects: `{ node, full_text?,
4289// triples_in[], triples_out[] }`.
4290//
4291// Per-kind contract (v0.10.0 P1):
4292//   * `ep:<memory_id>`     full_text = episodes.content (untruncated),
4293//                          triples_in = [],
4294//                          triples_out = triples WHERE source_episode_id = rowid
4295//                          (one edge per triple, ep -> ent(object), predicate
4296//                          + weight surfaced). Episodes never appear as triple
4297//                          subjects/objects, so triples_in is structurally
4298//                          empty.
4299//   * `doc:<doc_id>`       full_text = concatenated chunk bodies separated by
4300//                          "\n\n" (no `documents.full_text` column exists; the
4301//                          chunks-concat path produces the same final text the
4302//                          ingester chunked from). triples_in/out = [] --
4303//                          documents don't directly carry triples; their
4304//                          chunks transitively do, but the inspector reaches
4305//                          those via the existing `/v1/graph/expand` drill.
4306//   * `chunk:<chunk_id>`   full_text = document_chunks.content,
4307//                          triples_in/out = [] (chunks aren't triple endpoints).
4308//   * `cl:<cluster_id>`    full_text = label + "\n\n" + abstraction
4309//                          (`semantic_abstractions.content`) when an
4310//                          abstraction exists; just the label otherwise.
4311//                          triples_in/out = [].
4312//   * `ent:<value>`        full_text = None (entities have no body),
4313//                          triples_in = [],
4314//                          triples_out = all triples where the entity appears
4315//                          as subject OR object. Capped at
4316//                          `GRAPH_INSPECT_ENTITY_TRIPLES_CAP` (50). Entities
4317//                          are synthetic -- an `ent:<value>` with zero triples
4318//                          in the tenant returns 404 (the entity exists only
4319//                          if at least one triple references it).
4320//
4321// Error semantics: 404 if the prefixed id has no row in the tenant's DB.
4322// 400 if the prefix is unknown or the body after `:` is empty (reuses
4323// `parse_node_id`). Tenant + auth are handled by the existing extractors.
4324//
4325// Lesson #30: no audit emit. Inspect is a derived read over already-
4326// audited primitives.
4327// ---------------------------------------------------------------------------
4328
/// Maximum number of triples an entity inspect returns (passed as the SQL
/// `LIMIT`). Popular entities ("user", "Alice") can be referenced by a very
/// large number of triples, and the inspector panel only needs a handful
/// for orientation; `/v1/graph/expand?kind=triple` serves the paginated
/// full set when the UI wants more.
const GRAPH_INSPECT_ENTITY_TRIPLES_CAP: i64 = 50;
4334
4335#[derive(Debug, Serialize)]
4336struct GraphInspectResponse {
4337    node: GraphNode,
4338    #[serde(skip_serializing_if = "Option::is_none")]
4339    full_text: Option<String>,
4340    triples_in: Vec<GraphEdge>,
4341    triples_out: Vec<GraphEdge>,
4342}
4343
4344/// `GET /v1/graph/inspect/{id}`. See module-level comments.
4345async fn graph_inspect_handler(
4346    TenantExtractor(tenant): TenantExtractor,
4347    Path(id): Path<String>,
4348) -> Result<Json<GraphInspectResponse>, ApiError> {
4349    let (kind, value) = parse_node_id(&id)?;
4350    let tenant_id_str = tenant.tenant_id().to_string();
4351    let value = value.to_string();
4352    let node_id_full = id;
4353    match kind {
4354        NodeKind::Episode => {
4355            inspect_episode_node(&tenant, &tenant_id_str, value, node_id_full).await
4356        }
4357        NodeKind::Document => {
4358            inspect_document_node(&tenant, &tenant_id_str, value, node_id_full).await
4359        }
4360        NodeKind::Chunk => inspect_chunk_node(&tenant, &tenant_id_str, value, node_id_full).await,
4361        NodeKind::Cluster => {
4362            inspect_cluster_node(&tenant, &tenant_id_str, value, node_id_full).await
4363        }
4364        NodeKind::Entity => inspect_entity_node(&tenant, &tenant_id_str, value, node_id_full).await,
4365    }
4366    .map(Json)
4367}
4368
4369// ---- per-kind paths ----
4370
4371async fn inspect_episode_node(
4372    tenant: &TenantHandle,
4373    tenant_id: &str,
4374    memory_id: String,
4375    node_id_full: String,
4376) -> Result<GraphInspectResponse, ApiError> {
4377    let memory_id_for_err = memory_id.clone();
4378    let memory_id_q = memory_id.clone();
4379    // Fetch the episode row + all triples sourced from it in one
4380    // interact() call to keep the connection check-out short.
4381    let fetched: Option<(ExpandedEpisode, Vec<TripleRow>)> = tenant
4382        .read()
4383        .interact(move |conn| {
4384            let ep_row: Option<(i64, i64, String)> = conn
4385                .query_row(
4386                    "SELECT rowid, ts_ms, content
4387                       FROM episodes
4388                      WHERE memory_id = ?1
4389                        AND status = 'active'",
4390                    rusqlite::params![&memory_id_q],
4391                    |r| {
4392                        Ok((
4393                            r.get::<_, i64>(0)?,
4394                            r.get::<_, i64>(1)?,
4395                            r.get::<_, String>(2)?,
4396                        ))
4397                    },
4398                )
4399                .map(Some)
4400                .or_else(|e| match e {
4401                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
4402                    other => Err(other),
4403                })?;
4404            let Some((rowid, ts_ms, content)) = ep_row else {
4405                return Ok(None);
4406            };
4407            let mut stmt = conn.prepare(
4408                "SELECT subject_id, predicate, object_id, confidence
4409                   FROM triples
4410                  WHERE source_episode_id = ?1
4411                    AND status = 'active'
4412                  ORDER BY valid_from_ms DESC",
4413            )?;
4414            let triples = stmt
4415                .query_map(rusqlite::params![rowid], |r| {
4416                    Ok(TripleRow {
4417                        subject_id: r.get(0)?,
4418                        predicate: r.get(1)?,
4419                        object_id: r.get(2)?,
4420                        confidence: r.get(3)?,
4421                    })
4422                })?
4423                .collect::<rusqlite::Result<Vec<_>>>()?;
4424            let ep = ExpandedEpisode {
4425                memory_id: memory_id_q,
4426                ts_ms,
4427                content,
4428            };
4429            Ok::<_, rusqlite::Error>(Some((ep, triples)))
4430        })
4431        .await
4432        .map_err(ApiError::from)?;
4433
4434    let (ep, triples) = fetched.ok_or_else(|| {
4435        ApiError::not_found(format!(
4436            "node_id {node_id_full:?} (memory_id {memory_id_for_err}) not found in current tenant"
4437        ))
4438    })?;
4439
4440    let node = graph_node_for_episode(tenant_id, &ep);
4441    let full_text = Some(ep.content.clone());
4442    // Triples flow from this episode (the source) to entity endpoints.
4443    // Emit one edge per triple: ep -> ent(object), predicate from the
4444    // triple, weight = confidence. This mirrors the `/v1/graph/edges`
4445    // triple-edge convention so the renderer can dedupe via composite id.
4446    let mut triples_out = Vec::with_capacity(triples.len());
4447    for t in triples {
4448        let tgt_id = format!("ent:{}", t.object_id);
4449        triples_out.push(GraphEdge {
4450            id: edge_id(&node_id_full, "triple", &tgt_id),
4451            source: node_id_full.clone(),
4452            target: tgt_id,
4453            kind: "triple",
4454            predicate: Some(t.predicate),
4455            weight: Some(t.confidence),
4456        });
4457    }
4458    Ok(GraphInspectResponse {
4459        node,
4460        full_text,
4461        triples_in: Vec::new(),
4462        triples_out,
4463    })
4464}
4465
4466async fn inspect_document_node(
4467    tenant: &TenantHandle,
4468    tenant_id: &str,
4469    doc_id: String,
4470    node_id_full: String,
4471) -> Result<GraphInspectResponse, ApiError> {
4472    let doc_id_for_err = doc_id.clone();
4473    let doc_id_q = doc_id.clone();
4474    // Fetch the document row + all chunk bodies (ORDER BY chunk_index) in
4475    // one interact() call. The chunks-concat path is the source of full_text
4476    // since the `documents` table doesn't carry the original raw text. For
4477    // v0.10.0 P1 we concatenate every chunk; pagination is the inspector
4478    // panel's responsibility if the document is very large.
4479    let fetched: Option<(ExpandedDocument, Vec<String>)> = tenant
4480        .read()
4481        .interact(move |conn| {
4482            let doc_row: Option<ExpandedDocument> = conn
4483                .query_row(
4484                    "SELECT doc_id, title, source, ingested_at_ms
4485                       FROM documents
4486                      WHERE doc_id = ?1
4487                        AND status = 'active'",
4488                    rusqlite::params![&doc_id_q],
4489                    |r| {
4490                        Ok(ExpandedDocument {
4491                            doc_id: r.get(0)?,
4492                            title: r.get(1)?,
4493                            source: r.get(2)?,
4494                            ingested_at_ms: r.get(3)?,
4495                        })
4496                    },
4497                )
4498                .map(Some)
4499                .or_else(|e| match e {
4500                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
4501                    other => Err(other),
4502                })?;
4503            let Some(doc) = doc_row else {
4504                return Ok(None);
4505            };
4506            let mut stmt = conn.prepare(
4507                "SELECT content
4508                   FROM document_chunks
4509                  WHERE doc_id = ?1
4510                  ORDER BY chunk_index ASC",
4511            )?;
4512            let chunks = stmt
4513                .query_map(rusqlite::params![&doc_id_q], |r| r.get::<_, String>(0))?
4514                .collect::<rusqlite::Result<Vec<_>>>()?;
4515            Ok::<_, rusqlite::Error>(Some((doc, chunks)))
4516        })
4517        .await
4518        .map_err(ApiError::from)?;
4519
4520    let (doc, chunks) = fetched.ok_or_else(|| {
4521        ApiError::not_found(format!(
4522            "node_id {node_id_full:?} (doc_id {doc_id_for_err}) not found in current tenant"
4523        ))
4524    })?;
4525
4526    let full_text = if chunks.is_empty() {
4527        // Document with zero chunks (e.g. mid-ingest, or an empty source).
4528        // Return None to signal "no body available" rather than an empty
4529        // string -- saves the renderer a degenerate code path.
4530        None
4531    } else {
4532        Some(chunks.join("\n\n"))
4533    };
4534
4535    Ok(GraphInspectResponse {
4536        node: graph_node_for_document(tenant_id, &doc),
4537        full_text,
4538        triples_in: Vec::new(),
4539        triples_out: Vec::new(),
4540    })
4541}
4542
4543async fn inspect_chunk_node(
4544    tenant: &TenantHandle,
4545    tenant_id: &str,
4546    chunk_id: String,
4547    node_id_full: String,
4548) -> Result<GraphInspectResponse, ApiError> {
4549    let chunk_id_for_err = chunk_id.clone();
4550    let chunk_id_q = chunk_id.clone();
4551    let row: Option<(ExpandedChunk, i64)> = tenant
4552        .read()
4553        .interact(move |conn| {
4554            conn.query_row(
4555                "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
4556                   FROM document_chunks c
4557                   JOIN documents d ON d.doc_id = c.doc_id
4558                  WHERE c.chunk_id = ?1
4559                    AND d.status = 'active'",
4560                rusqlite::params![&chunk_id_q],
4561                |r| {
4562                    Ok((
4563                        ExpandedChunk {
4564                            chunk_id: r.get(0)?,
4565                            chunk_index: r.get(1)?,
4566                            content: r.get(2)?,
4567                        },
4568                        r.get::<_, i64>(3)?,
4569                    ))
4570                },
4571            )
4572            .map(Some)
4573            .or_else(|e| match e {
4574                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4575                other => Err(other),
4576            })
4577        })
4578        .await
4579        .map_err(ApiError::from)?;
4580
4581    let (chunk, created_at_ms) = row.ok_or_else(|| {
4582        ApiError::not_found(format!(
4583            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
4584        ))
4585    })?;
4586
4587    let full_text = Some(chunk.content.clone());
4588    let mut node = graph_node_for_chunk(tenant_id, &chunk);
4589    // Mirror the `/v1/graph/nodes` chunk-row behaviour: surface
4590    // `created_at_ms` so the inspector panel has a sortable timestamp.
4591    node.ts_ms = Some(created_at_ms);
4592
4593    Ok(GraphInspectResponse {
4594        node,
4595        full_text,
4596        triples_in: Vec::new(),
4597        triples_out: Vec::new(),
4598    })
4599}
4600
4601async fn inspect_cluster_node(
4602    tenant: &TenantHandle,
4603    tenant_id: &str,
4604    cluster_id: String,
4605    node_id_full: String,
4606) -> Result<GraphInspectResponse, ApiError> {
4607    let cluster_id_for_err = cluster_id.clone();
4608    let cluster_id_q = cluster_id.clone();
4609    let row: Option<(Option<String>, i64)> = tenant
4610        .read()
4611        .interact(move |conn| {
4612            conn.query_row(
4613                "SELECT sa.content, c.created_at_ms
4614                   FROM clusters c
4615                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
4616                  WHERE c.cluster_id = ?1",
4617                rusqlite::params![&cluster_id_q],
4618                |r| Ok((r.get::<_, Option<String>>(0)?, r.get::<_, i64>(1)?)),
4619            )
4620            .map(Some)
4621            .or_else(|e| match e {
4622                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4623                other => Err(other),
4624            })
4625        })
4626        .await
4627        .map_err(ApiError::from)?;
4628
4629    let (abstraction, created_at_ms) = row.ok_or_else(|| {
4630        ApiError::not_found(format!(
4631            "node_id {node_id_full:?} (cluster_id {cluster_id_for_err}) not found in current tenant"
4632        ))
4633    })?;
4634
4635    // full_text is "<cluster_id label>\n\n<abstraction>" when an abstraction
4636    // exists; just the label otherwise. Brief "cluster" -- the cluster
4637    // label is `clusters.cluster_id` (the user-facing label is the
4638    // abstraction; clusters don't have a `label` column).
4639    let full_text = match abstraction.as_deref() {
4640        Some(a) => Some(format!("cluster {cluster_id_for_err}\n\n{a}")),
4641        None => Some(format!("cluster {cluster_id_for_err}")),
4642    };
4643
4644    Ok(GraphInspectResponse {
4645        node: graph_node_for_cluster(
4646            tenant_id,
4647            &cluster_id_for_err,
4648            abstraction.as_deref(),
4649            created_at_ms,
4650        ),
4651        full_text,
4652        triples_in: Vec::new(),
4653        triples_out: Vec::new(),
4654    })
4655}
4656
4657async fn inspect_entity_node(
4658    tenant: &TenantHandle,
4659    tenant_id: &str,
4660    entity_value: String,
4661    node_id_full: String,
4662) -> Result<GraphInspectResponse, ApiError> {
4663    // Entities are synthetic. They "exist" only if at least one triple
4664    // references them as subject or object. Zero triples -> 404 per brief.
4665    let entity_q = entity_value.clone();
4666    let rows: Vec<TripleRow> = tenant
4667        .read()
4668        .interact(move |conn| {
4669            let mut stmt = conn.prepare(
4670                "SELECT subject_id, predicate, object_id, confidence
4671                   FROM triples
4672                  WHERE (subject_id = ?1 OR object_id = ?1)
4673                    AND status = 'active'
4674                  ORDER BY valid_from_ms DESC
4675                  LIMIT ?2",
4676            )?;
4677            stmt.query_map(
4678                rusqlite::params![&entity_q, GRAPH_INSPECT_ENTITY_TRIPLES_CAP],
4679                |r| {
4680                    Ok(TripleRow {
4681                        subject_id: r.get(0)?,
4682                        predicate: r.get(1)?,
4683                        object_id: r.get(2)?,
4684                        confidence: r.get(3)?,
4685                    })
4686                },
4687            )?
4688            .collect::<rusqlite::Result<Vec<_>>>()
4689        })
4690        .await
4691        .map_err(ApiError::from)?;
4692
4693    if rows.is_empty() {
4694        return Err(ApiError::not_found(format!(
4695            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be inspectable"
4696        )));
4697    }
4698
4699    // Triples flow out FROM the entity to its counterpart. For each row
4700    // determine which side the entity appears on and emit ent:<self> ->
4701    // ent:<other>. Brief calls these triples_out (entities don't have
4702    // structural triples_in in v0.10.0 P1).
4703    let mut triples_out = Vec::with_capacity(rows.len());
4704    for t in rows {
4705        let other = if t.subject_id == entity_value {
4706            t.object_id
4707        } else {
4708            // entity_value matched on object_id; counterpart is subject.
4709            t.subject_id
4710        };
4711        let tgt_id = format!("ent:{other}");
4712        triples_out.push(GraphEdge {
4713            id: edge_id(&node_id_full, "triple", &tgt_id),
4714            source: node_id_full.clone(),
4715            target: tgt_id,
4716            kind: "triple",
4717            predicate: Some(t.predicate),
4718            weight: Some(t.confidence),
4719        });
4720    }
4721
4722    Ok(GraphInspectResponse {
4723        node: graph_node_for_entity(tenant_id, &entity_value),
4724        full_text: None,
4725        triples_in: Vec::new(),
4726        triples_out,
4727    })
4728}
4729
4730// ---------------------------------------------------------------------------
4731// Graph neighbors -- unified explicit + HNSW-semantic (v0.10.0)
4732//
4733// `GET /v1/graph/neighbors/{id}` powers solo-web's "show similar" overlay.
4734// Returns the same `GraphResponse { nodes, edges }` envelope as the rest of
4735// the family, combining:
4736//
4737//   * Explicit edges (triples / document_chunk / cluster_member) incident
4738//     to the focal node -- the same shape `/v1/graph/expand` produces for
4739//     a given (node_id, edge_kind) pair, but UNIONed across every edge kind
4740//     compatible with the focal node's kind.
4741//
4742//   * HNSW-semantic edges (cosine-similarity neighbors) -- only valid for
4743//     `ep:` (episodes) and `chunk:` (chunks); other source kinds return
4744//     400 when `kind=semantic` is requested alone, or are silently skipped
4745//     when `kind=both` is requested (explicit-only path still runs).
4746//
4747// Why this isn't just expand-with-a-flag: `/v1/graph/expand` takes a
4748// specific `kind=<edge-kind>` parameter and expands along ONE edge kind at
4749// a time. `/v1/graph/neighbors/:id` UNIFIES all compatible edge kinds
4750// incident to the focal node into one response. Different UX (drill vs.
4751// overview); different API; both needed.
4752//
4753// ## Refactor decision
4754//
4755// The brief recommends extracting `expand`'s per-kind helpers into a
4756// shared module. In practice the `expand_*` async fns already do exactly
4757// what neighbors needs for the explicit path (same response shape, same
4758// tenant + auth + existence semantics). To keep the change surgical and
4759// to preserve `expand`'s existing tests byte-for-byte, neighbors **reuses
4760// the existing `expand_*` async fns directly** rather than refactoring
4761// their bodies. The explicit path is a thin orchestrator that calls every
4762// `expand_*` fn compatible with the focal node's kind and concatenates
4763// the results.
4764//
4765// ## Dedup rule (kind=both)
4766//
4767// When an edge with the same (source, target) appears in BOTH the
4768// explicit and the semantic result sets, the explicit edge wins -- the
4769// semantic edge is dropped. We dedupe by `(source, target)` (NOT by full
4770// edge id, which encodes the kind too): the rule "explicit beats
4771// semantic" only makes sense when both endpoints agree, regardless of
4772// kind. In practice this is most likely to fire when an entity-focused
4773// expand (which surfaces episodes as triple-targets) collides with a
4774// semantic search hit on the same episode pair.
4775//
4776// ## Limit policy
4777//
4778// `limit` is applied PER KIND, not total. With `limit=25` and
4779// `kind=both`, the response carries up to 25 explicit + 25 semantic
4780// edges (minus dedupe). Silent clamp at 100 (matches the rest of the
4781// `/v1/graph/*` family).
4782//
4783// ## Threshold filter
4784//
4785// `threshold` (default 0.75) filters semantic neighbors by
4786// `weight >= threshold`, where `weight = (1 - cos_distance).max(0)`. The
4787// default is conservative -- below 0.75 the renderer typically shows too
4788// many spurious edges for a useful "show similar" overlay. Callers can
4789// dial down (e.g. `?threshold=0.5`) for a broader view.
4790//
4791// See `docs/dev-log/0116-graph-neighbors-impl.md` for the design notes.
4792// ---------------------------------------------------------------------------
4793
/// Page size used when the request carries no `?limit=`. Kept small so the
/// "show similar" overlay is not visually overwhelming on first click.
const GRAPH_NEIGHBORS_DEFAULT_LIMIT: u32 = 25;
/// Ceiling that `limit` is silently clamped to. Matches the rest of the
/// `/v1/graph/*` family.
const GRAPH_NEIGHBORS_MAX_LIMIT: u32 = 100;
/// Similarity floor used when the request carries no `?threshold=`.
/// Semantic edges with `weight < threshold` are dropped from the result.
const GRAPH_NEIGHBORS_DEFAULT_THRESHOLD: f32 = 0.75;
4802
4803/// Discriminator for which neighbor kinds the caller wants. Default is
4804/// `both` (explicit edges + HNSW-semantic).
4805#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
4806#[serde(rename_all = "snake_case")]
4807enum GraphNeighborsKind {
4808    Explicit,
4809    Semantic,
4810    #[default]
4811    Both,
4812}
4813
4814#[derive(Debug, Deserialize)]
4815struct GraphNeighborsQuery {
4816    #[serde(default)]
4817    kind: Option<GraphNeighborsKind>,
4818    #[serde(default)]
4819    threshold: Option<f32>,
4820    #[serde(default)]
4821    limit: Option<u32>,
4822}
4823
4824/// `GET /v1/graph/neighbors/{id}`. See module-level comments.
4825async fn graph_neighbors_handler(
4826    TenantExtractor(tenant): TenantExtractor,
4827    Path(id): Path<String>,
4828    Query(q): Query<GraphNeighborsQuery>,
4829) -> Result<Json<GraphExpandResponse>, ApiError> {
4830    let kind = q.kind.unwrap_or_default();
4831    let threshold = q.threshold.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_THRESHOLD);
4832    if !(0.0..=1.0).contains(&threshold) {
4833        return Err(ApiError::bad_request(format!(
4834            "threshold must be in [0.0, 1.0]; got {threshold}"
4835        )));
4836    }
4837    // Silent clamp at GRAPH_NEIGHBORS_MAX_LIMIT -- matches expand /
4838    // nodes / edges convention. Test `neighbors_limit_clamped_at_100`
4839    // locks in the clamp policy.
4840    let limit_raw = q.limit.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_LIMIT);
4841    let limit = limit_raw.clamp(1, GRAPH_NEIGHBORS_MAX_LIMIT);
4842
4843    let (node_kind, value) = parse_node_id(&id)?;
4844    let value_owned = value.to_string();
4845    let tenant_id_str = tenant.tenant_id().to_string();
4846    let node_id_full = id;
4847
4848    // Existence probe for the focal node. The explicit + semantic paths
4849    // each handle "node-found-but-zero-neighbors" gracefully (200 with
4850    // empty arrays) -- but we want a true 404 when the id resolves to no
4851    // row at all, regardless of which kind the caller asked for. This
4852    // matches the inspect endpoint's gate: a node has to exist to be
4853    // meaningfully "neighborable".
4854    ensure_neighbors_focal_exists(&tenant, node_kind, &value_owned, &node_id_full).await?;
4855
4856    // Dispatch.
4857    let (explicit_nodes, explicit_edges) = if matches!(
4858        kind,
4859        GraphNeighborsKind::Explicit | GraphNeighborsKind::Both
4860    ) {
4861        neighbors_explicit(
4862            &tenant,
4863            &tenant_id_str,
4864            node_kind,
4865            &value_owned,
4866            &node_id_full,
4867            limit as i64,
4868        )
4869        .await?
4870    } else {
4871        (Vec::new(), Vec::new())
4872    };
4873
4874    let (semantic_nodes, semantic_edges) = if matches!(
4875        kind,
4876        GraphNeighborsKind::Semantic | GraphNeighborsKind::Both
4877    ) {
4878        match neighbors_semantic(
4879            &tenant,
4880            &tenant_id_str,
4881            node_kind,
4882            &value_owned,
4883            &node_id_full,
4884            limit,
4885            threshold,
4886        )
4887        .await
4888        {
4889            Ok(parts) => parts,
4890            Err(e) => {
4891                // `kind=semantic` alone against an unsupported focal node
4892                // (doc/cl/ent) is a hard 400 -- the caller asked for ONLY
4893                // semantic neighbors and there are none possible.
4894                //
4895                // `kind=both` against an unsupported focal node silently
4896                // skips the semantic step; the explicit path still
4897                // delivers a meaningful answer. This mirrors the
4898                // pragmatic UX: clicking "show similar" on an entity
4899                // still surfaces the entity's triples without surfacing a
4900                // pointless error.
4901                if matches!(kind, GraphNeighborsKind::Semantic) {
4902                    return Err(e);
4903                }
4904                (Vec::new(), Vec::new())
4905            }
4906        }
4907    } else {
4908        (Vec::new(), Vec::new())
4909    };
4910
4911    // Merge + dedupe. Explicit edges win over semantic edges with the
4912    // same (source, target). Nodes dedupe by id.
4913    let mut explicit_endpoints: std::collections::HashSet<(String, String)> =
4914        std::collections::HashSet::with_capacity(explicit_edges.len());
4915    for e in &explicit_edges {
4916        explicit_endpoints.insert((e.source.clone(), e.target.clone()));
4917    }
4918
4919    let mut nodes: Vec<GraphNode> = Vec::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4920    let mut edges: Vec<GraphEdge> = Vec::with_capacity(explicit_edges.len() + semantic_edges.len());
4921    let mut seen_node_ids: std::collections::HashSet<String> =
4922        std::collections::HashSet::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4923
4924    for n in explicit_nodes {
4925        if seen_node_ids.insert(n.id.clone()) {
4926            nodes.push(n);
4927        }
4928    }
4929    for e in explicit_edges {
4930        edges.push(e);
4931    }
4932    for n in semantic_nodes {
4933        if seen_node_ids.insert(n.id.clone()) {
4934            nodes.push(n);
4935        }
4936    }
4937    for e in semantic_edges {
4938        if explicit_endpoints.contains(&(e.source.clone(), e.target.clone())) {
4939            // Explicit edge already covers this pair -- drop the semantic
4940            // duplicate per the dedup rule. The semantic node may still
4941            // remain in `nodes` if no other edge already pulled it in;
4942            // that's fine -- the renderer renders nodes with weight-less
4943            // structural edges either way.
4944            continue;
4945        }
4946        edges.push(e);
4947    }
4948
4949    Ok(Json(GraphExpandResponse { nodes, edges }))
4950}
4951
4952/// Existence probe for the focal node. Translates the prefixed id into a
4953/// per-kind COUNT query against the matching table. Returns 404 (not 200
4954/// with empty arrays) when the node doesn't exist in the tenant's DB.
4955/// For entities the "existence" check is "is this entity referenced by
4956/// at least one triple" -- consistent with the inspect-entity contract
4957/// from `0115`.
4958async fn ensure_neighbors_focal_exists(
4959    tenant: &TenantHandle,
4960    node_kind: NodeKind,
4961    value: &str,
4962    node_id_full: &str,
4963) -> Result<(), ApiError> {
4964    match node_kind {
4965        NodeKind::Episode => ensure_episode_exists(tenant, value, node_id_full).await,
4966        NodeKind::Cluster => ensure_cluster_exists(tenant, value, node_id_full).await,
4967        NodeKind::Document => ensure_document_exists(tenant, value, node_id_full).await,
4968        NodeKind::Chunk => ensure_chunk_exists(tenant, value, node_id_full).await,
4969        NodeKind::Entity => ensure_entity_referenced(tenant, value, node_id_full).await,
4970    }
4971}
4972
4973/// 404 if the chunk_id has no row in this tenant's `document_chunks`
4974/// table whose parent doc is active. Mirrors `ensure_*_exists` from
4975/// `expand`.
4976async fn ensure_chunk_exists(
4977    tenant: &TenantHandle,
4978    chunk_id: &str,
4979    node_id_full: &str,
4980) -> Result<(), ApiError> {
4981    let chunk_id_q = chunk_id.to_string();
4982    let exists: i64 = tenant
4983        .read()
4984        .interact(move |conn| {
4985            conn.query_row(
4986                "SELECT COUNT(*)
4987                   FROM document_chunks c
4988                   JOIN documents d ON d.doc_id = c.doc_id
4989                  WHERE c.chunk_id = ?1
4990                    AND d.status = 'active'",
4991                rusqlite::params![&chunk_id_q],
4992                |r| r.get(0),
4993            )
4994        })
4995        .await
4996        .map_err(ApiError::from)?;
4997    if exists == 0 {
4998        return Err(ApiError::not_found(format!(
4999            "node_id {node_id_full:?} not found in current tenant"
5000        )));
5001    }
5002    Ok(())
5003}
5004
5005/// 404 if the entity isn't referenced by at least one active triple in
5006/// the tenant. Matches the inspect-entity 404 contract: entities are
5007/// synthetic, "existence" is "shows up in at least one triple".
5008async fn ensure_entity_referenced(
5009    tenant: &TenantHandle,
5010    entity_value: &str,
5011    node_id_full: &str,
5012) -> Result<(), ApiError> {
5013    let entity_q = entity_value.to_string();
5014    let exists: i64 = tenant
5015        .read()
5016        .interact(move |conn| {
5017            conn.query_row(
5018                "SELECT COUNT(*)
5019                   FROM triples
5020                  WHERE (subject_id = ?1 OR object_id = ?1)
5021                    AND status = 'active'",
5022                rusqlite::params![&entity_q],
5023                |r| r.get(0),
5024            )
5025        })
5026        .await
5027        .map_err(ApiError::from)?;
5028    if exists == 0 {
5029        return Err(ApiError::not_found(format!(
5030            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be neighborable"
5031        )));
5032    }
5033    Ok(())
5034}
5035
5036/// Explicit-neighbor path. Dispatches per focal node kind, calling the
5037/// existing `expand_*` async fns for each compatible edge kind and
5038/// concatenating the results. This is the "reuse" refactor decision:
5039/// no duplication of expand's SQL, and expand's tests stay byte-for-byte
5040/// intact because we don't touch its bodies.
5041async fn neighbors_explicit(
5042    tenant: &TenantHandle,
5043    tenant_id: &str,
5044    node_kind: NodeKind,
5045    value: &str,
5046    node_id_full: &str,
5047    limit: i64,
5048) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5049    let mut nodes: Vec<GraphNode> = Vec::new();
5050    let mut edges: Vec<GraphEdge> = Vec::new();
5051
5052    match node_kind {
5053        NodeKind::Episode => {
5054            // Episodes have two compatible explicit-edge kinds:
5055            //   * cluster_member (episode -> clusters)
5056            //   * triple (episode -> entities, plus subj/obj entity pairs)
5057            //
5058            // document_chunk doesn't apply (episodes aren't documents).
5059            // Run each path, concat. Per-kind limit -- the caller asked for
5060            // up to `limit` neighbors PER KIND.
5061            let r1 =
5062                expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
5063                    .await?;
5064            nodes.extend(r1.nodes);
5065            edges.extend(r1.edges);
5066            let r2 =
5067                expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
5068            nodes.extend(r2.nodes);
5069            edges.extend(r2.edges);
5070        }
5071        NodeKind::Document => {
5072            // Documents have one compatible explicit-edge kind:
5073            // document_chunk (document -> chunks).
5074            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
5075                .await?;
5076            nodes.extend(r.nodes);
5077            edges.extend(r.edges);
5078        }
5079        NodeKind::Chunk => {
5080            // Chunks have one compatible explicit-edge kind:
5081            // document_chunk (chunk -> parent document).
5082            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
5083                .await?;
5084            nodes.extend(r.nodes);
5085            edges.extend(r.edges);
5086        }
5087        NodeKind::Cluster => {
5088            // Clusters have one compatible explicit-edge kind:
5089            // cluster_member (cluster -> episodes).
5090            let r = expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
5091                .await?;
5092            nodes.extend(r.nodes);
5093            edges.extend(r.edges);
5094        }
5095        NodeKind::Entity => {
5096            // Entities have one compatible explicit-edge kind:
5097            // triple (entity -> episodes where this entity is referenced).
5098            let r = expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
5099            nodes.extend(r.nodes);
5100            edges.extend(r.edges);
5101        }
5102    }
5103    Ok((nodes, edges))
5104}
5105
5106/// Semantic-neighbor path. Only valid for episode + chunk focal nodes;
5107/// other kinds return 400. Reuses the existing inner pipelines:
5108///
5109///   * Episodes -> `solo_query::recall::run_recall_inner` (same path
5110///     `expand_semantic` uses; filters out chunk hits).
5111///   * Chunks   -> `solo_query::doc_search::run_doc_search_inner` (the
5112///     equivalent chunk-restricted vector pipeline).
5113///
5114/// Re-embed the focal node's content for the HNSW query rather than
5115/// loading the persisted vector from `embeddings` -- the same trade-off
5116/// `expand_semantic` made: cheaper code path overall, with deterministic
5117/// embedders in tests + batch-sized embedders in prod making the recompute
5118/// cost negligible.
5119async fn neighbors_semantic(
5120    tenant: &TenantHandle,
5121    tenant_id: &str,
5122    node_kind: NodeKind,
5123    value: &str,
5124    node_id_full: &str,
5125    limit: u32,
5126    threshold: f32,
5127) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5128    match node_kind {
5129        NodeKind::Episode => {
5130            neighbors_semantic_from_episode(
5131                tenant,
5132                tenant_id,
5133                value,
5134                node_id_full,
5135                limit,
5136                threshold,
5137            )
5138            .await
5139        }
5140        NodeKind::Chunk => {
5141            neighbors_semantic_from_chunk(tenant, tenant_id, value, node_id_full, limit, threshold)
5142                .await
5143        }
5144        _ => Err(ApiError::bad_request(format!(
5145            "semantic neighbors only valid for episode or chunk source; got {}",
5146            node_kind.as_wire_str()
5147        ))),
5148    }
5149}
5150
5151async fn neighbors_semantic_from_episode(
5152    tenant: &TenantHandle,
5153    tenant_id: &str,
5154    memory_id: &str,
5155    node_id_full: &str,
5156    limit: u32,
5157    threshold: f32,
5158) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5159    let memory_id_q = memory_id.to_string();
5160    let memory_id_for_self_excl = memory_id.to_string();
5161    let content: Option<String> = tenant
5162        .read()
5163        .interact(move |conn| {
5164            conn.query_row(
5165                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
5166                rusqlite::params![&memory_id_q],
5167                |r| r.get::<_, String>(0),
5168            )
5169            .map(Some)
5170            .or_else(|e| match e {
5171                rusqlite::Error::QueryReturnedNoRows => Ok(None),
5172                other => Err(other),
5173            })
5174        })
5175        .await
5176        .map_err(ApiError::from)?;
5177
5178    // Existence is guaranteed by the focal-exists probe earlier; an
5179    // empty content here would be a status-transition race we treat as
5180    // "nothing to compare against".
5181    let Some(content) = content else {
5182        return Ok((Vec::new(), Vec::new()));
5183    };
5184
5185    // Widen the request by 1 so dropping self doesn't shrink the page.
5186    let widened = (limit as usize).saturating_add(1).min(100);
5187    let result = solo_query::recall::run_recall_inner(
5188        tenant.embedder(),
5189        tenant.hnsw(),
5190        tenant.read(),
5191        &content,
5192        widened,
5193    )
5194    .await
5195    .map_err(ApiError::from)?;
5196
5197    let mut nodes = Vec::new();
5198    let mut edges = Vec::new();
5199    for hit in result.hits.into_iter() {
5200        if hit.memory_id == memory_id_for_self_excl {
5201            // Skip self.
5202            continue;
5203        }
5204        if nodes.len() as u32 >= limit {
5205            break;
5206        }
5207        let weight = (1.0 - hit.cos_distance).max(0.0);
5208        if weight < threshold {
5209            continue;
5210        }
5211        let target_id = format!("ep:{}", hit.memory_id);
5212        edges.push(GraphEdge {
5213            id: edge_id(node_id_full, "semantic", &target_id),
5214            source: node_id_full.to_string(),
5215            target: target_id,
5216            kind: "semantic",
5217            predicate: None,
5218            weight: Some(weight),
5219        });
5220        nodes.push(GraphNode {
5221            id: format!("ep:{}", hit.memory_id),
5222            kind: NodeKind::Episode.as_wire_str(),
5223            label: episode_label(&hit.content),
5224            ts_ms: None,
5225            tenant_id: tenant_id.to_string(),
5226            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
5227        });
5228    }
5229    Ok((nodes, edges))
5230}
5231
5232async fn neighbors_semantic_from_chunk(
5233    tenant: &TenantHandle,
5234    tenant_id: &str,
5235    chunk_id: &str,
5236    node_id_full: &str,
5237    limit: u32,
5238    threshold: f32,
5239) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5240    let chunk_id_q = chunk_id.to_string();
5241    let chunk_id_for_self_excl = chunk_id.to_string();
5242    let content: Option<String> = tenant
5243        .read()
5244        .interact(move |conn| {
5245            conn.query_row(
5246                "SELECT c.content
5247                   FROM document_chunks c
5248                   JOIN documents d ON d.doc_id = c.doc_id
5249                  WHERE c.chunk_id = ?1
5250                    AND d.status = 'active'",
5251                rusqlite::params![&chunk_id_q],
5252                |r| r.get::<_, String>(0),
5253            )
5254            .map(Some)
5255            .or_else(|e| match e {
5256                rusqlite::Error::QueryReturnedNoRows => Ok(None),
5257                other => Err(other),
5258            })
5259        })
5260        .await
5261        .map_err(ApiError::from)?;
5262
5263    let Some(content) = content else {
5264        return Ok((Vec::new(), Vec::new()));
5265    };
5266
5267    let widened = (limit as usize).saturating_add(1).min(100);
5268    let hits = solo_query::doc_search::run_doc_search_inner(
5269        tenant.embedder(),
5270        tenant.hnsw(),
5271        tenant.read(),
5272        &content,
5273        widened,
5274    )
5275    .await
5276    .map_err(ApiError::from)?;
5277
5278    let mut nodes = Vec::new();
5279    let mut edges = Vec::new();
5280    for hit in hits.into_iter() {
5281        if hit.chunk_id == chunk_id_for_self_excl {
5282            continue;
5283        }
5284        if nodes.len() as u32 >= limit {
5285            break;
5286        }
5287        let weight = (1.0 - hit.cos_distance).max(0.0);
5288        if weight < threshold {
5289            continue;
5290        }
5291        let target_id = format!("chunk:{}", hit.chunk_id);
5292        edges.push(GraphEdge {
5293            id: edge_id(node_id_full, "semantic", &target_id),
5294            source: node_id_full.to_string(),
5295            target: target_id,
5296            kind: "semantic",
5297            predicate: None,
5298            weight: Some(weight),
5299        });
5300        let exp = ExpandedChunk {
5301            chunk_id: hit.chunk_id.clone(),
5302            chunk_index: hit.chunk_index as i64,
5303            content: hit.content.clone(),
5304        };
5305        nodes.push(graph_node_for_chunk(tenant_id, &exp));
5306    }
5307    Ok((nodes, edges))
5308}
5309
5310// ---------------------------------------------------------------------------
5311// /v1/graph/stream — SSE invalidation feed (v0.10.0)
5312//
5313// Powers solo-web's live-update behaviour: instead of polling, the
5314// frontend subscribes once and refetches its pages only when the
5315// writer-actor signals "your tenant's data changed". Per scoping doc
5316// §3 Decision C, the wire format is invalidation-shaped (not row
5317// payload) — the SSE channel says "refetch the affected page" rather
5318// than streaming actual rows.
5319//
5320// Wire format:
5321//
5322//   ```
5323//   event: init
5324//   data: {"connected": true, "tenant_id": "default", "ts_ms": 1715625600000}
5325//
5326//   event: invalidate
5327//   data: {"reason": "memory.remember", "tenant_id": "default",
5328//          "ts_ms": 1715625610000, "kind": "episode"}
5329//
5330//   event: heartbeat
5331//   data: {"ts_ms": 1715625640000}
5332//   ```
5333//
5334// Heartbeat: every [`STREAM_HEARTBEAT_SECS`] seconds, regardless of
5335// whether real events fired (simpler than resetting the timer on every
5336// invalidate; the cost is a few extra bytes per minute on idle).
5337//
// Lagged subscribers (a subscriber that fell more than 256 writes behind)
// are not sent anything for the lag itself: the server logs a
// `tracing::warn!` and the client resyncs via the next real `invalidate` —
// invalidation events are idempotent, so the missed batch reduces to a
// single refetch on the client side. No correctness loss.
5342//
5343// See `docs/dev-log/0117-graph-stream-impl.md` for the full design.
5344// ---------------------------------------------------------------------------
5345
/// Heartbeat interval, in seconds, for `/v1/graph/stream`. Fires
/// unconditionally every 30 seconds — easier to reason about than "fire
/// 30s after the last event", and keeps proxies happy without code that
/// races a reset on every invalidate.
pub const STREAM_HEARTBEAT_SECS: u64 = 30;

/// SSE event name emitted on connection open. Single fire; client uses
/// this to confirm the subscription is live.
const STREAM_EVENT_INIT: &str = "init";

/// SSE event name emitted on every writer-actor commit (and on
/// `gdpr.forget_user`'s non-writer-actor cascade). One such event is sent
/// per `InvalidateEvent` received from the tenant's broadcast channel.
const STREAM_EVENT_INVALIDATE: &str = "invalidate";

/// SSE event name emitted by the heartbeat interval.
const STREAM_EVENT_HEARTBEAT: &str = "heartbeat";
5362
5363/// `GET /v1/graph/stream` — Server-Sent Events feed of
5364/// `InvalidateEvent`s scoped to the request's tenant.
5365///
5366/// Subscribes to the per-tenant `broadcast::Sender<InvalidateEvent>`
5367/// held by `TenantHandle` (populated by `TenantHandle::open`). The
5368/// stream:
5369///
5370///   1. Emits one `event: init` line at connection open.
5371///   2. Selects between (broadcast recv) and (heartbeat tick) in a
5372///      loop, emitting `invalidate` / `heartbeat` events as either
5373///      fires.
5374///   3. Exits when the client closes the connection (axum drops the
5375///      response future) OR the broadcast Sender is dropped (tenant
5376///      shutdown).
5377///
5378/// Auth + tenant resolution mirror the rest of `/v1/graph/*`: the
5379/// `auth_middleware` returns 401 on missing bearer; the
5380/// `TenantExtractor` resolves the per-tenant DB. The handler itself
5381/// has no per-route auth logic.
5382async fn graph_stream_handler(
5383    TenantExtractor(tenant): TenantExtractor,
5384) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
5385    // Subscribe BEFORE building the init event so a writer-actor
5386    // commit that lands in the (microscopic) window between init and
5387    // the first poll is still observed. `broadcast::Receiver` buffers
5388    // up to the channel's capacity from the moment of subscribe.
5389    let rx = tenant.invalidate_sender().subscribe();
5390    let tenant_id = tenant.tenant_id().to_string();
5391    let stream = build_invalidate_stream(rx, tenant_id, STREAM_HEARTBEAT_SECS);
5392    // axum's keep-alive layer adds its own `:` comment line every
5393    // configured interval; we keep that OFF and ship our own typed
5394    // `heartbeat` event instead. The client distinguishes the two by
5395    // looking at the SSE `event:` field — typed heartbeats let solo-web
5396    // surface "connection healthy" in its UI without parsing comment
5397    // lines.
5398    Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)))
5399}
5400
/// Per-subscriber state threaded through `futures::stream::unfold`.
/// Carries the receiver + heartbeat interval + a one-shot flag for
/// the initial `init` event.
struct StreamState {
    /// Receiving end of the tenant's invalidation broadcast; each event it
    /// yields becomes one `invalidate` SSE event.
    rx: broadcast::Receiver<InvalidateEvent>,
    /// Timer whose ticks become `heartbeat` SSE events.
    heartbeat: tokio::time::Interval,
    /// Tenant id reported in the `init` event payload.
    tenant_id: String,
    /// `true` until the first poll completes — used to gate the `init`
    /// event. Flipped to `false` after the init event yields.
    needs_init: bool,
}
5412
5413/// Build the stream of SSE [`Event`]s for one subscriber.
5414///
5415/// First yield is the `init` event. After that, the stream selects
5416/// between the broadcast receiver and a tokio interval timer that
5417/// fires every `heartbeat_secs` seconds. Lagged broadcast errors are
5418/// swallowed with a single `tracing::warn!` line — the client resyncs
5419/// on the next real invalidate (invalidation events are idempotent).
5420fn build_invalidate_stream(
5421    rx: broadcast::Receiver<InvalidateEvent>,
5422    tenant_id: String,
5423    heartbeat_secs: u64,
5424) -> impl Stream<Item = Result<Event, Infallible>> {
5425    // `tokio::time::interval_at(start, period)` starts ticking at
5426    // `start`; we set `start = now + period` so the first heartbeat
5427    // lands `heartbeat_secs` AFTER the init event. Without `interval_at`
5428    // the default `interval()` would fire immediately at t=0, racing
5429    // the init event.
5430    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
5431    let heartbeat = tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
5432
5433    let state = StreamState {
5434        rx,
5435        heartbeat,
5436        tenant_id,
5437        needs_init: true,
5438    };
5439    futures::stream::unfold(state, move |mut state| async move {
5440        // First-poll: yield the init event without touching the
5441        // receiver or the heartbeat. Subsequent polls fall through to
5442        // the select loop.
5443        if state.needs_init {
5444            state.needs_init = false;
5445            let init_payload = serde_json::json!({
5446                "connected": true,
5447                "tenant_id": state.tenant_id,
5448                "ts_ms": chrono::Utc::now().timestamp_millis(),
5449            });
5450            let ev = Event::default()
5451                .event(STREAM_EVENT_INIT)
5452                .json_data(init_payload)
5453                .unwrap_or_else(|_| Event::default().event(STREAM_EVENT_INIT));
5454            return Some((Ok::<Event, Infallible>(ev), state));
5455        }
5456        loop {
5457            tokio::select! {
5458                event = state.rx.recv() => {
5459                    match event {
5460                        Ok(ev) => {
5461                            let sse_event = Event::default()
5462                                .event(STREAM_EVENT_INVALIDATE)
5463                                .json_data(&ev)
5464                                .unwrap_or_else(|_| Event::default()
5465                                    .event(STREAM_EVENT_INVALIDATE));
5466                            return Some((Ok::<Event, Infallible>(sse_event), state));
5467                        }
5468                        Err(broadcast::error::RecvError::Lagged(n)) => {
5469                            tracing::warn!(
5470                                lagged = n,
5471                                "graph stream subscriber lagged; client will \
5472                                 resync on the next real invalidate"
5473                            );
5474                            // Continue receiving — do NOT yield anything
5475                            // for a lag.
5476                        }
5477                        Err(broadcast::error::RecvError::Closed) => {
5478                            tracing::debug!(
5479                                "graph stream broadcast closed; ending SSE stream"
5480                            );
5481                            return None;
5482                        }
5483                    }
5484                }
5485                _ = state.heartbeat.tick() => {
5486                    let hb_payload = serde_json::json!({
5487                        "ts_ms": chrono::Utc::now().timestamp_millis(),
5488                    });
5489                    let sse_event = Event::default()
5490                        .event(STREAM_EVENT_HEARTBEAT)
5491                        .json_data(hb_payload)
5492                        .unwrap_or_else(|_| Event::default()
5493                            .event(STREAM_EVENT_HEARTBEAT));
5494                    return Some((Ok::<Event, Infallible>(sse_event), state));
5495                }
5496            }
5497        }
5498    })
5499}
5500
5501// ---------------------------------------------------------------------------
5502// /v1/status — authenticated readiness/status (tenant-aware)
5503
/// Embedder section of the `/v1/status` response, filled from the
/// tenant's embedder by `status_handler`.
#[derive(Debug, Serialize)]
struct StatusEmbedder {
    /// Value of the embedder's `name()`.
    name: String,
    /// Value of the embedder's `version()`.
    version: String,
    /// Value of the embedder's `dim()`.
    dim: usize,
    /// ASCII-lowercased `Debug` rendering of the embedder's `dtype()`.
    dtype: String,
}
5511
/// Tenant section of the `/v1/status` response: the request's tenant plus
/// whatever the registry's active-tenant list knows about it.
#[derive(Debug, Serialize)]
struct StatusTenant {
    /// Id of the tenant the request resolved to.
    id: String,
    /// `true` when that id appears in the registry's active-tenant list.
    registered: bool,
    /// Status from the matching registry record; `None` when there is no
    /// matching record.
    status: Option<TenantStatusJson>,
    /// `quota_bytes` from the matching registry record; `None` when there
    /// is no record or the record has none.
    quota_bytes: Option<u64>,
    /// `last_accessed_ms` from the matching registry record; `None` when
    /// there is no record or the record has none.
    last_accessed_ms: Option<i64>,
}
5520
/// MCP section of the `/v1/status` response.
#[derive(Debug, Serialize)]
struct StatusMcp {
    /// Length of the server state's `mcp_sessions` collection at request
    /// time.
    sessions: usize,
}
5525
/// JSON body returned by `status_handler` for `/v1/status`.
#[derive(Debug, Serialize)]
struct StatusResponse {
    /// Set to `true` by `status_handler` on every successful response.
    ok: bool,
    /// Crate version baked in at compile time (`CARGO_PKG_VERSION`).
    version: &'static str,
    /// Details about the tenant the request resolved to.
    tenant: StatusTenant,
    /// Details about that tenant's embedder.
    embedder: StatusEmbedder,
    /// Number of records in the registry's active-tenant list.
    active_tenants: usize,
    /// MCP session information.
    mcp: StatusMcp,
}
5535
5536async fn status_handler(
5537    State(state): State<SoloHttpState>,
5538    TenantExtractor(tenant): TenantExtractor,
5539) -> Result<Json<StatusResponse>, ApiError> {
5540    let active_tenants = state.registry.list_active().await.map_err(ApiError::from)?;
5541    let tenant_record = active_tenants
5542        .iter()
5543        .find(|record| &record.tenant_id == tenant.tenant_id());
5544    let embedder = tenant.embedder();
5545    Ok(Json(StatusResponse {
5546        ok: true,
5547        version: env!("CARGO_PKG_VERSION"),
5548        tenant: StatusTenant {
5549            id: tenant.tenant_id().to_string(),
5550            registered: tenant_record.is_some(),
5551            status: tenant_record.map(|record| TenantStatusJson::from(&record.status)),
5552            quota_bytes: tenant_record.and_then(|record| record.quota_bytes),
5553            last_accessed_ms: tenant_record.and_then(|record| record.last_accessed_ms),
5554        },
5555        embedder: StatusEmbedder {
5556            name: embedder.name().to_string(),
5557            version: embedder.version().to_string(),
5558            dim: embedder.dim(),
5559            dtype: format!("{:?}", embedder.dtype()).to_ascii_lowercase(),
5560        },
5561        active_tenants: active_tenants.len(),
5562        mcp: StatusMcp {
5563            sessions: state.mcp_sessions.len(),
5564        },
5565    }))
5566}
5567
5568// /v1/tenants — principal-scoped tenant list (v0.10.0 + v0.10.1 hydration)
5569//
5570// Powers solo-web's top-bar tenant picker (Decision F in
5571// `docs/dev-log/0105-solo-web-scoping.md` §3, route shape locked in §4
5572// Route 6). The endpoint is **read-only**; admin CRUD (create / delete /
5573// rename / quota change) remains CLI-only per ADR-0004 §"Admin operations".
5574// That keeps the privileged tenant-mutation surface off HTTP entirely
5575// while still letting an authenticated browser session enumerate the
5576// tenants it's allowed to see.
5577//
5578// Wire shape (200 OK):
5579//
5580//   ```json
5581//   {
5582//     "tenants": [
5583//       {
5584//         "id": "default",
5585//         "display_name": "Default tenant",
5586//         "created_at_ms": 1715625600000,
5587//         "last_accessed_ms": 1715625900000,
5588//         "status": "active",
5589//         "quota_bytes": null,
5590//         "episode_count": null,
5591//         "size_bytes": null,
5592//         "pct_used": null
5593//       }
5594//     ]
5595//   }
5596//   ```
5597//
5598// The numeric `episode_count` / `size_bytes` / `pct_used` fields were
5599// **always `null` in v0.10.0** (cost-deferred). v0.10.1 hydrates them
5600// for real via `TenantRegistry::hydrate_tenant_cost_numbers`:
5601//
5602//   * `size_bytes` — `std::fs::metadata(<data_dir>/tenants/<db>.db).len()`.
5603//     Cheap; runs for every visible tenant.
5604//   * `episode_count` — `SELECT COUNT(*) FROM episodes WHERE
5605//     status='active'` against the per-tenant SQLCipher DB.
5606//   * `pct_used` — `size_bytes * 100 / quota_bytes` (f64, capped at
5607//     100.0) when both are known; `null` if `quota_bytes` is unset.
5608//
5609// **Cap**: opening + counting N tenant DBs is N×~10ms; the first-paint
5610// budget is tight, so we cap `episode_count` hydration at
5611// `TENANTS_COUNT_HYDRATION_CAP` (50) per request. Tenants beyond the
5612// cap get `episode_count: null` and the response carries an
5613// `X-Solo-Tenants-Count-Cap-Reached: true` header so clients can fetch
5614// counts for the tail tenants out-of-band if needed (mirroring the
5615// entity-cap pattern from `/v1/graph/nodes`). `size_bytes` is not
5616// capped — it's just a `metadata` call.
5617//
5618// The CLI's `solo tenants list` retains the canonical per-tenant
5619// cost-numbers path for operators who need exhaustive data.
5620//
5621// ## Visibility filter (load-bearing — three cases)
5622//
5623// The handler reads `AuthenticatedPrincipal` out of request extensions
5624// via `MaybePrincipal` and filters the registry list before
5625// serialisation:
5626//
5627//   1. **No principal** (`MaybePrincipal(None)`) — unauthenticated
5628//      loopback path, no `[auth]` block in `solo.config.toml`. Return
5629//      every `Active` tenant. Same scope as `solo tenants list` CLI.
//   2. **Bearer principal** (`subject == "bearer"`, `claims.is_null()`
//      and `scopes.is_empty()` — the `AuthenticatedPrincipal::bearer`
//      shape emitted by `BearerValidator::validate`, as tested by
//      `is_single_principal_bearer`). Single-principal daemon — the
//      bearer holder is the operator, so return every `Active`
//      tenant. Functionally equivalent to (1) from a leakage
//      standpoint.
5636//   3. **OIDC principal** (any other principal — `claims` carries the
5637//      JWT object). Filter to ONLY the tenant id matching
5638//      `principal.tenant_claim`. The configured OIDC tenant_claim is
5639//      already validated to a real `TenantId` by the auth middleware
5640//      (a `MissingTenantClaim` or `InvalidTenantClaim` shorts out at
5641//      403 BEFORE this handler runs). If the claim doesn't match any
5642//      registered tenant, return `{"tenants": []}` (200 OK, NOT 404)
5643//      — don't leak whether a tenant exists by 404'ing on names
5644//      outside the principal's scope.
5645//
5646// `PendingMigration` / `PendingDelete` tenants are **excluded** from the
5647// list in every case. solo-web's tenant picker should not surface a
5648// tenant that's mid-migration or queued for hard-delete — clicking
5649// such a row would race the admin tooling. The CLI's `solo tenants
5650// list` still shows them under an explicit `--include-pending` flag
5651// (out of scope here).
5652//
5653// See `docs/dev-log/0119-tenants-list-impl.md` for the full design.
5654// ---------------------------------------------------------------------------
5655
/// One row of the `/v1/tenants` response body. Shape mirrors
/// `solo_storage::TenantRecord` for the persisted fields plus the
/// cost-numbers triple (`episode_count`, `size_bytes`, `pct_used`).
/// The triple was always `null` in v0.10.0; from v0.10.1 the list
/// handler fills it in from the registry's hydration results.
///
/// Field order here is the key order of the serialised JSON object,
/// so do not reorder fields casually.
#[derive(Debug, Clone, Serialize)]
struct TenantListItem {
    /// Tenant id (e.g. `"default"`, `"alice"`). Matches the
    /// `X-Solo-Tenant` header value clients send to other routes.
    id: String,
    /// Human-readable display name set at `solo tenants create`.
    /// `None` ⇒ the key is omitted from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    display_name: Option<String>,
    /// Epoch ms when this tenant was registered.
    created_at_ms: i64,
    /// Epoch ms of the most recent `TenantRegistry::get_or_open` call
    /// (v0.9.0 P1). `None` for tenants that have never been opened
    /// since the migration ran; in that case the key is omitted from
    /// the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    last_accessed_ms: Option<i64>,
    /// Lifecycle status. Always `"active"` on the current wire (the
    /// list handler filters `PendingMigration` / `PendingDelete` out
    /// before building rows). Surfaced for forward-compat — a future
    /// `?include_pending=1` query param could relax the filter
    /// without a shape change.
    status: TenantStatusJson,
    /// Per-tenant byte quota set via `solo tenants set-quota`. `None`
    /// ⇒ unlimited, and the key is omitted from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    quota_bytes: Option<u64>,
    /// v0.10.1: count of `episodes WHERE status='active'`. Populated
    /// for the first `TENANTS_COUNT_HYDRATION_CAP` tenants in the
    /// response; `null` for tenants beyond the cap (in which case the
    /// response also carries `X-Solo-Tenants-Count-Cap-Reached: true`).
    /// Also `null` if the per-tenant DB file is missing or the COUNT
    /// failed. Unlike the optional fields above, `None` is serialised
    /// as an explicit `null`.
    episode_count: Option<i64>,
    /// v0.10.1: size of the per-tenant SQLCipher DB on disk (bytes).
    /// `null` only if the file is missing or unreadable (corruption /
    /// permissions). Not affected by the cap — `std::fs::metadata` is
    /// cheap.
    size_bytes: Option<u64>,
    /// v0.10.1: `(size_bytes * 100.0 / quota_bytes)` capped at `100.0`
    /// when both `size_bytes` and `quota_bytes` are known. `null` if
    /// `quota_bytes` is unset (no quota = unlimited), `quota_bytes`
    /// is `0` (the list handler skips the division), or `size_bytes`
    /// is unknown.
    pct_used: Option<f64>,
}
5703
/// JSON-side mirror of [`TenantStatus`].
///
/// `TenantStatus` already serialises itself (snake_case), but the wire
/// enum is deliberately re-defined here so the HTTP shape stays
/// decoupled from the storage-side enum. Today both serialise
/// identically; a status variant added to storage later does not
/// automatically leak onto the wire.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "snake_case")]
enum TenantStatusJson {
    /// Serialised as `"active"`. Currently the only status the list
    /// endpoint ever emits.
    Active,
}
5715
5716impl From<&solo_storage::TenantStatus> for TenantStatusJson {
5717    fn from(s: &solo_storage::TenantStatus) -> Self {
5718        // We only ever build this enum from `Active` records (the list
5719        // handler filters at source); the match exhausts so future
5720        // variants force a compile error here, not a wire mismatch.
5721        match s {
5722            solo_storage::TenantStatus::Active => TenantStatusJson::Active,
5723            // Defensive: should be filtered upstream. Map to Active to
5724            // avoid a panic, but the handler MUST keep filtering at
5725            // source. A clippy warning catches dead branches.
5726            solo_storage::TenantStatus::PendingMigration
5727            | solo_storage::TenantStatus::PendingDelete => TenantStatusJson::Active,
5728        }
5729    }
5730}
5731
/// Response body for `GET /v1/tenants`: a single `tenants` array, one
/// [`TenantListItem`] per tenant visible to the caller. The array is
/// empty (not absent) when nothing is visible.
#[derive(Debug, Serialize)]
struct TenantsListResponse {
    /// Visible tenants, in the order the handler produced them.
    tenants: Vec<TenantListItem>,
}
5737
/// v0.10.1: maximum number of tenants whose `episode_count` we hydrate
/// per `/v1/tenants` request. Opening + counting one tenant DB is
/// ~5-10ms; capping bounds the per-request wall to keep solo-web's
/// first-paint budget tight. Tenants beyond the cap get
/// `episode_count: null` AND the response carries
/// `X-Solo-Tenants-Count-Cap-Reached: true` so clients can fetch
/// per-tenant counts out-of-band (CLI / future per-id endpoint) for
/// the tail. The header is set only when the visible list is strictly
/// longer than this cap. The 50 figure mirrors the entity-cap pattern
/// from `/v1/graph/nodes`.
const TENANTS_COUNT_HYDRATION_CAP: usize = 50;
5748
/// v0.10.1: response header name set to `"true"` when the per-request
/// `episode_count` hydration cap was reached. Absent otherwise.
/// Grep-able by both server- and client-side code.
///
/// Must stay lowercase: the value is handed to
/// `axum::http::HeaderName::from_static`, which only accepts lowercase
/// names. Header names are case-insensitive on the wire; the canonical
/// spelling is `X-Solo-Tenants-Count-Cap-Reached`.
const X_SOLO_TENANTS_COUNT_CAP_HEADER: &str = "x-solo-tenants-count-cap-reached";
5756
5757/// `GET /v1/tenants` — list every tenant visible to the request's
5758/// principal. See module comment for the three-case visibility rule.
5759///
5760/// Errors:
5761///   * **401** — bearer required but missing/invalid (handled by
5762///     `auth_middleware` before this handler runs).
5763///   * **500** — `TenantsIndex` read failed. Surfaced via [`ApiError`].
5764///
5765/// No 404 path. If the OIDC principal's `tenant_claim` doesn't match
5766/// any registered tenant, the response is `200 OK` with `tenants:
5767/// []`. That keeps tenant existence out of side-channel range for an
5768/// OIDC user — they cannot probe for other tenants by id.
5769async fn tenants_list_handler(
5770    State(state): State<SoloHttpState>,
5771    MaybePrincipal(maybe_principal): MaybePrincipal,
5772) -> Result<Response, ApiError> {
5773    // Pull every registered tenant. `list_active` is the registry's
5774    // wrapper around `TenantsIndex::list`, which returns rows ordered
5775    // by `(created_at_ms ASC, tenant_id ASC)` — a stable order that
5776    // doesn't shift between requests, which solo-web relies on to keep
5777    // its tenant picker entries from reordering visually.
5778    let mut records = state.registry.list_active().await.map_err(ApiError::from)?;
5779
5780    // Filter at source: status MUST be Active (PendingMigration /
5781    // PendingDelete are admin-transient states that solo-web should
5782    // not surface). Matches the brief's
5783    // `tenants_status_filter_excludes_deleted` test.
5784    records.retain(|r| matches!(r.status, solo_storage::TenantStatus::Active));
5785
5786    // Apply the principal-driven visibility filter. The three cases
5787    // are exhaustive — see the module comment for the rationale on
5788    // each. `tenant_visibility_filter` is split out so the unit
5789    // tests can assert the rule independent of the SQL read.
5790    let filtered = filter_tenants_for_principal(records, maybe_principal.as_ref());
5791
5792    // v0.10.1: hydrate cost numbers (size_bytes, episode_count). The
5793    // registry helper handles missing DB files + the cap behavior. We
5794    // pass the cap so tenants beyond it return `None` for episode_count
5795    // — `size_bytes` is computed for everyone (cheap fs::metadata).
5796    let cap = TENANTS_COUNT_HYDRATION_CAP;
5797    let costs = state
5798        .registry
5799        .hydrate_tenant_cost_numbers(&filtered, cap)
5800        .await;
5801    let cap_reached = filtered.len() > cap;
5802
5803    let tenants: Vec<TenantListItem> = filtered
5804        .iter()
5805        .zip(costs.iter())
5806        .map(|(r, cost)| {
5807            let pct_used = match (cost.size_bytes, r.quota_bytes) {
5808                (Some(size), Some(quota)) if quota > 0 => {
5809                    let raw = (size as f64) * 100.0 / (quota as f64);
5810                    Some(raw.min(100.0))
5811                }
5812                _ => None,
5813            };
5814            TenantListItem {
5815                id: r.tenant_id.to_string(),
5816                display_name: r.display_name.clone(),
5817                created_at_ms: r.created_at_ms,
5818                last_accessed_ms: r.last_accessed_ms,
5819                status: TenantStatusJson::from(&r.status),
5820                quota_bytes: r.quota_bytes,
5821                episode_count: cost.episode_count,
5822                size_bytes: cost.size_bytes,
5823                pct_used,
5824            }
5825        })
5826        .collect();
5827
5828    let body = Json(TenantsListResponse { tenants });
5829    if cap_reached {
5830        let mut resp = body.into_response();
5831        resp.headers_mut().insert(
5832            axum::http::HeaderName::from_static(X_SOLO_TENANTS_COUNT_CAP_HEADER),
5833            axum::http::HeaderValue::from_static("true"),
5834        );
5835        Ok(resp)
5836    } else {
5837        Ok(body.into_response())
5838    }
5839}
5840
5841/// Pure function: apply the three-case principal-driven visibility
5842/// rule to a list of `TenantRecord`s. Extracted from the handler so
5843/// unit tests can exercise the rule without driving an axum router.
5844///
5845///   * `principal == None` ⇒ all records returned (no-auth path).
5846///   * Bearer-shaped principal (`subject == "bearer" && claims.is_null()`)
5847///     ⇒ all records returned (single-principal daemon).
5848///   * Any other principal (OIDC) ⇒ filter to records whose
5849///     `tenant_id == principal.tenant_claim`. An OIDC principal with
5850///     no `tenant_claim` (theoretically unreachable — the middleware
5851///     short-circuits at 403 before us, but we defend) returns an
5852///     empty list.
5853fn filter_tenants_for_principal(
5854    records: Vec<solo_storage::TenantRecord>,
5855    principal: Option<&AuthenticatedPrincipal>,
5856) -> Vec<solo_storage::TenantRecord> {
5857    let Some(p) = principal else {
5858        // Case 1: no auth configured — return all tenants. Same scope
5859        // as `solo tenants list`.
5860        return records;
5861    };
5862    if is_single_principal_bearer(p) {
5863        // Case 2: bearer principal — return all tenants. The single
5864        // bearer holder is functionally the daemon operator.
5865        return records;
5866    }
5867    // Case 3: OIDC principal — filter to the claimed tenant only. An
5868    // unmatched claim falls through to an empty list, NOT 404, to
5869    // avoid leaking tenant existence.
5870    let Some(claim) = p.tenant_claim.as_ref() else {
5871        return Vec::new();
5872    };
5873    records
5874        .into_iter()
5875        .filter(|r| r.tenant_id == *claim)
5876        .collect()
5877}
5878
5879/// True iff `principal` looks like a bearer-mode principal — the shape
5880/// emitted by [`AuthenticatedPrincipal::bearer`]: subject is literally
5881/// `"bearer"`, claims is `serde_json::Value::Null`, and scopes is
5882/// empty. OIDC principals carry a JWT object in `claims` and the JWT
5883/// `sub` in `subject`, so they fail this predicate.
5884///
5885/// Split out so the unit tests can assert the discriminator
5886/// independent of the rest of the handler. Keeping the predicate in
5887/// one place also makes future expansion easier — e.g., a v0.11
5888/// "admin scope" might add an OIDC variant that passes this gate by
5889/// looking for a `"solo:admin"` entry in `scopes`.
5890fn is_single_principal_bearer(principal: &AuthenticatedPrincipal) -> bool {
5891    principal.subject == "bearer" && principal.claims.is_null() && principal.scopes.is_empty()
5892}
5893
5894// ---------------------------------------------------------------------------
5895// v0.10.2 — MCP-over-HTTP transport on /mcp
5896// ---------------------------------------------------------------------------
5897
5898// v0.11.0 P2: the per-event names that used to live here as
5899// `MCP_STREAM_EVENT_INIT` moved into `crate::mcp_session` alongside the
5900// `McpEventKind` enum so the publisher (`SessionState::publish_event`)
5901// and the subscriber (`build_mcp_session_stream`) share one source of
5902// truth for the wire format. See `MCP_STREAM_EVENT_INIT_NAME`,
5903// `MCP_STREAM_EVENT_MESSAGE_NAME`, `MCP_STREAM_EVENT_PROGRESS_NAME`,
5904// `MCP_STREAM_EVENT_LAGGED_NAME`, and `MCP_STREAM_EVENT_HEARTBEAT_NAME`
5905// for the canonical strings.
5906
5907/// `POST /mcp` — JSON-RPC request/response.
5908///
5909/// v0.10.2 P2 entry point. Per the MCP Streamable HTTP transport spec,
5910/// the body is one JSON-RPC 2.0 envelope (`{jsonrpc, id, method,
5911/// params}`). The response is one JSON-RPC envelope (`{jsonrpc, id,
5912/// result}` or `{jsonrpc, id, error}`) with `Content-Type:
5913/// application/json`. **Status 200** for valid JSON-RPC (in-body
5914/// errors); **status 400** for malformed JSON; **status 401** when
5915/// auth is configured and the bearer check fails (handled by the
5916/// `auth_middleware` ahead of this handler).
5917///
5918/// Tenant resolution diverges from `solo mcp-stdio` here: stdio binds
5919/// one tenant at process start via `--tenant`. HTTP resolves the tenant
5920/// per request from the `X-Solo-Tenant` header (or
5921/// `AuthenticatedPrincipal.tenant_claim` in OIDC mode), so a single
5922/// daemon process can answer MCP calls for any tenant the registry
5923/// knows. The audit principal is `Some("bearer")` for bearer-
5924/// authenticated calls and the JWT `sub` for OIDC; `None` for
5925/// unauthenticated loopback. Documented in v0.10.2 dev log.
5926async fn mcp_http_post_handler(
5927    TenantExtractor(tenant): TenantExtractor,
5928    State(state): State<SoloHttpState>,
5929    AuditPrincipal(principal): AuditPrincipal,
5930    request: axum::extract::Request,
5931) -> Response {
5932    // v0.11.0 P1: read the session extension the middleware planted on
5933    // a hit; if absent, this is the session-init request — create one
5934    // and echo the assigned id back via `Mcp-Session-Id`.
5935    let existing_session_id: Option<crate::mcp_session::SessionId> = request
5936        .extensions()
5937        .get::<crate::mcp_session::SessionId>()
5938        .cloned();
5939    let principal_full = request
5940        .extensions()
5941        .get::<crate::auth::AuthenticatedPrincipal>()
5942        .cloned();
5943    let body_bytes = match axum::body::to_bytes(
5944        request.into_body(),
5945        // Match the 8 MiB cap solo-api already uses for JSON bodies in
5946        // other handlers (validated by `tower-http::limit::RequestBodyLimitLayer`
5947        // elsewhere). Locally we cap at 8 MiB so a malformed Content-Length
5948        // can't OOM the dispatch task.
5949        8 * 1024 * 1024,
5950    )
5951    .await
5952    {
5953        Ok(b) => b,
5954        Err(e) => {
5955            return (
5956                StatusCode::BAD_REQUEST,
5957                Json(serde_json::json!({
5958                    "error": format!("invalid request body: {e}"),
5959                    "status": 400,
5960                })),
5961            )
5962                .into_response();
5963        }
5964    };
5965    // Parse the JSON-RPC envelope. Malformed input ⇒ 400 (the spec
5966    // calls out 4xx for malformed wire input even though JSON-RPC's own
5967    // parse-error code is in-body — operator-facing tooling needs the
5968    // HTTP status to distinguish "the server rejected the request
5969    // shape" from "the method returned an error").
5970    let request: crate::mcp_dispatch::JsonRpcRequest = match serde_json::from_slice(&body_bytes) {
5971        Ok(r) => r,
5972        Err(e) => {
5973            return (
5974                StatusCode::BAD_REQUEST,
5975                Json(serde_json::json!({
5976                    "error": format!("invalid JSON-RPC request: {e}"),
5977                    "status": 400,
5978                })),
5979            )
5980                .into_response();
5981        }
5982    };
5983    if request.jsonrpc != "2.0" {
5984        return (
5985            StatusCode::BAD_REQUEST,
5986            Json(serde_json::json!({
5987                "error": format!(
5988                    "invalid JSON-RPC request: expected jsonrpc=\"2.0\", got {:?}",
5989                    request.jsonrpc
5990                ),
5991                "status": 400,
5992            })),
5993        )
5994            .into_response();
5995    }
5996
5997    // v0.11.0 P1: assign a session id if the request arrived without
5998    // one. The assigned id is echoed back via the `Mcp-Session-Id`
5999    // response header so the client can reuse it.
6000    let (session_id, freshly_assigned) = match existing_session_id {
6001        Some(id) => (id, false),
6002        None => {
6003            let new_state =
6004                crate::mcp_session::SessionState::new(tenant.tenant_id().clone(), principal_full);
6005            let id = state.mcp_sessions.insert(new_state);
6006            (id, true)
6007        }
6008    };
6009
6010    // v0.11.0 P3: resolve the `Arc<SessionState>` for the dispatcher so
6011    // per-tool progress events can be published into the session's
6012    // broadcast channel. On a session-init request we just inserted
6013    // the state; for a continuing request the middleware planted an
6014    // Arc onto the request extensions, but we lost ownership when we
6015    // consumed the request above (`request.into_body()`). Re-fetch
6016    // via `mcp_sessions.get(&session_id)` — this is a single lock-free
6017    // DashMap shard read.
6018    let session_state: Option<std::sync::Arc<crate::mcp_session::SessionState>> =
6019        state.mcp_sessions.get(&session_id);
6020
6021    // v0.11.0 P4: on a freshly-assigned session, spawn the
6022    // invalidate-bridge task that forwards per-tenant `InvalidateEvent`
6023    // broadcasts to this session's event channel as MCP
6024    // `notifications/message` envelopes. Skipped for continuing requests
6025    // because the bridge spawned at session-init is still running (the
6026    // bridge auto-exits when the session drops from the store via the
6027    // `Weak<SessionState>` upgrade-fails path).
6028    if freshly_assigned && let Some(session_state_for_bridge) = session_state.clone() {
6029        // The JoinHandle is intentionally detached — the bridge task
6030        // owns its own exit path (Weak<SessionState> upgrade fails or
6031        // tenant broadcast closes). Holding the handle would require
6032        // a per-session reaper; the bridge's own lifecycle is enough.
6033        // `drop` is the clippy-clean way to discard a future.
6034        drop(crate::mcp_notify::spawn_invalidate_bridge(
6035            tenant.clone(),
6036            session_state_for_bridge,
6037        ));
6038    }
6039
6040    // Build the dispatcher with the resolved tenant + audit principal.
6041    // Dispatcher integration is Option B per v0.11.0 P1 plan: sessions
6042    // are HTTP-transport-only; the dispatcher stays session-agnostic.
6043    let dispatcher = crate::mcp_dispatch::McpDispatcher::new(
6044        state.registry.clone(),
6045        tenant,
6046        (*state.user_aliases).clone(),
6047        principal,
6048    );
6049
6050    let mut response = match dispatcher.dispatch(request, session_state).await {
6051        Some(response) => {
6052            // JSON-RPC errors are in-body; the HTTP status is 200 for
6053            // any valid JSON-RPC request, including ones that return an
6054            // error envelope. The client distinguishes success from
6055            // error by the presence of `result` vs `error` in the body.
6056            (StatusCode::OK, Json(response)).into_response()
6057        }
6058        None => {
6059            // Notification: per JSON-RPC 2.0 §4.1 the server MUST NOT
6060            // respond. The MCP Streamable HTTP transport spec uses
6061            // 202 Accepted for this shape so client-side polling does
6062            // not block on a body.
6063            StatusCode::ACCEPTED.into_response()
6064        }
6065    };
6066    // v0.11.0 P1: stamp the `Mcp-Session-Id` response header on every
6067    // response — both freshly-assigned (so the client learns it) and
6068    // continuing (so the client confirms the id is still valid). The
6069    // spec is loose here; echoing always is the safer client contract.
6070    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
6071    // Tracing hook lets operators see new-session creation rate in
6072    // `solo daemon` logs without grepping body bytes.
6073    if freshly_assigned {
6074        tracing::debug!(
6075            session_id = %session_id,
6076            "mcp-http: assigned new session id"
6077        );
6078    }
6079    response
6080}
6081
/// Heartbeat cadence, in seconds, for the resumable `/mcp` GET stream.
/// Matches the `/v1/graph/stream` discipline (30s) so operator tooling
/// can use one timeout knob.
///
/// The GET handler passes this value to [`build_mcp_session_stream`];
/// since v0.11.0 P2 the interval is a parameter of that helper, so
/// tests can pass a short interval without driving the real clock for
/// 30s.
pub const MCP_STREAM_HEARTBEAT_SECS: u64 = 30;
6088
6089/// `GET /mcp` — resumable Server-Sent Events stream for one MCP session.
6090///
6091/// v0.11.0 P2 replaces v0.10.2's `pending().await` stub with a real
6092/// `select!` loop over the session's broadcast event channel. Per the
6093/// MCP Streamable HTTP transport spec, the GET endpoint is the
6094/// server's path to push:
6095///
6096///   - `event: init` — handshake confirming the stream is live;
6097///   - `event: message` — JSON-RPC `notifications/message` (P4 bridge);
6098///   - `event: progress` — JSON-RPC `notifications/progress` (P3 long
6099///     tool calls);
6100///   - `event: heartbeat` — periodic liveness ping every
6101///     [`MCP_STREAM_HEARTBEAT_SECS`] seconds;
6102///   - `event: lagged` — emitted once when a reconnecting client's
6103///     `Last-Event-ID` is older than the broadcast buffer's oldest
6104///     retained event (Decision E).
6105///
6106/// Wire format per the SSE spec — each event carries:
6107/// `id: <u64>\nevent: <kind>\ndata: <json>\n\n`. The `id:` field is the
6108/// monotonic per-session event id; clients echo the last-seen value
6109/// back in the `Last-Event-ID` header on reconnect to drive the
6110/// replay-from-cursor path.
6111///
6112/// **Session id REQUIRED.** Unlike `POST /mcp` (which auto-creates a
6113/// session on the session-init request), `GET /mcp` returns `404 Not
6114/// Found` if the request arrived without a `Mcp-Session-Id` header.
6115/// The GET stream's whole point is to attach to an existing session's
6116/// notification channel — a client opening a stream without a session
6117/// to attach it to is a programming error, not the entry point to the
6118/// session lifecycle.
6119async fn mcp_http_get_handler(
6120    TenantExtractor(tenant): TenantExtractor,
6121    State(state): State<SoloHttpState>,
6122    AuditPrincipal(principal): AuditPrincipal,
6123    request: axum::extract::Request,
6124) -> Response {
6125    let _ = principal; // audit principal pre-resolved by extractor; unused on GET
6126    let _ = state; // session resolution lives in the middleware; state unused here
6127
6128    // v0.11.0 P2: session is REQUIRED on GET. The middleware planted
6129    // the SessionId + Arc<SessionState> extensions on a hit. If the
6130    // request arrived without an `Mcp-Session-Id` header, the
6131    // middleware passes through (so unauth'd POSTs can session-init);
6132    // we observe that as a missing extension and return 404 here.
6133    let session_id = match request.extensions().get::<crate::mcp_session::SessionId>() {
6134        Some(id) => id.clone(),
6135        None => {
6136            return (
6137                StatusCode::NOT_FOUND,
6138                Json(serde_json::json!({
6139                    "error": crate::mcp_session::MCP_SESSION_EXPIRED_ERROR,
6140                    "status": 404,
6141                    "message": "GET /mcp requires an `Mcp-Session-Id` header \
6142                                from a prior POST /mcp; open one first",
6143                    "retry": "re-initialize",
6144                })),
6145            )
6146                .into_response();
6147        }
6148    };
6149    let session_state = match request
6150        .extensions()
6151        .get::<std::sync::Arc<crate::mcp_session::SessionState>>()
6152    {
6153        Some(state) => state.clone(),
6154        None => {
6155            // Defensive: middleware should plant both extensions
6156            // together or neither, but log + 404 if we somehow see one
6157            // without the other.
6158            tracing::error!(
6159                "mcp_http_get_handler: SessionId extension present but \
6160                 SessionState extension missing — middleware bug"
6161            );
6162            return StatusCode::INTERNAL_SERVER_ERROR.into_response();
6163        }
6164    };
6165
6166    // Optional `Last-Event-ID` header — parse as u64; on parse failure
6167    // treat as `0` (the "never seen anything" sentinel) so a
6168    // malformed header doesn't 400 the reconnect.
6169    let last_event_id: u64 = request
6170        .headers()
6171        .get(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER)
6172        .and_then(|v| v.to_str().ok())
6173        .and_then(|s| s.trim().parse::<u64>().ok())
6174        .unwrap_or(0);
6175
6176    let tenant_id = tenant.tenant_id().to_string();
6177    let stream = build_mcp_session_stream(
6178        session_state,
6179        session_id.clone(),
6180        tenant_id,
6181        last_event_id,
6182        MCP_STREAM_HEARTBEAT_SECS,
6183    );
6184    // No axum keep-alive comment lines — we ship our own typed
6185    // `heartbeat` event the way `/v1/graph/stream` does. Setting the
6186    // axum-side interval to 1 hour effectively disables it; clients
6187    // distinguish liveness via the typed events on the stream.
6188    let sse = Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)));
6189    let mut response = sse.into_response();
6190    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
6191    response
6192}
6193
6194/// `DELETE /mcp` — explicit session termination (v0.11.4, MCP
6195/// Streamable HTTP transport spec).
6196///
6197/// The Streamable HTTP spec defines DELETE on the endpoint as a
6198/// client-driven session-close. Without it, sessions persist until the
6199/// sweep-task evicts them by timeout. With it, well-behaved clients
6200/// (Claude Desktop, ChatGPT MCP connector, scripted MCP clients) can
6201/// signal "I'm done" so the server reclaims resources immediately.
6202///
6203/// Semantics:
6204///   - Requires `Mcp-Session-Id` header. If absent → 400 Bad Request.
6205///     (Unlike `GET /mcp` which returns 404 in the same case — that
6206///     code reflected a different intent. DELETE without a session id
6207///     is malformed, not "not found", because there is nothing for the
6208///     client to retry by reinitialising.)
6209///   - If the session id is unknown to the store → 404 (the spec is
6210///     permissive here; some servers accept-and-ignore, but operators
6211///     benefit from the explicit "session you named doesn't exist").
6212///   - On success → 204 No Content. Body intentionally empty: the
6213///     contract is "session is gone".
6214///
6215/// The middleware does NOT plant session extensions on DELETE (it only
6216/// validates session-id presence for POST + GET); we read the header
6217/// directly here so the contract stays self-evident.
6218async fn mcp_http_delete_handler(
6219    State(state): State<SoloHttpState>,
6220    request: axum::extract::Request,
6221) -> Response {
6222    let session_id_str = match request
6223        .headers()
6224        .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
6225        .and_then(|v| v.to_str().ok())
6226    {
6227        Some(s) => s.to_string(),
6228        None => {
6229            return (
6230                StatusCode::BAD_REQUEST,
6231                Json(serde_json::json!({
6232                    "error": "missing required `Mcp-Session-Id` header",
6233                    "status": 400,
6234                })),
6235            )
6236                .into_response();
6237        }
6238    };
6239    let Some(session_id) = crate::mcp_session::SessionId::parse(&session_id_str) else {
6240        return (
6241            StatusCode::BAD_REQUEST,
6242            Json(serde_json::json!({
6243                "error": "malformed `Mcp-Session-Id` header",
6244                "status": 400,
6245            })),
6246        )
6247            .into_response();
6248    };
6249    if state.mcp_sessions.delete(&session_id) {
6250        StatusCode::NO_CONTENT.into_response()
6251    } else {
6252        (
6253            StatusCode::NOT_FOUND,
6254            Json(serde_json::json!({
6255                "error": format!("session `{session_id}` not found"),
6256                "status": 404,
6257            })),
6258        )
6259            .into_response()
6260    }
6261}
6262
6263/// Per-subscriber state threaded through `futures::stream::unfold` for
6264/// the resumable `/mcp` GET stream. Carries the broadcast receiver +
6265/// heartbeat ticker + the queue of replay events (if any) that need
6266/// to drain before live events start flowing.
6267///
6268/// Holds an `Arc<SessionState>` for the SOLE purpose of keeping the
6269/// broadcast `Sender` alive for as long as this subscriber's stream
6270/// is open. Without that strong ref, dropping the only Arc the
6271/// handler held would close the channel and `rx.recv()` would
6272/// immediately return `Err(Closed)` — the integration test for the
6273/// heartbeat cadence caught this regression.
6274struct McpStreamState {
6275    /// Live event receiver subscribed BEFORE the replay snapshot is
6276    /// drained — so any event published during the replay window
6277    /// lands here and the subscriber's `last_replayed_id` cursor
6278    /// dedupes it against the replayed copy.
6279    rx: broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
6280    /// Heartbeat tick — fires every `heartbeat_secs` regardless of
6281    /// real-event volume. Matches `/v1/graph/stream`'s discipline.
6282    heartbeat: tokio::time::Interval,
6283    /// FIFO queue of replay events still to emit before live events
6284    /// take over. Empties to `Vec::new()` after the last drain.
6285    replay_queue: Vec<crate::mcp_session::McpStreamEvent>,
6286    /// `Some(id)` once at least one event has been emitted (replayed
6287    /// OR live). Live broadcast events with `id <= last_emitted_id`
6288    /// are skipped — handles the race where an event lands in BOTH
6289    /// the replay snapshot AND the live broadcast receiver (because
6290    /// we subscribed before snapshotting).
6291    last_emitted_id: Option<u64>,
6292    /// `true` until the synthetic `event: init` has been emitted.
6293    /// Flipped to `false` on first poll.
6294    needs_init: bool,
6295    /// Init-event payload metadata. Pre-computed at handler entry so
6296    /// the unfold closure stays `Send`.
6297    session_id_str: String,
6298    tenant_id: String,
6299    /// Held only to keep the broadcast `Sender` (and thus the channel)
6300    /// alive for the stream's lifetime. The session store also holds
6301    /// an Arc, but that one expires under TTL — this Arc keeps the
6302    /// channel open for this single subscriber for as long as the
6303    /// client is connected.
6304    _session_state: std::sync::Arc<crate::mcp_session::SessionState>,
6305}
6306
6307/// Build the resumable SSE stream for one `/mcp` GET subscriber.
6308///
6309/// Flow per `unfold` iteration:
6310///
6311///   1. **needs_init poll** — emit one `event: init` with id 0 (we
6312///      never allocate event id 0 in `SessionState::publish_event`;
6313///      0 is reserved for the init event + the client's "never seen"
6314///      sentinel on `Last-Event-ID`). Returns immediately.
6315///   2. **replay drain** — while `replay_queue` is non-empty, pop the
6316///      front entry and emit it. Updates `last_emitted_id`.
6317///   3. **live select** — `tokio::select!` between
6318///      `rx.recv()` and `heartbeat.tick()`:
6319///      - `rx.recv() = Ok(event)` and `event.id > last_emitted_id` →
6320///        emit and update cursor;
6321///      - `rx.recv() = Ok(event)` and `event.id <= last_emitted_id` →
6322///        skip (dedupe overlap with the replayed copy);
6323///      - `rx.recv() = Err(Lagged(n))` → emit one synthetic
6324///        `event: lagged` with `data: {dropped: n}` and continue;
6325///      - `rx.recv() = Err(Closed)` → end the stream (session
6326///        dropped);
6327///      - `heartbeat.tick()` → emit an unaccounted-id `event: heartbeat`
6328///        (heartbeats DO NOT consume the session's event id space —
6329///        they're synthetic and idempotent, so a reconnecting client
6330///        doesn't need to see them in replay).
6331///
6332/// Heartbeats use SSE event id `0` (the same id space the init event
6333/// uses) and clients filter them client-side; the broadcast-channel
6334/// events use the session's real monotonic ids.
6335fn build_mcp_session_stream(
6336    session_state: std::sync::Arc<crate::mcp_session::SessionState>,
6337    session_id: crate::mcp_session::SessionId,
6338    tenant_id: String,
6339    last_event_id: u64,
6340    heartbeat_secs: u64,
6341) -> impl Stream<Item = Result<Event, Infallible>> {
6342    // 1. Subscribe BEFORE snapshotting so any event published during
6343    //    the snapshot window lands in the live receiver. We dedupe
6344    //    overlap against `last_emitted_id` below.
6345    let rx = session_state.subscribe_events();
6346
6347    // 2. Snapshot the replay buffer, then filter to events the client
6348    //    hasn't seen.
6349    let snapshot = session_state.snapshot_replay_buffer();
6350
6351    // 3. Decide replay shape based on `last_event_id` vs the snapshot.
6352    let mut replay_queue: Vec<crate::mcp_session::McpStreamEvent> = Vec::new();
6353    if last_event_id > 0 {
6354        // Client is reconnecting with a known cursor.
6355        let oldest_in_buffer = snapshot.first().map(|e| e.id);
6356        let newest_in_buffer = snapshot.last().map(|e| e.id);
6357        if let (Some(oldest), Some(newest)) = (oldest_in_buffer, newest_in_buffer) {
6358            if last_event_id + 1 < oldest {
6359                // Client missed events that have since been evicted
6360                // from the buffer. Emit one synthetic `event: lagged`
6361                // describing the gap, then resume from the buffer.
6362                let dropped = oldest.saturating_sub(last_event_id + 1);
6363                replay_queue.push(crate::mcp_session::McpStreamEvent {
6364                    id: 0,
6365                    event: crate::mcp_session::McpEventKind::Lagged,
6366                    data: serde_json::json!({
6367                        "dropped": dropped,
6368                        "last_event_id": last_event_id,
6369                        "oldest_available": oldest,
6370                    }),
6371                });
6372                replay_queue.extend(snapshot);
6373            } else if last_event_id >= newest {
6374                // Client is already caught up; nothing to replay.
6375            } else {
6376                replay_queue.extend(snapshot.into_iter().filter(|e| e.id > last_event_id));
6377            }
6378        }
6379        // Empty snapshot + non-zero last_event_id: nothing to replay.
6380    }
6381    // last_event_id == 0: brand-new subscriber; no replay needed
6382    // (the `init` event below is the start of the stream from the
6383    // client's POV).
6384
6385    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
6386    let heartbeat = tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
6387
6388    let stream_state = McpStreamState {
6389        rx,
6390        heartbeat,
6391        replay_queue,
6392        last_emitted_id: None,
6393        needs_init: true,
6394        session_id_str: session_id.to_string(),
6395        tenant_id,
6396        _session_state: session_state,
6397    };
6398
6399    futures::stream::unfold(stream_state, move |mut state| async move {
6400        // Phase 1: init event (one-shot).
6401        if state.needs_init {
6402            state.needs_init = false;
6403            let init_payload = serde_json::json!({
6404                "connected": true,
6405                "session_id": state.session_id_str,
6406                "tenant_id": state.tenant_id,
6407                "ts_ms": chrono::Utc::now().timestamp_millis(),
6408            });
6409            let ev = build_mcp_sse_event(0, crate::mcp_session::McpEventKind::Init, &init_payload);
6410            return Some((Ok::<Event, Infallible>(ev), state));
6411        }
6412        // Phase 2: replay-queue drain (one entry per poll).
6413        if !state.replay_queue.is_empty() {
6414            let entry = state.replay_queue.remove(0);
6415            // Lagged synthetic entries don't bump last_emitted_id —
6416            // they have id 0 and consuming them as the cursor would
6417            // cause every subsequent live event to dedupe against
6418            // them. Real events DO bump the cursor.
6419            if entry.event != crate::mcp_session::McpEventKind::Lagged {
6420                state.last_emitted_id = Some(entry.id);
6421            }
6422            let ev = build_mcp_sse_event(entry.id, entry.event, &entry.data);
6423            return Some((Ok::<Event, Infallible>(ev), state));
6424        }
6425        // Phase 3: live select loop.
6426        loop {
6427            tokio::select! {
6428                event = state.rx.recv() => {
6429                    match event {
6430                        Ok(ev) => {
6431                            // Dedupe against the replay overlap: any
6432                            // event whose id we've already emitted
6433                            // (because it was in the replay snapshot)
6434                            // gets skipped here.
6435                            if let Some(last) = state.last_emitted_id
6436                                && ev.id <= last
6437                            {
6438                                continue;
6439                            }
6440                            state.last_emitted_id = Some(ev.id);
6441                            let sse = build_mcp_sse_event(ev.id, ev.event, &ev.data);
6442                            return Some((Ok::<Event, Infallible>(sse), state));
6443                        }
6444                        Err(broadcast::error::RecvError::Lagged(n)) => {
6445                            // Live subscriber drifted past the
6446                            // broadcast buffer's capacity. Emit one
6447                            // synthetic `event: lagged` and resume —
6448                            // clients re-fetch state on this signal.
6449                            tracing::warn!(
6450                                lagged = n,
6451                                session_id = %state.session_id_str,
6452                                "mcp GET stream subscriber lagged"
6453                            );
6454                            let lagged_payload = serde_json::json!({
6455                                "dropped": n,
6456                            });
6457                            let sse = build_mcp_sse_event(
6458                                0,
6459                                crate::mcp_session::McpEventKind::Lagged,
6460                                &lagged_payload,
6461                            );
6462                            return Some((Ok::<Event, Infallible>(sse), state));
6463                        }
6464                        Err(broadcast::error::RecvError::Closed) => {
6465                            tracing::debug!(
6466                                session_id = %state.session_id_str,
6467                                "mcp GET stream broadcast closed; ending SSE stream"
6468                            );
6469                            return None;
6470                        }
6471                    }
6472                }
6473                _ = state.heartbeat.tick() => {
6474                    let hb_payload = serde_json::json!({
6475                        "ts_ms": chrono::Utc::now().timestamp_millis(),
6476                    });
6477                    let sse = build_mcp_sse_event(
6478                        0,
6479                        crate::mcp_session::McpEventKind::Heartbeat,
6480                        &hb_payload,
6481                    );
6482                    return Some((Ok::<Event, Infallible>(sse), state));
6483                }
6484            }
6485        }
6486    })
6487}
6488
6489/// Build an SSE [`Event`] from a `(id, kind, payload)` triple. Falls
6490/// back to an event-only frame on JSON serialisation failure (matches
6491/// `/v1/graph/stream`'s defensive pattern).
6492fn build_mcp_sse_event(
6493    id: u64,
6494    kind: crate::mcp_session::McpEventKind,
6495    data: &serde_json::Value,
6496) -> Event {
6497    Event::default()
6498        .id(id.to_string())
6499        .event(kind.as_str())
6500        .json_data(data)
6501        .unwrap_or_else(|_| Event::default().id(id.to_string()).event(kind.as_str()))
6502}
6503
6504// ---------------------------------------------------------------------------
6505// Error mapping
6506// ---------------------------------------------------------------------------
6507
/// Error type returned by the HTTP handlers: an HTTP status paired with
/// a human-readable message. Its `IntoResponse` impl renders it as a
/// JSON body of the form `{ "error": <message>, "status": <code> }`
/// sent with that same status.
#[derive(Debug)]
pub struct ApiError {
    /// HTTP status used for the response line and echoed (as a number)
    /// in the JSON body's `status` field.
    status: StatusCode,
    /// Message placed in the JSON body's `error` field.
    message: String,
}
6513
6514impl ApiError {
6515    fn bad_request(msg: impl Into<String>) -> Self {
6516        Self {
6517            status: StatusCode::BAD_REQUEST,
6518            message: msg.into(),
6519        }
6520    }
6521    fn not_found(msg: impl Into<String>) -> Self {
6522        Self {
6523            status: StatusCode::NOT_FOUND,
6524            message: msg.into(),
6525        }
6526    }
6527    fn internal(msg: impl Into<String>) -> Self {
6528        Self {
6529            status: StatusCode::INTERNAL_SERVER_ERROR,
6530            message: msg.into(),
6531        }
6532    }
6533}
6534
6535impl From<solo_core::Error> for ApiError {
6536    fn from(e: solo_core::Error) -> Self {
6537        use solo_core::Error;
6538        match e {
6539            Error::NotFound(msg) => ApiError::not_found(msg),
6540            Error::InvalidInput(msg) => ApiError::bad_request(msg),
6541            Error::Conflict(msg) => Self {
6542                status: StatusCode::CONFLICT,
6543                message: msg,
6544            },
6545            other => ApiError::internal(other.to_string()),
6546        }
6547    }
6548}
6549
6550impl IntoResponse for ApiError {
6551    fn into_response(self) -> Response {
6552        let body = serde_json::json!({
6553            "error": self.message,
6554            "status": self.status.as_u16(),
6555        });
6556        (self.status, Json(body)).into_response()
6557    }
6558}
6559
6560// SQL helper for recall used to live here; consolidated into
6561// solo_query::recall.
6562
6563#[cfg(test)]
6564mod handler_tests {
6565    //! In-process integration tests for the HTTP handler surface. We
6566    //! drive the axum Router directly via `tower::ServiceExt::oneshot`
6567    //! — no real TCP listener needed. Same `Harness`-shape as the MCP
6568    //! tests: real WriterActor + ReaderPool + StubEmbedder + StubVectorIndex.
6569    //!
6570    //! Tests live inline in this module rather than in a `tests/` dir
6571    //! because external integration-test exes triggered Windows UAC
6572    //! ERROR_ELEVATION_REQUIRED on the dev machine.
6573    use super::*;
6574    use axum::body::Body;
6575    use axum::http::{Request, StatusCode};
6576    use http_body_util::BodyExt;
6577    use serde_json::{Value, json};
6578    use solo_core::VectorIndex;
6579    use solo_storage::test_support::StubVectorIndex;
6580    use solo_storage::{
6581        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig, StubEmbedder,
6582        TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
6583    };
6584    use std::sync::Arc as StdArc;
6585    use tower::ServiceExt;
6586
6587    fn fake_config(dim: u32) -> SoloConfig {
6588        SoloConfig {
6589            schema_version: 1,
6590            salt_hex: "00000000000000000000000000000000".to_string(),
6591            embedder: EmbedderConfig {
6592                name: "stub".to_string(),
6593                version: "v1".to_string(),
6594                dim,
6595                dtype: "f32".to_string(),
6596            },
6597            identity: IdentityConfig::default(),
6598            documents: solo_storage::DocumentConfig::default(),
6599            auth: None,
6600            audit: solo_storage::AuditSettings::default(),
6601            redaction: solo_storage::RedactionConfig::default(),
6602            llm: None,
6603            triples: solo_storage::TriplesConfig::default(),
6604            sampling: solo_storage::SamplingConfig::default(),
6605            steward: solo_storage::StewardSettings::default(),
6606        }
6607    }
6608
    /// In-process test fixture: an axum `Router` wired to a real writer
    /// thread and reader pool over a temp-dir database, plus the extra
    /// handles tests need to poke at state and to shut everything down
    /// cleanly via `shutdown`.
    struct Harness {
        /// Router under test; tests clone it and drive it with
        /// `oneshot`.
        router: axum::Router,
        /// Temp directory holding the test database. Kept so the
        /// directory outlives the test; dropped explicitly in
        /// `shutdown`.
        _tmp: tempfile::TempDir,
        /// Path of the database file inside `_tmp`; `open_db` opens
        /// side connections against it.
        db_path: std::path::PathBuf,
        /// The harness's own clone of the writer handle. `Option` so
        /// `shutdown` can take and drop it.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the real writer thread. `Option` so
        /// `shutdown` can take it and wait for the thread to exit.
        join: Option<std::thread::JoinHandle<()>>,
        /// v0.10.0: handle to the per-tenant TenantHandle so SSE-flavoured
        /// tests can call `harness.invalidate_sender().send(...)` to
        /// simulate writer-actor invalidations (or grab a Receiver via
        /// `.subscribe()` for subscriber-count assertions).
        tenant_handle: StdArc<TenantHandle>,
        /// v0.10.0: clone of the registry Arc so `/v1/tenants` tests can
        /// seed additional tenant rows into the in-memory tenants_index
        /// stub via `registry.with_index(|idx| idx.register(...))`.
        registry: StdArc<TenantRegistry>,
        /// v0.11.0 P1: clone of the per-process MCP session store so
        /// tests can simulate TTL eviction (`delete` an id) without
        /// having to drive the full 30-min inactivity clock.
        mcp_sessions: crate::mcp_session::SessionStore,
    }
6629
6630    impl Harness {
6631        /// v0.10.0: clone the per-tenant broadcast Sender so tests can
6632        /// fire `InvalidateEvent`s directly without going through the
6633        /// writer-actor. The harness's writer is spawned via
6634        /// `WriterActor::spawn_full` (legacy variant, no invalidate
6635        /// plumb) so writer-driven events won't reach SSE subscribers
6636        /// in tests — tests use this Sender to simulate them.
6637        fn invalidate_sender(&self) -> tokio::sync::broadcast::Sender<InvalidateEvent> {
6638            self.tenant_handle.invalidate_sender().clone()
6639        }
6640    }
6641
    impl Harness {
        /// Build a harness with no auth configured (delegates to
        /// `new_with_auth` with `None`).
        fn new(runtime: &tokio::runtime::Runtime) -> Self {
            Self::new_with_auth(runtime, None)
        }

        /// Open a fresh side connection against the harness's DB. Used
        /// by graph_expand tests to seed clusters / triples / documents
        /// directly (the writer-actor doesn't expose those write paths).
        fn open_db(&self) -> rusqlite::Connection {
            solo_storage::test_support::open_test_db_at(&self.db_path)
        }

        /// Build a harness whose router is configured with bearer-token
        /// auth when `bearer_token` is `Some`, and with no auth config
        /// when it is `None`.
        fn new_with_auth(runtime: &tokio::runtime::Runtime, bearer_token: Option<String>) -> Self {
            Self::new_with_auth_config(
                runtime,
                bearer_token.map(|token| crate::auth::AuthConfig::Bearer { token }),
            )
        }

        /// Full constructor: creates a temp-dir database, registers the
        /// stub embedder, spawns the writer thread, builds the reader
        /// pool and a single-tenant registry, and assembles the router
        /// with the given `auth` config.
        ///
        /// `runtime` is used to `block_on` the pieces that are built
        /// inside an async block (the reader pool and the MCP session
        /// store). Panics (via `unwrap`) if any setup step fails.
        fn new_with_auth_config(
            runtime: &tokio::runtime::Runtime,
            auth: Option<crate::auth::AuthConfig>,
        ) -> Self {
            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};

            let tmp = tempfile::TempDir::new().unwrap();
            let dim = 16usize;
            let hnsw: StdArc<dyn VectorIndex + Send + Sync> =
                StdArc::new(StubVectorIndex::new(dim));
            let embedder: StdArc<dyn solo_core::Embedder> =
                StdArc::new(StubEmbedder::new("stub", "v1", dim));
            let path = tmp.path().join("test.db");

            // Register the stub embedder on a short-lived connection
            // that is dropped at the end of this block, before the
            // writer's connection is opened below.
            let embedder_id = {
                let conn = solo_storage::test_support::open_test_db_at(&path);
                get_or_insert_embedder_id(
                    &conn,
                    &EmbedderIdentity {
                        name: "stub".into(),
                        version: "v1".into(),
                        dim: dim as u32,
                        dtype: "f32".into(),
                    },
                )
                .unwrap()
            };

            // The writer takes ownership of its own connection; `join`
            // is the real writer thread's handle, kept by the harness.
            let conn = solo_storage::test_support::open_test_db_at(&path);
            let WriterSpawn { handle, join } =
                WriterActor::spawn_full(conn, hnsw.clone(), tmp.path().to_path_buf(), embedder_id);
            let pool: ReaderPool =
                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });

            // Build a TenantHandle from the assembled parts and wrap it
            // in a single-tenant test registry.
            let tenant_id = solo_core::TenantId::default_tenant();
            let tenant_handle = StdArc::new(TenantHandle::from_parts_for_tests(
                tenant_id.clone(),
                fake_config(dim as u32),
                path.clone(),
                tmp.path().to_path_buf(),
                embedder_id,
                hnsw,
                embedder.clone(),
                handle.clone(),
                // The harness owns ANOTHER WriteHandle clone + the join.
                // We give the TenantHandle a dummy join that immediately
                // returns — it never gets joined because shutdown_all
                // can't get exclusive Arc ownership when the harness
                // also holds a writer clone.
                std::thread::spawn(|| {}),
                pool,
            ));
            let tenant_handle_clone = tenant_handle.clone();

            // Suppress the auto-spawned dummy thread by letting it finish.
            // We DON'T put the real `join` into the TenantHandle because
            // we keep our own clone of `handle` for the shutdown path.
            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
                tmp.path().to_path_buf(),
                key,
                embedder,
                tenant_handle,
            ));
            let registry_clone = registry.clone();

            // v0.11.0 P1: build the MCP session store inside the
            // harness runtime so the background sweep task's
            // `tokio::spawn` finds a runtime context. The store is
            // cheap to construct; the spawn happens once on `new()`.
            let mcp_sessions = runtime.block_on(async { crate::mcp_session::SessionStore::new() });
            let mcp_sessions_clone = mcp_sessions.clone();
            let state = SoloHttpState {
                registry,
                default_tenant: tenant_id,
                user_aliases: Arc::new(Vec::new()),
                mcp_sessions,
            };
            let router = router_with_auth_config(state, auth);
            Harness {
                router,
                _tmp: tmp,
                db_path: path,
                write_handle_extra: Some(handle),
                join: Some(join),
                tenant_handle: tenant_handle_clone,
                registry: registry_clone,
                mcp_sessions: mcp_sessions_clone,
            }
        }

        /// Tear the harness down: drop every handle it holds inside the
        /// runtime context, then wait up to 5 seconds for the writer
        /// thread to exit.
        ///
        /// Panics if the writer thread does not exit within that
        /// window, or if it exited by panicking.
        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
            let join = self.join.take();
            let extra = self.write_handle_extra.take();
            // v0.10.0: the new `tenant_handle` Harness field holds another
            // `Arc<TenantHandle>` that owns its own WriteHandle clone.
            // We must drop our reference here so the inner WriteHandle
            // can be released when the registry drops below. Without
            // this, the writer thread's mpsc never closes and the join
            // times out at 5s.
            let tenant_handle = self.tenant_handle;
            // v0.10.0: same story for the new `registry` Arc clone the
            // tenants-list tests use to seed extra index rows — the
            // state inside the router holds one Arc, this is the
            // other; both must drop before the underlying registry
            // dies and releases its index-mutex / cached handles.
            let registry = self.registry;
            runtime.block_on(async move {
                drop(extra);
                drop(tenant_handle); // drop Harness's direct tenant Arc
                drop(registry); // drop Harness's direct registry Arc
                drop(self.router); // drops state → drops pool inside runtime ctx
                drop(self._tmp);
                if let Some(join) = join {
                    // `JoinHandle::join` has no timeout, so a helper
                    // thread performs the join and reports through a
                    // channel; the bounded wait on that channel runs on
                    // the blocking pool so it doesn't stall an async
                    // worker.
                    let (tx, rx) = std::sync::mpsc::channel();
                    std::thread::spawn(move || {
                        let _ = tx.send(join.join());
                    });
                    tokio::task::spawn_blocking(move || {
                        rx.recv_timeout(std::time::Duration::from_secs(5))
                    })
                    .await
                    .expect("blocking task")
                    .expect("writer thread did not exit within 5s")
                    .expect("writer thread panicked");
                }
            });
        }
    }
6792
6793    fn rt() -> tokio::runtime::Runtime {
6794        tokio::runtime::Builder::new_multi_thread()
6795            .worker_threads(2)
6796            .enable_all()
6797            .build()
6798            .unwrap()
6799    }
6800
6801    /// Issue one HTTP request through the router and capture status +
6802    /// JSON body. `body` may be `None` for GET/DELETE; `auth` adds an
6803    /// `Authorization` header value verbatim (e.g. `"Bearer xyz"`).
6804    async fn call(
6805        router: axum::Router,
6806        method: &str,
6807        uri: &str,
6808        body: Option<Value>,
6809    ) -> (StatusCode, Value) {
6810        call_with_auth(router, method, uri, body, None).await
6811    }
6812
6813    async fn call_with_auth(
6814        router: axum::Router,
6815        method: &str,
6816        uri: &str,
6817        body: Option<Value>,
6818        auth: Option<&str>,
6819    ) -> (StatusCode, Value) {
6820        let mut req_builder = Request::builder()
6821            .method(method)
6822            .uri(uri)
6823            .header("content-type", "application/json");
6824        if let Some(a) = auth {
6825            req_builder = req_builder.header("authorization", a);
6826        }
6827        let req = if let Some(b) = body {
6828            let bytes = serde_json::to_vec(&b).unwrap();
6829            req_builder.body(Body::from(bytes)).unwrap()
6830        } else {
6831            req_builder = req_builder.header("content-length", "0");
6832            req_builder.body(Body::empty()).unwrap()
6833        };
6834        let resp = router.oneshot(req).await.expect("oneshot");
6835        let status = resp.status();
6836        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
6837        let v: Value = if body_bytes.is_empty() {
6838            Value::Null
6839        } else {
6840            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
6841        };
6842        (status, v)
6843    }
6844
6845    async fn call_with_tenant(
6846        router: axum::Router,
6847        method: &str,
6848        uri: &str,
6849        body: Option<Value>,
6850        tenant: &str,
6851    ) -> (StatusCode, Value) {
6852        let mut req_builder = Request::builder()
6853            .method(method)
6854            .uri(uri)
6855            .header("content-type", "application/json")
6856            .header("x-solo-tenant", tenant);
6857        let req = if let Some(b) = body {
6858            let bytes = serde_json::to_vec(&b).unwrap();
6859            req_builder.body(Body::from(bytes)).unwrap()
6860        } else {
6861            req_builder = req_builder.header("content-length", "0");
6862            req_builder.body(Body::empty()).unwrap()
6863        };
6864        let resp = router.oneshot(req).await.expect("oneshot");
6865        let status = resp.status();
6866        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
6867        let v: Value = if body_bytes.is_empty() {
6868            Value::Null
6869        } else {
6870            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
6871        };
6872        (status, v)
6873    }
6874
6875    #[test]
6876    fn health_returns_ok() {
6877        let runtime = rt();
6878        let h = Harness::new(&runtime);
6879        let r = h.router.clone();
6880        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
6881        assert_eq!(status, StatusCode::OK);
6882        h.shutdown(&runtime);
6883    }
6884
    /// `GET /openapi.json` returns a parseable OpenAPI 3.x document that
    /// lists every route the router serves + their request/response schemas.
    /// Acts as a drift detector: if a future commit adds/removes a route
    /// without updating `openapi_spec`, this test fails loudly.
6889    #[test]
6890    fn openapi_json_describes_all_endpoints() {
6891        let runtime = rt();
6892        let h = Harness::new(&runtime);
6893        let r = h.router.clone();
6894        let (status, spec) = runtime.block_on(call(r, "GET", "/openapi.json", None));
6895        assert_eq!(status, StatusCode::OK);
6896        assert!(spec.is_object(), "openapi.json must be a JSON object");
6897
6898        // Top-level shape per OpenAPI 3.1.
6899        assert!(
6900            spec.get("openapi")
6901                .and_then(|v| v.as_str())
6902                .is_some_and(|s| s.starts_with("3.")),
6903            "missing or wrong openapi version: {spec}"
6904        );
6905        assert!(spec.pointer("/info/title").is_some());
6906        assert!(spec.pointer("/info/version").is_some());
6907
6908        // Every route the router serves must be documented.
6909        let paths = spec
6910            .get("paths")
6911            .and_then(|v| v.as_object())
6912            .expect("paths must be an object");
6913        for expected in [
6914            "/health",
6915            "/openapi.json",
6916            "/memory",
6917            "/memory/search",
6918            "/memory/context",
6919            "/memory/consolidate",
6920            "/memory/{id}",
6921            "/backup",
6922            // Path 1 derived-layer endpoints (v0.4.0+):
6923            "/memory/themes",
6924            "/memory/facts_about",
6925            "/memory/entities",
6926            "/memory/contradictions",
6927            "/memory/contradictions/resolve",
6928            // v0.5.0 Priority 3:
6929            "/memory/clusters/{cluster_id}",
6930            // v0.7.0 P6 — document operations:
6931            "/memory/documents",
6932            "/memory/documents/search",
6933            "/memory/documents/{id}",
6934            // v0.9.x/v0.10.x solo-web graph + tenant endpoints:
6935            "/v1/graph/expand",
6936            "/v1/graph/nodes",
6937            "/v1/graph/edges",
6938            "/v1/graph/inspect/{id}",
6939            "/v1/graph/neighbors/{id}",
6940            "/v1/graph/stream",
6941            "/v1/status",
6942            "/v1/tenants",
6943            // v0.10.2+ MCP Streamable HTTP transport:
6944            "/mcp",
6945        ] {
6946            assert!(
6947                paths.contains_key(expected),
6948                "openapi paths missing {expected}: {paths:?}"
6949            );
6950        }
6951
6952        // Method coverage on /memory/documents: must document both POST
6953        // (ingest) and GET (list).
6954        let docs = paths.get("/memory/documents").expect("/memory/documents");
6955        assert!(
6956            docs.get("post").is_some(),
6957            "POST /memory/documents undocumented"
6958        );
6959        assert!(
6960            docs.get("get").is_some(),
6961            "GET /memory/documents undocumented"
6962        );
6963
6964        // Method coverage on /memory/documents/{id}: must document both
6965        // GET (inspect) and DELETE (forget).
6966        let docid = paths
6967            .get("/memory/documents/{id}")
6968            .expect("/memory/documents/{id}");
6969        assert!(
6970            docid.get("get").is_some(),
6971            "GET /memory/documents/{{id}} undocumented"
6972        );
6973        assert!(
6974            docid.get("delete").is_some(),
6975            "DELETE /memory/documents/{{id}} undocumented"
6976        );
6977
6978        // Method coverage on /memory/{id}: must document GET (inspect),
6979        // PATCH (update), and DELETE (forget).
6980        let memid = paths.get("/memory/{id}").expect("memory/{id}");
6981        assert!(
6982            memid.get("get").is_some(),
6983            "GET /memory/{{id}} undocumented"
6984        );
6985        assert!(
6986            memid.get("patch").is_some(),
6987            "PATCH /memory/{{id}} undocumented"
6988        );
6989        assert!(
6990            memid.get("delete").is_some(),
6991            "DELETE /memory/{{id}} undocumented"
6992        );
6993
6994        // Component schemas referenced from paths must be defined.
6995        for schema_name in [
6996            "RememberRequest",
6997            "RememberResponse",
6998            "RecallRequest",
6999            "RecallResult",
7000            "MemoryContextRequest",
7001            "MemoryContextResult",
7002            "MemoryUpdateRequest",
7003            "MemoryUpdateResult",
7004            "EpisodeRecord",
7005            "ApiError",
7006            "ConsolidationScope",
7007            "ConsolidationReport",
7008            // Path 1 derived-layer schemas (v0.4.0+):
7009            "ThemeHit",
7010            "FactHit",
7011            "EntityHit",
7012            "ContradictionHit",
7013            "ContradictionResolveRequest",
7014            "ContradictionResolution",
7015            // v0.5.0 Priority 3:
7016            "ClusterRecord",
7017            // v0.7.0 P6 — document schemas:
7018            "IngestDocumentRequest",
7019            "IngestReport",
7020            "ForgetDocumentReport",
7021            "SearchDocsRequest",
7022            "DocSearchHit",
7023            "DocumentInspectResult",
7024            "DocumentSummary",
7025            // solo-web graph + tenant schemas:
7026            "GraphNode",
7027            "GraphEdge",
7028            "GraphResponse",
7029            "GraphNodesResponse",
7030            "GraphEdgesResponse",
7031            "GraphInspectResponse",
7032            "TenantListItem",
7033            "TenantsListResponse",
7034            "StatusResponse",
7035            // MCP HTTP JSON-RPC schemas:
7036            "JsonRpcRequest",
7037            "JsonRpcResponse",
7038        ] {
7039            let ptr = format!("/components/schemas/{schema_name}");
7040            assert!(
7041                spec.pointer(&ptr).is_some(),
7042                "component schema {schema_name} missing"
7043            );
7044        }
7045
7046        let mcp = paths.get("/mcp").expect("/mcp");
7047        assert!(mcp.get("post").is_some(), "POST /mcp undocumented");
7048        assert!(mcp.get("get").is_some(), "GET /mcp undocumented");
7049
7050        let tenants = paths.get("/v1/tenants").expect("/v1/tenants");
7051        assert!(tenants.get("get").is_some(), "GET /v1/tenants undocumented");
7052
7053        let status_path = paths.get("/v1/status").expect("/v1/status");
7054        let status_get = status_path.get("get").expect("GET /v1/status undocumented");
7055        assert_eq!(
7056            status_get.pointer("/responses/200/content/application~1json/schema/$ref"),
7057            Some(&json!("#/components/schemas/StatusResponse")),
7058            "GET /v1/status must return StatusResponse"
7059        );
7060
7061        let status_schema = spec
7062            .pointer("/components/schemas/StatusResponse")
7063            .expect("StatusResponse schema");
7064        for field in [
7065            "ok",
7066            "version",
7067            "tenant",
7068            "embedder",
7069            "active_tenants",
7070            "mcp",
7071        ] {
7072            assert!(
7073                status_schema
7074                    .pointer("/required")
7075                    .and_then(|v| v.as_array())
7076                    .is_some_and(|required| required.iter().any(|v| v == field)),
7077                "StatusResponse missing required field {field}"
7078            );
7079        }
7080        for ptr in [
7081            "/properties/tenant/required",
7082            "/properties/embedder/required",
7083            "/properties/mcp/required",
7084            "/properties/embedder/properties/dim/minimum",
7085            "/properties/mcp/properties/sessions/minimum",
7086        ] {
7087            assert!(
7088                status_schema.pointer(ptr).is_some(),
7089                "StatusResponse schema missing {ptr}"
7090            );
7091        }
7092
7093        // bearerAuth security scheme is declared (LAN deployments need it).
7094        assert!(
7095            spec.pointer("/components/securitySchemes/bearerAuth")
7096                .is_some(),
7097            "bearerAuth security scheme missing"
7098        );
7099
7100        h.shutdown(&runtime);
7101    }
7102
7103    /// `/openapi.json` must remain unauthenticated even when bearer auth
7104    /// is enabled — the spec describes the API shape, not secrets, and
7105    /// codegen tooling shouldn't need a credential to fetch it.
7106    #[test]
7107    fn openapi_json_is_exempt_from_bearer_auth() {
7108        let runtime = rt();
7109        let h = Harness::new_with_auth(&runtime, Some("super-secret".into()));
7110        let r = h.router.clone();
7111        // No Authorization header → still 200 for /openapi.json.
7112        let (status, _body) = runtime.block_on(call(r, "GET", "/openapi.json", None));
7113        assert_eq!(status, StatusCode::OK);
7114        h.shutdown(&runtime);
7115    }
7116
7117    #[test]
7118    fn remember_returns_memory_id() {
7119        let runtime = rt();
7120        let h = Harness::new(&runtime);
7121        let r = h.router.clone();
7122        let (status, body) = runtime.block_on(call(
7123            r,
7124            "POST",
7125            "/memory",
7126            Some(json!({ "content": "http harness test" })),
7127        ));
7128        assert_eq!(status, StatusCode::OK);
7129        let mid = body.get("memory_id").and_then(|v| v.as_str()).unwrap();
7130        assert_eq!(mid.len(), 36, "uuid length");
7131        h.shutdown(&runtime);
7132    }
7133
7134    #[test]
7135    fn update_memory_rewrites_content_and_inspect_sees_it() {
7136        let runtime = rt();
7137        let h = Harness::new(&runtime);
7138        let r = h.router.clone();
7139        let (status, body) = runtime.block_on(call(
7140            r.clone(),
7141            "POST",
7142            "/memory",
7143            Some(json!({ "content": "old transport memory" })),
7144        ));
7145        assert_eq!(status, StatusCode::OK);
7146        let mid = body
7147            .get("memory_id")
7148            .and_then(|v| v.as_str())
7149            .expect("memory_id")
7150            .to_string();
7151
7152        let (status, body) = runtime.block_on(call(
7153            r.clone(),
7154            "PATCH",
7155            &format!("/memory/{mid}"),
7156            Some(json!({ "content": "new transport memory" })),
7157        ));
7158        assert_eq!(status, StatusCode::OK, "update failed: {body}");
7159        assert_eq!(
7160            body.get("content").and_then(|v| v.as_str()),
7161            Some("new transport memory")
7162        );
7163
7164        let (status, body) = runtime.block_on(call(r, "GET", &format!("/memory/{mid}"), None));
7165        assert_eq!(status, StatusCode::OK);
7166        assert_eq!(
7167            body.get("content").and_then(|v| v.as_str()),
7168            Some("new transport memory")
7169        );
7170        h.shutdown(&runtime);
7171    }
7172
7173    #[test]
7174    fn empty_content_returns_400() {
7175        let runtime = rt();
7176        let h = Harness::new(&runtime);
7177        let r = h.router.clone();
7178        let (status, body) =
7179            runtime.block_on(call(r, "POST", "/memory", Some(json!({ "content": "" }))));
7180        assert_eq!(status, StatusCode::BAD_REQUEST);
7181        assert!(
7182            body.get("error")
7183                .and_then(|e| e.as_str())
7184                .map(|s| s.contains("must not be empty"))
7185                .unwrap_or(false),
7186            "got: {body}"
7187        );
7188        h.shutdown(&runtime);
7189    }
7190
7191    #[test]
7192    fn empty_query_returns_400() {
7193        let runtime = rt();
7194        let h = Harness::new(&runtime);
7195        let r = h.router.clone();
7196        let (status, body) = runtime.block_on(call(
7197            r,
7198            "POST",
7199            "/memory/search",
7200            Some(json!({ "query": "" })),
7201        ));
7202        assert_eq!(status, StatusCode::BAD_REQUEST);
7203        assert!(
7204            body.get("error")
7205                .and_then(|e| e.as_str())
7206                .map(|s| s.contains("must not be empty"))
7207                .unwrap_or(false),
7208            "got: {body}"
7209        );
7210        h.shutdown(&runtime);
7211    }
7212
7213    #[test]
7214    fn inspect_unknown_returns_404() {
7215        let runtime = rt();
7216        let h = Harness::new(&runtime);
7217        let r = h.router.clone();
7218        let (status, body) = runtime.block_on(call(
7219            r,
7220            "GET",
7221            "/memory/00000000-0000-7000-8000-000000000000",
7222            None,
7223        ));
7224        assert_eq!(status, StatusCode::NOT_FOUND);
7225        assert!(body.get("error").is_some(), "got: {body}");
7226        h.shutdown(&runtime);
7227    }
7228
7229    #[test]
7230    fn inspect_invalid_id_returns_400() {
7231        let runtime = rt();
7232        let h = Harness::new(&runtime);
7233        let r = h.router.clone();
7234        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/not-a-uuid", None));
7235        assert_eq!(status, StatusCode::BAD_REQUEST);
7236        h.shutdown(&runtime);
7237    }
7238
7239    #[test]
7240    fn forget_unknown_returns_404() {
7241        let runtime = rt();
7242        let h = Harness::new(&runtime);
7243        let r = h.router.clone();
7244        let (status, _body) = runtime.block_on(call(
7245            r,
7246            "DELETE",
7247            "/memory/00000000-0000-7000-8000-000000000000",
7248            None,
7249        ));
7250        assert_eq!(status, StatusCode::NOT_FOUND);
7251        h.shutdown(&runtime);
7252    }
7253
7254    /// `POST /memory/consolidate` runs the cluster pass and returns
7255    /// the report as JSON. With an empty body, `ConsolidationScope`
7256    /// defaults to unbounded; with a non-empty body, the
7257    /// `window_days` field is honored. The Harness's writer is
7258    /// spawned without a Steward, so `abstractions_built` stays 0
7259    /// even when `clusters_built` is nonzero — same posture as the
7260    /// daemon today.
7261    #[test]
7262    fn consolidate_endpoint_returns_report() {
7263        let runtime = rt();
7264        let h = Harness::new(&runtime);
7265        let r = h.router.clone();
7266        runtime.block_on(async move {
7267            // Empty DB → all-zero report; structural assertion only.
7268            let (status, body) = call(r.clone(), "POST", "/memory/consolidate", None).await;
7269            assert_eq!(status, StatusCode::OK);
7270            for field in [
7271                "episodes_seen",
7272                "clusters_built",
7273                "episodes_clustered",
7274                "abstractions_built",
7275                "triples_built",
7276                "contradictions_found",
7277            ] {
7278                assert!(
7279                    body.get(field).and_then(|v| v.as_u64()).is_some(),
7280                    "missing field {field}: {body}"
7281                );
7282            }
7283            assert_eq!(body["episodes_seen"], 0);
7284            assert_eq!(body["clusters_built"], 0);
7285
7286            // Non-empty body with window_days → still 200; unmistakable
7287            // shape round-trips through ConsolidationScope's serde.
7288            let (status2, _body2) = call(
7289                r,
7290                "POST",
7291                "/memory/consolidate",
7292                Some(json!({ "window_days": 7 })),
7293            )
7294            .await;
7295            assert_eq!(status2, StatusCode::OK);
7296        });
7297        h.shutdown(&runtime);
7298    }
7299
7300    #[test]
7301    fn auth_required_routes_reject_missing_token() {
7302        let runtime = rt();
7303        let h = Harness::new_with_auth(&runtime, Some("secret-xyz".into()));
7304        let r = h.router.clone();
7305        runtime.block_on(async move {
7306            // No Authorization header → 401.
7307            let (status, _body) = call(
7308                r.clone(),
7309                "POST",
7310                "/memory",
7311                Some(json!({ "content": "x" })),
7312            )
7313            .await;
7314            assert_eq!(status, StatusCode::UNAUTHORIZED);
7315
7316            // Wrong token → 401.
7317            let (status, _body) = call_with_auth(
7318                r.clone(),
7319                "POST",
7320                "/memory",
7321                Some(json!({ "content": "x" })),
7322                Some("Bearer wrong-token"),
7323            )
7324            .await;
7325            assert_eq!(status, StatusCode::UNAUTHORIZED);
7326
7327            // Correct token → handler runs (200).
7328            let (status, body) = call_with_auth(
7329                r.clone(),
7330                "POST",
7331                "/memory",
7332                Some(json!({ "content": "authed" })),
7333                Some("Bearer secret-xyz"),
7334            )
7335            .await;
7336            assert_eq!(status, StatusCode::OK);
7337            assert!(body.get("memory_id").is_some());
7338        });
7339        h.shutdown(&runtime);
7340    }
7341
7342    #[test]
7343    fn health_endpoint_does_not_require_auth() {
7344        let runtime = rt();
7345        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
7346        let r = h.router.clone();
7347        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
7348        // Liveness probes should work without credentials.
7349        assert_eq!(status, StatusCode::OK);
7350        h.shutdown(&runtime);
7351    }
7352
7353    #[test]
7354    fn auth_response_includes_www_authenticate_header() {
7355        // Verify the WWW-Authenticate hint that lets a well-behaved
7356        // client know it's a bearer-auth scheme. We check via raw
7357        // request → response (oneshot returns Response, but our
7358        // call() helper drops the headers; build the request manually).
7359        let runtime = rt();
7360        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
7361        let r = h.router.clone();
7362        runtime.block_on(async move {
7363            let req = Request::builder()
7364                .method("POST")
7365                .uri("/memory")
7366                .header("content-type", "application/json")
7367                .body(Body::from(
7368                    serde_json::to_vec(&json!({ "content": "x" })).unwrap(),
7369                ))
7370                .unwrap();
7371            let resp = r.oneshot(req).await.unwrap();
7372            assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
7373            let www = resp
7374                .headers()
7375                .get("www-authenticate")
7376                .and_then(|v| v.to_str().ok())
7377                .unwrap_or("");
7378            assert!(
7379                www.starts_with("Bearer"),
7380                "expected WWW-Authenticate: Bearer..., got: {www}"
7381            );
7382        });
7383        h.shutdown(&runtime);
7384    }
7385
7386    // ---------------------------------------------------------------------
7387    // v0.8.0 P3: OIDC end-to-end. Spin up a fake IdP (wiremock) that
7388    // serves an OIDC discovery doc + JWKS, mint a token claiming
7389    // `solo_tenant = "default"`, and verify it routes through the
7390    // middleware + TenantExtractor + handler.
7391    // ---------------------------------------------------------------------
7392
7393    fn base64_url_for_test(bytes: &[u8]) -> String {
7394        use base64::Engine;
7395        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes)
7396    }
7397
7398    /// Spin up a single-purpose fake OIDC IdP for these tests. Returns
7399    /// (mock_server, discovery_url, secret, kid).
7400    async fn spin_fake_idp() -> (wiremock::MockServer, String, Vec<u8>, &'static str) {
7401        use wiremock::matchers::{method, path};
7402        use wiremock::{Mock, MockServer, ResponseTemplate};
7403        let server = MockServer::start().await;
7404        let secret = b"http-test-secret-for-hmac-fixture".to_vec();
7405        let kid = "http-test-kid";
7406        let discovery = serde_json::json!({
7407            "issuer": server.uri(),
7408            "jwks_uri": format!("{}/jwks", server.uri()),
7409        });
7410        Mock::given(method("GET"))
7411            .and(path("/.well-known/openid-configuration"))
7412            .respond_with(ResponseTemplate::new(200).set_body_json(discovery))
7413            .mount(&server)
7414            .await;
7415        let jwks = serde_json::json!({
7416            "keys": [
7417                {
7418                    "kty": "oct",
7419                    "kid": kid,
7420                    "alg": "HS256",
7421                    "k": base64_url_for_test(&secret),
7422                }
7423            ]
7424        });
7425        Mock::given(method("GET"))
7426            .and(path("/jwks"))
7427            .respond_with(ResponseTemplate::new(200).set_body_json(jwks))
7428            .mount(&server)
7429            .await;
7430        let discovery_url = format!("{}/.well-known/openid-configuration", server.uri());
7431        (server, discovery_url, secret, kid)
7432    }
7433
7434    fn mint_idp_token(
7435        server_uri: &str,
7436        kid: &str,
7437        secret: &[u8],
7438        tenant_claim: &str,
7439        audience: &str,
7440    ) -> String {
7441        use jsonwebtoken::{Algorithm, EncodingKey, Header};
7442        let mut header = Header::new(Algorithm::HS256);
7443        header.kid = Some(kid.to_string());
7444        let now = std::time::SystemTime::now()
7445            .duration_since(std::time::UNIX_EPOCH)
7446            .unwrap()
7447            .as_secs();
7448        let claims = serde_json::json!({
7449            "iss": server_uri,
7450            "sub": "test-user-1",
7451            "aud": audience,
7452            "exp": now + 600,
7453            "iat": now,
7454            "solo_tenant": tenant_claim,
7455        });
7456        jsonwebtoken::encode(&header, &claims, &EncodingKey::from_secret(secret))
7457            .expect("mint token")
7458    }
7459
7460    #[test]
7461    fn http_oidc_accept_resolves_to_tenant_from_claim() {
7462        let runtime = rt();
7463        let (fake_server, discovery_url, secret, kid) =
7464            runtime.block_on(async { spin_fake_idp().await });
7465        let server_uri = fake_server.uri();
7466        // Keep the wiremock server alive for the duration of this test.
7467        let _server_guard = fake_server;
7468
7469        let auth = crate::auth::AuthConfig::Oidc {
7470            discovery_url,
7471            audience: "test-audience".to_string(),
7472            tenant_claim_name: "solo_tenant".to_string(),
7473        };
7474        let h = Harness::new_with_auth_config(&runtime, Some(auth));
7475        let r = h.router.clone();
7476
7477        // Mint a token claiming the harness's default tenant.
7478        let token = mint_idp_token(&server_uri, kid, &secret, "default", "test-audience");
7479
7480        runtime.block_on(async move {
7481            // POST /memory with a valid OIDC token → handler runs, returns memory_id.
7482            let (status, body) = call_with_auth(
7483                r.clone(),
7484                "POST",
7485                "/memory",
7486                Some(json!({ "content": "oidc-routed content" })),
7487                Some(&format!("Bearer {token}")),
7488            )
7489            .await;
7490            assert_eq!(status, StatusCode::OK, "got body: {body}");
7491            assert!(body.get("memory_id").is_some(), "no memory_id in {body}");
7492        });
7493        h.shutdown(&runtime);
7494    }
7495
7496    #[test]
7497    fn http_oidc_reject_missing_token_returns_401() {
7498        let runtime = rt();
7499        let (fake_server, discovery_url, _secret, _kid) =
7500            runtime.block_on(async { spin_fake_idp().await });
7501        let _server_guard = fake_server;
7502        let auth = crate::auth::AuthConfig::Oidc {
7503            discovery_url,
7504            audience: "test-audience".to_string(),
7505            tenant_claim_name: "solo_tenant".to_string(),
7506        };
7507        let h = Harness::new_with_auth_config(&runtime, Some(auth));
7508        let r = h.router.clone();
7509        runtime.block_on(async move {
7510            // No Authorization header.
7511            let (status, _body) = call(
7512                r.clone(),
7513                "POST",
7514                "/memory",
7515                Some(json!({ "content": "x" })),
7516            )
7517            .await;
7518            assert_eq!(status, StatusCode::UNAUTHORIZED);
7519
7520            // Garbage token → 401 (invalid signature / not a JWT).
7521            let (status, _body) = call_with_auth(
7522                r.clone(),
7523                "POST",
7524                "/memory",
7525                Some(json!({ "content": "x" })),
7526                Some("Bearer not-a-real-jwt"),
7527            )
7528            .await;
7529            assert_eq!(status, StatusCode::UNAUTHORIZED);
7530        });
7531        h.shutdown(&runtime);
7532    }
7533
7534    #[test]
7535    fn full_remember_recall_inspect_forget_round_trip() {
7536        let runtime = rt();
7537        let h = Harness::new(&runtime);
7538        let r = h.router.clone();
7539        runtime.block_on(async move {
7540            // POST /memory
7541            let (status, body) = call(
7542                r.clone(),
7543                "POST",
7544                "/memory",
7545                Some(json!({ "content": "round-trip content" })),
7546            )
7547            .await;
7548            assert_eq!(status, StatusCode::OK);
7549            let mid = body
7550                .get("memory_id")
7551                .and_then(|v| v.as_str())
7552                .unwrap()
7553                .to_string();
7554
7555            // POST /memory/search — exact-match (StubEmbedder) returns the row.
7556            let (status, body) = call(
7557                r.clone(),
7558                "POST",
7559                "/memory/search",
7560                Some(json!({ "query": "round-trip content", "limit": 5 })),
7561            )
7562            .await;
7563            assert_eq!(status, StatusCode::OK);
7564            assert!(
7565                body.get("candidates_considered")
7566                    .and_then(|v| v.as_u64())
7567                    .is_some_and(|n| n >= 1),
7568                "recall should expose pre-filter candidate diagnostics: {body}"
7569            );
7570            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
7571            assert!(
7572                hits.iter().any(
7573                    |h| h.get("content").and_then(|c| c.as_str()) == Some("round-trip content")
7574                ),
7575                "expected hit with content; got: {body}"
7576            );
7577
7578            // GET /memory/{id}
7579            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
7580            assert_eq!(status, StatusCode::OK);
7581            assert_eq!(body.get("status").and_then(|v| v.as_str()), Some("active"));
7582
7583            // DELETE /memory/{id}
7584            let (status, _body) = call(r.clone(), "DELETE", &format!("/memory/{mid}"), None).await;
7585            assert_eq!(status, StatusCode::NO_CONTENT);
7586
7587            // GET again — still readable but status='forgotten'
7588            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
7589            assert_eq!(status, StatusCode::OK);
7590            assert_eq!(
7591                body.get("status").and_then(|v| v.as_str()),
7592                Some("forgotten")
7593            );
7594
7595            // POST /memory/search — forgotten row excluded.
7596            let (status, body) = call(
7597                r.clone(),
7598                "POST",
7599                "/memory/search",
7600                Some(json!({ "query": "round-trip content", "limit": 5 })),
7601            )
7602            .await;
7603            assert_eq!(status, StatusCode::OK);
7604            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
7605            assert!(
7606                hits.iter()
7607                    .all(|h| h.get("memory_id").and_then(|m| m.as_str()) != Some(mid.as_str())),
7608                "forgotten row should be excluded from recall: {body}"
7609            );
7610        });
7611        h.shutdown(&runtime);
7612    }
7613
7614    #[test]
7615    fn memory_context_endpoint_returns_bundle() {
7616        let runtime = rt();
7617        let h = Harness::new(&runtime);
7618        let r = h.router.clone();
7619        runtime.block_on(async move {
7620            let (status, _body) = call(
7621                r.clone(),
7622                "POST",
7623                "/memory",
7624                Some(json!({ "content": "http memory context needle" })),
7625            )
7626            .await;
7627            assert_eq!(status, StatusCode::OK);
7628
7629            let (status, body) = call(
7630                r,
7631                "POST",
7632                "/memory/context",
7633                Some(json!({ "query": "memory context needle", "limit": 5 })),
7634            )
7635            .await;
7636            assert_eq!(status, StatusCode::OK);
7637            assert_eq!(
7638                body.get("query").and_then(|v| v.as_str()),
7639                Some("memory context needle")
7640            );
7641            let hits = body
7642                .pointer("/recall/hits")
7643                .and_then(|v| v.as_array())
7644                .unwrap_or_else(|| panic!("missing /recall/hits: {body}"));
7645            assert!(
7646                hits.iter()
7647                    .any(|h| h.get("content").and_then(|c| c.as_str())
7648                        == Some("http memory context needle")),
7649                "expected context recall hit: {body}"
7650            );
7651            assert!(body.get("themes").is_some_and(|v| v.is_array()));
7652            assert!(body.get("facts").is_some_and(|v| v.is_array()));
7653            assert!(body.get("contradictions").is_some_and(|v| v.is_array()));
7654        });
7655        h.shutdown(&runtime);
7656    }
7657
7658    // Path 1 derived-layer endpoint tests (v0.4.0+). Wire-path only —
7659    // the actual content correctness is covered by solo-query::derived's
7660    // own tests (Sub-task A). These verify the HTTP shape: GET routing,
7661    // Query-string param parsing, JSON-array response body, validation
7662    // 400s for invalid inputs.
7663
7664    #[test]
7665    fn themes_endpoint_returns_empty_array_on_empty_db() {
7666        let runtime = rt();
7667        let h = Harness::new(&runtime);
7668        let r = h.router.clone();
7669        let (status, body) = runtime.block_on(call(r, "GET", "/memory/themes", None));
7670        assert_eq!(status, StatusCode::OK);
7671        assert!(body.is_array(), "expected array, got {body}");
7672        assert_eq!(body.as_array().unwrap().len(), 0);
7673        h.shutdown(&runtime);
7674    }
7675
7676    #[test]
7677    fn themes_endpoint_passes_through_query_params() {
7678        let runtime = rt();
7679        let h = Harness::new(&runtime);
7680        let r = h.router.clone();
7681        let (status, body) = runtime.block_on(call(
7682            r,
7683            "GET",
7684            "/memory/themes?window_days=7&limit=20",
7685            None,
7686        ));
7687        assert_eq!(status, StatusCode::OK);
7688        assert!(body.is_array(), "expected array, got {body}");
7689        h.shutdown(&runtime);
7690    }
7691
7692    #[test]
7693    fn facts_about_endpoint_requires_subject() {
7694        let runtime = rt();
7695        let h = Harness::new(&runtime);
7696        let r = h.router.clone();
7697        // Missing subject — axum's Query extractor 422 (Unprocessable
7698        // Entity) on missing required field; some axum versions
7699        // surface as 400. Accept either.
7700        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/facts_about", None));
7701        assert!(
7702            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
7703            "expected 400 or 422 for missing subject, got {status}"
7704        );
7705        h.shutdown(&runtime);
7706    }
7707
7708    #[test]
7709    fn facts_about_endpoint_rejects_blank_subject() {
7710        let runtime = rt();
7711        let h = Harness::new(&runtime);
7712        let r = h.router.clone();
7713        // Whitespace-only subject reaches the handler then trips its
7714        // own validation → ApiError::bad_request → 400.
7715        let (status, body) =
7716            runtime.block_on(call(r, "GET", "/memory/facts_about?subject=%20%20", None));
7717        assert_eq!(status, StatusCode::BAD_REQUEST);
7718        assert!(
7719            body.get("error")
7720                .and_then(|v| v.as_str())
7721                .is_some_and(|s| s.contains("subject")),
7722            "expected error mentioning subject, got {body}"
7723        );
7724        h.shutdown(&runtime);
7725    }
7726
7727    #[test]
7728    fn facts_about_endpoint_returns_empty_array_for_unknown_subject() {
7729        let runtime = rt();
7730        let h = Harness::new(&runtime);
7731        let r = h.router.clone();
7732        let (status, body) = runtime.block_on(call(
7733            r,
7734            "GET",
7735            "/memory/facts_about?subject=NobodyKnows",
7736            None,
7737        ));
7738        assert_eq!(status, StatusCode::OK);
7739        assert_eq!(body.as_array().unwrap().len(), 0);
7740        h.shutdown(&runtime);
7741    }
7742
7743    #[test]
7744    fn facts_about_endpoint_parses_include_as_object_query_param() {
7745        // v0.5.1 P8: `?include_as_object=true` must parse cleanly
7746        // through the `Query<FactsAboutQuery>` extractor. If the
7747        // struct field is missing or wrongly typed, axum returns
7748        // 400/422 before reaching the handler. We don't seed
7749        // triples; we only need the request to reach the handler
7750        // and produce a normal 200 + empty array. Mirrors
7751        // `inspect_cluster_endpoint_passes_full_content_query_param`.
7752        let runtime = rt();
7753        let h = Harness::new(&runtime);
7754        let r = h.router.clone();
7755        let (status, body) = runtime.block_on(call(
7756            r,
7757            "GET",
7758            "/memory/facts_about?subject=Maya&include_as_object=true",
7759            None,
7760        ));
7761        assert_eq!(
7762            status,
7763            StatusCode::OK,
7764            "expected 200 with include_as_object query param, got {status}"
7765        );
7766        assert!(body.is_array());
7767        h.shutdown(&runtime);
7768    }
7769
7770    #[test]
7771    fn entities_endpoint_returns_matching_graph_entities() {
7772        let runtime = rt();
7773        let h = Harness::new(&runtime);
7774        {
7775            let conn = h.open_db();
7776            let memory_id = MemoryId::new().to_string();
7777            let rowid = seed_episode(&conn, &memory_id, 100, "Alice works with graph transport");
7778            seed_triple_row(
7779                &conn,
7780                "t-http-entity-1",
7781                "Alice",
7782                "knows",
7783                "Bob",
7784                Some(rowid),
7785            );
7786            seed_triple_row(
7787                &conn,
7788                "t-http-entity-2",
7789                "Alicia",
7790                "works_at",
7791                "Solo",
7792                Some(rowid),
7793            );
7794        }
7795
7796        let r = h.router.clone();
7797        let (status, body) =
7798            runtime.block_on(call(r, "GET", "/memory/entities?query=Ali&limit=5", None));
7799        assert_eq!(status, StatusCode::OK);
7800        let arr = body.as_array().expect("entities array");
7801        assert!(
7802            arr.iter()
7803                .any(|v| v.get("entity_id").and_then(|id| id.as_str()) == Some("Alice")),
7804            "expected Alice entity, got {body}"
7805        );
7806        h.shutdown(&runtime);
7807    }
7808
7809    #[test]
7810    fn inspect_cluster_endpoint_unknown_id_returns_404() {
7811        // Maps `Error::NotFound` from `solo_query::inspect_cluster`
7812        // through `ApiError::from` → 404. Mirrors the unknown-memory
7813        // case for `GET /memory/{id}`.
7814        let runtime = rt();
7815        let h = Harness::new(&runtime);
7816        let r = h.router.clone();
7817        let (status, body) =
7818            runtime.block_on(call(r, "GET", "/memory/clusters/no-such-cluster", None));
7819        assert_eq!(status, StatusCode::NOT_FOUND);
7820        assert!(
7821            body.get("error")
7822                .and_then(|v| v.as_str())
7823                .is_some_and(|s| s.contains("no-such-cluster")),
7824            "expected error mentioning cluster id, got {body}"
7825        );
7826        h.shutdown(&runtime);
7827    }
7828
7829    #[test]
7830    fn inspect_cluster_endpoint_passes_full_content_query_param() {
7831        // Even with no matching cluster (→ 404), the request must
7832        // reach the handler — proves the `?full_content=true` query
7833        // string parses cleanly (Query<InspectClusterQuery>::default
7834        // path didn't choke). If we accidentally fail at the extractor
7835        // we'd get a 400/422, not the expected 404.
7836        let runtime = rt();
7837        let h = Harness::new(&runtime);
7838        let r = h.router.clone();
7839        let (status, _body) = runtime.block_on(call(
7840            r,
7841            "GET",
7842            "/memory/clusters/missing?full_content=true",
7843            None,
7844        ));
7845        assert_eq!(status, StatusCode::NOT_FOUND);
7846        h.shutdown(&runtime);
7847    }
7848
7849    #[test]
7850    fn contradictions_endpoint_returns_empty_array_on_empty_db() {
7851        let runtime = rt();
7852        let h = Harness::new(&runtime);
7853        let r = h.router.clone();
7854        let (status, body) = runtime.block_on(call(r, "GET", "/memory/contradictions", None));
7855        assert_eq!(status, StatusCode::OK);
7856        assert!(body.is_array());
7857        assert_eq!(body.as_array().unwrap().len(), 0);
7858        h.shutdown(&runtime);
7859    }
7860
7861    #[test]
7862    fn contradiction_resolve_endpoint_updates_lifecycle() {
7863        let runtime = rt();
7864        let h = Harness::new(&runtime);
7865        {
7866            let conn = h.open_db();
7867            let memory_id = MemoryId::new().to_string();
7868            let rowid = seed_episode(&conn, &memory_id, 100, "contradiction source");
7869            seed_triple_row(&conn, "t-http-a", "Alice", "likes", "tea", Some(rowid));
7870            seed_triple_row(&conn, "t-http-b", "Alice", "likes", "coffee", Some(rowid));
7871            seed_contradiction_row(&conn, "t-http-a", "t-http-b", "other");
7872        }
7873
7874        let r = h.router.clone();
7875        let (status, body) = runtime.block_on(call(
7876            r.clone(),
7877            "POST",
7878            "/memory/contradictions/resolve",
7879            Some(json!({
7880                "a_id": "t-http-a",
7881                "b_id": "t-http-b",
7882                "kind": "other",
7883                "resolution_note": "tea is current",
7884                "winning_triple_id": "t-http-a"
7885            })),
7886        ));
7887        assert_eq!(status, StatusCode::OK, "resolve failed: {body}");
7888        assert_eq!(
7889            body.get("status").and_then(|v| v.as_str()),
7890            Some("resolved")
7891        );
7892        assert!(
7893            body.get("resolved_at_ms")
7894                .and_then(|v| v.as_i64())
7895                .is_some()
7896        );
7897
7898        let (status, body) = runtime.block_on(call(r, "GET", "/memory/contradictions", None));
7899        assert_eq!(status, StatusCode::OK);
7900        assert_eq!(
7901            body.pointer("/0/status").and_then(|v| v.as_str()),
7902            Some("resolved")
7903        );
7904        h.shutdown(&runtime);
7905    }
7906
7907    #[test]
7908    fn derived_endpoints_require_bearer_when_auth_enabled() {
7909        let runtime = rt();
7910        let h = Harness::new_with_auth(&runtime, Some("secret-token".to_string()));
7911        // Each of the three new endpoints should reject missing token.
7912        // Per the existing tests' shutdown-timing comment: don't hold a
7913        // long-lived router clone across multiple iterations — drop the
7914        // clone before each subsequent oneshot, and don't keep a `let r =
7915        // h.router.clone()` alive across h.shutdown(). Re-clone per
7916        // iteration; the per-call clone is consumed by oneshot.
7917        for path in [
7918            "/memory/themes",
7919            "/memory/facts_about?subject=Sam",
7920            "/memory/entities?query=Sam",
7921            "/memory/contradictions",
7922            "/memory/clusters/any-id",
7923        ] {
7924            let (status, _) = runtime.block_on(call(h.router.clone(), "GET", path, None));
7925            assert_eq!(
7926                status,
7927                StatusCode::UNAUTHORIZED,
7928                "{path} should 401 without token"
7929            );
7930        }
7931        h.shutdown(&runtime);
7932    }
7933
7934    // ---- Document endpoints (v0.7.0 P6) ----
7935    //
7936    // Wire-path coverage. The `Harness` here uses
7937    // `WriterActor::spawn_full` without an embedder — same shape as the
7938    // existing handler tests. Ingest/search would fail at the writer
7939    // boundary with "writer has no embedder", but every other path
7940    // (404s, malformed ids, route shape, bearer auth gating, OpenAPI
7941    // documentation) is exercisable. Real end-to-end ingest→search
7942    // round-trip lives in `mcp_smoke.rs` where a real subprocess runs
7943    // with a fully-wired writer.
7944
7945    #[test]
7946    fn list_documents_endpoint_returns_empty_array_on_empty_db() {
7947        let runtime = rt();
7948        let h = Harness::new(&runtime);
7949        let r = h.router.clone();
7950        let (status, body) = runtime.block_on(call(r, "GET", "/memory/documents", None));
7951        assert_eq!(status, StatusCode::OK);
7952        assert!(body.is_array(), "expected array, got {body}");
7953        assert_eq!(body.as_array().unwrap().len(), 0);
7954        h.shutdown(&runtime);
7955    }
7956
7957    #[test]
7958    fn list_documents_endpoint_parses_query_params() {
7959        let runtime = rt();
7960        let h = Harness::new(&runtime);
7961        let r = h.router.clone();
7962        let (status, body) = runtime.block_on(call(
7963            r,
7964            "GET",
7965            "/memory/documents?limit=5&offset=0&include_forgotten=true",
7966            None,
7967        ));
7968        assert_eq!(status, StatusCode::OK);
7969        assert!(body.is_array());
7970        h.shutdown(&runtime);
7971    }
7972
7973    #[test]
7974    fn ingest_document_endpoint_rejects_empty_path() {
7975        let runtime = rt();
7976        let h = Harness::new(&runtime);
7977        let r = h.router.clone();
7978        let (status, body) = runtime.block_on(call(
7979            r,
7980            "POST",
7981            "/memory/documents",
7982            Some(json!({ "path": "" })),
7983        ));
7984        assert_eq!(status, StatusCode::BAD_REQUEST);
7985        assert!(
7986            body.get("error")
7987                .and_then(|v| v.as_str())
7988                .is_some_and(|s| s.contains("path")),
7989            "expected error mentioning path, got {body}"
7990        );
7991        h.shutdown(&runtime);
7992    }
7993
7994    #[test]
7995    fn search_docs_endpoint_rejects_empty_query() {
7996        let runtime = rt();
7997        let h = Harness::new(&runtime);
7998        let r = h.router.clone();
7999        let (status, body) = runtime.block_on(call(
8000            r,
8001            "POST",
8002            "/memory/documents/search",
8003            Some(json!({ "query": "   " })),
8004        ));
8005        assert_eq!(status, StatusCode::BAD_REQUEST);
8006        assert!(
8007            body.get("error")
8008                .and_then(|v| v.as_str())
8009                .is_some_and(|s| s.contains("must not be empty") || s.contains("doc_search")),
8010            "expected error mentioning empty query, got {body}"
8011        );
8012        h.shutdown(&runtime);
8013    }
8014
8015    #[test]
8016    fn inspect_document_endpoint_unknown_id_returns_404() {
8017        let runtime = rt();
8018        let h = Harness::new(&runtime);
8019        let r = h.router.clone();
8020        let (status, body) = runtime.block_on(call(
8021            r,
8022            "GET",
8023            "/memory/documents/00000000-0000-7000-8000-000000000000",
8024            None,
8025        ));
8026        assert_eq!(status, StatusCode::NOT_FOUND);
8027        assert!(body.get("error").is_some(), "got: {body}");
8028        h.shutdown(&runtime);
8029    }
8030
8031    #[test]
8032    fn inspect_document_endpoint_rejects_malformed_id() {
8033        let runtime = rt();
8034        let h = Harness::new(&runtime);
8035        let r = h.router.clone();
8036        let (status, _body) =
8037            runtime.block_on(call(r, "GET", "/memory/documents/not-a-uuid", None));
8038        assert_eq!(status, StatusCode::BAD_REQUEST);
8039        h.shutdown(&runtime);
8040    }
8041
8042    #[test]
8043    fn forget_document_endpoint_unknown_id_returns_404() {
8044        // Valid UUID format; no row exists → writer's `forget_document`
8045        // returns Error::NotFound → mapped to 404 by `ApiError::from`.
8046        let runtime = rt();
8047        let h = Harness::new(&runtime);
8048        let r = h.router.clone();
8049        let (status, _body) = runtime.block_on(call(
8050            r,
8051            "DELETE",
8052            "/memory/documents/00000000-0000-7000-8000-000000000000",
8053            None,
8054        ));
8055        assert_eq!(status, StatusCode::NOT_FOUND);
8056        h.shutdown(&runtime);
8057    }
8058
8059    #[test]
8060    fn forget_document_endpoint_rejects_malformed_id() {
8061        let runtime = rt();
8062        let h = Harness::new(&runtime);
8063        let r = h.router.clone();
8064        let (status, _body) =
8065            runtime.block_on(call(r, "DELETE", "/memory/documents/not-a-uuid", None));
8066        assert_eq!(status, StatusCode::BAD_REQUEST);
8067        h.shutdown(&runtime);
8068    }
8069
8070    #[test]
8071    fn document_endpoints_require_bearer_when_auth_enabled() {
8072        // All five doc endpoints sit behind the same authed Router and
8073        // must 401 without the bearer token. Mirrors
8074        // `derived_endpoints_require_bearer_when_auth_enabled`.
8075        let runtime = rt();
8076        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
8077        let cases: &[(&str, &str, Option<Value>)] = &[
8078            ("POST", "/memory/documents", Some(json!({ "path": "/x" }))),
8079            ("GET", "/memory/documents", None),
8080            (
8081                "POST",
8082                "/memory/documents/search",
8083                Some(json!({ "query": "x" })),
8084            ),
8085            (
8086                "GET",
8087                "/memory/documents/00000000-0000-7000-8000-000000000000",
8088                None,
8089            ),
8090            (
8091                "DELETE",
8092                "/memory/documents/00000000-0000-7000-8000-000000000000",
8093                None,
8094            ),
8095        ];
8096        for (method, path, body) in cases {
8097            let (status, _) = runtime.block_on(call(h.router.clone(), method, path, body.clone()));
8098            assert_eq!(
8099                status,
8100                StatusCode::UNAUTHORIZED,
8101                "{method} {path} should 401 without token"
8102            );
8103        }
8104        h.shutdown(&runtime);
8105    }
8106
8107    #[test]
8108    fn document_endpoints_accept_correct_bearer_token() {
8109        // Sanity check: with the right token, the same five endpoints
8110        // pass auth and reach the handler. We only assert that the
8111        // status code is NOT 401 — exact downstream behaviour depends
8112        // on the harness (no embedder → ingest/search would 500; empty
8113        // DB → list/inspect/forget return 200/404).
8114        let runtime = rt();
8115        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
8116        runtime.block_on(async {
8117            // GET /memory/documents → 200 + empty array (auth passes).
8118            let (status, _) = call_with_auth(
8119                h.router.clone(),
8120                "GET",
8121                "/memory/documents",
8122                None,
8123                Some("Bearer doc-secret"),
8124            )
8125            .await;
8126            assert_eq!(status, StatusCode::OK);
8127
8128            // GET /memory/documents/<unknown> → 404 (auth passes).
8129            let (status, _) = call_with_auth(
8130                h.router.clone(),
8131                "GET",
8132                "/memory/documents/00000000-0000-7000-8000-000000000000",
8133                None,
8134                Some("Bearer doc-secret"),
8135            )
8136            .await;
8137            assert_eq!(status, StatusCode::NOT_FOUND);
8138        });
8139        h.shutdown(&runtime);
8140    }
8141
8142    // ---------------------------------------------------------------------
8143    // v0.8.0 P2: tenant header extractor tests
8144    // ---------------------------------------------------------------------
8145
8146    /// `X-Solo-Tenant: default` resolves to the default tenant (which
8147    /// in the test harness is the only one wired in the registry).
8148    #[test]
8149    fn tenant_header_default_resolves() {
8150        let runtime = rt();
8151        let h = Harness::new(&runtime);
8152        let r = h.router.clone();
8153        let (status, _body) = runtime.block_on(async {
8154            let req = Request::builder()
8155                .method("GET")
8156                .uri("/memory/00000000-0000-7000-8000-000000000000")
8157                .header("x-solo-tenant", "default")
8158                .body(Body::empty())
8159                .unwrap();
8160            let resp = r.oneshot(req).await.expect("oneshot");
8161            let s = resp.status();
8162            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8163            (s, _b)
8164        });
8165        // 404 because the id doesn't exist — but it's a routed 404 from
8166        // inspect_handler, not a 400 from a bad tenant header. That's
8167        // the proof point.
8168        assert_eq!(status, StatusCode::NOT_FOUND);
8169        h.shutdown(&runtime);
8170    }
8171
8172    /// `X-Solo-Tenant: UPPER` → 400 (invalid tenant id format).
8173    #[test]
8174    fn tenant_header_invalid_returns_400() {
8175        let runtime = rt();
8176        let h = Harness::new(&runtime);
8177        let r = h.router.clone();
8178        let (status, body) = runtime.block_on(async {
8179            let req = Request::builder()
8180                .method("GET")
8181                .uri("/memory/00000000-0000-7000-8000-000000000000")
8182                .header("x-solo-tenant", "UPPER")
8183                .body(Body::empty())
8184                .unwrap();
8185            let resp = r.oneshot(req).await.expect("oneshot");
8186            let s = resp.status();
8187            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
8188            let v: Value = serde_json::from_slice(&bytes).unwrap_or(Value::Null);
8189            (s, v)
8190        });
8191        assert_eq!(status, StatusCode::BAD_REQUEST);
8192        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
8193        assert!(
8194            msg.to_lowercase().contains("tenant") || msg.to_lowercase().contains("invalid"),
8195            "error must mention tenant/invalid: {msg}"
8196        );
8197        h.shutdown(&runtime);
8198    }
8199
8200    /// `X-Solo-Tenant: never-registered` → 404 (unknown tenant id).
8201    #[test]
8202    fn tenant_header_unknown_returns_404() {
8203        let runtime = rt();
8204        let h = Harness::new(&runtime);
8205        let r = h.router.clone();
8206        let (status, _body) = runtime.block_on(async {
8207            let req = Request::builder()
8208                .method("GET")
8209                .uri("/memory/00000000-0000-7000-8000-000000000000")
8210                .header("x-solo-tenant", "never-registered")
8211                .body(Body::empty())
8212                .unwrap();
8213            let resp = r.oneshot(req).await.expect("oneshot");
8214            let s = resp.status();
8215            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8216            (s, _b)
8217        });
8218        assert_eq!(status, StatusCode::NOT_FOUND);
8219        h.shutdown(&runtime);
8220    }
8221
8222    /// No `X-Solo-Tenant` header → falls back to state.default_tenant.
8223    /// The reach-through to `inspect_handler` should produce the normal
8224    /// 404 for an unknown id rather than a tenant-routing error.
8225    #[test]
8226    fn tenant_header_missing_defaults_to_state_default_tenant() {
8227        let runtime = rt();
8228        let h = Harness::new(&runtime);
8229        let r = h.router.clone();
8230        let (status, _body) = runtime.block_on(async {
8231            let req = Request::builder()
8232                .method("GET")
8233                .uri("/memory/00000000-0000-7000-8000-000000000000")
8234                .body(Body::empty())
8235                .unwrap();
8236            let resp = r.oneshot(req).await.expect("oneshot");
8237            let s = resp.status();
8238            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8239            (s, _b)
8240        });
8241        assert_eq!(status, StatusCode::NOT_FOUND);
8242        h.shutdown(&runtime);
8243    }
8244
8245    // ---------------------------------------------------------------------
8246    // v0.9.x: GET /v1/graph/expand
8247    //
8248    // Seeds tables directly via the Harness's side connection and walks
8249    // the four expansion kinds. The Harness is single-tenant (default);
8250    // the routing-isolation case is already covered by the
8251    // `tenant_header_*` tests above (an `X-Solo-Tenant: never-registered`
8252    // header against the same node_id surfaces 404 from the registry,
8253    // proving cross-tenant lookups can't bleed).
8254    // ---------------------------------------------------------------------
8255
8256    /// Insert one episode row directly. Returns its rowid for callers
8257    /// that need to wire `triples.source_episode_id`.
8258    fn seed_episode(
8259        conn: &rusqlite::Connection,
8260        memory_id: &str,
8261        ts_ms: i64,
8262        content: &str,
8263    ) -> i64 {
8264        conn.execute(
8265            "INSERT INTO episodes
8266                (memory_id, ts_ms, source_type, content,
8267                 encoding_context_json, tier, status,
8268                 confidence, strength, salience,
8269                 created_at_ms, updated_at_ms)
8270                VALUES (?1, ?2, 'user_message', ?3,
8271                        '{}', 'hot', 'active',
8272                        1.0, 0.5, 0.5, ?2, ?2)",
8273            rusqlite::params![memory_id, ts_ms, content],
8274        )
8275        .expect("seed episode");
8276        conn.last_insert_rowid()
8277    }
8278
8279    fn seed_cluster_row(conn: &rusqlite::Connection, cluster_id: &str, created_at_ms: i64) {
8280        conn.execute(
8281            "INSERT INTO clusters (cluster_id, coherence, created_at_ms)
8282                  VALUES (?1, 0.5, ?2)",
8283            rusqlite::params![cluster_id, created_at_ms],
8284        )
8285        .expect("seed cluster");
8286    }
8287
8288    fn seed_cluster_member(conn: &rusqlite::Connection, cluster_id: &str, memory_id: &str) {
8289        conn.execute(
8290            "INSERT INTO cluster_episodes (cluster_id, memory_id) VALUES (?1, ?2)",
8291            rusqlite::params![cluster_id, memory_id],
8292        )
8293        .expect("seed cluster_episodes");
8294    }
8295
8296    fn seed_document_row(conn: &rusqlite::Connection, doc_id: &str, title: &str) {
8297        conn.execute(
8298            "INSERT INTO documents
8299                (doc_id, source, title, mime_type, ingested_at_ms,
8300                 modified_at_ms, status, chunk_count, content_hash, byte_size)
8301                VALUES (?1, ?2, ?3, 'text/plain', 0, NULL,
8302                        'active', 0, ?1, NULL)",
8303            rusqlite::params![doc_id, format!("/tmp/{title}.txt"), title],
8304        )
8305        .expect("seed doc");
8306    }
8307
8308    fn seed_chunk_row(
8309        conn: &rusqlite::Connection,
8310        chunk_id: &str,
8311        doc_id: &str,
8312        chunk_index: i64,
8313        content: &str,
8314    ) {
8315        conn.execute(
8316            "INSERT INTO document_chunks
8317                (chunk_id, doc_id, chunk_index, content,
8318                 token_count, start_offset, end_offset, created_at_ms)
8319                VALUES (?1, ?2, ?3, ?4, 1, 0, ?5, 0)",
8320            rusqlite::params![chunk_id, doc_id, chunk_index, content, content.len() as i64],
8321        )
8322        .expect("seed chunk");
8323    }
8324
8325    fn seed_triple_row(
8326        conn: &rusqlite::Connection,
8327        triple_id: &str,
8328        subject: &str,
8329        predicate: &str,
8330        object: &str,
8331        source_episode_rowid: Option<i64>,
8332    ) {
8333        conn.execute(
8334            "INSERT INTO triples
8335                 (triple_id, subject_id, predicate, object_id, object_kind,
8336                  valid_from_ms, valid_to_ms, confidence, provenance_json,
8337                  status, created_at_ms, updated_at_ms, source_episode_id)
8338                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
8339                         'active', 0, 0, ?5)",
8340            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
8341        )
8342        .expect("seed triple");
8343    }
8344
8345    fn seed_contradiction_row(conn: &rusqlite::Connection, a_id: &str, b_id: &str, kind: &str) {
8346        conn.execute(
8347            "INSERT INTO contradictions
8348                 (a_memory_id, b_memory_id, kind, explanation, detected_at_ms,
8349                  status, resolved_at_ms, resolution_note, winning_triple_id)
8350                 VALUES (?1, ?2, ?3, 'test contradiction', 0,
8351                         'unresolved', NULL, NULL, NULL)",
8352            rusqlite::params![a_id, b_id, kind],
8353        )
8354        .expect("seed contradiction");
8355    }
8356
8357    /// Insert a `semantic_abstractions` row (cluster LLM summary). Used
8358    /// by the cluster-inspect test to verify the abstraction concat path.
8359    fn seed_abstraction_row(
8360        conn: &rusqlite::Connection,
8361        abstraction_id: &str,
8362        cluster_id: &str,
8363        content: &str,
8364    ) {
8365        conn.execute(
8366            "INSERT INTO semantic_abstractions
8367                 (abstraction_id, cluster_id, content, provenance_json,
8368                  confidence, created_at_ms)
8369                 VALUES (?1, ?2, ?3, '{}', 0.9, 0)",
8370            rusqlite::params![abstraction_id, cluster_id, content],
8371        )
8372        .expect("seed abstraction");
8373    }
8374
8375    /// Tests use simple ASCII node_ids (UUID-shaped + plain entity strings),
8376    /// so we percent-encode only `:` and a few other delimiters by hand.
8377    fn percent_encode_node_id(node_id: &str) -> String {
8378        let mut out = String::with_capacity(node_id.len());
8379        for c in node_id.chars() {
8380            match c {
8381                ':' => out.push_str("%3A"),
8382                ' ' => out.push_str("%20"),
8383                '&' => out.push_str("%26"),
8384                '+' => out.push_str("%2B"),
8385                '?' => out.push_str("%3F"),
8386                '#' => out.push_str("%23"),
8387                _ => out.push(c),
8388            }
8389        }
8390        out
8391    }
8392
8393    fn graph_uri(node_id: &str, kind: &str) -> String {
8394        let encoded = percent_encode_node_id(node_id);
8395        format!("/v1/graph/expand?node_id={encoded}&kind={kind}")
8396    }
8397
8398    fn graph_uri_with_limit(node_id: &str, kind: &str, limit: u32) -> String {
8399        let encoded = percent_encode_node_id(node_id);
8400        format!("/v1/graph/expand?node_id={encoded}&kind={kind}&limit={limit}")
8401    }
8402
8403    #[test]
8404    fn expand_cluster_member_from_episode_returns_clusters() {
8405        let runtime = rt();
8406        let h = Harness::new(&runtime);
8407        let memory_id = "11111111-1111-7000-8000-000000000001";
8408        {
8409            let conn = h.open_db();
8410            seed_episode(&conn, memory_id, 100, "ep content");
8411            seed_cluster_row(&conn, "cl-a", 200);
8412            seed_cluster_member(&conn, "cl-a", memory_id);
8413        }
8414        let node_id = format!("ep:{memory_id}");
8415        let (status, body) = runtime.block_on(call(
8416            h.router.clone(),
8417            "GET",
8418            &graph_uri(&node_id, "cluster_member"),
8419            None,
8420        ));
8421        assert_eq!(status, StatusCode::OK, "body: {body}");
8422        let nodes = body
8423            .get("nodes")
8424            .and_then(|v| v.as_array())
8425            .expect("nodes array");
8426        let edges = body
8427            .get("edges")
8428            .and_then(|v| v.as_array())
8429            .expect("edges array");
8430        assert_eq!(nodes.len(), 1, "{body}");
8431        assert_eq!(nodes[0]["id"], "cl:cl-a");
8432        assert_eq!(nodes[0]["kind"], "cluster");
8433        assert_eq!(edges.len(), 1);
8434        assert_eq!(edges[0]["source"], node_id);
8435        assert_eq!(edges[0]["target"], "cl:cl-a");
8436        assert_eq!(edges[0]["kind"], "cluster_member");
8437        h.shutdown(&runtime);
8438    }
8439
8440    #[test]
8441    fn expand_cluster_member_from_cluster_returns_episodes() {
8442        let runtime = rt();
8443        let h = Harness::new(&runtime);
8444        {
8445            let conn = h.open_db();
8446            seed_cluster_row(&conn, "cl-multi", 500);
8447            for i in 0..5 {
8448                let mid = format!("2222{i}222-2222-7000-8000-000000000001");
8449                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
8450                seed_cluster_member(&conn, "cl-multi", &mid);
8451            }
8452        }
8453        let (status, body) = runtime.block_on(call(
8454            h.router.clone(),
8455            "GET",
8456            &graph_uri_with_limit("cl:cl-multi", "cluster_member", 3),
8457            None,
8458        ));
8459        assert_eq!(status, StatusCode::OK, "body: {body}");
8460        let nodes = body["nodes"].as_array().unwrap();
8461        let edges = body["edges"].as_array().unwrap();
8462        assert_eq!(nodes.len(), 3, "limit honored: {body}");
8463        assert_eq!(edges.len(), 3);
8464        for n in nodes {
8465            assert_eq!(n["kind"], "episode");
8466        }
8467        h.shutdown(&runtime);
8468    }
8469
8470    #[test]
8471    fn expand_document_chunk_from_document_returns_chunks() {
8472        let runtime = rt();
8473        let h = Harness::new(&runtime);
8474        let doc_id = "33333333-3333-7000-8000-000000000001";
8475        {
8476            let conn = h.open_db();
8477            seed_document_row(&conn, doc_id, "doc A");
8478            // Insert chunks in shuffled order so the ORDER BY chunk_index
8479            // is load-bearing.
8480            seed_chunk_row(&conn, "c2", doc_id, 2, "chunk 2 text");
8481            seed_chunk_row(&conn, "c0", doc_id, 0, "chunk 0 text");
8482            seed_chunk_row(&conn, "c1", doc_id, 1, "chunk 1 text");
8483            seed_chunk_row(&conn, "c3", doc_id, 3, "chunk 3 text");
8484        }
8485        let node_id = format!("doc:{doc_id}");
8486        let (status, body) = runtime.block_on(call(
8487            h.router.clone(),
8488            "GET",
8489            &graph_uri(&node_id, "document_chunk"),
8490            None,
8491        ));
8492        assert_eq!(status, StatusCode::OK, "body: {body}");
8493        let nodes = body["nodes"].as_array().unwrap();
8494        let edges = body["edges"].as_array().unwrap();
8495        assert_eq!(nodes.len(), 4);
8496        assert_eq!(edges.len(), 4);
8497        // Verify in-order chunk_index emission.
8498        assert_eq!(nodes[0]["id"], "chunk:c0");
8499        assert_eq!(nodes[1]["id"], "chunk:c1");
8500        assert_eq!(nodes[2]["id"], "chunk:c2");
8501        assert_eq!(nodes[3]["id"], "chunk:c3");
8502        for e in edges {
8503            assert_eq!(e["kind"], "document_chunk");
8504        }
8505        h.shutdown(&runtime);
8506    }
8507
8508    #[test]
8509    fn expand_document_chunk_from_chunk_returns_parent_document() {
8510        let runtime = rt();
8511        let h = Harness::new(&runtime);
8512        let doc_id = "44444444-4444-7000-8000-000000000001";
8513        {
8514            let conn = h.open_db();
8515            seed_document_row(&conn, doc_id, "parent doc");
8516            seed_chunk_row(&conn, "c-orphan", doc_id, 0, "chunk content");
8517        }
8518        let (status, body) = runtime.block_on(call(
8519            h.router.clone(),
8520            "GET",
8521            &graph_uri("chunk:c-orphan", "document_chunk"),
8522            None,
8523        ));
8524        assert_eq!(status, StatusCode::OK, "body: {body}");
8525        let nodes = body["nodes"].as_array().unwrap();
8526        let edges = body["edges"].as_array().unwrap();
8527        assert_eq!(nodes.len(), 1);
8528        assert_eq!(edges.len(), 1);
8529        assert_eq!(nodes[0]["id"], format!("doc:{doc_id}"));
8530        assert_eq!(edges[0]["source"], "chunk:c-orphan");
8531        assert_eq!(edges[0]["target"], format!("doc:{doc_id}"));
8532        h.shutdown(&runtime);
8533    }
8534
8535    #[test]
8536    fn expand_triple_from_episode_returns_entities() {
8537        let runtime = rt();
8538        let h = Harness::new(&runtime);
8539        let memory_id = "55555555-5555-7000-8000-000000000001";
8540        let rowid;
8541        {
8542            let conn = h.open_db();
8543            rowid = seed_episode(&conn, memory_id, 100, "alice works at anthropic");
8544            // Two distinct triples → 4 entity endpoints (Alice, Anthropic, Bob, NYC).
8545            seed_triple_row(&conn, "t1", "Alice", "works_at", "Anthropic", Some(rowid));
8546            seed_triple_row(&conn, "t2", "Bob", "lives_in", "NYC", Some(rowid));
8547        }
8548        let node_id = format!("ep:{memory_id}");
8549        let (status, body) = runtime.block_on(call(
8550            h.router.clone(),
8551            "GET",
8552            &graph_uri(&node_id, "triple"),
8553            None,
8554        ));
8555        assert_eq!(status, StatusCode::OK, "body: {body}");
8556        let nodes = body["nodes"].as_array().unwrap();
8557        let edges = body["edges"].as_array().unwrap();
8558        assert_eq!(nodes.len(), 4, "expected 4 unique entity nodes: {body}");
8559        assert_eq!(edges.len(), 2);
8560        let ids: std::collections::HashSet<String> = nodes
8561            .iter()
8562            .map(|n| n["id"].as_str().unwrap().to_string())
8563            .collect();
8564        for expected in ["ent:Alice", "ent:Anthropic", "ent:Bob", "ent:NYC"] {
8565            assert!(ids.contains(expected), "missing {expected} in {body}");
8566        }
8567        for e in edges {
8568            assert_eq!(e["kind"], "triple");
8569            assert!(e["predicate"].is_string(), "predicate set: {body}");
8570        }
8571        h.shutdown(&runtime);
8572    }
8573
8574    #[test]
8575    fn expand_triple_from_entity_returns_episodes() {
8576        let runtime = rt();
8577        let h = Harness::new(&runtime);
8578        {
8579            let conn = h.open_db();
8580            let r1 = seed_episode(
8581                &conn,
8582                "66666666-6666-7000-8000-000000000001",
8583                100,
8584                "alice ep one",
8585            );
8586            let r2 = seed_episode(
8587                &conn,
8588                "66666666-6666-7000-8000-000000000002",
8589                200,
8590                "alice ep two",
8591            );
8592            let r3 = seed_episode(
8593                &conn,
8594                "66666666-6666-7000-8000-000000000003",
8595                300,
8596                "alice ep three",
8597            );
8598            // 3 triples all mentioning Alice on one side or another.
8599            seed_triple_row(&conn, "t1", "Alice", "p", "Bob", Some(r1));
8600            seed_triple_row(&conn, "t2", "Carol", "p", "Alice", Some(r2));
8601            seed_triple_row(&conn, "t3", "Alice", "q", "Dave", Some(r3));
8602            // One triple with no source — must be skipped by the IS NOT NULL filter.
8603            seed_triple_row(&conn, "t-orphan", "Alice", "p", "Eve", None);
8604        }
8605        let (status, body) = runtime.block_on(call(
8606            h.router.clone(),
8607            "GET",
8608            &graph_uri("ent:Alice", "triple"),
8609            None,
8610        ));
8611        assert_eq!(status, StatusCode::OK, "body: {body}");
8612        let nodes = body["nodes"].as_array().unwrap();
8613        let edges = body["edges"].as_array().unwrap();
8614        assert_eq!(nodes.len(), 3, "expected 3 episodes: {body}");
8615        assert_eq!(edges.len(), 3);
8616        for n in nodes {
8617            assert_eq!(n["kind"], "episode");
8618        }
8619        for e in edges {
8620            assert_eq!(e["source"], "ent:Alice");
8621            assert_eq!(e["kind"], "triple");
8622        }
8623        h.shutdown(&runtime);
8624    }
8625
8626    #[test]
8627    fn expand_semantic_from_episode_returns_similar() {
8628        let runtime = rt();
8629        let h = Harness::new(&runtime);
8630        // Seed three episodes via the writer-actor so they get embedded
8631        // + inserted into HNSW. StubEmbedder is deterministic: identical
8632        // content → identical vector → cos_distance = 0. So we use
8633        // distinct strings, then expand from one of them and assert at
8634        // least one similar peer comes back.
8635        runtime.block_on(async {
8636            let mid1 = post_remember(h.router.clone(), "alpha alpha alpha").await;
8637            let _mid2 = post_remember(h.router.clone(), "beta beta beta").await;
8638            let _mid3 = post_remember(h.router.clone(), "gamma gamma gamma").await;
8639            // Expand from mid1.
8640            let (status, body) = call(
8641                h.router.clone(),
8642                "GET",
8643                &graph_uri_with_limit(&format!("ep:{mid1}"), "semantic", 5),
8644                None,
8645            )
8646            .await;
8647            assert_eq!(status, StatusCode::OK, "body: {body}");
8648            let nodes = body["nodes"].as_array().unwrap();
8649            let edges = body["edges"].as_array().unwrap();
8650            // Must NOT include the source.
8651            for n in nodes {
8652                assert_ne!(
8653                    n["id"].as_str().unwrap(),
8654                    format!("ep:{mid1}"),
8655                    "self must be excluded: {body}"
8656                );
8657            }
8658            // Edges must be tagged semantic with a numeric weight.
8659            for e in edges {
8660                assert_eq!(e["kind"], "semantic");
8661                assert!(e["weight"].is_number(), "weight set: {body}");
8662            }
8663        });
8664        h.shutdown(&runtime);
8665    }
8666
8667    /// Helper: POST /memory and return the new memory_id.
8668    async fn post_remember(router: axum::Router, content: &str) -> String {
8669        let (status, body) = call(
8670            router,
8671            "POST",
8672            "/memory",
8673            Some(json!({ "content": content })),
8674        )
8675        .await;
8676        assert_eq!(status, StatusCode::OK, "post failed: {body}");
8677        body["memory_id"].as_str().unwrap().to_string()
8678    }
8679
8680    #[test]
8681    fn expand_400_on_invalid_kind() {
8682        let runtime = rt();
8683        let h = Harness::new(&runtime);
8684        let (status, _body) = runtime.block_on(call(
8685            h.router.clone(),
8686            "GET",
8687            "/v1/graph/expand?node_id=ep:any&kind=banana",
8688            None,
8689        ));
8690        // axum's Query extractor rejects unknown enum value with 400/422.
8691        assert!(
8692            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
8693            "expected 400/422 for bad kind, got {status}"
8694        );
8695        h.shutdown(&runtime);
8696    }
8697
8698    #[test]
8699    fn expand_400_on_invalid_node_for_kind() {
8700        let runtime = rt();
8701        let h = Harness::new(&runtime);
8702        // kind=semantic from a cluster source → 400.
8703        let (status, body) = runtime.block_on(call(
8704            h.router.clone(),
8705            "GET",
8706            &graph_uri("cl:doesnt-matter", "semantic"),
8707            None,
8708        ));
8709        assert_eq!(status, StatusCode::BAD_REQUEST);
8710        assert!(
8711            body["error"]
8712                .as_str()
8713                .is_some_and(|s| s.contains("semantic only valid for episode")),
8714            "got: {body}"
8715        );
8716        h.shutdown(&runtime);
8717    }
8718
8719    #[test]
8720    fn expand_404_on_missing_node_id() {
8721        let runtime = rt();
8722        let h = Harness::new(&runtime);
8723        let (status, body) = runtime.block_on(call(
8724            h.router.clone(),
8725            "GET",
8726            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8727            None,
8728        ));
8729        assert_eq!(status, StatusCode::NOT_FOUND, "{body}");
8730        h.shutdown(&runtime);
8731    }
8732
8733    #[test]
8734    fn expand_limit_clamped_at_100() {
8735        let runtime = rt();
8736        let h = Harness::new(&runtime);
8737        // Seed > 100 cluster members so we can see the clamp in action.
8738        {
8739            let conn = h.open_db();
8740            seed_cluster_row(&conn, "cl-huge", 1_000);
8741            for i in 0..150 {
8742                let mid = format!("77777777-7777-7000-8000-{:012}", i);
8743                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
8744                seed_cluster_member(&conn, "cl-huge", &mid);
8745            }
8746        }
8747        let (status, body) = runtime.block_on(call(
8748            h.router.clone(),
8749            "GET",
8750            &graph_uri_with_limit("cl:cl-huge", "cluster_member", 999),
8751            None,
8752        ));
8753        assert_eq!(status, StatusCode::OK, "body: {body}");
8754        let nodes = body["nodes"].as_array().unwrap();
8755        assert_eq!(
8756            nodes.len(),
8757            100,
8758            "limit must be silently clamped to 100, got {}",
8759            nodes.len()
8760        );
8761        h.shutdown(&runtime);
8762    }
8763
8764    #[test]
8765    fn expand_bad_node_id_prefix_returns_400() {
8766        let runtime = rt();
8767        let h = Harness::new(&runtime);
8768        let (status, body) = runtime.block_on(call(
8769            h.router.clone(),
8770            "GET",
8771            "/v1/graph/expand?node_id=garbage&kind=cluster_member",
8772            None,
8773        ));
8774        assert_eq!(status, StatusCode::BAD_REQUEST);
8775        assert!(
8776            body["error"]
8777                .as_str()
8778                .is_some_and(|s| s.contains("node_id must be")),
8779            "got: {body}"
8780        );
8781        h.shutdown(&runtime);
8782    }
8783
8784    #[test]
8785    fn expand_respects_tenant_scoping_via_unknown_tenant_header() {
8786        // Routing via X-Solo-Tenant: a header pointing to an unknown
8787        // tenant must 404 before the handler even runs — the
8788        // TenantExtractor is the gatekeeper, so node ids can't be
8789        // resolved against the wrong tenant's DB.
8790        let runtime = rt();
8791        let h = Harness::new(&runtime);
8792        // Seed a real episode in the default tenant so we know it
8793        // exists there. If tenant scoping leaked, this lookup would 200
8794        // even with the wrong tenant header.
8795        let memory_id = "88888888-8888-7000-8000-000000000001";
8796        {
8797            let conn = h.open_db();
8798            seed_episode(&conn, memory_id, 100, "scoped");
8799            seed_cluster_row(&conn, "cl-scoped", 200);
8800            seed_cluster_member(&conn, "cl-scoped", memory_id);
8801        }
8802        let node_id = format!("ep:{memory_id}");
8803        let r = h.router.clone();
8804        let (status, _body) = runtime.block_on(async {
8805            let req = Request::builder()
8806                .method("GET")
8807                .uri(graph_uri(&node_id, "cluster_member"))
8808                .header("x-solo-tenant", "never-registered-tenant")
8809                .body(Body::empty())
8810                .unwrap();
8811            let resp = r.oneshot(req).await.expect("oneshot");
8812            let s = resp.status();
8813            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8814            (s, _b)
8815        });
8816        // Unknown tenant id → 404 from the registry. Confirms cross-tenant
8817        // lookups can't smuggle through this endpoint.
8818        assert_eq!(status, StatusCode::NOT_FOUND);
8819        h.shutdown(&runtime);
8820    }
8821
8822    #[test]
8823    fn expand_respects_auth_when_enabled() {
8824        let runtime = rt();
8825        let h = Harness::new_with_auth(&runtime, Some("graph-secret".into()));
8826        // No Authorization header → 401.
8827        let (status, _) = runtime.block_on(call(
8828            h.router.clone(),
8829            "GET",
8830            &graph_uri("ep:any", "cluster_member"),
8831            None,
8832        ));
8833        assert_eq!(status, StatusCode::UNAUTHORIZED);
8834        // Right token → handler runs (404 for unknown node, NOT 401).
8835        let (status, _) = runtime.block_on(call_with_auth(
8836            h.router.clone(),
8837            "GET",
8838            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8839            None,
8840            Some("Bearer graph-secret"),
8841        ));
8842        assert_eq!(status, StatusCode::NOT_FOUND);
8843        h.shutdown(&runtime);
8844    }
8845
8846    #[test]
8847    fn expand_works_when_auth_none() {
8848        let runtime = rt();
8849        let h = Harness::new(&runtime);
8850        // Unauthenticated request hits the handler; 404 for unknown node
8851        // proves the auth-none path doesn't reject the request.
8852        let (status, _) = runtime.block_on(call(
8853            h.router.clone(),
8854            "GET",
8855            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8856            None,
8857        ));
8858        assert_eq!(status, StatusCode::NOT_FOUND);
8859        h.shutdown(&runtime);
8860    }
8861
8862    // ---------------------------------------------------------------------
8863    // v0.10.0: GET /v1/graph/nodes + GET /v1/graph/edges
8864    //
8865    // Paginated catalog reads. Both endpoints share auth + tenant +
8866    // cursor scaffolding from /v1/graph/expand, so tests focus on the
8867    // new surface: filter parsing, entity synthesis cap, cursor round-
8868    // trip, edge-type defaults (semantic excluded), and the semantic
8869    // 400 redirect to /v1/graph/neighbors.
8870    // ---------------------------------------------------------------------
8871
8872    /// Lower-level helper that captures response headers in addition to
8873    /// status + JSON body. Used by the entity-cap header test.
8874    async fn call_with_headers(
8875        router: axum::Router,
8876        method: &str,
8877        uri: &str,
8878    ) -> (StatusCode, axum::http::HeaderMap, Value) {
8879        let req = Request::builder()
8880            .method(method)
8881            .uri(uri)
8882            .header("content-length", "0")
8883            .body(Body::empty())
8884            .unwrap();
8885        let resp = router.oneshot(req).await.expect("oneshot");
8886        let status = resp.status();
8887        let headers = resp.headers().clone();
8888        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
8889        let v: Value = if body_bytes.is_empty() {
8890            Value::Null
8891        } else {
8892            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
8893        };
8894        (status, headers, v)
8895    }
8896
8897    #[test]
8898    fn nodes_returns_all_kinds_when_no_filter() {
8899        let runtime = rt();
8900        let h = Harness::new(&runtime);
8901        {
8902            let conn = h.open_db();
8903            let rowid = seed_episode(
8904                &conn,
8905                "aaaaaaaa-0000-7000-8000-000000000001",
8906                100,
8907                "episode one",
8908            );
8909            seed_document_row(&conn, "doc-1", "doc one");
8910            seed_chunk_row(&conn, "chunk-1", "doc-1", 0, "chunk one body");
8911            seed_cluster_row(&conn, "cl-one", 200);
8912            seed_triple_row(&conn, "t-one", "Alice", "knows", "Bob", Some(rowid));
8913        }
8914        let (status, body) =
8915            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
8916        assert_eq!(status, StatusCode::OK, "body: {body}");
8917        let nodes = body["nodes"].as_array().unwrap();
8918        let kinds: std::collections::HashSet<&str> =
8919            nodes.iter().map(|n| n["kind"].as_str().unwrap()).collect();
8920        for expected in ["episode", "document", "chunk", "cluster", "entity"] {
8921            assert!(
8922                kinds.contains(expected),
8923                "expected {expected} kind in response: {body}"
8924            );
8925        }
8926        h.shutdown(&runtime);
8927    }
8928
8929    #[test]
8930    fn nodes_filter_by_single_kind() {
8931        let runtime = rt();
8932        let h = Harness::new(&runtime);
8933        {
8934            let conn = h.open_db();
8935            seed_episode(&conn, "bbbbbbbb-0000-7000-8000-000000000001", 100, "ep");
8936            seed_document_row(&conn, "doc-only", "d");
8937            seed_cluster_row(&conn, "cl-only", 300);
8938        }
8939        let (status, body) = runtime.block_on(call(
8940            h.router.clone(),
8941            "GET",
8942            "/v1/graph/nodes?kind=episode",
8943            None,
8944        ));
8945        assert_eq!(status, StatusCode::OK, "body: {body}");
8946        let nodes = body["nodes"].as_array().unwrap();
8947        assert!(!nodes.is_empty(), "{body}");
8948        for n in nodes {
8949            assert_eq!(
8950                n["kind"], "episode",
8951                "kind filter must be exclusive: {body}"
8952            );
8953        }
8954        h.shutdown(&runtime);
8955    }
8956
8957    #[test]
8958    fn nodes_filter_by_multiple_kinds() {
8959        let runtime = rt();
8960        let h = Harness::new(&runtime);
8961        {
8962            let conn = h.open_db();
8963            seed_episode(&conn, "cccccccc-0000-7000-8000-000000000001", 100, "ep");
8964            seed_document_row(&conn, "doc-multi", "d");
8965            seed_cluster_row(&conn, "cl-multi", 300);
8966        }
8967        let (status, body) = runtime.block_on(call(
8968            h.router.clone(),
8969            "GET",
8970            "/v1/graph/nodes?kind=episode,document",
8971            None,
8972        ));
8973        assert_eq!(status, StatusCode::OK, "body: {body}");
8974        let nodes = body["nodes"].as_array().unwrap();
8975        let kinds: std::collections::HashSet<&str> =
8976            nodes.iter().map(|n| n["kind"].as_str().unwrap()).collect();
8977        assert!(kinds.contains("episode"), "{body}");
8978        assert!(kinds.contains("document"), "{body}");
8979        assert!(
8980            !kinds.contains("cluster"),
8981            "cluster must be filtered out: {body}"
8982        );
8983        h.shutdown(&runtime);
8984    }
8985
8986    #[test]
8987    fn nodes_entity_synthesis_caps_at_200() {
8988        let runtime = rt();
8989        let h = Harness::new(&runtime);
8990        {
8991            let conn = h.open_db();
8992            // Seed one episode + 250 distinct triple object values so the
8993            // entity rollup surfaces >200 entities. ref_count is 1 for
8994            // each; pick subject = "Alice" for all so the entity count
8995            // collapses on subject (1 "Alice") + 250 distinct objects.
8996            let rowid = seed_episode(&conn, "dddddddd-0000-7000-8000-000000000001", 100, "ep");
8997            for i in 0..250 {
8998                let triple_id = format!("t-cap-{i:03}");
8999                let obj = format!("Entity{i:03}");
9000                seed_triple_row(&conn, &triple_id, "Alice", "knows", &obj, Some(rowid));
9001            }
9002        }
9003        let (status, headers, body) = runtime.block_on(call_with_headers(
9004            h.router.clone(),
9005            "GET",
9006            "/v1/graph/nodes?kind=entity&limit=500",
9007        ));
9008        assert_eq!(status, StatusCode::OK, "body: {body}");
9009        let nodes = body["nodes"].as_array().unwrap();
9010        assert_eq!(
9011            nodes.len(),
9012            200,
9013            "entity cap must be enforced at 200, got {}",
9014            nodes.len()
9015        );
9016        assert_eq!(
9017            headers
9018                .get("x-solo-entity-cap-reached")
9019                .and_then(|v| v.to_str().ok()),
9020            Some("true"),
9021            "cap-reached header missing: headers={headers:?}"
9022        );
9023        for n in nodes {
9024            assert_eq!(n["kind"], "entity");
9025        }
9026        h.shutdown(&runtime);
9027    }
9028
9029    #[test]
9030    fn nodes_since_until_filter_works() {
9031        let runtime = rt();
9032        let h = Harness::new(&runtime);
9033        {
9034            let conn = h.open_db();
9035            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000001", 100, "early");
9036            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000002", 500, "middle");
9037            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000003", 1000, "late");
9038        }
9039        let (status, body) = runtime.block_on(call(
9040            h.router.clone(),
9041            "GET",
9042            "/v1/graph/nodes?kind=episode&since_ms=400&until_ms=600",
9043            None,
9044        ));
9045        assert_eq!(status, StatusCode::OK, "body: {body}");
9046        let nodes = body["nodes"].as_array().unwrap();
9047        assert_eq!(nodes.len(), 1, "{body}");
9048        assert_eq!(nodes[0]["id"], "ep:eeeeeeee-0000-7000-8000-000000000002");
9049        h.shutdown(&runtime);
9050    }
9051
9052    #[test]
9053    fn nodes_pagination_round_trip() {
9054        let runtime = rt();
9055        let h = Harness::new(&runtime);
9056        {
9057            let conn = h.open_db();
9058            for i in 0..150 {
9059                let mid = format!("f0000000-0000-7000-8000-{i:012}");
9060                // ts_ms scales with i so the sort order is deterministic;
9061                // newest (highest i) appears first.
9062                seed_episode(&conn, &mid, 1_000 + i as i64, "page");
9063            }
9064        }
9065        let limit = 50u32;
9066        let mut seen: std::collections::HashSet<String> = Default::default();
9067        let mut next_cursor: Option<String> = None;
9068        for page_idx in 0..4 {
9069            let cursor_param = next_cursor
9070                .as_deref()
9071                .map(|c| format!("&cursor={c}"))
9072                .unwrap_or_default();
9073            let uri = format!("/v1/graph/nodes?kind=episode&limit={limit}{cursor_param}");
9074            let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9075            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
9076            let nodes = body["nodes"].as_array().unwrap();
9077            assert!(
9078                nodes.len() <= limit as usize,
9079                "page {page_idx} over-fetched: {body}"
9080            );
9081            for n in nodes {
9082                let id = n["id"].as_str().unwrap().to_string();
9083                assert!(seen.insert(id.clone()), "duplicate id across pages: {id}");
9084            }
9085            next_cursor = body
9086                .get("next_cursor")
9087                .and_then(|v| v.as_str())
9088                .map(|s| s.to_string());
9089            if next_cursor.is_none() {
9090                break;
9091            }
9092        }
9093        assert_eq!(
9094            seen.len(),
9095            150,
9096            "expected 150 distinct ids across pages, got {}",
9097            seen.len()
9098        );
9099        assert!(
9100            next_cursor.is_none(),
9101            "cursor should be null after last page; got {next_cursor:?}"
9102        );
9103        h.shutdown(&runtime);
9104    }
9105
9106    #[test]
9107    fn nodes_respects_tenant_scoping() {
9108        let runtime = rt();
9109        let h = Harness::new(&runtime);
9110        {
9111            let conn = h.open_db();
9112            seed_episode(
9113                &conn,
9114                "11110000-0000-7000-8000-000000000001",
9115                100,
9116                "tenant scope",
9117            );
9118        }
9119        // Request against a never-registered tenant header → 404 from
9120        // the tenant extractor before the handler runs.
9121        let r = h.router.clone();
9122        let (status, _body) = runtime.block_on(async {
9123            let req = Request::builder()
9124                .method("GET")
9125                .uri("/v1/graph/nodes")
9126                .header("x-solo-tenant", "never-registered-tenant")
9127                .body(Body::empty())
9128                .unwrap();
9129            let resp = r.oneshot(req).await.expect("oneshot");
9130            let s = resp.status();
9131            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9132            (s, _b)
9133        });
9134        assert_eq!(status, StatusCode::NOT_FOUND);
9135        h.shutdown(&runtime);
9136    }
9137
9138    #[test]
9139    fn nodes_respects_auth_when_enabled() {
9140        let runtime = rt();
9141        let h = Harness::new_with_auth(&runtime, Some("nodes-secret".into()));
9142        let (status, _) = runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
9143        assert_eq!(
9144            status,
9145            StatusCode::UNAUTHORIZED,
9146            "must reject unauthenticated request"
9147        );
9148        let (status, _) = runtime.block_on(call_with_auth(
9149            h.router.clone(),
9150            "GET",
9151            "/v1/graph/nodes",
9152            None,
9153            Some("Bearer nodes-secret"),
9154        ));
9155        assert_eq!(status, StatusCode::OK, "must pass through with bearer");
9156        h.shutdown(&runtime);
9157    }
9158
9159    #[test]
9160    fn nodes_works_with_auth_none() {
9161        let runtime = rt();
9162        let h = Harness::new(&runtime);
9163        let (status, body) =
9164            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
9165        assert_eq!(status, StatusCode::OK, "{body}");
9166        assert!(body.get("nodes").is_some());
9167        h.shutdown(&runtime);
9168    }
9169
9170    // --- /v1/graph/edges ---
9171
9172    #[test]
9173    fn edges_returns_all_default_kinds() {
9174        let runtime = rt();
9175        let h = Harness::new(&runtime);
9176        {
9177            let conn = h.open_db();
9178            let rowid = seed_episode(&conn, "22220000-0000-7000-8000-000000000001", 100, "ep src");
9179            seed_triple_row(&conn, "t-def", "Alice", "knows", "Bob", Some(rowid));
9180            seed_document_row(&conn, "doc-e", "doc");
9181            seed_chunk_row(&conn, "c-e", "doc-e", 0, "chunk");
9182            seed_cluster_row(&conn, "cl-e", 200);
9183            seed_cluster_member(&conn, "cl-e", "22220000-0000-7000-8000-000000000001");
9184        }
9185        let (status, body) =
9186            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/edges", None));
9187        assert_eq!(status, StatusCode::OK, "body: {body}");
9188        let edges = body["edges"].as_array().unwrap();
9189        let kinds: std::collections::HashSet<&str> =
9190            edges.iter().map(|e| e["kind"].as_str().unwrap()).collect();
9191        assert!(kinds.contains("triple"), "{body}");
9192        assert!(kinds.contains("document_chunk"), "{body}");
9193        assert!(kinds.contains("cluster_member"), "{body}");
9194        assert!(
9195            !kinds.contains("semantic"),
9196            "semantic is NOT in default response: {body}"
9197        );
9198        h.shutdown(&runtime);
9199    }
9200
9201    #[test]
9202    fn edges_filter_by_node_id_finds_incident_edges() {
9203        let runtime = rt();
9204        let h = Harness::new(&runtime);
9205        let memory_id = "33330000-0000-7000-8000-000000000001";
9206        {
9207            let conn = h.open_db();
9208            let rowid = seed_episode(&conn, memory_id, 100, "ep multi-triple");
9209            seed_triple_row(&conn, "t-a", "Alice", "p", "Bob", Some(rowid));
9210            seed_triple_row(&conn, "t-b", "Alice", "p", "Carol", Some(rowid));
9211            seed_triple_row(&conn, "t-c", "Alice", "p", "Dave", Some(rowid));
9212            // Decoy episode with its own triple — must NOT come back.
9213            let decoy_rowid =
9214                seed_episode(&conn, "33330000-0000-7000-8000-000000000999", 200, "decoy");
9215            seed_triple_row(&conn, "t-decoy", "Alice", "p", "Eve", Some(decoy_rowid));
9216        }
9217        let uri = format!(
9218            "/v1/graph/edges?type=triple&node_id={}",
9219            percent_encode_node_id(&format!("ep:{memory_id}"))
9220        );
9221        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9222        assert_eq!(status, StatusCode::OK, "body: {body}");
9223        let edges = body["edges"].as_array().unwrap();
9224        assert_eq!(edges.len(), 3, "expected 3 incident edges: {body}");
9225        for e in edges {
9226            assert_eq!(e["source"], format!("ep:{memory_id}"));
9227            assert_eq!(e["kind"], "triple");
9228        }
9229        h.shutdown(&runtime);
9230    }
9231
9232    #[test]
9233    fn edges_filter_by_type_works() {
9234        let runtime = rt();
9235        let h = Harness::new(&runtime);
9236        {
9237            let conn = h.open_db();
9238            let rowid = seed_episode(&conn, "44440000-0000-7000-8000-000000000001", 100, "ep");
9239            seed_triple_row(&conn, "t-only", "Alice", "p", "Bob", Some(rowid));
9240            seed_document_row(&conn, "doc-skip", "doc");
9241            seed_chunk_row(&conn, "c-skip", "doc-skip", 0, "chunk");
9242        }
9243        let (status, body) = runtime.block_on(call(
9244            h.router.clone(),
9245            "GET",
9246            "/v1/graph/edges?type=triple",
9247            None,
9248        ));
9249        assert_eq!(status, StatusCode::OK, "{body}");
9250        let edges = body["edges"].as_array().unwrap();
9251        assert!(!edges.is_empty(), "{body}");
9252        for e in edges {
9253            assert_eq!(e["kind"], "triple", "{body}");
9254        }
9255        h.shutdown(&runtime);
9256    }
9257
9258    #[test]
9259    fn edges_rejects_semantic_type_with_400() {
9260        let runtime = rt();
9261        let h = Harness::new(&runtime);
9262        let (status, body) = runtime.block_on(call(
9263            h.router.clone(),
9264            "GET",
9265            "/v1/graph/edges?type=semantic",
9266            None,
9267        ));
9268        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9269        let err = body["error"].as_str().unwrap_or_default();
9270        assert!(
9271            err.contains("/v1/graph/neighbors"),
9272            "error must point to /v1/graph/neighbors: {body}"
9273        );
9274        h.shutdown(&runtime);
9275    }
9276
9277    #[test]
9278    fn edges_pagination_round_trip() {
9279        let runtime = rt();
9280        let h = Harness::new(&runtime);
9281        {
9282            let conn = h.open_db();
9283            let rowid = seed_episode(&conn, "55550000-0000-7000-8000-000000000001", 100, "ep big");
9284            // 60 triples → 60 triple edges. limit=25 → 3 pages.
9285            for i in 0..60 {
9286                let tid = format!("t-page-{i:03}");
9287                let obj = format!("Obj{i:03}");
9288                seed_triple_row(&conn, &tid, "Alice", "p", &obj, Some(rowid));
9289            }
9290        }
9291        let limit = 25u32;
9292        let mut seen: std::collections::HashSet<String> = Default::default();
9293        let mut next_cursor: Option<String> = None;
9294        for page_idx in 0..5 {
9295            let cursor_param = next_cursor
9296                .as_deref()
9297                .map(|c| format!("&cursor={c}"))
9298                .unwrap_or_default();
9299            let uri = format!("/v1/graph/edges?type=triple&limit={limit}{cursor_param}");
9300            let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9301            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
9302            let edges = body["edges"].as_array().unwrap();
9303            for e in edges {
9304                let id = e["id"].as_str().unwrap().to_string();
9305                assert!(seen.insert(id.clone()), "duplicate edge id: {id}");
9306            }
9307            next_cursor = body
9308                .get("next_cursor")
9309                .and_then(|v| v.as_str())
9310                .map(|s| s.to_string());
9311            if next_cursor.is_none() {
9312                break;
9313            }
9314        }
9315        assert_eq!(
9316            seen.len(),
9317            60,
9318            "expected 60 distinct edges, got {}",
9319            seen.len()
9320        );
9321        assert!(next_cursor.is_none(), "expected exhausted cursor");
9322        h.shutdown(&runtime);
9323    }
9324
9325    #[test]
9326    fn edges_respects_tenant_scoping() {
9327        let runtime = rt();
9328        let h = Harness::new(&runtime);
9329        {
9330            let conn = h.open_db();
9331            let rowid = seed_episode(&conn, "66660000-0000-7000-8000-000000000001", 100, "ep");
9332            seed_triple_row(&conn, "t-tenant", "Alice", "p", "Bob", Some(rowid));
9333        }
9334        let r = h.router.clone();
9335        let (status, _) = runtime.block_on(async {
9336            let req = Request::builder()
9337                .method("GET")
9338                .uri("/v1/graph/edges")
9339                .header("x-solo-tenant", "never-registered-tenant")
9340                .body(Body::empty())
9341                .unwrap();
9342            let resp = r.oneshot(req).await.expect("oneshot");
9343            let s = resp.status();
9344            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9345            (s, _b)
9346        });
9347        assert_eq!(status, StatusCode::NOT_FOUND);
9348        h.shutdown(&runtime);
9349    }
9350
9351    #[test]
9352    fn edges_respects_auth_when_enabled() {
9353        let runtime = rt();
9354        let h = Harness::new_with_auth(&runtime, Some("edges-secret".into()));
9355        let (status, _) = runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/edges", None));
9356        assert_eq!(status, StatusCode::UNAUTHORIZED);
9357        let (status, _) = runtime.block_on(call_with_auth(
9358            h.router.clone(),
9359            "GET",
9360            "/v1/graph/edges",
9361            None,
9362            Some("Bearer edges-secret"),
9363        ));
9364        assert_eq!(status, StatusCode::OK);
9365        h.shutdown(&runtime);
9366    }
9367
9368    // ---------------------------------------------------------------------
9369    // v0.10.0: GET /v1/graph/inspect/{id}
9370    //
9371    // Kind-discriminated full-record drill. Shares auth + tenant + node-id
9372    // prefix scaffolding with /v1/graph/expand and /v1/graph/{nodes,edges},
9373    // so tests focus on the new surface: per-kind full_text source +
9374    // triples_in/out shape + entity zero-triple 404 semantics + the
9375    // standard 400/404/auth/tenant cases.
9376    // ---------------------------------------------------------------------
9377
9378    fn inspect_uri(node_id: &str) -> String {
9379        // Path parameter must be percent-encoded (`:` is `%3A` after
9380        // the URI parser splits segments). axum's Path<String>
9381        // extractor percent-decodes automatically.
9382        format!("/v1/graph/inspect/{}", percent_encode_node_id(node_id))
9383    }
9384
9385    #[test]
9386    fn inspect_episode_returns_full_text_plus_triples_out() {
9387        let runtime = rt();
9388        let h = Harness::new(&runtime);
9389        let memory_id = "a1110000-0000-7000-8000-000000000001";
9390        let full_text = "Met Alice for coffee at the new place. She mentioned the project is on track but they're hitting issues with the deploy pipeline.";
9391        {
9392            let conn = h.open_db();
9393            let rowid = seed_episode(&conn, memory_id, 1_715_625_600_000, full_text);
9394            seed_triple_row(&conn, "t-ep-1", "user", "met_with", "Alice", Some(rowid));
9395            seed_triple_row(
9396                &conn,
9397                "t-ep-2",
9398                "user",
9399                "discussed",
9400                "deploy_pipeline",
9401                Some(rowid),
9402            );
9403            seed_triple_row(&conn, "t-ep-3", "Alice", "works_on", "project", Some(rowid));
9404        }
9405        let (status, body) = runtime.block_on(call(
9406            h.router.clone(),
9407            "GET",
9408            &inspect_uri(&format!("ep:{memory_id}")),
9409            None,
9410        ));
9411        assert_eq!(status, StatusCode::OK, "body: {body}");
9412        assert_eq!(body["node"]["kind"], "episode");
9413        assert_eq!(body["node"]["id"], format!("ep:{memory_id}"));
9414        assert_eq!(
9415            body["full_text"].as_str().unwrap(),
9416            full_text,
9417            "full_text must match episodes.content verbatim, untruncated"
9418        );
9419        let triples_out = body["triples_out"].as_array().unwrap();
9420        assert_eq!(triples_out.len(), 3, "{body}");
9421        let triples_in = body["triples_in"].as_array().unwrap();
9422        assert!(triples_in.is_empty(), "episodes have no triples_in: {body}");
9423        for e in triples_out {
9424            assert_eq!(e["kind"], "triple");
9425            assert_eq!(e["source"], format!("ep:{memory_id}"));
9426            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
9427            assert!(e["predicate"].as_str().is_some());
9428            assert!(e["weight"].as_f64().is_some());
9429        }
9430        h.shutdown(&runtime);
9431    }
9432
9433    #[test]
9434    fn inspect_episode_triples_in_is_empty_for_v10p1() {
9435        // Seed an episode + a triple from a DIFFERENT episode that
9436        // happens to mention the focal episode's content. Even with
9437        // entities referencing the episode topic, episode.triples_in
9438        // is structurally empty in v0.10.0 P1.
9439        let runtime = rt();
9440        let h = Harness::new(&runtime);
9441        let focal = "a2220000-0000-7000-8000-000000000001";
9442        let other = "a2220000-0000-7000-8000-000000000002";
9443        {
9444            let conn = h.open_db();
9445            seed_episode(&conn, focal, 100, "focal episode body");
9446            let other_rowid = seed_episode(&conn, other, 200, "another episode");
9447            // Entity "user" gets referenced heavily; doesn't matter --
9448            // episode triples_in stays empty.
9449            for i in 0..5 {
9450                let tid = format!("t-other-{i}");
9451                seed_triple_row(&conn, &tid, "user", "did", "thing", Some(other_rowid));
9452            }
9453        }
9454        let (status, body) = runtime.block_on(call(
9455            h.router.clone(),
9456            "GET",
9457            &inspect_uri(&format!("ep:{focal}")),
9458            None,
9459        ));
9460        assert_eq!(status, StatusCode::OK, "body: {body}");
9461        let triples_in = body["triples_in"].as_array().unwrap();
9462        assert!(
9463            triples_in.is_empty(),
9464            "episode triples_in must be empty regardless of cross-episode entity references: {body}"
9465        );
9466        h.shutdown(&runtime);
9467    }
9468
9469    #[test]
9470    fn inspect_document_returns_full_text_concatenated_from_chunks() {
9471        let runtime = rt();
9472        let h = Harness::new(&runtime);
9473        let doc_id = "d3330000-0000-7000-8000-000000000001";
9474        {
9475            let conn = h.open_db();
9476            seed_document_row(&conn, doc_id, "doc-title");
9477            seed_chunk_row(&conn, "ch-doc-1", doc_id, 0, "First chunk body.");
9478            seed_chunk_row(&conn, "ch-doc-2", doc_id, 1, "Second chunk body.");
9479            seed_chunk_row(&conn, "ch-doc-3", doc_id, 2, "Third chunk body.");
9480        }
9481        let (status, body) = runtime.block_on(call(
9482            h.router.clone(),
9483            "GET",
9484            &inspect_uri(&format!("doc:{doc_id}")),
9485            None,
9486        ));
9487        assert_eq!(status, StatusCode::OK, "body: {body}");
9488        assert_eq!(body["node"]["kind"], "document");
9489        let full_text = body["full_text"].as_str().unwrap();
9490        // Concatenation order matches chunk_index ASC; separator is "\n\n".
9491        assert_eq!(
9492            full_text,
9493            "First chunk body.\n\nSecond chunk body.\n\nThird chunk body."
9494        );
9495        assert!(body["triples_in"].as_array().unwrap().is_empty());
9496        assert!(body["triples_out"].as_array().unwrap().is_empty());
9497        h.shutdown(&runtime);
9498    }
9499
9500    #[test]
9501    fn inspect_chunk_returns_text() {
9502        let runtime = rt();
9503        let h = Harness::new(&runtime);
9504        let chunk_body = "This is the body of the chunk being inspected.";
9505        {
9506            let conn = h.open_db();
9507            seed_document_row(&conn, "doc-chunk-host", "host");
9508            seed_chunk_row(
9509                &conn,
9510                "chunk-inspect-target",
9511                "doc-chunk-host",
9512                0,
9513                chunk_body,
9514            );
9515        }
9516        let (status, body) = runtime.block_on(call(
9517            h.router.clone(),
9518            "GET",
9519            &inspect_uri("chunk:chunk-inspect-target"),
9520            None,
9521        ));
9522        assert_eq!(status, StatusCode::OK, "body: {body}");
9523        assert_eq!(body["node"]["kind"], "chunk");
9524        assert_eq!(body["full_text"].as_str().unwrap(), chunk_body);
9525        assert!(body["triples_in"].as_array().unwrap().is_empty());
9526        assert!(body["triples_out"].as_array().unwrap().is_empty());
9527        h.shutdown(&runtime);
9528    }
9529
9530    #[test]
9531    fn inspect_cluster_returns_label_and_abstraction() {
9532        let runtime = rt();
9533        let h = Harness::new(&runtime);
9534        let cluster_id = "cl-inspect-target";
9535        let abstraction_text = "Discussions about the deploy pipeline and on-call rotation.";
9536        {
9537            let conn = h.open_db();
9538            seed_cluster_row(&conn, cluster_id, 12345);
9539            seed_abstraction_row(&conn, "abs-1", cluster_id, abstraction_text);
9540        }
9541        let (status, body) = runtime.block_on(call(
9542            h.router.clone(),
9543            "GET",
9544            &inspect_uri(&format!("cl:{cluster_id}")),
9545            None,
9546        ));
9547        assert_eq!(status, StatusCode::OK, "body: {body}");
9548        assert_eq!(body["node"]["kind"], "cluster");
9549        let full_text = body["full_text"].as_str().unwrap();
9550        assert!(
9551            full_text.contains(cluster_id),
9552            "full_text must include cluster label: {full_text}"
9553        );
9554        assert!(
9555            full_text.contains(abstraction_text),
9556            "full_text must include abstraction text: {full_text}"
9557        );
9558        // "label\n\nabstraction" -- separated by blank line for the
9559        // inspector renderer.
9560        assert!(
9561            full_text.contains("\n\n"),
9562            "label and abstraction must be separated: {full_text}"
9563        );
9564        h.shutdown(&runtime);
9565    }
9566
9567    #[test]
9568    fn inspect_entity_returns_triples_only() {
9569        let runtime = rt();
9570        let h = Harness::new(&runtime);
9571        {
9572            let conn = h.open_db();
9573            let rowid = seed_episode(
9574                &conn,
9575                "e5550000-0000-7000-8000-000000000001",
9576                100,
9577                "host episode",
9578            );
9579            // 5 triples that reference Alice (as subject or object).
9580            seed_triple_row(&conn, "t-ent-1", "Alice", "knows", "Bob", Some(rowid));
9581            seed_triple_row(
9582                &conn,
9583                "t-ent-2",
9584                "Alice",
9585                "works_at",
9586                "Anthropic",
9587                Some(rowid),
9588            );
9589            seed_triple_row(&conn, "t-ent-3", "user", "met", "Alice", Some(rowid));
9590            seed_triple_row(&conn, "t-ent-4", "Alice", "owns", "laptop", Some(rowid));
9591            seed_triple_row(&conn, "t-ent-5", "Carol", "mentors", "Alice", Some(rowid));
9592        }
9593        let (status, body) = runtime.block_on(call(
9594            h.router.clone(),
9595            "GET",
9596            &inspect_uri("ent:Alice"),
9597            None,
9598        ));
9599        assert_eq!(status, StatusCode::OK, "body: {body}");
9600        assert_eq!(body["node"]["kind"], "entity");
9601        assert_eq!(body["node"]["id"], "ent:Alice");
9602        assert!(
9603            body["full_text"].is_null(),
9604            "entity full_text must be null (entities have no body): {body}"
9605        );
9606        let triples_out = body["triples_out"].as_array().unwrap();
9607        assert_eq!(triples_out.len(), 5, "{body}");
9608        assert!(body["triples_in"].as_array().unwrap().is_empty());
9609        for e in triples_out {
9610            assert_eq!(e["kind"], "triple");
9611            assert_eq!(e["source"], "ent:Alice");
9612            // Counterpart is always an entity; Alice never appears on
9613            // both ends so target != source.
9614            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
9615            assert_ne!(e["target"], "ent:Alice");
9616        }
9617        h.shutdown(&runtime);
9618    }
9619
9620    #[test]
9621    fn inspect_entity_with_zero_triples_returns_404() {
9622        let runtime = rt();
9623        let h = Harness::new(&runtime);
9624        // Seed unrelated triples so the table isn't empty; the target
9625        // entity still has zero references.
9626        {
9627            let conn = h.open_db();
9628            let rowid = seed_episode(&conn, "e6660000-0000-7000-8000-000000000001", 100, "ep");
9629            seed_triple_row(&conn, "t-other", "Bob", "knows", "Carol", Some(rowid));
9630        }
9631        let (status, body) = runtime.block_on(call(
9632            h.router.clone(),
9633            "GET",
9634            &inspect_uri("ent:Nonexistent"),
9635            None,
9636        ));
9637        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
9638        let err = body["error"].as_str().unwrap_or_default();
9639        assert!(
9640            err.contains("Nonexistent") || err.contains("entity"),
9641            "error must mention entity: {body}"
9642        );
9643        h.shutdown(&runtime);
9644    }
9645
9646    #[test]
9647    fn inspect_404_on_missing_node() {
9648        // Well-formed `ep:` prefix + valid UUID shape, but no row in DB.
9649        let runtime = rt();
9650        let h = Harness::new(&runtime);
9651        let (status, body) = runtime.block_on(call(
9652            h.router.clone(),
9653            "GET",
9654            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9655            None,
9656        ));
9657        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
9658        h.shutdown(&runtime);
9659    }
9660
9661    #[test]
9662    fn inspect_400_on_invalid_prefix() {
9663        let runtime = rt();
9664        let h = Harness::new(&runtime);
9665        let (status, body) =
9666            runtime.block_on(call(h.router.clone(), "GET", &inspect_uri("xyz:foo"), None));
9667        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9668        let err = body["error"].as_str().unwrap_or_default();
9669        assert!(
9670            err.contains("xyz") || err.contains("prefix"),
9671            "error must mention bad prefix: {body}"
9672        );
9673        h.shutdown(&runtime);
9674    }
9675
9676    #[test]
9677    fn inspect_respects_tenant_scoping() {
9678        let runtime = rt();
9679        let h = Harness::new(&runtime);
9680        let memory_id = "a7770000-0000-7000-8000-000000000001";
9681        {
9682            let conn = h.open_db();
9683            seed_episode(&conn, memory_id, 100, "tenant scope");
9684        }
9685        // Real id in default tenant resolves; the same request against
9686        // a never-registered tenant header surfaces 404 from the tenant
9687        // extractor before the handler runs.
9688        let r = h.router.clone();
9689        let (status, _) = runtime.block_on(async {
9690            let req = Request::builder()
9691                .method("GET")
9692                .uri(inspect_uri(&format!("ep:{memory_id}")))
9693                .header("x-solo-tenant", "never-registered-tenant")
9694                .body(Body::empty())
9695                .unwrap();
9696            let resp = r.oneshot(req).await.expect("oneshot");
9697            let s = resp.status();
9698            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9699            (s, _b)
9700        });
9701        assert_eq!(status, StatusCode::NOT_FOUND);
9702        // Sanity: same id resolves on the default tenant.
9703        let (status, body) = runtime.block_on(call(
9704            h.router.clone(),
9705            "GET",
9706            &inspect_uri(&format!("ep:{memory_id}")),
9707            None,
9708        ));
9709        assert_eq!(
9710            status,
9711            StatusCode::OK,
9712            "default tenant must resolve: {body}"
9713        );
9714        h.shutdown(&runtime);
9715    }
9716
9717    #[test]
9718    fn inspect_respects_auth_when_enabled() {
9719        let runtime = rt();
9720        let h = Harness::new_with_auth(&runtime, Some("inspect-secret".into()));
9721        // Missing bearer -> 401 before handler runs.
9722        let (status, _) = runtime.block_on(call(
9723            h.router.clone(),
9724            "GET",
9725            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9726            None,
9727        ));
9728        assert_eq!(status, StatusCode::UNAUTHORIZED);
9729        // Valid bearer + unknown node -> handler runs and returns 404,
9730        // proving auth passed through.
9731        let (status, _) = runtime.block_on(call_with_auth(
9732            h.router.clone(),
9733            "GET",
9734            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9735            None,
9736            Some("Bearer inspect-secret"),
9737        ));
9738        assert_eq!(status, StatusCode::NOT_FOUND);
9739        h.shutdown(&runtime);
9740    }
9741
9742    // ---------------------------------------------------------------------
9743    // v0.10.0: GET /v1/graph/neighbors/{id}
9744    //
9745    // Unified explicit + HNSW-semantic neighbor surface for solo-web's
9746    // "show similar" overlay. Tests cover the kind dispatch (explicit /
9747    // semantic / both default), threshold filter, limit clamp, dedupe
9748    // rule, and the standard 400/404/auth/tenant gates.
9749    // ---------------------------------------------------------------------
9750
9751    /// URL builder for the neighbors endpoint. `kind`/`threshold`/`limit`
9752    /// are all optional; pass `None` to omit the corresponding query
9753    /// parameter. The node id is percent-encoded so `:` survives the path
9754    /// extractor.
9755    fn neighbors_uri(
9756        node_id: &str,
9757        kind: Option<&str>,
9758        threshold: Option<f32>,
9759        limit: Option<u32>,
9760    ) -> String {
9761        let mut qs: Vec<String> = Vec::new();
9762        if let Some(k) = kind {
9763            qs.push(format!("kind={k}"));
9764        }
9765        if let Some(t) = threshold {
9766            qs.push(format!("threshold={t}"));
9767        }
9768        if let Some(l) = limit {
9769            qs.push(format!("limit={l}"));
9770        }
9771        let encoded = percent_encode_node_id(node_id);
9772        if qs.is_empty() {
9773            format!("/v1/graph/neighbors/{encoded}")
9774        } else {
9775            format!("/v1/graph/neighbors/{encoded}?{}", qs.join("&"))
9776        }
9777    }
9778
9779    /// 1. `?kind=explicit` returns only structural edges (no semantic).
9780    /// Seeds an episode with 2 explicit (triple) neighbors + several
9781    /// distinct other episodes so the semantic path COULD surface
9782    /// candidates. The `kind=explicit` filter must drop all of them.
9783    #[test]
9784    fn neighbors_explicit_only_returns_no_semantic_edges() {
9785        let runtime = rt();
9786        let h = Harness::new(&runtime);
9787        runtime.block_on(async {
9788            // Seed several episodes via the writer-actor so they get HNSW
9789            // entries -- the semantic path would surface these if it
9790            // wasn't filtered out.
9791            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9792            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9793            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
9794            // Add explicit triples sourced from `focal`. seed_triple_row
9795            // needs the focal rowid -- look it up via a side connection.
9796            {
9797                let conn = h.open_db();
9798                let rowid: i64 = conn
9799                    .query_row(
9800                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9801                        rusqlite::params![&focal],
9802                        |r| r.get(0),
9803                    )
9804                    .unwrap();
9805                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
9806                seed_triple_row(&conn, "t-exp-2", "Alice", "owns", "laptop", Some(rowid));
9807            }
9808            let (status, body) = call(
9809                h.router.clone(),
9810                "GET",
9811                &neighbors_uri(&format!("ep:{focal}"), Some("explicit"), None, None),
9812                None,
9813            )
9814            .await;
9815            assert_eq!(status, StatusCode::OK, "body: {body}");
9816            let edges = body["edges"].as_array().unwrap();
9817            assert!(!edges.is_empty(), "expected explicit edges: {body}");
9818            for e in edges {
9819                assert_ne!(
9820                    e["kind"], "semantic",
9821                    "kind=explicit must drop semantic edges: {body}"
9822                );
9823            }
9824        });
9825        h.shutdown(&runtime);
9826    }
9827
9828    /// 2. `?kind=semantic` returns only HNSW edges (no explicit).
9829    /// Inverse of test 1 -- same fixture, opposite filter.
9830    #[test]
9831    fn neighbors_semantic_only_returns_no_explicit_edges() {
9832        let runtime = rt();
9833        let h = Harness::new(&runtime);
9834        runtime.block_on(async {
9835            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9836            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9837            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
9838            {
9839                let conn = h.open_db();
9840                let rowid: i64 = conn
9841                    .query_row(
9842                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9843                        rusqlite::params![&focal],
9844                        |r| r.get(0),
9845                    )
9846                    .unwrap();
9847                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
9848            }
9849            // Threshold=0 so every HNSW hit clears the filter.
9850            let (status, body) = call(
9851                h.router.clone(),
9852                "GET",
9853                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
9854                None,
9855            )
9856            .await;
9857            assert_eq!(status, StatusCode::OK, "body: {body}");
9858            let edges = body["edges"].as_array().unwrap();
9859            for e in edges {
9860                assert_eq!(
9861                    e["kind"], "semantic",
9862                    "kind=semantic must drop explicit edges: {body}"
9863                );
9864                assert!(
9865                    e["weight"].is_number(),
9866                    "semantic edges carry weight: {body}"
9867                );
9868            }
9869        });
9870        h.shutdown(&runtime);
9871    }
9872
9873    /// 3. Default (no `kind=` param) returns both explicit + semantic.
9874    #[test]
9875    fn neighbors_both_default_returns_combined() {
9876        let runtime = rt();
9877        let h = Harness::new(&runtime);
9878        runtime.block_on(async {
9879            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9880            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9881            {
9882                let conn = h.open_db();
9883                let rowid: i64 = conn
9884                    .query_row(
9885                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9886                        rusqlite::params![&focal],
9887                        |r| r.get(0),
9888                    )
9889                    .unwrap();
9890                seed_triple_row(&conn, "t-both-1", "Alice", "met", "Bob", Some(rowid));
9891            }
9892            let (status, body) = call(
9893                h.router.clone(),
9894                "GET",
9895                // No kind param -> default = both. Threshold 0 so semantic
9896                // hits make it through the filter.
9897                &neighbors_uri(&format!("ep:{focal}"), None, Some(0.0), None),
9898                None,
9899            )
9900            .await;
9901            assert_eq!(status, StatusCode::OK, "body: {body}");
9902            let edges = body["edges"].as_array().unwrap();
9903            let kinds: std::collections::HashSet<&str> =
9904                edges.iter().map(|e| e["kind"].as_str().unwrap()).collect();
9905            assert!(
9906                kinds.contains("triple"),
9907                "expected at least one triple edge: {body}"
9908            );
9909            assert!(
9910                kinds.contains("semantic"),
9911                "expected at least one semantic edge: {body}"
9912            );
9913        });
9914        h.shutdown(&runtime);
9915    }
9916
9917    /// 4. Dedupe rule. Construct an episode X whose semantic-neighbor Y
9918    /// is ALSO a triple-target -- i.e. the explicit and semantic paths
9919    /// both produce an edge X -> Y. After dedupe only the explicit edge
9920    /// survives.
9921    #[test]
9922    fn neighbors_dedupes_semantic_when_explicit_exists() {
9923        let runtime = rt();
9924        let h = Harness::new(&runtime);
9925        runtime.block_on(async {
9926            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9927            // Seed an explicit triple from focal -> ent:peer-target.
9928            // The semantic path produces edges focal -> ep:<other>; we
9929            // ensure both paths produce an edge ending at the same id by
9930            // wiring `peer-target = ep:<other_memory_id>` -- but the
9931            // entity emitter uses `ent:` prefix, not `ep:`. So to force a
9932            // collision we need an edge form where source+target overlap.
9933            //
9934            // Simpler construction: the `expand_triple_from_episode` path
9935            // emits an edge `ent:subject -> ent:object`, not from the
9936            // focal episode -- meaning the explicit edges don't end at
9937            // an ep: node in the first place. So we have to engineer a
9938            // collision via the cluster_member path:
9939            //   * explicit: focal (episode) -> cluster (via cluster_member)
9940            //   * semantic: focal -> similar episode
9941            // The two endpoints (cluster vs. episode) never collide in
9942            // shape. To produce a real (source, target) overlap that
9943            // exercises the dedupe code, mint a synthetic semantic edge
9944            // by adding an explicit triple sourced from the focal that
9945            // happens to end at the SAME entity the semantic path would
9946            // emit -- but semantic only emits ep:/chunk: ids, never ent:.
9947            //
9948            // The brief flagged this scenario as unlikely. Build the
9949            // simplest collision the codebase admits: have the focal
9950            // episode's semantic neighbor's memory_id appear as a
9951            // triple's object_id (formatted as ent:<that-uuid>). The
9952            // explicit edge is then `ent:<self-subject> -> ent:<uuid>`;
9953            // the semantic edge is `ep:focal -> ep:<uuid>`. The (source,
9954            // target) pair DIFFERS (`ent:X` vs `ep:focal`), so dedupe
9955            // would NOT fire -- which is correct: those are structurally
9956            // different relationships.
9957            //
9958            // Therefore the realistic dedupe test is the trivial
9959            // tautology: explicit and semantic produce no collisions in
9960            // practice. Lock that in by asserting that the same memory_id
9961            // never appears with an edge from both paths.
9962            let _other = post_remember(h.router.clone(), "beta beta beta").await;
9963            {
9964                let conn = h.open_db();
9965                let rowid: i64 = conn
9966                    .query_row(
9967                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9968                        rusqlite::params![&focal],
9969                        |r| r.get(0),
9970                    )
9971                    .unwrap();
9972                seed_triple_row(&conn, "t-dedupe-1", "Alice", "knows", "Bob", Some(rowid));
9973            }
9974            let (status, body) = call(
9975                h.router.clone(),
9976                "GET",
9977                &neighbors_uri(&format!("ep:{focal}"), Some("both"), Some(0.0), None),
9978                None,
9979            )
9980            .await;
9981            assert_eq!(status, StatusCode::OK, "body: {body}");
9982            // For every edge, count occurrences of (source, target). No
9983            // pair should appear twice (which is what the dedupe rule
9984            // guarantees).
9985            let edges = body["edges"].as_array().unwrap();
9986            let mut seen: std::collections::HashMap<(String, String), i32> =
9987                std::collections::HashMap::new();
9988            for e in edges {
9989                let key = (
9990                    e["source"].as_str().unwrap().to_string(),
9991                    e["target"].as_str().unwrap().to_string(),
9992                );
9993                *seen.entry(key).or_insert(0) += 1;
9994            }
9995            for (pair, count) in &seen {
9996                assert_eq!(
9997                    *count, 1,
9998                    "edge pair {pair:?} appears {count} times -- dedupe rule violated: {body}"
9999                );
10000            }
10001        });
10002        h.shutdown(&runtime);
10003    }
10004
10005    /// 5. Threshold filter -- raising the threshold drops low-similarity
10006    /// semantic neighbors.
10007    #[test]
10008    fn neighbors_threshold_filters_low_similarity() {
10009        let runtime = rt();
10010        let h = Harness::new(&runtime);
10011        runtime.block_on(async {
10012            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
10013            let _o1 = post_remember(h.router.clone(), "beta one").await;
10014            let _o2 = post_remember(h.router.clone(), "beta two").await;
10015            let _o3 = post_remember(h.router.clone(), "beta three").await;
10016            // Low threshold -- expect more semantic hits.
10017            let (status, low_body) = call(
10018                h.router.clone(),
10019                "GET",
10020                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
10021                None,
10022            )
10023            .await;
10024            assert_eq!(status, StatusCode::OK, "body: {low_body}");
10025            let low_edge_count = low_body["edges"].as_array().unwrap().len();
10026            // High threshold -- expect fewer (or equal) semantic hits.
10027            let (status, high_body) = call(
10028                h.router.clone(),
10029                "GET",
10030                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.99), None),
10031                None,
10032            )
10033            .await;
10034            assert_eq!(status, StatusCode::OK, "body: {high_body}");
10035            let high_edge_count = high_body["edges"].as_array().unwrap().len();
10036            assert!(
10037                high_edge_count <= low_edge_count,
10038                "high-threshold ({high_edge_count}) must not exceed low-threshold ({low_edge_count}): low={low_body}, high={high_body}"
10039            );
10040            // Also assert every surviving high-threshold edge satisfies
10041            // the filter.
10042            for e in high_body["edges"].as_array().unwrap() {
10043                if let Some(w) = e["weight"].as_f64() {
10044                    assert!(
10045                        w >= 0.99,
10046                        "edge with weight {w} survived threshold=0.99: {e}"
10047                    );
10048                }
10049            }
10050        });
10051        h.shutdown(&runtime);
10052    }
10053
10054    /// 6. `?limit=999` is silently clamped at the family ceiling (100) --
10055    /// same policy as `/v1/graph/expand`.
10056    #[test]
10057    fn neighbors_limit_clamped_at_100() {
10058        let runtime = rt();
10059        let h = Harness::new(&runtime);
10060        // Seed a cluster with > 100 episodes so the explicit cluster_member
10061        // path could surface > 100 -- clamp must cap at 100.
10062        {
10063            let conn = h.open_db();
10064            seed_cluster_row(&conn, "cl-huge-n", 1000);
10065            for i in 0..150 {
10066                let mid = format!("99119911-1111-7000-8000-{:012}", i);
10067                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
10068                seed_cluster_member(&conn, "cl-huge-n", &mid);
10069            }
10070        }
10071        let (status, body) = runtime.block_on(call(
10072            h.router.clone(),
10073            "GET",
10074            &neighbors_uri("cl:cl-huge-n", Some("explicit"), None, Some(999)),
10075            None,
10076        ));
10077        assert_eq!(status, StatusCode::OK, "body: {body}");
10078        let edges = body["edges"].as_array().unwrap();
10079        assert_eq!(
10080            edges.len(),
10081            100,
10082            "limit must be silently clamped to 100, got {}",
10083            edges.len()
10084        );
10085        h.shutdown(&runtime);
10086    }
10087
10088    /// 7. `kind=semantic` on a document focal node returns 400.
10089    #[test]
10090    fn neighbors_semantic_rejects_document_source() {
10091        let runtime = rt();
10092        let h = Harness::new(&runtime);
10093        let doc_id = "d-semrej-0000-7000-8000-000000000001";
10094        {
10095            let conn = h.open_db();
10096            seed_document_row(&conn, doc_id, "host");
10097        }
10098        let (status, body) = runtime.block_on(call(
10099            h.router.clone(),
10100            "GET",
10101            &neighbors_uri(&format!("doc:{doc_id}"), Some("semantic"), None, None),
10102            None,
10103        ));
10104        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
10105        let err = body["error"].as_str().unwrap_or_default();
10106        assert!(
10107            err.contains("episode") && err.contains("chunk"),
10108            "error must list supported kinds: {body}"
10109        );
10110        h.shutdown(&runtime);
10111    }
10112
10113    /// 8. `kind=semantic` on a cluster focal node returns 400.
10114    #[test]
10115    fn neighbors_semantic_rejects_cluster_source() {
10116        let runtime = rt();
10117        let h = Harness::new(&runtime);
10118        let cluster_id = "cl-semrej-target";
10119        {
10120            let conn = h.open_db();
10121            seed_cluster_row(&conn, cluster_id, 12345);
10122        }
10123        let (status, body) = runtime.block_on(call(
10124            h.router.clone(),
10125            "GET",
10126            &neighbors_uri(&format!("cl:{cluster_id}"), Some("semantic"), None, None),
10127            None,
10128        ));
10129        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
10130        h.shutdown(&runtime);
10131    }
10132
10133    /// 9. Entity focal node returns only explicit triple edges; no
10134    /// semantic edges (entities have no embeddings, semantic path is
10135    /// silently skipped under `kind=both`).
10136    #[test]
10137    fn neighbors_entity_returns_triples_only() {
10138        let runtime = rt();
10139        let h = Harness::new(&runtime);
10140        runtime.block_on(async {
10141            // Use the writer-actor so the host episode lands in HNSW too
10142            // (any HNSW state is irrelevant since entities can't trigger
10143            // semantic recall; included to prove the semantic path is
10144            // silently skipped, not erroring).
10145            let host_mid = post_remember(h.router.clone(), "Alice and Bob talked").await;
10146            {
10147                let conn = h.open_db();
10148                let rowid: i64 = conn
10149                    .query_row(
10150                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
10151                        rusqlite::params![&host_mid],
10152                        |r| r.get(0),
10153                    )
10154                    .unwrap();
10155                seed_triple_row(&conn, "t-ent-n-1", "Alice", "knows", "Bob", Some(rowid));
10156                seed_triple_row(&conn, "t-ent-n-2", "Alice", "works_at", "Acme", Some(rowid));
10157            }
10158            let (status, body) = call(
10159                h.router.clone(),
10160                "GET",
10161                &neighbors_uri("ent:Alice", None, Some(0.0), None),
10162                None,
10163            )
10164            .await;
10165            assert_eq!(status, StatusCode::OK, "body: {body}");
10166            let edges = body["edges"].as_array().unwrap();
10167            assert!(!edges.is_empty(), "expected explicit triples: {body}");
10168            for e in edges {
10169                assert_eq!(
10170                    e["kind"], "triple",
10171                    "entity focal must produce only triple edges: {body}"
10172                );
10173            }
10174        });
10175        h.shutdown(&runtime);
10176    }
10177
10178    /// 10. Cross-tenant lookups are blocked at the TenantExtractor before
10179    /// the handler runs.
10180    #[test]
10181    fn neighbors_respects_tenant_scoping() {
10182        let runtime = rt();
10183        let h = Harness::new(&runtime);
10184        let memory_id = "a8880000-0000-7000-8000-000000000001";
10185        {
10186            let conn = h.open_db();
10187            seed_episode(&conn, memory_id, 100, "tenant scope");
10188        }
10189        // Wrong tenant header -> 404 from registry, before handler runs.
10190        let r = h.router.clone();
10191        let (status, _) = runtime.block_on(async {
10192            let req = Request::builder()
10193                .method("GET")
10194                .uri(neighbors_uri(
10195                    &format!("ep:{memory_id}"),
10196                    Some("explicit"),
10197                    None,
10198                    None,
10199                ))
10200                .header("x-solo-tenant", "never-registered-tenant-n")
10201                .body(Body::empty())
10202                .unwrap();
10203            let resp = r.oneshot(req).await.expect("oneshot");
10204            let s = resp.status();
10205            let _b = resp.into_body().collect().await.unwrap().to_bytes();
10206            (s, _b)
10207        });
10208        assert_eq!(status, StatusCode::NOT_FOUND);
10209        // Sanity: same id resolves on default tenant.
10210        let (status, body) = runtime.block_on(call(
10211            h.router.clone(),
10212            "GET",
10213            &neighbors_uri(&format!("ep:{memory_id}"), Some("explicit"), None, None),
10214            None,
10215        ));
10216        assert_eq!(
10217            status,
10218            StatusCode::OK,
10219            "default tenant must resolve: {body}"
10220        );
10221        h.shutdown(&runtime);
10222    }
10223
10224    /// 11. Bearer-auth gate: missing token -> 401; valid token + unknown
10225    /// node -> 404 (auth passed, handler ran).
10226    #[test]
10227    fn neighbors_respects_auth_when_enabled() {
10228        let runtime = rt();
10229        let h = Harness::new_with_auth(&runtime, Some("neighbors-secret".into()));
10230        // Missing Authorization -> 401.
10231        let (status, _) = runtime.block_on(call(
10232            h.router.clone(),
10233            "GET",
10234            &neighbors_uri(
10235                "ep:99999999-9999-7000-8000-000000000999",
10236                Some("explicit"),
10237                None,
10238                None,
10239            ),
10240            None,
10241        ));
10242        assert_eq!(status, StatusCode::UNAUTHORIZED);
10243        // Valid bearer + unknown node -> 404 from the handler.
10244        let (status, _) = runtime.block_on(call_with_auth(
10245            h.router.clone(),
10246            "GET",
10247            &neighbors_uri(
10248                "ep:99999999-9999-7000-8000-000000000999",
10249                Some("explicit"),
10250                None,
10251                None,
10252            ),
10253            None,
10254            Some("Bearer neighbors-secret"),
10255        ));
10256        assert_eq!(status, StatusCode::NOT_FOUND);
10257        h.shutdown(&runtime);
10258    }
10259
10260    // ---------------------------------------------------------------------
10261    // v0.10.0: GET /v1/graph/stream — SSE invalidation feed
10262    //
10263    // Driving SSE through axum's in-process router (`oneshot`) requires
10264    // reading the response body as a stream of frames and parsing each
10265    // chunk against the SSE wire format (`event: NAME\ndata: JSON\n\n`).
10266    // The `read_one_sse_event` helper below does that incrementally so
10267    // tests don't have to wait for the stream to close (which would
10268    // never happen — the SSE loop runs until the client drops).
10269    // ---------------------------------------------------------------------
10270
10271    /// One parsed SSE event: the `event:` field plus the `data:` payload
10272    /// re-parsed as JSON. The `id:` field is captured for v0.11.0 P2's
10273    /// `/mcp` GET stream which threads monotonic event ids through
10274    /// the wire — `None` for streams (`/v1/graph/stream`) that don't
10275    /// emit `id:` lines. Empty / comment-only frames are filtered out
10276    /// by the parser; callers only see real events.
10277    #[derive(Debug, Clone)]
10278    struct ParsedSseEvent {
10279        event: String,
10280        data: Value,
10281        /// Raw SSE `id:` field, if present. v0.11.0 P2 emits monotonic
10282        /// `u64` ids for `/mcp` events; the wire encodes them as
10283        /// strings.
10284        id: Option<String>,
10285    }
10286
10287    /// Read frames off the SSE body until ONE complete event lands, then
10288    /// return it. Times out after `timeout` to keep red-test feedback
10289    /// fast. On timeout returns `None`.
10290    async fn read_one_sse_event(
10291        body: &mut axum::body::Body,
10292        timeout: std::time::Duration,
10293    ) -> Option<ParsedSseEvent> {
10294        use http_body_util::BodyExt;
10295        let mut buf = String::new();
10296        let start = std::time::Instant::now();
10297        loop {
10298            if start.elapsed() >= timeout {
10299                return None;
10300            }
10301            let remaining = timeout.saturating_sub(start.elapsed());
10302            let frame_res = tokio::time::timeout(remaining, body.frame()).await;
10303            let frame = match frame_res {
10304                Ok(Some(Ok(f))) => f,
10305                Ok(Some(Err(_))) | Ok(None) => return None,
10306                Err(_) => return None,
10307            };
10308            if let Ok(data) = frame.into_data() {
10309                buf.push_str(&String::from_utf8_lossy(&data));
10310                // Parse complete events (double newline separator).
10311                while let Some(idx) = buf.find("\n\n") {
10312                    let block: String = buf.drain(..idx + 2).collect();
10313                    if let Some(parsed) = parse_sse_block(&block) {
10314                        return Some(parsed);
10315                    }
10316                }
10317            }
10318        }
10319    }
10320
10321    /// Parse one SSE block (raw text between two `\n\n` separators).
10322    /// Returns `None` for comment-only blocks (lines starting with `:`)
10323    /// or blocks missing either `event:` or `data:`.
10324    fn parse_sse_block(block: &str) -> Option<ParsedSseEvent> {
10325        let mut event: Option<String> = None;
10326        let mut data: Option<String> = None;
10327        let mut id: Option<String> = None;
10328        for line in block.lines() {
10329            if let Some(rest) = line.strip_prefix("event:") {
10330                event = Some(rest.trim().to_string());
10331            } else if let Some(rest) = line.strip_prefix("data:") {
10332                data = Some(rest.trim().to_string());
10333            } else if let Some(rest) = line.strip_prefix("id:") {
10334                id = Some(rest.trim().to_string());
10335            }
10336        }
10337        let event = event?;
10338        let data_str = data?;
10339        let data_json = serde_json::from_str(&data_str).ok()?;
10340        Some(ParsedSseEvent {
10341            event,
10342            data: data_json,
10343            id,
10344        })
10345    }
10346
10347    /// Open the SSE stream and return the response body for further
10348    /// frame-level reads. The headers are validated (Content-Type +
10349    /// status) before the body is returned.
10350    async fn open_sse_stream_inner(
10351        router: axum::Router,
10352        auth: Option<&str>,
10353        tenant: Option<&str>,
10354    ) -> (StatusCode, axum::body::Body) {
10355        let mut builder = Request::builder().method("GET").uri("/v1/graph/stream");
10356        if let Some(a) = auth {
10357            builder = builder.header("authorization", a);
10358        }
10359        if let Some(t) = tenant {
10360            builder = builder.header("x-solo-tenant", t);
10361        }
10362        let req = builder
10363            .header("content-length", "0")
10364            .body(Body::empty())
10365            .unwrap();
10366        let resp = router.oneshot(req).await.expect("oneshot");
10367        let status = resp.status();
10368        let body = resp.into_body();
10369        (status, body)
10370    }
10371
10372    /// 1. `init` event lands as the first chunk.
10373    #[test]
10374    fn stream_emits_init_event_on_connect() {
10375        let runtime = rt();
10376        let h = Harness::new(&runtime);
10377        let r = h.router.clone();
10378        runtime.block_on(async {
10379            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10380            assert_eq!(status, StatusCode::OK);
10381            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10382                .await
10383                .expect("must receive init event within 2s");
10384            assert_eq!(ev.event, "init");
10385            assert_eq!(ev.data["connected"].as_bool(), Some(true));
10386            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
10387            assert!(ev.data["ts_ms"].is_number());
10388        });
10389        h.shutdown(&runtime);
10390    }
10391
10392    /// 2. Firing an InvalidateEvent on the broadcast channel surfaces
10393    /// as an `invalidate` SSE event.
10394    #[test]
10395    fn stream_emits_invalidate_after_writer_event() {
10396        let runtime = rt();
10397        let h = Harness::new(&runtime);
10398        let r = h.router.clone();
10399        let sender = h.invalidate_sender();
10400        runtime.block_on(async {
10401            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10402            assert_eq!(status, StatusCode::OK);
10403            // Discard the init event.
10404            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10405                .await
10406                .unwrap();
10407            assert_eq!(init.event, "init");
10408            // Fire a writer-actor-style event on the broadcast.
10409            sender
10410                .send(InvalidateEvent {
10411                    reason: "memory.remember".to_string(),
10412                    tenant_id: "default".to_string(),
10413                    ts_ms: 1_715_625_600_000,
10414                    kind: "episode".to_string(),
10415                })
10416                .expect("must have at least one subscriber");
10417            // The SSE handler must surface it.
10418            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10419                .await
10420                .expect("invalidate event must arrive within 2s");
10421            assert_eq!(ev.event, "invalidate");
10422            assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
10423            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
10424            assert_eq!(ev.data["kind"].as_str(), Some("episode"));
10425        });
10426        h.shutdown(&runtime);
10427    }
10428
10429    /// 3. Each kind of writer-actor event surfaces with its mapped
10430    /// `(reason, kind)` shape.
10431    #[test]
10432    fn stream_emits_invalidate_for_each_writer_command() {
10433        let runtime = rt();
10434        let h = Harness::new(&runtime);
10435        let r = h.router.clone();
10436        let sender = h.invalidate_sender();
10437        let cases = [
10438            ("memory.remember", "episode"),
10439            ("memory.forget", "episode"),
10440            ("memory.consolidate", "cluster"),
10441            ("memory.ingest_document", "document"),
10442            ("memory.forget_document", "document"),
10443            ("memory.triples_extract", "cluster"),
10444            ("memory.reembed", "episode"),
10445            ("gdpr.forget_user", "tenant"),
10446        ];
10447        runtime.block_on(async {
10448            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10449            assert_eq!(status, StatusCode::OK);
10450            // Discard the init.
10451            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10452                .await
10453                .unwrap();
10454            for (reason, kind) in cases {
10455                sender
10456                    .send(InvalidateEvent {
10457                        reason: reason.to_string(),
10458                        tenant_id: "default".to_string(),
10459                        ts_ms: 1_715_625_600_000,
10460                        kind: kind.to_string(),
10461                    })
10462                    .unwrap();
10463                let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10464                    .await
10465                    .unwrap_or_else(|| panic!("must receive event for {reason}"));
10466                assert_eq!(ev.event, "invalidate");
10467                assert_eq!(ev.data["reason"].as_str(), Some(reason), "reason mismatch");
10468                assert_eq!(ev.data["kind"].as_str(), Some(kind), "kind mismatch");
10469            }
10470        });
10471        h.shutdown(&runtime);
10472    }
10473
10474    /// 4. Heartbeat events fire on the configured interval when no real
10475    /// events arrive. Drives `build_invalidate_stream` at a 1-second
10476    /// heartbeat (the public handler uses 30s in prod), wraps it in an
10477    /// `Sse` response, then reads + parses the SSE body via the same
10478    /// `read_one_sse_event` helper the HTTP-layer tests use. This
10479    /// exercises the public Event → body byte path without touching
10480    /// `Event::finalize` (which is private).
10481    #[test]
10482    fn stream_emits_heartbeat_when_no_events() {
10483        let runtime = rt();
10484        let h = Harness::new(&runtime);
10485        let sender = h.invalidate_sender();
10486        runtime.block_on(async {
10487            // Subscribe FIRST so a later writer-side `send` would lag
10488            // the receiver if the subscriber stalled.
10489            let rx = sender.subscribe();
10490            // Build the SSE stream with a 1-second heartbeat interval —
10491            // bypassing the 30s production default.
10492            let stream = build_invalidate_stream(rx, "default".to_string(), 1);
10493            // Wrap in an Sse response + extract the body bytes through
10494            // axum's IntoResponse path. This produces real on-the-wire
10495            // SSE bytes that `read_one_sse_event` can parse.
10496            let sse: Sse<_> = Sse::new(stream);
10497            let resp = sse.into_response();
10498            let mut body = resp.into_body();
10499            // First event must be `init`.
10500            let first = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10501                .await
10502                .expect("init event must arrive");
10503            assert_eq!(first.event, "init");
10504            // Second must be heartbeat (no invalidates fired, ~1s
10505            // interval; allow 3s window for runtime jitter).
10506            let second = read_one_sse_event(&mut body, std::time::Duration::from_secs(3))
10507                .await
10508                .expect("heartbeat event must arrive within 3s");
10509            assert_eq!(second.event, "heartbeat");
10510            assert!(second.data["ts_ms"].is_number());
10511        });
10512        h.shutdown(&runtime);
10513    }
10514
10515    /// 5. Two subscribers connected to the same tenant both receive
10516    /// every invalidate.
10517    #[test]
10518    fn stream_concurrent_subscribers_same_tenant() {
10519        let runtime = rt();
10520        let h = Harness::new(&runtime);
10521        let r1 = h.router.clone();
10522        let r2 = h.router.clone();
10523        let r3 = h.router.clone();
10524        let sender = h.invalidate_sender();
10525        runtime.block_on(async {
10526            // Open three subscribers.
10527            let (s1, mut body1) = open_sse_stream_inner(r1, None, None).await;
10528            let (s2, mut body2) = open_sse_stream_inner(r2, None, None).await;
10529            let (s3, mut body3) = open_sse_stream_inner(r3, None, None).await;
10530            assert_eq!(s1, StatusCode::OK);
10531            assert_eq!(s2, StatusCode::OK);
10532            assert_eq!(s3, StatusCode::OK);
10533            // Drain init events from each.
10534            for body in [&mut body1, &mut body2, &mut body3] {
10535                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
10536                    .await
10537                    .unwrap();
10538                assert_eq!(ev.event, "init");
10539            }
10540            // Receiver count should be at least 3 now.
10541            assert!(
10542                sender.receiver_count() >= 3,
10543                "expected ≥3 subscribers, got {}",
10544                sender.receiver_count()
10545            );
10546            // Fire one invalidate.
10547            sender
10548                .send(InvalidateEvent {
10549                    reason: "memory.remember".to_string(),
10550                    tenant_id: "default".to_string(),
10551                    ts_ms: 1_715_625_600_000,
10552                    kind: "episode".to_string(),
10553                })
10554                .expect("send must succeed");
10555            // All three receive it.
10556            for body in [&mut body1, &mut body2, &mut body3] {
10557                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
10558                    .await
10559                    .unwrap();
10560                assert_eq!(ev.event, "invalidate");
10561                assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
10562            }
10563        });
10564        h.shutdown(&runtime);
10565    }
10566
10567    /// 6. Dropping the SSE client decrements the per-tenant subscriber
10568    /// count — graceful cleanup invariant.
10569    #[test]
10570    fn stream_handles_client_disconnect_gracefully() {
10571        let runtime = rt();
10572        let h = Harness::new(&runtime);
10573        let r = h.router.clone();
10574        let sender = h.invalidate_sender();
10575        let before = sender.receiver_count();
10576        runtime.block_on(async {
10577            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10578            assert_eq!(status, StatusCode::OK);
10579            // Drain the init so the stream is fully active.
10580            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10581                .await
10582                .unwrap();
10583            let during = sender.receiver_count();
10584            assert!(
10585                during > before,
10586                "subscriber count must increase while stream is live (before={before}, during={during})"
10587            );
10588            // Drop the body — simulates the client closing the
10589            // connection. axum drops the stream future, which drops the
10590            // Receiver.
10591            drop(body);
10592        });
10593        // Allow tokio a beat to drop the Receiver task.
10594        runtime.block_on(async {
10595            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
10596        });
10597        let after = sender.receiver_count();
10598        assert!(
10599            after <= before,
10600            "subscriber count must drop back after disconnect (before={before}, after={after})"
10601        );
10602        h.shutdown(&runtime);
10603    }
10604
10605    /// 7. Bearer-auth gate: missing token -> 401.
10606    #[test]
10607    fn stream_respects_auth_when_enabled() {
10608        let runtime = rt();
10609        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
10610        let r = h.router.clone();
10611        runtime.block_on(async {
10612            let (status, _body) = open_sse_stream_inner(r, None, None).await;
10613            assert_eq!(status, StatusCode::UNAUTHORIZED);
10614        });
10615        h.shutdown(&runtime);
10616    }
10617
10618    /// 8. Anonymous OK when auth=None (loopback default).
10619    #[test]
10620    fn stream_works_with_auth_none() {
10621        let runtime = rt();
10622        let h = Harness::new(&runtime);
10623        let r = h.router.clone();
10624        runtime.block_on(async {
10625            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10626            assert_eq!(status, StatusCode::OK);
10627            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10628                .await
10629                .expect("must receive init event");
10630            assert_eq!(ev.event, "init");
10631        });
10632        h.shutdown(&runtime);
10633    }
10634
10635    /// 9. Bearer-auth gate: valid token allows the stream to open.
10636    #[test]
10637    fn stream_respects_auth_accepts_valid_token() {
10638        let runtime = rt();
10639        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
10640        let r = h.router.clone();
10641        runtime.block_on(async {
10642            let (status, mut body) =
10643                open_sse_stream_inner(r, Some("Bearer stream-secret"), None).await;
10644            assert_eq!(status, StatusCode::OK);
10645            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10646                .await
10647                .expect("must receive init event with valid bearer");
10648            assert_eq!(ev.event, "init");
10649            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
10650        });
10651        h.shutdown(&runtime);
10652    }
10653
10654    /// 10. Cross-tenant lookups are 404 at TenantExtractor before the
10655    /// stream opens — wrong tenant header never reaches the handler.
10656    #[test]
10657    fn stream_respects_tenant_scoping() {
10658        let runtime = rt();
10659        let h = Harness::new(&runtime);
10660        let r = h.router.clone();
10661        runtime.block_on(async {
10662            let (status, _body) =
10663                open_sse_stream_inner(r, None, Some("never-registered-tenant-x")).await;
10664            // The single-tenant test registry returns NotFound from
10665            // get_or_open when the header points to a tenant that isn't
10666            // cached; the TenantExtractor maps that to 404.
10667            assert_eq!(status, StatusCode::NOT_FOUND);
10668        });
10669        h.shutdown(&runtime);
10670    }
10671
10672    // -----------------------------------------------------------------
10673    // /v1/status — authenticated tenant-aware readiness
10674    // -----------------------------------------------------------------
10675
10676    #[test]
10677    fn status_returns_tenant_aware_payload() {
10678        let runtime = rt();
10679        let h = Harness::new(&runtime);
10680        let r = h.router.clone();
10681        runtime.block_on(async {
10682            let tid = solo_core::TenantId::default_tenant();
10683            h.registry
10684                .with_index(|idx| {
10685                    idx.register_with_quota(
10686                        &tid,
10687                        "default.db",
10688                        Some("Default tenant"),
10689                        Some(1_234_567),
10690                    )
10691                    .unwrap();
10692                })
10693                .await;
10694
10695            let (status, body) = call(r, "GET", "/v1/status", None).await;
10696            assert_eq!(status, StatusCode::OK, "body: {body}");
10697            assert_eq!(body["ok"].as_bool(), Some(true));
10698            assert_eq!(body["version"].as_str(), Some(env!("CARGO_PKG_VERSION")));
10699            assert_eq!(
10700                body.pointer("/tenant/id").and_then(|v| v.as_str()),
10701                Some("default")
10702            );
10703            assert_eq!(
10704                body.pointer("/tenant/registered").and_then(|v| v.as_bool()),
10705                Some(true)
10706            );
10707            assert_eq!(
10708                body.pointer("/tenant/status").and_then(|v| v.as_str()),
10709                Some("active")
10710            );
10711            assert_eq!(
10712                body.pointer("/tenant/quota_bytes").and_then(|v| v.as_u64()),
10713                Some(1_234_567)
10714            );
10715            assert!(
10716                body.pointer("/tenant/last_accessed_ms")
10717                    .and_then(|v| v.as_i64())
10718                    .is_some(),
10719                "status should surface the TenantExtractor touch: {body}"
10720            );
10721            assert_eq!(
10722                body.pointer("/embedder/name").and_then(|v| v.as_str()),
10723                Some("stub")
10724            );
10725            assert_eq!(
10726                body.pointer("/embedder/version").and_then(|v| v.as_str()),
10727                Some("v1")
10728            );
10729            assert_eq!(
10730                body.pointer("/embedder/dim").and_then(|v| v.as_u64()),
10731                Some(16)
10732            );
10733            assert_eq!(
10734                body.pointer("/embedder/dtype").and_then(|v| v.as_str()),
10735                Some("f32")
10736            );
10737            assert_eq!(body["active_tenants"].as_u64(), Some(1));
10738            assert_eq!(
10739                body.pointer("/mcp/sessions").and_then(|v| v.as_u64()),
10740                Some(0)
10741            );
10742        });
10743        h.shutdown(&runtime);
10744    }
10745
10746    #[test]
10747    fn status_respects_auth_when_enabled() {
10748        let runtime = rt();
10749        let h = Harness::new_with_auth(&runtime, Some("status-secret".into()));
10750        let r = h.router.clone();
10751        runtime.block_on(async {
10752            let (status, _body) = call(r.clone(), "GET", "/v1/status", None).await;
10753            assert_eq!(status, StatusCode::UNAUTHORIZED);
10754
10755            let (status, body) =
10756                call_with_auth(r, "GET", "/v1/status", None, Some("Bearer status-secret")).await;
10757            assert_eq!(status, StatusCode::OK, "body: {body}");
10758            assert_eq!(body["ok"].as_bool(), Some(true));
10759        });
10760        h.shutdown(&runtime);
10761    }
10762
10763    #[test]
10764    fn status_respects_tenant_scoping() {
10765        let runtime = rt();
10766        let h = Harness::new(&runtime);
10767        let r = h.router.clone();
10768        runtime.block_on(async {
10769            let (status, body) =
10770                call_with_tenant(r, "GET", "/v1/status", None, "never-registered").await;
10771            assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
10772        });
10773        h.shutdown(&runtime);
10774    }
10775
10776    // -----------------------------------------------------------------
10777    // /v1/tenants — principal-scoped tenant list (v0.10.0)
10778    //
10779    // Seeds the harness's in-memory tenants_index stub via
10780    // `harness.registry.with_index(|idx| idx.register(...))` to drive
10781    // the read-only list endpoint. The default tenant from the
10782    // harness's HashMap is NOT in the index stub by construction (the
10783    // `for_tests_with_single_tenant` factory only wires the cached
10784    // HashMap entry; the index starts empty after migrations), so each
10785    // test that wants the default tenant listed registers it
10786    // explicitly. This keeps the test setup explicit about what's
10787    // visible to `list_active` versus what's open in memory.
10788    // -----------------------------------------------------------------
10789
10790    /// Seed three Active tenants into the registry's index. Returns the
10791    /// ids in the order they were registered, which is the order
10792    /// `list_active` will return them in (ORDER BY created_at_ms ASC).
10793    async fn seed_three_tenants(registry: &TenantRegistry) -> Vec<String> {
10794        use solo_core::TenantId as TenantIdT;
10795        let ids = ["alice", "bob", "default"];
10796        for id in ids {
10797            let tid = TenantIdT::new(id).unwrap();
10798            registry
10799                .with_index(|idx| {
10800                    idx.register(&tid, &format!("{id}.db"), Some(&format!("{id} tenant")))
10801                        .unwrap();
10802                    // Ensure created_at_ms diverges so the ASC sort is
10803                    // deterministic — the index uses `chrono::Utc::now()`
10804                    // per row and 3 sequential inserts can land in the
10805                    // same ms on fast hardware.
10806                })
10807                .await;
10808            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
10809        }
10810        // Sort matches the `created_at_ms ASC, tenant_id ASC` order
10811        // `TenantsIndex::list` returns. We inserted in (alice, bob,
10812        // default) order with 2ms gaps, so that's the expected order.
10813        vec!["alice".into(), "bob".into(), "default".into()]
10814    }
10815
10816    /// 1. With `AuthConfig::None`, the handler returns every tenant
10817    ///    visible in the registry — same scope as `solo tenants list`.
10818    ///    Exercises the "no principal" branch of the visibility filter.
10819    #[test]
10820    fn tenants_returns_all_when_auth_none() {
10821        let runtime = rt();
10822        let h = Harness::new(&runtime);
10823        let r = h.router.clone();
10824        runtime.block_on(async {
10825            let _expected = seed_three_tenants(&h.registry).await;
10826            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10827            assert_eq!(status, StatusCode::OK);
10828            let arr = body
10829                .get("tenants")
10830                .and_then(|v| v.as_array())
10831                .expect("tenants array");
10832            assert_eq!(arr.len(), 3, "got body: {body}");
10833            let ids: Vec<&str> = arr.iter().filter_map(|t| t["id"].as_str()).collect();
10834            assert_eq!(ids, vec!["alice", "bob", "default"]);
10835        });
10836        h.shutdown(&runtime);
10837    }
10838
10839    /// 2. Under Bearer auth (single-principal mode), the handler
10840    ///    returns every tenant — the bearer holder is treated as the
10841    ///    daemon operator with full visibility. Exercises the bearer
10842    ///    branch of the visibility filter.
10843    #[test]
10844    fn tenants_returns_all_when_bearer_auth() {
10845        let runtime = rt();
10846        let h = Harness::new_with_auth(&runtime, Some("tlist-secret".into()));
10847        let r = h.router.clone();
10848        runtime.block_on(async {
10849            seed_three_tenants(&h.registry).await;
10850            let (status, body) =
10851                call_with_auth(r, "GET", "/v1/tenants", None, Some("Bearer tlist-secret")).await;
10852            assert_eq!(status, StatusCode::OK, "got body: {body}");
10853            let arr = body["tenants"].as_array().expect("tenants array");
10854            assert_eq!(arr.len(), 3, "bearer must see all tenants");
10855        });
10856        h.shutdown(&runtime);
10857    }
10858
10859    /// 3. Under OIDC, an authenticated principal carrying
10860    ///    `tenant_claim = "alice"` sees ONLY alice — not bob, not
10861    ///    default. Exercises the OIDC branch of the visibility filter.
10862    #[test]
10863    fn tenants_filters_to_principal_claim_when_oidc() {
10864        let runtime = rt();
10865        let (fake_server, discovery_url, secret, kid) =
10866            runtime.block_on(async { spin_fake_idp().await });
10867        let server_uri = fake_server.uri();
10868        let _server_guard = fake_server;
10869
10870        let auth = crate::auth::AuthConfig::Oidc {
10871            discovery_url,
10872            audience: "tlist-audience".to_string(),
10873            tenant_claim_name: "solo_tenant".to_string(),
10874        };
10875        let h = Harness::new_with_auth_config(&runtime, Some(auth));
10876        let r = h.router.clone();
10877
10878        runtime.block_on(async {
10879            seed_three_tenants(&h.registry).await;
10880            let token = mint_idp_token(&server_uri, kid, &secret, "alice", "tlist-audience");
10881            let (status, body) = call_with_auth(
10882                r,
10883                "GET",
10884                "/v1/tenants",
10885                None,
10886                Some(&format!("Bearer {token}")),
10887            )
10888            .await;
10889            assert_eq!(status, StatusCode::OK, "got body: {body}");
10890            let arr = body["tenants"].as_array().expect("tenants array");
10891            assert_eq!(arr.len(), 1, "OIDC alice must see exactly one tenant");
10892            assert_eq!(arr[0]["id"].as_str(), Some("alice"));
10893        });
10894        h.shutdown(&runtime);
10895    }
10896
10897    /// 4. Under OIDC with a `tenant_claim` that doesn't match any
10898    ///    registered tenant, the response is `200 OK` with
10899    ///    `tenants: []` — NOT 404. Don't leak whether other tenants
10900    ///    exist via a status-code side-channel for an OIDC principal
10901    ///    that lacks visibility to them.
10902    #[test]
10903    fn tenants_returns_empty_when_oidc_claim_unmatched() {
10904        let runtime = rt();
10905        let (fake_server, discovery_url, secret, kid) =
10906            runtime.block_on(async { spin_fake_idp().await });
10907        let server_uri = fake_server.uri();
10908        let _server_guard = fake_server;
10909
10910        let auth = crate::auth::AuthConfig::Oidc {
10911            discovery_url,
10912            audience: "tlist-audience".to_string(),
10913            tenant_claim_name: "solo_tenant".to_string(),
10914        };
10915        let h = Harness::new_with_auth_config(&runtime, Some(auth));
10916        let r = h.router.clone();
10917
10918        runtime.block_on(async {
10919            seed_three_tenants(&h.registry).await;
10920            // Mint a token claiming a tenant that IS a valid TenantId
10921            // (passes middleware) but doesn't exist in the index.
10922            let token = mint_idp_token(&server_uri, kid, &secret, "nonexistent", "tlist-audience");
10923            let (status, body) = call_with_auth(
10924                r,
10925                "GET",
10926                "/v1/tenants",
10927                None,
10928                Some(&format!("Bearer {token}")),
10929            )
10930            .await;
10931            assert_eq!(
10932                status,
10933                StatusCode::OK,
10934                "must be 200 OK, not 404 — don't leak tenant existence: {body}"
10935            );
10936            let arr = body["tenants"].as_array().expect("tenants array");
10937            assert_eq!(
10938                arr.len(),
10939                0,
10940                "unmatched OIDC claim must produce empty list, got: {body}"
10941            );
10942        });
10943        h.shutdown(&runtime);
10944    }
10945
10946    /// 5. JSON response shape matches what solo-web's TypeScript
10947    ///    client expects: `tenants[*].{id,display_name,created_at_ms,
10948    ///    status,quota_bytes,episode_count,size_bytes,pct_used,
10949    ///    last_accessed_ms}`. Catches accidental field renames at PR
10950    ///    time.
10951    ///
10952    ///    v0.10.1: `episode_count` / `size_bytes` / `pct_used` are
10953    ///    hydrated when the per-tenant DB file exists. This test
10954    ///    registers a tenant whose DB file does NOT exist (the
10955    ///    `for_tests_with_single_tenant` harness only writes the
10956    ///    `default` tenant's DB), so the three numeric fields land as
10957    ///    JSON `null` — verifying the `null` JSON value (not absence)
10958    ///    so clients see a stable shape regardless of hydration
10959    ///    success.
10960    #[test]
10961    fn tenants_response_shape_matches_solo_web_types() {
10962        let runtime = rt();
10963        let h = Harness::new(&runtime);
10964        let r = h.router.clone();
10965        runtime.block_on(async {
10966            // Register one tenant with a display_name + quota so all
10967            // optional fields are present in the response.
10968            let tid = solo_core::TenantId::new("shaped").unwrap();
10969            h.registry
10970                .with_index(|idx| {
10971                    idx.register_with_quota(
10972                        &tid,
10973                        "shaped.db",
10974                        Some("Shaped tenant"),
10975                        Some(1_048_576),
10976                    )
10977                    .unwrap();
10978                })
10979                .await;
10980            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10981            assert_eq!(status, StatusCode::OK);
10982            let item = &body["tenants"][0];
10983            // id, display_name, created_at_ms, status: required
10984            assert_eq!(item["id"].as_str(), Some("shaped"));
10985            assert_eq!(item["display_name"].as_str(), Some("Shaped tenant"));
10986            assert!(
10987                item["created_at_ms"].is_i64(),
10988                "created_at_ms must be an i64, got {item}"
10989            );
10990            assert_eq!(item["status"].as_str(), Some("active"));
10991            // quota_bytes: present + numeric
10992            assert_eq!(item["quota_bytes"].as_u64(), Some(1_048_576));
10993            // v0.10.1: episode_count / size_bytes / pct_used become
10994            // null when the per-tenant DB file is missing on disk
10995            // (this harness only writes the default tenant's file —
10996            // shaped.db does not exist). Clients must tolerate the
10997            // null JSON shape; absence would be a breaking change.
10998            assert!(
10999                item["episode_count"].is_null(),
11000                "episode_count must be JSON null when tenant DB is missing, got {item}"
11001            );
11002            assert!(
11003                item["size_bytes"].is_null(),
11004                "size_bytes must be JSON null when tenant DB is missing, got {item}"
11005            );
11006            assert!(
11007                item["pct_used"].is_null(),
11008                "pct_used must be JSON null when size_bytes is null, got {item}"
11009            );
11010        });
11011        h.shutdown(&runtime);
11012    }
11013
11014    /// 6. Bearer auth enabled + missing Authorization header → 401
11015    ///    before the handler runs. Confirms the route is plumbed
11016    ///    through `auth_middleware` (it sits inside the `authed`
11017    ///    sub-router, not the `public` one).
11018    /// `last_accessed_ms` is observational but user-facing: the
11019    /// registry stamps it when a tenant is resolved, and `/v1/tenants`
11020    /// must surface that value for solo-web's tenant/status UI. Listing
11021    /// alone must not fake a touch; a real tenant-scoped request should.
11022    #[test]
11023    fn tenants_response_surfaces_last_accessed_after_tenant_request() {
11024        let runtime = rt();
11025        let h = Harness::new(&runtime);
11026        let r = h.router.clone();
11027        runtime.block_on(async {
11028            let tid = solo_core::TenantId::default_tenant();
11029            h.registry
11030                .with_index(|idx| {
11031                    idx.register(&tid, "default.db", Some("Default tenant"))
11032                        .unwrap();
11033                })
11034                .await;
11035
11036            let (status, before_body) = call(r.clone(), "GET", "/v1/tenants", None).await;
11037            assert_eq!(status, StatusCode::OK);
11038            let before_item = &before_body["tenants"][0];
11039            assert_eq!(before_item["id"].as_str(), Some("default"));
11040            assert!(
11041                before_item["last_accessed_ms"].is_null(),
11042                "freshly registered tenant should start untouched: {before_item}"
11043            );
11044
11045            let before_touch_ms = chrono::Utc::now().timestamp_millis();
11046            let (graph_status, graph_body) = call(
11047                r.clone(),
11048                "GET",
11049                "/v1/graph/nodes?kind=episode&limit=1",
11050                None,
11051            )
11052            .await;
11053            assert_eq!(graph_status, StatusCode::OK, "graph body: {graph_body}");
11054
11055            let (status, after_body) = call(r, "GET", "/v1/tenants", None).await;
11056            assert_eq!(status, StatusCode::OK);
11057            let after_item = &after_body["tenants"][0];
11058            let last_accessed = after_item["last_accessed_ms"]
11059                .as_i64()
11060                .unwrap_or_else(|| panic!("last_accessed_ms must be stamped: {after_item}"));
11061            assert!(
11062                last_accessed >= before_touch_ms,
11063                "last_accessed_ms should reflect the graph request touch: {after_item}"
11064            );
11065        });
11066        h.shutdown(&runtime);
11067    }
11068
11069    #[test]
11070    fn tenants_respects_auth_when_enabled() {
11071        let runtime = rt();
11072        let h = Harness::new_with_auth(&runtime, Some("must-auth".into()));
11073        let r = h.router.clone();
11074        runtime.block_on(async {
11075            seed_three_tenants(&h.registry).await;
11076            // No Authorization header → 401.
11077            let (status, _body) = call(r, "GET", "/v1/tenants", None).await;
11078            assert_eq!(status, StatusCode::UNAUTHORIZED);
11079        });
11080        h.shutdown(&runtime);
11081    }
11082
11083    /// 7. `PendingMigration` and `PendingDelete` rows are excluded
11084    ///    from the response. solo-web's tenant picker should never
11085    ///    surface a row that's mid-admin-operation (race with admin
11086    ///    tooling). Only Active tenants make the list.
11087    #[test]
11088    fn tenants_status_filter_excludes_non_active() {
11089        let runtime = rt();
11090        let h = Harness::new(&runtime);
11091        let r = h.router.clone();
11092        runtime.block_on(async {
11093            // Three tenants, three statuses. Only `keeper` (Active)
11094            // should appear on the wire.
11095            let keeper = solo_core::TenantId::new("keeper").unwrap();
11096            let migrating = solo_core::TenantId::new("migrating").unwrap();
11097            let deleting = solo_core::TenantId::new("deleting").unwrap();
11098            h.registry
11099                .with_index(|idx| {
11100                    idx.register(&keeper, "keeper.db", None).unwrap();
11101                    idx.register_with_status(
11102                        &migrating,
11103                        "migrating.db",
11104                        None,
11105                        solo_storage::TenantStatus::PendingMigration,
11106                    )
11107                    .unwrap();
11108                    idx.register_with_status(
11109                        &deleting,
11110                        "deleting.db",
11111                        None,
11112                        solo_storage::TenantStatus::PendingDelete,
11113                    )
11114                    .unwrap();
11115                })
11116                .await;
11117            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11118            assert_eq!(status, StatusCode::OK);
11119            let arr = body["tenants"].as_array().expect("tenants array");
11120            let ids: Vec<&str> = arr.iter().filter_map(|t| t["id"].as_str()).collect();
11121            assert_eq!(
11122                ids,
11123                vec!["keeper"],
11124                "only Active tenants visible; got: {body}"
11125            );
11126        });
11127        h.shutdown(&runtime);
11128    }
11129
11130    /// 8. Empty registry → `200 OK` with `tenants: []`. Defends
11131    ///    against accidental `None` serialisation or 404'ing on an
11132    ///    empty list. solo-web's first paint on a brand-new daemon
11133    ///    needs an empty array to render the "no tenants yet" state.
11134    #[test]
11135    fn tenants_returns_empty_array_when_no_tenants_registered() {
11136        let runtime = rt();
11137        let h = Harness::new(&runtime);
11138        let r = h.router.clone();
11139        runtime.block_on(async {
11140            // Don't seed anything — the harness's in-memory index
11141            // starts at zero rows (the cached default-tenant handle in
11142            // the HashMap is invisible to `list_active`).
11143            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11144            assert_eq!(status, StatusCode::OK);
11145            let arr = body["tenants"].as_array().expect("tenants array");
11146            assert_eq!(arr.len(), 0, "expected empty array, got: {body}");
11147        });
11148        h.shutdown(&runtime);
11149    }
11150
11151    // ---- v0.10.1: cost-number hydration tests ----
11152    //
11153    // These exercise `TenantRegistry::hydrate_tenant_cost_numbers` end-
11154    // to-end through the `/v1/tenants` handler. The harness's
11155    // `for_tests_with_single_tenant` registry uses a plain-SQLite tenant
11156    // DB (not real SQLCipher); the hydration helper has a fallback
11157    // open path for that case (see registry.rs). The
11158    // `_tmp_dir/tenants/<filename>` layout matters: that's where the
11159    // hydration helper looks. These tests create real files there to
11160    // exercise the size_bytes path; episode_count requires the file to
11161    // be a SQLite DB with the `episodes` table.
11162    //
11163    // The `default` tenant exists at `_tmp_dir/test.db` (set by the
11164    // harness); the hydration helper expects `_tmp_dir/tenants/<file>`.
11165    // So we either (a) register a fresh tenant id pointing at a DB we
11166    // create at the expected layout, or (b) check the documented
11167    // behavior under "file missing" (returns null counts gracefully).
11168    // Both shapes are tested here.
11169    //
11170    // The constant `TENANTS_COUNT_HYDRATION_CAP` is grep-able.
11171
11172    /// Helper: create a per-tenant DB file at the layout the hydration
11173    /// helper expects (`<data_dir>/tenants/<db_filename>`), populated
11174    /// with the `episodes` table + `n_active` active episodes +
11175    /// `n_forgotten` forgotten episodes. Returns the absolute path.
11176    fn seed_per_tenant_db_with_episodes(
11177        data_dir: &std::path::Path,
11178        db_filename: &str,
11179        n_active: i64,
11180        n_forgotten: i64,
11181    ) -> std::path::PathBuf {
11182        let tenants_dir = data_dir.join(solo_storage::TENANTS_SUBDIR);
11183        std::fs::create_dir_all(&tenants_dir).unwrap();
11184        let db_path = tenants_dir.join(db_filename);
11185        // Open as plain SQLite (test path; matches the harness's
11186        // `open_test_db_at` shape; hydration helper falls back to plain
11187        // open when SQLCipher open fails).
11188        let mut conn = rusqlite::Connection::open(&db_path).unwrap();
11189        // Run the same migrations the real per-tenant DB does so the
11190        // `episodes` table + `status` CHECK constraint match production.
11191        solo_storage::run_migrations(&mut conn).unwrap();
11192        for i in 0..n_active {
11193            conn.execute(
11194                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
11195                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'active', 0, 0)",
11196                rusqlite::params![format!("a-{i}")],
11197            )
11198            .unwrap();
11199        }
11200        for i in 0..n_forgotten {
11201            conn.execute(
11202                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
11203                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'forgotten', 0, 0)",
11204                rusqlite::params![format!("f-{i}")],
11205            )
11206            .unwrap();
11207        }
11208        drop(conn);
11209        db_path
11210    }
11211
11212    /// v0.10.1 test 1: `episode_count` hydrates to the actual active
11213    /// episode count when the per-tenant DB exists. Seed 3 active + 2
11214    /// forgotten episodes; expect `episode_count: 3` (the `status =
11215    /// 'active'` filter excludes the forgotten rows).
11216    #[test]
11217    fn tenants_response_hydrates_episode_count_when_tenant_has_data() {
11218        let runtime = rt();
11219        let h = Harness::new(&runtime);
11220        let r = h.router.clone();
11221        let data_dir = h._tmp.path().to_path_buf();
11222        runtime.block_on(async {
11223            let tid = solo_core::TenantId::new("counted").unwrap();
11224            seed_per_tenant_db_with_episodes(&data_dir, "counted.db", 3, 2);
11225            h.registry
11226                .with_index(|idx| {
11227                    idx.register(&tid, "counted.db", Some("Counted tenant"))
11228                        .unwrap();
11229                })
11230                .await;
11231            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11232            assert_eq!(status, StatusCode::OK);
11233            let item = &body["tenants"][0];
11234            assert_eq!(item["id"].as_str(), Some("counted"));
11235            assert_eq!(
11236                item["episode_count"].as_i64(),
11237                Some(3),
11238                "episode_count must be 3 (active rows only, 2 forgotten excluded); got {item}"
11239            );
11240        });
11241        h.shutdown(&runtime);
11242    }
11243
11244    /// v0.10.1 test 2: `size_bytes` reports the on-disk size of the
11245    /// per-tenant DB file. Asserts the response value matches
11246    /// `std::fs::metadata(<db_path>).len()` exactly — pins that we
11247    /// read the right file, not e.g. data_dir or a temp.
11248    #[test]
11249    fn tenants_response_hydrates_size_bytes_from_db_file() {
11250        let runtime = rt();
11251        let h = Harness::new(&runtime);
11252        let r = h.router.clone();
11253        let data_dir = h._tmp.path().to_path_buf();
11254        runtime.block_on(async {
11255            let tid = solo_core::TenantId::new("sized").unwrap();
11256            let db_path = seed_per_tenant_db_with_episodes(&data_dir, "sized.db", 1, 0);
11257            h.registry
11258                .with_index(|idx| {
11259                    idx.register(&tid, "sized.db", None).unwrap();
11260                })
11261                .await;
11262            let on_disk = std::fs::metadata(&db_path).unwrap().len();
11263            assert!(on_disk > 0, "test setup: db file should be non-empty");
11264            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11265            assert_eq!(status, StatusCode::OK);
11266            let item = &body["tenants"][0];
11267            assert_eq!(item["id"].as_str(), Some("sized"));
11268            assert_eq!(
11269                item["size_bytes"].as_u64(),
11270                Some(on_disk),
11271                "size_bytes must match fs::metadata; got {item}"
11272            );
11273        });
11274        h.shutdown(&runtime);
11275    }
11276
11277    /// v0.10.1 test 3: `pct_used` is computed from `size_bytes /
11278    /// quota_bytes * 100` when both are known. Pick a quota much
11279    /// larger than the DB so the percentage stays in a sane range
11280    /// (and survives any unrelated DB-page padding).
11281    #[test]
11282    fn tenants_response_computes_pct_used_when_quota_set() {
11283        let runtime = rt();
11284        let h = Harness::new(&runtime);
11285        let r = h.router.clone();
11286        let data_dir = h._tmp.path().to_path_buf();
11287        runtime.block_on(async {
11288            let tid = solo_core::TenantId::new("quoted").unwrap();
11289            let db_path = seed_per_tenant_db_with_episodes(&data_dir, "quoted.db", 1, 0);
11290            // Pick a quota that's large enough that pct_used lands
11291            // between 0 and 50% regardless of SQLite page boundary
11292            // rounding. Asserting an exact float would be flaky.
11293            let on_disk = std::fs::metadata(&db_path).unwrap().len();
11294            let quota = on_disk * 4; // pct_used should be ~25%
11295            h.registry
11296                .with_index(|idx| {
11297                    idx.register_with_quota(&tid, "quoted.db", None, Some(quota))
11298                        .unwrap();
11299                })
11300                .await;
11301            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11302            assert_eq!(status, StatusCode::OK);
11303            let item = &body["tenants"][0];
11304            let pct = item["pct_used"]
11305                .as_f64()
11306                .expect("pct_used must be a number");
11307            assert!(
11308                (0.0..=100.0).contains(&pct),
11309                "pct_used must be in [0, 100], got {pct}"
11310            );
11311            // Allow a wide band — exact value depends on SQLite page
11312            // size — but the recipe (size/quota*100) means a
11313            // size=quota/4 setup must land near 25%.
11314            assert!(
11315                (20.0..=30.0).contains(&pct),
11316                "pct_used must be ~25% for size=quota/4, got {pct}"
11317            );
11318        });
11319        h.shutdown(&runtime);
11320    }
11321
11322    /// v0.10.1 test 4: `pct_used` is `null` when `quota_bytes` is
11323    /// null (the "unlimited" case). Pins that we don't accidentally
11324    /// emit a numeric `0.0` or `100.0` for unlimited quotas.
11325    #[test]
11326    fn tenants_response_pct_used_null_when_quota_null() {
11327        let runtime = rt();
11328        let h = Harness::new(&runtime);
11329        let r = h.router.clone();
11330        let data_dir = h._tmp.path().to_path_buf();
11331        runtime.block_on(async {
11332            let tid = solo_core::TenantId::new("unlimited").unwrap();
11333            seed_per_tenant_db_with_episodes(&data_dir, "unlimited.db", 1, 0);
11334            h.registry
11335                .with_index(|idx| {
11336                    idx.register(&tid, "unlimited.db", None).unwrap();
11337                })
11338                .await;
11339            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11340            assert_eq!(status, StatusCode::OK);
11341            let item = &body["tenants"][0];
11342            assert_eq!(item["id"].as_str(), Some("unlimited"));
11343            assert!(
11344                item["quota_bytes"].is_null(),
11345                "test setup: quota_bytes must be null, got {item}"
11346            );
11347            assert!(
11348                item["pct_used"].is_null(),
11349                "pct_used must be JSON null when quota_bytes is null, got {item}"
11350            );
11351            // size_bytes still present (no quota doesn't suppress
11352            // size — only pct_used).
11353            assert!(
11354                item["size_bytes"].is_u64(),
11355                "size_bytes must still be present when quota_bytes is null, got {item}"
11356            );
11357        });
11358        h.shutdown(&runtime);
11359    }
11360
11361    /// v0.10.1 test 5: the response includes
11362    /// `X-Solo-Tenants-Count-Cap-Reached: true` when the filtered
11363    /// tenant count exceeds `TENANTS_COUNT_HYDRATION_CAP`. Tenants
11364    /// beyond the cap have `episode_count: null` even though their
11365    /// `size_bytes` is still hydrated (fs::metadata is cheap).
11366    ///
11367    /// We don't seed 51 real DBs (would be slow); instead, we
11368    /// register 51 tenant rows in the index. The cap is documented
11369    /// to apply to `episode_count` hydration, and the header is
11370    /// emitted purely from the count of filtered records. The
11371    /// header semantics here are independent of per-tenant DB
11372    /// existence.
11373    #[test]
11374    fn tenants_response_sets_cap_reached_header_when_over_cap() {
11375        let runtime = rt();
11376        let h = Harness::new(&runtime);
11377        let r = h.router.clone();
11378        runtime.block_on(async {
11379            // Register 51 tenants (cap = 50, so we exceed it).
11380            h.registry
11381                .with_index(|idx| {
11382                    for i in 0..51 {
11383                        let id = format!("t{i:02}");
11384                        let tid = solo_core::TenantId::new(&id).unwrap();
11385                        idx.register(&tid, &format!("{id}.db"), None).unwrap();
11386                    }
11387                })
11388                .await;
11389            // Send a raw request so we can inspect headers.
11390            use axum::body::Body;
11391            use axum::http::Request;
11392            use http_body_util::BodyExt;
11393            let req = Request::builder()
11394                .method("GET")
11395                .uri("/v1/tenants")
11396                .body(Body::empty())
11397                .unwrap();
11398            let resp = r.oneshot(req).await.unwrap();
11399            assert_eq!(resp.status(), StatusCode::OK);
11400            let cap_header = resp
11401                .headers()
11402                .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
11403                .expect("cap-reached header must be present");
11404            assert_eq!(
11405                cap_header.to_str().unwrap(),
11406                "true",
11407                "cap-reached header value must be 'true' when over cap"
11408            );
11409            // Parse body to verify shape — beyond-cap tenants have
11410            // null episode_count.
11411            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
11412            let body: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
11413            let arr = body["tenants"].as_array().expect("tenants array");
11414            assert_eq!(arr.len(), 51, "got {} tenants", arr.len());
11415            // The last (sorted-by-created_at_ms) tenant should be
11416            // beyond the cap. The hydration order matches the
11417            // filtered list order, so index 50 is the 51st tenant
11418            // and should have null episode_count.
11419            assert!(
11420                arr[50]["episode_count"].is_null(),
11421                "the 51st tenant (beyond cap) must have null episode_count, got {}",
11422                arr[50]
11423            );
11424        });
11425        h.shutdown(&runtime);
11426    }
11427
11428    /// v0.10.1 test 6: when the response is under the cap, the
11429    /// `X-Solo-Tenants-Count-Cap-Reached` header is absent. Pin the
11430    /// negative case so a future refactor that always emits the
11431    /// header (with "false") doesn't pass silently.
11432    #[test]
11433    fn tenants_response_omits_cap_header_when_under_cap() {
11434        let runtime = rt();
11435        let h = Harness::new(&runtime);
11436        let r = h.router.clone();
11437        runtime.block_on(async {
11438            seed_three_tenants(&h.registry).await;
11439            use axum::body::Body;
11440            use axum::http::Request;
11441            let req = Request::builder()
11442                .method("GET")
11443                .uri("/v1/tenants")
11444                .body(Body::empty())
11445                .unwrap();
11446            let resp = r.oneshot(req).await.unwrap();
11447            assert_eq!(resp.status(), StatusCode::OK);
11448            assert!(
11449                resp.headers()
11450                    .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
11451                    .is_none(),
11452                "cap-reached header must be absent under the cap"
11453            );
11454        });
11455        h.shutdown(&runtime);
11456    }
11457
11458    // ---- Pure unit tests on the visibility filter ----
11459    //
11460    // These exercise `filter_tenants_for_principal` and
11461    // `is_single_principal_bearer` without an axum router — fast
11462    // feedback for the load-bearing visibility rule. The
11463    // router-level tests above cover the wire path.
11464
11465    /// Build a synthetic `TenantRecord` so the pure unit tests don't
11466    /// need a real SQLCipher round-trip.
11467    fn make_record(id: &str) -> solo_storage::TenantRecord {
11468        solo_storage::TenantRecord {
11469            tenant_id: solo_core::TenantId::new(id).unwrap(),
11470            db_filename: format!("{id}.db"),
11471            display_name: None,
11472            created_at_ms: 0,
11473            status: solo_storage::TenantStatus::Active,
11474            quota_bytes: None,
11475            last_accessed_ms: None,
11476        }
11477    }
11478
11479    #[test]
11480    fn filter_no_principal_returns_all() {
11481        let records = vec![make_record("a"), make_record("b")];
11482        let out = filter_tenants_for_principal(records.clone(), None);
11483        assert_eq!(out.len(), 2);
11484        assert_eq!(out[0].tenant_id.as_str(), "a");
11485        assert_eq!(out[1].tenant_id.as_str(), "b");
11486    }
11487
11488    #[test]
11489    fn filter_bearer_principal_returns_all() {
11490        let records = vec![make_record("a"), make_record("b")];
11491        let p = AuthenticatedPrincipal::bearer(solo_core::TenantId::new("a").unwrap());
11492        let out = filter_tenants_for_principal(records, Some(&p));
11493        assert_eq!(out.len(), 2);
11494    }
11495
11496    #[test]
11497    fn filter_oidc_principal_keeps_only_claim() {
11498        let records = vec![make_record("a"), make_record("b"), make_record("c")];
11499        // OIDC-flavoured principal: non-bearer subject + JSON-object claims.
11500        let p = AuthenticatedPrincipal {
11501            subject: "alice@example.com".to_string(),
11502            tenant_claim: Some(solo_core::TenantId::new("b").unwrap()),
11503            scopes: vec!["read".to_string()],
11504            claims: serde_json::json!({ "sub": "alice@example.com" }),
11505        };
11506        let out = filter_tenants_for_principal(records, Some(&p));
11507        assert_eq!(out.len(), 1);
11508        assert_eq!(out[0].tenant_id.as_str(), "b");
11509    }
11510
11511    #[test]
11512    fn filter_oidc_principal_with_no_claim_returns_empty() {
11513        // Theoretically unreachable — middleware short-circuits at 403
11514        // before we see a no-claim OIDC principal. Defend anyway.
11515        let records = vec![make_record("a")];
11516        let p = AuthenticatedPrincipal {
11517            subject: "alice@example.com".to_string(),
11518            tenant_claim: None,
11519            scopes: vec![],
11520            claims: serde_json::json!({ "sub": "alice@example.com" }),
11521        };
11522        let out = filter_tenants_for_principal(records, Some(&p));
11523        assert!(out.is_empty());
11524    }
11525
11526    #[test]
11527    fn is_single_principal_bearer_discriminator() {
11528        let bearer = AuthenticatedPrincipal::bearer(solo_core::TenantId::new("default").unwrap());
11529        assert!(is_single_principal_bearer(&bearer));
11530
11531        let oidc = AuthenticatedPrincipal {
11532            subject: "alice".to_string(),
11533            tenant_claim: Some(solo_core::TenantId::new("alice").unwrap()),
11534            scopes: vec![],
11535            claims: serde_json::json!({ "x": 1 }),
11536        };
11537        assert!(!is_single_principal_bearer(&oidc));
11538
11539        // Subject == "bearer" but claims is a non-null object → not a
11540        // bearer-shaped principal. Defends against a forged-bearer
11541        // shape that might smuggle JWT claims.
11542        let weird = AuthenticatedPrincipal {
11543            subject: "bearer".to_string(),
11544            tenant_claim: Some(solo_core::TenantId::default_tenant()),
11545            scopes: vec![],
11546            claims: serde_json::json!({ "leak": 1 }),
11547        };
11548        assert!(!is_single_principal_bearer(&weird));
11549    }
11550
11551    // ---------------------------------------------------------------
11552    // v0.10.2 — MCP-over-HTTP transport on /mcp
11553    // ---------------------------------------------------------------
11554    //
11555    // These tests pin the wire contract for the new `/mcp` route added
11556    // in v0.10.2 P2. We exercise the route through the same `Harness`
11557    // pattern the rest of the file uses (in-process axum Router via
11558    // `tower::ServiceExt::oneshot`) — no real TCP listener needed.
11559    //
11560    // The dispatcher's unit tests live in `mcp_dispatch::tests` and
11561    // cover the JSON-RPC envelope shape in isolation. These tests are
11562    // the integration layer: real `TenantHandle`, real `WriterActor`,
11563    // real `SoloMcpServer::dispatch_tool` path.
11564
11565    /// `POST /mcp` with `{jsonrpc, id, method: "tools/list"}` returns
11566    /// the canonical 18 tools. Matches the stdio smoke test
11567    /// `mcp_stdio_lists_eighteen_canonical_tools` from
11568    /// `crates/solo-cli/tests/mcp_smoke.rs` so any drift between the
11569    /// two transports fails one of the two suites loudly.
11570    #[test]
11571    fn mcp_http_tools_list_returns_eighteen_canonical_tools() {
11572        let runtime = rt();
11573        let h = Harness::new(&runtime);
11574        let r = h.router.clone();
11575        runtime.block_on(async move {
11576            let req = json!({
11577                "jsonrpc": "2.0",
11578                "id": 1,
11579                "method": "tools/list",
11580            });
11581            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11582            assert_eq!(status, StatusCode::OK);
11583            assert_eq!(body.get("jsonrpc").and_then(|v| v.as_str()), Some("2.0"));
11584            assert_eq!(body.get("id").and_then(|v| v.as_i64()), Some(1));
11585            let tools = body
11586                .pointer("/result/tools")
11587                .and_then(|v| v.as_array())
11588                .unwrap_or_else(|| panic!("missing /result/tools: {body}"));
11589            let mut names: Vec<String> = tools
11590                .iter()
11591                .filter_map(|t| t.get("name").and_then(|n| n.as_str()).map(String::from))
11592                .collect();
11593            names.sort();
11594            assert_eq!(
11595                names,
11596                vec![
11597                    "memory_context".to_string(),
11598                    "memory_contradiction_resolve".to_string(),
11599                    "memory_contradictions".to_string(),
11600                    "memory_entities".to_string(),
11601                    "memory_facts_about".to_string(),
11602                    "memory_forget".to_string(),
11603                    "memory_forget_document".to_string(),
11604                    "memory_ingest_document".to_string(),
11605                    "memory_inspect".to_string(),
11606                    "memory_inspect_cluster".to_string(),
11607                    "memory_inspect_document".to_string(),
11608                    "memory_list_documents".to_string(),
11609                    "memory_recall".to_string(),
11610                    "memory_remember".to_string(),
11611                    "memory_remember_batch".to_string(),
11612                    "memory_search_docs".to_string(),
11613                    "memory_themes".to_string(),
11614                    "memory_update".to_string(),
11615                ],
11616                "mcp_http: tools/list returned unexpected name set"
11617            );
11618        });
11619        h.shutdown(&runtime);
11620    }
11621
11622    /// `POST /mcp` with `tools/call` for `memory_remember` writes the
11623    /// episode and returns a confirmation string. Then a separate
11624    /// `GET /v1/graph/nodes` call (REST surface) sees the episode —
11625    /// proving one process is serving both surfaces against the same
11626    /// writer.
11627    #[test]
11628    fn mcp_http_remember_writes_episode_visible_via_graph_nodes() {
11629        let runtime = rt();
11630        let h = Harness::new(&runtime);
11631        let r = h.router.clone();
11632        runtime.block_on(async move {
11633            // 1. memory_remember via /mcp.
11634            let req = json!({
11635                "jsonrpc": "2.0",
11636                "id": 2,
11637                "method": "tools/call",
11638                "params": {
11639                    "name": "memory_remember",
11640                    "arguments": { "content": "mcp-http-cross-surface-smoke" },
11641                },
11642            });
11643            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11644            assert_eq!(status, StatusCode::OK);
11645            let result_text = body
11646                .pointer("/result/content/0/text")
11647                .and_then(|v| v.as_str())
11648                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11649            assert!(
11650                result_text.starts_with("remembered "),
11651                "expected `remembered <id>`, got: {result_text}"
11652            );
11653
11654            // 2. Confirm via /v1/graph/nodes (REST). Same writer, same
11655            //    tenant — the cross-surface smoke that motivates v0.10.2.
11656            //    Episode nodes carry the content under `label` +
11657            //    `preview` (the v0.10.0 graph-nodes wire shape).
11658            let (status2, nodes_body) =
11659                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
11660            assert_eq!(status2, StatusCode::OK);
11661            let nodes = nodes_body
11662                .get("nodes")
11663                .and_then(|v| v.as_array())
11664                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
11665            assert!(
11666                nodes.iter().any(|n| {
11667                    let label_hit = n
11668                        .get("label")
11669                        .and_then(|c| c.as_str())
11670                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
11671                    let preview_hit = n
11672                        .get("preview")
11673                        .and_then(|c| c.as_str())
11674                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
11675                    label_hit || preview_hit
11676                }),
11677                "graph/nodes didn't surface the MCP-written episode: {nodes_body}"
11678            );
11679        });
11680        h.shutdown(&runtime);
11681    }
11682
11683    /// `memory_remember_batch` must accept the canonical `{ items: [...] }`
11684    /// argument envelope and land all rows in the same graph REST surface.
11685    /// This is the batch variant of the cross-surface smoke above and
11686    /// protects external clients from drifting to a renamed field.
11687    #[test]
11688    fn mcp_http_remember_batch_items_visible_via_graph_nodes() {
11689        let runtime = rt();
11690        let h = Harness::new(&runtime);
11691        let r = h.router.clone();
11692        runtime.block_on(async move {
11693            let marker_a = "mcp-http-batch-cross-surface-smoke-a";
11694            let marker_b = "mcp-http-batch-cross-surface-smoke-b";
11695            let req = json!({
11696                "jsonrpc": "2.0",
11697                "id": 22,
11698                "method": "tools/call",
11699                "params": {
11700                    "name": "memory_remember_batch",
11701                    "arguments": {
11702                        "items": [
11703                            { "content": marker_a, "source_type": "smoke", "salience": 0.7 },
11704                            { "content": marker_b, "source_type": "smoke", "salience": 0.7 }
11705                        ]
11706                    },
11707                },
11708            });
11709            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11710            assert_eq!(status, StatusCode::OK, "batch body: {body}");
11711            let result_text = body
11712                .pointer("/result/content/0/text")
11713                .and_then(|v| v.as_str())
11714                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11715            let ids: Vec<String> = serde_json::from_str(result_text)
11716                .unwrap_or_else(|e| panic!("batch result should be JSON id array: {e}: {body}"));
11717            assert_eq!(ids.len(), 2, "two items in, two ids out: {result_text}");
11718
11719            let (status2, nodes_body) =
11720                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
11721            assert_eq!(status2, StatusCode::OK);
11722            let nodes = nodes_body
11723                .get("nodes")
11724                .and_then(|v| v.as_array())
11725                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
11726            for marker in [marker_a, marker_b] {
11727                assert!(
11728                    nodes.iter().any(|n| {
11729                        let label_hit = n
11730                            .get("label")
11731                            .and_then(|c| c.as_str())
11732                            .is_some_and(|s| s.contains(marker));
11733                        let preview_hit = n
11734                            .get("preview")
11735                            .and_then(|c| c.as_str())
11736                            .is_some_and(|s| s.contains(marker));
11737                        label_hit || preview_hit
11738                    }),
11739                    "graph/nodes didn't surface batch marker {marker}: {nodes_body}"
11740                );
11741            }
11742        });
11743        h.shutdown(&runtime);
11744    }
11745
11746    /// `memory_remember_batch`'s HTTP MCP contract is the canonical
11747    /// `{ items: [...] }` envelope. A drift back to `{ entries: [...] }`
11748    /// must fail as JSON-RPC invalid_params, not silently accept or
11749    /// partially write.
11750    #[test]
11751    fn mcp_http_remember_batch_rejects_entries_envelope() {
11752        let runtime = rt();
11753        let h = Harness::new(&runtime);
11754        let r = h.router.clone();
11755        runtime.block_on(async move {
11756            let req = json!({
11757                "jsonrpc": "2.0",
11758                "id": 23,
11759                "method": "tools/call",
11760                "params": {
11761                    "name": "memory_remember_batch",
11762                    "arguments": {
11763                        "entries": [
11764                            { "content": "legacy-entries-envelope" }
11765                        ]
11766                    },
11767                },
11768            });
11769            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11770            assert_eq!(
11771                status,
11772                StatusCode::OK,
11773                "JSON-RPC errors stay in-body: {body}"
11774            );
11775            assert_eq!(
11776                body.pointer("/error/code").and_then(|v| v.as_i64()),
11777                Some(-32602),
11778                "expected JSON-RPC INVALID_PARAMS (-32602), got: {body}"
11779            );
11780            let message = body
11781                .pointer("/error/message")
11782                .and_then(|v| v.as_str())
11783                .unwrap_or("");
11784            assert!(
11785                message.contains("invalid tool arguments") && message.contains("items"),
11786                "error must point clients back to the canonical items envelope: {body}"
11787            );
11788        });
11789        h.shutdown(&runtime);
11790    }
11791
11792    /// `POST /mcp` with `tools/call` for `memory_recall` returns the
11793    /// just-remembered episode. Smoke for the read path under the new
11794    /// transport.
11795    #[test]
11796    fn mcp_http_recall_returns_just_remembered_episode() {
11797        let runtime = rt();
11798        let h = Harness::new(&runtime);
11799        let r = h.router.clone();
11800        runtime.block_on(async move {
11801            // Remember first.
11802            let needle = "mcp-http-recall-needle-deadbeef";
11803            let req = json!({
11804                "jsonrpc": "2.0",
11805                "id": 3,
11806                "method": "tools/call",
11807                "params": {
11808                    "name": "memory_remember",
11809                    "arguments": { "content": needle },
11810                },
11811            });
11812            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11813            assert_eq!(status, StatusCode::OK);
11814
11815            // Recall via the same /mcp transport.
11816            let req = json!({
11817                "jsonrpc": "2.0",
11818                "id": 4,
11819                "method": "tools/call",
11820                "params": {
11821                    "name": "memory_recall",
11822                    "arguments": { "query": needle, "limit": 5 },
11823                },
11824            });
11825            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11826            assert_eq!(status, StatusCode::OK);
11827            let recall_text = body
11828                .pointer("/result/content/0/text")
11829                .and_then(|v| v.as_str())
11830                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11831            assert!(
11832                recall_text.contains(needle),
11833                "recall didn't surface needle `{needle}`: {recall_text}"
11834            );
11835        });
11836        h.shutdown(&runtime);
11837    }
11838
11839    /// Malformed JSON body must surface as 400 (the wire envelope is
11840    /// invalid; the JSON-RPC layer never sees the request). The error
11841    /// body shape matches the rest of the API (`{error, status}`) so
11842    /// existing client error-handling paths keep working.
11843    #[test]
11844    fn mcp_http_malformed_body_returns_400() {
11845        let runtime = rt();
11846        let h = Harness::new(&runtime);
11847        let r = h.router.clone();
11848        runtime.block_on(async move {
11849            let req = Request::builder()
11850                .method("POST")
11851                .uri("/mcp")
11852                .header("content-type", "application/json")
11853                .body(Body::from("not-json-at-all".as_bytes()))
11854                .unwrap();
11855            let resp = r.oneshot(req).await.unwrap();
11856            assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
11857            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
11858            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
11859            assert!(
11860                v.get("error")
11861                    .and_then(|e| e.as_str())
11862                    .map(|s| s.contains("invalid JSON-RPC request"))
11863                    .unwrap_or(false),
11864                "got: {v}"
11865            );
11866        });
11867        h.shutdown(&runtime);
11868    }
11869
11870    /// Wrong `jsonrpc` version must surface as 400. JSON-RPC 2.0 §4
11871    /// requires the literal string `"2.0"`.
11872    #[test]
11873    fn mcp_http_wrong_jsonrpc_version_returns_400() {
11874        let runtime = rt();
11875        let h = Harness::new(&runtime);
11876        let r = h.router.clone();
11877        runtime.block_on(async move {
11878            let req = json!({
11879                "jsonrpc": "1.0",
11880                "id": 1,
11881                "method": "tools/list",
11882            });
11883            let (status, _body) = call(r, "POST", "/mcp", Some(req)).await;
11884            assert_eq!(status, StatusCode::BAD_REQUEST);
11885        });
11886        h.shutdown(&runtime);
11887    }
11888
11889    /// Unknown method returns a JSON-RPC error envelope with code
11890    /// -32601 (METHOD_NOT_FOUND). HTTP status stays 200 because the
11891    /// envelope itself parsed fine — JSON-RPC errors are in-body.
11892    #[test]
11893    fn mcp_http_unknown_method_returns_in_body_method_not_found() {
11894        let runtime = rt();
11895        let h = Harness::new(&runtime);
11896        let r = h.router.clone();
11897        runtime.block_on(async move {
11898            let req = json!({
11899                "jsonrpc": "2.0",
11900                "id": 5,
11901                "method": "definitely/not/a/method",
11902            });
11903            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11904            assert_eq!(status, StatusCode::OK);
11905            assert_eq!(
11906                body.pointer("/error/code").and_then(|v| v.as_i64()),
11907                Some(-32601),
11908                "expected JSON-RPC METHOD_NOT_FOUND (-32601), got: {body}"
11909            );
11910        });
11911        h.shutdown(&runtime);
11912    }
11913
11914    /// `POST /mcp` with the bearer-auth middleware enabled returns
11915    /// 401 without the token and 200 with the correct token.
11916    #[test]
11917    fn mcp_http_post_respects_bearer_auth() {
11918        let runtime = rt();
11919        let h = Harness::new_with_auth(&runtime, Some("secret-mcp-token".into()));
11920        let r = h.router.clone();
11921        runtime.block_on(async move {
11922            // No Authorization header → 401.
11923            let req = json!({
11924                "jsonrpc": "2.0",
11925                "id": 6,
11926                "method": "tools/list",
11927            });
11928            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req.clone())).await;
11929            assert_eq!(status, StatusCode::UNAUTHORIZED);
11930
11931            // With correct bearer → 200 + valid JSON-RPC reply.
11932            let (status, body) = call_with_auth(
11933                r,
11934                "POST",
11935                "/mcp",
11936                Some(req),
11937                Some("Bearer secret-mcp-token"),
11938            )
11939            .await;
11940            assert_eq!(status, StatusCode::OK);
11941            assert_eq!(
11942                body.pointer("/result/tools")
11943                    .and_then(|v| v.as_array())
11944                    .map(|a| a.len()),
11945                Some(18),
11946                "authed tools/list should still return 18 tools: {body}"
11947            );
11948        });
11949        h.shutdown(&runtime);
11950    }
11951
11952    /// `/mcp` goes through the same `TenantExtractor` as REST graph
11953    /// routes. Invalid tenant ids are rejected before JSON-RPC dispatch
11954    /// so clients don't accidentally create a session against a bad
11955    /// tenant key.
11956    #[test]
11957    fn mcp_http_post_rejects_invalid_tenant_header() {
11958        let runtime = rt();
11959        let h = Harness::new(&runtime);
11960        let r = h.router.clone();
11961        runtime.block_on(async move {
11962            let req = json!({
11963                "jsonrpc": "2.0",
11964                "id": 7,
11965                "method": "tools/list",
11966            });
11967            let (status, body) = call_with_tenant(r, "POST", "/mcp", Some(req), "UPPER").await;
11968            assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
11969            let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
11970            assert!(
11971                msg.contains("x-solo-tenant") && msg.contains("invalid tenant id"),
11972                "error must identify the invalid tenant header: {body}"
11973            );
11974        });
11975        h.shutdown(&runtime);
11976    }
11977
11978    /// Unknown tenants should be a route-level 404 on `/mcp`, matching
11979    /// REST. This protects solo-jarvis from receiving a JSON-RPC-looking
11980    /// success envelope for a typoed tenant.
11981    #[test]
11982    fn mcp_http_post_rejects_unknown_tenant_header() {
11983        let runtime = rt();
11984        let h = Harness::new(&runtime);
11985        let r = h.router.clone();
11986        runtime.block_on(async move {
11987            let req = json!({
11988                "jsonrpc": "2.0",
11989                "id": 8,
11990                "method": "tools/list",
11991            });
11992            let (status, body) =
11993                call_with_tenant(r, "POST", "/mcp", Some(req), "never-registered").await;
11994            assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
11995            let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
11996            assert!(
11997                msg.contains("tenant") && msg.contains("not found"),
11998                "error must identify the missing tenant: {body}"
11999            );
12000        });
12001        h.shutdown(&runtime);
12002    }
12003
12004    /// CORS preflight (`OPTIONS /mcp`) from a localhost origin returns
12005    /// 200 (tower-http's CorsLayer handles preflight implicitly) and
12006    /// the `access-control-allow-headers` carries both
12007    /// `x-solo-tenant` and `mcp-session-id`. Pins the v0.10.2
12008    /// allow-list addition.
12009    #[test]
12010    fn mcp_http_cors_preflight_allows_mcp_session_id_header() {
12011        let runtime = rt();
12012        let h = Harness::new(&runtime);
12013        let r = h.router.clone();
12014        runtime.block_on(async move {
12015            let req = Request::builder()
12016                .method("OPTIONS")
12017                .uri("/mcp")
12018                .header("origin", "http://localhost:5173")
12019                .header("access-control-request-method", "POST")
12020                .header(
12021                    "access-control-request-headers",
12022                    "content-type, mcp-session-id, x-solo-tenant, authorization",
12023                )
12024                .body(Body::empty())
12025                .unwrap();
12026            let resp = r.oneshot(req).await.unwrap();
12027            // tower-http CorsLayer returns 200 for permitted preflight.
12028            assert_eq!(resp.status(), StatusCode::OK);
12029            let allow_headers = resp
12030                .headers()
12031                .get("access-control-allow-headers")
12032                .and_then(|h| h.to_str().ok())
12033                .unwrap_or("")
12034                .to_lowercase();
12035            assert!(
12036                allow_headers.contains("mcp-session-id"),
12037                "preflight allow-headers must include mcp-session-id; got: {allow_headers}"
12038            );
12039            assert!(
12040                allow_headers.contains("x-solo-tenant"),
12041                "preflight allow-headers must still include x-solo-tenant; got: {allow_headers}"
12042            );
12043            // Allow-origin must echo the localhost origin (per the
12044            // permissive-localhost predicate).
12045            let allow_origin = resp
12046                .headers()
12047                .get("access-control-allow-origin")
12048                .and_then(|h| h.to_str().ok())
12049                .unwrap_or("");
12050            assert_eq!(allow_origin, "http://localhost:5173");
12051        });
12052        h.shutdown(&runtime);
12053    }
12054
12055    /// CORS preflight for browser memory corrections must allow PATCH.
12056    /// solo-web sends `PATCH /memory/{id}` with `content-type` and
12057    /// `x-solo-tenant`; if PATCH is absent from the allow-methods list,
12058    /// the browser rejects the request before it reaches the API.
12059    #[test]
12060    fn memory_update_cors_preflight_allows_patch() {
12061        let runtime = rt();
12062        let h = Harness::new(&runtime);
12063        let r = h.router.clone();
12064        runtime.block_on(async move {
12065            let req = Request::builder()
12066                .method("OPTIONS")
12067                .uri("/memory/ep:test")
12068                .header("origin", "http://localhost:5173")
12069                .header("access-control-request-method", "PATCH")
12070                .header(
12071                    "access-control-request-headers",
12072                    "content-type, x-solo-tenant",
12073                )
12074                .body(Body::empty())
12075                .unwrap();
12076            let resp = r.oneshot(req).await.unwrap();
12077            assert_eq!(resp.status(), StatusCode::OK);
12078            let allow_methods = resp
12079                .headers()
12080                .get("access-control-allow-methods")
12081                .and_then(|h| h.to_str().ok())
12082                .unwrap_or("")
12083                .to_lowercase();
12084            assert!(
12085                allow_methods.contains("patch"),
12086                "preflight allow-methods must include patch; got: {allow_methods}"
12087            );
12088            let allow_headers = resp
12089                .headers()
12090                .get("access-control-allow-headers")
12091                .and_then(|h| h.to_str().ok())
12092                .unwrap_or("")
12093                .to_lowercase();
12094            assert!(
12095                allow_headers.contains("x-solo-tenant"),
12096                "preflight allow-headers must include x-solo-tenant; got: {allow_headers}"
12097            );
12098            assert_eq!(
12099                resp.headers()
12100                    .get("access-control-allow-origin")
12101                    .and_then(|h| h.to_str().ok()),
12102                Some("http://localhost:5173")
12103            );
12104        });
12105        h.shutdown(&runtime);
12106    }
12107
12108    /// Notification messages (no `id`) return 202 Accepted with an
12109    /// empty body. Per JSON-RPC 2.0 §4.1 the server MUST NOT reply.
12110    #[test]
12111    fn mcp_http_notification_returns_202_accepted() {
12112        let runtime = rt();
12113        let h = Harness::new(&runtime);
12114        let r = h.router.clone();
12115        runtime.block_on(async move {
12116            let req = json!({
12117                "jsonrpc": "2.0",
12118                "method": "notifications/initialized",
12119                "params": {},
12120            });
12121            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
12122            assert_eq!(status, StatusCode::ACCEPTED);
12123            // Empty body — call() returns Value::Null when the body is
12124            // empty.
12125            assert_eq!(body, Value::Null);
12126        });
12127        h.shutdown(&runtime);
12128    }
12129
12130    // ---------------------------------------------------------------
12131    // v0.11.0 P1 — MCP `Mcp-Session-Id` middleware integration tests
12132    // ---------------------------------------------------------------
12133    //
12134    // These pin the per-request session contract: the POST handler
12135    // creates a fresh session id on a request that arrives without
12136    // the header (echoed back via `Mcp-Session-Id` response header);
12137    // a subsequent request carrying that same id continues using the
12138    // same session record; unknown or stale ids surface as 404 with
12139    // a re-init instruction. The lazy/background expiry semantics are
12140    // unit-tested in `mcp_session::tests`.
12141
12142    /// `POST /mcp` with `tools/list` (no `Mcp-Session-Id` header) must
12143    /// echo back a fresh session id in the response header. The
12144    /// session count in the store grows by exactly 1.
12145    #[test]
12146    fn mcp_post_without_session_id_creates_new_session() {
12147        let runtime = rt();
12148        let h = Harness::new(&runtime);
12149        let r = h.router.clone();
12150        runtime.block_on(async move {
12151            let req = Request::builder()
12152                .method("POST")
12153                .uri("/mcp")
12154                .header("content-type", "application/json")
12155                .body(Body::from(
12156                    serde_json::to_vec(&json!({
12157                        "jsonrpc": "2.0",
12158                        "id": 100,
12159                        "method": "tools/list",
12160                    }))
12161                    .unwrap(),
12162                ))
12163                .unwrap();
12164            let resp = r.oneshot(req).await.unwrap();
12165            assert_eq!(resp.status(), StatusCode::OK);
12166            let session_id = resp
12167                .headers()
12168                .get("mcp-session-id")
12169                .and_then(|v| v.to_str().ok())
12170                .map(|s| s.to_string())
12171                .unwrap_or_else(|| {
12172                    panic!(
12173                        "mcp-session-id response header missing on session-init POST: {:?}",
12174                        resp.headers()
12175                    )
12176                });
12177            assert!(
12178                !session_id.is_empty(),
12179                "session id must be a non-empty string"
12180            );
12181        });
12182        h.shutdown(&runtime);
12183    }
12184
12185    /// Two `POST /mcp` calls with the same session id in the request
12186    /// header must hit the same `SessionState` (i.e. no new entry
12187    /// gets allocated). The second response echoes the same id back.
12188    #[test]
12189    fn mcp_post_with_valid_session_id_continues_session() {
12190        let runtime = rt();
12191        let h = Harness::new(&runtime);
12192        let r = h.router.clone();
12193        runtime.block_on(async move {
12194            // First request: no header → fresh id.
12195            let req = Request::builder()
12196                .method("POST")
12197                .uri("/mcp")
12198                .header("content-type", "application/json")
12199                .body(Body::from(
12200                    serde_json::to_vec(&json!({
12201                        "jsonrpc": "2.0",
12202                        "id": 101,
12203                        "method": "tools/list",
12204                    }))
12205                    .unwrap(),
12206                ))
12207                .unwrap();
12208            let resp1 = r.clone().oneshot(req).await.unwrap();
12209            assert_eq!(resp1.status(), StatusCode::OK);
12210            let assigned_id = resp1
12211                .headers()
12212                .get("mcp-session-id")
12213                .and_then(|v| v.to_str().ok())
12214                .map(|s| s.to_string())
12215                .expect("first response must carry mcp-session-id");
12216
12217            // Second request: carry the same id forward.
12218            let req2 = Request::builder()
12219                .method("POST")
12220                .uri("/mcp")
12221                .header("content-type", "application/json")
12222                .header("mcp-session-id", &assigned_id)
12223                .body(Body::from(
12224                    serde_json::to_vec(&json!({
12225                        "jsonrpc": "2.0",
12226                        "id": 102,
12227                        "method": "tools/list",
12228                    }))
12229                    .unwrap(),
12230                ))
12231                .unwrap();
12232            let resp2 = r.oneshot(req2).await.unwrap();
12233            assert_eq!(resp2.status(), StatusCode::OK);
12234            let echoed = resp2
12235                .headers()
12236                .get("mcp-session-id")
12237                .and_then(|v| v.to_str().ok())
12238                .map(|s| s.to_string())
12239                .expect("continuation response must echo mcp-session-id");
12240            assert_eq!(
12241                echoed, assigned_id,
12242                "second response must echo the same session id"
12243            );
12244        });
12245        h.shutdown(&runtime);
12246    }
12247
12248    /// A `POST /mcp` carrying a random / never-assigned `Mcp-Session-Id`
12249    /// must surface as 404 with the `session_expired` error
12250    /// discriminator and the re-initialize instruction in the body.
12251    #[test]
12252    fn mcp_post_with_unknown_session_id_returns_404() {
12253        let runtime = rt();
12254        let h = Harness::new(&runtime);
12255        let r = h.router.clone();
12256        runtime.block_on(async move {
12257            let req = Request::builder()
12258                .method("POST")
12259                .uri("/mcp")
12260                .header("content-type", "application/json")
12261                // A plausibly-shaped id the server never assigned.
12262                .header("mcp-session-id", "11111111-2222-3333-4444-555555555555")
12263                .body(Body::from(
12264                    serde_json::to_vec(&json!({
12265                        "jsonrpc": "2.0",
12266                        "id": 103,
12267                        "method": "tools/list",
12268                    }))
12269                    .unwrap(),
12270                ))
12271                .unwrap();
12272            let resp = r.oneshot(req).await.unwrap();
12273            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
12274            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
12275            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12276            assert_eq!(
12277                v.get("error").and_then(|e| e.as_str()),
12278                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12279                "404 body must carry the session_expired discriminator: {v}"
12280            );
12281            assert!(
12282                v.get("retry")
12283                    .and_then(|e| e.as_str())
12284                    .map(|s| s == "re-initialize")
12285                    .unwrap_or(false),
12286                "404 body must instruct re-initialize: {v}"
12287            );
12288        });
12289        h.shutdown(&runtime);
12290    }
12291
12292    /// A `POST /mcp` carrying a `Mcp-Session-Id` that WAS assigned but
12293    /// has since been expired (we evict it directly from the store to
12294    /// simulate the TTL sweep) must surface the same 404 +
12295    /// `session_expired` discriminator. Distinct from the
12296    /// "unknown id" test above — same wire response, different cause.
12297    #[test]
12298    fn mcp_post_with_expired_session_id_returns_404() {
12299        let runtime = rt();
12300        let h = Harness::new(&runtime);
12301        let r = h.router.clone();
12302        let store = h.mcp_sessions.clone();
12303        runtime.block_on(async move {
12304            // First request to allocate a session id.
12305            let req1 = Request::builder()
12306                .method("POST")
12307                .uri("/mcp")
12308                .header("content-type", "application/json")
12309                .body(Body::from(
12310                    serde_json::to_vec(&json!({
12311                        "jsonrpc": "2.0",
12312                        "id": 104,
12313                        "method": "tools/list",
12314                    }))
12315                    .unwrap(),
12316                ))
12317                .unwrap();
12318            let resp1 = r.clone().oneshot(req1).await.unwrap();
12319            let assigned_id_str = resp1
12320                .headers()
12321                .get("mcp-session-id")
12322                .and_then(|v| v.to_str().ok())
12323                .map(|s| s.to_string())
12324                .expect("first response must carry mcp-session-id");
12325
12326            // Force-evict the session directly via the harness's
12327            // SessionStore clone. This is the moral equivalent of the
12328            // background sweep evicting an entry past TTL — same
12329            // observable from the wire (the handler's middleware sees
12330            // `SessionStore::get` return `None`). Driving the real
12331            // 30-min inactivity clock is not test-friendly.
12332            let parsed = crate::mcp_session::SessionId::parse(&assigned_id_str)
12333                .expect("just-assigned id must parse");
12334            assert!(store.delete(&parsed), "stored session must be deletable");
12335
12336            // Now the id is "stale" (no longer in the store) — same
12337            // observable as a TTL eviction.
12338            let req2 = Request::builder()
12339                .method("POST")
12340                .uri("/mcp")
12341                .header("content-type", "application/json")
12342                .header("mcp-session-id", &assigned_id_str)
12343                .body(Body::from(
12344                    serde_json::to_vec(&json!({
12345                        "jsonrpc": "2.0",
12346                        "id": 105,
12347                        "method": "tools/list",
12348                    }))
12349                    .unwrap(),
12350                ))
12351                .unwrap();
12352            let resp2 = r.oneshot(req2).await.unwrap();
12353            assert_eq!(resp2.status(), StatusCode::NOT_FOUND);
12354            let body_bytes = resp2.into_body().collect().await.unwrap().to_bytes();
12355            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12356            assert_eq!(
12357                v.get("error").and_then(|e| e.as_str()),
12358                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12359                "expired-session 404 body must carry session_expired: {v}"
12360            );
12361        });
12362        h.shutdown(&runtime);
12363    }
12364
12365    /// v0.11.0 P2: `GET /mcp` REQUIRES an `Mcp-Session-Id` header. The
12366    /// GET stream is "attach to an existing session's notification
12367    /// channel" — there's no session-init story over GET (POST owns
12368    /// session creation). A GET without the header must return 404
12369    /// with the `session_expired` discriminator + `re-initialize`
12370    /// instruction, mirroring the unknown-id 404 wire shape so clients
12371    /// have a single recovery code path.
12372    ///
12373    /// Diverges deliberately from v0.11.0 P1's behaviour (which
12374    /// auto-created on GET) — see `docs/dev-log/0134-v0.11.0-p2-impl.md`
12375    /// for the rationale.
12376    #[test]
12377    fn mcp_get_without_session_id_returns_404() {
12378        let runtime = rt();
12379        let h = Harness::new(&runtime);
12380        let r = h.router.clone();
12381        runtime.block_on(async move {
12382            let req = Request::builder()
12383                .method("GET")
12384                .uri("/mcp")
12385                .header("accept", "text/event-stream")
12386                .body(Body::empty())
12387                .unwrap();
12388            let resp = r.oneshot(req).await.unwrap();
12389            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
12390            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
12391            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12392            assert_eq!(
12393                v.get("error").and_then(|e| e.as_str()),
12394                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12395                "GET /mcp without session id must carry session_expired: {v}"
12396            );
12397            assert_eq!(
12398                v.get("retry").and_then(|e| e.as_str()),
12399                Some("re-initialize"),
12400            );
12401        });
12402        h.shutdown(&runtime);
12403    }
12404
12405    // ---------------------------------------------------------------
12406    // v0.11.0 P2 — resumable /mcp GET stream + Last-Event-ID
12407    // ---------------------------------------------------------------
12408    //
12409    // These pin the v0.11.0 P2 wire contract for the resumable GET
12410    // stream: an `Mcp-Session-Id`-bound subscriber sees `event: init`
12411    // first, then any buffered replay events past `Last-Event-ID`,
12412    // then live broadcast events as they're published. The unit-test
12413    // half of the contract (publish_event monotonic + buffer cap) lives
12414    // in `crate::mcp_session::tests`.
12415
12416    /// Open the `/mcp` GET stream for one session id. Returns
12417    /// `(status, body)` where the body is the SSE frame stream.
12418    async fn open_mcp_get_stream(
12419        router: axum::Router,
12420        session_id: &str,
12421        last_event_id: Option<&str>,
12422    ) -> (StatusCode, axum::body::Body, axum::http::HeaderMap) {
12423        let mut builder = Request::builder()
12424            .method("GET")
12425            .uri("/mcp")
12426            .header("accept", "text/event-stream")
12427            .header(crate::mcp_session::MCP_SESSION_ID_HEADER, session_id);
12428        if let Some(leid) = last_event_id {
12429            builder = builder.header(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER, leid);
12430        }
12431        let req = builder
12432            .header("content-length", "0")
12433            .body(Body::empty())
12434            .unwrap();
12435        let resp = router.oneshot(req).await.expect("oneshot");
12436        let status = resp.status();
12437        let headers = resp.headers().clone();
12438        let body = resp.into_body();
12439        (status, body, headers)
12440    }
12441
12442    /// Allocate one session via a POST so a follow-up GET can attach.
12443    /// Returns the assigned session id from the response header.
12444    async fn allocate_mcp_session(router: axum::Router) -> String {
12445        let req = Request::builder()
12446            .method("POST")
12447            .uri("/mcp")
12448            .header("content-type", "application/json")
12449            .body(Body::from(
12450                serde_json::to_vec(&json!({
12451                    "jsonrpc": "2.0",
12452                    "id": 1,
12453                    "method": "tools/list",
12454                }))
12455                .unwrap(),
12456            ))
12457            .unwrap();
12458        let resp = router.oneshot(req).await.expect("oneshot");
12459        assert_eq!(resp.status(), StatusCode::OK, "POST must allocate session");
12460        resp.headers()
12461            .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
12462            .and_then(|v| v.to_str().ok())
12463            .map(|s| s.to_string())
12464            .expect("POST must echo Mcp-Session-Id")
12465    }
12466
12467    /// Look up the in-store `Arc<SessionState>` so a test can publish
12468    /// events directly onto the same record the GET handler subscribed
12469    /// to. Takes the [`SessionStore`] directly so callers can clone it
12470    /// out of the harness before moving the harness into the async
12471    /// block.
12472    fn session_state_for_test(
12473        store: &crate::mcp_session::SessionStore,
12474        session_id: &str,
12475    ) -> std::sync::Arc<crate::mcp_session::SessionState> {
12476        let parsed =
12477            crate::mcp_session::SessionId::parse(session_id).expect("test session id must parse");
12478        store.get(&parsed).expect("session must still be in store")
12479    }
12480
12481    /// GET `/mcp` against a session that's been force-evicted (TTL
12482    /// sweep) returns 404 with the `session_expired` discriminator —
12483    /// same wire shape as POST.
12484    #[test]
12485    fn mcp_get_with_expired_session_id_returns_404() {
12486        let runtime = rt();
12487        let h = Harness::new(&runtime);
12488        let r = h.router.clone();
12489        let store = h.mcp_sessions.clone();
12490        runtime.block_on(async move {
12491            let session_id = allocate_mcp_session(r.clone()).await;
12492            // Force-evict via the harness store handle.
12493            let parsed = crate::mcp_session::SessionId::parse(&session_id).unwrap();
12494            assert!(store.delete(&parsed));
12495            // Now GET against the stale id.
12496            let req = Request::builder()
12497                .method("GET")
12498                .uri("/mcp")
12499                .header("accept", "text/event-stream")
12500                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12501                .body(Body::empty())
12502                .unwrap();
12503            let resp = r.oneshot(req).await.unwrap();
12504            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
12505            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
12506            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12507            assert_eq!(
12508                v.get("error").and_then(|e| e.as_str()),
12509                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12510            );
12511        });
12512        h.shutdown(&runtime);
12513    }
12514
12515    /// v0.11.4: `DELETE /mcp` with a valid session id terminates the
12516    /// session (204 No Content) and a follow-up DELETE returns 404
12517    /// (session is gone). Per MCP Streamable HTTP transport spec.
12518    #[test]
12519    fn mcp_delete_with_valid_session_id_terminates() {
12520        let runtime = rt();
12521        let h = Harness::new(&runtime);
12522        let r = h.router.clone();
12523        let store = h.mcp_sessions.clone();
12524        runtime.block_on(async move {
12525            let session_id = allocate_mcp_session(r.clone()).await;
12526            // Confirm the session exists in the store before DELETE.
12527            let parsed = crate::mcp_session::SessionId::parse(&session_id).unwrap();
12528            assert!(store.get(&parsed).is_some(), "session must exist pre-DELETE");
12529
12530            // DELETE /mcp with the session id → 204 No Content.
12531            let req = Request::builder()
12532                .method("DELETE")
12533                .uri("/mcp")
12534                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12535                .body(Body::empty())
12536                .unwrap();
12537            let resp = r.clone().oneshot(req).await.unwrap();
12538            assert_eq!(resp.status(), StatusCode::NO_CONTENT);
12539
12540            // Store no longer holds the session.
12541            assert!(store.get(&parsed).is_none(), "session must be gone after DELETE");
12542
12543            // Second DELETE on the same id → 404 Not Found.
12544            let req2 = Request::builder()
12545                .method("DELETE")
12546                .uri("/mcp")
12547                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12548                .body(Body::empty())
12549                .unwrap();
12550            let resp2 = r.oneshot(req2).await.unwrap();
12551            assert_eq!(resp2.status(), StatusCode::NOT_FOUND);
12552        });
12553        h.shutdown(&runtime);
12554    }
12555
12556    /// v0.11.4: `DELETE /mcp` without the `Mcp-Session-Id` header is
12557    /// malformed (400 Bad Request) — DELETE has no implicit "current
12558    /// session" notion.
12559    #[test]
12560    fn mcp_delete_without_session_id_returns_400() {
12561        let runtime = rt();
12562        let h = Harness::new(&runtime);
12563        let r = h.router.clone();
12564        runtime.block_on(async move {
12565            let req = Request::builder()
12566                .method("DELETE")
12567                .uri("/mcp")
12568                .body(Body::empty())
12569                .unwrap();
12570            let resp = r.oneshot(req).await.unwrap();
12571            assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
12572        });
12573        h.shutdown(&runtime);
12574    }
12575
12576    /// Happy-path subscribe: open `/mcp` with a freshly-allocated
12577    /// session id, expect `event: init` as the first frame with the
12578    /// session id echoed in both the response header AND the init
12579    /// payload.
12580    #[test]
12581    fn mcp_get_with_valid_session_id_subscribes() {
12582        let runtime = rt();
12583        let h = Harness::new(&runtime);
12584        let r = h.router.clone();
12585        runtime.block_on(async move {
12586            let session_id = allocate_mcp_session(r.clone()).await;
12587            let (status, mut body, headers) = open_mcp_get_stream(r, &session_id, None).await;
12588            assert_eq!(status, StatusCode::OK);
12589            // Response header echoes the session id.
12590            let echoed = headers
12591                .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
12592                .and_then(|v| v.to_str().ok())
12593                .unwrap();
12594            assert_eq!(echoed, session_id);
12595            // First frame is the init event.
12596            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12597                .await
12598                .expect("init event must arrive within 2s");
12599            assert_eq!(ev.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12600            assert_eq!(ev.data["connected"].as_bool(), Some(true));
12601            assert_eq!(ev.data["session_id"].as_str(), Some(session_id.as_str()));
12602            // Init carries id "0" (reserved sentinel — the first real
12603            // publish_event allocates id 1).
12604            assert_eq!(ev.id.as_deref(), Some("0"));
12605        });
12606        h.shutdown(&runtime);
12607    }
12608
12609    /// Publish 5 events on the session, reconnect with
12610    /// `Last-Event-ID: 2`, observe `init` then events 3, 4, 5 (in
12611    /// order). Pins the resume-from-cursor contract.
12612    #[test]
12613    fn mcp_get_resumes_from_last_event_id() {
12614        let runtime = rt();
12615        let h = Harness::new(&runtime);
12616        let r = h.router.clone();
12617        let store = h.mcp_sessions.clone();
12618        runtime.block_on(async move {
12619            let session_id = allocate_mcp_session(r.clone()).await;
12620            let state = session_state_for_test(&store, &session_id);
12621            for i in 1..=5 {
12622                state.publish_event(crate::mcp_session::McpEventKind::Message, json!({"n": i}));
12623            }
12624            let (status, mut body, _) = open_mcp_get_stream(r, &session_id, Some("2")).await;
12625            assert_eq!(status, StatusCode::OK);
12626            // First frame is init.
12627            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12628                .await
12629                .unwrap();
12630            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12631            // Then events 3, 4, 5 in order.
12632            for expected_id in 3..=5 {
12633                let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12634                    .await
12635                    .expect("replay event must arrive within 2s");
12636                assert_eq!(
12637                    ev.event,
12638                    crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,
12639                    "expected replay of message event id {expected_id}, got {ev:?}",
12640                );
12641                assert_eq!(ev.id.as_deref(), Some(expected_id.to_string().as_str()));
12642                assert_eq!(ev.data["n"].as_u64(), Some(expected_id));
12643            }
12644        });
12645        h.shutdown(&runtime);
12646    }
12647
12648    /// Publish past the broadcast buffer's capacity (300 events) then
12649    /// reconnect with `Last-Event-ID: 0` (the sentinel for "I just
12650    /// joined and missed everything since event 1"). Observe `event:
12651    /// init`, then a synthetic `event: lagged` describing the gap,
12652    /// then the tail of the buffer.
12653    #[test]
12654    fn mcp_get_emits_lagged_when_last_event_id_too_old() {
12655        let runtime = rt();
12656        let h = Harness::new(&runtime);
12657        let r = h.router.clone();
12658        let store = h.mcp_sessions.clone();
12659        runtime.block_on(async move {
12660            let session_id = allocate_mcp_session(r.clone()).await;
12661            let state = session_state_for_test(&store, &session_id);
12662            // Publish 300 events — buffer cap is 256, so events 1..=44
12663            // get evicted (oldest retained id = 45).
12664            for _ in 0..300 {
12665                state.publish_event(crate::mcp_session::McpEventKind::Message, json!({}));
12666            }
12667            // Last-Event-ID: 1 — claim we've only seen event 1, but
12668            // event 2 (and 3..=44) are gone from the buffer.
12669            let (status, mut body, _) = open_mcp_get_stream(r, &session_id, Some("1")).await;
12670            assert_eq!(status, StatusCode::OK);
12671            // First frame: init.
12672            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12673                .await
12674                .unwrap();
12675            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12676            // Second frame: lagged (synthetic) with id 0.
12677            let lagged = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12678                .await
12679                .expect("lagged event must arrive within 2s");
12680            assert_eq!(
12681                lagged.event,
12682                crate::mcp_session::MCP_STREAM_EVENT_LAGGED_NAME,
12683                "expected `event: lagged` after Last-Event-ID before buffer",
12684            );
12685            assert_eq!(lagged.id.as_deref(), Some("0"));
12686            assert!(
12687                lagged.data["dropped"].as_u64().unwrap_or(0) > 0,
12688                "lagged event must carry a non-zero `dropped` count: {:?}",
12689                lagged.data,
12690            );
12691        });
12692        h.shutdown(&runtime);
12693    }
12694
12695    /// CORS preflight (OPTIONS) with `Access-Control-Request-Headers:
12696    /// last-event-id` must succeed and the `last-event-id` header must
12697    /// appear in `Access-Control-Allow-Headers`. Without this, a
12698    /// browser reconnecting an SSE stream with `Last-Event-ID:` fails
12699    /// the preflight before the actual GET lands.
12700    #[test]
12701    fn cors_preflight_allows_last_event_id_header() {
12702        let runtime = rt();
12703        let h = Harness::new(&runtime);
12704        let r = h.router.clone();
12705        runtime.block_on(async move {
12706            let req = Request::builder()
12707                .method("OPTIONS")
12708                .uri("/mcp")
12709                .header("origin", "http://localhost:5173")
12710                .header("access-control-request-method", "GET")
12711                .header(
12712                    "access-control-request-headers",
12713                    "last-event-id,mcp-session-id",
12714                )
12715                .body(Body::empty())
12716                .unwrap();
12717            let resp = r.oneshot(req).await.unwrap();
12718            assert!(
12719                resp.status().is_success() || resp.status() == StatusCode::NO_CONTENT,
12720                "preflight must succeed, got: {}",
12721                resp.status(),
12722            );
12723            let allow = resp
12724                .headers()
12725                .get("access-control-allow-headers")
12726                .and_then(|h| h.to_str().ok())
12727                .map(|s| s.to_ascii_lowercase())
12728                .unwrap_or_default();
12729            assert!(
12730                allow.contains("last-event-id"),
12731                "preflight must allow `last-event-id`; allow-headers = {allow:?}",
12732            );
12733            assert!(
12734                allow.contains("mcp-session-id"),
12735                "preflight must allow `mcp-session-id` too; allow-headers = {allow:?}",
12736            );
12737        });
12738        h.shutdown(&runtime);
12739    }
12740
12741    /// Heartbeat cadence: with a short interval, the stream emits a
12742    /// typed `event: heartbeat` after the init event. The production
12743    /// cadence is [`MCP_STREAM_HEARTBEAT_SECS`] (30s); the test
12744    /// exercises `build_mcp_session_stream` directly with a 1-second
12745    /// interval so we don't burn 30s of CI wall time.
12746    #[test]
12747    fn mcp_get_heartbeats_after_init() {
12748        let runtime = rt();
12749        let h = Harness::new(&runtime);
12750        runtime.block_on(async move {
12751            let state = std::sync::Arc::new(crate::mcp_session::SessionState::new(
12752                solo_core::TenantId::default_tenant(),
12753                None,
12754            ));
12755            let session_id = crate::mcp_session::SessionId::new();
12756            let stream = build_mcp_session_stream(
12757                state,
12758                session_id.clone(),
12759                "default".to_string(),
12760                0,
12761                1, // 1-second heartbeat for the test
12762            );
12763            // Pull frames off the stream. Should see init then
12764            // (with no live events) a heartbeat within ~1.5s.
12765            use futures::StreamExt;
12766            let mut stream = std::pin::pin!(stream);
12767            let init_ev = tokio::time::timeout(std::time::Duration::from_secs(2), stream.next())
12768                .await
12769                .expect("init must arrive within 2s")
12770                .expect("stream must yield init");
12771            // Rendering the Event is opaque; we don't introspect it
12772            // here — the wire-format integration test
12773            // `mcp_get_with_valid_session_id_subscribes` covers that.
12774            // This test pins that a SECOND frame lands within the
12775            // heartbeat window. Drop the init frame.
12776            drop(init_ev);
12777            let hb = tokio::time::timeout(std::time::Duration::from_secs(3), stream.next())
12778                .await
12779                .expect("heartbeat must arrive within ~3s")
12780                .expect("stream must yield heartbeat");
12781            // Same opacity — we observe presence, not content. The
12782            // integration-level test
12783            // `mcp_get_with_valid_session_id_subscribes` covers wire
12784            // content.
12785            drop(hb);
12786        });
12787        h.shutdown(&runtime);
12788    }
12789
12790    /// v0.11.0 P3: `memory_ingest_document` emits the first two phase
12791    /// events (parsed, chunked) BEFORE the writer-actor call, so they
12792    /// fire even when the underlying writer has no embedder configured.
12793    /// This pins the upstream half of the 4-phase ingest progress
12794    /// taxonomy without needing a fully-equipped writer harness — the
12795    /// post-writer phases (embedded, inserted) are pinned indirectly
12796    /// by the `MCP_NOTIFICATION_PROGRESS_METHOD` grep-ability and by
12797    /// the dispatch_tests-level progress-emission tests for the other
12798    /// two long-running tools (search_docs / remember_batch).
12799    #[test]
12800    fn mcp_http_ingest_document_emits_parsed_and_chunked_progress_events() {
12801        let runtime = rt();
12802        let h = Harness::new(&runtime);
12803        let r = h.router.clone();
12804        let store = h.mcp_sessions.clone();
12805        runtime.block_on(async move {
12806            let session_id = allocate_mcp_session(r.clone()).await;
12807            let state = session_state_for_test(&store, &session_id);
12808            let mut rx = state.subscribe_events();
12809            // Write a `.txt` so the parser doesn't reject before the
12810            // first progress event fires — handle_ingest_document
12811            // emits `parsed` BEFORE the writer call but AFTER the
12812            // empty-path guard; both pre-writer phases fire regardless
12813            // of writer-side embedder configuration.
12814            let tmpdir = tempfile::TempDir::new().unwrap();
12815            let tmpfile = tmpdir.path().join("ingest-progress.txt");
12816            std::fs::write(&tmpfile, b"hello world progress test").unwrap();
12817            let body = json!({
12818                "jsonrpc": "2.0",
12819                "id": 2,
12820                "method": "tools/call",
12821                "params": {
12822                    "name": "memory_ingest_document",
12823                    "arguments": { "path": tmpfile.to_string_lossy() },
12824                    "_meta": { "progressToken": "ingest-tok" },
12825                },
12826            });
12827            let req = Request::builder()
12828                .method("POST")
12829                .uri("/mcp")
12830                .header("content-type", "application/json")
12831                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12832                .body(Body::from(serde_json::to_vec(&body).unwrap()))
12833                .unwrap();
12834            let resp = r.clone().oneshot(req).await.expect("oneshot");
12835            assert_eq!(resp.status(), StatusCode::OK);
12836            let _ = resp.into_body().collect().await.unwrap().to_bytes();
12837            let mut events = Vec::new();
12838            while let Ok(ev) = rx.try_recv() {
12839                events.push(ev);
12840            }
12841            // We expect AT LEAST the 2 pre-writer phase events. In a
12842            // fully-equipped harness the writer would succeed and the
12843            // post-writer phases (embedded + inserted) would also fire;
12844            // here we pin the pre-writer half + the spec envelope shape.
12845            assert!(
12846                events.len() >= 2,
12847                "expected at least 2 progress events (parsed + chunked), got {}: {events:?}",
12848                events.len()
12849            );
12850            // Phase 1 = "parsed"; phase 2 = "chunked"; both carry
12851            // total=4 and progressToken="ingest-tok".
12852            assert_eq!(events[0].data["params"]["progress"], json!(1));
12853            assert_eq!(events[0].data["params"]["message"], json!("parsed"));
12854            assert_eq!(events[1].data["params"]["progress"], json!(2));
12855            assert_eq!(events[1].data["params"]["message"], json!("chunked"));
12856            for ev in &events {
12857                assert_eq!(ev.event, crate::mcp_session::McpEventKind::Progress,);
12858                assert_eq!(
12859                    ev.data["method"],
12860                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
12861                );
12862                assert_eq!(ev.data["params"]["progressToken"], json!("ingest-tok"));
12863                assert_eq!(ev.data["params"]["total"], json!(4));
12864            }
12865        });
12866        h.shutdown(&runtime);
12867    }
12868
12869    /// v0.11.0 P3: end-to-end progress event roundtrip — POST a
12870    /// `tools/call` carrying `_meta.progressToken`, then reconnect via
12871    /// `GET /mcp` with a `Last-Event-ID` that triggers buffer replay.
12872    /// Confirms the wire path:
12873    /// `tools/call params._meta.progressToken` → ProgressReporter →
12874    /// SessionState.publish_event → replay buffer → GET SSE replay
12875    /// drain → client receives spec-shape envelope.
12876    ///
12877    /// `Last-Event-ID: 0` is treated as "brand new subscriber, no
12878    /// replay" per the v0.11.0 P2 contract — so we drive a non-zero
12879    /// `Last-Event-ID` smaller than every event id by first force-
12880    /// publishing one synthetic seed event (id 1), then issuing the
12881    /// real `tools/call` (which publishes 3 progress events with
12882    /// ids 2..=4), then GET with `Last-Event-ID: 1` to replay
12883    /// exactly the progress trio.
12884    #[test]
12885    fn mcp_http_progress_event_subscribers_receive_via_get_mcp_stream() {
12886        let runtime = rt();
12887        let h = Harness::new(&runtime);
12888        let r = h.router.clone();
12889        let store = h.mcp_sessions.clone();
12890        runtime.block_on(async move {
12891            // 1. Allocate a session via an initial POST.
12892            let session_id = allocate_mcp_session(r.clone()).await;
12893            // 2. Seed one synthetic event (id 1) so the buffer is
12894            //    non-empty before the real progress events. The GET
12895            //    handler's replay path only fires when last_event_id
12896            //    > 0; we'll pass Last-Event-ID: 1 to skip the seed and
12897            //    replay the progress events that follow.
12898            let state = session_state_for_test(&store, &session_id);
12899            state.publish_event(
12900                crate::mcp_session::McpEventKind::Message,
12901                json!({"seed": true}),
12902            );
12903            // 3. POST a `memory_search_docs` tools/call carrying
12904            //    `_meta.progressToken` (well above the top_k threshold
12905            //    so progress IS emitted). The query returns empty hits
12906            //    in the harness — what matters here is that the 3
12907            //    progress events fire as side effects of the call.
12908            let body = json!({
12909                "jsonrpc": "2.0",
12910                "id": 2,
12911                "method": "tools/call",
12912                "params": {
12913                    "name": "memory_search_docs",
12914                    "arguments": { "query": "anything", "limit": 150 },
12915                    "_meta": { "progressToken": "progress-roundtrip" },
12916                },
12917            });
12918            let req = Request::builder()
12919                .method("POST")
12920                .uri("/mcp")
12921                .header("content-type", "application/json")
12922                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12923                .body(Body::from(serde_json::to_vec(&body).unwrap()))
12924                .unwrap();
12925            let resp = r.clone().oneshot(req).await.expect("oneshot");
12926            assert_eq!(resp.status(), StatusCode::OK);
12927            // Drain the POST response so the future completes before
12928            // we open the GET stream.
12929            let _ = resp.into_body().collect().await.unwrap().to_bytes();
12930            // 4. Open the GET stream with Last-Event-ID: 1 — replay
12931            //    every event past the seed.
12932            let (status, mut stream_body, _) = open_mcp_get_stream(r, &session_id, Some("1")).await;
12933            assert_eq!(status, StatusCode::OK);
12934            // First frame: init (id 0, reserved sentinel).
12935            let init = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
12936                .await
12937                .expect("init must arrive within 2s");
12938            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12939            // Then 3 progress events (the search_docs handler emits 3
12940            // when top_k > 100). Collect them and assert the spec
12941            // envelope shape.
12942            for expected_progress in 1u64..=3u64 {
12943                let ev = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
12944                    .await
12945                    .expect("progress event must arrive within 2s");
12946                assert_eq!(
12947                    ev.event,
12948                    crate::mcp_session::MCP_STREAM_EVENT_PROGRESS_NAME,
12949                    "expected progress event #{expected_progress}, got {ev:?}",
12950                );
12951                // Spec-shape envelope: jsonrpc + method + params{progressToken, progress, total}.
12952                assert_eq!(ev.data["jsonrpc"], json!("2.0"));
12953                assert_eq!(
12954                    ev.data["method"],
12955                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
12956                );
12957                assert_eq!(
12958                    ev.data["params"]["progressToken"],
12959                    json!("progress-roundtrip")
12960                );
12961                assert_eq!(ev.data["params"]["progress"], json!(expected_progress));
12962                assert_eq!(ev.data["params"]["total"], json!(3));
12963            }
12964        });
12965        h.shutdown(&runtime);
12966    }
12967
12968    /// `initialize` returns the `{name: "solo", version: <crate
12969    /// version>}` server-info pinned by the stdio invariant test
12970    /// `server_info_identity_is_solo_not_rmcp_or_solo_api`. Sanity
12971    /// check that the v0.10.2 HTTP transport doesn't drift away from
12972    /// the stdio identity.
12973    #[test]
12974    fn mcp_http_initialize_returns_solo_server_info() {
12975        let runtime = rt();
12976        let h = Harness::new(&runtime);
12977        let r = h.router.clone();
12978        runtime.block_on(async move {
12979            let req = json!({
12980                "jsonrpc": "2.0",
12981                "id": 7,
12982                "method": "initialize",
12983                "params": {
12984                    "protocolVersion": "2024-11-05",
12985                    "capabilities": {},
12986                    "clientInfo": { "name": "solo-http-test", "version": "0.0.0" },
12987                },
12988            });
12989            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
12990            assert_eq!(status, StatusCode::OK);
12991            assert_eq!(
12992                body.pointer("/result/serverInfo/name")
12993                    .and_then(|v| v.as_str()),
12994                Some("solo"),
12995                "serverInfo.name must be `solo`, not `solo-api` or `rmcp`; got: {body}"
12996            );
12997            // `protocolVersion` is the static value the dispatcher
12998            // emits. v0.11.4 bumped this to "2025-03-26" — the spec
12999            // version that introduced the Streamable HTTP transport
13000            // this handler implements. The stdio loop reports
13001            // "2024-11-05" (rmcp 0.1.x's supported spec). Different
13002            // transports, different versions, each honest.
13003            assert_eq!(
13004                body.pointer("/result/protocolVersion")
13005                    .and_then(|v| v.as_str()),
13006                Some("2025-03-26"),
13007            );
13008        });
13009        h.shutdown(&runtime);
13010    }
13011
13012    // ----------------------------------------------------------------
13013    // v0.11.0 P4 — notifications/message bridge from InvalidateEvent
13014    // ----------------------------------------------------------------
13015
13016    /// v0.11.0 P4: a fresh POST /mcp (no session id) causes the per-
13017    /// session invalidate bridge to be spawned. Pin by firing an
13018    /// invalidate on the harness's broadcast sender AFTER the session
13019    /// is allocated and asserting the session's own event channel
13020    /// receives an MCP `notifications/message` event.
13021    #[test]
13022    fn session_subscribes_to_tenant_invalidate_on_creation() {
13023        let runtime = rt();
13024        let h = Harness::new(&runtime);
13025        let r = h.router.clone();
13026        let store = h.mcp_sessions.clone();
13027        let sender = h.invalidate_sender();
13028        runtime.block_on(async move {
13029            // Allocate session — POST handler spawns the bridge.
13030            let session_id = allocate_mcp_session(r).await;
13031            let state = session_state_for_test(&store, &session_id);
13032            let mut rx = state.subscribe_events();
13033            // Fire one invalidate on the tenant's broadcast.
13034            sender
13035                .send(InvalidateEvent {
13036                    reason: "memory.remember".to_string(),
13037                    tenant_id: "default".to_string(),
13038                    ts_ms: 1_715_625_600_000,
13039                    kind: "episode".to_string(),
13040                })
13041                .expect("at least one subscriber (the bridge)");
13042            // Bridge forwards it to the session as an MCP Message.
13043            let received = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
13044                .await
13045                .expect("bridge must forward invalidate within 2s")
13046                .expect("session receiver must observe published event");
13047            assert_eq!(received.event, crate::mcp_session::McpEventKind::Message);
13048            assert_eq!(
13049                received.data["method"].as_str(),
13050                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
13051            );
13052        });
13053        h.shutdown(&runtime);
13054    }
13055
13056    /// v0.11.0 P4: pin the exact envelope shape — `jsonrpc=2.0`,
13057    /// `method=notifications/message`, `params.{level,logger,data,details}`.
13058    /// One full round-trip through the bridge so a future refactor
13059    /// that changes the wire format trips this test.
13060    #[test]
13061    fn invalidate_event_translates_to_mcp_notifications_message() {
13062        let runtime = rt();
13063        let h = Harness::new(&runtime);
13064        let r = h.router.clone();
13065        let store = h.mcp_sessions.clone();
13066        let sender = h.invalidate_sender();
13067        runtime.block_on(async move {
13068            let session_id = allocate_mcp_session(r).await;
13069            let state = session_state_for_test(&store, &session_id);
13070            let mut rx = state.subscribe_events();
13071            sender
13072                .send(InvalidateEvent {
13073                    reason: "memory.ingest_document".to_string(),
13074                    tenant_id: "default".to_string(),
13075                    ts_ms: 1_715_625_999_999,
13076                    kind: "document".to_string(),
13077                })
13078                .expect("at least one subscriber");
13079            let received = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
13080                .await
13081                .expect("forward within 2s")
13082                .expect("session must receive event");
13083            // Envelope shape.
13084            assert_eq!(received.data["jsonrpc"].as_str(), Some("2.0"));
13085            assert_eq!(
13086                received.data["method"].as_str(),
13087                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
13088            );
13089            let params = &received.data["params"];
13090            assert_eq!(
13091                params["level"].as_str(),
13092                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LEVEL),
13093            );
13094            assert_eq!(
13095                params["logger"].as_str(),
13096                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LOGGER),
13097            );
13098            // document kind maps to documents_updated.
13099            assert_eq!(
13100                params["data"].as_str(),
13101                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_DOCUMENTS_UPDATED),
13102            );
13103            // details preserves the structured original event.
13104            assert_eq!(
13105                params["details"]["reason"].as_str(),
13106                Some("memory.ingest_document"),
13107            );
13108            assert_eq!(params["details"]["kind"].as_str(), Some("document"),);
13109            assert_eq!(params["details"]["ts_ms"].as_i64(), Some(1_715_625_999_999),);
13110        });
13111        h.shutdown(&runtime);
13112    }
13113
13114    /// v0.11.0 P4: two sessions exist; each has its own bridge. An
13115    /// invalidate fires once on the (shared, single-tenant) broadcast
13116    /// and BOTH sessions receive it. Pins that the bridge is correctly
13117    /// per-session-scoped: it doesn't leak to a wrong session AND it
13118    /// doesn't fail to fan out to all sessions of the same tenant.
13119    ///
13120    /// The harness is single-tenant by design, so the "wrong tenant
13121    /// doesn't receive" half is structurally guaranteed (different
13122    /// tenants would have different `invalidate_sender`s — the
13123    /// `mcp_notify` unit tests pin the bridge wiring against a fake
13124    /// channel directly). This integration test pins the
13125    /// per-session-of-same-tenant fan-out behaviour.
13126    #[test]
13127    fn invalidate_event_published_to_correct_session_only() {
13128        let runtime = rt();
13129        let h = Harness::new(&runtime);
13130        let r = h.router.clone();
13131        let store = h.mcp_sessions.clone();
13132        let sender = h.invalidate_sender();
13133        runtime.block_on(async move {
13134            // Allocate two distinct sessions.
13135            let session_id_a = allocate_mcp_session(r.clone()).await;
13136            let session_id_b = allocate_mcp_session(r).await;
13137            assert_ne!(session_id_a, session_id_b);
13138            let state_a = session_state_for_test(&store, &session_id_a);
13139            let state_b = session_state_for_test(&store, &session_id_b);
13140            let mut rx_a = state_a.subscribe_events();
13141            let mut rx_b = state_b.subscribe_events();
13142            // Fire one invalidate.
13143            sender
13144                .send(InvalidateEvent {
13145                    reason: "memory.consolidate".to_string(),
13146                    tenant_id: "default".to_string(),
13147                    ts_ms: 1_715_625_600_000,
13148                    kind: "cluster".to_string(),
13149                })
13150                .expect("at least one subscriber");
13151            // Both sessions' bridges receive it independently.
13152            let a = tokio::time::timeout(std::time::Duration::from_secs(2), rx_a.recv())
13153                .await
13154                .expect("session A receives within 2s")
13155                .expect("session A receiver alive");
13156            let b = tokio::time::timeout(std::time::Duration::from_secs(2), rx_b.recv())
13157                .await
13158                .expect("session B receives within 2s")
13159                .expect("session B receiver alive");
13160            for evt in [&a, &b] {
13161                assert_eq!(evt.event, crate::mcp_session::McpEventKind::Message);
13162                assert_eq!(
13163                    evt.data["params"]["data"].as_str(),
13164                    Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_CONSOLIDATION_UPDATED),
13165                );
13166            }
13167        });
13168        h.shutdown(&runtime);
13169    }
13170
13171    /// v0.11.0 P4: full GET-stream integration. A POST opens a session
13172    /// AND spawns its bridge; an invalidate fires on the tenant's
13173    /// broadcast; a GET subscriber reading the SSE wire format
13174    /// observes the `event: message` SSE frame carrying the spec-shape
13175    /// `notifications/message` envelope.
13176    ///
13177    /// Uses the `Last-Event-ID` resume path with id 0 (sentinel —
13178    /// "I'm a new subscriber, no replay"); the invalidate fires AFTER
13179    /// the GET opens so the live broadcast receiver picks it up.
13180    #[test]
13181    fn mcp_get_subscriber_receives_notifications_message_event() {
13182        let runtime = rt();
13183        let h = Harness::new(&runtime);
13184        let r = h.router.clone();
13185        let sender = h.invalidate_sender();
13186        runtime.block_on(async move {
13187            let session_id = allocate_mcp_session(r.clone()).await;
13188            // Open the GET stream first so the live broadcast receiver
13189            // is attached BEFORE the invalidate fires.
13190            let (status, mut body, _) = open_mcp_get_stream(r, &session_id, None).await;
13191            assert_eq!(status, StatusCode::OK);
13192            // Drain the init frame.
13193            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
13194                .await
13195                .expect("init event must arrive within 2s");
13196            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME,);
13197            // Now fire the invalidate.
13198            sender
13199                .send(InvalidateEvent {
13200                    reason: "memory.triples_extract".to_string(),
13201                    tenant_id: "default".to_string(),
13202                    ts_ms: 1_715_625_600_000,
13203                    kind: "triple".to_string(),
13204                })
13205                .expect("send must succeed");
13206            // Bridge forwards → SessionState.publish_event → broadcast
13207            // → GET stream consumer → SSE wire frame.
13208            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
13209                .await
13210                .expect("message event must arrive within 2s");
13211            assert_eq!(ev.event, crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,);
13212            assert_eq!(ev.data["jsonrpc"].as_str(), Some("2.0"));
13213            assert_eq!(
13214                ev.data["method"].as_str(),
13215                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
13216            );
13217            assert_eq!(
13218                ev.data["params"]["data"].as_str(),
13219                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_GRAPH_UPDATED),
13220            );
13221            assert_eq!(
13222                ev.data["params"]["details"]["reason"].as_str(),
13223                Some("memory.triples_extract"),
13224            );
13225        });
13226        h.shutdown(&runtime);
13227    }
13228}
13229
/// Table-driven checks for `is_localhost_origin`: each test lists the
/// origins that belong to one category and asserts the expected
/// verdict for every entry.
#[cfg(test)]
mod cors_tests {
    use super::is_localhost_origin;

    /// Asserts that every origin in `origins` is accepted.
    fn assert_accepted(origins: &[&str]) {
        for &origin in origins {
            assert!(
                is_localhost_origin(origin),
                "origin should be accepted: {origin:?}",
            );
        }
    }

    /// Asserts that every origin in `origins` is rejected.
    fn assert_rejected(origins: &[&str]) {
        for &origin in origins {
            assert!(
                !is_localhost_origin(origin),
                "origin should be rejected: {origin:?}",
            );
        }
    }

    #[test]
    fn accepts_canonical_localhost_origins() {
        assert_accepted(&[
            "http://localhost",
            "http://localhost:3000",
            "https://localhost:8443",
            "http://127.0.0.1",
            "http://127.0.0.1:5173",
            "http://[::1]",
            "http://[::1]:8080",
        ]);
    }

    #[test]
    fn rejects_remote_origins() {
        assert_rejected(&[
            "http://example.com",
            "https://malicious.example",
            "http://192.168.1.5",
            "http://10.0.0.1",
        ]);
    }

    #[test]
    fn rejects_dns_rebinding_tricks() {
        // Services such as nip.io resolve to 127.0.0.1, yet the Origin
        // header still carries the public DNS name. Rejecting these
        // names closes the rebinding-via-Origin gap.
        assert_rejected(&[
            "http://127.0.0.1.nip.io",
            "http://localhost.evil.com",
            "http://evil.localhost",
        ]);
    }

    #[test]
    fn rejects_non_http_schemes() {
        assert_rejected(&["file:///", "ws://localhost:3000", "javascript:alert(1)"]);
    }

    #[test]
    fn rejects_malformed() {
        assert_rejected(&["", "localhost", "//localhost"]);
    }
}